我在R中写了三个不同的方法来使变量S1_Total_Time成为使用dplyr mutate的多个列的总和。当我运行它们时,我得到变量S1_Total_Time的不同输出。在我的 Dataframe 中,每行都是一个人,所以我使用rowwise来使总和逐行进行。
在方法A中,我按行获得所有列值的总和,忽略NA。每行都以一个值结束,但有一个问题。一行是所有NA的人,他们的总和TotalTime为0,这是不准确的。他们的S1_TotalTime应该是NA,而不是0。为了解决这个问题,我尝试了方法B。
在方法B中,我也得到了所有列值的行和。在这里,我也要求它首先使用ifelse进行确定,因为我希望它只计算时间值,如果这两件事都为真:
1.错误的时间变量的伴随变量不是NA(这有助于我排除点击“下一步”按钮的试验,给它一个时间,但没有实际数据)
1.我感兴趣并且想要返回的定时变量也不是NA如果其中任何一个是NA,那么我要求它返回NA,因为该变量将被包含在sum中。在方法B的结尾,我要求它排除NA,以便它应该只对不是NA的ifelses的乘积求和。这不起作用,它返回S1_TotalTime作为任何具有 any NA的行的NA。
如果我改变方法B,使其在1或2为假时返回0或另一个数字,而不是NA(方法C),它的工作原理几乎与方法A相同,但有一些神秘的偏差。
- 37/41行与方法A相差正好+1。
- 4/41行偏离大于1,其中它们的方法A比它们的方法B低-14到-127。
我的问题:
1.为什么方法A和C产生不同的结果?
1.我是否错过了一些东西,试图确保两个变量在求和之前都有数据,如果它们没有,排除它们,但仍然从我的ifelse没有使NA的变量中得到一个总和?方法A只是一个问题,因为只有少数几行所有定时变量都是NA,我不能让这些变量的总和为0。
sessionInfo()
R版本4.2.0(2022-04-22)
平台:x86_64-apple-darwin 17.0(64位)
运行于:macOS Monterey 12.4
基质产品:默认值
LAPACK:/Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
区域设置:
[1]en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8#
附加的基本 Package :
[1]stats grdevices utils datasets methods base
其他附加包:
[1]nlme_3.1-157 rstatix_0.7.2 ggpubr_0.6.0 reshape2_1.4.4 data.table_1.14.2 lubridate_1.9.2
[7]forcats_1.0.0 stringr_1.5.0 purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.1.8
[13]tidyverse_2.0.0 Hmisc_4.7-0公式_1.2 -4生存_3.3 -1晶格_0.20 -45 dplyr_1.1.0
[19]ggplot2_3.4.1 haven_2.5.1
样本数据:
Data_for_analysis \<- structure(list(S1_1.1_timing_Page_Submit = c(38, 12, NA, 52),
S1_1.2a_timing_Page_Submit = c(19, 14, NA, 68), S1_1.2b_timing_Page_Submit = c(48,
65, NA, 190), S1_1.3_timing_Page_Submit = c(66, 9, NA, 20
), S1_2.1_timing_Page_Submit = c(307, 153, NA, 90.38), S1_2.2_timing_Page_Submit = c(NA,
28, NA, 3.752), S1_2.3_timing_Page_Submit = c(NA, NA, NA,
58\.996), S1_2.4a_timing_Page_Submit = c(NA, NA, NA, 1.203
), S1_2.4b_timing_Page_Submit = c(NA, NA, NA, 61.671), S1_2.5_timing_Page_Submit = c(NA,
NA, NA, 0.249), S1_2.6_timing_Page_Submit = c(NA, NA, NA,
0\.201), S1_3.1_timing_Page_Submit = c(NA, NA, NA, 0.244),
S1_3.2_timing_Page_Submit = c(NA, NA, NA, 0.224), S1_3.3_timing_Page_Submit = c(NA,
NA, NA, 0.158), S1_3.4_timing_Page_Submit = c(NA, NA, NA,
0\.2), S1_3.5_timing_Page_Submit = c(NA, NA, NA, 0.159), S1_3.6_timing_Page_Submit = c(NA,
NA, NA, 0.695), S1_3.7_timing_Page_Submit = c(NA, NA, NA,
0\.263), S1_3.8_timing_Page_Submit = c(NA, NA, NA, 0.267),
S1_3.9_timing_Page_Submit = c(NA, NA, NA, 0.136), S1_3.10_timing_Page_Submit = c(NA,
NA, NA, 0.216), S1_3.11_timing_Page_Submit = c(NA, NA, NA,
0\.249), S1_3.12_timing_Page_Submit = c(NA, NA, NA, 3.127),
ErrorNum_S1_1.1 = c(1, 0, 0, 1), ErrorNum_S1_1.2a = c(0,
0, 0, 1), ErrorNum_S1_1.2b = c(0, 1, 0, 2), ErrorNum_S1_1.3 = c(0,
0, 0, 0), ErrorNum_S1_2.1 = c(13, 4, 1, 1), ErrorNum_S1_2.2 = c(NA,
0, 0, 5), ErrorNum_S1_2.3 = c(NA, 7, 0, NA), ErrorNum_S1_2.4a = c(NA,
NA, 0, NA), ErrorNum_S1_2.4b = c(NA, NA, 3, NA), ErrorNum_S1_2.5 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_2.6 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.1 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.2 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.3 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.4 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.5 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.6 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.7 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.8 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.9 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.10 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.11 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_), ErrorNum_S1_3.12 = c(NA_real\_,
NA_real\_, NA_real\_, NA_real\_)), class = c("rowwise_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L), groups = structure(list(
.rows = structure(list(1L, 2L, 3L, 4L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame")))
library(haven)
library(ggplot2)
library(dplyr)
library(Hmisc)
library(tidyverse)
#Method A
Data_for_analysis_A \<- Data_for_analysis %\>%
rowwise() %\>%
mutate(S1_TotalTime = sum(S1_1.1_timing_Page_Submit,
S1_1.2a_timing_Page_Submit,
S1_1.2b_timing_Page_Submit,
S1_1.3_timing_Page_Submit,
S1_2.1_timing_Page_Submit,
S1_2.2_timing_Page_Submit,
S1_2.3_timing_Page_Submit,
S1_2.4a_timing_Page_Submit,
S1_2.4b_timing_Page_Submit,
S1_2.5_timing_Page_Submit,
S1_2.6_timing_Page_Submit,
S1_3.1_timing_Page_Submit,
S1_3.2_timing_Page_Submit,
S1_3.3_timing_Page_Submit,
S1_3.4_timing_Page_Submit,
S1_3.5_timing_Page_Submit,
S1_3.6_timing_Page_Submit,
S1_3.7_timing_Page_Submit,
S1_3.8_timing_Page_Submit,
S1_3.9_timing_Page_Submit,
S1_3.10_timing_Page_Submit,
S1_3.11_timing_Page_Submit,
S1_3.12_timing_Page_Submit, na.rm=TRUE))
Data_for_analysis_B \<- Data_for_analysis %\>%
rowwise() %\>%
mutate(S1_TotalTime = sum(
ifelse(!is.na(ErrorNum_S1_1.1) && !is.na(S1_1.1_timing_Page_Submit) , S1_1.1_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_1.2a) && !is.na(S1_1.2a_timing_Page_Submit), S1_1.2a_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_1.2b) && !is.na(S1_1.2b_timing_Page_Submit), S1_1.2b_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_1.3) && !is.na(S1_1.3_timing_Page_Submit) , S1_1.3_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_2.1) && !is.na(S1_2.1_timing_Page_Submit) , S1_2.1_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_2.2) && !is.na(S1_2.2_timing_Page_Submit) , S1_2.2_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_2.3) && !is.na(S1_2.3_timing_Page_Submit) , S1_2.3_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_2.4a) && !is.na(S1_2.4a_timing_Page_Submit), S1_2.4a_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_2.4b) && !is.na(S1_2.4b_timing_Page_Submit), S1_2.4b_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_2.5) && !is.na(S1_2.5_timing_Page_Submit) , S1_2.5_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_2.6) && !is.na(S1_2.6_timing_Page_Submit) , S1_2.6_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.1) && !is.na(S1_3.1_timing_Page_Submit) , S1_3.1_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.2) && !is.na(S1_3.2_timing_Page_Submit) , S1_3.2_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.3) && !is.na(S1_3.3_timing_Page_Submit) , S1_3.3_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.4) && !is.na(S1_3.4_timing_Page_Submit) , S1_3.4_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.5) && !is.na(S1_3.5_timing_Page_Submit) , S1_3.5_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.6) && !is.na(S1_3.6_timing_Page_Submit) , S1_3.6_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.7) && !is.na(S1_3.7_timing_Page_Submit) , S1_3.7_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.8) && !is.na(S1_3.8_timing_Page_Submit) , S1_3.8_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.9) && !is.na(S1_3.9_timing_Page_Submit) , S1_3.9_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.10) && !is.na(S1_3.10_timing_Page_Submit), S1_3.10_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.11) && !is.na(S1_3.11_timing_Page_Submit), S1_3.11_timing_Page_Submit, NA),
ifelse(!is.na(ErrorNum_S1_3.12) && !is.na(S1_3.12_timing_Page_Submit), S1_3.12_timing_Page_Submit, NA), rm.na=TRUE))
#Method C (same as B but using 0 instead of NA)
Data_for_analysis_C \<- Data_for_analysis %\>%
rowwise() %\>%
mutate(S1_TotalTime = sum(
ifelse(!is.na(ErrorNum_S1_1.1) && !is.na(S1_1.1_timing_Page_Submit) , S1_1.1_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_1.2a) && !is.na(S1_1.2a_timing_Page_Submit), S1_1.2a_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_1.2b) && !is.na(S1_1.2b_timing_Page_Submit), S1_1.2b_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_1.3) && !is.na(S1_1.3_timing_Page_Submit) , S1_1.3_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_2.1) && !is.na(S1_2.1_timing_Page_Submit) , S1_2.1_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_2.2) && !is.na(S1_2.2_timing_Page_Submit) , S1_2.2_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_2.3) && !is.na(S1_2.3_timing_Page_Submit) , S1_2.3_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_2.4a) && !is.na(S1_2.4a_timing_Page_Submit), S1_2.4a_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_2.4b) && !is.na(S1_2.4b_timing_Page_Submit), S1_2.4b_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_2.5) && !is.na(S1_2.5_timing_Page_Submit) , S1_2.5_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_2.6) && !is.na(S1_2.6_timing_Page_Submit) , S1_2.6_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.1) && !is.na(S1_3.1_timing_Page_Submit) , S1_3.1_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.2) && !is.na(S1_3.2_timing_Page_Submit) , S1_3.2_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.3) && !is.na(S1_3.3_timing_Page_Submit) , S1_3.3_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.4) && !is.na(S1_3.4_timing_Page_Submit) , S1_3.4_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.5) && !is.na(S1_3.5_timing_Page_Submit) , S1_3.5_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.6) && !is.na(S1_3.6_timing_Page_Submit) , S1_3.6_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.7) && !is.na(S1_3.7_timing_Page_Submit) , S1_3.7_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.8) && !is.na(S1_3.8_timing_Page_Submit) , S1_3.8_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.9) && !is.na(S1_3.9_timing_Page_Submit) , S1_3.9_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.10) && !is.na(S1_3.10_timing_Page_Submit), S1_3.10_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.11) && !is.na(S1_3.11_timing_Page_Submit), S1_3.11_timing_Page_Submit, 0),
ifelse(!is.na(ErrorNum_S1_3.12) && !is.na(S1_3.12_timing_Page_Submit), S1_3.12_timing_Page_Submit, 0), rm.na=TRUE))
1条答案
按热度按时间pu3pd22g1#
或许是这样的?