R语言 如何计算组间平均差?

stszievb  于 2023-02-06  发布在  其他
关注(0)|答案(3)|浏览(200)

从这段代码开始,我想计算7个时间点之间的差异:

# For every (tests0, GROUP) combination, summarise all columns whose name
# starts with "score": per-column mean and standard error, plus the estimate
# and p-value of a paired t-test across those columns.
# NOTE(review): `out` is defined outside this snippet; `std.error()` is not a
# base/tidyverse function (presumably plotrix::std.error — confirm).
data <- out %>%
  group_by(tests0, GROUP) %>%
  summarise(
    # Wrap the one-row tibble in a list so summarise() stores it as a
    # list-column; it is expanded again by unnest() below.
    all = list(across(starts_with("score")) %>%
                 {
                   tibble(
                     # Column means; the non-digit prefix of each name is replaced by "mean".
                     means   = data.frame(map(., ~ mean(.x, na.rm = TRUE)) %>% set_names(., str_replace(names(.), "\\D+", "mean"))),
                     # Column standard errors; the non-digit prefix is replaced by "stederr".
                     stderrs = data.frame(map(., ~ std.error(.x, na.rm = TRUE)) %>% set_names(., str_replace(names(.), "\\D+", "stederr"))),
                     # reduce() feeds the score columns pairwise into t.test();
                     # possibly() turns any error into NA instead of aborting.
                     ttest1   = data.frame(possibly(~ reduce(., ~ t.test(.x, .y, paired = TRUE))["estimate"], NA)(.)),
                     ttest2   = data.frame(possibly(~ reduce(., ~ t.test(.x, .y, paired = TRUE))["p.value"], NA)(.))
                   )
                 })
  ) %>%  # was `)>%`, which is a parse error; the pipe operator is `%>%`
  unnest(all)

# Groups:   tests0 [6]
   tests0     GROUP    means$mean0 $mean7 stderrs$stederr0 $stederr7 ttest1$estimate ttest2$p.value
   <fct>      <fct>          <dbl>  <dbl>            <dbl>     <dbl>           <dbl>          <dbl>
 1 ADAS_CogT0 CONTROL         12.6   13.6            0.525     0.662          -1.15         0.00182
 2 ADAS_CogT0 TRAINING        14.0   12.6            0.613     0.570           1.40         0.00295
 3 PVF_T0     CONTROL         32.1   31.3            1.22      1.45            0.498        0.636  
 4 PVF_T0     TRAINING        31.6   34.3            1.37      1.51           -2.48         0.0102 
 5 ROCF_CT0   CONTROL         29.6   30.3            0.893     0.821          -0.180        0.835  
 6 ROCF_CT0   TRAINING        30.1   29.5            0.906     0.929           0.489        0.615  
 7 ROCF_IT0   CONTROL         12.8   12.2            0.563     0.683           0.580        0.356  
 8 ROCF_IT0   TRAINING        10.9   12.3            0.735     0.768          -1.44         0.0238 
 9 ROCF_RT0   CONTROL         12.1   12.5            0.725     0.797          -0.370        0.598  
10 ROCF_RT0   TRAINING        10.5   10.9            0.746     0.742          -0.534        0.370  
11 SVF_T0     CONTROL         35.5   34              1.05      1.15            1.42         0.107  
12 SVF_T0     TRAINING        34.1   32.9            1.04      1.16            0.962        0.231

或者,更好的做法是新增一列,用来报告两组(CONTROL 和 TRAINING)之间各均值列的差值。你知道我应该在上面的代码中添加什么吗?
这里是数据集,如果有人想尝试找出一个可能的解决方案。
先谢了

q3qa4bjr

q3qa4bjr1#

# Sort rows by GROUP first, then group by test. Within each test, diff() over
# the `$mean7` column yields one value per pair of consecutive rows; mutate()
# attaches it to the rows of that test.
what_you_have %>%
  arrange(GROUP) %>%
  group_by(tests0) %>%
  mutate(
    mean7_ctrl_train_diff = diff(`$mean7`)
  )

使用此数据:

# Rebuild the printed summary table as a data frame.
# The header has one field fewer than the data rows, so read.table() uses the
# leading row number as row names. check.names = FALSE keeps the literal
# column names such as `$mean7` and `means$mean0` instead of sanitising them.
what_you_have <- read.table(text = '  tests0     GROUP    means$mean0 $mean7 stderrs$stederr0 $stederr7 ttest1$estimate ttest2$p.value
 1 ADAS_CogT0 CONTROL         12.6   13.6            0.525     0.662          -1.15         0.00182
 2 ADAS_CogT0 TRAINING        14.0   12.6            0.613     0.570           1.40         0.00295
 3 PVF_T0     CONTROL         32.1   31.3            1.22      1.45            0.498        0.636  
 4 PVF_T0     TRAINING        31.6   34.3            1.37      1.51           -2.48         0.0102 
 5 ROCF_CT0   CONTROL         29.6   30.3            0.893     0.821          -0.180        0.835  
 6 ROCF_CT0   TRAINING        30.1   29.5            0.906     0.929           0.489        0.615  
 7 ROCF_IT0   CONTROL         12.8   12.2            0.563     0.683           0.580        0.356  
 8 ROCF_IT0   TRAINING        10.9   12.3            0.735     0.768          -1.44         0.0238 
 9 ROCF_RT0   CONTROL         12.1   12.5            0.725     0.797          -0.370        0.598  
10 ROCF_RT0   TRAINING        10.5   10.9            0.746     0.742          -0.534        0.370  
11 SVF_T0     CONTROL         35.5   34              1.05      1.15            1.42         0.107  
12 SVF_T0     TRAINING        34.1   32.9            1.04      1.16            0.962        0.231', header = TRUE, check.names = FALSE)
iaqfqrcu

iaqfqrcu2#

创建 data 时,means、stderrs、ttest1 和 ttest2 这几列本身就是 data.frame(即嵌套的数据框列):

> str(data)
grouped_df [2 × 6] (S3: grouped_df/tbl_df/tbl/data.frame)
 $ tests0 : Factor w/ 6 levels "ADAS_CogT0","PVF_T0",..: 1 1
 $ GROUP  : Factor w/ 2 levels "CONTROL","TRAINING": 1 2
 $ means  :'data.frame':    2 obs. of  2 variables:
  ..$ mean0: num [1:2] 12.5 14
  ..$ mean7: num [1:2] 13.5 12.6
 $ stderrs:'data.frame':    2 obs. of  2 variables:
  ..$ stederr0: num [1:2] 0.574 0.613
  ..$ stederr7: num [1:2] 0.77 0.57
 $ ttest1 :'data.frame':    2 obs. of  1 variable:
  ..$ estimate: num [1:2] -1.24 1.4
 $ ttest2 :'data.frame':    2 obs. of  1 variable:
  ..$ p.value: num [1:2] 0.00471 0.00295
 - attr(*, "groups")= tibble [1 × 2] (S3: tbl_df/tbl/data.frame)
  ..$ tests0: Factor w/ 6 levels "ADAS_CogT0","PVF_T0",..: 1
  ..$ .rows : list<int> [1:1] 
  .. ..$ : int [1:2] 1 2
  .. ..@ ptype: int(0) 
  ..- attr(*, ".drop")= logi TRUE

因此,在执行diff之前,我们可能需要对这些data.frame列执行unpack

library(dplyr)
library(tidyr)
# Flatten the nested data.frame columns into ordinary columns, then add the
# difference of each row's mean7 from the previous row's (NA for the first
# row), and finally drop the grouping.
data %>%
  unpack(cols = where(is.data.frame)) %>%
  mutate(
    mean7diff = c(NA, diff(mean7))
  ) %>%
  ungroup()
  • 输出
# A tibble: 2 × 9
  tests0     GROUP    mean0 mean7 stederr0 stederr7 estimate p.value mean7diff
  <fct>      <fct>    <dbl> <dbl>    <dbl>    <dbl>    <dbl>   <dbl>     <dbl>
1 ADAS_CogT0 CONTROL   12.5  13.5    0.574    0.770    -1.24 0.00471    NA    
2 ADAS_CogT0 TRAINING  14.0  12.6    0.613    0.570     1.40 0.00295    -0.878
esyap4oy

esyap4oy3#

不确定这是否是您要找的?

# Drop incomplete rows and the listed subject-level columns, average every
# score column per GROUP (one wide column per score/GROUP pair), then take
# TRAINING minus CONTROL for each score.
out %>%
  drop_na() %>%
  select(-c(ID, Gender, Age, Education)) %>%
  # Make sure no grouping is carried into the reshape.
  ungroup() %>%
  pivot_wider(
    names_from = GROUP,
    values_from = contains("score"),
    values_fn = mean
  ) %>%
  transmute(
    tests0,
    diff0 = score0_TRAINING - score0_CONTROL,
    diff7 = score7_TRAINING - score7_CONTROL
  )
# A tibble: 1 x 3
  tests0     diff0  diff7
  <fct>      <dbl>  <dbl>
1 ADAS_CogT0  1.76 -0.878

相关问题