如何在R中创建总结缺失成像数据的tibble

olhwl3o2  于 2023-01-18  发布在  其他
关注(0)|答案(4)|浏览(117)

我使用的成像数据格式类似于:

   name  side  modality
   <chr> <chr> <chr>   
 1 alex  right xray    
 2 alex  left  xray    
 3 brad  right xray    
 4 brad  left  xray    
 5 alex  right ct      
 6 alex  left  ct      
 7 brad  right ct      
 8 alex  right mri     
 9 brad  right mri     
10 brad  left  mri

假设每个人都应该有所有模态的左侧和右侧图像,那么上面的数据表明:Alex缺少左侧MRI,Brad缺少左侧CT,而Charlie(他根本没有出现在data中)的所有图像都缺失。我想创建一个汇总表,在给定一份姓名列表names(其中包括Charlie)的情况下,显示每项图像是"present"(存在)还是"absent"(缺失),结果类似这样:

name    left_xray right_xray left_ct right_ct left_mri right_mri n_absent
  <chr>   <chr>     <chr>      <chr>   <chr>    <chr>    <chr>        <dbl>
1 alex    present   present    present present  absent   present          1
2 brad    present   present    absent  present  present  present          1
3 charlie absent    absent     absent  absent   absent   absent           6

我已经使用了各种dplyr动词来获取每种模态中缺失数据的患者列表,但我真的不确定从哪里开始创建汇总表。
虚拟数据:

# Example imaging records: one row per (name, side, modality) image on file.
data <- tibble(
  name = c(
    "alex", "alex", "brad", "brad", "alex",
    "alex", "brad", "alex", "brad", "brad"
  ),
  side = c(
    "right", "left", "right", "left", "right",
    "left", "right", "right", "right", "left"
  ),
  # Four xray rows, then three ct rows, then three mri rows.
  modality = rep(c("xray", "ct", "mri"), times = c(4, 3, 3))
)

# Everyone who is expected to have images, including people with no rows
# in `data` at all.
names <- tibble(name = c("alex", "brad", "charlie"))

谢谢大家!

rkue9o1l

rkue9o1l1#

代码

library(dplyr)
library(tidyr)

# Build every expected (name, modality, side) combination, flag each one as
# "present" or "absent" depending on whether it occurs in `data`, then
# reshape to one row per person with a per-row count of absent images.
expand_grid(
  name = c("alex", "brad", "charlie"),
  modality = c("xray", "ct", "mri"),
  side = c("right", "left")
) %>%
  left_join(
    data %>%
      mutate(aux = "present"),
    # Spell out the join keys so the match does not depend on (or emit a
    # message about) whichever columns the two tables happen to share.
    by = c("name", "modality", "side")
  ) %>%
  # Combinations with no match in `data` come back from the join as NA.
  mutate(aux = replace_na(aux, "absent")) %>%
  unite(modality_side, side, modality) %>%
  pivot_wider(names_from = modality_side, values_from = aux) %>%
  # The result is left rowwise; add ungroup() before any further
  # column-wise processing.
  rowwise() %>%
  mutate(n_absent = sum(c_across(-name) == "absent"))

输出

# A tibble: 3 x 8
# Rowwise: 
  name    right_xray left_xray right_ct left_ct right_mri left_mri n_absent
  <chr>   <chr>      <chr>     <chr>    <chr>   <chr>     <chr>       <int>
1 alex    present    present   present  present present   absent          1
2 brad    present    present   present  absent  present   present         1
3 charlie absent     absent    absent   absent  absent    absent          6
zdwk9cvp

zdwk9cvp2#

一种使用full_join的方法:将现有数据与 name、side 和 modality 的所有可能组合进行连接。

library(dplyr)
library(tidyr)

# Tag the observed rows (grp = 1), full-join them against every possible
# name/side/modality combination (grp = 2), and label each combination by
# whether the observed tag survived the join.
df %>%
  mutate(grp = 1) %>%
  full_join(
    crossing(
      name = union(df$name, Names$name),
      side = unique(df$side),
      modality = unique(df$modality)
    ) %>%
      mutate(grp = 2),
    by = c("name", "side", "modality")
  ) %>%
  # Keep the keys plus the observed-side tag; the grid-side tag (grp.y)
  # is not needed.
  select(name:grp.x) %>%
  mutate(grp.x = if_else(is.na(grp.x), "absent", "present")) %>%
  pivot_wider(names_from = c("side", "modality"), values_from = grp.x) %>%
  rowwise() %>%
  # The status columns are the ones whose names contain "_".
  mutate(n_absent = sum(across(contains("_"), ~ .x == "absent"))) %>%
  ungroup()
结果
# A tibble: 3 × 8
  name    right_xray left_xray right_ct left_ct right_mri left_mri n_absent
  <chr>   <chr>      <chr>     <chr>    <chr>   <chr>     <chr>       <int>
1 alex    present    present   present  present present   absent          1
2 brad    present    present   present  absent  present   present         1
3 charlie absent     absent    absent   absent  absent    absent          6
数据
# Imaging records on file, one row per (name, side, modality) image. Built
# with structure() so that no package constructor is required.
df <- structure(
  list(
    name = c(
      "alex", "alex", "brad", "brad", "alex",
      "alex", "brad", "alex", "brad", "brad"
    ),
    side = c(
      "right", "left", "right", "left", "right",
      "left", "right", "right", "right", "left"
    ),
    # Four xray rows, then three ct rows, then three mri rows.
    modality = rep(c("xray", "ct", "mri"), times = c(4L, 3L, 3L))
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -10L)
)

# Everyone who is expected to have images, including people with no rows
# in `df`.
Names <- structure(
  list(name = c("alex", "brad", "charlie")),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -3L)
)
kcwpcxri

kcwpcxri3#

你试试看

library(tidyverse)

# Example imaging records: one row per (name, side, modality) image on file.
data <- tibble(
  name = c(
    "alex", "alex", "brad", "brad", "alex",
    "alex", "brad", "alex", "brad", "brad"
  ),
  side = c(
    "right", "left", "right", "left", "right",
    "left", "right", "right", "right", "left"
  ),
  modality = c(
    "xray", "xray", "xray", "xray", "ct",
    "ct", "ct", "mri", "mri", "mri"
  )
)

# Everyone who is expected to have images, plus the distinct sides and
# modalities observed in `data` (each kept as a one-column tibble).
names <- tibble(name = c("alex", "brad", "charlie"))
side <- data %>% distinct(side)
modality <- data %>% distinct(modality)

# Every name x side x modality combination that should exist.
# expand_grid() splices the three unnamed one-column tibbles into columns
# and replaces the full_join(by = character(0)) cross-join idiom, which
# newer dplyr releases deprecate.
data2 <- expand_grid(names, side, modality) %>%
  mutate(new_col = paste0(side, "_", modality))

# Flag each combination as present/absent, then spread to one row per name.
data3 <- data2 %>%
  left_join(
    data %>% mutate(id = "present"),
    by = c("name", "side", "modality")
  ) %>%
  # Combinations with no match in `data` come back from the join as NA.
  mutate(id = replace_na(id, "absent")) %>%
  pivot_wider(id_cols = name, names_from = new_col, values_from = id)

# All status columns, i.e. everything except the identifier. A fixed index
# range such as 2:6 would silently drop the last of the six status columns
# and under-count absences.
named <- setdiff(colnames(data3), "name")

data4 <- data3 %>%
  mutate(n_absent = rowSums(across(all_of(named)) == "absent"))
bihw5rsg

bihw5rsg4#

您可以先将side和modality两列拼接在一起,然后用complete()生成姓名与该拼接列的所有组合,再将这种"long"格式转换为"wide"格式,最后统计每个人缺失(absent)的图像数量。

更新

我在解决方案中添加了一个按name进行的full_join步骤(与包含全部姓名的表连接),以适应OP更新后的需求。

library(tidyverse)

# Combine side and modality into one key, spread to one column per key,
# add anyone who has no images at all, then label and count absences.
data %>%
  mutate(
    tmp = paste0(side, "_", modality),
    tmp2 = 1,
    .keep = "unused"
  ) %>%
  complete(name, tmp) %>%
  pivot_wider(names_from = tmp, values_from = tmp2) %>%
  # Join against the table of expected names so that people with no rows in
  # `data` still get a row. `tmp` only ever existed as a column inside this
  # pipeline, so it cannot serve as the join table.
  # NOTE(review): assumes `names` is the one-column tibble of expected names
  # defined alongside the example data -- confirm.
  full_join(names, by = "name") %>%
  # Any cell still NA at this point has no matching image.
  mutate(across(-name, ~ if_else(is.na(.x), "absent", "present"))) %>%
  rowwise() %>%
  mutate(n_absent = sum(c_across(-name) == "absent")) %>%
  ungroup()

# A tibble: 3 × 8
  name    left_ct left_mri left_xray right_ct right_…¹ right…² n_abs…³
  <chr>   <chr>   <chr>    <chr>     <chr>    <chr>    <chr>     <int>
1 alex    present absent   present   present  present  present       1
2 brad    absent  present  present   present  present  present       1
3 charlie absent  absent   absent    absent   absent   absent        6
# … with abbreviated variable names ¹​right_mri, ²​right_xray,
#   ³​n_absent

相关问题