在R中使用循环/apply创建新列,以计算成对列之间的差值

50pmv0ei  于 2023-10-13  发布在  其他
关注(0)|答案(3)|浏览(122)

我有一个数据框,其中包含多组前/后测量值,我想为每个测量值创建一个新列,以计算每个变量的差异(变化)。
示例数据

# Example data: 5 rows, four markers, each measured twice
# (<marker>_OVX and <marker>_SAC). Built as a bare list carrying the tibble
# class vector and compact automatic row names.
df <- structure(
  list(
    Leptin_OVX = c(
      101537.030773452, 34184.1969018313, 54567.1867690491,
      29558.5420636246, 40929.680418857
    ),
    Leptin_SAC = c(
      19785.945743781, 124224.32770312, 89539.7367479193,
      29335.4677793977, 49085.7085270132
    ),
    MIP1A_OVX = c(
      198.955714001384, 9.39084362457698, 6.31036668689314,
      31.4248133610863, 61.7242016227428
    ),
    MIP1A_SAC = c(
      152.595958885867, 0, 6.31036668689314,
      12.0518867341972, 56.3458462409656
    ),
    IL4_OVX = c(84.3973038052031, 0, 0, 84.3973038052031, 0),
    IL4_SAC = c(0, 0, 0, 0, 0),
    IL1B_OVX = c(20.5433459761151, 0, 0, 0, 26.9522602664794),
    IL1B_SAC = c(0, 18.9503177384518, 14.986896887192, 0, 0)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

我可以手动执行以下操作:

# Manual approach: one hand-written difference (SAC minus OVX) per marker.
# The four new columns do not depend on each other, so a single mutate()
# call is sufficient.
df <- mutate(
  df,
  chg_Leptin = Leptin_SAC - Leptin_OVX,
  chg_MIP1A = MIP1A_SAC - MIP1A_OVX,
  chg_IL4 = IL4_SAC - IL4_OVX,
  chg_IL1B = IL1B_SAC - IL1B_OVX
)

我想找到一种方法,对其余所有列循环执行此操作:用每个“_SAC”列的值减去对应“_OVX”列的值(即 SAC − OVX,与上面的手动代码一致)。
谢谢你的帮助!

fivyi3re

fivyi3re1#

我们可以使用across,但名称会发生一些变化:

# For every column whose name ends in "_SAC", subtract its "_OVX" partner and
# store the result in a new "chg_<stub>" column alongside the existing ones.
df %>%
  mutate(
    across(ends_with("_SAC"),
           # cur_column() is the name of the "_SAC" column being processed;
           # swapping the suffix gives the partner's name, which get() then
           # looks up (presumably resolving to the data frame column inside
           # mutate() -- a same-named object elsewhere could interfere).
           ~ .x - get(sub("_SAC$", "_OVX", cur_column())),
           # {.col} is the current column name; stripping "_SAC" leaves the
           # marker stub used in the output name.
           .names = "chg_{sub('_SAC$','',.col)}")
  )
# # A tibble: 5 × 12
#   Leptin_OVX Leptin_SAC MIP1A_OVX MIP1A_SAC IL4_OVX IL4_SAC IL1B_OVX IL1B_SAC chg_Leptin chg_MIP1A chg_IL4 chg_IL1B
#        <dbl>      <dbl>     <dbl>     <dbl>   <dbl>   <dbl>    <dbl>    <dbl>      <dbl>     <dbl>   <dbl>    <dbl>
# 1    101537.     19786.    199.      153.      84.4       0     20.5      0      -81751.    -46.4    -84.4    -20.5
# 2     34184.    124224.      9.39      0        0         0      0       19.0     90040.     -9.39     0       19.0
# 3     54567.     89540.      6.31      6.31     0         0      0       15.0     34973.      0        0       15.0
# 4     29559.     29335.     31.4      12.1     84.4       0      0        0        -223.    -19.4    -84.4      0  
# 5     40930.     49086.     61.7      56.3      0         0     27.0      0        8156.     -5.38     0      -27.0
guicsvcw

guicsvcw2#

可以考虑使用简单的矩阵式运算,直接对整块列做减法:

# Block-wise subtraction in base R: build the vector of marker stubs, then
# subtract the whole "_OVX" column block from the whole "_SAC" block at once.
#
# Stubs are taken only from columns that actually end in "_SAC". Deriving them
# from every column name (cutting at the first "_") breaks as soon as the data
# frame holds any other column -- e.g. previously added "chg_*" columns or an
# id column -- because the resulting "<stub>_SAC" name does not exist. It also
# truncates stubs that themselves contain an underscore.
sac_cols <- grep("_SAC$", colnames(df), value = TRUE)
stubs <- sub("_SAC$", "", sac_cols)

# Each "_SAC" column is expected to have a matching "_OVX" column; a missing
# partner surfaces as an undefined-column error rather than a wrong pairing.
df[paste0("chg_", stubs)] <- df[sac_cols] - df[paste0(stubs, "_OVX")]

MyCompiler.io Demo

ma8fv8wu

ma8fv8wu3#

**解决方案1 map_dfr:**完成此类任务的一种优雅做法,是先把要相减的列对直观地拆分开:

# Split the columns of df into one group per marker; the grouping key is the
# column name with everything from the first underscore onwards removed.
split.default(
  x = df,
  f = sub(pattern = "\\_.*", replacement = "", x = names(df))
)

我们得到了所有的对:

$IL1B
# A tibble: 5 × 2
  IL1B_OVX IL1B_SAC
     <dbl>    <dbl>
1     20.5      0  
2      0       19.0
3      0       15.0
4      0        0  
5     27.0      0  

$IL4
# A tibble: 5 × 2
  IL4_OVX IL4_SAC
    <dbl>   <dbl>
1    84.4       0
2     0         0
3     0         0
4    84.4       0
5     0         0

$Leptin
# A tibble: 5 × 2
  Leptin_OVX Leptin_SAC
       <dbl>      <dbl>
1    101537.     19786.
2     34184.    124224.
3     54567.     89540.
4     29559.     29335.
5     40930.     49086.

$MIP1A
# A tibble: 5 × 2
  MIP1A_OVX MIP1A_SAC
      <dbl>     <dbl>
1    199.      153.  
2      9.39      0   
3      6.31      6.31
4     31.4      12.1 
5     61.7      56.3

借助 purrr 的 map_dfr 和 reduce,我们将 `-`(减法)应用于每一对列,重命名后再与原始数据框按列绑定:

library(purrr)
library(stringr)
library(dplyr)

# Solution 1: split the columns into one (OVX, SAC) pair per marker, reduce
# each pair with `-`, prefix the results with "diff_", and append them to df.
#
# reduce() subtracts left to right, so each pair is explicitly ordered as
# SAC first, OVX second. Without this the result would be OVX - SAC (the
# columns' order in df), i.e. the opposite sign of the requested change.
split.default(df, sub("\\_.*", "", names(df))) %>%
  map_dfr(
    ~ reduce(select(.x, ends_with("_SAC"), ends_with("_OVX")), `-`)
  ) %>%
  # After map_dfr the columns are named by marker stub (the split keys).
  rename_with(~ str_c("diff_", .)) %>%
  bind_cols(df, .)

**解决方案2 重塑(reshape):**如果上面的做法显得过于复杂(尤其对初学者而言),先把数据重塑为长格式会更容易理解整个过程:

下面是一个长格式的解决方案:

library(dplyr)
library(tidyr)
library(stringr)

# Solution 2: go through long format.
mutate(df, rn = row_number()) %>%
  # One row per (original row, marker); the OVX / SAC suffixes become columns.
  pivot_longer(
    cols = -rn,
    names_to = c("grp", ".value"),
    names_sep = "\\_"
  ) %>%
  # Keep only the row id, the target column name, and the SAC - OVX change.
  transmute(
    rn,
    grp = str_c("diff_", grp),
    diff = SAC - OVX
  ) %>%
  # Back to one row per original row, one diff_<marker> column per marker.
  pivot_wider(
    names_from = grp,
    values_from = diff
  ) %>%
  # Append to the original columns (the helper column rn is carried along).
  bind_cols(df, .)

解决方案3 map2_df:

library(purrr)
library(dplyr)

# Solution 3: subtract the "_OVX" columns from the "_SAC" columns pairwise and
# append the results to df as diff_<marker> columns.
#
# The "_SAC" block is passed first so that the result is SAC - OVX (the change
# asked for); passing OVX first would flip the sign. Columns are paired by
# position, so both blocks are assumed to list the markers in the same order.
map2_df(
  df %>% select(ends_with("_SAC")),
  df %>% select(ends_with("_OVX")),
  `-`
) %>%
  # Result columns inherit the "_SAC" names; drop the suffix before prefixing
  # so the output is diff_Leptin rather than diff_Leptin_SAC. paste0() is used
  # because stringr is not among the packages loaded for this snippet.
  rename_with(~ paste0("diff_", sub("_SAC$", "", .))) %>%
  bind_cols(df, .)
Leptin_OVX Leptin_SAC MIP1A_OVX MIP1A_SAC IL4_OVX IL4_SAC IL1B_OVX IL1B_SAC    rn diff_Leptin diff_MIP1A diff_IL4
       <dbl>      <dbl>     <dbl>     <dbl>   <dbl>   <dbl>    <dbl>    <dbl> <int>       <dbl>      <dbl>    <dbl>
1    101537.     19786.    199.      153.      84.4       0     20.5      0       1     -81751.     -46.4     -84.4
2     34184.    124224.      9.39      0        0         0      0       19.0     2      90040.      -9.39      0  
3     54567.     89540.      6.31      6.31     0         0      0       15.0     3      34973.       0         0  
4     29559.     29335.     31.4      12.1     84.4       0      0        0       4       -223.     -19.4     -84.4
5     40930.     49086.     61.7      56.3      0         0     27.0      0       5       8156.      -5.38      0  
# ℹ 1 more variable: diff_IL1B <dbl>

相关问题