减少case_when和recode语句中的重复?

kx5bkwkv  于 2022-12-06  发布在  其他
关注(0)|答案(2)|浏览(102)

数据类型

pcadata <- structure(list(sample = c("1-1", "1-2", "1-3", "2-1", "2-2", 
"2-3", "3-1", "3-2", "3-3", "4-1", "4-2", "4-3", "1-1", "1-2", 
"1-3", "2-1", "2-2", "3-2", "4-1", "4-2", "4-3"), compound = c("Linalool", 
"Linalool", "Linalool", "Linalool", "Linalool", "Linalool", "Linalool", 
"Linalool", "Linalool", "Linalool", "Linalool", "Linalool", "Acetic Acid", 
"Acetic Acid", "Acetic Acid", "Acetic Acid", "Acetic Acid", "Acetic Acid", 
"Acetic Acid", "Acetic Acid", "Acetic Acid"), conc = c(82855, 
74398, 59563, 117635, 118724, 75271, 95219, 50870, 67546, 58063, 
86610, 88594, 263774, 99287, 79800, 529503, 666771, 117253, 101193, 
65006, 221687), code = c("1", "1", "1", "2", "2", "2", "3", "3", 
"3", "4", "4", "4", "1", "1", "1", "2", "2", "3", "4", "4", "4"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-21L), groups = structure(list(sample = c("1-1", "1-1", "1-2", 
"1-2", "1-3", "1-3", "2-1", "2-1", "2-2", "2-2", "2-3", "3-1", 
"3-2", "3-2", "3-3", "4-1", "4-1", "4-2", "4-2", "4-3", "4-3"
), compound = c("Acetic Acid", "Linalool", "Acetic Acid", "Linalool", 
"Acetic Acid", "Linalool", "Acetic Acid", "Linalool", "Acetic Acid", 
"Linalool", "Linalool", "Linalool", "Acetic Acid", "Linalool", 
"Linalool", "Acetic Acid", "Linalool", "Acetic Acid", "Linalool", 
"Acetic Acid", "Linalool"), .rows = structure(list(13L, 1L, 14L, 
    2L, 15L, 3L, 16L, 4L, 17L, 5L, 6L, 7L, 18L, 8L, 9L, 19L, 
    10L, 20L, 11L, 21L, 12L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -21L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE))

编码

pacman::p_load(tidyverse)
codes_vector <- c("code1", "code2", "code3", "code4", "code5")
colors_vector <- c("#1B9E77","#D95F02","#7570B3","#E7298A","#66A61E","#E6AB02","#A6761D", "#666666")

analysis1 <- pcadata %>% 
  filter(code %in% c(1, 2)) %>% 
  arrange(code, 4) %>%
  group_by(sample, compound) %>% 
  pivot_wider(names_from = compound,
              values_from = conc,
              values_fill = 0) %>% 
  ungroup() %>% 
  column_to_rownames(var = "sample") %>% 
  mutate(code = recode(code,
                       `1` = codes_vector[1],
                       `2` = codes_vector[2],
                       `3` = codes_vector[3],
                       `4` = codes_vector[4],
                       `5` = codes_vector[5])) %>% 
  mutate(color = case_when(code == codes_vector[1] ~ "#1B9E77",
                           code == codes_vector[2] ~ "#D95F02",
                           code == codes_vector[3] ~ "#7570B3",
                           code == codes_vector[4] ~ "#E7298A",
                           code == codes_vector[5] ~ "#66A61E",
                           code == codes_vector[6] ~ "#E6AB02",
                           code == codes_vector[7] ~ "#A6761D",
                           code == codes_vector[8] ~ "#666666")) %>% 
  mutate(color=as.factor(color)) %>% 
  relocate(color, .after = code)

问题:有没有一种方法可以让我复制这个case_when()的工作,而不需要这么多的复制和粘贴?我怎样才能更简洁地或编程地迭代这些操作?有了我所期望的所有顺序,就有了一种方法。
类似于以下伪代码:

mutate(code = recode(code[i] = codes_vector[i]))

mutate(color = case_when(code == codes(vector[i] ~ colors_vector[i])))

所需输出

structure(list(code = c("hybrid", "hybrid", "hybrid", "plant based", 
"plant based", "plant based"), color = structure(c(1L, 1L, 1L, 
2L, 2L, 2L), levels = c("#1B9E77", "#D95F02"), class = "factor"), 
    Linalool = c(82855, 74398, 59563, 117635, 118724, 75271), 
    Nonanal = c(45433, 27520, 28883, 0, 0, 52454), `Acetic Acid` = c(263774, 
    99287, 79800, 529503, 666771, 0)), row.names = c("1-1", "1-2", 
"1-3", "2-1", "2-2", "2-3"), class = "data.frame")
iyfamqjs

iyfamqjs1#

我们可以通过将code转换为integer列并使用作为索引来替换“codes_vector”中的值来进行简化,然后通过将“code”列与colors_vector和codes_vector中的命名向量进行匹配来创建color

library(dplyr)
library(tidyr)
pcadata %>%
   ungroup %>%
   mutate(code = codes_vector[as.integer(code)],
   color = setNames(colors_vector, codes_vector)[code])%>% 
   pivot_wider(names_from = compound, values_from = conc, values_fill = 0)
  • 输出
# A tibble: 12 × 5
   sample code  color   Linalool `Acetic Acid`
   <chr>  <chr> <chr>      <dbl>         <dbl>
 1 1-1    code1 #1B9E77    82855        263774
 2 1-2    code1 #1B9E77    74398         99287
 3 1-3    code1 #1B9E77    59563         79800
 4 2-1    code2 #D95F02   117635        529503
 5 2-2    code2 #D95F02   118724        666771
 6 2-3    code2 #D95F02    75271             0
 7 3-1    code3 #7570B3    95219             0
 8 3-2    code3 #7570B3    50870        117253
 9 3-3    code3 #7570B3    67546             0
10 4-1    code4 #E7298A    58063        101193
11 4-2    code4 #E7298A    86610         65006
12 4-3    code4 #E7298A    88594        221687
cwtwac6a

cwtwac6a2#

一般来说,我们可以使用函数match

pcadata %>% 
  filter(code %in% c(1, 2)) %>% 
  arrange(code, 4) %>%
  group_by(sample, compound) %>% 
  pivot_wider(names_from = compound,
              values_from = conc,
              values_fill = 0) %>% 
  ungroup() %>% 
  column_to_rownames(var = "sample") %>% 
  mutate(code = codes_vector[match(code, 1:5)]) %>% 
  mutate(color = colors_vector[match(code, codes_vector)]) %>% 
  mutate(color=as.factor(color)) %>% 
  relocate(color, .after = code)
#>      code   color Linalool Acetic Acid
#> 1-1 code1 #1B9E77    82855      263774
#> 1-2 code1 #1B9E77    74398       99287
#> 1-3 code1 #1B9E77    59563       79800
#> 2-1 code2 #D95F02   117635      529503
#> 2-2 code2 #D95F02   118724      666771
#> 2-3 code2 #D95F02    75271           0

相关问题