基于其他列的值在R tidyverse中构建新列

lstz6jyr  于 2022-12-30  发布在  其他
关注(0)|答案(2)|浏览(135)

我正在使用 R 的 tidyverse。我有一个类似下面代码所示的 tibble,想根据其他列的值创建 output_column:取每一行中最后一个非空(非 NA)列的值;如果该行在 output_column 之前还存在值为 NA 的列,则在该值后面加上后缀 "_NA"。

library(tidyverse)
# Example taxonomy table: every row is resolved one rank deeper than the
# previous one. `output_column` holds the expected result for each row: the
# deepest non-missing rank, suffixed with "_NA" when any rank is missing.
test_df <- tibble(
  kingdom = rep("bacteria", times = 6),
  phylum = rep(c(NA, "sterp", "entro"), times = c(1, 1, 4)),
  class = rep(c(NA, "abc"), times = c(2, 4)),
  order = rep(c(NA, "cde"), times = c(3, 3)),
  family = rep(c(NA, "xyz"), times = c(4, 2)),
  genus = rep(c(NA, "sam"), times = c(5, 1)),
  output_column = c(
    "bacteria_NA", "sterp_NA", "abc_NA", "cde_NA", "xyz_NA", "sam"
  )
)

3gtaxfhh

3gtaxfhh1#

可以按如下方式使用 rowwise() 和 c_across():

# Row by row, keep `genus` when it is present; otherwise take the deepest
# non-missing rank and append "_NA".
# The rank columns are named explicitly in c_across(): relying on the implicit
# "all columns" default is deprecated in recent dplyr and would also pick up an
# already existing output_column as the "last" value.
test_df %>%
  rowwise() %>%
  mutate(
    k = if_else(
      !is.na(genus),
      genus,
      paste0(
        last(c_across(kingdom:genus)[!is.na(c_across(kingdom:genus))]),
        "_NA"
      )
    )
  ) %>%
  # Drop the row-wise grouping so later verbs operate on whole columns again.
  ungroup()

输出:

kingdom  phylum class order family genus k          
  <chr>    <chr>  <chr> <chr> <chr>  <chr> <chr>      
1 bacteria NA     NA    NA    NA     NA    bacteria_NA
2 bacteria sterp  NA    NA    NA     NA    sterp_NA   
3 bacteria entro  abc   NA    NA     NA    abc_NA     
4 bacteria entro  abc   cde   NA     NA    cde_NA     
5 bacteria entro  abc   cde   xyz    NA    xyz_NA     
6 bacteria entro  abc   cde   xyz    sam   sam

另一种方法是将last()函数与apply()一起使用:

# Build output_column from the taxonomy rank columns only. Restricting apply()
# to these columns keeps the result correct when output_column already exists
# (for example when the code is run a second time); otherwise the existing
# output_column would itself be treated as the last rank.
test_df$output_column <- apply(
  test_df[c("kingdom", "phylum", "class", "order", "family", "genus")],
  1,
  \(ranks) {
    # Deepest rank that is actually filled in for this row.
    deepest <- last(ranks[!is.na(ranks)])
    # Each call handles a single row, so a plain if/else is sufficient.
    if (is.na(last(ranks))) paste0(deepest, "_NA") else deepest
  }
)
b1zrtrql

b1zrtrql2#

这里我们可以使用coalesce

library(dplyr)
library(purrr)
library(stringr)
 # Take the deepest non-missing rank by coalescing from genus up to kingdom,
 # then append "_NA" for rows in which at least one rank is missing.
 # The rank columns are named explicitly, so the result does not depend on
 # column positions or on an already existing output_column, and the
 # deprecated purrr::invoke() / across()-without-function calls are avoided.
 test_df %>%
   mutate(
     output_column = coalesce(genus, family, order, class, phylum, kingdom),
     output_column = case_when(
       if_any(kingdom:genus, is.na) ~ str_c(output_column, "_NA"),
       TRUE ~ output_column
     )
   )
  • 输出
# A tibble: 6 × 7
  kingdom  phylum class order family genus output_column
  <chr>    <chr>  <chr> <chr> <chr>  <chr> <chr>        
1 bacteria <NA>   <NA>  <NA>  <NA>   <NA>  bacteria_NA  
2 bacteria sterp  <NA>  <NA>  <NA>   <NA>  sterp_NA     
3 bacteria entro  abc   <NA>  <NA>   <NA>  abc_NA       
4 bacteria entro  abc   cde   <NA>   <NA>  cde_NA       
5 bacteria entro  abc   cde   xyz    <NA>  xyz_NA       
6 bacteria entro  abc   cde   xyz    sam   sam

数据

# Reproducible input data: a six-row tibble with the taxonomy rank columns
# only (no output_column), built with base R so no package is needed to
# recreate the object.
test_df <- structure(
  list(
    kingdom = rep("bacteria", 6L),
    phylum = c(NA, "sterp", rep("entro", 4L)),
    class = c(NA, NA, rep("abc", 4L)),
    order = c(NA, NA, NA, rep("cde", 3L)),
    family = c(NA, NA, NA, NA, rep("xyz", 2L)),
    genus = c(NA, NA, NA, NA, NA, "sam")
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

相关问题