根据R中Group的第一个值将值转换为NA

luaexgnf  于 2023-03-15  发布在  其他
关注(0)|答案(2)|浏览(115)

我正尝试按列对数据框(data frame)进行分组,每3列为一组。也就是说,组1是第1-3列,组2是第4-6列,依此类推。然后,我希望逐行、按组进行检查:对于每一行,查看每个组中的第一个值是否为NA。如果是NA,则该行中该组的所有值都应转换为NA;如果不是NA,则值保持不变。
示例df:

# Example input: seven rows and six numeric columns. Columns are meant to be
# read in groups of three (col_1..col_3 and col_4..col_6). The leading column
# of the first group is NA in row 4; the leading column of the second group
# is NA in row 1.
df <- data.frame(
  col_1 = c(1, 2, 3, NA, 3, 2, 1),
  col_2 = c(1, 3, 2, 4, 3, 2, 1),
  col_3 = c(1, 2, 3, 4, 3, 2, 1),
  col_4 = c(NA, 2, 3, 4, 3, 2, 1),
  col_5 = c(2, 2, 3, 4, 3, 2, 1),
  col_6 = c(3, 2, 3, 4, 3, 2, 1)
)

我想要的输出:

# Expected result: identical to the input except that, for each row, a whole
# three-column group is NA whenever that group's first column is NA
# (row 4 for col_1..col_3, row 1 for col_4..col_6).
df_1 <- data.frame(
  col_1 = c(1, 2, 3, NA, 3, 2, 1),
  col_2 = c(1, 3, 2, NA, 3, 2, 1),
  col_3 = c(1, 2, 3, NA, 3, 2, 1),
  col_4 = c(NA, 2, 3, 4, 3, 2, 1),
  col_5 = c(NA, 2, 3, 4, 3, 2, 1),
  col_6 = c(NA, 2, 3, 4, 3, 2, 1)
)

我尝试使用'group_by()'和'mutate()',但毫无效果。这是正确的方法吗?

yvgpqqbh

yvgpqqbh1#

这可能会有帮助。组是由rep(1:(ncol(df)/3), each=3)定义的,如果存在更多的组/行,则必须进行调整。

library(dplyr)

# Operate on the transpose, where every original column is a row:
#   1. prepend an ID that labels each run of three original columns as a group;
#   2. within each ID group, overwrite a whole transposed column (i.e. one
#      original row restricted to that group) with NA when its first entry is
#      NA;
#   3. drop the ID, transpose back and restore the original column names.
# NOTE(review): starts_with("V") relies on as_tibble() auto-naming the unnamed
# matrix columns with a "V" prefix -- confirm with the installed tibble version.
as_tibble(
  cbind(
    ID = rep(1:(ncol(df) / 3), each = 3),
    t(df)
  )
) %>%
  mutate(
    across(
      starts_with("V"),
      # A scalar TRUE index recycles over the whole vector; FALSE selects
      # nothing, leaving the values untouched.
      function(v) replace(v, is.na(v[1]), NA)
    ),
    .by = ID
  ) %>%
  select(-ID) %>%
  t() %>%
  data.frame(row.names = NULL) %>%
  setNames(colnames(df))
  col_1 col_2 col_3 col_4 col_5 col_6
1     1     1     1    NA    NA    NA
2     2     3     2     2     2     2
3     3     2     3     3     3     3
4    NA    NA    NA     4     4     4
5     3     3     3     3     3     3
6     2     2     2     2     2     2
7     1     1     1     1     1     1
nfzehxib

nfzehxib2#

这里是另一个选项:

library(dplyr)
library(purrr)
library(tidyr)

# Lookup table mapping every column name of `df` to the id of its
# three-column group (the first three names get id 1, the next three id 2, ...).
# credits: https://stackoverflow.com/questions/58212957/how-to-add-1-add-unique-identifier-every-3-rows-of-data-frame
id_help <- as_tibble(colnames(df)) |>
  rename(name = value) |>
  mutate(id = as.numeric(gl(ceiling(n() / 3), 3, n())))

# Reshape to long form, attach each column's group id, split by group, and for
# every group rebuild the wide table, blanking the whole row of that group
# whenever the group's first column is NA. The per-group pieces are finally
# bound back together column-wise.
df |>
  pivot_longer(cols = starts_with("col")) |>
  # Explicit key: avoids the "Joining with `by = ...`" message.
  left_join(id_help, by = "name") |>
  dplyr::group_split(id) |>
  map(
    ~ .x |>
      select(-id) |>
      group_by(name) |>
      # Row counter within each column; needed as the id for pivot_wider().
      mutate(id = row_number()) |>
      ungroup() |>
      pivot_wider(
        id_cols = id,
        names_from = name,
        values_from = value
      ) |>
      select(-id) |>
      rowwise() |>
      # Flag rows whose first column (the group's leading column) is NA.
      mutate(check = any(across(1, is.na))) |>
      # Fix: the flag column is `check`; the original referenced an undefined
      # `exclude`, which made the pipeline fail. The flag itself is excluded
      # from the overwrite.
      mutate(across(-check, ~ ifelse(check, NA, .x))) |>
      select(-check) |>
      ungroup()
  ) |>
  bind_cols()

输出:

# A tibble: 7 × 6
  col_1 col_2 col_3 col_4 col_5 col_6
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     1     1    NA    NA    NA
2     2     3     2     2     2     2
3     3     2     3     3     3     3
4    NA    NA    NA     4     4     4
5     3     3     3     3     3     3
6     2     2     2     2     2     2
7     1     1     1     1     1     1

相关问题