在R中根据一组列的取值创建多个新的布尔(0/1)列

dluptydi  于 2023-10-13  发布在  其他
关注(0)|答案(3)|浏览(90)

我有一个类似这样的数据框(data frame),其中事件列A到C(为简单起见做了精简,实际上有更多的列)的取值是“0-3”、“3-6”、“6-9”这样的区间标签(示例数据中还出现了“9-12”)。

# Sample data: ten records, each carrying an interval label for the
# events A, B and C.
df <- data.frame(
  id = as.numeric(1:10),
  A = c("0-3", "3-6", "6-9", "0-3", "6-9", "6-9", "0-3", "0-3", "3-6", "3-6"),
  B = c("9-12", "3-6", "0-3", "3-6", "0-3", "0-3", "3-6", "6-9", "6-9", "3-6"),
  C = c("3-6", "6-9", "3-6", "6-9", "3-6", "6-9", "6-9", "3-6", "0-3", "6-9")
)

# Show the data frame.
df

如果我想为每个“事件_取值”组合创建一个新列,并用1或0作为布尔标志,有什么高效的方法?下面是我想要的结果的一个例子(已缩短,我希望这些列一直扩展到C_6_9)。

# Desired result (truncated): the original columns plus one 0/1 indicator
# column per event/interval combination, named <event>_<from>_<to>.
df <- data.frame(
  id = as.numeric(1:10),
  A = c("0-3", "3-6", "6-9", "0-3", "6-9", "6-9", "0-3", "0-3", "3-6", "3-6"),
  B = c("9-12", "3-6", "0-3", "3-6", "0-3", "0-3", "3-6", "6-9", "6-9", "3-6"),
  C = c("3-6", "6-9", "3-6", "6-9", "3-6", "6-9", "6-9", "3-6", "0-3", "6-9"),
  # Indicators for event A.
  A_0_3 = c(1, 0, 0, 1, 0, 0, 1, 1, 0, 0),
  A_3_6 = c(0, 1, 0, 0, 0, 0, 0, 0, 1, 1),
  A_6_9 = c(0, 0, 1, 0, 1, 1, 0, 0, 0, 0),
  # Indicators for event B (only the first two are listed).
  B_0_3 = c(0, 0, 1, 0, 1, 1, 0, 0, 0, 0),
  B_3_6 = c(0, 1, 0, 1, 0, 0, 1, 0, 0, 1)
)

# Show the data frame.
df
wwwo4jvm

wwwo4jvm1#

也许你可以从data.table包中尝试dcast + melt

library(data.table)

# Convert `df` to a data.table by reference (no copy is made).
setDT(df)

# Long form: one row per (id, source column, interval label).
long <- melt(df, id.vars = "id")

# Back to wide form, counting how often each column/label pair occurs per
# id. With one row per id these counts are 0/1 flags; the new columns are
# named "<column>_<label>", e.g. "A_0-3".
# Argument names are spelled out in full (no partial matching) and
# `value.var` is given explicitly instead of being guessed.
flags <- dcast(
  long,
  id ~ variable + value,
  fun.aggregate = length,
  value.var = "value"
)

# Join the flag columns back onto the original columns by id.
df[flags, on = "id"]

你将获得

id   A    B   C A_0-3 A_3-6 A_6-9 B_0-3 B_3-6 B_6-9 B_9-12 C_0-3 C_3-6
 1:  1 0-3 9-12 3-6     1     0     0     0     0     0      1     0     1
 2:  2 3-6  3-6 6-9     0     1     0     0     1     0      0     0     0
 3:  3 6-9  0-3 3-6     0     0     1     1     0     0      0     0     1
 4:  4 0-3  3-6 6-9     1     0     0     0     1     0      0     0     0
 5:  5 6-9  0-3 3-6     0     0     1     1     0     0      0     0     1
 6:  6 6-9  0-3 6-9     0     0     1     1     0     0      0     0     0
 7:  7 0-3  3-6 6-9     1     0     0     0     1     0      0     0     0
 8:  8 0-3  6-9 3-6     1     0     0     0     0     1      0     0     1
 9:  9 3-6  6-9 0-3     0     1     0     0     0     1      0     1     0
10: 10 3-6  3-6 6-9     0     1     0     0     1     0      0     0     0
    C_6-9
 1:     0
 2:     1
 3:     0
 4:     1
 5:     0
 6:     1
 7:     1
 8:     0
 9:     0
10:     1
9rbhqvlz

9rbhqvlz2#

以下是一个tidyverse解决方案,灵感来自@ pixasIsCoding,使用pivot_longer和pivot_wider

library(tidyverse)

# Reshape to long form (one row per id/column pair), spread back to wide
# form with one count column per column/label combination (missing
# combinations filled with 0), then join the result onto `df` by id.
df %>%
  pivot_longer(cols = -id, cols_vary = "slowest") %>%
  pivot_wider(
    names_from = c(name, value),
    values_from = value,
    values_fn = length,
    values_fill = 0,
    names_sort = TRUE
  ) %>%
  inner_join(df, ., by = "id")

输出:

id   A    B   C A_0-3 A_3-6 A_6-9 B_0-3 B_3-6 B_6-9 B_9-12 C_0-3 C_3-6 C_6-9
1   1 0-3 9-12 3-6     1     0     0     0     0     0      1     0     1     0
2   2 3-6  3-6 6-9     0     1     0     0     1     0      0     0     0     1
3   3 6-9  0-3 3-6     0     0     1     1     0     0      0     0     1     0
4   4 0-3  3-6 6-9     1     0     0     0     1     0      0     0     0     1
5   5 6-9  0-3 3-6     0     0     1     1     0     0      0     0     1     0
6   6 6-9  0-3 6-9     0     0     1     1     0     0      0     0     0     1
7   7 0-3  3-6 6-9     1     0     0     0     1     0      0     0     0     1
8   8 0-3  6-9 3-6     1     0     0     0     0     1      0     0     1     0
9   9 3-6  6-9 0-3     0     1     0     0     0     1      0     1     0     0
10 10 3-6  3-6 6-9     0     1     0     0     1     0      0     0     0     1
7gyucuyw

7gyucuyw3#

使用match和tabulate

# One-hot encode the interval columns of `df` with match() + tabulate().
# Expects `df` to be a plain data.frame with an `id` column.
#
# NOTE(review): `mcols`, `lv` and `lb` were not defined in the original
# snippet; the values below reproduce the output shown underneath.
mcols <- c("A", "B", "C")                # columns to expand
lv <- c("0-3", "3-6", "6-9", "9-12")     # interval labels, in output order
lb <- gsub("-", "_", lv, fixed = TRUE)   # label part of the new column names

# For each column: map every value to its position in `lv`, then turn that
# position into a 0/1 row of length(lv) via tabulate(). Values not found in
# `lv` give NA from match(), which tabulate() ignores (an all-zero row).
flags <- lapply(df[mcols], \(x) {
  matrix(
    vapply(match(x, lv), tabulate, integer(length(lv)), nbins = length(lv)),
    ncol = length(lv),
    byrow = TRUE
  )
}) |>
  do.call(what = "data.frame") |>
  # data.frame() lays the columns out grouped by source column (all A
  # flags, then all B flags, ...), so the names must vary by label fastest.
  # Flattening outer(mcols, lb, paste) would vary by column name fastest
  # and attach the wrong names to the data.
  setNames(paste(rep(mcols, each = length(lb)), lb, sep = "_"))

cbind(df["id"], flags)
#    id A_0_3 A_3_6 A_6_9 A_9_12 B_0_3 B_3_6 B_6_9 B_9_12 C_0_3 C_3_6 C_6_9 C_9_12
# 1   1     1     0     0      0     0     0     0      1     0     1     0      0
# 2   2     0     1     0      0     0     1     0      0     0     0     1      0
# 3   3     0     0     1      0     1     0     0      0     0     1     0      0
# 4   4     1     0     0      0     0     1     0      0     0     0     1      0
# 5   5     0     0     1      0     1     0     0      0     0     1     0      0
# 6   6     0     0     1      0     1     0     0      0     0     0     1      0
# 7   7     1     0     0      0     0     1     0      0     0     0     1      0
# 8   8     1     0     0      0     0     0     1      0     0     1     0      0
# 9   9     0     1     0      0     0     0     1      0     1     0     0      0
# 10 10     0     1     0      0     0     1     0      0     0     0     1      0
数据:
# Sample data used by the answer: an id plus three character columns of
# interval labels.
df <- data.frame(
  id = as.numeric(1:10),
  A = c("0-3", "3-6", "6-9", "0-3", "6-9", "6-9", "0-3", "0-3", "3-6", "3-6"),
  B = c("9-12", "3-6", "0-3", "3-6", "0-3", "0-3", "3-6", "6-9", "6-9", "3-6"),
  C = c("3-6", "6-9", "3-6", "6-9", "3-6", "6-9", "6-9", "3-6", "0-3", "6-9"),
  stringsAsFactors = FALSE
)

相关问题