应答计数 Dataframe 扩展为应答 Dataframe

xienkqul  于 2023-02-10  发布在  其他
关注(0)|答案(1)|浏览(143)

我试图将特定响应计数的数据框架扩展为实际响应的数据框架(而不是按计数聚合)
示例数据:

set.seed(44)
data <- data.frame(ID = LETTERS[1:5]
                   , Count_1 = sample(c(3:8), 5)
                   , Count_2 = sample(c(3:8), 5)
                   , Count_3 = sample(c(3:8), 5))

它看起来像这样:

ID Count_1 Count_2 Count_3
1  A       3       8       3
2  B       8       7       4
3  C       7       4       8
4  D       4       3       5
5  E       5       5       6

我想把它扩展成这样:

desired <- rbind(data.frame(ID = c(rep("A", 3), rep("B", 8), rep("C", 7), rep("D", 4), rep("E", 5)
                             )
                      , Response = c(rep("1", 3), rep("1", 8) , rep("1", 7), rep("1", 4), rep("1", 5)
                                     )
                      )
                 , data.frame(ID = c(rep("A", 8), rep("B", 7), rep("C", 4), rep("D", 3), rep("E", 5)
                 )
                 
                 , Response = c(rep("2", 8), rep("2", 7) , rep("2", 4), rep("2", 3), rep("2", 5)
                 )
                 )
                 
                 , data.frame(ID = c(rep("A", 3), rep("B", 4), rep("C", 8), rep("D", 5), rep("E", 6)
                 )
                 , Response = c(rep("2", 3), rep("2", 4) , rep("2", 8), rep("2", 5), rep("2", 6)
                 )
                 )
                 
                 )

它看起来像:

ID Response
1   A        1
2   A        1
3   A        1
4   B        1
5   B        1
6   B        1
7   B        1
8   B        1
9   B        1
10  B        1
11  B        1
12  C        1
13  C        1
14  C        1
15  C        1
16  C        1
17  C        1
18  C        1
19  D        1
20  D        1
21  D        1
22  D        1
23  E        1
24  E        1
25  E        1
26  E        1
27  E        1
28  A        2
29  A        2
30  A        2
31  A        2
32  A        2
33  A        2
34  A        2
35  A        2
36  B        2
37  B        2
38  B        2
39  B        2
40  B        2
41  B        2
42  B        2
43  C        2
44  C        2
45  C        2
46  C        2
47  D        2
48  D        2
49  D        2
50  E        2
51  E        2
52  E        2
53  E        2
54  E        2
55  A        2
56  A        2
57  A        2
58  B        2
59  B        2
60  B        2
61  B        2
62  C        2
63  C        2
64  C        2
65  C        2
66  C        2
67  C        2
68  C        2
69  C        2
70  D        2
71  D        2
72  D        2
73  D        2
74  D        2
75  E        2
76  E        2
77  E        2
78  E        2
79  E        2
80  E        2

我在想也许可以用一个for循环来为每列中的每个单元格创建向量,但我想知道这是否有必要,或者是否有更有效的方法?

bvn4nwqk

bvn4nwqk1#

我们可以使用pivot_longer将其整形为"long"格式,然后使用uncount

library(dplyr)
library(tidyr)
data %>%
   pivot_longer(cols = starts_with("Count"), names_prefix = 'Count_', 
     names_to = 'Response') %>% 
   arrange(Response) %>%
   uncount(value)
  • 输出
# A tibble: 80 × 2
   ID    Response
   <chr> <chr>   
 1 A     1       
 2 A     1       
 3 A     1       
 4 B     1       
 5 B     1       
 6 B     1       
 7 B     1       
 8 B     1       
 9 B     1       
10 B     1       
# … with 70 more rows

base R中,可以使用rep/unlist/col/row完成此操作

vals <- unlist(data[-1])
out <-  data.frame(ID = rep(data$ID[row(data[-1])], vals), 
      Response = rep(col(data[-1]), vals))

数据

data <- structure(list(ID = c("A", "B", "C", "D", "E"), Count_1 = c(3L, 
8L, 7L, 4L, 5L), Count_2 = c(8L, 7L, 4L, 3L, 5L), Count_3 = c(3L, 
4L, 8L, 5L, 6L)), class = "data.frame", row.names = c("1", "2", 
"3", "4", "5"))

相关问题