我已经附上了一个用PowerQuery完成的预期解决方案。然而,我想知道如何用R完成这一任务。我的数据如下所示:
预期结果:
oalqel3c1#
我使用tidyverse和dplyr来处理数据。
library(stringr) library(dplyr) library(tidyverse) ## Creating a dummy dataframe df = data.frame(ResponseID=1:10, Status=c("Complete","Incomplete","Complete","Complete","Incomplete","Complete","Incomplete","Complete","Complete","Incomplete"), q1 = sample(1:10,10, replace = T), q2a=sample(1:10,10, replace = T), q2b=sample(1:10,10, replace = T), q2c=sample(1:10,10, replace = T), q2d=sample(1:10,10, replace = T), q2e=sample(1:10,10, replace = T), q2f=sample(1:10,10, replace = T), q2g=sample(1:10,10, replace = T), q2h=sample(1:10,10, replace = T), q2i=sample(1:10,10, replace = T), q2j=sample(1:10,10, replace = T), q10a=sample(1:10,10, replace = T), q10b=sample(1:10,10, replace = T), q10c=sample(1:10,10, replace = T), q10d=sample(1:10,10, replace = T), q10e=sample(1:10,10, replace = T) ) ResponseID Status q1 q2a q2b q2c q2d q2e q2f q2g q2h q2i q2j q10a q10b q10c q10d q10e 1 1 Complete 9 8 6 1 10 4 4 4 3 1 4 1 7 1 10 5 2 2 Incomplete 1 10 10 4 1 6 2 2 4 9 4 3 8 1 9 10 3 3 Complete 10 8 1 5 3 4 8 7 8 2 8 5 3 10 9 1 4 4 Complete 1 10 3 5 7 4 9 3 1 4 7 9 4 7 9 10 5 5 Incomplete 7 2 7 6 7 6 5 1 4 10 2 9 10 7 6 7 6 6 Complete 6 8 5 4 5 7 2 1 2 5 3 7 9 2 1 7 7 7 Incomplete 10 5 2 9 5 5 8 5 4 6 5 7 6 9 5 8 8 8 Complete 7 4 1 8 8 1 3 5 9 7 1 4 5 9 5 1 9 9 Complete 4 5 10 8 9 6 6 6 1 7 5 7 7 4 6 9 10 10 Incomplete 3 7 3 1 9 10 2 5 10 3 2 7 3 6 7 9 ## Wider to longer df1 = df %>% pivot_longer(!c(ResponseID,Status), names_to = "Question", values_to = "Values") ## Extracting the last alphabet df1$Alphabet=ifelse(str_detect(df1$Question, "[a,b,c,d,e,f,g,h,i,j]")==T,substr(df1$Question, nchar(df1$Question) - 1 + 1, nchar(df1$Question)),"a") df1$AlphaToNo = match(df1$Alphabet,letters) df1$Q = sub('(\\d+).*', '\\1', df1$Question) df2 = df1 %>% group_by(ResponseID, Q,AlphaToNo) %>% summarise(SumValues=sum(Values)) %>% pivot_wider(names_from = AlphaToNo, values_from = SumValues) %>% arrange(ResponseID,Q) df2[is.na(df2)] <- 0 ## Final Output > head(df2,15) # A tibble: 15 x 12 # Groups: ResponseID, Q [15] ResponseID Q `1` `2` `3` `4` `5` `6` `7` `8` `9` `10` <int> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> 1 1 q1 9 0 0 0 0 0 0 0 0 0 2 1 q10 1 7 1 10 5 0 0 0 0 0 3 1 q2 8 6 1 10 4 4 4 3 1 4 4 2 q1 1 0 0 0 0 0 0 0 0 0 5 2 q10 3 8 1 9 10 0 0 0 0 0 6 2 q2 10 10 4 1 6 2 2 4 9 4 7 3 q1 10 0 0 0 0 0 0 0 0 0 8 3 q10 5 3 10 9 1 0 0 0 0 0 9 3 q2 8 1 5 3 4 8 7 8 2 8 10 4 q1 1 0 0 0 0 0 0 0 0 0 11 4 q10 9 4 7 9 10 0 0 0 0 0 12 4 q2 10 3 5 7 4 9 3 1 4 7 13 5 q1 7 0 0 0 0 0 0 0 0 0 14 5 q10 9 10 7 6 7 0 0 0 0 0 15 5 q2 2 7 6 7 6 5 1 4 10 2
1条答案
按热度按时间oalqel3c1#
我使用tidyverse和dplyr来处理数据。