我尝试将数据输入变量名作为输出值添加到单独的列中(函数“result”中的$V5)。
输入是不同的数据值,我希望将这些名称保存在输出中,以便跟踪数据的来源。
我原以为这会相当简单,我可以使用print(deparse(substitute(input))),但这不起作用。
有人能推荐一个解决方案吗?
library(dplyr)
library(tidyr)
## Inputs ##
input_1 = structure(list(V1 = c("Team_2022", "Team_2022", "Team_2022"), V2 = c("Frank", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(55, 76, 14)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_2 = structure(list(V1 = c("Team_2023", "Team_2023", "Team_2023"), V2 = c("Bill", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(113, 23, 10)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_3 = structure(list(V1 = c("Team_2024", "Team_2024", "Team_2024"), V2 = c("Frank", "Mary", "Bill"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(7, 19, 52)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_4 = structure(list(V1 = c("Team_2025", "Team_2025", "Team_2025"), V2 = c("Frank", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(46, 44, 88)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
## Teams ##
teams = structure(list(V1 = c("team1", "team2", "team3"), V2 = c("Mary + Frank","Mary + John", "Mary + Bill")), class = "data.frame", row.names = c(NA, -3L))
## Group the inputs into one ##
all_objects = ls()
input_objects = grep("^input", all_objects, value = T)
input_test = as.data.frame(input_obj)
## Function ##
result = function(input, teams) {
data = teams %>%
separate_rows(V2) %>%
left_join(input, by = c("V2" = "V2")) %>%
replace_na(list(V4 = 0)) %>%
group_by(V1.x) %>% fill(V1.y, V3) %>%
summarize(V1.y = first(V1.y),
V2 = paste(V2, collapse = " + "),
V3 = first(V3),
V4 = sum(V4),
V5 = print(deparse(substitute(input))))
return(data)
}
all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = T)
input_test <- lapply(input_objects, get)
output = input_test %>%
lapply(result, teams) %>%
bind_rows()
### Current output ###
structure(list(V1.x = c("team1", "team2", "team3", "team1", "team2",
"team3", "team1", "team2", "team3", "team1", "team2", "team3"
), V1.y = c("Team_2022", "Team_2022", "Team_2022", "Team_2023",
"Team_2023", "Team_2023", "Team_2024", "Team_2024", "Team_2024",
"Team_2025", "Team_2025", "Team_2025"), V2 = c("Mary + Frank",
"Mary + John", "Mary + Bill", "Mary + Frank", "Mary + John",
"Mary + Bill", "Mary + Frank", "Mary + John", "Mary + Bill",
"Mary + Frank", "Mary + John", "Mary + Bill"), V3 = c("Sydney",
"Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney",
"Sydney", "Sydney", "Sydney", "Sydney"), V4 = c(131, 90, 76,
23, 33, 136, 26, 19, 71, 90, 132, 44), V5 = c("input", "input",
"input", "input", "input", "input", "input", "input", "input",
"input", "input", "input")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L))
### Desired Output ###
structure(list(V1.x = c("team1", "team2", "team3", "team1", "team2",
"team3", "team1", "team2", "team3", "team1", "team2", "team3"
), V1.y = c("Team_2022", "Team_2022", "Team_2022", "Team_2023",
"Team_2023", "Team_2023", "Team_2024", "Team_2024", "Team_2024",
"Team_2025", "Team_2025", "Team_2025"), V2 = c("Mary + Frank",
"Mary + John", "Mary + Bill", "Mary + Frank", "Mary + John",
"Mary + Bill", "Mary + Frank", "Mary + John", "Mary + Bill",
"Mary + Frank", "Mary + John", "Mary + Bill"), V3 = c("Sydney",
"Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney",
"Sydney", "Sydney", "Sydney", "Sydney"), V4 = c(131, 90, 76,
23, 33, 136, 26, 19, 71, 90, 132, 44), V5 = c("input_1", "input_1",
"input_1", "input_2", "input_2", "input_2", "input_3", "input_3", "input_3",
"input_4", "input_4", "input_4")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L))
3条答案
按热度按时间2exbekwf1#
如果我没理解错的话:
purrr::map_df()
函数有一个很好的特性,可以识别finaldata.frames
中的输入列表(理想的命名):请注意,这在不命名列表的情况下也可以工作,尽管您将只获得列表项编号,这可能是不够的。
plyr::ldply
函数也可以用于绑定命名列表,在结果data.frame中生成一个包含列表名称的新列。w46czmvw2#
与
deparse/substitute
不同,在result
中也为名称创建一个参数,然后使用该参数at0kjp5o3#
如果一次只有一个输入,只需将指定
V5
的行移出summarize
和dplyr
管道即可:但是列表的使用使得这有点困难(对于该代码,它将输出
X[[i]]
)。为了解决使用
lapply
时的这个问题,我建议简单地添加一个接受名称的附加输入,并简单地将该名称赋给它,同时对lapply
函数进行额外的调整以适应它: