R语言 将函数输入名称解析为输出名称

uurity8g  于 2023-02-06  发布在  其他
关注(0)|答案(3)|浏览(143)

我尝试将数据输入变量名作为输出值添加到单独的列中(函数“result”中的$V5)。
输入是不同的数据值,我希望将这些名称保存在输出中,以便跟踪数据的来源。
我原以为这会相当简单,我可以使用print(deparse(substitute(input))),但这不起作用。
有人能推荐一个解决方案吗?

library(dplyr)
library(tidyr)

## Inputs ##

input_1 = structure(list(V1 = c("Team_2022", "Team_2022", "Team_2022"), V2 = c("Frank", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(55, 76, 14)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_2 = structure(list(V1 = c("Team_2023", "Team_2023", "Team_2023"), V2 = c("Bill", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(113, 23, 10)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_3 = structure(list(V1 = c("Team_2024", "Team_2024", "Team_2024"), V2 = c("Frank", "Mary", "Bill"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(7, 19, 52)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_4 = structure(list(V1 = c("Team_2025", "Team_2025", "Team_2025"), V2 = c("Frank", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(46, 44, 88)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))

## Teams ##

teams = structure(list(V1 = c("team1", "team2", "team3"), V2 = c("Mary + Frank","Mary + John", "Mary + Bill")), class = "data.frame", row.names = c(NA, -3L))

## Group the inputs into one ##

all_objects = ls()
input_objects = grep("^input", all_objects, value = T)
input_test = as.data.frame(input_obj)

## Function ##

result = function(input, teams) {
  data = teams %>%
  separate_rows(V2) %>%
  left_join(input, by = c("V2" = "V2")) %>%
  replace_na(list(V4 = 0)) %>%
  group_by(V1.x) %>% fill(V1.y, V3) %>%
  summarize(V1.y = first(V1.y),
            V2 = paste(V2, collapse = " + "),
            V3 = first(V3),
            V4 = sum(V4),
            V5 = print(deparse(substitute(input))))
  return(data)
}


all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = T)
input_test <- lapply(input_objects, get)

output = input_test %>%
  lapply(result, teams)  %>%
  bind_rows()

### Current output ###

structure(list(V1.x = c("team1", "team2", "team3", "team1", "team2", 
"team3", "team1", "team2", "team3", "team1", "team2", "team3"
), V1.y = c("Team_2022", "Team_2022", "Team_2022", "Team_2023", 
"Team_2023", "Team_2023", "Team_2024", "Team_2024", "Team_2024", 
"Team_2025", "Team_2025", "Team_2025"), V2 = c("Mary + Frank", 
"Mary + John", "Mary + Bill", "Mary + Frank", "Mary + John", 
"Mary + Bill", "Mary + Frank", "Mary + John", "Mary + Bill", 
"Mary + Frank", "Mary + John", "Mary + Bill"), V3 = c("Sydney", 
"Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", 
"Sydney", "Sydney", "Sydney", "Sydney"), V4 = c(131, 90, 76, 
23, 33, 136, 26, 19, 71, 90, 132, 44), V5 = c("input", "input", 
"input", "input", "input", "input", "input", "input", "input", 
"input", "input", "input")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L))

### Desired Output ###

structure(list(V1.x = c("team1", "team2", "team3", "team1", "team2", 
"team3", "team1", "team2", "team3", "team1", "team2", "team3"
), V1.y = c("Team_2022", "Team_2022", "Team_2022", "Team_2023", 
"Team_2023", "Team_2023", "Team_2024", "Team_2024", "Team_2024", 
"Team_2025", "Team_2025", "Team_2025"), V2 = c("Mary + Frank", 
"Mary + John", "Mary + Bill", "Mary + Frank", "Mary + John", 
"Mary + Bill", "Mary + Frank", "Mary + John", "Mary + Bill", 
"Mary + Frank", "Mary + John", "Mary + Bill"), V3 = c("Sydney", 
"Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", 
"Sydney", "Sydney", "Sydney", "Sydney"), V4 = c(131, 90, 76, 
23, 33, 136, 26, 19, 71, 90, 132, 44), V5 = c("input_1", "input_1", 
"input_1", "input_2", "input_2", "input_2", "input_3", "input_3", "input_3", 
"input_4", "input_4", "input_4")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L))
2exbekwf

2exbekwf1#

如果我没理解错的话:purrr::map_df()函数有一个很好的特性,可以识别final data.frames中的输入列表(理想的命名):

library(dplyr)
library(tidyr)
# dropping V5 as it will be "automatically" computed
result = function(input, teams) {
  data = teams %>%
  separate_rows(V2) %>%
  left_join(input, by = c("V2" = "V2")) %>%
  replace_na(list(V4 = 0)) %>%
  group_by(V1.x) %>% 
  fill(V1.y, V3) %>%
  summarize(V1.y = first(V1.y),
            V2 = paste(V2, collapse = " + "),
            V3 = first(V3),
            V4 = sum(V4)
            # we do not need V5 here anymore
            )
  return(data)
}

l_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = T)
input_test <- lapply(input_objects, get)

# name the object list to use the name for identification
names(input_test) <- input_objects

# use purrr map to data.frame with the .id feature
purrr::map_df(input_test, ~result(.x, teams), .id = "V5")

  V5      V1.x  V1.y      V2           V3        V4
   <chr>   <chr> <chr>     <chr>        <chr>  <dbl>
 1 input_1 team1 Team_2022 Mary + Frank Sydney   131
 2 input_1 team2 Team_2022 Mary + John  Sydney    90
 3 input_1 team3 Team_2022 Mary + Bill  Sydney    76
 4 input_2 team1 Team_2023 Mary + Frank Sydney    23
 5 input_2 team2 Team_2023 Mary + John  Sydney    33
 6 input_2 team3 Team_2023 Mary + Bill  Sydney   136
 7 input_3 team1 Team_2024 Mary + Frank Sydney    26
 8 input_3 team2 Team_2024 Mary + John  Sydney    19
 9 input_3 team3 Team_2024 Mary + Bill  Sydney    71
10 input_4 team1 Team_2025 Mary + Frank Sydney    90
11 input_4 team2 Team_2025 Mary + John  Sydney   132
12 input_4 team3 Team_2025 Mary + Bill  Sydney    44

请注意,这在不命名列表的情况下也可以工作,尽管您将只获得列表项编号,这可能是不够的。
plyr::ldply函数也可以用于绑定命名列表,在结果data.frame中生成一个包含列表名称的新列。

w46czmvw

w46czmvw2#

deparse/substitute不同,在result中也为名称创建一个参数,然后使用该参数

library(dplyr)
library(purrr)
library(tidyr)
result <- function(input, teams, inputnm) {
  data = teams %>%
  separate_rows(V2) %>%
  left_join(input, by = c("V2" = "V2")) %>%
  replace_na(list(V4 = 0)) %>%
  group_by(V1.x) %>% fill(V1.y, V3) %>%
  summarize(V1.y = first(V1.y),
            V2 = paste(V2, collapse = " + "),
            V3 = first(V3),
            V4 = sum(V4),
            V5 = inputnm)
  return(data)
}
  • 测试
input_test %>% 
   pull(input_objects) %>%
   mget(inherits = TRUE) %>%
   imap_dfr(~ result(.x, teams, .y))
  • 输出
# A tibble: 12 × 6
   V1.x  V1.y      V2           V3        V4 V5     
   <chr> <chr>     <chr>        <chr>  <dbl> <chr>  
 1 team1 Team_2022 Mary + Frank Sydney   131 input_1
 2 team2 Team_2022 Mary + John  Sydney    90 input_1
 3 team3 Team_2022 Mary + Bill  Sydney    76 input_1
 4 team1 Team_2023 Mary + Frank Sydney    23 input_2
 5 team2 Team_2023 Mary + John  Sydney    33 input_2
 6 team3 Team_2023 Mary + Bill  Sydney   136 input_2
 7 team1 Team_2024 Mary + Frank Sydney    26 input_3
 8 team2 Team_2024 Mary + John  Sydney    19 input_3
 9 team3 Team_2024 Mary + Bill  Sydney    71 input_3
10 team1 Team_2025 Mary + Frank Sydney    90 input_4
11 team2 Team_2025 Mary + John  Sydney   132 input_4
12 team3 Team_2025 Mary + Bill  Sydney    44 input_4
at0kjp5o

at0kjp5o3#

如果一次只有一个输入,只需将指定V5的行移出summarizedplyr管道即可:

result = function(input, teams) {
  data = teams %>%
    separate_rows(V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    replace_na(list(V4 = 0)) %>%
    group_by(V1.x) %>% fill(V1.y, V3) %>%
    summarize(V1.y = first(V1.y),
              V2 = paste(V2, collapse = " + "),
              V3 = first(V3),
              V4 = sum(V4),
              #V5 = print(deparse(substitute(input)))
              )
  data$V5 <- deparse(substitute(input))
  return(data)
}

result(input_1, teams)

#  V1.x   V1.y       V2           V3        V4 V5     
#  <chr> <chr>     <chr>        <chr>  <dbl> <chr>  
#  1 team1 Team_2022 Mary + Frank Sydney   131 input_1
#  2 team2 Team_2022 Mary + John  Sydney    90 input_1
#  3 team3 Team_2022 Mary + Bill  Sydney    76 input_1

但是列表的使用使得这有点困难(对于该代码,它将输出X[[i]])。
为了解决使用lapply时的这个问题,我建议简单地添加一个接受名称的附加输入,并简单地将该名称赋给它,同时对lapply函数进行额外的调整以适应它:

result = function(input, teams, nme) {
  data = teams %>%
    separate_rows(V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    replace_na(list(V4 = 0)) %>%
    group_by(V1.x) %>% fill(V1.y, V3) %>%
    summarize(V1.y = first(V1.y),
              V2 = paste(V2, collapse = " + "),
              V3 = first(V3),
              V4 = sum(V4),
              #V5 = print(deparse(substitute(input)))
              )
  data$V5 <- nme 
  return(data)
}

all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = T)
input_test <- lapply(input_objects, get)

# add in assigning names to the list
names(input_test) <- input_objects 

output = lapply(input_objects, function(x) result(input_test[[x]], teams, nme = x)) %>%
  bind_rows()

#     V1.x  V1.y      V2           V3        V4 V5     
#     <chr> <chr>     <chr>        <chr>  <dbl> <chr>  
#   1 team1 Team_2022 Mary + Frank Sydney   131 input_1
#   2 team2 Team_2022 Mary + John  Sydney    90 input_1
#   3 team3 Team_2022 Mary + Bill  Sydney    76 input_1
#   4 team1 Team_2023 Mary + Frank Sydney    23 input_2
#   5 team2 Team_2023 Mary + John  Sydney    33 input_2
#   6 team3 Team_2023 Mary + Bill  Sydney   136 input_2
#   7 team1 Team_2024 Mary + Frank Sydney    26 input_3
#   8 team2 Team_2024 Mary + John  Sydney    19 input_3
#   9 team3 Team_2024 Mary + Bill  Sydney    71 input_3
#  10 team1 Team_2025 Mary + Frank Sydney    90 input_4
#  11 team2 Team_2025 Mary + John  Sydney   132 input_4
#  12 team3 Team_2025 Mary + Bill  Sydney    44 input_4

相关问题