将两个 Dataframe 中的字符和数字数据聚合到一个新的 Dataframe 中

eit6fx6z  于 2023-03-05  发布在  其他
关注(0)|答案(1)|浏览(129)

我正在尝试创建一个新的数据框,它连接了一些列的名称和它们的值。
我的输入如下所示:

input1 = structure(list(Date = structure(c(1677502800, 1677502800,
1677502800, 1677502800, 1677502800, 1677502800), class = c("POSIXct",
"POSIXt"), tzone = ""), Name = c("Rome_Italy", "Paris_France",
"Beijing_China", "Boston_USA", "Moscow_Russia",
"Sydney_Australia"), ReportType = c("SALES",
"SALES", "SALES", "SALES", "SALES", "SALES"), TestType = c("Internal",
"Internal", "Internal", "Internal", "Internal", "Internal"), Code1 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"), Code2 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"
), Result1 = c("XMAS_DOWN", "XMAS_DOWN", "XMAS_DOWN", "XMAS_DOWN",
"XMAS_DOWN", "XMAS_DOWN"), Result2 = c("N/A", "N/A", "N/A", "N/A",
"N/A", "N/A"), Value = c(24, 6, 0, 9,
-7, -13)), row.names = c(NA, 6L), class = "data.frame")

input2 = structure(list(Date = structure(c(1677502800, 1677502800,
1677502800, 1677502800, 1677502800, 1677502800), class = c("POSIXct",
"POSIXt"), tzone = ""), Name = c("Rome_Italy", "Paris_France",
"Beijing_China", "Boston_USA", "Moscow_Russia",
"Sydney_Australia"), ReportType = c("SALES",
"SALES", "SALES", "SALES", "SALES", "SALES"), TestType = c("Internal",
"Internal", "Internal", "Internal", "Internal", "Internal"), Code1 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"), Code2 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"
), Result1 = c("EAST_DOWN", "EAST_DOWN", "EAST_DOWN", "EAST_DOWN", "EAST_DOWN", "EAST_DOWN" ), Result2 = c("N/A", "N/A", "N/A", "N/A",
"N/A", "N/A"), Value = c(22, 2, 3, 2,
9, 16)), row.names = c(NA, 6L), class = "data.frame")

我想要的输出是这样的:

output = structure(list(Date = structure(c(1677502800, 1677502800,
1677502800, 1677502800, 1677502800, 1677502800), class = c("POSIXct",
"POSIXt"), tzone = ""), Name = c("Rome_Italy", "Paris_France",
"Beijing_China", "Boston_USA", "Moscow_Russia",
"Sydney_Australia"), ReportType = c("SALES",
"SALES", "SALES", "SALES", "SALES", "SALES"), TestType = c("Internal",
"Internal", "Internal", "Internal", "Internal", "Internal"), Code1 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"), Code2 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"
), Result1 = c("XMAS_DOWN + EAST_DOWN", "XMAS_DOWN + EAST_DOWN", "XMAS_DOWN + EAST_DOWN", "XMAS_DOWN + EAST_DOWN", "XMAS_DOWN + EAST_DOWN", "XMAS_DOWN + EAST_DOWN" ), Result2 = c("N/A", "N/A", "N/A", "N/A",
"N/A", "N/A"), Value = c(46, 8, 3, 11,
2, 3)), row.names = c(NA, 6L), class = "data.frame")

我可以使用连接得到这个结果,对值求和,将Result1粘贴在一起,然后选择我想要保留的列:

test = left_join(input1, input2, by = c('Date', 'Name', 'ReportType', 'TestType', 'Code1', 'Code2', 'Result2'))
test$Value = test$Value.x + test$Value.y
test$Result1 = paste(test$Result1.x, "+", test$Result1.y)
test_1 = select(test, c('Date', 'Name', 'ReportType', 'TestType', 'Code1', 'Code2', 'Result1', 'Result2', 'Value'))

但这感觉像是一个非常笨拙的解决方案。
有谁能提出一个更好的方法吗?

a8jjtwal

a8jjtwal1#

您可以先bind_rows,然后summarise,其中group_by除了Result1Value之外的所有列:

bind_rows(input1, input2) %>%
  group_by(across(-c(Result1, Value)))  %>% 
  summarise(Value = sum(Value, na.rm = T), 
            Result1 = paste0(Result1, collapse = " + "), 
            .groups = "drop") %>% 
  select(names(input1))
# A tibble: 6 x 9
  Date                Name             ReportType TestType Code1 Code2 Result1               Result2 Value
  <dttm>              <chr>            <chr>      <chr>    <chr> <chr> <chr>                 <chr>   <dbl>
1 2023-02-27 08:00:00 Beijing_China    SALES      Internal N/A   N/A   XMAS_DOWN + EAST_DOWN N/A         3
2 2023-02-27 08:00:00 Boston_USA       SALES      Internal N/A   N/A   XMAS_DOWN + EAST_DOWN N/A        11
3 2023-02-27 08:00:00 Moscow_Russia    SALES      Internal N/A   N/A   XMAS_DOWN + EAST_DOWN N/A         2
4 2023-02-27 08:00:00 Paris_France     SALES      Internal N/A   N/A   XMAS_DOWN + EAST_DOWN N/A         8
5 2023-02-27 08:00:00 Rome_Italy       SALES      Internal N/A   N/A   XMAS_DOWN + EAST_DOWN N/A        46
6 2023-02-27 08:00:00 Sydney_Australia SALES      Internal N/A   N/A   XMAS_DOWN + EAST_DOWN N/A         3

相关问题