我正在尝试创建一个新的数据框,它连接了一些列的名称和它们的值。
我的输入如下所示:
input1 = structure(list(Date = structure(c(1677502800, 1677502800,
1677502800, 1677502800, 1677502800, 1677502800), class = c("POSIXct",
"POSIXt"), tzone = ""), Name = c("Rome_Italy", "Paris_France",
"Beijing_China", "Boston_USA", "Moscow_Russia",
"Sydney_Australia"), ReportType = c("SALES",
"SALES", "SALES", "SALES", "SALES", "SALES"), TestType = c("Internal",
"Internal", "Internal", "Internal", "Internal", "Internal"), Code1 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"), Code2 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"
), Result1 = c("XMAS_DOWN", "XMAS_DOWN", "XMAS_DOWN", "XMAS_DOWN",
"XMAS_DOWN", "XMAS_DOWN"), Result2 = c("N/A", "N/A", "N/A", "N/A",
"N/A", "N/A"), Value = c(24, 6, 0, 9,
-7, -13)), row.names = c(NA, 6L), class = "data.frame")
input2 = structure(list(Date = structure(c(1677502800, 1677502800,
1677502800, 1677502800, 1677502800, 1677502800), class = c("POSIXct",
"POSIXt"), tzone = ""), Name = c("Rome_Italy", "Paris_France",
"Beijing_China", "Boston_USA", "Moscow_Russia",
"Sydney_Australia"), ReportType = c("SALES",
"SALES", "SALES", "SALES", "SALES", "SALES"), TestType = c("Internal",
"Internal", "Internal", "Internal", "Internal", "Internal"), Code1 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"), Code2 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"
), Result1 = c("EAST_DOWN", "EAST_DOWN", "EAST_DOWN", "EAST_DOWN", "EAST_DOWN", "EAST_DOWN" ), Result2 = c("N/A", "N/A", "N/A", "N/A",
"N/A", "N/A"), Value = c(22, 2, 3, 2,
9, 16)), row.names = c(NA, 6L), class = "data.frame")
我想要的输出是这样的:
output = structure(list(Date = structure(c(1677502800, 1677502800,
1677502800, 1677502800, 1677502800, 1677502800), class = c("POSIXct",
"POSIXt"), tzone = ""), Name = c("Rome_Italy", "Paris_France",
"Beijing_China", "Boston_USA", "Moscow_Russia",
"Sydney_Australia"), ReportType = c("SALES",
"SALES", "SALES", "SALES", "SALES", "SALES"), TestType = c("Internal",
"Internal", "Internal", "Internal", "Internal", "Internal"), Code1 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"), Code2 = c("N/A",
"N/A", "N/A", "N/A", "N/A", "N/A"
), Result1 = c("XMAS_DOWN + EAST_DOWN", "XMAS_DOWN + EAST_DOWN", "XMAS_DOWN + EAST_DOWN", "XMAS_DOWN + EAST_DOWN", "XMAS_DOWN + EAST_DOWN", "XMAS_DOWN + EAST_DOWN" ), Result2 = c("N/A", "N/A", "N/A", "N/A",
"N/A", "N/A"), Value = c(46, 8, 3, 11,
2, 3)), row.names = c(NA, 6L), class = "data.frame")
我可以使用连接得到这个结果,对值求和,将Result1粘贴在一起,然后选择我想要保留的列:
test = left_join(input1, input2, by = c('Date', 'Name', 'ReportType', 'TestType', 'Code1', 'Code2', 'Result2'))
test$Value = test$Value.x + test$Value.y
test$Result1 = paste(test$Result1.x, "+", test$Result1.y)
test_1 = select(test, c('Date', 'Name', 'ReportType', 'TestType', 'Code1', 'Code2', 'Result1', 'Result2', 'Value'))
但这感觉像是一个非常笨拙的解决方案。
有谁能提出一个更好的方法吗?
1条答案
按热度按时间a8jjtwal1#
您可以先
bind_rows
,然后summarise
,其中group_by
除了Result1
和Value
之外的所有列: