如何对分组 Dataframe 的每两行应用一个函数?

gkn4icbw  于 2023-03-20  发布在  其他
关注(0)|答案(1)|浏览(109)

我有下面的dput

structure(list(ID = c("ID_1", "ID_1", "ID_1", "ID_1", "ID_1", "ID_1", "ID_1", 
                      "ID_1", "ID_1", "ID_1", "ID_2", "ID_2", "ID_2", "ID_2", 
                      "ID_2", "ID_2", "ID_2", "ID_2", "ID_2", "ID_2"), 
               Team = c("Team_A1", "Team_B1", "Team_A2", "Team_B2", "Team_A3", 
                        "Team_B3", "Team_A4", "Team_B4", "Team_A5", "Team_B5", 
                        "Team_C1", "Team_D1", "Team_C2", "Team_D2", "Team_C3", 
                        "Team_D3", "Team_C4", "Team_D4", "Team_C5", "Team_D5"), 
               Price = c(-155L, 130L, -145L, 115L, -135L, 115L, -135L, 115L, 
                         -145L, 122L, -185L, 150L, 140L, -170L, -195L, 165L, 
                         140L, -165L, -190L, 158L)), 
          row.names = c(NA, -20L), 
          class = c("tbl_df", "tbl", "data.frame"))

它返回以下tibble

# A tibble: 20 × 3
ID    Team    Price
<chr> <chr>   <int>
1 ID_1  Team_A1  -155
2 ID_1  Team_B1   130
3 ID_1  Team_A2  -145
4 ID_1  Team_B2   115
5 ID_1  Team_A3  -135
6 ID_1  Team_B3   115
7 ID_1  Team_A4  -135
8 ID_1  Team_B4   115
9 ID_1  Team_A5  -145
10 ID_1  Team_B5   122
11 ID_2  Team_C1  -185
12 ID_2  Team_D1   150
13 ID_2  Team_C2   140
14 ID_2  Team_D2  -170
15 ID_2  Team_C3  -195
16 ID_2  Team_D3   165
17 ID_2  Team_C4   140
18 ID_2  Team_D4  -165
19 ID_2  Team_C5  -190
20 ID_2  Team_D5   158

我有一个函数使用每两行的Price值,每个团队一个。如何将此函数应用于tibble?我的猜测是,我需要group_by ID列,然后使用map或apply对每2行执行此函数。我在查找语法时遇到了麻烦。这是tibble所需的输出。如您所见,New_值包含每2行的计算。

# A tibble: 20 × 3
ID    Team    Price   New Value
<chr> <chr>   <int>    <int>
1 ID_1  Team_A1  -155    A1B1
2 ID_1  Team_B1   130    A1B1
3 ID_1  Team_A2  -145    A2B2
4 ID_1  Team_B2   115    A2B2
5 ID_1  Team_A3  -135    A3B3
6 ID_1  Team_B3   115    A3B3
7 ID_1  Team_A4  -135    A4B4
8 ID_1  Team_B4   115    A4B4
9 ID_1  Team_A5  -145    A5B5
10 ID_1  Team_B5   122    A5B5
11 ID_2  Team_C1  -185    C1D1
12 ID_2  Team_D1   150    C1D1
13 ID_2  Team_C2   140    C2D2
14 ID_2  Team_D2  -170    C2D2
15 ID_2  Team_C3  -195    C3D3
16 ID_2  Team_D3   165    C3D3
17 ID_2  Team_C4   140    C4D4
18 ID_2  Team_D4  -165    C4D4
19 ID_2  Team_C5  -190    C5D5
20 ID_2  Team_D5   158    C5D5

下面是正在调用的函数。

mutate(New_Value = proprietary_function(Price[1], Price[2]))
hyrbngr7

hyrbngr71#

关键是每两行group_by一次。为了严格遵循您的示例和所需的输出,代码可以如下所示:

library(dplyr)

proprietary_function <- function(x, y) {
  paste0(sub(".*_", "", x), sub(".*_", "", y), collapse = "")
}

df %>% 
  group_by(rn = ceiling(row_number()/2)) %>% 
  mutate(new_value = proprietary_function(Team[1], Team[2])) %>% 
  ungroup() %>% 
  select(-rn)

# A tibble: 20 × 4
   ID    Team    Price new_value
   <chr> <chr>   <int> <chr>    
 1 ID_1  Team_A1  -155 A1B1     
 2 ID_1  Team_B1   130 A1B1     
 3 ID_1  Team_A2  -145 A2B2     
 4 ID_1  Team_B2   115 A2B2     
 5 ID_1  Team_A3  -135 A3B3     
 6 ID_1  Team_B3   115 A3B3     
 7 ID_1  Team_A4  -135 A4B4     
 8 ID_1  Team_B4   115 A4B4     
 9 ID_1  Team_A5  -145 A5B5     
10 ID_1  Team_B5   122 A5B5     
11 ID_2  Team_C1  -185 C1D1     
12 ID_2  Team_D1   150 C1D1     
13 ID_2  Team_C2   140 C2D2     
14 ID_2  Team_D2  -170 C2D2     
15 ID_2  Team_C3  -195 C3D3     
16 ID_2  Team_D3   165 C3D3     
17 ID_2  Team_C4   140 C4D4     
18 ID_2  Team_D4  -165 C4D4     
19 ID_2  Team_C5  -190 C5D5     
20 ID_2  Team_D5   158 C5D5

相关问题