为什么 purrr::map 不能正确地把函数应用到拆分后的 Dataframe 的每一部分?

kpbwa7wx  于 2023-03-27  发布在  其他
关注(0)|答案(1)|浏览(107)

我有下面的 Dataframe,我们称之为 df_all:

# Example odds for a single game as quoted by two books (Bovada, LowVig.ag).
# The value is bound to `df_all` so the later snippets that refer to that name
# can be run as-is. structure() sets the tibble classes directly on a plain
# list of equal-length columns.
df_all <- structure(
  list(
    ID = rep("1738c0c7214e7fced61c1caa479a5385", 4),
    Book = c("Bovada", "Bovada", "LowVig.ag", "LowVig.ag"),
    Home = rep("Alabama Crimson Tide", 4),
    Away = rep("San Diego St Aztecs", 4),
    # One row per team within each book.
    Team = rep(c("Alabama Crimson Tide", "San Diego St Aztecs"), 2),
    Price = c(-110, -110, -111, -101),
    Points = c(-7.5, 7.5, -7, 7)
  ),
  row.names = c(NA, -4L),
  class = c("tbl_df", "tbl", "data.frame")
)

我还有下面的数据框,我们称之为 df_alt:

# Alternate lines for the same game, all quoted by Pinnacle (18 rows).
# The value is bound to `df_alt` because int_value() reads that name from the
# enclosing environment. Columns that hold one repeated value are written with
# rep() instead of 18 copies of the same literal.
df_alt <- structure(
  list(
    ID = rep("1738c0c7214e7fced61c1caa479a5385", 18),
    Book = rep("Pinnacle", 18),
    Home = rep("Alabama Crimson Tide", 18),
    Away = rep("San Diego St Aztecs", 18),
    # Eight Alabama rows, eight San Diego St rows, then one more of each.
    Team = c(
      rep("Alabama Crimson Tide", 8),
      rep("San Diego St Aztecs", 8),
      "Alabama Crimson Tide",
      "San Diego St Aztecs"
    ),
    Price = c(
      -149, -138, -126, -115, 105, 114, 122, 132,
      128, 119, 110, 102, -119, -131, -142, -154,
      -104, -108
    ),
    Points = c(
      -5.5, -6, -6.5, -7, -8, -8.5, -9, -9.5,
      5.5, 6, 6.5, 7, 8, 8.5, 9, 9.5,
      -7.5, 7.5
    )
  ),
  row.names = c(NA, -18L),
  class = c("tbl_df", "tbl", "data.frame")
)

我有以下函数,它查找df_all和df_alt之间的公共/相交点值。

# Question's version of int_value(): intended to find the (ID, Team, Points)
# rows that `df` shares with the global `df_alt` and attach the matching
# Pinnacle prices. Returns the value of the second pipeline below.
# NOTE(review): this is the version that misbehaves; see the two notes inside.
int_value <- function(df){
    
    # Rows of `df` whose (ID, Team, Points) also occur in `df_alt`, tagged with
    # Book = 'Pinnacle'.
    # NOTE(review): the result of this pipeline is never assigned, so it is
    # computed and then discarded.
    df %>% 
            dplyr::select(c(ID, Team, Points)) %>%  
            dplyr::intersect(df_alt %>% dplyr::select(c(ID, Team,Points))) %>% 
            mutate(Book = 'Pinnacle')
    
    # NOTE(review): `df_int` is not defined anywhere in this function, so R
    # looks it up in the enclosing environment; whatever it finds there is the
    # same object for every piece passed in by map().
    # Neither join is given `by`, so dplyr joins on all shared column names.
    # After the joins, the `price` taken from df_alt replaces Price wherever it
    # is not NA, and the helper column is dropped.
    df %>% full_join(df_int)%>% left_join(df_alt %>% rename(price=Price)) %>% 
            mutate(Price=ifelse(is.na(price),Price,price))%>% 
            select(-price)
}

我尝试使用下面的map语法应用int_value。

# Split df_all into one tibble per (ID, Book) combination, then call
# int_value() on each piece; the result is a list with one element per piece.
purrr::map(dplyr::group_split(df_all, ID, Book), int_value)

返回的结果如下,这并不是我想要的输出。

[[1]]
# A tibble: 8 × 7
ID                               Book      Home                 Away                Team                 
Price Points
<chr>                            <chr>     <chr>                <chr>               <chr>                
<dbl>  <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada    Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -110   -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada    Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -110    7.5
3 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -111   -7  
4 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -101    7  
5 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -104   -7.5
6 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -108    7.5
7 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -115   -7  
8 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs    102    7  

[[2]]
# A tibble: 8 × 7
ID                               Book      Home                 Away                Team                 
Price Points
<chr>                            <chr>     <chr>                <chr>               <chr>                
<dbl>  <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada    Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -110   -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada    Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -110    7.5
3 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -111   -7  
4 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -101    7  
5 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -104   -7.5
6 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -108    7.5
7 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -115   -7  
8 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs    102    7

这是所需的输出,也是我期望返回的。

[[1]]
# A tibble: 4 × 7
ID                               Book     Home                 Away                Team                 
Price Points
<chr>                            <chr>    <chr>                <chr>               <chr>                
<dbl>  <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada   Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -110   -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada   Alabama Crimson Tide San Diego St Aztecs San Diego 
St Aztecs   -110    7.5
3 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -104   -7.5
4 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego 
St Aztecs   -108    7.5

[[2]]
# A tibble: 4 × 7
ID                               Book      Home                 Away                Team                 
Price Points
<chr>                            <chr>     <chr>                <chr>               <chr>                
<dbl>  <dbl>
1 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -111   -7  
2 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -101    7  
3 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -115   -7  
4 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs    102    7

map 函数似乎并没有遵循按 Book 列拆分所隐含的分组。

eh57zj3b

eh57zj3b1#

解决方案由 @stefan 提出:在函数内部把第一段管道的结果赋值给 df_int(并在 select 中补上 Home、Away 两列),再把后续步骤的结果逐一赋值之后,输出就正确了。

#' Append the matching Pinnacle lines to one book's rows.
#'
#' Finds the rows of `df` whose ID, Home, Away, Team and Points also occur in
#' the global `df_alt`, adds them as extra rows labelled Book = "Pinnacle",
#' and fills their Price from `df_alt`.
#'
#' @param df A data frame with columns ID, Book, Home, Away, Team, Price and
#'   Points (for example one piece produced by `group_split(ID, Book)`).
#' @return `df` followed by the matching Pinnacle rows, with the same columns
#'   as `df`. The value is returned visibly.
int_value <- function(df) {
  key_cols <- c("ID", "Home", "Away", "Team", "Points")

  # Lines offered in `df` that df_alt also offers, relabelled as Pinnacle rows.
  df_int <- df %>%
    dplyr::select(dplyr::all_of(key_cols)) %>%
    dplyr::intersect(dplyr::select(df_alt, dplyr::all_of(key_cols))) %>%
    dplyr::mutate(Book = "Pinnacle")

  # Explicit join keys: these are the columns the original natural joins
  # resolved to when df and df_alt have the columns listed above. Spelling
  # them out avoids the "Joining with `by = ...`" message and keeps the keys
  # stable if either input later gains an unrelated shared column.
  join_cols <- c(key_cols, "Book")
  alt_prices <- dplyr::rename(df_alt, price = Price)

  # The pipeline is the last expression, so its value is returned visibly
  # (the previous version ended in an assignment and returned invisibly).
  df %>%
    dplyr::full_join(df_int, by = join_cols) %>%
    dplyr::left_join(alt_prices, by = join_cols) %>%
    # Prefer the df_alt price where one matched; otherwise keep df's own Price.
    dplyr::mutate(Price = dplyr::coalesce(price, Price)) %>%
    dplyr::select(-price)
}

这是更新后的输出

[[1]]
# A tibble: 4 × 7
ID                               Book     Home                 Away                
Team                 Price Points
<chr>                            <chr>    <chr>                <chr>               
<chr>                <dbl>  <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada   Alabama Crimson Tide San Diego St 
Aztecs Alabama Crimson Tide  -110   -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada   Alabama Crimson Tide San Diego St 
Aztecs San Diego St Aztecs   -110    7.5
3 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St 
Aztecs Alabama Crimson Tide  -104   -7.5
4 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St 
Aztecs San Diego St Aztecs   -108    7.5

[[2]]
# A tibble: 4 × 7
ID                               Book      Home                 Away                
Team                 Price Points
<chr>                            <chr>     <chr>                <chr>               
<chr>                <dbl>  <dbl>
1 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St 
Aztecs Alabama Crimson Tide  -111     -7
2 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St 
Aztecs San Diego St Aztecs   -101      7
3 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St 
Aztecs Alabama Crimson Tide  -115     -7
4 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St 
Aztecs San Diego St Aztecs    102      7

相关问题