在R中,如何使用left_join合并两个结构不同的 Dataframe ?

j7dteeu8  于 2023-04-27  发布在  其他
关注(0)|答案(2)|浏览(148)

我有两个结构不同的 Dataframe df_1df_2,但我想使用left_join将列从一个df添加到另一个df。
我的主df df_1看起来像这样:

structure(list(ID = c("131", "131", "206", "206", "5", "2"), 
    Company = c("ABC", "ABC", "DEF", "DEF", "EFD", "SDF"), Name = c("Alex", 
    "Marian", "Peter", "Maria", "Mila", "Maya"), Degree = c("0", 
    "1", "1", "0", "1", "0")), class = "data.frame", row.names = c(NA, 
-6L))

ID   Company Name    Degree
131  ABC        Alex    0   
131  ABC        Marian  1   
206  DEF        Peter   1   
206  DEF        Maria   0   
5    EFD        Mila    1   
2    SDF        Maya    0

我的第二个df df_2看起来像这样:

structure(list(ID = c("131", "206", "5", "2"), Company = c("ABC", 
"DEF", "EFD", "SDF"), Name_1 = c("Alex", "Peter", "Mila", "Maya"
), Name_2 = c("Marian", "Maria", NA, NA), Achiev = c("1234", 
"2341", "43", "22")), class = "data.frame", row.names = c(NA, 
-4L))

ID   Company Name_1 Name_2  Achiev
131  ABC     Alex   Marian  1234
206  DEF     Peter  Maria   2341
5    EFD     Mila   NA      43
2    SDF     Maya   NA      22

我想基于列IDCompanyName将列Achiev连接到df_1
在我的原始版本中,我在每个dataframe中有超过500个列,我想至少对几个列执行这种转换,请记住这一点。

vaqhlq81

vaqhlq811#

# data

df_1 = structure(list(ID = c("131", "131", "206", "206", "5", "2"), 
    Company = c("ABC", "ABC", "DEF", "DEF", "EFD", "SDF"), Name = c("Alex", 
    "Marian", "Peter", "Maria", "Mila", "Maya"), Degree = c("0", 
    "1", "1", "0", "1", "0")), class = "data.frame", row.names = c(NA, 
-6L))

df_2 = structure(list(ID = c("131", "206", "5", "2"), Company = c("ABC", 
"DEF", "EFD", "SDF"), Name_1 = c("Alex", "Peter", "Mila", "Maya"
), Name_2 = c("Marian", "Maria", NA, NA), Achiev = c("1234", 
"2341", "43", "22")), class = "data.frame", row.names = c(NA, 
-4L))

# left join
merge(df_1, df_2, by.x = c("ID", "Company", "Name"), by.y = c("ID", "Company", "Name_1"), all.x = TRUE)
#>    ID Company   Name Degree Name_2 Achiev
#> 1 131     ABC   Alex      0 Marian   1234
#> 2 131     ABC Marian      1   <NA>   <NA>
#> 3   2     SDF   Maya      0   <NA>     22
#> 4 206     DEF  Maria      0   <NA>   <NA>
#> 5 206     DEF  Peter      1  Maria   2341
#> 6   5     EFD   Mila      1   <NA>     43

创建于2023-04-25使用reprex v2.0.2

7vux5j2d

7vux5j2d2#

我猜 Name_1Name_2 必须以某种方式处理,例如通过组合df2Names

Cols <- grep("^Name", colnames(df2))

df2_m <- na.omit(cbind(rbind(df2[,-Cols], df2[,-Cols]), 
                       Name = unname(unlist(df2[,Cols]))))

df2_m
   ID Company Achiev   Name
1 131     ABC   1234   Alex
2 206     DEF   2341  Peter
3   5     EFD     43   Mila
4   2     SDF     22   Maya
5 131     ABC   1234 Marian
6 206     DEF   2341  Maria
merge(df1, df2_m, c("ID", "Company", "Name"), all.x = T)
   ID Company   Name Degree Achiev
1 131     ABC   Alex      0   1234
2 131     ABC Marian      1   1234
3   2     SDF   Maya      0     22
4 206     DEF  Maria      0   2341
5 206     DEF  Peter      1   2341
6   5     EFD   Mila      1     43

相关问题