如何在 R 中根据两列的公共值对两个数据框取子集

axkjgtzd  于 2023-03-05  发布在  其他
关注(0)|答案(2)|浏览(110)

我有两个数据框,需要根据两列(即 V1 和 V2)中的值找出两个数据框共有的行。

df1 <- V1(X,X,Y,Z,Z)
       V2(Q,E,W,Q,Q)
       V3(D,D,Y,V,J)
       V4(O,Z,A,Q,E)

df2 <- V1(X,B,Y,A,Z)
       V2(Q,E,W,Q,Q)
       V3(M,U,C,D,D)
       V4(P,U,I,V,D)

我想对两个数据框分别取子集,只保留 V1 和 V2 的取值组合在两个数据框中都出现的行。下面是我期望得到的结果。

df1_subset <- V1(X,Y,Z)
              V2(Q,W,Q)
              V3(D,Y,J)
              V4(O,A,E)

df2_subset <- V1(X,Y,Z)
              V2(Q,W,Q)
              V3(M,C,D)
              V4(P,I,D)

我将非常感谢你的帮助。

oknwwptz

oknwwptz1#

使用 data.table

library(data.table)

# Convert both data frames to data.tables in place so that the
# `x[!y, on = ...]` anti-join syntax below is available.
setDT(df1)
setDT(df2)

# Semi-join expressed as "x minus (x anti-join y)":
# the inner `x[!y, on = .(V1, V2)]` collects the rows of x whose (V1, V2)
# pair does not occur in y, and fsetdiff() removes exactly those rows from x.
# `all = TRUE` is required: by default fsetdiff() de-duplicates its result,
# which would silently drop repeated rows that are supposed to be kept.
fsetdiff(df1, df1[!df2, on = .(V1, V2)], all = TRUE)
fsetdiff(df2, df2[!df1, on = .(V1, V2)], all = TRUE)
gcuhipw9

gcuhipw92#

可复现示例(reprex)数据:

# First example table: five rows, key columns V1/V2 plus payload columns V3/V4.
df1 <- data.frame(
  V1 = c("X", "X", "Y", "Z", "Z"),
  V2 = c("Q", "E", "W", "Q", "Q"),
  V3 = c("D", "D", "Y", "V", "J"),
  V4 = c("O", "Z", "A", "Q", "E")
)

# Second example table: same column layout as the first one, different rows.
df2 <- data.frame(
  V1 = c("X", "B", "Y", "A", "Z"),
  V2 = c("Q", "E", "W", "Q", "Q"),
  V3 = c("M", "U", "C", "D", "D"),
  V4 = c("P", "U", "I", "V", "D")
)

选项1:使用基础 R 的 interaction()

# Build one composite key per row from the two join columns.
# - sep = "\r": with the default "." separator, distinct pairs can collide
#   (c("a.b", "c") and c("a", "b.c") would both be labelled "a.b.c").
# - interaction() yields NA for any row that has an NA in a key column, and
#   `NA %in% NA` is TRUE, so NA keys are excluded explicitly. Without the
#   guard, unrelated rows that merely contain a missing value would be
#   reported as common. Rows with a missing key value are never kept.
key_cols <- c("V1", "V2")
key1 <- interaction(df1[key_cols], sep = "\r")
key2 <- interaction(df2[key_cols], sep = "\r")

# TRUE for the rows whose (V1, V2) pair also occurs in the other data frame.
idx1 <- !is.na(key1) & key1 %in% key2
idx2 <- !is.na(key2) & key2 %in% key1

# Logical row subsetting keeps the original row order and row names.
df1_subset <- df1[idx1, ]
#    V1 V2 V3 V4
# 1  X  Q  D  O
# 3  Y  W  Y  A
# 4  Z  Q  V  Q
# 5  Z  Q  J  E
df2_subset <- df2[idx2, ]
#    V1 V2 V3 V4
# 1  X  Q  M  P
# 3  Y  W  C  I
# 5  Z  Q  D  D

选项2:使用 dplyr 的 semi_join()

library(dplyr)
# Keep the rows of df1 whose (V1, V2) pair has a match in df2.
# A semi-join only filters x; no columns from y are added.
df1_subset_dplyr <- semi_join(
  x = df1,
  y = df2,
  by = c("V1", "V2")
)
#    V1 V2 V3 V4
# 1  X  Q  D  O
# 2  Y  W  Y  A
# 3  Z  Q  V  Q
# 4  Z  Q  J  E

# Same filter in the other direction: rows of df2 with a match in df1.
df2_subset_dplyr <- semi_join(
  x = df2,
  y = df1,
  by = c("V1", "V2")
)
#    V1 V2 V3 V4
# 1  X  Q  M  P
# 2  Y  W  C  I
# 3  Z  Q  D  D

或者另一个选择。

相关问题