如果行名称与另一个 Dataframe 不匹配,如何将行值分配为0?

qojgxg4l  于 2023-05-04  发布在  其他
关注(0)|答案(3)|浏览(173)

列表 df.list 包含 6 个数据框,它们的列数相同,但行数不同。
我希望所有数据框具有相同的行数。如果某个行名不在某个数据框中(即该行名是唯一的),则将该行的值赋为 0。各个数据框应该单独返回(而不是放在列表中)。

# Collect the six score objects into a single list.
df.list <- list(scores.1a, scores.1b, scores.1c, scores.2a, scores.2b, scores.2c)
# NOTE(review): Reduce() calls unique(acc, next), so each following set of row
# names is passed as unique()'s second argument (`incomparables`) instead of
# being merged in. Reduce(union, ...) was presumably intended -- confirm.
unique.names <- Reduce(unique, lapply(df.list, row.names))

# For each object: select its own rows whose names are NOT in unique.names,
# then append those rows back onto the same object with rbind().
# NOTE(review): this only re-appends rows the object already contains; it
# never creates zero-filled rows for names that are missing from the object.
notin1a <- scores.1a[!(rownames(scores.1a) %in% unique.names),] 
scores.1a <- rbind(scores.1a, notin1a)
notin1b <- scores.1b[!(rownames(scores.1b) %in% unique.names),] 
scores.1b <- rbind(scores.1b, notin1b)
notin1c <- scores.1c[!(rownames(scores.1c) %in% unique.names),] 
scores.1c <- rbind(scores.1c, notin1c)
notin2a <- scores.2a[!(rownames(scores.2a) %in% unique.names),] 
scores.2a <- rbind(scores.2a, notin2a)
# NOTE(review): `common.names` is not defined anywhere in the visible code;
# the sibling lines all use `unique.names` -- probably a typo, confirm.
notin2b <- scores.2b[!(rownames(scores.2b) %in% common.names),] 
scores.2b <- rbind(scores.2b, notin2b)
notin2c <- scores.2c[!(rownames(scores.2c) %in% unique.names),] 
scores.2c <- rbind(scores.2c, notin2c)

回溯:

Error in cbind(scores.1a, scores.1b, scores.1c, scores.2a, scores.2b,  : 
  number of rows of matrices must match (see arg 2)

样本数据:

> dput(df.list)
list(structure(c(4.69416065754249e-42, 6.0513299332676e-38, 2.89877790887805e-36, 
2.85123238031875e-33, 1.47604844342483e-30, 3.43040504604337e-30, 
5.86872957403304e-29, 1.06954287499982e-28, 1.30662578944682e-28, 
4.82163948607124e-28, 4.69416065754249e-42, 6.0513299332676e-38, 
2.89877790887805e-36, 2.85123238031875e-33, 1.47604844342483e-30, 
3.43040504604337e-30, 5.86872957403304e-29, 1.06954287499982e-28, 
1.30662578944682e-28, 4.82163948607124e-28, 4.69416065754249e-42, 
6.0513299332676e-38, 2.89877790887805e-36, 2.85123238031875e-33, 
1.47604844342483e-30, 3.43040504604337e-30, 5.86872957403304e-29, 
1.06954287499982e-28, 1.30662578944682e-28, 4.82163948607124e-28, 
4.69416065754249e-42, 6.0513299332676e-38, 2.89877790887805e-36, 
2.85123238031875e-33, 1.47604844342483e-30, 3.43040504604337e-30, 
5.86872957403304e-29, 1.06954287499982e-28, 1.30662578944682e-28, 
4.82163948607124e-28), dim = c(10L, 4L), dimnames = list(c("A2M", 
"ABCA7", "ABCB9", "ABCD1", "ABLIM2", "ABT1", "ACAP1", "ACAP3", 
"ACCS", "ACOT9"), c("KIRP1a.X3UTR", "KIRP1a.X5UTR", "KIRP1a.CDS", 
"KIRP1a.promCore"))), structure(c(6.02781805188179e-32, 5.82900841856826e-27, 
8.8650545387109e-26, 2.61277026722025e-24, 6.24024734286127e-23, 
8.77298331273884e-23, 1.94852987224159e-22, 3.57104113334633e-22, 
4.86000479954981e-22, 6.49022377188339e-21, 6.02781805188179e-32, 
5.82900841856826e-27, 8.8650545387109e-26, 2.61277026722025e-24, 
6.24024734286127e-23, 8.77298331273884e-23, 1.94852987224159e-22, 
3.57104113334633e-22, 4.86000479954981e-22, 6.49022377188339e-21, 
6.02781805188179e-32, 5.82900841856826e-27, 8.8650545387109e-26, 
2.61277026722025e-24, 6.24024734286127e-23, 8.77298331273884e-23, 
1.94852987224159e-22, 3.57104113334633e-22, 4.86000479954981e-22, 
6.49022377188339e-21, 6.02781805188179e-32, 5.82900841856826e-27, 
8.8650545387109e-26, 2.61277026722025e-24, 6.24024734286127e-23, 
8.77298331273884e-23, 1.94852987224159e-22, 3.57104113334633e-22, 
4.86000479954981e-22, 6.49022377188339e-21), dim = c(10L, 4L), dimnames = list(
    c("A2M", "ABCA7", "ABCB9", "ABCD1", "ACAP1", "ACAP3", "ACCS", 
    "ACRBP", "ACTB", "ACTN4"), c("KIRP1b.X3UTR", "KIRP1b.X5UTR", 
    "KIRP1b.CDS", "KIRP1b.promCore"))), structure(c(1.55502961788497e-37, 
6.82123433099474e-34, 2.16318121565099e-33, 1.61437112767647e-32, 
5.62718186736796e-29, 8.80498672474288e-29, 1.40908074678065e-28, 
1.0691910168236e-27, 2.45751770107637e-27, 3.51830046859091e-27, 
1.55502961788497e-37, 6.82123433099474e-34, 2.16318121565099e-33, 
1.61437112767647e-32, 5.62718186736796e-29, 8.80498672474288e-29, 
1.40908074678065e-28, 1.0691910168236e-27, 2.45751770107637e-27, 
3.51830046859091e-27, 1.55502961788497e-37, 6.82123433099474e-34, 
2.16318121565099e-33, 1.61437112767647e-32, 5.62718186736796e-29, 
8.80498672474288e-29, 1.40908074678065e-28, 1.0691910168236e-27, 
2.45751770107637e-27, 3.51830046859091e-27, 1.55502961788497e-37, 
6.82123433099474e-34, 2.16318121565099e-33, 1.61437112767647e-32, 
5.62718186736796e-29, 8.80498672474288e-29, 1.40908074678065e-28, 
1.0691910168236e-27, 2.45751770107637e-27, 3.51830046859091e-27
), dim = c(10L, 4L), dimnames = list(c("A2M", "ABCA7", "ABCB9", 
"ABCD1", "ACAP1", "ACAP3", "ACCS", "ACOT9", "ACRBP", "ACTB"), 
    c("KIRP1c.X3UTR", "KIRP1c.X5UTR", "KIRP1c.CDS", "KIRP1c.promCore"
    ))), structure(c(4.62071070020255e-35, 1.01109404411977e-30, 
1.62340375422632e-28, 5.05748981709716e-27, 7.4566799068015e-26, 
9.95551308236402e-26, 1.22734181112028e-25, 1.3752043675869e-25, 
5.33041187027489e-25, 6.3150904604211e-25, 4.62071070020255e-35, 
1.01109404411977e-30, 1.62340375422632e-28, 5.05748981709716e-27, 
7.4566799068015e-26, 9.95551308236402e-26, 1.22734181112028e-25, 
1.3752043675869e-25, 5.33041187027489e-25, 6.3150904604211e-25, 
4.62071070020255e-35, 1.01109404411977e-30, 1.62340375422632e-28, 
5.05748981709716e-27, 7.4566799068015e-26, 9.95551308236402e-26, 
1.22734181112028e-25, 1.3752043675869e-25, 5.33041187027489e-25, 
6.3150904604211e-25, 4.62071070020255e-35, 1.01109404411977e-30, 
1.62340375422632e-28, 5.05748981709716e-27, 7.4566799068015e-26, 
9.95551308236402e-26, 1.22734181112028e-25, 1.3752043675869e-25, 
5.33041187027489e-25, 6.3150904604211e-25), dim = c(10L, 4L), dimnames = list(
    c("A2M", "ABCA7", "ABCB9", "ABCD1", "ABLIM2", "ABT1", "ACAP1", 
    "ACCS", "ACOX3", "ACRBP"), c("KIRP2a.X3UTR", "KIRP2a.X5UTR", 
    "KIRP2a.CDS", "KIRP2a.promCore"))), structure(c(9.74240010701507e-40, 
2.11789902281316e-33, 4.46523408292594e-31, 1.30189496759334e-30, 
1.02042844284013e-29, 1.99943105322521e-29, 3.00065886399859e-29, 
3.51759154868364e-29, 1.0237124789762e-28, 1.66731834926145e-28, 
9.74240010701507e-40, 2.11789902281316e-33, 4.46523408292594e-31, 
1.30189496759334e-30, 1.02042844284013e-29, 1.99943105322521e-29, 
3.00065886399859e-29, 3.51759154868364e-29, 1.0237124789762e-28, 
1.66731834926145e-28, 9.74240010701507e-40, 2.11789902281316e-33, 
4.46523408292594e-31, 1.30189496759334e-30, 1.02042844284013e-29, 
1.99943105322521e-29, 3.00065886399859e-29, 3.51759154868364e-29, 
1.0237124789762e-28, 1.66731834926145e-28, 9.74240010701507e-40, 
2.11789902281316e-33, 4.46523408292594e-31, 1.30189496759334e-30, 
1.02042844284013e-29, 1.99943105322521e-29, 3.00065886399859e-29, 
3.51759154868364e-29, 1.0237124789762e-28, 1.66731834926145e-28
), dim = c(10L, 4L), dimnames = list(c("A2M", "ABCA7", "ABCB9", 
"ABCD1", "ABT1", "ACAP1", "ACAP3", "ACCS", "ACOX3", "ACRBP"), 
    c("KIRP2b.X3UTR", "KIRP2b.X5UTR", "KIRP2b.CDS", "KIRP2b.promCore"
    ))), structure(c(2.77090056563127e-28, 1.21015896726265e-25, 
1.84118490393544e-25, 4.65592701166262e-25, 3.26614231027095e-23, 
6.1338391921814e-23, 3.31888253251873e-22, 1.55135657676455e-21, 
3.98472400556953e-20, 6.56496181862045e-20, 2.77090056563127e-28, 
1.21015896726265e-25, 1.84118490393544e-25, 4.65592701166262e-25, 
3.26614231027095e-23, 6.1338391921814e-23, 3.31888253251873e-22, 
1.55135657676455e-21, 3.98472400556953e-20, 6.56496181862045e-20, 
2.77090056563127e-28, 1.21015896726265e-25, 1.84118490393544e-25, 
4.65592701166262e-25, 3.26614231027095e-23, 6.1338391921814e-23, 
3.31888253251873e-22, 1.55135657676455e-21, 3.98472400556953e-20, 
6.56496181862045e-20, 2.77090056563127e-28, 1.21015896726265e-25, 
1.84118490393544e-25, 4.65592701166262e-25, 3.26614231027095e-23, 
6.1338391921814e-23, 3.31888253251873e-22, 1.55135657676455e-21, 
3.98472400556953e-20, 6.56496181862045e-20), dim = c(10L, 4L), dimnames = list(
    c("ABCA7", "ABCB9", "ABCD1", "ACAP1", "ACRBP", "ACTB", "ACYP2", 
    "ADAMTS3", "ADAMTS7", "ADAMTSL4"), c("KIRP2c.X3UTR", "KIRP2c.X5UTR", 
    "KIRP2c.CDS", "KIRP2c.promCore"))))
ql3eal8s

ql3eal8s1#

另一种方法是使用 match 和 replace:

# Union of the row names of every element of df.list, in first-seen order.
# unlist(lapply(...)) always produces a flat character vector, even when the
# elements have different numbers of rows; sapply() would return a list in
# that case and the match() below would no longer work.
uniq_rownames <- unique(unlist(lapply(df.list, rownames)))

# Returns a list with one data.frame per element of df.list, each having one
# row per name in uniq_rownames (same order for every element).
lapply(df.list, function(x) {
  # match() yields NA for names that x does not have; indexing rows with NA
  # produces all-NA rows. drop = FALSE keeps two dimensions even when x has
  # only a single column.
  res <- x[match(uniq_rownames, rownames(x)), , drop = FALSE]
  # Turn the NA entries (the rows that were missing from x) into 0.
  res <- replace(res, is.na(res), 0)
  # Label the rows with the full set of names.
  data.frame(res, row.names = uniq_rownames)
})
[[1]]
         KIRP1a.X3UTR KIRP1a.X5UTR   KIRP1a.CDS KIRP1a.promCore
A2M      4.694161e-42 4.694161e-42 4.694161e-42    4.694161e-42
ABCA7    6.051330e-38 6.051330e-38 6.051330e-38    6.051330e-38
ABCB9    2.898778e-36 2.898778e-36 2.898778e-36    2.898778e-36
ABCD1    2.851232e-33 2.851232e-33 2.851232e-33    2.851232e-33
ABLIM2   1.476048e-30 1.476048e-30 1.476048e-30    1.476048e-30
ABT1     3.430405e-30 3.430405e-30 3.430405e-30    3.430405e-30
ACAP1    5.868730e-29 5.868730e-29 5.868730e-29    5.868730e-29
ACAP3    1.069543e-28 1.069543e-28 1.069543e-28    1.069543e-28
ACCS     1.306626e-28 1.306626e-28 1.306626e-28    1.306626e-28
ACOT9    4.821639e-28 4.821639e-28 4.821639e-28    4.821639e-28
ACRBP    0.000000e+00 0.000000e+00 0.000000e+00    0.000000e+00
ACTB     0.000000e+00 0.000000e+00 0.000000e+00    0.000000e+00
ACTN4    0.000000e+00 0.000000e+00 0.000000e+00    0.000000e+00
ACOX3    0.000000e+00 0.000000e+00 0.000000e+00    0.000000e+00
ACYP2    0.000000e+00 0.000000e+00 0.000000e+00    0.000000e+00
ADAMTS3  0.000000e+00 0.000000e+00 0.000000e+00    0.000000e+00
ADAMTS7  0.000000e+00 0.000000e+00 0.000000e+00    0.000000e+00
ADAMTSL4 0.000000e+00 0.000000e+00 0.000000e+00    0.000000e+00

[[2]]
         KIRP1b.X3UTR KIRP1b.X5UTR   KIRP1b.CDS KIRP1b.promCore
A2M      6.027818e-32 6.027818e-32 6.027818e-32    6.027818e-32
ABCA7    5.829008e-27 5.829008e-27 5.829008e-27    5.829008e-27
ABCB9    8.865055e-26 8.865055e-26 8.865055e-26    8.865055e-26
...
vjhs03f7

vjhs03f72#

您可以首先找出列表中所有数据框的最大行数,然后使用 lapply 为行数不足的数据框添加全为 0 的行(直到达到最大行数),并通过行索引一次性对所有列赋值。
使用简化数据以便于说明:

# Simplified sample data: two data frames with identical columns (a, b)
# but different row counts (3 and 7).
ll <- lapply(c(3L, 7L), function(n) {
  data.frame(a = seq_len(n), b = seq_len(n))
})

您可以通过以下方式执行此操作:

# Largest row count found among the data frames in ll.
maxrows <- max(vapply(ll, nrow, integer(1)))

# Pad every shorter data frame with all-zero rows until it has maxrows rows;
# data frames that already have maxrows rows are returned as they are.
ll_same <- lapply(ll, function(x) {
  n_have <- nrow(x)
  if (n_have < maxrows) {
    x[seq(n_have + 1L, maxrows), ] <- 0
  }
  x
})

输出:

[[1]]
  a b
1 1 1
2 2 2
3 3 3
4 0 0
5 0 0
6 0 0
7 0 0

[[2]]
  a b
1 1 1
2 2 2
3 3 3
4 4 4
5 5 5
6 6 6
7 7 7

如果希望列表中的每个数据框都成为环境中的独立对象,可以使用 assign:

# Create one object per list element (df_1, df_2, ...) in the current
# environment, each holding the corresponding data frame from ll_same.
for (i in seq_along(ll_same)) {
  assign(x = paste0("df_", i), value = ll_same[[i]])
}
5cnsuln7

5cnsuln73#

为了使结果更明显,我简化了示例,但这也适用于完整数据。由于您实际使用的是matrix对象列表,因此我将其转换为data.frame,然后将每个集合中缺少的行赋值为0。
示例数据:

# 3 x 3 integer matrix with row names a, b, c and column names A, B, C.
mat <- matrix(
  seq_len(9),
  nrow = 3,
  dimnames = list(c("a", "b", "c"), c("A", "B", "C"))
)
# Three sub-matrices, each keeping a different pair of rows of mat.
df.list <- lapply(
  list(c(1, 2), c(1, 3), c(2, 3)),
  function(keep) mat[keep, ]
)

#[[1]]
#  A B C
#a 1 4 7
#b 2 5 8
#
#[[2]]
#  A B C
#a 1 4 7
#c 3 6 9
#
#[[3]]
#  A B C
#b 2 5 8
#c 3 6 9

处理它:

# Union of the row names of every element of df.list.
allrn <- Reduce(union, lapply(df.list, rownames))
df.list <- lapply(
    df.list,
    \(x) {
        # Convert to a data.frame so rows can be added by name.
        x <- as.data.frame(x)
        # Assigning to row names that x does not have yet appends those
        # rows, filled with 0 in every column.
        x[setdiff(allrn, rownames(x)),] <- 0
        # Sort by row name so every element ends up in the same row order.
        # drop = FALSE keeps a data.frame even when x has a single column
        # (without it the result would collapse to a plain vector).
        x[order(rownames(x)), , drop = FALSE]
    }
)

df.list
#[[1]]
#  A B C
#a 1 4 7
#b 2 5 8
#c 0 0 0
#
#[[2]]
#  A B C
#a 1 4 7
#b 0 0 0
#c 3 6 9
#
#[[3]]
#  A B C
#a 0 0 0
#b 2 5 8
#c 3 6 9

相关问题