我在df.list
中有一个 Dataframe 列表,我想根据external_gene_name
列中的共享值将它们相交。
# Collect the per-region data frames into one list.
df.list <- list(df.x3utr, df.x5utr, df.cds, df.promoter)
# Gene names shared by every data frame in the list.
# Pull the external_gene_name column out of each data frame first, then fold
# intersect() over those character vectors. Seeding the fold with
# names(df.list[[1]]) would compare *column names* against gene names and
# always produce character(0).
gene.names <- lapply(df.list, `[[`, "external_gene_name")
common.names <- Reduce(intersect, gene.names)
输出:
> common.names
character(0)
输出df
应该只包含df.list
中每个 Dataframe 的adj.P.Val
。
# Keep only the rows whose gene is present in common.names.
# The filter is on external_gene_name (not adj.P.Val), and the trailing comma
# makes the logical vector select rows rather than columns.
df.x3utr <- df.x3utr[df.x3utr$external_gene_name %in% common.names, ]
df.x5utr <- df.x5utr[df.x5utr$external_gene_name %in% common.names, ]
df.cds <- df.cds[df.cds$external_gene_name %in% common.names, ]
df.promoter <- df.promoter[df.promoter$external_gene_name %in% common.names, ]

# Build one column of adjusted p-values per region, one row per shared gene.
# match() aligns every column to the order of common.names, because the genes
# do not appear in the same order in each data frame. If a gene occurs more
# than once in a data frame, match() uses its first occurrence.
df <- data.frame(
  X3UTR = df.x3utr$adj.P.Val[
    match(common.names, df.x3utr$external_gene_name)
  ],
  X5UTR = df.x5utr$adj.P.Val[
    match(common.names, df.x5utr$external_gene_name)
  ],
  CDS = df.cds$adj.P.Val[
    match(common.names, df.cds$external_gene_name)
  ],
  promCore = df.promoter$adj.P.Val[
    match(common.names, df.promoter$external_gene_name)
  ],
  row.names = common.names
)
数据:
> dput(df.list)
list(structure(list(seqnames = structure(c(7L, 17L, 1L, 11L,
14L, 2L, 2L, 15L, 20L, 7L), levels = c("chr1", "chr2", "chr3",
"chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11",
"chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18",
"chr19", "chr20", "chr21", "chr22", "chrX", "chrY"), class = "factor"),
start = c(122073549L, 7217125L, 1292390L, 44065925L, 23058564L,
65227753L, 113890063L, 58588809L, 62302093L, 151085831L),
end = c(122144255L, 7225266L, 1309609L, 44084237L, 23095614L,
65271253L, 113962596L, 58749791L, 62308862L, 151144436L),
width = c(70707L, 8142L, 17220L, 18313L, 37051L, 43501L,
72534L, 160983L, 6770L, 58606L), strand = structure(c(2L,
1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L), levels = c("+", "-",
"*"), class = "factor"), ensembl_gene_id = c("ENSG00000008311",
"ENSG00000072778", "ENSG00000131584", "ENSG00000110455",
"ENSG00000100813", "ENSG00000138071", "ENSG00000115091",
"ENSG00000137845", "ENSG00000130706", "ENSG00000133612"),
external_gene_name = c("AASS", "ACADVL", "ACAP3", "ACCS",
"ACIN1", "ACTR2", "ACTR3", "ADAM10", "ADRM1", "AGAP3"), adj.P.Val = c(4.6737332542265e-10,
1.27392687635188e-09, 2.67749562291447e-09, 4.30421108534489e-09,
6.18032947977852e-09, 8.5958306820173e-09, 9.28539096250232e-09,
1.03280085009177e-08, 2.13672442292269e-08, 2.22031576028495e-08
), annot.seqnames = structure(c(7L, 17L, 1L, 11L, 14L, 2L,
2L, 15L, 20L, 7L), levels = c("chr1", "chr2", "chr3", "chr4",
"chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11",
"chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18",
"chr19", "chr20", "chr21", "chr22", "chrX", "chrY", "chrM",
"chr1_gl000191_random", "chr1_gl000192_random", "chr4_ctg9_hap1",
"chr4_gl000193_random", "chr4_gl000194_random", "chr6_apd_hap1",
"chr6_cox_hap2", "chr6_dbb_hap3", "chr6_mann_hap4", "chr6_mcf_hap5",
"chr6_qbl_hap6", "chr6_ssto_hap7", "chr7_gl000195_random",
"chr8_gl000196_random", "chr8_gl000197_random", "chr9_gl000198_random",
"chr9_gl000199_random", "chr9_gl000200_random", "chr9_gl000201_random",
"chr11_gl000202_random", "chr17_ctg5_hap1", "chr17_gl000203_random",
"chr17_gl000204_random", "chr17_gl000205_random", "chr17_gl000206_random",
"chr18_gl000207_random", "chr19_gl000208_random", "chr19_gl000209_random",
"chr21_gl000210_random", "chrUn_gl000211", "chrUn_gl000212",
"chrUn_gl000213", "chrUn_gl000214", "chrUn_gl000215", "chrUn_gl000216",
"chrUn_gl000217", "chrUn_gl000218", "chrUn_gl000219", "chrUn_gl000220",
"chrUn_gl000221", "chrUn_gl000222", "chrUn_gl000223", "chrUn_gl000224",
"chrUn_gl000225", "chrUn_gl000226", "chrUn_gl000227", "chrUn_gl000228",
"chrUn_gl000229", "chrUn_gl000230", "chrUn_gl000231", "chrUn_gl000232",
"chrUn_gl000233", "chrUn_gl000234", "chrUn_gl000235", "chrUn_gl000236",
"chrUn_gl000237", "chrUn_gl000238", "chrUn_gl000239", "chrUn_gl000240",
"chrUn_gl000241", "chrUn_gl000242", "chrUn_gl000243", "chrUn_gl000244",
"chrUn_gl000245", "chrUn_gl000246", "chrUn_gl000247", "chrUn_gl000248",
"chrUn_gl000249"), class = "factor"), annot.start = c(122114435L,
7218951L, 1309110L, 44081471L, 23072672L, 65248281L, 113890449L,
58727100L, 62305446L, 151109122L)), row.names = c(296L, 678L,
745L, 882L, 990L, 1422L, 1759L, 1833L, 2061L, 2219L), class = "data.frame"),
structure(list(seqnames = structure(c(12L, 3L, 17L, 1L, 11L,
14L, 2L, 2L, 15L, 8L), levels = c("chr1", "chr2", "chr3",
"chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10",
"chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17",
"chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY"
), class = "factor"), start = c(53307456L, 51971426L, 7217125L,
1292390L, 44065925L, 23058564L, 65227753L, 113890063L, 58588809L,
38996869L), end = c(53324864L, 51981199L, 7225266L, 1309609L,
44084237L, 23095614L, 65271253L, 113962596L, 58749791L, 39105261L
), width = c(17409L, 9774L, 8142L, 17220L, 18313L, 37051L,
43501L, 72534L, 160983L, 108393L), strand = structure(c(2L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L), levels = c("+", "-",
"*"), class = "factor"), ensembl_gene_id = c("ENSG00000094914",
"ENSG00000248487", "ENSG00000072778", "ENSG00000131584",
"ENSG00000110455", "ENSG00000100813", "ENSG00000138071",
"ENSG00000115091", "ENSG00000137845", "ENSG00000168615"),
external_gene_name = c("AAAS", "ABHD14A", "ACADVL", "ACAP3",
"ACCS", "ACIN1", "ACTR2", "ACTR3", "ADAM10", "ADAM9"),
adj.P.Val = c(1.83731245162161e-12, 1.01276101635279e-09,
1.27392687635188e-09, 2.67749562291447e-09, 4.30421108534489e-09,
6.18032947977852e-09, 8.5958306820173e-09, 9.28539096250232e-09,
1.03280085009177e-08, 1.31456095388164e-08), annot.seqnames = structure(c(12L,
3L, 17L, 1L, 11L, 14L, 2L, 2L, 15L, 8L), levels = c("chr1",
"chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8",
"chr9", "chr10", "chr11", "chr12", "chr13", "chr14",
"chr15", "chr16", "chr17", "chr18", "chr19", "chr20",
"chr21", "chr22", "chrX", "chrY", "chrM", "chr1_gl000191_random",
"chr1_gl000192_random", "chr4_ctg9_hap1", "chr4_gl000193_random",
"chr4_gl000194_random", "chr6_apd_hap1", "chr6_cox_hap2",
"chr6_dbb_hap3", "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6",
"chr6_ssto_hap7", "chr7_gl000195_random", "chr8_gl000196_random",
"chr8_gl000197_random", "chr9_gl000198_random", "chr9_gl000199_random",
"chr9_gl000200_random", "chr9_gl000201_random", "chr11_gl000202_random",
"chr17_ctg5_hap1", "chr17_gl000203_random", "chr17_gl000204_random",
"chr17_gl000205_random", "chr17_gl000206_random", "chr18_gl000207_random",
"chr19_gl000208_random", "chr19_gl000209_random", "chr21_gl000210_random",
"chrUn_gl000211", "chrUn_gl000212", "chrUn_gl000213",
"chrUn_gl000214", "chrUn_gl000215", "chrUn_gl000216",
"chrUn_gl000217", "chrUn_gl000218", "chrUn_gl000219",
"chrUn_gl000220", "chrUn_gl000221", "chrUn_gl000222",
"chrUn_gl000223", "chrUn_gl000224", "chrUn_gl000225",
"chrUn_gl000226", "chrUn_gl000227", "chrUn_gl000228",
"chrUn_gl000229", "chrUn_gl000230", "chrUn_gl000231",
"chrUn_gl000232", "chrUn_gl000233", "chrUn_gl000234",
"chrUn_gl000235", "chrUn_gl000236", "chrUn_gl000237",
"chrUn_gl000238", "chrUn_gl000239", "chrUn_gl000240",
"chrUn_gl000241", "chrUn_gl000242", "chrUn_gl000243",
"chrUn_gl000244", "chrUn_gl000245", "chrUn_gl000246",
"chrUn_gl000247", "chrUn_gl000248", "chrUn_gl000249"), class = "factor"),
annot.start = c(53320234L, 51976361L, 7218571L, 1293885L,
44069531L, 23067147L, 65228582L, 113931560L, 58702775L,
39078454L)), row.names = c(4L, 333L, 462L, 709L, 799L,
926L, 1358L, 1516L, 1805L, 1878L), class = "data.frame"),
structure(list(seqnames = structure(c(12L, 17L, 7L, 3L, 17L,
1L, 11L, 16L, 14L, 20L), levels = c("chr1", "chr2", "chr3",
"chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10",
"chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17",
"chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY"
), class = "factor"), start = c(53307456L, 42950526L, 122073549L,
51971426L, 7217125L, 1292390L, 44065925L, 67657512L, 23058564L,
45841721L), end = c(53324864L, 42964498L, 122144255L, 51981199L,
7225266L, 1309609L, 44084237L, 67660815L, 23095614L, 45857405L
), width = c(17409L, 13973L, 70707L, 9774L, 8142L, 17220L,
18313L, 3304L, 37051L, 15685L), strand = structure(c(2L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L), levels = c("+", "-",
"*"), class = "factor"), ensembl_gene_id = c("ENSG00000094914",
"ENSG00000266967", "ENSG00000008311", "ENSG00000248487",
"ENSG00000072778", "ENSG00000131584", "ENSG00000110455",
"ENSG00000102977", "ENSG00000100813", "ENSG00000101473"),
external_gene_name = c("AAAS", "AARSD1", "AASS", "ABHD14A",
"ACADVL", "ACAP3", "ACCS", "ACD", "ACIN1", "ACOT8"),
adj.P.Val = c(1.83731245162161e-12, 2.64248727238285e-11,
4.6737332542265e-10, 1.01276101635279e-09, 1.27392687635188e-09,
2.67749562291447e-09, 4.30421108534489e-09, 4.6721922893073e-09,
6.18032947977852e-09, 6.2518699283833e-09), annot.seqnames = structure(c(12L,
17L, 7L, 3L, 17L, 1L, 11L, 16L, 14L, 20L), levels = c("chr1",
"chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8",
"chr9", "chr10", "chr11", "chr12", "chr13", "chr14",
"chr15", "chr16", "chr17", "chr18", "chr19", "chr20",
"chr21", "chr22", "chrX", "chrY", "chrM", "chr1_gl000191_random",
"chr1_gl000192_random", "chr4_ctg9_hap1", "chr4_gl000193_random",
"chr4_gl000194_random", "chr6_apd_hap1", "chr6_cox_hap2",
"chr6_dbb_hap3", "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6",
"chr6_ssto_hap7", "chr7_gl000195_random", "chr8_gl000196_random",
"chr8_gl000197_random", "chr9_gl000198_random", "chr9_gl000199_random",
"chr9_gl000200_random", "chr9_gl000201_random", "chr11_gl000202_random",
"chr17_ctg5_hap1", "chr17_gl000203_random", "chr17_gl000204_random",
"chr17_gl000205_random", "chr17_gl000206_random", "chr18_gl000207_random",
"chr19_gl000208_random", "chr19_gl000209_random", "chr21_gl000210_random",
"chrUn_gl000211", "chrUn_gl000212", "chrUn_gl000213",
"chrUn_gl000214", "chrUn_gl000215", "chrUn_gl000216",
"chrUn_gl000217", "chrUn_gl000218", "chrUn_gl000219",
"chrUn_gl000220", "chrUn_gl000221", "chrUn_gl000222",
"chrUn_gl000223", "chrUn_gl000224", "chrUn_gl000225",
"chrUn_gl000226", "chrUn_gl000227", "chrUn_gl000228",
"chrUn_gl000229", "chrUn_gl000230", "chrUn_gl000231",
"chrUn_gl000232", "chrUn_gl000233", "chrUn_gl000234",
"chrUn_gl000235", "chrUn_gl000236", "chrUn_gl000237",
"chrUn_gl000238", "chrUn_gl000239", "chrUn_gl000240",
"chrUn_gl000241", "chrUn_gl000242", "chrUn_gl000243",
"chrUn_gl000244", "chrUn_gl000245", "chrUn_gl000246",
"chrUn_gl000247", "chrUn_gl000248", "chrUn_gl000249"), class = "factor"),
annot.start = c(53320196L, 42963953L, 122131366L, 51977372L,
7217807L, 1293836L, 44069587L, 67660458L, 23067216L,
45855946L)), row.names = c(3L, 11L, 144L, 311L, 420L,
696L, 772L, 888L, 909L, 1003L), class = "data.frame"), structure(list(
seqnames = structure(c(12L, 3L, 17L, 1L, 17L, 11L, 14L,
2L, 15L, 8L), levels = c("chr1", "chr2", "chr3", "chr4",
"chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11",
"chr12", "chr13", "chr14", "chr15", "chr16", "chr17",
"chr18", "chr19", "chr20", "chr21", "chr22", "chrX",
"chrY"), class = "factor"), start = c(53307456L, 51971426L,
7217125L, 1292390L, 45132600L, 44065925L, 23058564L,
113890063L, 58588809L, 38996869L), end = c(53324864L,
51981199L, 7225266L, 1309609L, 45144181L, 44084237L,
23095614L, 113962596L, 58749791L, 39105261L), width = c(17409L,
9774L, 8142L, 17220L, 11582L, 18313L, 37051L, 72534L,
160983L, 108393L), strand = structure(c(2L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 2L, 1L), levels = c("+", "-", "*"), class = "factor"),
ensembl_gene_id = c("ENSG00000094914", "ENSG00000248487",
"ENSG00000072778", "ENSG00000131584", "ENSG00000181513",
"ENSG00000110455", "ENSG00000100813", "ENSG00000115091",
"ENSG00000137845", "ENSG00000168615"), external_gene_name = c("AAAS",
"ABHD14A", "ACADVL", "ACAP3", "ACBD4", "ACCS", "ACIN1",
"ACTR3", "ADAM10", "ADAM9"), adj.P.Val = c(1.83731245162161e-12,
1.01276101635279e-09, 1.27392687635188e-09, 2.67749562291447e-09,
2.90826962635755e-09, 4.30421108534489e-09, 6.18032947977852e-09,
9.28539096250232e-09, 1.03280085009177e-08, 1.31456095388164e-08
), annot.seqnames = structure(c(12L, 3L, 17L, 1L, 17L,
11L, 14L, 2L, 15L, 8L), levels = c("chr1", "chr2", "chr3",
"chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10",
"chr11", "chr12", "chr13", "chr14", "chr15", "chr16",
"chr17", "chr18", "chr19", "chr20", "chr21", "chr22",
"chrX", "chrY", "chrM", "chr1_gl000191_random", "chr1_gl000192_random",
"chr4_ctg9_hap1", "chr4_gl000193_random", "chr4_gl000194_random",
"chr6_apd_hap1", "chr6_cox_hap2", "chr6_dbb_hap3", "chr6_mann_hap4",
"chr6_mcf_hap5", "chr6_qbl_hap6", "chr6_ssto_hap7", "chr7_gl000195_random",
"chr8_gl000196_random", "chr8_gl000197_random", "chr9_gl000198_random",
"chr9_gl000199_random", "chr9_gl000200_random", "chr9_gl000201_random",
"chr11_gl000202_random", "chr17_ctg5_hap1", "chr17_gl000203_random",
"chr17_gl000204_random", "chr17_gl000205_random", "chr17_gl000206_random",
"chr18_gl000207_random", "chr19_gl000208_random", "chr19_gl000209_random",
"chr21_gl000210_random", "chrUn_gl000211", "chrUn_gl000212",
"chrUn_gl000213", "chrUn_gl000214", "chrUn_gl000215",
"chrUn_gl000216", "chrUn_gl000217", "chrUn_gl000218",
"chrUn_gl000219", "chrUn_gl000220", "chrUn_gl000221",
"chrUn_gl000222", "chrUn_gl000223", "chrUn_gl000224",
"chrUn_gl000225", "chrUn_gl000226", "chrUn_gl000227",
"chrUn_gl000228", "chrUn_gl000229", "chrUn_gl000230",
"chrUn_gl000231", "chrUn_gl000232", "chrUn_gl000233",
"chrUn_gl000234", "chrUn_gl000235", "chrUn_gl000236",
"chrUn_gl000237", "chrUn_gl000238", "chrUn_gl000239",
"chrUn_gl000240", "chrUn_gl000241", "chrUn_gl000242",
"chrUn_gl000243", "chrUn_gl000244", "chrUn_gl000245",
"chrUn_gl000246", "chrUn_gl000247", "chrUn_gl000248",
"chrUn_gl000249"), class = "factor"), annot.start = c(53320254L,
51975361L, 7218648L, 1293916L, 45137516L, 44068531L,
23066147L, 113930560L, 58701775L, 39077454L)), row.names = c(1L,
305L, 412L, 686L, 756L, 764L, 899L, 1427L, 1790L, 1836L), class = "data.frame"))
2条答案
按热度按时间d4so4syb1#
在 base R 中,我们用 lapply 遍历 list,并用 [[ 提取每个 data frame 的 external_gene_name 列,然后对提取出的列应用 intersect,得到 common.names。接着,基于 common.names 对各数据集取子集(subset),再用 Reduce 配合 merge,按 'external_gene_name' 将列表合并为单个 data frame。
dced5bon2#
一种方法: