rst.mva.one.leave.out["cpg_ids"]
列包含可能与methreg$probeID
对应的cg*
子字符串。对于所有匹配项,我希望检索rst.mva.one.leave.out
中相应的行并将其存储为rst.subset
。
for (i in 1:nrow(rst.mva.one.leave.out)) {
rst.subset <- rst.mva.one.leave.out[strsplit(rst.mva.one.leave.out["cpg_ids"][i], ",", fixed=T) %in% methreg$probeID,]
}
回溯:
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function '%in%': non-character argument
数据:dput(rst.mva.one.leave.out[1:20,2:8])
structure(list(sample = c("TCGA.A3.3357.01", "TCGA.A3.3357.01",
"TCGA.A3.3357.01", "TCGA.A3.3357.01", "TCGA.A3.3357.01", "TCGA.A3.3357.01",
"TCGA.A3.3357.01", "TCGA.A3.3357.01", "TCGA.A3.3357.01", "TCGA.A3.3357.01",
"TCGA.A3.3357.01", "TCGA.A3.3357.01", "TCGA.A3.3367.01", "TCGA.A3.3367.01",
"TCGA.A3.3367.01", "TCGA.A3.3367.01", "TCGA.A3.3367.01", "TCGA.A3.3367.01",
"TCGA.A3.3367.01", "TCGA.A3.3367.01"), chromosome = c("7", "3",
"7", "6", "8", "8", "14", "16", "1", "19", "16", "6", "1", "17",
"16", "5", "7", "5", "6", "3"), start = c(25989524L, 50275446L,
134142981L, 146348486L, 116679763L, 145747140L, 37641261L, 73125573L,
27874258L, 37407041L, 57770390L, 43043691L, 46769035L, 26925852L,
2652948L, 43039592L, 79082898L, 131563095L, 26195697L, 36985996L
), end = c(25989763L, 50275694L, 134144288L, 146348913L, 116680346L,
145748059L, 37641600L, 73126120L, 27874526L, 37407284L, 57771195L,
43043815L, 46769320L, 26926511L, 2653839L, 43040885L, 79083753L,
131563764L, 26197071L, 36986642L), sz = c(239L, 248L, 1307L,
427L, 583L, 919L, 339L, 547L, 268L, 243L, 805L, 124L, 285L, 659L,
891L, 1293L, 855L, 669L, 1374L, 646L), cpg_n = c(3, 3, 4, 3,
3, 4, 3, 3, 3, 3, 3, 3, 5, 6, 6, 6, 6, 3, 5, 6), cpg_ids = c("cg08767938,cg07184013,cg03853208",
"cg06508783,cg00813746,cg05060704", "cg00174851,cg06864853,cg02215070,cg04663564",
"cg06121352,cg09179079,cg03478199", "cg06368590,cg03615269,cg05998283",
"cg03949978,cg01099220,cg02479773,cg08872590", "cg01168865,cg07535928,cg03611555",
"cg08992827,cg08187089,cg00293191", "cg00178877,cg05590948,cg09060489",
"cg03584288,cg02370417,cg02459604", "cg00816177,cg03029127,cg01656750",
"cg02057561,cg08747889,cg00252032", "cg02537149,cg03979582,cg04410181,cg06961071,cg00122254",
"cg01626899,cg00449941,cg05564086,cg06774283,cg01724566,cg06329022",
"cg03846022,cg08981282,cg02512202,cg01195053,cg03314158,cg00433159",
"cg04122815,cg08205639,cg05551979,cg03723510,cg01313313,cg00257271",
"cg04671932,cg04652097,cg08641579,cg02523844,cg05270344,cg07448060",
"cg01211041,cg09140281,cg05501285", "cg08117800,cg02612650,cg03181300,cg03785755,cg02902477",
"cg06686826,cg06163735,cg00325599,cg01756288,cg03450370,cg07696485"
)), row.names = c(NA, 20L), class = "data.frame")
dput(methreg[,1:3])
structure(list(regionID = c("chr7:87152539-87152540", "chr19:51905083-51905084",
"chr19:36687587-36687588", "chr12:53985031-53985032", "chr8:85177989-85177990",
"chr1:40161274-40161275", "chr12:51083289-51083290", "chr7:87152539-87152540",
"chr19:55581239-55581240", "chr19:49927656-49927657", "chr19:55581239-55581240",
"chr18:75212075-75212076", "chr7:99505269-99505270", "chr20:2692357-2692358",
"chr2:173964147-173964148", "chr5:179024077-179024078", "chr19:52369920-52369921"
), probeID = c("cg08767938", "cg04425820", "cg09307868", "cg08737755",
"cg04950789", "cg08707192", "cg04396637", "cg07560681", "cg06560912",
"cg07469215", "cg06560912", "cg05448504", "cg00155700", "cg03227128",
"cg07040405", "cg04681963", "cg03005603"), target_symbol = c("DMTF1",
"ZNF649", "ZNF567", "HOXC10", "E2F5", "RLF", "CSRNP2", "DMTF1",
"ZNF579", "ATF5", "ZNF579", "TSHZ1", "ZNF394", "EBF4", "SP3",
"ZNF879", "ZNF880")), class = "data.frame", row.names = c("14",
"87", "78", "43", "23", "1", "40", "13", "94", "83", "92", "62",
"15", "53", "4", "131", "51"))
预期输出:
structure(list(sample = "TCGA.A3.3357.01", chromosome = "7",
start = 25989524L, end = 25989763L, sz = 239L, cpg_n = 3,
cpg_ids = "cg08767938,cg07184013,cg03853208"), row.names = 1L, class = "data.frame")
2条答案
按热度按时间nfs0ujit1#
这会将
rst.mva.one.leave.out$cpg_ids
中的字符串拆分为逗号,如果有任何值与methreg$probeID
匹配,则选中该行。我们可以使用的另一种方法是使用
grepl
的模式匹配,前提是methreg$probeID
不是很大。感谢@Gwang-Jin Kim对正则表达式的改进。vuktfyat2#
可以使用
fuzzyjoin