R:将一列拆分为多列

vptzau2j  于 2023-03-20  发布在  其他
关注(0)|答案(2)|浏览(162)

我有一个数据框,其中一列每行包含一个因子列表。
每行遵循相同的格式:
因子值跟随在因子名称和等号之后(例如,mm='MMI')。
我想拆分此列,并为这些因子分别创建列。也就是说,结果应包含 index、mm、score、preferred_name、cui 等列,每行存放原先单引号中的值。是否有有效的方法将这些列表拆分为列?我在下面附上了示例数据集。

示例数据集1

d <- 
    structure(list(0:36, o3.word = c("ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Transurethral cystoscopy (procedure)', cui='C0010707', semtypes='[diap]', trigger='[\"Cystourethroscopy\"-tx-1-\"cystourethroscopy\"-noun-0]', location='TX', pos_info='8/17', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='0%', cui='C3842591', semtypes='[qnco]', trigger='[\"0%\"-tx-1-\"0\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='16.30', preferred_name='Insertion Mutation', cui='C1512796', semtypes='[genf]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='16/9', tree_codes='E05.393.420.601.550;G05.365.590.575;G05.558.550')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Clinical act of insertion', cui='C0441587', semtypes='[hlca]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='16/9', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Implantation procedure', cui='C0021107', semtypes='[topp]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='16/9', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Insert (object)', cui='C1883719', semtypes='[ftcn]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='16/9', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='1+', cui='C3816745', semtypes='[fndg]', trigger='[\"1+\"-tx-1-\"1\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='1+ Score', cui='C2827734', semtypes='[qnco]', trigger='[\"1+\"-tx-1-\"1\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='1+ Score, WHO', cui='C2981698', semtypes='[clas]', trigger='[\"1+\"-tx-1-\"1\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='Greater than one', cui='C4280965', semtypes='[qnco]', trigger='[\">1\"-tx-1-\"1\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Indwelling (qualifier value)', cui='C0439848', semtypes='[ftcn]', trigger='[\"Indwelling\"-tx-1-\"indwelling\"-adj-0]', location='TX', pos_info='15/10', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Indwelling Device', cui='C3694424', semtypes='[medd]', trigger='[\"Indwelling\"-tx-1-\"indwelling\"-adj-0]', location='TX', pos_info='15/10', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='+2', cui='C0740116', semtypes='[qnco]', trigger='[\"+2\"-tx-1-\"2\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='2+', cui='C3833492', semtypes='[fndg]', trigger='[\"2+\"-tx-1-\"2\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='2+ Score', cui='C2827735', semtypes='[qnco]', trigger='[\"2+\"-tx-1-\"2\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='2+ Score, WHO', cui='C2981700', semtypes='[clas]', trigger='[\"2+\"-tx-1-\"2\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='PSA Level Less than Two', cui='C4526661', semtypes='[lbtr]', trigger='[\"<2\"-tx-1-\"2\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='9.99', preferred_name='Ureter', cui='C0041951', semtypes='[bpoc]', trigger='[\"Ureteral\"-tx-1-\"ureteral\"-adj-0]', location='TX', pos_info='17/8', tree_codes='A05.810.776')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Ureteral Route of Drug Administration', cui='C1522613', semtypes='[ftcn]', trigger='[\"URETERAL\"-tx-1-\"ureteral\"-adj-0]', location='TX', pos_info='17/8', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='3+ Answer to Question', cui='C3838680', semtypes='[fndg]', trigger='[\"3+\"-tx-1-\"3\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='3+ Score', cui='C2827736', semtypes='[qnco]', trigger='[\"3+\"-tx-1-\"3\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='3+ Score, WHO', cui='C2981702', semtypes='[clas]', trigger='[\"3+\"-tx-1-\"3\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='<3 (qualifier value)', cui='C0439086', semtypes='[qnco]', trigger='[\"<3\"-tx-1-\"3\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='9.99', preferred_name='Stent, device', cui='C0038257', semtypes='[medd]', trigger='[\"Stent\"-tx-1-\"stent\"-noun-0]', location='TX', pos_info='20/5', tree_codes='E07.695.750')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='4+ Answer to question', cui='C3838679', semtypes='[fndg]', trigger='[\"4+\"-tx-1-\"4\"-integer-0]', location='TX', pos_info='0/1', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='17.80', preferred_name='Name', cui='C0027365', semtypes='[inpr]', trigger='[\"Name\"-tx-1-\"Name\"-noun-0]', location='TX', pos_info='0/4', tree_codes='L01.559.598.400.556')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='12.96', preferred_name='Ozone', cui='C0030106', semtypes='[chem]', trigger='[\"O3\"-tx-1-\"o3\"-noun-0]', location='TX', pos_info='6/2', tree_codes='D01.362.670.600;x.x.x.x')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='CDISC ADaM Derivation Type Terminology', cui='C2825527', semtypes='[inpr]', trigger='[\"DTYPE\"-tx-1-\"dtype\"-noun-0]', location='TX', pos_info='29/5', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Computer Programming Object', cui='C1704861', semtypes='[cnce]', trigger='[\"Object\"-tx-1-\"object\"-noun-0]', location='TX', pos_info='36/6', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Length', cui='C1444754', semtypes='[qnco]', trigger='[\"Length\"-tx-1-\"Length\"-noun-0]', location='TX', pos_info='15/6', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Length of Trial', cui='C1706316', semtypes='[resa]', trigger='[\"LENGTH\"-tx-1-\"Length\"-noun-0]', location='TX', pos_info='15/6', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Name (property) (qualifier value)', cui='C4522128', semtypes='[qlco]', trigger='[\"Name\"-tx-1-\"Name\"-noun-0]', location='TX', pos_info='0/4', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Object Class', cui='C1518526', semtypes='[cnce]', trigger='[\"Object\"-tx-1-\"object\"-noun-0]', location='TX', pos_info='36/6', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Person Name', cui='C1547383', semtypes='[inpr]', trigger='[\"Name\"-tx-1-\"Name\"-noun-0]', location='TX', pos_info='0/4', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='5.18', preferred_name='Physical object', cui='C0347997', semtypes='[phob]', trigger='[\"Object\"-tx-1-\"object\"-noun-0]', location='TX', pos_info='36/6', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.68', preferred_name='Term (lexical)', cui='C1705313', semtypes='[idcn]', trigger='[\"Word\"-tx-1-\"word\"-noun-0]', location='TX', pos_info='9/4', tree_codes='')", 
                                     "ConceptMMI(index='tmppplgwffe', mm='MMI', score='3.50', preferred_name='United States Military Commissioned Officer O3', cui='C4521399', semtypes='[clas]', trigger='[\"O3\"-tx-1-\"o3\"-noun-0]', location='TX', pos_info='6/2', tree_codes='')"
    )), class = "data.frame", row.names = c(NA, -37L))

示例数据集2

d2 <- 
structure(list(0:9, o3.word = c("[ConceptMMI(index='tmpgjcrubkn', mm='MMI', score='5.18', preferred_name='Transurethral cystoscopy (procedure)', cui='C0010707', semtypes='[diap]', trigger='[\"Cystourethroscopy\"-tx-1-\"cystourethroscopy\"-noun-0]', location='TX', pos_info='1/17', tree_codes='')]", 
"[ConceptMMI(index='tmp9rojcgop', mm='MMI', score='17.80', preferred_name='Insertion Mutation', cui='C1512796', semtypes='[genf]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='1/9', tree_codes='E05.393.420.601.550;G05.365.590.575;G05.558.550'), ConceptMMI(index='tmp9rojcgop', mm='MMI', score='5.18', preferred_name='Clinical act of insertion', cui='C0441587', semtypes='[hlca]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='1/9', tree_codes=''), ConceptMMI(index='tmp9rojcgop', mm='MMI', score='5.18', preferred_name='Implantation procedure', cui='C0021107', semtypes='[topp]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='1/9', tree_codes=''), ConceptMMI(index='tmp9rojcgop', mm='MMI', score='5.18', preferred_name='Insert (object)', cui='C1883719', semtypes='[ftcn]', trigger='[\"Insertion\"-tx-1-\"insertion\"-noun-0]', location='TX', pos_info='1/9', tree_codes='')]", 
"[ConceptMMI(index='tmpt6i439rr', mm='MMI', score='5.18', preferred_name='Indwelling (qualifier value)', cui='C0439848', semtypes='[ftcn]', trigger='[\"Indwelling\"-tx-1-\"indwelling\"-noun-0]', location='TX', pos_info='1/10', tree_codes=''), ConceptMMI(index='tmpt6i439rr', mm='MMI', score='5.18', preferred_name='Indwelling Device', cui='C3694424', semtypes='[medd]', trigger='[\"Indwelling\"-tx-1-\"indwelling\"-noun-0]', location='TX', pos_info='1/10', tree_codes='')]", 
"[ConceptMMI(index='tmp2_29bxno', mm='MMI', score='11.49', preferred_name='Ureter', cui='C0041951', semtypes='[bpoc]', trigger='[\"Ureteral\"-tx-1-\"ureteral\"-adj-0]', location='TX', pos_info='1/8', tree_codes='A05.810.776'), ConceptMMI(index='tmp2_29bxno', mm='MMI', score='5.18', preferred_name='Ureteral Route of Drug Administration', cui='C1522613', semtypes='[ftcn]', trigger='[\"URETERAL\"-tx-1-\"ureteral\"-adj-0]', location='TX', pos_info='1/8', tree_codes='')]", 
"[ConceptMMI(index='tmpn5_qzsdq', mm='MMI', score='11.49', preferred_name='Stent, device', cui='C0038257', semtypes='[medd]', trigger='[\"Stent\"-tx-1-\"stent\"-noun-0]', location='TX', pos_info='1/5', tree_codes='E07.695.750')]", 
"[ConceptMMI(index='tmpg7w04iwo', mm='MMI', score='14.64', preferred_name='Upper arm', cui='C0446516', semtypes='[blor]', trigger='[\"ARM\"-tx-1-\"arm\"-noun-0]', location='TX', pos_info='1/3', tree_codes='A01.378.800.075'), ConceptMMI(index='tmpg7w04iwo', mm='MMI', score='5.18', preferred_name='AKR1A1 wt Allele', cui='C3715044', semtypes='[gngm]', trigger='[\"ARM\"-tx-1-\"arm\"-noun-0]', location='TX', pos_info='1/3', tree_codes=''), ConceptMMI(index='tmpg7w04iwo', mm='MMI', score='5.18', preferred_name='Sequence Arm', cui='C4553528', semtypes='[inpr]', trigger='[\"arm\"-tx-1-\"arm\"-noun-0]', location='TX', pos_info='1/3', tree_codes='')]", 
"[]", "[ConceptMMI(index='tmp6ewth8qc', mm='MMI', score='5.18', preferred_name='RNF130 wt Allele', cui='C3811116', semtypes='[gngm]', trigger='[\"GP\"-tx-1-\"gp\"-noun-0]', location='TX', pos_info='1/2', tree_codes=''), ConceptMMI(index='tmp6ewth8qc', mm='MMI', score='5.18', preferred_name='TNC wt Allele', cui='C1705010', semtypes='[gngm]', trigger='[\"GP\"-tx-1-\"gp\"-noun-0]', location='TX', pos_info='1/2', tree_codes='')]", 
"[ConceptMMI(index='tmp7_pewk80', mm='MMI', score='14.64', preferred_name='Ultrasonic', cui='C0220934', semtypes='[ftcn]', trigger='[\"Ultrasonic\"-tx-1-\"ultrasonic\"-adj-0]', location='TX', pos_info='1/10', tree_codes='H01.671.031.849'), ConceptMMI(index='tmp7_pewk80', mm='MMI', score='14.64', preferred_name='Ultrasonics (sound)', cui='C1456803', semtypes='[npop]', trigger='[\"Ultrasonic\"-tx-1-\"ultrasonic\"-adj-0]', location='TX', pos_info='1/10', tree_codes='H01.671.031.849')]", 
"[ConceptMMI(index='tmpssseh8v6', mm='MMI', score='5.18', preferred_name='Advice', cui='C0150600', semtypes='[hlca]', trigger='[\"Guidance\"-tx-1-\"guidance\"-noun-0]', location='TX', pos_info='1/8', tree_codes=''), ConceptMMI(index='tmpssseh8v6', mm='MMI', score='5.18', preferred_name='Spatial guidance for medical procedure', cui='C1959633', semtypes='[topp]', trigger='[\"Guidance\"-tx-1-\"guidance\"-noun-0]', location='TX', pos_info='1/8', tree_codes=''), ConceptMMI(index='tmpssseh8v6', mm='MMI', score='5.18', preferred_name='guidance - Measure Attribute', cui='C3854153', semtypes='[clna]', trigger='[\"guidance\"-tx-1-\"guidance\"-noun-0]', location='TX', pos_info='1/8', tree_codes='')]"
)), class = "data.frame", row.names = c(NA, -10L))
46qrfjad

46qrfjad1#

您可以将每个条目转换为一个列表(使用 parse 和 eval),然后将它们绑定在一起:

library(tidyverse)

# Turn every "ConceptMMI(...)" string into a one-row tibble and stack the rows.
# NOTE: eval() executes the text as R code, so only use this on trusted input.
d$o3.word |>
  str_replace("ConceptMMI", "list") |>  # first match only: becomes a list(...) call
  map(\(entry) {
    fields <- eval(parse(text = entry))
    as_tibble(fields)
  }) |>
  list_rbind()

产出

# A tibble: 37 × 10
   index       mm    score preferred_name                       cui    semty…¹ trigger locat…² pos_i…³ tree_…⁴
   <chr>       <chr> <chr> <chr>                                <chr>  <chr>   <chr>   <chr>   <chr>   <chr>  
 1 tmppplgwffe MMI   3.68  Transurethral cystoscopy (procedure) C0010… [diap]  "[\"Cy… TX      8/17    ""     
 2 tmppplgwffe MMI   3.50  0%                                   C3842… [qnco]  "[\"0%… TX      0/1     ""     
 3 tmppplgwffe MMI   16.30 Insertion Mutation                   C1512… [genf]  "[\"In… TX      16/9    "E05.3…
 4 tmppplgwffe MMI   3.68  Clinical act of insertion            C0441… [hlca]  "[\"In… TX      16/9    ""     
 5 tmppplgwffe MMI   3.68  Implantation procedure               C0021… [topp]  "[\"In… TX      16/9    ""     
 6 tmppplgwffe MMI   3.68  Insert (object)                      C1883… [ftcn]  "[\"In… TX      16/9    ""     
 7 tmppplgwffe MMI   3.50  1+                                   C3816… [fndg]  "[\"1+… TX      0/1     ""     
 8 tmppplgwffe MMI   3.50  1+ Score                             C2827… [qnco]  "[\"1+… TX      0/1     ""     
 9 tmppplgwffe MMI   3.50  1+ Score, WHO                        C2981… [clas]  "[\"1+… TX      0/1     ""     
10 tmppplgwffe MMI   3.50  Greater than one                     C4280… [qnco]  "[\">1… TX      0/1     ""     
# … with 27 more rows, and abbreviated variable names ¹​semtypes, ²​location, ³​pos_info, ⁴​tree_codes
# ℹ Use `print(n = ...)` to see more rows
gev0vcfq

gev0vcfq2#

出于安全考虑,我建议避免使用 eval;不过您仍然可以使用 parse,下面的写法只用到基础 R 函数。

# Parse each string into an unevaluated call (nothing is executed), keep only
# its named arguments by dropping the leading function-name element, then
# row-bind the argument lists into a data frame.
d$o3.word |>
  parse(text = _) |>
  lapply(\(cl) as.list(cl)[-1]) |>
  do.call(what = "rbind.data.frame", args = _)

相关问题