我有这样一个数据框(data frame):
# Example input data (dput() output): a data.frame with 17 rows and 5 columns:
#   Entry, Protein.names, Gene.Names            - character
#   Length                                      - integer
#   Subcellular.location..CC.                   - character, free-text location
#                                                 annotation; this is the column
#                                                 to be searched for "Nucleus".
# row.names = c(NA, -17L) is R's compact form for automatic row names 1..17.
df<- structure(list(Entry = c("A0A087X1C5", "A0A0B4J2F0", "A0A0C5B5G6",
"A0A0K2S4Q6", "A0A0U1RRE5", "A0A1B0GTW7", "A0A5B9", "A0AV02",
"A0AV96", "A0AVF1", "A0AVI4", "A0AVK6", "A0FGR8", "A0FGR9", "A0JLT2",
"A0JNW5", "A0M8Q6"), Protein.names = c("Putative cytochrome P450 2D7 (EC 1.14.14.1)",
"Protein PIGBOS1 (PIGB opposite strand protein 1)", "Mitochondrial-derived peptide MOTS-c (Mitochondrial open reading frame of the 12S rRNA-c)",
"Protein CD300H (CD300 antigen-like family member H)", "Negative regulator of P-body association (P-body dissociating protein) (Protein NoBody)",
"Ciliated left-right organizer metallopeptidase (EC 3.4.24.-) (Leishmanolysin-like peptidase 2)",
"T cell receptor beta constant 2", "Solute carrier family 12 member 8 (Cation-chloride cotransporter 9)",
"RNA-binding protein 47 (RNA-binding motif protein 47)", "Intraflagellar transport protein 56 (Tetratricopeptide repeat protein 26) (TPR repeat protein 26)",
"E3 ubiquitin-protein ligase TM129 (EC 2.3.2.27) (RING-type E3 ubiquitin transferase TM129)",
"Transcription factor E2F8 (E2F-8)", "Extended synaptotagmin-2 (E-Syt2) (Chr2Syt)",
"Extended synaptotagmin-3 (E-Syt3) (Chr3Syt)", "Mediator of RNA polymerase II transcription subunit 19 (Lung cancer metastasis-related protein 1) (Mediator complex subunit 19)",
"Bridge-like lipid transfer protein family member 3B (Syntaxin-6 Habc-interacting protein of 164 kDa) (UHRF1-binding protein 1-like)",
"Immunoglobulin lambda constant 7 (Ig lambda-7 chain C region)"
), Gene.Names = c("CYP2D7", "PIGBOS1", "MT-RNR1", "CD300H", "NBDY LINC01420",
"CIROP LMLN2", "TRBC2 TCRBC2", "SLC12A8 CCC9", "RBM47", "TTC26 IFT56",
"TMEM129", "E2F8", "ESYT2 FAM62B KIAA1228", "ESYT3 FAM62C", "MED19 LCMR1",
"BLTP3B KIAA0701 SHIP164 UHRF1BP1L", "IGLC7"), Length = c(515L,
54L, 16L, 201L, 68L, 788L, 178L, 714L, 593L, 554L, 362L, 867L,
921L, 886L, 244L, 1464L, 106L), Subcellular.location..CC. = c("Membrane {ECO:0000305}; Multi-pass membrane protein {ECO:0000255}. Cytoplasm {ECO:0000305|PubMed:15051713}. Mitochondrion {ECO:0000269|PubMed:18838503}.",
"Mitochondrion outer membrane {ECO:0000269|PubMed:31653868}; Single-pass membrane protein {ECO:0000255}.",
"Secreted {ECO:0000269|PubMed:25738459}. Mitochondrion {ECO:0000269|PubMed:29983246}. Nucleus {ECO:0000269|PubMed:29983246, ECO:0000269|PubMed:33473109}. Note=Translocates to the nucleus in response to metabolic stress in an AMPK-dependent manner. {ECO:0000269|PubMed:29983246}.",
"[Isoform 1]: Membrane {ECO:0000255}; Single-pass type I membrane protein {ECO:0000255}.; SUBCELLULAR LOCATION: [Isoform 2]: Secreted {ECO:0000269|PubMed:26221034}.",
"Cytoplasm, P-body {ECO:0000269|PubMed:27918561}. Note=Localizes to P-bodies at low concentrations without dissociating them. {ECO:0000269|PubMed:27918561}.Nucleus {ECO:0000250}",
"Membrane {ECO:0000255}; Single-pass type I membrane protein {ECO:0000255}.",
"Cell membrane {ECO:0000303|PubMed:20452950}.", "Membrane {ECO:0000305}; Multi-pass membrane protein {ECO:0000305}.",
"Nucleus {ECO:0000250}.Endoplasmic reticulum membrane {ECO:0000269|PubMed:24807418}; ",
"Cell projection, cilium {ECO:0000250|UniProtKB:Q8BS45}. Note=Localizes at the base to the ciliary transition zone. {ECO:0000250|UniProtKB:Q8BS45}.",
"Endoplasmic reticulum membrane {ECO:0000269|PubMed:24807418}; Multi-pass membrane protein {ECO:0000269|PubMed:24807418}.",
"Nucleus {ECO:0000269|PubMed:15897886}.", "Cell membrane {ECO:0000269|PubMed:17360437, ECO:0000269|PubMed:20833364, ECO:0000269|PubMed:23791178, ECO:0000269|PubMed:29469807}; Peripheral membrane protein {ECO:0000269|PubMed:17360437}. Endoplasmic reticulum membrane {ECO:0000269|PubMed:23791178, ECO:0000269|PubMed:29469807}; Multi-pass membrane protein {ECO:0000255}. Note=Localizes to endoplasmic reticulum-plasma membrane contact sites (EPCS) (PubMed:29469807, PubMed:23791178, PubMed:30220461, PubMed:27044890). Recruited to the cell membrane via the third C2 domain (PubMed:17360437). {ECO:0000269|PubMed:17360437, ECO:0000269|PubMed:23791178, ECO:0000269|PubMed:29469807, ECO:0000269|PubMed:30220461}.",
"Cell membrane {ECO:0000269|PubMed:17360437, ECO:0000269|PubMed:29469807}; Peripheral membrane protein {ECO:0000269|PubMed:17360437}. Endoplasmic reticulum membrane {ECO:0000269|PubMed:29469807}; Multi-pass membrane protein {ECO:0000255}. Note=Localizes to endoplasmic reticulum-plasma membrane contact sites (EPCS) (PubMed:29469807, PubMed:30220461). Recruited to the cell membrane via the third C2 domain. {ECO:0000269|PubMed:17360437, ECO:0000269|PubMed:29469807, ECO:0000269|PubMed:30220461}.",
"Nucleus {ECO:0000305}.", "Cytoplasm, cytosol {ECO:0000269|PubMed:20163565}. Early endosome {ECO:0000269|PubMed:20163565, ECO:0000269|PubMed:35499567}. Note=Localizes on a subpopulation of vesicle clusters in the early endocytic pathway. {ECO:0000269|PubMed:35499567}.",
"Secreted {ECO:0000303|PubMed:20176268, ECO:0000303|PubMed:22158414}. Cell membrane {ECO:0000303|PubMed:20176268, ECO:0000303|PubMed:22158414}."
)), class = "data.frame", row.names = c(NA, -17L))
我想保留 Subcellular.location..CC. 列中包含单词 "Nucleus" 的行。
我期望的输出如下所示:
# Expected output (dput() output): the 5 rows of df whose
# Subcellular.location..CC. value contains "Nucleus" (Entries A0A0C5B5G6,
# A0A0U1RRE5, A0AV96, A0AVK6, A0JLT2), with all 5 columns kept.
# row.names = c(NA, -5L) means the row names are renumbered 1..5 rather than
# keeping the original positions in df.
dfout<- structure(list(Entry = c("A0A0C5B5G6", "A0A0U1RRE5", "A0AV96",
"A0AVK6", "A0JLT2"), Protein.names = c("Mitochondrial-derived peptide MOTS-c (Mitochondrial open reading frame of the 12S rRNA-c)",
"Negative regulator of P-body association (P-body dissociating protein) (Protein NoBody)",
"RNA-binding protein 47 (RNA-binding motif protein 47)", "Transcription factor E2F8 (E2F-8)",
"Mediator of RNA polymerase II transcription subunit 19 (Lung cancer metastasis-related protein 1) (Mediator complex subunit 19)"
), Gene.Names = c("MT-RNR1", "NBDY LINC01420", "RBM47", "E2F8",
"MED19 LCMR1"), Length = c(16L, 68L, 593L, 867L, 244L), Subcellular.location..CC. = c("Secreted {ECO:0000269|PubMed:25738459}. Mitochondrion {ECO:0000269|PubMed:29983246}. Nucleus {ECO:0000269|PubMed:29983246, ECO:0000269|PubMed:33473109}. Note=Translocates to the nucleus in response to metabolic stress in an AMPK-dependent manner. {ECO:0000269|PubMed:29983246}.",
"Cytoplasm, P-body {ECO:0000269|PubMed:27918561}. Note=Localizes to P-bodies at low concentrations without dissociating them. {ECO:0000269|PubMed:27918561}.Nucleus {ECO:0000250}",
"Nucleus {ECO:0000250}.Endoplasmic reticulum membrane {ECO:0000269|PubMed:24807418}; ",
"Nucleus {ECO:0000269|PubMed:15897886}.", "Nucleus {ECO:0000305}."
)), class = "data.frame", row.names = c(NA, -5L))
1条答案
按热度按时间dw1jzc5e1#
tidyverse 方法: