>>> from spacy.en import English
>>> nlp = English()
>>> doc = nlp(u'The cat and the dog sleep in the basket near the door.')
>>> for np in doc.noun_chunks:
>>> np.text
u'The cat'
u'the dog'
u'the basket'
u'the door'
from spacy.symbols import *
# Dependency labels whose word is treated here as the head of a noun phrase.
np_labels = {nsubj, nsubjpass, dobj, iobj, pobj}  # Probably others too


def iter_nps(doc):
    """Yield the subtree of every word in ``doc`` that heads a noun phrase.

    A word qualifies when its ``dep`` value is one of ``np_labels``. For each
    such word, ``word.subtree`` is yielded as-is.

    This is a generator function: calling it only creates the generator; the
    loop body runs when the result is iterated.
    """
    for word in doc:
        if word.dep in np_labels:
            yield word.subtree
def iter_nps(doc):
    """Yield the dependency label string of each noun-phrase head in ``doc``.

    A word qualifies when its ``dep`` value is one of ``np_labels``; for each
    such word its ``dep_`` attribute is yielded.

    This is a generator function: calling it returns a generator object and
    produces no values until that object is iterated (e.g. with ``for`` or
    ``list(...)``).
    """
    for word in doc:
        if word.dep in np_labels:
            yield word.dep_
# Calling a generator function does not run its body; it only returns a
# generator object, which is why the captured output below shows the object
# itself rather than any dependency labels.
iter_nps(doc)
'''
<generator object iter_nps at 0x7fd7b08b5bd0>
'''
## Modified method:
def iter_nps(doc):
    """Print the text and dependency label of each noun-phrase head in ``doc``.

    A word qualifies when its ``dep`` value is one of ``np_labels``. For each
    such word, ``word.text`` and ``word.dep_`` are printed on one line.

    Unlike a generator version, this runs immediately when called and
    returns ``None``.
    """
    for word in doc:
        if word.dep in np_labels:
            print(word.text, word.dep_)
# This version prints as a side effect, so a plain call is enough to see output.
# NOTE(review): the captured output below mentions "Bananas", which does not
# occur in any sentence visible in this file -- presumably it was produced
# from a different `doc`; confirm before relying on it.
iter_nps(doc)
'''
Bananas nsubj
potassium pobj
'''
# Compare the two approaches on a sentence containing a relative clause.
doc = nlp('BRCA1 is a tumor suppressor protein that functions to maintain genomic stability.')

# Approach 1: the built-in noun_chunks iterator (captured output follows).
for np in doc.noun_chunks:
    print(np.text)
'''
BRCA1
a tumor suppressor protein
genomic stability
'''

# Approach 2: the dependency-label filter in iter_nps. In the captured output
# below it reports single head words, including the relative pronoun "that".
iter_nps(doc)
'''
BRCA1 nsubj
that nsubj
stability dobj
'''
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("When Sebastian Thrun started working on self-driving cars at "
          "Google in 2007, few people outside of the company took him "
          "seriously. “I can tell you very senior CEOs of major American "
          "car companies would shake my hand and turn away because I wasn’t "
          "worth talking to,” said Thrun, in an interview with Recode earlier "
          "this week.")
# The doc text is from the spaCy website.

# Print every token whose coarse part-of-speech tag marks it as a noun,
# proper noun or pronoun.
for x in doc:
    if x.pos_ in ("NOUN", "PROPN", "PRON"):
        print(x)
# Here you can get nouns, proper nouns and pronouns.
5条答案
按热度按时间polhcujo1#
如果你想要基本NP,即没有并列结构、介词短语或关系从句的NP,你可以在Doc和Span对象上使用noun_chunks迭代器:
如果你还需要别的东西,最好的方法是遍历句子中的单词,并考虑句法上下文来确定这个单词是否支配你想要的短语类型,如果是,就生成它的子树:
xmjla07d2#
https://www.geeksforgeeks.org/use-yield-keyword-instead-return-keyword-python/
b4lqfgs43#
你也可以从这样的句子中得到名词:
xxhby3vn4#
如果你想更精确地指定要提取哪种名词短语,可以使用 textacy 的 matches 函数。你可以传入任意的词性(POS)标记组合。例如,相应的模式将返回前面带有介词、并可选地带有限定词和/或形容词的任何名词。
Textacy是建立在spacy之上的,所以它们应该完美地配合在一起。
jutyujz05#
from spacy.en import English
可能会报错:没有名为“spacy.en”的模块。
在 spaCy 2.0+ 中,所有语言数据已移至子模块
spacy.lang
请使用
from spacy.lang.en import English
然后按照 @syllogism_ 的回答执行其余所有步骤。