def determine_entities(self):
    """Chunk the claim's POS-tagged tokens into noun-phrase entities.

    Takes no arguments; the input is read from ``self.pos``, which is
    expected to hold the part-of-speech tagged tokens of the claim, i.e.
    a list of ``(word, tag)`` tuples as produced by an NLTK POS tagger.

    Three ``NP`` chunk rules are applied in the order listed:

    1. determiner or possessive pronoun + gerund + one or more nouns;
    2. determiner or possessive pronoun + noun + possessive ending +
       optional adjectives + one or more nouns;
    3. optional determiner or possessive pronoun + optional adjectives +
       one or more nouns (the general fallback).

    Returns:
        The chunk structure returned by ``nltk.RegexpParser.parse`` for
        ``self.pos``, in which each matched phrase is grouped under an
        ``NP`` node.
    """
    # Raw string: the grammar contains the regex escape `\$` (a literal
    # dollar sign in the PRP$ tag), which is not a valid Python string
    # escape and triggers an invalid-escape warning in a normal literal.
    grammar = r'''
NP: {<DT|PRP\$> <VBG> <NN.*>+}
{<DT|PRP\$> <NN.*> <POS> <JJ>* <NN.*>+}
{<DT|PRP\$>? <JJ>* <NN.*>+ }
'''
    chunker = nltk.RegexpParser(grammar)
    # Open design questions carried over from the original author:
    # - Should the result be stored as a property of the claim object?
    # - Option: split the claim into features / clauses, run the chunker
    #   over each clause and then re-correlate the results.
    return chunker.parse(self.pos)
评论列表
文章目录