# Trained chunkers keyed by chunk type ("NP", "VP"). Filled lazily so the
# CoNLL-2000 training pass runs once per chunk type instead of on every call.
_CHUNKER_CACHE = {}


def _get_chunker(chunk_type):
    """Return a trigram chunker for ``chunk_type``, training it on first use."""
    cached = _CHUNKER_CACHE.get(chunk_type)
    if cached is not None:
        return cached

    # Defined lazily so that ``nltk`` is only touched when a chunker is needed.
    class ChunkParser(nltk.ChunkParserI):
        """Chunker that predicts chunk tags from sequences of POS tags."""

        def __init__(self, train_sents):
            # The tagger is trained on (pos, chunktag) pairs: the POS tag
            # plays the role of the "word" and the chunk tag is the label.
            train_data = [
                [(pos, chunktag)
                 for _, pos, chunktag in nltk.chunk.tree2conlltags(tree)]
                for tree in train_sents
            ]
            self.tagger = nltk.TrigramTagger(train_data)

        def parse(self, sentence):
            """Chunk a list of ``(word, pos)`` pairs and return the tree."""
            pos_tags = [pos for _, pos in sentence]
            chunktags = [chunktag for _, chunktag in self.tagger.tag(pos_tags)]
            conlltags = [
                (word, pos, chunktag)
                for (word, pos), chunktag in zip(sentence, chunktags)
            ]
            return nltk.chunk.util.conlltags2tree(conlltags)

    train_sents = conll2000.chunked_sents('train.txt', chunk_types=[chunk_type])
    trained = ChunkParser(train_sents)
    _CHUNKER_CACHE[chunk_type] = trained
    return trained


def _collect_phrases(label, parsed_sent):
    """Return one ``{label: text}`` dict per chunk subtree of ``parsed_sent``."""
    # Top-level children are either plain (word, pos) tuples (tokens outside
    # any chunk) or subtrees holding the (word, pos) leaves of one chunk.
    return [
        {label: " ".join(leaf[0] for leaf in node)}
        for node in parsed_sent
        if not isinstance(node, tuple)
    ]


def chunker(sent):
    """Extract noun-phrase and verb-phrase chunks from a sentence.

    The sentence is tokenized with ``nltk.word_tokenize`` and POS-tagged with
    ``nltk.pos_tag``. It is then chunked twice, by trigram chunkers trained on
    the NP and VP annotations of the CoNLL-2000 ``train.txt`` corpus.

    Example input: ``"I hear Jerusalem bells ringing"``.

    Args:
        sent: The raw sentence text.

    Returns:
        A list of single-key dicts, ``{"NP": phrase}`` or ``{"VP": phrase}``,
        where ``phrase`` is the chunk's words joined by single spaces. All NP
        entries come first, followed by all VP entries, each group in
        sentence order.
    """
    tagged = nltk.pos_tag(nltk.word_tokenize(sent))
    phrases = []
    for label in ("NP", "VP"):
        parsed_sent = _get_chunker(label).parse(tagged)
        phrases.extend(_collect_phrases(label, parsed_sent))
    return phrases
rer_build_history.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录