def pos_tag_questions(qstn_list):
    """Split labelled question strings and POS-tag their text with NLTK.

    Every entry is parsed as ``"<head>:<first> <remaining words>"``
    (presumably the TREC question-classification layout
    ``COARSE:fine question text`` -- confirm against the caller).

    Relies on ``nltk`` being available as a module-level name.

    Args:
        qstn_list: Iterable of strings in the layout described above.

    Returns:
        A list holding one 3-tuple per input entry:
        ``(head, first, tagged)`` where ``head`` is the text before the
        first colon, ``first`` is the first whitespace-delimited token
        after that colon, and ``tagged`` is the value returned by
        ``nltk.pos_tag`` for the tokenized remaining words.

    Raises:
        IndexError: If an entry contains no ``:`` or has no token after it.
    """
    res = []
    for count, line in enumerate(qstn_list, start=1):
        # Split on the FIRST colon only. Splitting on every colon would
        # truncate any question whose own text contains a ':'.
        parts = line.split(':', 1)
        head = parts[0]
        tokens = parts[1].split()
        first = tokens[0]
        # Re-join the remaining words so NLTK performs its own tokenization
        # (e.g. separating punctuation) instead of plain whitespace splitting.
        sent = nltk.word_tokenize(' '.join(tokens[1:]))
        res.append((head, first, nltk.pos_tag(sent)))
        # Progress report every 100 entries.
        if count % 100 == 0:
            print("processed : " + str(count))
    return res
# Experiment with different features to get better accuracy.
# Also, don't forget to include the same feature extractor in process_grammar.py.
评论列表
文章目录