def tag(path, filename):
    """Write POS-tagged, stop-word-filtered features for each line of a file.

    Reads the text file at ``path`` line by line, splits every line on
    whitespace, tags the tokens with ``pos_tag`` and keeps the tokens whose
    lower-cased form is not in ``stop_words``. Each kept token becomes a
    ``"token:TAG"`` string.

    Output goes to ``PREPROCESSED_DATA + filename.strip() + "_features"``:

    * a source line with no tokens at all produces no output;
    * a source line with at least one kept token produces the ``str()`` of
      the feature list followed by a blank line;
    * a source line whose tokens were all stop words produces only the
      blank separator (``'\\n\\n'``).

    Args:
        path: Path of the input text file to read.
        filename: Base name of the output file; surrounding whitespace is
            stripped before it is joined to ``PREPROCESSED_DATA``.

    Returns:
        None. The result is the file written to disk.

    Note:
        ``PREPROCESSED_DATA``, ``pos_tag`` and ``stop_words`` are resolved
        from the enclosing module. ``pos_tag`` is presumably NLTK's tagger
        returning ``(token, tag)`` pairs -- confirm against the file's
        imports.
    """
    print("Tagging "+path)
    out_path = PREPROCESSED_DATA + filename.strip() + "_features"
    # Open both files in one ``with`` so they are flushed and closed even if
    # tagging raises part-way through; the original never closed either one.
    with open(out_path, 'w') as writer, open(path, 'r') as reader:
        for line in reader:
            tokens = line.split()
            if not tokens:
                # Whitespace-only line: skipped entirely, nothing written.
                continue
            tagged = pos_tag(tokens)
            # Index into each pair (rather than unpack) to keep the same
            # tolerance for the tagger's return shape as the original code.
            features = [
                pair[0] + ":" + pair[1]
                for pair in tagged
                if pair[0].lower() not in stop_words
            ]
            if features:
                writer.write(str(features)+'\n\n')
            else:
                # Every token was a stop word: keep the blank separator so
                # the output still has one record per non-empty input line.
                writer.write('\n\n')
评论列表
文章目录