def handle_multiple_sentences(infile, outfile):
    """Rewrite every line of *infile* as a single delimiter-marked title line.

    Each input line is decoded as UTF-8, stripped, and split into sentences
    with the module-level ``sent_detector``.  The first word of every sentence
    is title-cased, and the final character of every sentence except the last
    one on the line is replaced by ``" ::::"``.  The sentences of a line are
    re-joined with single spaces, duplicate lines are dropped, and the
    remaining lines are written to *outfile* as UTF-8, one per line, in the
    order they were first seen.

    Args:
        infile: Path of the UTF-8 encoded input file, one title per line.
        outfile: Path of the output file; it is created or overwritten.
    """
    titles = []
    seen = set()

    # Binary mode keeps every line a byte string, so the explicit UTF-8
    # decode below is valid on both Python 2 and Python 3.
    with open(infile, "rb") as source:
        for raw_line in source:
            line = raw_line.decode("utf-8").strip()
            sentences = sent_detector.tokenize(line)
            title = " ".join(_mark_sentences(sentences))
            # De-duplicate while keeping first-seen order so the output file
            # is deterministic instead of following set iteration order.
            if title not in seen:
                seen.add(title)
                titles.append(title)

    # The output is opened only after the input has been read completely, so
    # a failure while reading or decoding does not leave a truncated file.
    with codecs.open(outfile, "w", "utf-8") as sink:
        for title in titles:
            sink.write(title + u"\n")


def _mark_sentences(sentences):
    """Title-case each sentence's first word and delimit non-final sentences."""
    marked = []
    last_index = len(sentences) - 1
    for index, sentence in enumerate(sentences):
        words = sentence.split()
        if words:
            first_word = words[0]
            # count=1 limits the change to the leading word; without it every
            # later occurrence of the same text would be title-cased as well.
            sentence = sentence.replace(first_word, first_word.title(), 1)
            if index < last_index:
                # Swap only the last non-whitespace character for the
                # delimiter instead of every occurrence of that character.
                end = len(sentence.rstrip())
                sentence = sentence[:end - 1] + " ::::" + sentence[end:]
        # A sentence without any words is passed through unchanged.
        marked.append(sentence)
    return marked
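# NOTE(review): the two labels below look like web-page navigation text that
# was picked up together with the snippet; they are not part of the program.
#   "评论列表" (comment list)
#   "文章目录" (article table of contents)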