def mark(line):
    """Tokenize *line* into lowercase words and update the word counts.

    Every character found in ``string.punctuation`` is split off as its own
    token. An apostrophe is only separated from the text before it, so it
    stays attached to whatever follows (``"don't"`` -> ``"don"``, ``"'t"``).

    Side effect: increments the count of every returned token in
    ``word2freq``, a mapping defined outside this function.

    Args:
        line: The string to tokenize.

    Returns:
        The list of lowercase tokens, in order of appearance.
    """
    pieces = []
    for c in line:
        if c not in string.punctuation:
            pieces.append(c)
        elif c != "'":
            # Compare by value: ``is not`` against a str literal tests object
            # identity, which only works by accident of interning.
            pieces.append(' ' + c + ' ')
        else:
            # Apostrophe: space before only, so it stays glued to the suffix.
            pieces.append(' ' + c)

    # split() with no separator never yields empty strings, so no extra
    # filtering of the tokens is needed.
    words = ''.join(pieces).lower().split()

    for w in words:
        word2freq[w] = word2freq.get(w, 0) + 1
    return words
评论列表
文章目录