def parseDocument(doc, vocab):
    """Count how often each in-vocabulary token occurs in ``doc``.

    The text is lower-cased and split with ``wordpunct_tokenize``; tokens
    that are not keys of ``vocab`` are ignored.

    Args:
        doc: Document text as a string.
        vocab: Mapping from a token string to the id stored for that token.

    Returns:
        A ``(ids, counts)`` tuple of two parallel lists. ``ids`` holds each
        distinct vocabulary id that was found (in the dict's iteration
        order) and ``counts[i]`` is the number of occurrences of ``ids[i]``.
        Both lists are empty when no token of ``doc`` is in ``vocab``.
    """
    frequencies = {}
    for token in wordpunct_tokenize(doc.lower()):
        if token in vocab:
            token_id = vocab[token]
            frequencies[token_id] = frequencies.get(token_id, 0) + 1
    # Return real lists: on Python 3 ``dict.keys()`` / ``dict.values()`` are
    # view objects that cannot be indexed, so materialise them for callers.
    return (list(frequencies), list(frequencies.values()))
评论列表
文章目录