def add(self, filename, document):
    """Index the text of ``document`` under ``filename``.

    The document is split with ``nltk.word_tokenize`` and every token is
    lower-cased. Tokens found in ``self.stopwords`` and a few bare
    punctuation tokens are skipped. When ``self.stemmer`` is truthy, each
    remaining token is replaced by ``self.stemmer.stem(token)``. For each
    resulting token, ``filename`` is added to the set stored under that
    token via ``self.redis_token_client.sadd``. Finally the unmodified
    document text is stored under ``filename`` via
    ``self.redis_docs_client.set``.

    Args:
        filename: Key identifying the document; used as the set member
            for each token and as the key for the stored text.
        document: The document text to tokenize and store.
    """
    # Debugging aid: uncomment the next line to see the words found in
    # each image.
    # print("Words found in %s: %s" % (filename, document))
    punctuation_tokens = ['.', ',', ':', '']
    lowered_tokens = [raw.lower() for raw in nltk.word_tokenize(document)]

    for word in lowered_tokens:
        # Stopwords and bare punctuation are not indexed.
        if word in self.stopwords or word in punctuation_tokens:
            continue
        # Stemming is optional; without a stemmer the lower-cased token
        # is used as the key unchanged.
        key = self.stemmer.stem(word) if self.stemmer else word
        # Record that this file contains the token.
        self.redis_token_client.sadd(key, filename)

    # Keep the full document text retrievable by filename.
    self.redis_docs_client.set(filename, document)
评论列表
文章目录