def _index_docs(indexFile, writer):
    """Add one search document to ``writer`` for each record in ``indexFile``.

    Every non-blank line must split into exactly seven tab-separated fields:
    ind, ent_name, info, keywords, imgurl, filename, url.  Only three of them
    end up in the document:

    * ``ind``      -- stored verbatim, not indexed.
    * ``ent_name`` -- segmented with ``jieba.cut_for_search``, indexed only.
    * ``keywords`` -- ``'%'`` replaced by a space, then segmented the same
      way, indexed only.

    Blank lines are skipped.  A failure while building or adding a document
    is reported on stdout and processing continues with the next line.

    Args:
        indexFile: Iterable of text lines (for example an open file).
        writer: Object exposing ``addDocument(doc)``.

    Raises:
        ValueError: If a non-blank line does not contain exactly seven
            tab-separated fields.
    """
    for line in indexFile:
        # A blank line (typically the trailing one at end of file) carries no
        # record; skip it instead of failing on the seven-way unpack below.
        if not line.strip():
            continue

        # Fields prefixed with "_" are part of the record layout but are not
        # used for indexing.
        (ind, ent_name, _info, keywords,
         _imgurl, _filename, _url) = line.split('\t')
        print("adding %s" % ind)

        # Presumably '%' delimits individual keywords in the input; turn it
        # into whitespace before segmentation.
        keywords = keywords.replace('%', ' ')

        # Segment with jieba's search-mode tokenizer and re-join the tokens
        # with single spaces.
        ent_name = " ".join(x.strip() for x in jieba.cut_for_search(ent_name))
        keywords = " ".join(x.strip() for x in jieba.cut_for_search(keywords))

        try:
            doc = Document()
            doc.add(Field('ind', ind, Field.Store.YES, Field.Index.NO))
            doc.add(Field('ent_name', ent_name,
                          Field.Store.NO, Field.Index.ANALYZED))
            doc.add(Field('keywords', keywords,
                          Field.Store.NO, Field.Index.ANALYZED))
            writer.addDocument(doc)
        except Exception as e:
            # Best effort: one bad record must not abort the whole run.
            print("Failed in indexDocs: %r" % e)
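# 评论列表 (comment list) -- web-page navigation residue, not part of the code
# 文章目录 (article table of contents) -- web-page navigation residue, not part of the code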