index.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:AlphaLogo 作者: gigaflw 项目源码 文件源码
def _index_docs(indexFile, writer):
    """Add one search document to ``writer`` per well-formed line of ``indexFile``.

    Each line is expected to hold seven tab-separated fields, in order:
    ind, ent_name, info, keywords, imgurl, filename, url.  Only three of
    them end up in the document:

    * ``ind``      -- stored verbatim, not indexed;
    * ``ent_name`` -- segmented with ``jieba.cut_for_search``, indexed, not stored;
    * ``keywords`` -- ``%`` replaced by spaces, then segmented the same way,
      indexed, not stored.

    Blank lines are ignored.  Lines with the wrong number of fields or a
    non-integer ``ind`` are reported and skipped, so a single bad record
    does not abort the whole run.  Failures while building or adding a
    document are reported and the loop moves on to the next line.

    Args:
        indexFile: iterable yielding the text lines described above.
        writer: object exposing ``addDocument(doc)`` (presumably a Lucene
            IndexWriter -- confirm against the caller).
    """
    for line in indexFile:
        if not line.strip():
            continue

        # Drop the line terminator so it does not leak into the last field.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) != 7:
            print("Skipping malformed line: %r" % line)
            continue

        ind, ent_name, keywords = fields[0], fields[1], fields[3]

        # The original code derived a numeric file name from ``ind``, so a
        # non-integer id is treated as a malformed record.
        try:
            int(ind)
        except ValueError:
            print("Skipping malformed line: %r" % line)
            continue

        print("adding %s" % ind)

        keywords = keywords.replace('%', ' ')

        try:
            # Pre-segment the Chinese text so the analyzer sees
            # space-separated tokens.
            ent_name = " ".join(x.strip() for x in jieba.cut_for_search(ent_name))
            keywords = " ".join(x.strip() for x in jieba.cut_for_search(keywords))

            doc = Document()

            doc.add(Field('ind', ind, Field.Store.YES, Field.Index.NO))
            doc.add(Field('ent_name', ent_name, Field.Store.NO, Field.Index.ANALYZED))
            doc.add(Field('keywords', keywords, Field.Store.NO, Field.Index.ANALYZED))
            # doc.add(Field('n_colors', n_colors, Field.Store.NO, Field.Index.ANALYZED))

            writer.addDocument(doc)

        except Exception as e:
            # Best-effort indexing: report the failure and keep going.
            print("Failed in indexDocs: %r" % e)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号