def _index_files(storeDir, indexFile):
    """Build a Lucene index in ``storeDir`` from ``indexFile``.

    The writer is opened with ``OpenMode.CREATE``; documents are added by
    ``_index_docs``, then the index is committed and the writer closed.
    Progress is reported on stdout with ``print``.

    Args:
        storeDir: Path of the directory that holds the index files.
        indexFile: Passed through unchanged to ``_index_docs``.

    Raises:
        Exception: Anything raised by ``_index_docs`` or by the commit is
            re-raised after the writer has been rolled back.
    """
    # Load the jieba dictionary eagerly instead of on first use
    # (presumably _index_docs tokenizes with jieba -- confirm there).
    jieba.initialize()

    store = SimpleFSDirectory(File(storeDir))
    # Wrap the base analyzer with a token-count limit of 1048576.
    analyzer = LimitTokenCountAnalyzer(
        SimpleAnalyzer(Version.LUCENE_CURRENT), 1048576)
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

    writer = IndexWriter(store, config)
    try:
        _index_docs(indexFile, writer)
        print('commit index')
        writer.commit()
    except Exception:
        # rollback() closes the writer without committing, so a failed run
        # does not leave the writer (and its index lock) open.
        writer.rollback()
        raise
    writer.close()
    print('done')
# Example source code using Python's initialize()
def get_search_func():
    """Create the search callable backed by the index at ``LUCENE_INDEX_DIR``.

    Initializes jieba, starts the Lucene JVM with AWT in headless mode,
    opens the index directory for reading and hands the analyzer, the
    searcher and the VM environment to ``search_func_factory``.

    Returns:
        Whatever ``search_func_factory`` returns.
    """
    jieba.initialize()

    # The JVM must be running before any Lucene class is instantiated.
    jvm_options = ['-Djava.awt.headless=true']
    vm_env = lucene.initVM(vmargs=jvm_options)

    analyzer = SimpleAnalyzer(Version.LUCENE_CURRENT)

    index_dir = SimpleFSDirectory(File(LUCENE_INDEX_DIR))
    reader = DirectoryReader.open(index_dir)
    searcher = IndexSearcher(reader)

    return search_func_factory(
        analyzer=analyzer, searcher=searcher, vm_env=vm_env)
def __init__(self, slack, custom):
    """Locate the configured channel and prepare jieba for it.

    Args:
        slack: Client object; only ``api_call("channels.list")`` is used
            here.
        custom: Mapping that provides ``'data'`` (stored as
            ``self.rundata``), ``'colorPrint'`` (stored as
            ``self.colorPrint``) and ``'food_channelname'`` (the channel
            name to look for).

    If no channel with the configured name is found, a failure message is
    printed through ``self.colorPrint``, ``self.nochannel`` is set to
    ``True`` and the method returns early: jieba is not configured and
    ``self.init()`` is not called.
    """
    self.slack = slack
    self.rundata = custom['data']
    self.colorPrint = custom['colorPrint']
    self.food_dir = "data/midnight.json"
    self.food_dic = "data/dict.txt.big"

    # find midnight channel
    self.nochannel = False
    rep = self.slack.api_call("channels.list")
    self.channel_id = ""
    for c in rep['channels']:
        # Compare case-insensitively on both sides; lowering only the
        # Slack name made a configured name with capitals unmatchable.
        if c['name'].lower() == custom['food_channelname'].lower():
            self.channel_id = c['id']
            break

    if not self.channel_id:
        self.colorPrint(
            "No midnight channel",
            "Restart when midnight channel can use",
            color="FAIL")
        self.nochannel = True
        return

    # The custom dictionary has to be set before jieba is initialized.
    jieba.set_dictionary(self.food_dic)
    jieba.initialize()

    # add and del words
    # ``get`` yields None when a key is absent; treat that as "no words"
    # instead of failing with a TypeError in the loop.
    for word in self.rundata.get('FOOD_addword') or ():
        jieba.add_word(word)
    for word in self.rundata.get('FOOD_delword') or ():
        jieba.del_word(word)

    self.init()