def createIndex(self, client, num_docs=100):
    """Create the test search index and fill it with play chapters.

    The index schema has a text field ``play`` (weight 5.0), a text field
    ``txt`` and a numeric field ``chapter``. If index creation raises
    ``redis.ResponseError`` the index is dropped and creation is retried
    exactly once; a second failure is re-raised so the underlying server
    error is reported instead of recursing without bound.

    Rows are read from the bz2-compressed, ``;``-delimited file
    ``WILL_PLAY_TEXT`` and grouped into one document per lowercased
    ``'<play>:<chapter>'`` key. Reading stops as soon as ``num_docs``
    distinct documents exist, so the last document created holds only
    the first row of its chapter.

    Args:
        client: the ``Client`` the index is created on.
        num_docs: number of distinct documents at which reading stops.

    Raises:
        redis.ResponseError: if the index still cannot be created after
            it has been dropped once.
    """
    self.assertIsInstance(client, Client)

    # Two attempts at most: the first may fail on an index left over from
    # a previous run, in which case it is dropped and recreated.
    for attempt in (1, 2):
        try:
            client.create_index((TextField('play', weight=5.0),
                                 TextField('txt'),
                                 NumericField('chapter')))
        except redis.ResponseError:
            if attempt == 2:
                raise
            client.drop_index()
        else:
            break

    chapters = {}
    # NOTE(review): BZ2File yields bytes on Python 3, which csv.reader
    # rejects; a Python 3 port needs a text-mode open. The file's encoding
    # is not visible from here, so this is left unchanged.
    with bz2.BZ2File(WILL_PLAY_TEXT) as fp:
        for row in csv.reader(fp, delimiter=';'):
            # Sample row:
            # ['62816', 'Merchant of Venice', '9', '3.2.74', 'PORTIA', "I'll begin it,--Ding, dong, bell."]
            play, chapter, text = row[1], row[2], row[5]
            key = '{}:{}'.format(play, chapter).lower()
            doc = chapters.setdefault(key, {})
            doc['play'] = play
            # Append each line of the chapter to the document body.
            doc['txt'] = doc.get('txt', '') + ' ' + text
            # An empty chapter column is stored as chapter 0.
            doc['chapter'] = int(chapter or 0)
            if len(chapters) == num_docs:
                break

    indexer = client.batch_indexer(chunk_size=50)
    self.assertIsInstance(indexer, Client.BatchIndexer)
    self.assertEqual(50, indexer.chunk_size)

    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    for key, doc in chapters.items():
        indexer.add_document(key, **doc)
    indexer.commit()
评论列表
文章目录