def document_corpus_as_iterable(corpus):
    """Lazily yield one parsed document per ``doc`` element found in *corpus*.

    The corpus is opened with ``smart_file_open`` and streamed through
    ``ET.iterparse``. Each time a ``doc`` element has been read completely it
    is handed to ``_parse_doc_elements`` and the result is yielded.

    Args:
        corpus: Whatever ``smart_file_open`` accepts -- presumably the path
            of a Solr XML export (TODO confirm against that helper).

    Yields:
        The return value of ``_parse_doc_elements`` for every ``doc`` element,
        in the order the elements are completed by the parser.
    """
    num_xml_entries = 0
    with smart_file_open(corpus) as infile:
        # Pass the value as a logging argument instead of pre-formatting with
        # ``%``: the message is only built when INFO is enabled, and a tuple
        # ``corpus`` can no longer break the interpolation.
        LOGGER.info("Loading documents from solr xml file: %s", corpus)
        # NOTE(review): finished elements are never cleared. If ET is an
        # ElementTree-compatible parser the in-memory tree grows with the
        # file; consider ``element.clear()`` after the yield once it is
        # confirmed that ``_parse_doc_elements`` keeps no reference to it.
        for event, element in ET.iterparse(infile):
            if event == 'end' and element.tag == 'doc':
                num_xml_entries += 1
                yield _parse_doc_elements(element)
    # Only reached when the caller consumes the generator to the end.
    LOGGER.info("Read %d doc entries from solr xml file: %s",
                num_xml_entries, corpus)
upload_documents_to_discovery_collection.py 文件源码
python
阅读 24
收藏 0
点赞 0
评论 0
评论列表
文章目录