import json
import os

import wikipedia  # PyPI `wikipedia` package, used to fetch page content

# Assumption: `l` is quantulum's load module, which provides UNITS,
# DERIVED_ENT and TOPDIR.
from quantulum import load as l


def download_wiki():
    """Download Wikipedia pages of ambiguous units."""
    # Units and derived entities mapping to more than one candidate are
    # ambiguous; collect the (name, uri) pairs of their Wikipedia pages.
    ambiguous = [i for i in l.UNITS.items() if len(i[1]) > 1]
    ambiguous += [i for i in l.DERIVED_ENT.items() if len(i[1]) > 1]
    pages = set((j.name, j.uri) for i in ambiguous for j in i[1])

    print()
    objs = []
    for num, page in enumerate(pages):
        obj = {'url': page[1]}
        obj['_id'] = obj['url'].replace('https://en.wikipedia.org/wiki/', '')
        obj['clean'] = obj['_id'].replace('_', ' ')

        print('---> Downloading %s (%d of %d)' %
              (obj['clean'], num + 1, len(pages)))

        obj['text'] = wikipedia.page(obj['clean']).content
        obj['unit'] = page[0]
        objs.append(obj)

    path = os.path.join(l.TOPDIR, 'wiki.json')
    if os.path.exists(path):  # overwrite any previous dump
        os.remove(path)
    with open(path, 'w') as wiki_file:
        json.dump(objs, wiki_file, indent=4, sort_keys=True)

    print('\n---> All done.\n')
###############################################################################
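# A minimal usage sketch (an assumption, not part of the library): run the
# download and inspect the resulting dump. It presumes the `wikipedia`
# package and quantulum's load module are importable as shown above.
if __name__ == '__main__':
    download_wiki()
    with open(os.path.join(l.TOPDIR, 'wiki.json')) as wiki_file:
        print('%d ambiguous-unit pages saved' % len(json.load(wiki_file)))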