def run(input):
""""""
pub = os.path.basename(input.name).split('.', 1)[0]
if pub == 'rsc':
reader = RscHtmlReader()
elif pub == 'acs':
reader = AcsHtmlReader()
elif pub == 'springer':
reader = NlmXmlReader()
else:
raise click.ClickException('Invalid publisher')
doc = reader.read(input)
# Serialize all records apart from those that are just chemical names or just labels
records = [record.serialize(primitive=True) for record in doc.records]
records = [record for record in records if not record.keys() == ['names'] and not record.keys() == ['labels']]
with open('%s-out.json' % os.path.splitext(input.name)[0], 'w') as outf:
json.dump(records, outf, indent=2)
评论列表
文章目录