def _parse_book_info(html):
    """Extract author, press, publish date and price from a book-info block.

    Every ``<br>`` tag is replaced with a sentinel before the markup is
    flattened to plain text, so each ``label: value`` entry ends at a known
    marker and can be captured with a single regular expression.

    :param html(string): HTML fragment containing the book-info block
    :return: dict with the keys ``author``, ``press``, ``publish_date`` and
        ``price``; each value is the stripped text that follows its label
    :raises AttributeError: if any of the four labels is not found in the
        flattened text (the failed search returns ``None``)
    """
    end_flag = 'END_FLAG'
    # One case-insensitive pass covers <br>, <br/>, <br /> and <BR>; tags left
    # in place would be dropped by text_content() and merge adjacent fields.
    html = re.sub(r'<br\s*/?>', end_flag, html, flags=re.I)
    doc = lxml.html.fromstring(html)
    text = doc.text_content()
    # Label, then an ASCII or full-width colon, then the value captured
    # lazily up to the next sentinel.
    pattern = r'{}[:：](.*?){}'
    # NOTE(review): the labels and the full-width colon were reconstructed
    # from '?' placeholders left by an encoding loss (the old '??' label made
    # the regex invalid: "nothing to repeat"). Confirm against the target page.
    fields = [
        ('author', '作者'),
        ('press', '出版社'),
        ('publish_date', '出版年'),
        ('price', '定价'),
    ]
    result = {}
    for key, column in fields:
        match = re.search(
            pattern.format(re.escape(column), re.escape(end_flag)),
            text,
            re.I | re.DOTALL,
        )
        # A missing label leaves match as None, so .group raises
        # AttributeError exactly as the original chained call did.
        result[key] = match.group(1).strip()
    return result
评论列表
文章目录