def _extract_note(heading):
    """Return the release note found in the element right after *heading*.

    The sibling element is serialized to bytes, line breaks and ``<br>`` tags
    are turned into ``.`` separators, and the contents of every ``<p>...</p>``
    are joined with ``b'. '``.

    Returns ``bytes`` on success. Returns the empty ``str`` ``''`` when there
    is no following sibling or it cannot be serialized; that str/bytes
    mismatch is kept on purpose so existing callers see the same values.
    """
    sibling = heading.getnext()
    if sibling is None:
        return ''
    try:
        markup = etree.tostring(sibling, pretty_print=True)
    except Exception:
        # Best-effort: a note that cannot be serialized must not drop the
        # whole release entry. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt / SystemExit propagate.
        return ''
    # pretty_print inserts newlines; collapse them so the non-DOTALL ``.+?``
    # below can span what used to be several lines.
    markup = re.sub(b"[\r\n]+", b".", markup)
    markup = markup.replace(b"<br />", b".").replace(b"<br/>", b".")
    return b'. '.join(re.findall(b'<p>(.+?)</p>', markup))


def get_releasenote(html_source):
    """Parse release notes out of an HTML page.

    Every ``<h5>`` below an element whose class contains
    ``app-version-block`` is expected to have text of the form
    ``Version <version> (<Mon DD, YYYY>)``. Headings that do not match that
    shape, or whose date does not parse with ``'%b %d, %Y'``, are skipped.

    Args:
        html_source: HTML markup accepted by ``lxml.html.fromstring``.

    Returns:
        A list of dicts, in document order, each with the keys ``'date'``
        (``datetime.datetime``), ``'version'`` (``str``) and ``'note'``
        (``bytes``, or ``''`` when no note could be extracted).
    """
    html = lxml.html.fromstring(html_source)
    versions_dates = html.xpath("//*[contains(@class, 'app-version-block')]//h5")
    releasenotes = []
    for version_date in versions_dates:
        heading_text = version_date.text
        if heading_text is None:
            # <h5> has no direct text (e.g. it only wraps child elements).
            continue

        date_match = re.search(r'\((.+?)\)', heading_text)
        version_match = re.search(r'Version (.+?) \(', heading_text)
        if date_match is None or version_match is None:
            continue

        try:
            # NOTE: %b is locale-dependent; month abbreviations are matched
            # against the current locale.
            date = datetime.datetime.strptime(date_match.group(1), '%b %d, %Y')
        except ValueError:
            # Parenthesised text is not a date in the expected format.
            continue

        releasenotes.append({
            'date': date,
            'version': version_match.group(1),
            'note': _extract_note(version_date),
        })
    return releasenotes
评论列表
文章目录