def get_entries(feed):
    """Yield one dict per content chunk of every entry in ``feed``.

    For each entry, the value of its first content item is passed to
    ``BeautifulSoup`` (``'html.parser'``) and split into chunks by
    ``split_content_by_dot``. The first chunk of every entry is wrapped in
    an announcement header that names the author and the title.

    Note: an ``entry.id`` containing ``"http"`` is replaced *in place* by
    its MD5 hex digest, so the ``feed`` object passed in is modified.

    Args:
        feed: Object exposing ``entries``. Each entry must provide ``id``,
            ``content`` (whose first item has a ``value`` attribute),
            ``author``, ``title`` and ``published``.

    Yields:
        dict with the keys ``content``, ``id`` (formatted as
        ``"<entry id>_<chunk index>"``), ``title`` and ``published`` (the
        parsed publication time minus ``i`` seconds for chunk ``i``).
    """
    NEW_POST = u"""New post, author {author}, title {title} {content}"""
    for entry in feed.entries:
        if "http" in entry.id:
            # hashlib only accepts bytes: encode text ids explicitly so this
            # works on Python 3 and for non-ASCII ids, while producing the
            # same digest as before for plain ASCII ids.
            raw_id = entry.id
            if not isinstance(raw_id, bytes):
                raw_id = raw_id.encode("utf-8")
            entry.id = hashlib.md5(raw_id).hexdigest()

        entry_content = entry.content[0].value
        soup = BeautifulSoup(entry_content, 'html.parser')
        # NOTE(review): the budget subtracts the length of the raw template
        # (placeholders included), not of the rendered header -- confirm
        # that this is the intended size limit for the first chunk.
        chunks = list(split_content_by_dot(soup, REQUEST_LIMIT - len(NEW_POST)))
        published = dateutil.parser.parse(entry.published)

        for i, chunk in enumerate(chunks):
            if i == 0:
                # Only the first chunk of an entry carries the header.
                chunk = NEW_POST.format(
                    author=entry.author,
                    title=entry.title,
                    content=chunk,
                )
            yield {
                "content": chunk,
                "id": "%s_%d" % (entry.id, i),
                "title": entry.title,
                # Shift each later chunk one second further back so every
                # chunk of an entry gets a distinct timestamp.
                "published": published - datetime.timedelta(0, i),
            }
评论列表
文章目录