def collect_news(html):
    """Collect the news rows found in the ``now_notice_area`` block of a page.

    Args:
        html: Parsed document exposing BeautifulSoup-style ``find`` and
            ``findAll`` methods (presumably a ``BeautifulSoup`` object --
            confirm against the caller).

    Returns:
        A list with one entry per usable table row. Each entry is the list
        ``[first_cell, second_cell, urls, unique_hash]`` where the two cell
        values come from the module's ``text`` helper, ``urls`` is the
        space-joined ``href`` of every anchor in the second cell, and
        ``unique_hash`` is the SHA-1 hex digest of the two cell values
        concatenated and UTF-8 encoded.

        ``None`` is returned when extraction fails (for example when the
        ``now_notice_area`` div is missing); the exception is logged rather
        than propagated.
    """
    try:
        news_list = []
        now_notice = html.find('div', attrs={'id': 'now_notice_area'})
        for tr in now_notice.findAll('tr'):
            td = tr.findAll('td')
            # Rows without two data cells (e.g. <th> header rows) carry no
            # news item; skip them instead of failing the whole scrape.
            if len(td) < 2:
                continue

            # Build a real list: on Python 3 ``map`` returns an iterator,
            # which supports neither ``append`` nor indexing.
            news = [text(cell) for cell in td[0:2]]

            # Gather every link target from the second cell. Anchors
            # without an href yield None, which str.join cannot handle.
            links = [a.get('href') for a in td[1].findAll('a')]
            urls = " ".join(link for link in links if link is not None)
            news.append(urls)

            # Fingerprint of the row, derived from the two cell texts.
            s = news[0] + news[1]
            unique_hash = hashlib.sha1(s.encode('utf-8')).hexdigest()
            news.append(unique_hash)

            news_list.append(news)
        return news_list
    except Exception as e:
        # Best-effort scrape: report the failure and signal it with None.
        log.exception(e)
        return None
评论列表
文章目录