def get_data(self):
    """Fetch the weekly anime-update schedule page and parse it.

    Old-style Tornado coroutine: the result is delivered via
    ``raise gen.Return(...)``.

    Returns:
        list[dict] | None: one dict per show with keys ``weekday``,
        ``url``, ``title``, ``update_time`` (always ``None`` here,
        the schedule page carries no timestamp), or ``None`` when the
        fetch reported an error.
    """
    # Proxy settings forwarded as fetch() kwargs. NOTE(review): the
    # proxy_host/proxy_port arguments are only honored by the curl-based
    # AsyncHTTPClient backend — confirm curl_httpclient is configured.
    config = {
        'proxy_host': '124.88.67.32',
        'proxy_port': 843,
    }
    content = yield httpclient.AsyncHTTPClient().fetch(self.url, **config)

    # NOTE(review): response.error is only populated when fetch() is
    # called with raise_error=False; with the defaults a failed request
    # raises HTTPError before reaching this check — verify intent.
    if content.error:
        raise gen.Return(None)

    root = html.fromstring(content.body.decode('utf-8'))

    bangumi_info = []
    # Each "week-updateList_each" element is one weekday column of the
    # schedule table; its header span maps to an index via self.WEEKDAY.
    for day_elem in root.xpath(
            '//*[@id="scrollContent-day_update"]'
            '//*[@class="week-updateList_each"]'):
        weekday = self.WEEKDAY[day_elem.xpath('./div/span')[0].text]
        for record in day_elem.xpath('.//li'):
            bangumi_info.append({
                'weekday': weekday,
                'url': record.xpath('./a')[0].attrib['href'],
                'title': record.xpath(
                    './a/div/div[@class="week-cont_title"]')[0].text,
                'update_time': None,  # not present on this page
            })
    raise gen.Return(bangumi_info)
# 评论列表  (scrape residue: "comment list")
# 文章目录  (scrape residue: "article table of contents")