def scrape(self, pagenum: int) -> None:
    """Scrape one results page and emit one table row per post.

    Args:
        pagenum: Zero-based page index; the source URL is formatted with
            ``pagenum + 1`` because the site's pages are one-based.

    Side effects:
        Emits ``self.addRow`` with ``[time, link, title, size]`` for each
        ``div.post`` found; on HTTPError, logs to stderr and shows a
        critical message box.
    """
    try:
        url = self.source_url.format(pagenum + 1)
        # timeout added so a stalled server cannot hang the caller forever
        req = requests.get(url, headers={'User-Agent': self.user_agent},
                           proxies=self.proxy, timeout=30)
        bs = BeautifulSoup(req.text, 'lxml')
        for post in bs('div', class_='post'):
            # hoisted: the title anchor was previously looked up twice per post
            title_link = post.find('a', class_='p-title')
            dlsize = post.find('h2').get_text().strip()
            table_row = [
                post.find('div', class_='p-c p-c-time').get_text().strip(),
                title_link.get('href').strip(),
                title_link.get_text().strip(),
                # extract the size from a trailing "(... )" group, e.g.
                # "Title (1.4 GB)" -> "1.4 GB"
                dlsize[dlsize.rfind('(') + 1:-1],
            ]
            self.addRow.emit(table_row)
    except HTTPError:
        # BUG FIX: sys.exc_info()[0] is the exception *class*, not a str;
        # passing it to stderr.write()/QMessageBox.critical() raised a
        # TypeError inside the handler. Use the exception's message text.
        error_text = str(sys.exc_info()[1])
        sys.stderr.write(error_text + '\n')
        QMessageBox.critical(self, 'ERROR NOTIFICATION', error_text)
# NOTE(review): the following two lines were webpage-extraction artifacts,
# not code — "评论列表" (comment list) and "文章目录" (article table of
# contents) from the page this snippet was copied from.