def parse(self, response):
    """Schedule one request per result page found on a listing response.

    Parses ``response.body`` with BeautifulSoup, reads the total item count
    from the ``total`` element inside the ``list-page`` block, derives the
    number of result pages, and yields a ``scrapy.Request`` for every page
    with ``self.parse_item`` as the callback.

    If the pagination block, the total count, or the pagination link cannot
    be found, a warning is logged and nothing is yielded instead of raising
    ``AttributeError``/``TypeError`` on a ``None`` lookup.

    Args:
        response: The downloaded listing page; ``body``, ``url`` and
            ``urljoin`` are used.

    Yields:
        scrapy.Request: One request per result page, numbered from 1.
    """
    import logging

    logger = logging.getLogger(__name__)
    soup = BeautifulSoup(response.body, 'html.parser')

    # Container holding both the total count and the pagination links.
    pager = soup.find(attrs={'class': 'list-page'})
    print("????????")
    if pager is None:
        logger.warning("parse: no 'list-page' element in %s", response.url)
        return

    # Total number of items, read from the <em> inside the 'total' element.
    total_tag = pager.find(attrs={'class': 'total'})
    count_tag = total_tag.em if total_tag is not None else None
    count_text = count_tag.string if count_tag is not None else None
    try:
        number = int(count_text)
    except (TypeError, ValueError):
        logger.warning(
            "parse: could not read total count (%r) in %s",
            count_text, response.url,
        )
        return

    # Ceiling division: 50 items per page (presumably the site's page
    # size -- confirm against the target site).
    pages = -(-number // 50)
    print("??" + str(pages))

    # The first anchor in the pagination block supplies the URL pattern.
    pagination = pager.find(attrs={'class': 'pagination'})
    link = pagination.a if pagination is not None else None
    href = link.get('href') if link is not None else None
    if not href:
        logger.warning("parse: no pagination link in %s", response.url)
        return

    # Drop the last three characters of the href; assumes it ends in a
    # short page suffix such as "p2/" -- TODO confirm against real markup.
    base = href[0:-3]

    for i in range(1, pages + 1):
        # urljoin leaves absolute URLs untouched and resolves relative
        # ones, which scrapy.Request would otherwise reject.
        page_url = response.urljoin(base + "p" + str(i) + "/")
        print("???????????" + page_url)
        print("????" + page_url)
        yield scrapy.Request(page_url, callback=self.parse_item)
        # NOTE(review): the source lost its indentation; this print is
        # assumed to belong to the loop body -- confirm.
        print("??????")
评论列表
文章目录