def parse_kb(self, response):
    """Yield one firmware item per firmware link in the downloads section.

    The paragraphs under ``support-article-downloads`` are split on
    ``<br><br>`` because different firmware versions are not clearly
    separated in the markup. Paragraphs and segments are both walked in
    reverse so that a MIB link is seen before the firmware links that
    should carry it.

    Args:
        response: The page response. ``response.meta["product"]`` supplies
            the product value stored on each item.

    Yields:
        Loaded items with ``date``, ``url``, ``mib``, ``product``,
        ``vendor`` and ``version`` populated.
    """
    # Most recently seen MIB link; it persists across segments and entries.
    mib = None

    entries = response.xpath("//div[@id='support-article-downloads']/div/p")
    for entry in reversed(entries):
        for segment in reversed(entry.extract().split("<br><br>")):
            resp = HtmlResponse(
                url=response.url, body=segment, encoding=response.encoding)

            hrefs = resp.xpath("//a/@href").extract()
            if not hrefs:
                # Nothing to classify in this segment.
                continue

            # The text depends only on the segment, so extract it once
            # rather than once (or twice) per link.
            text = resp.xpath("//text()").extract()

            for href in hrefs:
                if "MIBs" in href:
                    mib = href
                elif "firmware" in href:
                    item = FirmwareLoader(
                        item=FirmwareImage(),
                        response=resp,
                        date_fmt=["%m/%d/%Y"],
                    )
                    item.add_value("date", item.find_date(text))
                    # NOTE(review): this XPath matches every anchor in the
                    # segment, not only `href` -- confirm the loader picks
                    # the intended URL when a segment holds several links.
                    item.add_xpath("url", "//a/@href")
                    item.add_value("mib", mib)
                    item.add_value("product", response.meta["product"])
                    item.add_value("vendor", self.name)
                    item.add_value(
                        "version", FirmwareLoader.find_version_period(text))
                    yield item.load_item()
评论列表
文章目录