def scrape_cisco(self, url):
    """Collect the first nine Cisco security advisories into ``self.data``.

    The advisory list is read from the JSON endpoint behind Cisco's
    publication listing page; each advisory's own page is then parsed for
    its description, affected products and workaround text.

    For every advisory a deep copy of ``self.value`` is filled in and
    appended to ``self.data``. When an advisory page lacks one of the
    expected elements, the corresponding key keeps whatever value the
    ``self.value`` copy already holds instead of aborting the whole run.

    Args:
        url: Not used by this method; the listing endpoint is hard-coded.
            Kept so the call signature stays unchanged.

    Returns:
        None. Results are accumulated in ``self.data``.
    """
    # Position of the <meta> tag whose ``content`` attribute is used as the
    # description (zero-based, i.e. the 39th <meta> element on the page).
    # NOTE(review): positional lookup is fragile if the page layout changes.
    description_meta_index = 38

    # Advisory-page divs to copy into the record: (record key, div id).
    # NOTE(review): the original author listed the "fixedsoftfield" div as
    # an alternate source for 'solution'.
    text_sections = (
        ('affected', "vulnerableproducts"),
        ('solution', "workaroundsfield"),
    )

    # JSON (Ajax) endpoint that backs the advisory listing page.
    listing_url = (
        "https://tools.cisco.com/security/center/publicationService.x"
        "?criteria=exact&cves=&keyword=&last_published_date=&limit=30&offset=0"
        "&publicationTypeIDs=1,3&securityImpactRatings=&sort=-day_sir&title="
    )
    ajax_data = get(listing_url).text
    json_data = json.loads(ajax_data)  # list of dicts, one per advisory

    # The selector does not depend on the advisory, so build it once.
    meta_selector = CSSSelector('meta')

    for dictionary in json_data[:9]:
        temp_data_ci = deepcopy(self.value)
        temp_data_ci['val_name'] = dictionary['title']
        temp_data_ci['severity'] = dictionary['severity']
        # Uses the first-published date, so later updates to an advisory do
        # not change the date recorded for it.
        temp_data_ci['date'] = self.convert_cisco_date(dictionary['firstPublished'])
        page_link_ci = dictionary['url']
        temp_data_ci['link'] = page_link_ci

        # Parse the advisory page and pick the description out of its
        # <meta> tags.
        advisory_page = get(page_link_ci)
        dom_tree = lxml.html.fromstring(advisory_page.text)
        meta_tags = meta_selector(dom_tree)
        # Guard the positional lookup: a page with fewer <meta> tags would
        # otherwise raise IndexError.
        if len(meta_tags) > description_meta_index:
            temp_data_ci['description'] = meta_tags[description_meta_index].get('content')

        # NOTE(review): get_html_data presumably fetches the same page a
        # second time and returns a BeautifulSoup-style tree - confirm.
        new_data_ci = self.get_html_data(page_link_ci)
        for key, div_id in text_sections:
            section = new_data_ci.find('div', class_="ud-innercontent-area", id=div_id)
            # find() yields None when the div is absent; skip it rather than
            # fail on ``None.text``.
            if section is not None:
                temp_data_ci[key] = section.text.strip()

        self.data.append(temp_data_ci)
report_generator.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录