def url_links(self):
    """Return href values of the current page's anchors, sampled in random order.

    All ``<a>`` elements are looked up through ``self.driver`` (waiting up to
    3 seconds), shuffled, and their ``href`` attributes read one at a time.
    Anchors without an ``href`` are skipped but still count toward the
    per-page limit.

    Collection stops as soon as ``self.max_links_per_page`` anchors have been
    examined, or ``self.link_count()`` has reached ``self.max_links_cached``.
    Both limits are checked after an anchor has been processed, so at least
    one anchor is examined whenever the page has any.

    Returns:
        list: the href values collected; may be empty.

    Any exception raised while shuffling or reading attributes is caught
    (and printed when ``self.debug`` is truthy), and the links gathered up
    to that point are returned.  Exceptions escaping the initial element
    lookup are not caught here.
    """
    # Both driver calls are wrapped by self.phantomjs_short_timeout; see
    # https://github.com/detro/ghostdriver/issues/169
    # NOTE(review): presumably the decorator bounds a hanging PhantomJS
    # call -- confirm against its definition.
    @self.phantomjs_short_timeout
    def phantomjs_find_elements_by_tag_name():
        return WebDriverWait(self.driver, 3).until(
            lambda driver: driver.find_elements_by_tag_name('a'))

    elements = phantomjs_find_elements_by_tag_name()

    # Get links in random order until the per-page maximum is reached.
    links = []
    try:
        # Sorting on a random key shuffles without mutating ``elements``.
        # Kept inside the ``try`` so an unusable ``elements`` value (e.g.
        # None) yields an empty list instead of an exception.
        shuffled = sorted(elements, key=lambda _: random.random())
        for examined, anchor in enumerate(shuffled, start=1):
            @self.phantomjs_short_timeout
            def phantomjs_get_attribute():
                return anchor.get_attribute('href')

            href = phantomjs_get_attribute()
            if href is not None:
                links.append(href)
            # ``>=`` on both limits: stop at exactly the per-page maximum
            # (the previous ``>`` examined one anchor too many), and stop
            # even if the cache count has already overshot its cap.
            if (examined >= self.max_links_per_page
                    or self.link_count() >= self.max_links_cached):
                break
    except Exception as e:
        # Deliberate best-effort: a failing page must not abort the crawl.
        if self.debug:
            print('.get_attribute() exception:\n{}'.format(e))
    return links
isp_data_pollution.py 文件源码
python
阅读 35
收藏 0
点赞 0
评论 0
评论列表
文章目录