def parse_news(self, response):
    """Render a news page in Firefox, scrape its fields and yield the item.

    The page at ``response.url`` is loaded through selenium and the rendered
    source is parsed with BeautifulSoup; the scrapy response body itself is
    not parsed.

    :param response: scrapy response. ``response.meta`` may carry ``item``
        (defaults to a new ``NewsItem``), ``page`` (default 1) and ``index``
        (default 0).
    :returns: generator that yields the item when ``judge_news_crawl`` keeps
        it, and a request for the next listing page when ``index`` is 19 and
        ``self.flag`` is falsy.
    """
    item = response.meta.get("item", NewsItem())
    page = response.meta.get("page", 1)
    index = response.meta.get("index", 0)
    origin_url = response.url
    # The dot is escaped so only a literal ".html" suffix is accepted.
    no_res = re.search(r"/(\d+)?\.html", origin_url)
    news_no = no_res.group(1) if no_res else None

    driver = webdriver.Firefox(executable_path=self.chromedriver)
    try:
        driver.get(origin_url)
        # Fixed pause before reading the source (presumably to let the page
        # finish rendering -- confirm).
        time.sleep(3)
        code = driver.page_source
    finally:
        # Always release the browser, even when loading the page fails.
        driver.quit()

    soup = BeautifulSoup(code, "lxml")

    # NOTE(review): the marker literal below is "??" in this copy of the
    # file, which looks like lost non-ASCII text -- restore it from the
    # original source. The first three characters of a matching span are
    # dropped; the last matching span wins.
    marker = "??".decode("utf-8")
    referer_web = None
    for a in soup("span", class_="author-name"):
        if marker in a.text:
            referer_web = a.text[3:]

    date_tag = soup.find("span", class_="article-time")
    news_date = date_tag.text if date_tag else None
    content_tag = soup.find("div", id="article_content")
    content = content_tag.get_text(strip=True) if content_tag else None

    item["content"] = content
    item["news_date"] = news_date
    item["referer_web"] = referer_web
    item["crawl_date"] = NOW
    item["news_no"] = news_no

    item = judge_news_crawl(item, end_day=2)
    if item:
        yield item
    else:
        # Remember the page on which an item was rejected; a truthy flag
        # stops the pagination below.
        self.flag = page

    if index == 19 and not self.flag:
        next_page = page + 1
        next_page_url = self.next_page_url % next_page
        # Request the formatted URL; the page number itself is not a URL.
        yield scrapy.Request(next_page_url, meta={"page": next_page})
python类Firefox()的实例源码
def __init__(self):
    """Start a ``Display(visible=0, size=(800, 600))`` and open Firefox.

    Firefox is launched through the geckodriver binary at a hard-coded path.
    A Chrome driver was used here previously and has been replaced.
    """
    virtual_display = Display(visible=0, size=(800, 600))
    self.display = virtual_display
    virtual_display.start()

    geckodriver_path = "/home/ubuntu/geckodriver"
    self.driver = webdriver.Firefox(executable_path=geckodriver_path)
def get_browser():
    """Ask the user which browser they use and return the choice.

    Returns:
        "Firefox" for ``F``, "Chrome" for ``C`` and "Invalid" for any other
        input. Entering ``X`` ends the program through ``sys.exit(1)``.
    """
    browser = input("Are you using Chrome or Firefox? -- Enter C for Chrome, F for Firefox. Press X to exit.")
    if browser == 'X':
        # sys.exit raises SystemExit, so no return value is needed here.
        sys.exit(1)
    return {'F': "Firefox", 'C': "Chrome"}.get(browser, "Invalid")
#function that obtains data from browser
def make_browser(cls):
    """Start a selenium browser on ``cls`` and open the local server page.

    PhantomJS is tried first; if it cannot be started, Firefox is used
    instead. The browser is then pointed at ``127.0.0.1`` on
    ``cls.port_num`` and ``cls.app`` is built with ``cls._make_app()``.

    :raises Exception: when neither PhantomJS nor Firefox can be started.
    """
    # Build a selenium browser
    try:
        cls.browser = webdriver.PhantomJS()
    except Exception:
        try:
            # Fall back to Firefox
            cls.browser = webdriver.Firefox()
        except Exception:
            # Only real errors are translated; KeyboardInterrupt and
            # SystemExit are allowed to propagate unchanged.
            raise Exception("Could not start a Firefox or PhantomJS instance!")
    cls.browser.get("http://127.0.0.1:%i/" % cls.port_num)
    # Setup to support routing
    cls.app = cls._make_app()
def firefox_driver():
    """Return a Firefox webdriver set up to accept untrusted SSL certificates.

    Both the desired capabilities (``acceptSslCerts``) and the Firefox
    profile (``accept_untrusted_certs``) are configured.
    """
    # Doesn't work with geckodriver! :(
    # Work on a copy: FIREFOX is a dict shared at class level, so changing
    # it in place would affect every later user of DesiredCapabilities.
    capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    capabilities['acceptSslCerts'] = True
    profile = webdriver.FirefoxProfile()
    profile.accept_untrusted_certs = True
    return webdriver.Firefox(firefox_profile=profile, capabilities=capabilities)
def Launch():
"""
Launch the Medium bot and ask the user what browser they want to use.
"""
if 'chrome' not in DRIVER.lower() and 'firefox' not in DRIVER.lower() and 'phantomjs' not in DRIVER.lower():
# Browser choice
print 'Choose your browser:'
print '[1] Chrome'
print '[2] Firefox/Iceweasel'
print '[3] PhantomJS'
while True:
try:
browserChoice = int(raw_input('Choice? '))
except ValueError:
print 'Invalid choice.',
else:
if browserChoice not in [1,2,3]:
print 'Invalid choice.',
else:
break
StartBrowser(browserChoice)
elif 'chrome' in DRIVER.lower():
StartBrowser(1)
elif 'firefox' in DRIVER.lower():
StartBrowser(2)
elif 'phantomjs' in DRIVER.lower():
StartBrowser(3)
def StartBrowser(browserChoice):
"""
Start the selenium browser selected by the user, sign in and run the bot.

browserChoice: browser option selected by the user
(1 = Chrome, 2 = Firefox/Iceweasel, 3 = PhantomJS).

After a successful sign-in MediumBot is run with the browser; otherwise
a message describing the likely cause is printed.
"""
# NOTE(review): no branch handles a value other than 1, 2 or 3, in which
# case `browser` is never bound before it is used below -- confirm that
# callers always validate the choice.
if browserChoice == 1:
print '\nLaunching Chrome'
browser = webdriver.Chrome()
elif browserChoice == 2:
print '\nLaunching Firefox/Iceweasel'
browser = webdriver.Firefox()
elif browserChoice == 3:
print '\nLaunching PhantomJS'
browser = webdriver.PhantomJS()
if SignInToService(browser):
print 'Success!\n'
MediumBot(browser)
else:
# Sign-in failed: inspect the current page to choose the message.
soup = BeautifulSoup(browser.page_source, "lxml")
if soup.find('div', {'class':'alert error'}):
print 'Error! Please verify your username and password.'
elif browser.title == '403: Forbidden':
print 'Medium is momentarily unavailable. Please wait a moment, then try again.'
else:
print 'Please make sure your config is set up correctly.'
# NOTE(review): indentation is missing in this copy, so it is unclear whether
# quit() runs on both paths or only after a failed sign-in -- confirm against
# the original file.
browser.quit()
def setup_class(self):
    """Call ``start_xvfb()``, open Firefox and register a test user with it.

    The driver is stored as ``self.driver`` and the value returned by
    ``register_test_user`` as ``self.test_user``.
    """
    start_xvfb()
    firefox = webdriver.Firefox()
    self.driver = firefox
    self.test_user = register_test_user(self.driver)
def setup_class(self):
    """Prepare shared state: call ``start_xvfb()``, then create the driver and test user.

    Sets ``self.driver`` to a new Firefox webdriver and ``self.test_user``
    to the result of ``register_test_user`` for that driver.
    """
    start_xvfb()
    new_driver = webdriver.Firefox()
    self.driver = new_driver
    self.test_user = register_test_user(self.driver)
bova11_firefox_spider.py 文件源码
项目:spread-knowledge-repository
作者: danieldev13
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def __init__(self):
    """
    Create the Firefox web driver from an explicit browser installation.

    The Firefox web driver needs to know where the browser is installed, so
    the location from ``settings.web_browser_location`` is wrapped in a
    ``FirefoxBinary`` and handed over through the ``firefox_binary`` parameter.
    """
    browser_location = settings.web_browser_location
    self.driver = webdriver.Firefox(firefox_binary=FirefoxBinary(browser_location))
def screenShot(ImageFolder, IP):
    '''
    Take a screenshot of the web page served at IP using selenium, when the
    connection is possible.

    The IP and the target file name are stored in MongoDB (collection
    ``Nobisuke`` of database ``test``) before the page is loaded. Any failure
    is reported on stdout instead of being raised.

    ImageFolder: folder in which ``<IP>.png`` is written.
    IP: host (or URL) to capture; ``http://`` is prepended when the value
        does not already contain "http".
    '''
    IP = str(IP)
    print('[INFO] Trying to connect to: %s' % IP)
    # Always bind the target URL; it used to be undefined when IP already
    # carried a scheme, which made every such call fail.
    aux = IP if 'http' in IP else 'http://' + IP
    # Now connect and take the screenshot.
    try:
        separator = '' if ImageFolder.endswith('/') else '/'
        nombreScreenShot = ImageFolder + separator + IP + '.png'

        client = MongoClient()
        try:
            db = client.test
            db.Nobisuke.insert({"IP":str(IP),"Picture":str(nombreScreenShot),"bot":"Nobisuke Nobi"})
        finally:
            client.close()

        driver = webdriver.Firefox()
        try:
            driver.get(aux)
            driver.save_screenshot(nombreScreenShot)
        finally:
            # Close the browser even when loading or saving fails.
            driver.quit()
    except Exception as e:
        print('[-] ERROR TRYING SCREENSHOT: (%s) %s' % (IP, e))
selenium_testcase.py 文件源码
项目:Software-Architecture-with-Python
作者: PacktPublishing
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def setup():
    """Yield a new Firefox webdriver and quit it once the consumer is done.

    The driver is quit in a ``finally`` clause so the browser is also closed
    when the generator is closed early or an exception is thrown into it.
    """
    driver = webdriver.Firefox()
    try:
        yield driver
    finally:
        driver.quit()
def accessToken(self):
    """Open the Facebook Graph API explorer page in a new Firefox window.

    NOTE(review): the browser is neither stored nor closed here and nothing
    is returned -- confirm whether the rest of the method is missing.
    """
    explorer_url = "https://developers.facebook.com/tools/explorer"
    firefox = webdriver.Firefox()
    firefox.get(explorer_url)
def login(self):
    """Open facebook.com in a new Firefox window and submit the login form.

    ``self.username`` and ``self.password`` are typed into the elements with
    ids ``email`` and ``pass``; the element with id ``u_0_n`` is then clicked.
    """
    firefox = webdriver.Firefox()
    firefox.get("https://www.facebook.com")

    email_field = firefox.find_element_by_id('email')
    email_field.send_keys(self.username)

    password_field = firefox.find_element_by_id('pass')
    password_field.send_keys(self.password)

    submit_control = firefox.find_element_by_id('u_0_n')
    submit_control.click()
event_firing_webdriver.py 文件源码
项目:devsecops-example-helloworld
作者: boozallen
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def __init__(self, driver, event_listener):
    """
    Creates a new instance of the EventFiringWebDriver

    :Args:
     - driver : A WebDriver instance
     - event_listener : Instance of a class that subclasses
       AbstractEventListener and implements it fully or partially

    :Raises:
     - WebDriverException : if either argument has the wrong type

    Example:

    .. code-block:: python

        from selenium.webdriver import Firefox
        from selenium.webdriver.support.events import EventFiringWebDriver, AbstractEventListener

        class MyListener(AbstractEventListener):
            def before_navigate_to(self, url, driver):
                print("Before navigate to %s" % url)
            def after_navigate_to(self, url, driver):
                print("After navigate to %s" % url)

        driver = Firefox()
        ef_driver = EventFiringWebDriver(driver, MyListener())
        ef_driver.get("http://www.google.co.in/")
    """
    # Validate the driver first, then the listener.
    driver_ok = isinstance(driver, WebDriver)
    if not driver_ok:
        raise WebDriverException("A WebDriver instance must be supplied")

    listener_ok = isinstance(event_listener, AbstractEventListener)
    if not listener_ok:
        raise WebDriverException("Event listener must be a subclass of AbstractEventListener")

    self._driver = driver
    # Route the wrapped driver's value wrapping through this instance.
    self._driver._wrap_value = self._wrap_value
    self._listener = event_listener
def Launch():
    """Check the working directory, ask for a browser and start it.

    Exits through ``sys.exit()`` when no file named ``config`` exists in the
    current directory. Creates an empty ``visitedUsers.txt`` when that file
    is missing, then prompts until the user enters a number from 1 to 5 and
    passes it to ``StartBrowser``.
    """
    # Check if the file 'config' exists, otherwise quit
    if not os.path.isfile('config'):
        print('Error! No configuration file.')
        sys.exit()

    # Check if the file 'visitedUsers.txt' exists, otherwise create it
    if not os.path.isfile('visitedUsers.txt'):
        with open('visitedUsers.txt', 'wb'):
            pass

    # Browser choice
    print('Choose your browser:')
    print('[1] Chrome')
    print('[2] Firefox/Iceweasel')
    print('[3] Firefox/Iceweasel (light)')
    print('[4] PhantomJS')
    print('[5] PhantomJS (light)')

    while True:
        try:
            browserChoice = int(input('Choice? '))
        except ValueError:
            print('Invalid choice.')
            continue
        if browserChoice in (1, 2, 3, 4, 5):
            break
        print('Invalid choice.')

    StartBrowser(browserChoice)
def get_browser(self):
    """Create and return a local selenium webdriver for ``self.browser_name``.

    Supported names are 'firefox', 'chrome', 'chrome-headless' and
    'phantomjs'. The window is resized to 1024x768 before the browser is
    returned.

    :raises SystemExit: when ``self.browser_name`` is not a supported name.
    """
    kind = self.browser_name
    if kind not in ('firefox', 'chrome', 'chrome-headless', 'phantomjs'):
        raise SystemExit(
            "ERROR: browser type must be one of 'firefox', 'chrome', "
            "'phantomjs', or 'chrome-headless' not '{b}'".format(b=kind)
        )

    if kind == 'phantomjs':
        logger.debug("getting PhantomJS browser (local)")
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = self.user_agent
        service_args = [
            '--ssl-protocol=any',
            '--ignore-ssl-errors=true',
            '--web-security=false'
        ]
        browser = webdriver.PhantomJS(
            desired_capabilities=dcap, service_args=service_args
        )
    elif kind == 'chrome-headless':
        logger.debug('getting Chrome browser (local) with --headless')
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        browser = webdriver.Chrome(chrome_options=chrome_options)
    elif kind == 'chrome':
        logger.debug("getting Chrome browser (local)")
        browser = webdriver.Chrome()
    else:
        # Only 'firefox' is left at this point.
        logger.debug("getting Firefox browser (local)")
        if 'DISPLAY' not in os.environ:
            # Default to display :0 when none is exported.
            logger.debug("exporting DISPLAY=:0")
            os.environ['DISPLAY'] = ":0"
        browser = webdriver.Firefox()

    browser.set_window_size(1024, 768)
    logger.debug("returning browser")
    return browser