Python examples using the UserAgent() class

def get_user_agent():
    return UserAgent().random
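All of the snippets below come from projects using the fake-useragent package; a minimal sketch of the imports they rely on (assuming the package is installed via pip install fake-useragent, and the errors module layout of the 0.x versions these snippets target):

from fake_useragent import UserAgent
from fake_useragent.errors import FakeUserAgentError  # raised when UA data cannot be loaded

ua = UserAgent()
print(ua.random)   # a random real-world browser User-Agent string
print(ua.chrome)   # a random Chrome User-Agent string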
def __init__(self, crawler):
    super(RandomUserAgentMiddleware, self).__init__()
    self.ua = UserAgent()
    self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")
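A Scrapy downloader middleware like this is normally completed with a from_crawler factory and a process_request hook; a minimal sketch, assuming the standard Scrapy middleware API:

@classmethod
def from_crawler(cls, crawler):
    return cls(crawler)

def process_request(self, request, spider):
    # getattr(self.ua, 'random') / 'chrome' / 'firefox' picks the UA family
    # configured via RANDOM_UA_TYPE.
    request.headers.setdefault('User-Agent', getattr(self.ua, self.ua_type))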
def __init__(self, target, socket_count=300, headers=None):
    """
    :param target: link to the web server [TargetInfo]
    :param socket_count: maximum number of sockets to create (default: 300)
    :param headers: HTTP headers to send with each request
    """
    super(Connection, self).__init__()
    self.target = target
    # Build the default header dict here rather than as a mutable default
    # argument, which would be shared across all instances.
    self.headers = headers or {
        'User-Agent': None,  # filled in later from UserAgent()
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'ru,en-us;q=0.7,en;q=0.3',
        'Accept-Charset': 'windows-1251,utf-8;q=0.7,*;q=0.7',
        'Connection': 'keep-alive',
    }
    try:
        self.fake_ua = UserAgent()
    except FakeUserAgentError as fe:
        logger.error(fe)
        self.fake_ua = None  # no fake UA available; keep the placeholder header
    # Counters
    self.socket_count = socket_count
    self.__cnt_sent_requests = 0
    self.__cnt_died_sockets = 0
    self.__cnt_alive_socket = 0
    self.__sockets = []
    self.is_stop = False
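The None placeholder in 'User-Agent' is presumably replaced per request elsewhere in the class; a hypothetical sketch of that step (the method name is an assumption, not part of the original):

def _refresh_user_agent(self):
    # Use a random fake UA when one is available; otherwise leave the header as-is.
    if self.fake_ua is not None:
        self.headers['User-Agent'] = self.fake_ua.random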
def __init__(self, crawler):
    super(RandomUserAgentMiddleware, self).__init__()
    self.ua = UserAgent()
    self.per_proxy = crawler.settings.get('RANDOM_UA_PER_PROXY', False)
    self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
    self.proxy2ua = {}
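The per_proxy flag and proxy2ua map suggest one stable user agent per proxy; a minimal sketch of the matching process_request, assuming Scrapy's convention of storing the proxy in request.meta['proxy']:

def process_request(self, request, spider):
    if self.per_proxy and 'proxy' in request.meta:
        proxy = request.meta['proxy']
        # Assign each proxy a fixed UA the first time it is seen.
        if proxy not in self.proxy2ua:
            self.proxy2ua[proxy] = getattr(self.ua, self.ua_type)
        request.headers['User-Agent'] = self.proxy2ua[proxy]
    else:
        request.headers.setdefault('User-Agent', getattr(self.ua, self.ua_type))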
def google_news_run(keyword, limit=10, year_start=2010, year_end=2011, debug=True, sleep_time_every_ten_articles=0):
    num_articles_index = 0
    ua = UserAgent()
    result = []
    while num_articles_index < limit:
        url = forge_url(keyword, num_articles_index, year_start, year_end)
        if debug:
            logging.debug('For Google -> {}'.format(url))
            logging.debug('Total number of calls to Google = {}'.format(NUMBER_OF_CALLS_TO_GOOGLE_NEWS_ENDPOINT))
        headers = {'User-Agent': ua.chrome}
        try:
            response = requests.get(url, headers=headers, timeout=20)
            links = extract_links(response.content)
            nb_links = len(links)
            if nb_links == 0 and num_articles_index == 0:
                raise Exception(
                    'No results fetched. Either the keyword is wrong '
                    'or you have been banned by Google. Retry tomorrow '
                    'or change your IP address.')
            if nb_links == 0:
                print('No more news to read for keyword {}.'.format(keyword))
                break
            for i in range(nb_links):
                cur_link = links[i]
                logging.debug('TITLE = {}, URL = {}, DATE = {}'.format(cur_link[1], cur_link[0], cur_link[2]))
            # Extend once per page, not once per link, to avoid duplicate entries.
            result.extend(links)
        except requests.exceptions.Timeout:
            logging.debug('Google news timeout. Maybe the connection is too slow. Skipping.')
        num_articles_index += 10
        if debug and sleep_time_every_ten_articles != 0:
            logging.debug('Program is going to sleep for {} seconds.'.format(sleep_time_every_ten_articles))
            time.sleep(sleep_time_every_ten_articles)
    return result
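The forge_url and extract_links helpers are not shown in this snippet. A hypothetical sketch of forge_url, assuming Google's standard search query parameters (tbm=nws for news, start for pagination, tbs=cdr for a custom date range); the exact URL format is an assumption:

import urllib.parse

def forge_url(keyword, start_index, year_start, year_end):
    # Hypothetical reconstruction of the paginated Google News search URL.
    url = ('https://www.google.com/search?q={}&tbm=nws&start={}'
           '&tbs=cdr:1,cd_min:01/01/{},cd_max:12/31/{}').format(
               urllib.parse.quote(keyword), start_index, year_start, year_end)
    return url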
def __init__(self, crawler):
    super(RandomUserAgentMiddleware, self).__init__()
    self.ua = UserAgent()
    self.ua_type = crawler.settings.get("RANDOM_USERAGENT_TYPE", "random")
def __init__(self, settings):
    # Note: super(self.__class__, self) recurses infinitely if this class is
    # ever subclassed; naming the class explicitly is safer.
    super(self.__class__, self).__init__()
    self.ua = UserAgent()
    self.per_proxy = settings.get('RANDOM_UA_PER_PROXY', False)
    self.ua_type = settings.get('RANDOM_UA_TYPE', 'random')
    self.proxy2ua = {}
    self.logger = getLoggerFromSettings(__name__, settings)
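Middlewares like these are enabled in the Scrapy project settings; a sketch with a hypothetical module path (myproject.middlewares is an assumption):

# settings.py
DOWNLOADER_MIDDLEWARES = {
    # Disable Scrapy's built-in user-agent middleware.
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'myproject.middlewares.RandomUserAgentMiddleware': 400,
}
RANDOM_UA_TYPE = 'random'   # or 'chrome', 'firefox', ...
RANDOM_UA_PER_PROXY = False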
def __init__(self, crawler):
    super(DownloaderMiddlewareUA, self).__init__()
    self.ua_type = crawler.settings.get('USER_AGENT_DEFAULT', 'random')
    self.ua = UserAgent()
def __init__(self):
    # self.url_input = input(
    #     "Enter a Youku video URL, e.g. http://v.youku.com/v_show/id_XMTU3NTkxNDIwMA==.html" + '\n' + '>>>')
    self.headers = {"accept-encoding": "gzip, deflate, sdch",
                    "accept-language": "zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4",
                    "user-agent": UserAgent().random,
                    }
    # utid is the 'cna' value from the site's cookies; it can be obtained
    # from the cookies of any request to the site.
    self.utid = urllib.parse.quote('onBdERfZriwCAW+uM3cVByOa')
    # self.utid = 'onBdERfZriwCAW+uM3cVByOa'
def geraUserAgente():
    # Portuguese: "generate user agent".
    ua = UserAgent()
    ua.update()  # refresh the cached user-agent data
    return str(ua.random)
def Help():
    os.system("setterm -foreground white")
    print('''
    Usage: python fakeRequestes.py
    The web address is the page you want to access
    with a fake UserAgent.
    ''')
def __init__(self, crawler):
    super(RandomUserAgentMiddlware, self).__init__()
    self.ua = UserAgent()
    self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")
def __init__(self):
    super(RandomUserAgentMiddleware, self).__init__()
    self.ua = UserAgent()
def getUA():
    ua = UserAgent()
    return ua.random
def get_user_agent():
    if platform.uname()[0] == 'Windows':
        ua = UserAgent()
        return ua.random
    else:
        # Fall back to a local copy of the fake_useragent cache file:
        # 'randomize' maps string indices to browser names, and 'browsers'
        # maps each browser name to a list of user-agent strings.
        with codecs.open('/home/rd/fake_useragent.json', encoding='utf-8', mode='rb') as fp:
            s = json.load(fp)
        attr = s['randomize'][str(random.randint(0, len(s['randomize']) - 1))]
        return s['browsers'][attr][random.randint(0, len(s['browsers'][attr]) - 1)]
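The two index-based lookups can be written more idiomatically with random.choice; a behavior-equivalent sketch under the same cache-file layout (the function name here is hypothetical):

import codecs
import json
import random

def get_user_agent_from_cache(path='/home/rd/fake_useragent.json'):
    # Same lookup as above, using random.choice instead of index arithmetic.
    with codecs.open(path, encoding='utf-8', mode='rb') as fp:
        s = json.load(fp)
    attr = random.choice(list(s['randomize'].values()))
    return random.choice(s['browsers'][attr])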