Python: example source code using the UserAgent() class
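All of the snippets below come from open-source projects and rely on the third-party fake-useragent library, imported as "from fake_useragent import UserAgent". A minimal sketch of the API they use (assuming fake-useragent is installed, e.g. via pip install fake-useragent):

from fake_useragent import UserAgent

ua = UserAgent()              # loads/builds the cached browser data
print(ua.random)              # a random User-Agent string from any browser
print(ua.chrome)              # a random Chrome User-Agent string
print(getattr(ua, 'random'))  # lookup by attribute name, as the middlewares below do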

pbdl.py (project: PacktpubDownloaderAndGetter, author: AkdM)
def get_user_agent():
    # Return a random User-Agent string from fake_useragent.
    return UserAgent().random
middlewares.py (project: django-scrapy-lcv_search, author: Albino1995)
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")
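
Only __init__ is shown in these middleware excerpts. A self-contained sketch of the full downloader middleware they imply (the from_crawler and process_request bodies are assumptions built from the settings read above, not code from this project):

from fake_useragent import UserAgent


class RandomUserAgentMiddleware(object):
    def __init__(self, crawler):
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy instantiates downloader middlewares through this hook.
        return cls(crawler)

    def process_request(self, request, spider):
        # Resolve the configured attribute ('random', 'chrome', ...) on the
        # UserAgent instance and stamp it onto the outgoing request.
        request.headers.setdefault('User-Agent', getattr(self.ua, self.ua_type))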
connection.py (project: SlowLoris, author: maxkrivich)
def __init__(self, target, socket_count=300, headers={
        'User-Agent': None,  # UserAgent()
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'ru,en-us;q=0.7,en;q=0.3',
        'Accept-Charset': 'windows-1251,utf-8;q=0.7,*;q=0.7',
        'Connection': 'keep-alive'
    }):
        """

        :param target: link to web server [TargetInfo]
        :param socket_count: maximum count of created socket default value 300
        :param headers: HTTP headers what puts in request
        """
        super(Connection, self).__init__()
        # self.lock = lock
        self.target = target
        self.headers = headers

        try:
            self.fake_ua = UserAgent()
        except FakeUserAgentError as fe:
            logger.error(fe)
        # Counters
        self.socket_count = socket_count
        self.__cnt_sent_requests = 0
        self.__cnt_died_sockets = 0
        self.__cnt_alive_socket = 0
        self.__sockets = []
        self.is_stop = False
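
The None 'User-Agent' placeholder in the header template is presumably filled from fake_ua before each request is sent; a standalone sketch of that substitution (the helper name build_headers is hypothetical, not taken from SlowLoris):

from fake_useragent import UserAgent

def build_headers(template, fake_ua):
    # Hypothetical helper: copy the header template and swap the None
    # placeholder for a freshly drawn random User-Agent string.
    headers = dict(template)
    if headers.get('User-Agent') is None:
        headers['User-Agent'] = fake_ua.random
    return headers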
rotate_user_agent.py (project: fintech_spider, author: hee0624)
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.per_proxy = crawler.settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
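
The per_proxy flag and proxy2ua cache suggest each proxy keeps one stable User-Agent. A sketch of the process_request such a middleware typically pairs with (an assumption; the method is not part of this excerpt):

def process_request(self, request, spider):
        if self.per_proxy:
            # Keep one User-Agent per proxy so each exit IP looks like a
            # consistent browser across requests.
            proxy = request.meta.get('proxy')
            if proxy not in self.proxy2ua:
                self.proxy2ua[proxy] = getattr(self.ua, self.ua_type)
            request.headers['User-Agent'] = self.proxy2ua[proxy]
        else:
            request.headers['User-Agent'] = getattr(self.ua, self.ua_type)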
middlewares.py (project: fintech_spider, author: hee0624)
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.per_proxy = crawler.settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
core.py (project: google-news-scraper, author: philipperemy)
def google_news_run(keyword, limit=10, year_start=2010, year_end=2011, debug=True, sleep_time_every_ten_articles=0):
    num_articles_index = 0
    ua = UserAgent()
    result = []
    while num_articles_index < limit:
        url = forge_url(keyword, num_articles_index, year_start, year_end)
        if debug:
            logging.debug('For Google -> {}'.format(url))
            logging.debug('Total number of calls to Google = {}'.format(NUMBER_OF_CALLS_TO_GOOGLE_NEWS_ENDPOINT))
        headers = {'User-Agent': ua.chrome}
        try:
            response = requests.get(url, headers=headers, timeout=20)
            links = extract_links(response.content)

            nb_links = len(links)
            if nb_links == 0 and num_articles_index == 0:
                raise Exception(
                    'No results fetched. Either the keyword is wrong '
                    'or you have been banned from Google. Retry tomorrow '
                    'or change your IP address.')

            if nb_links == 0:
                print('No more news to read for keyword {}.'.format(keyword))
                break

            for i in range(nb_links):
                cur_link = links[i]
                logging.debug('TITLE = {}, URL = {}, DATE = {}'.format(cur_link[1], cur_link[0], cur_link[2]))
            result.extend(links)
        except requests.exceptions.Timeout:
            logging.debug('Google news timeout. Maybe the connection is too slow. Skipping.')
        num_articles_index += 10
        if debug and sleep_time_every_ten_articles != 0:
            logging.debug('Program is going to sleep for {} seconds.'.format(sleep_time_every_ten_articles))
        time.sleep(sleep_time_every_ten_articles)
    return result
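
A hedged usage example (the keyword and limits are illustrative; forge_url, extract_links, and the call counter are helpers defined elsewhere in core.py):

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    # Fetch up to 20 article links about 'fintech' from 2010-2011, sleeping
    # 5 seconds between result pages to avoid being throttled.
    articles = google_news_run('fintech', limit=20, year_start=2010,
                               year_end=2011, sleep_time_every_ten_articles=5)
    for url, title, date in articles:
        print(title, url, date)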
middlewares.py (project: jobbole_spider, author: pujinxiao)
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.per_proxy = crawler.settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
middlewares.py (project: FirstSpider, author: yipwinghong)
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_USERAGENT_TYPE", "random")
randomua.py (project: gradcrawler, author: cullengao)
def __init__(self, settings):
        super(self.__class__, self).__init__()
        self.ua = UserAgent()
        self.per_proxy = settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
        self.logger = getLoggerFromSettings(__name__, settings)
middlewares.py (project: python, author: panxus)
def __init__(self, crawler):
        super(DownloaderMiddlewareUA, self).__init__()
        self.ua_type = crawler.settings.get('USER_AGENT_DEFAULT', 'random')
        self.ua = UserAgent()
youku.py (project: youku, author: malone6)
def __init__(self):
        # self.url_input = input(
        #     "Enter the video page URL (e.g. http://v.youku.com/v_show/id_XMTU3NTkxNDIwMA==.html) and press Enter" + '\n' + '>>>')
        self.headers = {"accept-encoding": "gzip, deflate, sdch",
                        "accept-language": "zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4",
                        "user-agent": UserAgent().random,
                        }
        # The 'cna' value from the cookies is required: it is URL-encoded and embedded in the request URL (stored here as utid).
        self.utid = urllib.parse.quote('onBdERfZriwCAW+uM3cVByOa')
        # self.utid = 'onBdERfZriwCAW+uM3cVByOa'
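
A hedged usage sketch: headers built this way can be passed straight to requests (the URL is the example from the commented-out prompt above):

import requests
from fake_useragent import UserAgent

headers = {'accept-encoding': 'gzip, deflate, sdch',
           'user-agent': UserAgent().random}
resp = requests.get('http://v.youku.com/v_show/id_XMTU3NTkxNDIwMA==.html',
                    headers=headers, timeout=10)
print(resp.status_code)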
fakeRequests.py (project: uname_ctf-tools, author: unamecorporation)
def geraUserAgente():
    # "Generate user agent": return a random fake User-Agent string.
    ua = UserAgent()
    ua.update()  # refresh the cached browser data (fake-useragent 0.1.x API)
    user = ua.random
    return str(user)
fakeRequests.py (project: uname_ctf-tools, author: unamecorporation)
def Help():
    os.system("setterm -foreground white")
    print('''
Usage: python fakeRequests.py
The web address is the page you want to access
with a fake UserAgent.
    ''')
middlewares.py (project: Charlotte, author: LiZoRN)
def __init__(self, crawler):
        super(RandomUserAgentMiddlware, self).__init__()
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")
rotate_user_agent.py (project: Charlotte, author: LiZoRN)
def __init__(self, crawler):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
        self.per_proxy = crawler.settings.get('RANDOM_UA_PER_PROXY', False)
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')
        self.proxy2ua = {}
useragent_middleware.py (project: AmazonScraping, author: santoshghimire)
def __init__(self):
        super(RandomUserAgentMiddleware, self).__init__()

        self.ua = UserAgent()
common.py (project: brush, author: chenshiyang2015)
def getUA():
    ua = UserAgent()
    return ua.random
common.py (project: brush, author: chenshiyang2015)
def get_user_agent():
    if platform.uname()[0] == 'Windows':
        ua = UserAgent()
        return ua.random
    else:
        with codecs.open('/home/rd/fake_useragent.json', encoding='utf-8', mode='rb') as fp:
            s = json.load(fp)

        attr = s['randomize'][str(random.randint(0, len(s['randomize']) - 1))]
        return s['browsers'][attr][random.randint(0, len(s['browsers'][attr]) - 1)]
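
The fallback branch reads fake-useragent's cached JSON directly; the lookups above imply a 'randomize' index mapping onto browser names and a 'browsers' dict mapping names onto lists of User-Agent strings (an assumption based on the fake-useragent 0.1.x cache format, not verified against this file):

# Rough shape implied by the lookups above (hedged, not verified):
# {
#     "randomize": {"0": "chrome", "1": "firefox", ...},
#     "browsers": {"chrome": ["Mozilla/5.0 ... Chrome/...", ...], ...}
# }
print(get_user_agent())  # returns a User-Agent string on either platform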
proxy_list.py (project: brush, author: chenshiyang2015)
def getUA():
    ua = UserAgent()
    return ua.random

