Scrape.py 文件源码

python
阅读 32 收藏 0 点赞 0 评论 0

项目:TorScrapper 作者: ConanKapoor 项目源码 文件源码
def Scrape(url):
    timeout = 10
    socket.setdefaulttimeout(timeout)

    #Collecting html content.
    headers = {'User-Agent': 'TorScrapper - Onion scrapper | github.com/ConanKapoor/TorScrapper.git' }
    req = urllib.request.Request(url,None,headers)
    response = urllib.request.urlopen(req)

    #Using BeautifulSoup to parse html object response.
    page = BeautifulSoup(response.read(),'html.parser')

    #Saving output
    token = re.sub(r'[^\w]', '', url)
    name = os.path.abspath("") + '/Output/Scraped-' + token +'.html'
    file = open(name,'w')
    file.write(str(page))
    file.close()

# Taking input.
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号