core_extract_comments.py 文件源码-python代码片段

core_extract_comments.py 文件源码

python

阅读 22 收藏 0 点赞 0 评论 0

项目：amazon-reviews-scraper 作者: philipperemy 项目源码文件源码

def get_comments_based_on_keyword(search):
    """Search amazon.co.jp for a keyword and fetch comments for every hit.

    Builds a search-results URL for ``search``, loads it through
    ``get_soup``, and collects each ``<a class="s-access-detail-page">``
    anchor that contains an ``<h2>`` and whose ``href`` passes
    ``validators.url``. For every collected link the product id is
    extracted with ``extract_product_id`` and handed to
    ``get_comments_with_product_id``.

    Args:
        search: Raw (not yet URL-encoded) keyword string.

    Returns:
        None. The function has no return statement; its work is done
        through the calls to ``get_comments_with_product_id``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    logging.info('SEARCH = {}'.format(search))

    # The keyword is embedded twice in the query string. Without
    # percent-encoding, spaces, '&', '#', '+' or non-ASCII keywords would
    # produce a malformed or misinterpreted URL.
    keyword = quote_plus(search)
    url = 'http://www.amazon.co.jp/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=' + \
          keyword + '&rh=i%3Aaps%2Ck%3A' + keyword
    soup = get_soup(url)

    items = []
    for a in soup.find_all('a', class_='s-access-detail-page'):
        title = a.find('h2')
        link = a.get('href')
        # Keep only anchors that carry a title and a well-formed URL.
        if title is not None and validators.url(link):
            items.append((link, str(title.string)))
    logging.info('Found {} items.'.format(len(items)))

    for link, name in items:
        logging.debug('link = {}, name = {}'.format(link, name))
        product_id = extract_product_id(link)
        get_comments_with_product_id(product_id)