intelli.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:tellmeabout.coffee 作者: billyfung 项目源码 文件源码
def scrape_intelli():
    """Scrape the Intelligentsia catalogue and store every coffee listed.

    Fetches the catalogue JSON endpoint, builds one ``coffee_data`` dict per
    entry of its ``data`` list (downloading the product image as raw bytes)
    and hands each dict to ``add_or_update_coffee``. The number of new and
    updated coffees, plus any coffees reported as errors, are logged once
    all entries have been processed.

    Returns:
        None. Results are only reported through ``logging``.

    Raises:
        ValueError: if the catalogue response is not valid JSON, or an
            item's ``price`` cannot be converted to ``float``.
        KeyError: if the response has no ``data`` key or an item lacks one
            of the required fields (``productUrl``, ``original_name``,
            ``description``, ``country``, ``price``, ``small_image``).
    """
    # NOTE(review): presumably App Engine's urlfetch service; the deadline is
    # assumed to be in seconds -- confirm against the file's imports.
    urlfetch.set_default_fetch_deadline(10)
    roaster = 'Intelligentsia'
    intelli = 'https://www.intelligentsiacoffee.com/catalog/ajax/products/?filter%5Bcat%5D=5'
    # The endpoint returns JSON, so decode it directly; there is no HTML to
    # parse here.
    catalogue = requests.get(intelli).json()

    total_coffees = len(catalogue['data'])
    coffees_entered = 0
    coffees_updated = 0
    error_coffees = []
    for item in catalogue['data']:
        product_url = item['productUrl']
        logging.info("Getting url: {}".format(product_url))

        # The flavour text is optional: treat a missing key, a JSON null and
        # an empty string alike so that one incomplete product does not
        # abort the whole scrape.
        raw_notes = item.get('flavor_profile_text')
        notes = raw_notes.split(',') if raw_notes else [""]

        # The endpoint exposes only the path of the image; prefix it with
        # the media root and store the downloaded bytes.
        image_url = 'https://www.intelligentsiacoffee.com/media/catalog/product' + item[
            'small_image']
        image_blob = requests.get(image_url).content

        coffee_data = {
            'name': item['original_name'],
            'roaster': roaster,
            'description': item['description'],
            'price': float(item['price']),
            'notes': notes,
            'region': item['country'],
            'active': True,
            'product_page': product_url,
            # The bag size is not read from the response; it is hard-coded.
            'size': '12oz',
            'image': image_blob,
        }
        coffees_updated, coffees_entered, error_coffees = add_or_update_coffee(
            coffee_data, coffees_updated, coffees_entered, error_coffees)

    logging.info('Intelligentsia New Results:{} / {}'.format(coffees_entered,
                                                             total_coffees))
    logging.info('Intelligentsia Updated Results:{} / {}'.format(
        coffees_updated, total_coffees))
    if error_coffees:
        logging.warning('Intelligensia Error coffees are: {}'.format(
            error_coffees))
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号