utils.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:prestashop-sync 作者: dragoon 项目源码 文件源码
def _fetch_document(url):
    """Download *url* and return its body parsed by ``document_fromstring``."""
    response = urllib2.urlopen(url)
    try:
        return document_fromstring(response.read())
    finally:
        # The crawl opens one connection per page/member; close each promptly.
        response.close()


def search_shops_on_forum(force=False):
    """Crawl the PrestaShop forum member list and record signature links.

    The crawl has two phases:

    1. Walk the paginated member listing (500 members per page), storing a
       ``Member`` row for every profile link together with the listing page
       it was found on.
    2. For every ``Member`` found on or after the starting page, fetch the
       profile page and store each link in the signature as a ``ShopLink``.

    :param force: when true, start again from listing page 1; otherwise
        resume from the highest ``page_number`` already stored (or page 1
        if no members are stored yet).

    NOTE(review): ``Member`` / ``ShopLink`` are presumably Django models
    declared elsewhere in the project -- confirm their field constraints.
    """
    step = 500

    if force:
        last_page = 1
    else:
        # aggregate() returns a dict ({'page_number__max': value}), which is
        # always truthy, so the value has to be looked up explicitly. It is
        # None when the table is empty, hence the fallback to page 1.
        stored_max = Member.objects.aggregate(Max('page_number'))['page_number__max']
        last_page = stored_max or 1
    page_number = last_page

    page_url = 'http://www.prestashop.com/forums/members/page__sort_key__members_display_name__sort_order__asc__max_results__%d__st__%d' % (step, (last_page - 1) * step)
    while page_url:
        page = _fetch_document(page_url)
        # ':first' is a jQuery extension that cssselect does not understand;
        # ':first-of-type' picks the first anchor inside each member heading.
        for member in page.cssselect('ul.members li h3.bar a:first-of-type'):
            href = member.get('href')
            if not href:
                continue
            Member.objects.get_or_create(link=href, defaults={'page_number': page_number})

        # cssselect() returns a list; an empty list means this was the last
        # listing page, which ends the loop.
        next_links = page.cssselect('ul.pagination.left li.next a')
        page_url = next_links[0].get('href') if next_links else None
        page_number += 1

    for member in Member.objects.filter(page_number__gte=last_page):
        member_page = _fetch_document(member.link)
        for link in member_page.cssselect('div.general_box div.signature a'):
            href = link.get('href')
            if not href:
                # Anchors without a target carry no shop URL to record.
                continue
            ShopLink.objects.get_or_create(link=href, member=member)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号