def _fetch_document(url):
    """Download *url* and return its body parsed by ``document_fromstring``."""
    response = urllib2.urlopen(url)
    try:
        return document_fromstring(response.read())
    finally:
        # Release the connection even when reading or parsing fails.
        response.close()


def search_shops_on_forum(force=False):
    """Crawl the forum member list and record links found in signatures.

    Phase 1 walks the paginated member list, creating a ``Member`` row
    (keyed by profile link) for every member found and remembering the
    list page it was seen on.  Phase 2 opens the profile page of every
    member recorded on or after the starting page and stores each anchor
    found in the signature box as a ``ShopLink``.

    force -- when true, restart the crawl from page 1; otherwise resume
             from the highest ``page_number`` already stored (page 1 if
             no member has been stored yet).
    """
    step = 500  # members requested per list page

    if force:
        last_page = 1
    else:
        # aggregate() returns {'page_number__max': value}; the value is
        # None when the table is empty, hence the fallback to page 1.
        stored_max = Member.objects.aggregate(Max('page_number'))
        last_page = stored_max['page_number__max'] or 1
    page_number = last_page

    page_url = 'http://www.prestashop.com/forums/members/page__sort_key__members_display_name__sort_order__asc__max_results__%d__st__%d' % (step, (last_page - 1) * step)

    # Phase 1: collect member profile links, following "next" links.
    while page_url:
        page = _fetch_document(page_url)
        for header in page.cssselect('ul.members li h3.bar'):
            # cssselect has no jQuery-style ':first'; take the first
            # anchor of each member header explicitly.
            anchors = header.cssselect('a')
            if not anchors:
                continue
            href = anchors[0].get('href')
            if not href:
                continue
            Member.objects.get_or_create(
                link=href, defaults={'page_number': page_number})

        # cssselect returns a list; no "next" anchor means this was the
        # last page, which ends the loop.
        next_links = page.cssselect('ul.pagination.left li.next a')
        page_url = next_links[0].get('href') if next_links else None
        page_number += 1

    # Phase 2: visit each member seen from the starting page onwards.
    for member in Member.objects.filter(page_number__gte=last_page):
        member_page = _fetch_document(member.link)
        for link in member_page.cssselect('div.general_box div.signature a'):
            href = link.get('href')
            if not href:
                # Anchor without a target: nothing to record.
                continue
            ShopLink.objects.get_or_create(link=href, member=member)
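# Scraped web-page residue (not part of the program): "Comment list"
# Scraped web-page residue (not part of the program): "Table of contents"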