def scrape_thread_list(self, threads, count):
    """Scrape the messages of every thread in a paged thread listing.

    Args:
        threads: Decoded Graph-API style response: a dict with a ``'data'``
            list of thread objects and an optional ``'paging' -> 'next'`` URL.
        count: Maximum number of listing pages to process (the original page
            counts as one, so ``count=1`` disables pagination).

    Side effects:
        Writes one header row (thread permalink) plus one row per message to
        ``self.writer``; prints a progress line per thread; fetches follow-up
        pages over the network.
    """
    # Pagination is handled with a loop rather than the original tail
    # recursion, so an arbitrarily long chain of 'next' pages cannot
    # exhaust Python's recursion limit. Observable behavior is unchanged.
    while True:
        for t in threads['data']:
            # Optional time-window filters are appended verbatim to the query string.
            extra_params = ((('&since=' + self.since) if self.since else '')
                            + (('&until=' + self.until) if self.until else ''))
            url = self.build_url(
                '{}/messages?fields=from,created_time,message,shares,attachments&limit=400'
                + extra_params,
                t['id'])
            # unidecode keeps the progress line ASCII-safe regardless of the
            # participant's display-name script.
            print("GET", unidecode.unidecode(t['participants']['data'][0]['name']), t['id'])
            thread = self.scrape_thread(url, [])
            if thread:
                # Thread header row: only the permalink is emitted. (The
                # original had page/user id+name columns commented out;
                # that dead code is removed here.)
                self.writer.writerow({
                    'url': t['link'],
                })
                # Map participant ids to display names so each message row
                # carries a readable 'from' value.
                id_map = {p['id']: p['name'] for p in t['participants']['data']}
                # Messages appear to arrive newest-first; reversed() emits
                # them chronologically. NOTE(review): id_map lookup assumes
                # every message sender is still listed as a participant —
                # confirm against the API's behavior for departed members.
                for message in reversed(thread):
                    message['from'] = id_map[message['from_id']]
                    self.writer.writerow(message)
        # 'next_url' (not 'next') avoids shadowing the builtin.
        next_url = threads.get('paging', {}).get('next', '')
        if not (next_url and count > 1):
            break
        count -= 1
        threads = requests.get(next_url).json()
# (removed web-page scraping residue that followed the code:
#  "评论列表" = "comment list", "文章目录" = "article table of contents" —
#  as bare statements these would raise NameError at import time)