def scrape_thread_list(self, threads, count):
    """Scrape the messages of every thread in a paged thread listing.

    Args:
        threads: Decoded Graph-API style response: a dict with a ``'data'``
            list of thread objects and an optional ``'paging' -> 'next'`` URL.
        count: Maximum number of listing pages to process (the original page
            counts as one, so ``count=1`` disables pagination).

    Side effects:
        Writes one header row (thread permalink) plus one row per message to
        ``self.writer``; prints a progress line per thread; fetches follow-up
        pages over the network.
    """
    # Pagination is handled with a loop rather than the original tail
    # recursion, so an arbitrarily long chain of 'next' pages cannot
    # exhaust Python's recursion limit. Observable behavior is unchanged.
    while True:
        for t in threads['data']:
            # Optional time-window filters are appended verbatim to the query string.
            extra_params = ((('&since=' + self.since) if self.since else '')
                            + (('&until=' + self.until) if self.until else ''))
            url = self.build_url(
                '{}/messages?fields=from,created_time,message,shares,attachments&limit=400'
                + extra_params,
                t['id'])
            # unidecode keeps the progress line ASCII-safe regardless of the
            # participant's display-name script.
            print("GET", unidecode.unidecode(t['participants']['data'][0]['name']), t['id'])
            thread = self.scrape_thread(url, [])
            if thread:
                # Thread header row: only the permalink is emitted. (The
                # original had page/user id+name columns commented out;
                # that dead code is removed here.)
                self.writer.writerow({
                    'url': t['link'],
                })
                # Map participant ids to display names so each message row
                # carries a readable 'from' value.
                id_map = {p['id']: p['name'] for p in t['participants']['data']}
                # Messages appear to arrive newest-first; reversed() emits
                # them chronologically. NOTE(review): id_map lookup assumes
                # every message sender is still listed as a participant —
                # confirm against the API's behavior for departed members.
                for message in reversed(thread):
                    message['from'] = id_map[message['from_id']]
                    self.writer.writerow(message)
        # 'next_url' (not 'next') avoids shadowing the builtin.
        next_url = threads.get('paging', {}).get('next', '')
        if not (next_url and count > 1):
            break
        count -= 1
        threads = requests.get(next_url).json()
# (removed web-page scraping residue that followed the code:
#  "评论列表" = "comment list", "文章目录" = "article table of contents" —
#  as bare statements these would raise NameError at import time)