scraper.py 文件源码-python代码片段

scraper.py 文件源码
python
阅读 26 收藏 0 点赞 0 评论 0
def get_posts(*urls: str, **kwargs) -> list:
    '''
    Scrape one or more threads and return all of their posts as one flat list.

    Args:
        *urls (str): Urls, where each url is a unique thread.
        verbose (bool): Verbosity. Forwarded unchanged to scrape_all_posts.
        cache (bool): Cache results across calls. Forwarded unchanged to
            scrape_all_posts.
        disambiguate_threads (bool): When scraping multiple threads, prepend a
            banner linking to the thread url to the html of that thread's
            first post, to show which thread the posts come from. Defaults to
            True. Consumed here; not forwarded to scrape_all_posts.

    Returns:
        list: The posts of every thread, concatenated in the order the urls
        were given. Empty when no urls are passed.
    '''
    # Local import keeps this block self-contained; html.escape is stdlib.
    from html import escape

    # Pop so the flag is not passed on to scrape_all_posts with the rest.
    disambiguate_threads = kwargs.pop('disambiguate_threads', True)
    # A banner is only useful when posts of several threads are mixed.
    add_banner = disambiguate_threads and len(urls) > 1

    all_posts = []
    for url in urls:
        posts = scrape_all_posts(url, **kwargs)
        # Guard against threads that yielded no posts: there is no first post
        # to decorate, and indexing posts[0] would raise IndexError.
        if add_banner and posts:
            # Escape the url so quotes, '&' or '<' in it cannot break out of
            # the href attribute or the heading text.
            safe_url = escape(str(url), quote=True)
            # NOTE(review): this mutates the post object returned by
            # scrape_all_posts in place. If cache=True hands back the same
            # objects on later calls, the banner could be prepended again --
            # confirm against scrape_all_posts.
            posts[0].html = '''
            <div style="background-color: #3B6796;">
                <a href="{0}"><h1 style="font-size: 40px; color: white;">{0}</h1></a>
            </div>'''.format(safe_url) + posts[0].html
        all_posts.extend(posts)

    return all_posts