def scrape(screen_name, since_date, until_date, include_retweets=True, wait_secs=5):
    """Collect the tweet ids listed by a Twitter search for one account.

    Starts a Chrome session, loads the search results for tweets from
    ``screen_name`` between the two dates, and keeps scrolling to the bottom
    of the page until a scroll no longer adds any ``original-tweet``
    elements.

    Args:
        screen_name: Account name placed in the ``from:`` search operator.
        since_date: Value for the ``since:`` operator. Must provide
            ``isoformat()`` (presumably a ``datetime.date`` -- confirm
            against callers).
        until_date: Value for the ``until:`` operator. Same requirement as
            ``since_date``.
        include_retweets: When true, ``include:retweets`` is added to the
            search query.
        wait_secs: Seconds used both as the driver's implicit wait and as
            the pause after each scroll.

    Returns:
        A set holding the ``data-tweet-id`` attribute of every element with
        class ``original-tweet`` found on the fully scrolled page.
    """
    log.info("Scraping %s since %s until %s", screen_name, since_date, until_date)

    query = "from:{}+since:{}+until:{}".format(
        screen_name, since_date.isoformat(), until_date.isoformat())
    if include_retweets:
        # The operator must be part of the q parameter. Appending it to the
        # finished URL would attach it to the trailing src parameter instead.
        query += "+include:retweets"
    url = "https://twitter.com/search?f=tweets&vertical=default&q={}&src=typd".format(query)

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(wait_secs)
        log.debug("Getting %s", url)
        driver.get(url)

        scroll_count = 0
        last_tweet_count = 0
        while True:
            tweets = driver.find_elements_by_class_name("original-tweet")
            # Stop once a scroll produced no additional tweets (this also
            # covers a result page that is empty from the start).
            if len(tweets) == last_tweet_count:
                break
            scroll_count += 1
            last_tweet_count = len(tweets)
            log.debug("Scrolling down %s. Found %s tweets.", scroll_count, last_tweet_count)
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # Give the page time to load the next batch before re-counting.
            time.sleep(wait_secs)

        return {tweet.get_attribute("data-tweet-id") for tweet in tweets}
    finally:
        # quit() closes every window and ends the driver process; a separate
        # close() beforehand is redundant and, if it raised, would skip quit().
        driver.quit()
评论列表
文章目录