def scrap_twitlonger(twitlonger):
    """
    Scrape a twitlonger post and build a power-outage announcement.

    Downloads ``http://www.twitlonger.com/show/<twitlonger>``, reads the
    first text node of the post body and extracts the list of affected
    circuits, i.e. the text found between the "detallados a continuacion"
    marker phrase and the trailing hashtag.

    Args:
        twitlonger: ID of the twitlonger post; interpolated into the URL.

    Returns:
        The announcement ``"La luz se ira a las <time> en <circuits>"``.
        ``<time>`` is the first ``digits:digits`` token found in the post;
        if there is none but the post says "En momentos", the current
        local time of this machine is used instead.
        ``None`` when the post body cannot be located, the circuits marker
        is missing, or the post carries neither kind of time hint.

    TODO (original author's note): account for the GMT offset, which may
    change, instead of relying on the local clock.
    """
    # Unicode literals replace str(...).decode('utf-8'): they yield the same
    # text on Python 2 and, unlike str.decode, also work on Python 3.
    circuits_marker = u'detallados a continuación: '
    hashtag_marker = u' #ElNiñoNoEsJuego'

    # A timeout keeps a stalled server from blocking the caller forever.
    page = requests.get(
        'http://www.twitlonger.com/show/%s' % twitlonger, timeout=10)
    tree = html.fromstring(page.content)
    text_nodes = tree.xpath('/html/body/div[2]/div[1]/div[3]/div/p[1]/text()')
    if not text_nodes:
        # Page layout changed or the post does not exist: nothing to parse.
        return None
    body = text_nodes[0]

    pieces = body.split(circuits_marker)
    if len(pieces) < 2:
        # The post does not contain the expected circuits section.
        return None
    circuits = pieces[1].split(hashtag_marker)[0]

    stated_time = re.search(r'[0-9]+:[0-9]+', body)
    if stated_time:
        return "La luz se ira a las " + stated_time.group(0) + " en " + circuits

    if 'En momentos' in body:
        # "En momentos" (any moment now): report the current local time.
        current_time = str(datetime.datetime.now().time())
        return "La luz se ira a las " + current_time + " en " + circuits

    # No explicit time and no "En momentos" hint in the post.
    return None
评论列表
文章目录