def default(self):
    """Download the page at the given resource location and save its transcript.

    The URL is read from ``self.app.pargs.resource_location``. The page is
    fetched and parsed; the slugified page title becomes the output file
    name, and the ``content`` attribute of the first element matching
    ``itemprop="transcript"`` is written, UTF-8 encoded, to ``<title>.txt``
    (a relative path, so it lands in the current working directory).

    If no resource location was supplied, or the page lacks a ``<title>``
    or a transcript element/attribute, a message is logged and nothing is
    written. Errors raised while fetching the URL or writing the file are
    not caught here and propagate to the caller.
    """
    url = self.app.pargs.resource_location
    if not url:
        self.app.log.info("Please provide a resource location using -rl flag")
        return

    # Only the download needs the connection; release it before parsing.
    with closing(urllib2.urlopen(url)) as sf:
        content = sf.read()

    # NOTE(review): no parser is named here, so the parsing library picks
    # one itself -- confirm whether an explicit parser should be pinned.
    soup = BeautifulSoup(content)

    # A page without <title> yields None here; previously this crashed with
    # AttributeError on ``None.getText()``.
    if soup.title is None:
        self.app.log.error("Could not find a page title to name the transcript file")
        return
    title = slugify(soup.title.getText())

    # Guard both the "no matching element" (IndexError) and the "element
    # without a content attribute" (KeyError) cases.
    transcript_tags = soup(itemprop='transcript')
    transcript = transcript_tags[0].attrs.get('content') if transcript_tags else None
    if transcript is None:
        self.app.log.error("Could not find a transcript on the page")
        return

    self.app.log.info("Succesfully parsed page")

    file_text = "{0}.txt".format(title.encode('utf-8'))
    with open(file_text, 'w') as transcript_file:
        transcript_file.write(transcript.encode('utf-8'))
    self.app.log.info(u"Wrote transcript to: {0}".format(file_text.decode('utf-8')))
评论列表
文章目录