def _get_quotes(self):
    '''Gets book's quote data.

    Looks for a "More quotes" action link on the already-parsed book page
    (``self._page_source``). If one exists, its ``href`` is fetched with
    ``open_url`` over ``self._connection`` and quotes are read from the
    ``div.quoteText`` elements of that page; otherwise quotes are read from
    the "Quotes from" box embedded in the book page itself.

    Each quote has runs of whitespace collapsed to a single space, is
    stripped, and has non-ASCII characters dropped.

    Returns:
        A list of cleaned quote strings (possibly empty), or ``None`` when
        there is no page source, when ``open_url`` returns a falsy response,
        or when the fetched page parses to ``None``.
    '''
    if self._page_source is None:
        return None

    def _clean(text):
        # Collapse whitespace, then drop non-ASCII characters. Encoding with
        # errors='ignore' and decoding back works on text strings in both
        # Python 2 and 3, whereas calling .decode() directly on text does not.
        collapsed = re.sub(r'\s+', ' ', text).strip()
        return collapsed.encode('ascii', 'ignore').decode('ascii')

    more_links = self._page_source.xpath('//a[@class="actionLink" and contains(., "More quotes")]')
    if more_links:
        # A dedicated quotes page exists: fetch and parse it.
        resp = open_url(self._connection, more_links[0].get('href'))
        if not resp:
            return None
        quotes_page = html.fromstring(resp)
        if quotes_page is None:
            return None
        quote_nodes = quotes_page.xpath('//div[@class="quoteText"]')
    else:
        # No separate page: use the quotes box on the book page.
        quote_nodes = self._page_source.xpath('//div[@class=" clearFloats bigBox" and contains(., "Quotes from")]//div[@class="bigBoxContent containerWithHeaderContent"]//span[@class="readable"]')

    # An element with no leading text exposes ``text`` as None; skip those
    # instead of letting re.sub raise TypeError and lose every quote.
    return [_clean(node.text) for node in quote_nodes if node.text is not None]
goodreads_parser.py 文件源码
python
阅读 19
收藏 0
点赞 0
评论 0
评论列表
文章目录