goodreads_parser.py 文件源码-python代码片段

goodreads_parser.py 文件源码

python

阅读 23 收藏 0 点赞 0 评论 0

项目：X-Ray_Calibre_Plugin 作者: szarroug3 项目源码文件源码

def _get_quotes(self):
        '''Gets book's quote data'''
        if self._page_source is None:
            return

        quotes_page = self._page_source.xpath('//a[@class="actionLink" and contains(., "More quotes")]')
        quotes = []
        if len(quotes_page) > 0:
            resp = open_url(self._connection, quotes_page[0].get('href'))
            if not resp:
                return
            quotes_page = html.fromstring(resp)
            if quotes_page is None:
                return
            for quote in quotes_page.xpath('//div[@class="quoteText"]'):
                quotes.append(re.sub(r'\s+', ' ', quote.text).strip().decode('ascii', 'ignore'))
        else:
            for quote in self._page_source.xpath('//div[@class=" clearFloats bigBox" and contains(., "Quotes from")]//div[@class="bigBoxContent containerWithHeaderContent"]//span[@class="readable"]'):
                quotes.append(re.sub(r'\s+', ' ', quote.text).strip().decode('ascii', 'ignore'))

        return quotes