__init__.py 文件源码-python代码片段

__init__.py 文件源码

python

阅读 17 收藏 0 点赞 0 评论 0

项目：calibre_dangdang 作者: qunxyz 项目源码文件源码

def parse_tags(self, root):
    """Return the unique, filtered tag labels found under ``root``.

    Every node matched by ``self.tags_xpath`` contributes its ``text``
    (a missing text is treated as an empty string). The text is stripped
    and the separators ``,``, ``/`` and ``>`` are each turned into ``;``.

    A label is dropped when it is empty, when its lower-cased form (via
    ``icu_lower``) is one of the blacklisted whole labels, when any of its
    whitespace-separated lower-cased words is a blacklisted word, or when
    the same lower-cased label has already been collected.

    The result is a list in document order that keeps the original casing
    of the first occurrence of each label.
    """
    banned_words = {'kindle', 'a-z'}
    banned_labels = {'special features', 'by authors', 'authors & illustrators', 'books', 'new; used & rental textbooks'}
    already_seen = set()
    tags = []

    for node in root.xpath(self.tags_xpath):
        label = (node.text or '').strip()
        # Normalise every hierarchy/list separator to a semicolon.
        for separator in (',', '/', '>'):
            label = label.replace(separator, ';')
        if not label:
            continue

        # All comparisons are done on the lower-cased form.
        lowered = icu_lower(label)
        if lowered in banned_labels or lowered in already_seen:
            continue
        if not banned_words.isdisjoint(lowered.split()):
            continue

        tags.append(label)
        already_seen.add(lowered)

    return tags