def parse_tags(self, root):
    """Return the unique, non-excluded tag strings found under ``root``.

    Every node matched by ``self.tags_xpath`` contributes its ``text``
    (``None`` is treated as an empty string). The text is stripped, and
    the characters ``,``, ``/`` and ``>`` are each replaced by ``;``.

    A candidate is dropped when any of the following holds:

    * it is empty after normalisation;
    * its lower-cased form is one of the known boilerplate phrases;
    * any whitespace-separated word of its lower-cased form is a
      blocked word;
    * its lower-cased form has already been collected.

    :param root: object exposing ``xpath()``; the matched nodes must have
        a ``text`` attribute (presumably an lxml element -- confirm
        against the caller).
    :return: list of tags in first-seen order, with their original case.
    """
    # Single words that disqualify a tag wherever they appear in it.
    blocked_words = {'kindle', 'a-z'}
    # Whole phrases (lower-cased, already separator-normalised) to skip.
    blocked_phrases = {'special features', 'by authors', 'authors & illustrators', 'books', 'new; used & rental textbooks'}
    separators = (',', '/', '>')

    collected = []
    known = set()
    for node in root.xpath(self.tags_xpath):
        text = (node.text or '').strip()
        for sep in separators:
            text = text.replace(sep, ';')

        # icu_lower is provided elsewhere in the project; presumably an
        # ICU-backed lower-casing helper -- confirm.
        folded = icu_lower(text)
        words = frozenset(folded.split())

        if not text:
            continue
        if folded in blocked_phrases:
            continue
        if words.intersection(blocked_words):
            continue
        if folded in known:
            continue

        collected.append(text)
        known.add(folded)
    return collected
评论列表
文章目录