def __search(self, titles, year):
try:
query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
query = urlparse.urljoin(self.base_link, query)
t = [cleantitle.get(i) for i in set(titles) if i]
r = client.request(query)
r = dom_parser.parse_dom(r, 'div', attrs={'id': 'main'})
r = dom_parser.parse_dom(r, 'div', attrs={'class': 'panel-body'})
r = [(dom_parser.parse_dom(i.content, 'h4', attrs={'class': 'title-list'}), dom_parser.parse_dom(i.content, 'a', attrs={'href': re.compile('.*/year/.*')})) for i in r]
r = [(dom_parser.parse_dom(i[0][0].content, 'a', req='href'), i[1][0].content if i[1] else '0') for i in r if i[0]]
r = [(i[0][0].attrs['href'], i[0][0].content, re.sub('<.+?>|</.+?>', '', i[1])) for i in r if i[0] and i[1]]
r = [(i[0], i[1], i[2].strip()) for i in r if i[2]]
r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year
r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] == year][0]
return source_utils.strip_domain(r)
except:
return
评论列表
文章目录