def fuzzy_title(self, titles):
    ''' Score and remove results based on title match
    titles: list of titles to match against (may be empty or None)

    If titles is an empty list or None every result is treated as a
        perfect match and scores +20.

    Otherwise iterates through self.results and removes any entry whose
        best fuzzy (partial_ratio) match between its 'title' and any of
        titles is not > 70.
    Adds best_match / 5 points to ['score'] of every entry that is kept.
    Entries with ['type'] == 'import' skip the fuzzy match and score +20.

    Replaces self.results with the list of kept entries.

    Does not return
    '''
    logging.info(u'Checking title match.')

    kept = []
    if not titles:
        # Nothing to compare against (covers both [] and None), so every
        # result counts as a perfect match.
        for result in self.results:
            result['score'] += 20
            kept.append(result)
    else:
        # The candidate titles are the same for every result; encode once.
        encoded_titles = [Url.encode(title) for title in titles]
        for result in self.results:
            # NOTE(review): an 'import' result that compares equal to one
            # already kept falls through to fuzzy matching below. Preserved
            # from the original logic -- confirm this is intended.
            if result['type'] == 'import' and result not in kept:
                result['score'] += 20
                kept.append(result)
                continue
            test = Url.encode(result['title'])
            best = max(fuzz.partial_ratio(encoded, test) for encoded in encoded_titles)
            if best > 70:
                result['score'] += (best / 5)
                kept.append(result)
            else:
                logging.debug(u'{} best title match was {}%, removing search result.'.format(test, best))
    self.results = kept
    logging.info(u'Keeping {} results.'.format(len(self.results)))
评论列表
文章目录