def get_newest(base_url, url_pattern, links):
    '''
    Select the most recent entry of `links` whose text matches a dated
    url pattern.

    Only the first '/'-separated component of `url_pattern` takes part in
    the matching: it is appended to `base_url` (joined with '/') and the
    result is used as a datetime.strptime() format against every link.
    Links that do not parse with that format are skipped. Any remaining
    components of `url_pattern` are appended, '/'-joined, to the url that
    is returned.

    Returns a tuple `(url, created)` where `created` is the datetime
    parsed from the link. When several links share the newest date, the
    one appearing first in `links` is returned.

    Raises Exception (after logging the same message as an error on the
    'auditor.srmdumps' logger) when no link matches.

    NOTE: `base_url` becomes part of the strptime() format unescaped, so
    a '%' inside it is interpreted as a format directive.
    '''
    logger = logging.getLogger('auditor.srmdumps')

    # Split off the dated component; whatever follows the first '/' is
    # carried over verbatim (with its separator) onto the returned url.
    dated_part, separator, remainder = url_pattern.partition('/')
    date_pattern = '{0}/{1}'.format(base_url, dated_part)
    postfix = separator + remainder

    candidates = []
    for link in links:
        try:
            created = datetime.datetime.strptime(link, date_pattern)
        except ValueError:
            # The link does not follow the dated pattern; ignore it.
            continue
        candidates.append((str(link) + postfix, created))

    if candidates:
        # max() keeps the first of equally-new candidates.
        return max(candidates, key=operator.itemgetter(1))

    msg = 'No links found matching the pattern {0} in {1}'.format(date_pattern, links)
    logger.error(msg)
    raise Exception(msg)
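# [web-page residue, not part of the code] "Comment list"
# [web-page residue, not part of the code] "Article table of contents"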