def fn_getdatetime_list(fn):
    """Extract all datetime strings from input filename

    Only the last path component of ``fn``, with its extension removed, is
    searched. Timestamp patterns are tried from most to least specific and
    the first pattern that produces any match supplies the whole result;
    later (less specific) patterns are not consulted after that. A filename
    mixing e.g. YYYYMMDD_HHMM and YYYYMMDD stamps therefore only yields the
    more specific ones.

    Parameters
    ----------
    fn : str
        Filename or path to search.

    Returns
    -------
    list
        One ``strptime_fuzzy`` result per timestamp string found, in order
        of appearance (presumably datetime objects - confirm against
        ``strptime_fuzzy``). Empty list if nothing is found.
    """
    import re

    #Want to split last component
    fn = os.path.split(os.path.splitext(fn)[0])[-1]

    #Patterns are deliberately restrictive to avoid false matches on ID
    #fields, e.g. the '20151700' embedded in:
    #WV02_20130315_10300100207D5600_1030010020151700

    #A timestamp must start the name or directly follow '_' or '-'.
    #Zero-width assertions are used so that the separators are not consumed:
    #findall is non-overlapping, and a consumed trailing separator would hide
    #an immediately following timestamp (20130304_20130305_x).
    lead = r'(?:^|(?<=[_-]))'
    #Date-only and year-only stamps must also be followed by '_' or '-'
    trail = r'(?=[_-])'

    year = r'(?:19|20)[0-9][0-9]'
    month = r'(?:0[1-9]|1[012])'
    day = r'(?:0[1-9]|[12][0-9]|3[01])'
    #NOTE: these don't include seconds in the time
    hhmm = r'(?:0[0-9]|1[0-9]|2[0-3])[0-5][0-9]'
    date = year + month + day

    #Ordered from most to least specific
    patterns = (
        #YYYYMMDD_HHMM or YYYYMMDDTHHMM
        lead + date + r'[_T]' + hhmm,
        #YYYYMMDDHHMM
        lead + date + hhmm,
        #YYYYMMDD
        lead + date + trail,
        #YYYY
        lead + year + trail,
        #This is for USGS archive filenames (looks like DDmonYY, lowercase
        #month abbreviation)
        r'[0-3][0-9][a-z][a-z][a-z][0-9][0-9]',
    )

    dstr = []
    for pattern in patterns:
        dstr = re.findall(pattern, fn)
        if dstr:
            break

    #This returns an empty list if nothing is found
    return [strptime_fuzzy(s) for s in dstr]
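#(web page residue, not part of the module: "Comment list")
#(web page residue, not part of the module: "Table of contents")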