def avg_wl(self, numObs=50, avgtype='stdWL', grptype='bytime', grper='12M'):
    """Summarize site-normalized water levels over the well-sampled sites.

    The records returned by ``self.cleanGWL(self.data)`` are restricted to sites
    that have at least ``numObs`` rows.  For every remaining site the ``value``
    column is centred on that site's own mean (column ``avgDiffWL``) and the
    centred value is divided by that site's standard deviation (column
    ``stdWL``).  The column selected by ``avgtype`` is then grouped and
    summarized.

    :param numObs: minimum number of rows a site needs to be included; default is 50
    :param avgtype: column to summarize; options are 'avgDiffWL' and 'stdWL'; default is 'stdWL'
    :param grptype: 'bytime' groups the datetime index at the frequency ``grper``;
        'monthly' groups by the calendar month of the index; any other value is
        handed to ``groupby`` unchanged as the grouping key; default 'bytime'
    :param grper: pandas frequency string, only used when ``grptype`` is 'bytime'; default '12M'
    :return: DataFrame with one row per group and the columns 'mean', 'median',
        'standard', 'cnt', 'err_pls' and 'err_min' (mean +/- 1.96 standard deviations)
    """
    data = self.cleanGWL(self.data)
    # The frame returned by cleanGWL is re-indexed in place, exactly as before.
    # NOTE(review): if cleanGWL hands back self.data itself, this mutates it - confirm.
    data.reset_index(inplace=True)
    data.set_index(['datetime'], inplace=True)

    # Keep only the sites that have enough rows.  The explicit copy lets the
    # derived columns below be added without writing into a slice of ``data``.
    site_size = data.groupby('site_no').size()
    kept_sites = site_size[site_size >= numObs].index
    wl_long = data[data['site_no'].isin(kept_sites)].copy()

    # Per-site normalization in one vectorized pass.  ``transform`` returns a
    # result carrying the same index as ``wl_long``, so the arithmetic lines up
    # row for row.  This replaces the removed ``.ix`` indexer and the per-site loop.
    site_values = wl_long.groupby('site_no')['value']
    wl_long['avgDiffWL'] = wl_long['value'] - site_values.transform('mean')
    wl_long['stdWL'] = wl_long['avgDiffWL'] / site_values.transform('std')

    if grptype == 'bytime':
        # pd.Grouper(freq=...) is the supported replacement for pd.TimeGrouper.
        grp = pd.Grouper(freq=grper)
    elif grptype == 'monthly':
        grp = wl_long.index.month
    else:
        grp = grptype

    # np.std defaults to ddof=0, whereas the 'standard' column uses pandas'
    # groupby std; kept as in the original - TODO confirm this is intended.
    def _upper_bound(x):
        return np.mean(x) + (np.std(x) * 1.96)

    def _lower_bound(x):
        return np.mean(x) - (np.std(x) * 1.96)

    # A list of (column name, function) pairs is accepted by old and new pandas
    # alike; the dict-renaming form used previously is rejected by pandas >= 1.0.
    summary_spec = [
        ('mean', 'mean'),
        ('median', 'median'),
        ('standard', 'std'),
        ('cnt', 'count'),  # number of non-missing values in the group
        ('err_pls', _upper_bound),
        ('err_min', _lower_bound),
    ]
    wl_stats = wl_long.groupby([grp])[avgtype].agg(summary_spec)
    return wl_stats
# Page-navigation residue from the web source ("Comment list", "Table of contents"); not part of the code.