def weighted_thin(weights, thin_unit):
    """Thin a weighted sample by keeping the heaviest sample in each bin.

    The sample positions ``0 .. N-1`` are split into contiguous bins of
    (nearly) equal width and, within every bin, the position holding the
    largest weight is retained.  When all weights are equal the first
    position of each bin is kept, so the result is roughly a regular
    stride through the samples.

    Parameters
    ----------
    weights : array-like
        One-dimensional sequence of sample weights.
    thin_unit : int or float
        * ``0``       -- no thinning (see Returns).
        * ``0 < x < 1`` -- fraction of samples to keep; the number of bins
          is ``int(N * thin_unit)``.
        * ``>= 1``    -- thinning factor; the number of bins is
          ``N // thin_unit``.

    Returns
    -------
    tuple or dict
        For ``thin_unit == 0`` the tuple ``(range(N), weights)`` with
        ``weights`` passed through untouched.  Otherwise a dict with keys
        ``'ix'`` (retained positions, ``np.intp`` array) and ``'w'`` (the
        weights at those positions).  The two shapes differ for historical
        reasons and are both preserved for existing callers.

    Raises
    ------
    ValueError
        If ``thin_unit`` is negative.
    """
    N = len(weights)
    if thin_unit == 0:
        return range(N), weights
    if thin_unit < 0:
        raise ValueError(
            'thin_unit must be non-negative, got {}'.format(thin_unit))

    # Accept lists/tuples as well as arrays: the fancy indexing at the end
    # needs an ndarray.
    weights = np.asarray(weights)

    if thin_unit < 1:
        N2 = int(N * thin_unit)
    else:
        # Floor division by a float yields a float; linspace needs an int.
        N2 = int(N // thin_unit)
    # Zero bins would still collapse everything into a single group, so make
    # the "at least one bin" behaviour explicit.
    N2 = max(N2, 1)

    if N == 0:
        # Nothing to bin; avoid running a groupby on empty data.
        thin_ix = np.array([], dtype=np.intp)
    else:
        # Bin edges over the position axis; starting at -1 keeps position 0
        # strictly inside the first bin.
        bins = np.linspace(-1, N, N2 + 1)
        # Bin label (1 .. N2) for every position in the weight array.
        ind = np.digitize(np.arange(N), bins)
        # Position of the maximum weight in each bin (first one on ties).
        thin_ix = pd.Series(weights).groupby(ind).idxmax().tolist()
        thin_ix = np.array(thin_ix, dtype=np.intp)

    logger.info('Thinning with weighted binning: thinfrac={}. new_nsamples={},old_nsamples={}'.format(thin_unit, len(thin_ix), N))
    return {'ix': thin_ix, 'w': weights[thin_ix]}
# (Web-page navigation residue: "Comment list" / "Article contents" -- not part of the code.)