def calc_information(probTgivenXs, PYgivenTs, PXs, PYs):
    """Calculate the MI - I(X;T) and I(Y;T)."""
    PTs = np.nansum(probTgivenXs * PXs, axis=1)
    # np.spacing(1) guards log2(0); without it, 0 * -inf becomes NaN inside
    # np.dot, where np.nansum can no longer discard it.
    Ht = np.nansum(-np.dot(PTs, np.log2(PTs + np.spacing(1))))
    Htx = -np.nansum(np.dot(np.multiply(probTgivenXs, np.log2(probTgivenXs + np.spacing(1))), PXs))
    Hyt = -np.nansum(np.dot(PYgivenTs * np.log2(PYgivenTs + np.spacing(1)), PTs))
    Hy = np.nansum(-PYs * np.log2(PYs + np.spacing(1)))
    IYT = Hy - Hyt
    ITX = Ht - Htx
    return ITX, IYT
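# A hedged usage sketch for calc_information above, with invented toy
# distributions (2 inputs X, 3 hidden states T, 2 labels Y). The shapes assume
# probTgivenXs is |T| x |X|, so that weighting by PXs and summing over axis=1
# yields the marginal PTs, as the function body implies.
import numpy as np

PXs = np.array([0.5, 0.5])                    # p(x)
probTgivenXs = np.array([[0.7, 0.1],          # p(t|x); each column sums to 1
                         [0.2, 0.3],
                         [0.1, 0.6]])
PYgivenTs = np.array([[0.9, 0.5, 0.2],        # p(y|t); each column sums to 1
                      [0.1, 0.5, 0.8]])
PYs = PYgivenTs @ (probTgivenXs @ PXs)        # p(y) via the marginal p(t)
ITX, IYT = calc_information(probTgivenXs, PYgivenTs, PXs, PYs)
print(ITX, IYT)  # both non-negative, bounded by H(T) and H(Y) respectively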
def calc_information_1(probTgivenXs, PYgivenTs, PXs, PYs, PTs):
"""Calculate the MI - I(X;T) and I(Y;T)"""
#PTs = np.nansum(probTgivenXs*PXs, axis=1)
Ht = np.nansum(-np.dot(PTs, np.log2(PTs+np.spacing(1))))
Htx = - np.nansum((np.dot(np.multiply(probTgivenXs, np.log2(probTgivenXs+np.spacing(1))), PXs)))
Hyt = - np.nansum(np.dot(PYgivenTs*np.log2(PYgivenTs+np.spacing(1)), PTs))
Hy = np.nansum(-PYs * np.log2(PYs+np.spacing(1)))
IYT = Hy - Hyt
ITX = Ht - Htx
return ITX, IYT
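# The np.spacing(1) term above is one machine epsilon. It guards the p = 0
# entries: log2(0) is -inf, and 0 * -inf is NaN, which poisons the sum inside
# np.dot before np.nansum can discard it. A quick illustration:
import numpy as np

p = np.array([0.0, 0.5, 0.5])
print(-np.dot(p, np.log2(p)))                   # nan (0 * -inf inside the dot)
print(-np.dot(p, np.log2(p + np.spacing(1))))   # ~1.0, the correct entropy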
def calc_information(probTgivenXs, PYgivenTs, PXs, PYs, PTs):
"""Calculate the MI - I(X;T) and I(Y;T)"""
#PTs = np.nansum(probTgivenXs*PXs, axis=1)
    Ht = np.nansum(-np.dot(PTs, np.log2(PTs + np.spacing(1))))
    Htx = -np.nansum(np.dot(np.multiply(probTgivenXs, np.log2(probTgivenXs + np.spacing(1))), PXs))
Hyt = - np.nansum(np.dot(PYgivenTs*np.log2(PYgivenTs+np.spacing(1)), PTs))
Hy = np.nansum(-PYs * np.log2(PYs+np.spacing(1)))
IYT = Hy - Hyt
ITX = Ht - Htx
return ITX, IYT
def t_calc_information(p_x_given_t, PYgivenTs, PXs, PYs, PTs):
    """Calculate the MI - I(X;T) and I(Y;T)."""
    # PTs must be passed in: Hyt below references it (it was previously a NameError).
    Hx = np.nansum(-np.dot(PXs, np.log2(PXs + np.spacing(1))))
    Hxt = -np.nansum(np.dot(np.multiply(p_x_given_t, np.log2(p_x_given_t + np.spacing(1))), PXs))
    Hyt = -np.nansum(np.dot(PYgivenTs * np.log2(PYgivenTs + np.spacing(1)), PTs))
    Hy = np.nansum(-PYs * np.log2(PYs + np.spacing(1)))
    IYT = Hy - Hyt
    ITX = Hx - Hxt
    return ITX, IYT
def _fit_cdd_only(df, weighted=False):
bps = [i[4:] for i in df.columns if i[:3] == 'CDD']
best_bp, best_rsquared, best_mod, best_res = None, -9e9, None, None
best_formula, cdd_qualified = None, False
try: # TODO: fix big try block anti-pattern
for bp in bps:
candidate_cdd_formula = 'upd ~ CDD_' + bp
if (np.nansum(df['CDD_' + bp] > 0) < 10) or \
(np.nansum(df['CDD_' + bp]) < 20):
continue
if weighted:
candidate_cdd_mod = smf.wls(formula=candidate_cdd_formula, data=df,
weights=df['ndays'])
else:
candidate_cdd_mod = smf.ols(formula=candidate_cdd_formula, data=df)
candidate_cdd_res = candidate_cdd_mod.fit()
candidate_cdd_rsquared = candidate_cdd_res.rsquared_adj
if (candidate_cdd_rsquared > best_rsquared and
candidate_cdd_res.params['Intercept'] >= 0 and
candidate_cdd_res.params['CDD_' + bp] >= 0 and
candidate_cdd_res.pvalues['CDD_' + bp] < 0.1):
best_bp, best_rsquared = int(bp), candidate_cdd_rsquared
best_mod, best_res = candidate_cdd_mod, candidate_cdd_res
cdd_qualified = True
best_formula = 'upd ~ CDD_' + bp
    except Exception:  # TODO: catch specific error; at least avoid a bare except
best_rsquared, cdd_qualified = 0, False
best_formula, best_mod, best_res = None, None, None
best_bp = None
return best_formula, best_mod, best_res, best_rsquared, cdd_qualified, best_bp
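# A hypothetical smoke test for _fit_cdd_only (the symmetric _fit_hdd_only
# below behaves the same way on HDD_ columns). Column names and data are
# invented; statsmodels.formula.api is assumed to be imported as smf, as the
# function body implies.
import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
cdd = rng.uniform(0, 15, size=100)
toy = pd.DataFrame({
    'CDD_65': cdd,                                             # cooling degree days, base 65
    'upd': 2.0 + 0.5 * cdd + rng.normal(scale=0.1, size=100),  # usage per day
    'ndays': np.full(100, 30.0),                               # weights for the wls branch
})
formula, mod, res, rsq, qualified, bp = _fit_cdd_only(toy, weighted=True)
print(formula, round(rsq, 3), qualified, bp)  # 'upd ~ CDD_65', ~1.0, True, 65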
def _fit_hdd_only(df, weighted=False):
bps = [i[4:] for i in df.columns if i[:3] == 'HDD']
best_bp, best_rsquared, best_mod, best_res = None, -9e9, None, None
best_formula, hdd_qualified = None, False
try: # TODO: fix big try block anti-pattern
for bp in bps:
candidate_hdd_formula = 'upd ~ HDD_' + bp
if (np.nansum(df['HDD_' + bp] > 0) < 10) or \
(np.nansum(df['HDD_' + bp]) < 20):
continue
if weighted:
candidate_hdd_mod = smf.wls(formula=candidate_hdd_formula, data=df,
weights=df['ndays'])
else:
candidate_hdd_mod = smf.ols(formula=candidate_hdd_formula, data=df)
candidate_hdd_res = candidate_hdd_mod.fit()
candidate_hdd_rsquared = candidate_hdd_res.rsquared_adj
if (candidate_hdd_rsquared > best_rsquared and
candidate_hdd_res.params['Intercept'] >= 0 and
candidate_hdd_res.params['HDD_' + bp] >= 0 and
candidate_hdd_res.pvalues['HDD_' + bp] < 0.1):
best_bp, best_rsquared = int(bp), candidate_hdd_rsquared
best_mod, best_res = candidate_hdd_mod, candidate_hdd_res
hdd_qualified = True
best_formula = 'upd ~ HDD_' + bp
    except Exception:  # TODO: catch specific error; at least avoid a bare except
best_rsquared, hdd_qualified = 0, False
best_formula, best_mod, best_res = None, None, None
best_bp = None
return best_formula, best_mod, best_res, best_rsquared, hdd_qualified, best_bp
def calc_gross(self):
    """Total energy across all input records, ignoring NaNs."""
    return np.nansum(self.input_data.energy)
def get_relevance_scores(matched_predictions, positive_feedback, not_rated_penalty):
users_num = matched_predictions.shape[0]
reldata = get_relevance_data(matched_predictions, positive_feedback, not_rated_penalty)
true_pos, false_pos = reldata.tp, reldata.fp
true_neg, false_neg = reldata.tn, reldata.fn
with np.errstate(invalid='ignore'):
        # positive predictive value
precision = true_pos / (true_pos + false_pos)
# sensitivity
recall = true_pos / (true_pos + false_neg)
# false positive rate
fallout = false_pos / (false_pos + true_neg)
# true negative rate
specifity = true_neg / (false_pos + true_neg)
# false negative rate
miss_rate = false_neg / (false_neg + true_pos)
        # average over all users
precision = unmask(np.nansum(precision) / users_num)
recall = unmask(np.nansum(recall) / users_num)
fallout = unmask(np.nansum(fallout) / users_num)
specifity = unmask(np.nansum(specifity) / users_num)
miss_rate = unmask(np.nansum(miss_rate) / users_num)
scores = namedtuple('Relevance', ['precision', 'recall', 'fallout', 'specifity', 'miss_rate'])
scores = scores._make([precision, recall, fallout, specifity, miss_rate])
return scores
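# Note the averaging convention above: np.nansum(...) / users_num counts a
# user whose score is undefined (e.g. true_pos + false_pos == 0 makes the
# precision NaN) as zero, instead of dropping them from the denominator the
# way np.nanmean would. A small illustration of the difference:
import numpy as np

per_user_precision = np.array([1.0, 0.5, np.nan])  # third user: nothing matched
print(np.nansum(per_user_precision) / 3)           # 0.5  -- NaN counted as zero
print(np.nanmean(per_user_precision))              # 0.75 -- NaN excluded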
def get_ranking_scores(matched_predictions, feedback_data, switch_positive, alternative=True):
users_num, topk, holdout = matched_predictions.shape
ideal_scores_idx = np.argsort(feedback_data, axis=1)[:, ::-1] #returns column index only
ideal_scores_idx = np.ravel_multi_index((np.arange(feedback_data.shape[0])[:, None], ideal_scores_idx), dims=feedback_data.shape)
where = np.ma.where if np.ma.is_masked(feedback_data) else np.where
is_positive = feedback_data >= switch_positive
positive_feedback = where(is_positive, feedback_data, 0)
negative_feedback = where(~is_positive, -feedback_data, 0)
relevance_scores_pos = (matched_predictions * positive_feedback[:, None, :]).sum(axis=2)
relevance_scores_neg = (matched_predictions * negative_feedback[:, None, :]).sum(axis=2)
ideal_scores_pos = positive_feedback.ravel()[ideal_scores_idx]
ideal_scores_neg = negative_feedback.ravel()[ideal_scores_idx]
discount_num = max(holdout, topk)
if alternative:
discount = np.log2(np.arange(2, discount_num+2))
relevance_scores_pos = 2**relevance_scores_pos - 1
relevance_scores_neg = 2**relevance_scores_neg - 1
ideal_scores_pos = 2**ideal_scores_pos - 1
ideal_scores_neg = 2**ideal_scores_neg - 1
else:
discount = np.hstack([1, np.log(np.arange(2, discount_num+1))])
dcg = (relevance_scores_pos / discount[:topk]).sum(axis=1)
dcl = (relevance_scores_neg / -discount[:topk]).sum(axis=1)
idcg = (ideal_scores_pos / discount[:holdout]).sum(axis=1)
idcl = (ideal_scores_neg / -discount[:holdout]).sum(axis=1)
with np.errstate(invalid='ignore'):
ndcg = unmask(np.nansum(dcg / idcg) / users_num)
ndcl = unmask(np.nansum(dcl / idcl) / users_num)
ranking_score = namedtuple('Ranking', ['nDCG', 'nDCL'])._make([ndcg, ndcl])
return ranking_score
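# The two branches above build different discount vectors. For topk=3 and
# holdout=5 (illustrative values only) they come out as:
import numpy as np

topk, holdout = 3, 5
discount_num = max(holdout, topk)
print(np.log2(np.arange(2, discount_num + 2)))
# [1.    1.585 2.    2.322 2.585]  -- exponential-gain (alternative) nDCG
print(np.hstack([1, np.log(np.arange(2, discount_num + 1))]))
# [1.    0.693 1.099 1.386 1.609]  -- rank 1 undiscounted, natural log after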
def vwap(df):
"""
Volume-weighted average price (VWAP) is a ratio generally used by
institutional investors and mutual funds to make buys and sells so as not
to disturb the market prices with large orders. It is the average share
price of a stock weighted against its trading volume within a particular
time frame, generally one day.
    Read more: Volume Weighted Average Price - VWAP
    https://www.investopedia.com/terms/v/vwap.asp

    Parameters
    ----------
    df: pd.DataFrame
        Price data containing `close` and `volume` columns.

    Returns
    -------
    float
        The volume-weighted average price, or NaN if the total volume is zero.
    """
if 'close' not in df.columns or 'volume' not in df.columns:
raise ValueError('price data must include `volume` and `close`')
    vol_sum = np.nansum(df['volume'].values)
    # NumPy float division by zero does not raise ZeroDivisionError; it returns
    # inf/nan with a RuntimeWarning, so the guard must be explicit.
    if vol_sum == 0 or np.isnan(vol_sum):
        return np.nan
    return np.nansum(df['close'].values * df['volume'].values) / vol_sum
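# A quick sanity check for vwap with made-up prices and volumes. Note that a
# row with a NaN close drops out of the numerator via np.nansum, while its
# volume still counts in the denominator -- a quirk of this implementation.
import numpy as np
import pandas as pd

quotes = pd.DataFrame({'close':  [10.0, 11.0, np.nan, 12.0],
                       'volume': [100,  200,  50,     0]})
print(vwap(quotes))  # (10*100 + 11*200 + 12*0) / 350 = 9.142857...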
def _calculate(self, X, y, categorical, metafeatures, helpers):
res = np.nansum(helpers.get_value("NumSymbols"))
return res if np.isfinite(res) else 0
################################################################################
# Statistical meta features
# Only use third and fourth statistical moment because it is common to
# standardize for the other two
# see Engels & Theusinger, 1998 - Using a Data Metric for Preprocessing Advice for Data Mining Applications.
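# A sketch of that convention using scipy.stats (a hypothetical helper, not
# part of the original metafeature code): only the third and fourth
# standardized moments are collected, since mean and variance are fixed by
# standardization.
import numpy as np
from scipy.stats import kurtosis, skew

X = np.random.RandomState(0).lognormal(size=(500, 4))  # skewed toy columns
print([skew(X[:, j]) for j in range(X.shape[1])])      # third moments
print([kurtosis(X[:, j]) for j in range(X.shape[1])])  # fourth (excess) moments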
def trajectory_score_array(posterior, slope=None, intercept=None, w=None, weights=None, normalize=False):
"""Docstring goes here
This is the score that Davidson et al. maximizes, in order to get a linear trajectory,
but here we kind of assume that that we have the trajectory already, and then just score it.
w is the number of bin rows to include in score, in each direction. That is, w=0 is only the modes,
and w=1 is a band of width=3, namely the modes, and 1 bin above, and 1 bin below the mode.
The score is NOT averaged!"""
rows, cols = posterior.shape
if w is None:
w = 0
    if not float(w).is_integer():  # is_integer is a method; without the call the check never fires
        raise ValueError("w has to be an integer!")
if slope is None or intercept is None:
slope, intercept, _ = linregress_array(posterior=posterior)
x = np.arange(cols)
line_y = np.round((slope*x + intercept)) # in position bin #s
# idea: cycle each column so that the top w rows are the band surrounding the regression line
if np.isnan(slope): # this will happen if we have 0 or only 1 decoded bins
return np.nan
else:
temp = column_cycle_array(posterior, -line_y+w)
if normalize:
num_non_nan_bins = round(np.nansum(posterior))
else:
num_non_nan_bins = 1
return np.nansum(temp[:2*w+1,:])/num_non_nan_bins
def test_nsum(x):
assume(np.max(x[np.isfinite(x)]) < 1e4)
assume(np.min(x[np.isfinite(x)]) > -1e4)
aae(nsum(x), np.nansum(x))
def test_nsum_row(x):
assume(np.max(x[np.isfinite(x)]) < 1e4)
assume(np.min(x[np.isfinite(x)]) > -1e4)
aae(nsum_row(x), np.nansum(x, axis=1))
def test_preds_ll(alpha, mu, gamma, err, num, w):
current_impl = Lvm.preds_ll(alpha, mu, gamma, err, num, w)
simple_impl = np.nansum(w * norm.logpdf(num, mu+gamma*alpha, err))
simple_impl += np.sum(norm.logpdf(alpha))
assert_approx_equal(current_impl, simple_impl)
def ests_obj(self, params):
"""The objective function to minimize for the model parameters."""
# return -nsum(self.ests_ll(params))
return -np.nansum(self.ests_ll(params))
def nsum_row(a):
return nansum(a, axis=1)
def getJointNumFramesVisible(self, jointID):
"""
Get number of frames in which joint is visible
:param jointID: joint ID
:return: number of frames
"""
return numpy.nansum(self.gt[:, jointID, :]) / self.gt.shape[2] # 3D
def test_basic_stats(x):
s = SummaryStats()
s.update(x)
assert s.count() == np.count_nonzero(~np.isnan(x))
np.testing.assert_allclose(s.sum(), np.nansum(x), rtol=RTOL, atol=ATOL)
np.testing.assert_equal(s.min(), np.nanmin(x) if len(x) else np.nan)
np.testing.assert_equal(s.max(), np.nanmax(x) if len(x) else np.nan)
np.testing.assert_allclose(s.mean(), np.nanmean(x) if len(x) else np.nan,
rtol=RTOL, atol=ATOL)
np.testing.assert_allclose(s.var(), np.nanvar(x) if len(x) else np.nan,
rtol=RTOL, atol=ATOL)
np.testing.assert_allclose(s.std(), np.nanstd(x) if len(x) else np.nan,
rtol=RTOL, atol=ATOL)
def log_likelihood(y, yhat):
    '''Helper function to compute the Poisson log likelihood (up to the constant log(y!) term).'''
eps = np.spacing(1)
return np.nansum(y * np.log(eps + yhat) - yhat)
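# For example, scoring predicted Poisson rates against observed counts (toy
# numbers); a better-matched yhat yields a higher (less negative) likelihood.
import numpy as np

y = np.array([0, 1, 3, 2])                                 # observed counts
print(log_likelihood(y, np.array([0.1, 1.0, 3.0, 2.0])))   # ~ -1.42
print(log_likelihood(y, np.array([3.0, 3.0, 0.1, 0.1])))   # ~ -16.6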