def getMatrix(path, directed=False, log1p=False):
    matrix = np.zeros(shape=(NCOUNTRIES, NCOUNTRIES))
    with open(path, 'r') as f:
        for line in f:
            data = line.split()
            c1 = int(data[0]) - 1
            c2 = int(data[1]) - 1
            v = np.log1p(float(data[2])) if log1p else float(data[2])
            matrix[c1][c2] = v  # real data from file
            if not directed:
                matrix[c2][c1] = v  # symmetry
    print('{} loaded as a matrix!'.format(path))
    return matrix
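# A minimal usage sketch for getMatrix. NCOUNTRIES is a module-level constant
# in the original code, and the edge-list format (1-based ids plus a value per
# line) is inferred from the parsing above; both are assumptions here.
import numpy as np

NCOUNTRIES = 3  # assumed value, just for the example

with open('trade_example.txt', 'w') as f:
    f.write('1 2 10.0\n')
    f.write('2 3 4.0\n')

M = getMatrix('trade_example.txt', directed=False, log1p=True)
print(M)  # symmetric 3x3 matrix holding log1p of the edge values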
#######################################################################
# Data Matrices
#######################################################################
Python log1p() usage examples (source code)
def log1p(data, copy=False):
"""Logarithmize the data matrix.
    Computes `X = log(X + 1)`, where `log` denotes the natural logarithm.
Parameters
----------
data : array-like or AnnData
The data matrix.
copy : bool (default: False)
If an AnnData is passed, determines whether a copy is returned.
Returns
-------
Returns or updates data, depending on `copy`.
"""
if isinstance(data, AnnData):
adata = data.copy() if copy else data
adata.X = log1p(data.X)
return adata if copy else None
X = data # proceed with data matrix
if not issparse(X):
return np.log1p(X)
else:
return X.log1p()
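# A minimal usage sketch, assuming numpy and scipy.sparse are available; the
# AnnData branch needs the anndata package and is not exercised here.
import numpy as np
from scipy.sparse import csr_matrix, issparse

dense = np.array([[0.0, 1.0], [3.0, 7.0]])
print(log1p(dense))              # elementwise log(1 + x) on a dense array

sparse = csr_matrix(dense)
print(log1p(sparse).toarray())   # sparse input is handled via the matrix's own .log1p()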
def signPreserveNorm(self):
    """
    Sign-preserving normalisation used in Eye.
    Similar to that used by Romano et al. in the SVM paper,
    except they use log(1+|x|), i.e. they don't divide by sigma.
    Normalises the unravelled image.
    Vectorised on 24/07/13.
    """
    Vec = np.nan_to_num(self.unravelObject())
    std = np.std(Vec)
    # sign(x) * log1p(|x| / sigma); np.sign avoids the 0/0 that Vec / np.abs(Vec)
    # would produce for zero-valued pixels
    normVec = np.sign(Vec) * np.log1p(np.abs(Vec) / std)
    return normVec
def signPreserveNorm(self):
    """
    Sign-preserving normalisation used in Eye.
    Similar to that used by Romano et al. in the SVM paper,
    except they use log(1+|x|), i.e. they don't divide by sigma.
    Normalises the unravelled image.
    Vectorised on 24/07/13.
    """
    Vec = np.nan_to_num(np.ravel(self.getImage(), order="F"))
    std = np.std(Vec)
    # sign(x) * log1p(|x| / sigma); np.sign avoids the 0/0 that Vec / np.abs(Vec)
    # would produce for zero-valued pixels
    normVec = np.sign(Vec) * np.log1p(np.abs(Vec) / std)
    return normVec
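# The same normalisation in a standalone, array-level form (a sketch for
# illustration; unravelObject/getImage from the original class are not
# available here, so a plain array is used instead).
import numpy as np

def sign_preserve_norm(vec):
    vec = np.nan_to_num(np.asarray(vec, dtype=float))
    std = np.std(vec)
    # sign(x) * log1p(|x| / sigma): compresses magnitudes while preserving sign
    return np.sign(vec) * np.log1p(np.abs(vec) / std)

print(sign_preserve_norm([-50.0, -1.0, 0.0, 1.0, 50.0]))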
def logsum_pair_table_interp(self, diff):
"""
Return the log1p term from precomputed table by interpolation.
Cf. Treba
Minimax log sum approximation might be even faster and more precise, TODO
:param diff: x-y or y-x
"""
index = -int(diff)
w = -diff - index
val1 = self.logsum_table[index]
val2 = self.logsum_table[index + 1]
return val1 + (w * (val2 - val1))
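# A sketch of how such a table could be built and used. The original class that
# owns self.logsum_table is not shown; here the table is assumed to hold
# log1p(exp(-i)) at integer offsets i, which is what the indexing above implies.
import numpy as np

TABLE_SIZE = 64  # for d >= 64 the correction term log1p(exp(-d)) is negligible

class LogSumTable(object):
    def __init__(self):
        self.logsum_table = np.log1p(np.exp(-np.arange(TABLE_SIZE + 1, dtype=float)))

    def logsum_pair(self, x, y):
        """Approximate log(exp(x) + exp(y)) via the interpolated table."""
        if x < y:
            x, y = y, x
        diff = y - x                       # diff <= 0
        index = -int(diff)
        if index >= TABLE_SIZE:
            return x                       # correction term is negligible
        w = -diff - index
        val1 = self.logsum_table[index]
        val2 = self.logsum_table[index + 1]
        return x + val1 + w * (val2 - val1)

# Compare against the exact np.logaddexp; with unit spacing the table is coarse,
# so expect agreement only to a couple of decimal places.
t = LogSumTable()
print(t.logsum_pair(-3.2, -4.7), np.logaddexp(-3.2, -4.7))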
def summarizeVdToPi(Vd):
    ''' Calculate summary vector of given doc-topic stick lengths Vd.

    Returns
    -------
    sumlogPi : 1D array, size K+1
        sumlogPi[k] = \sum_d log pi_{dk}
    '''
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=RuntimeWarning,
message='divide by zero')
logVd = np.log(Vd)
log1mVd = np.log(1 - Vd)
mask = Vd < 1e-15
log1mVd[mask] = np.log1p(-1 * Vd[mask])
assert not np.any(np.isnan(logVd))
logVd = replaceInfVals(logVd)
log1mVd = replaceInfVals(log1mVd)
sumlogVd = np.sum(logVd, axis=0)
sumlog1mVd = np.sum(log1mVd, axis=0)
sumlogPi = np.hstack([sumlogVd, 0])
sumlogPi[1:] += np.cumsum(sumlog1mVd)
return sumlogPi
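# A brute-force check of the stick-breaking identity the function relies on:
# pi_{dk} = V_{dk} * prod_{j<k} (1 - V_{dj}), with the leftover stick as entry K,
# so the column sums of log pi equal sumlogVd plus the cumulative sumlog1mVd.
import numpy as np

D, K = 3, 4
rng = np.random.RandomState(0)
Vd = rng.uniform(0.05, 0.95, size=(D, K))

logPi = np.zeros((D, K + 1))
for d in range(D):
    rem = 1.0
    for k in range(K):
        logPi[d, k] = np.log(Vd[d, k] * rem)
        rem *= (1.0 - Vd[d, k])
    logPi[d, K] = np.log(rem)

sumlogPi = np.hstack([np.log(Vd).sum(axis=0), 0.0])
sumlogPi[1:] += np.cumsum(np.log1p(-Vd).sum(axis=0))
assert np.allclose(logPi.sum(axis=0), sumlogPi)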
def lnPr(s,p,eps=1e-12,axis=-1):
    '''
    Compute the log-probability of bits s under factorized Bernoulli
    probabilities p, i.e. the log of \prod_i p_i^{s_i} (1 - p_i)^{1 - s_i}.

    Parameters
    ----------
    s : bits
    p : probability of each bit being 1

    Returns
    -------
    Log-probability, summed along `axis`.
    '''
p = p.copy()
p[p<eps]=eps
p[p>1-eps]=1-eps
s = np.int32(s)
return np.sum(s*slog(p)+(1-s)*np.log1p(-p),axis=axis)
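# Minimal usage sketch. `slog` is not defined in this snippet; it is assumed to
# be a safe natural log, so plain np.log stands in for it here.
import numpy as np

def slog(x):
    return np.log(x)  # assumption: stand-in for the original safe-log helper

s = np.array([1, 0, 1, 1])
p = np.array([0.9, 0.2, 0.7, 0.5])
print(lnPr(s, p))  # log(0.9) + log(0.8) + log(0.7) + log(0.5) ~= -1.3783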
def xgboost(train_sample, validation_sample, features, model_param):
def evalmape(preds, dtrain):
labels = dtrain.get_label()
preds = np.power(log_base, preds) - 1
# return a pair metric_name, result
# since preds are margin(before logistic transformation, cutoff at 0)
return 'mape', np.abs((labels - preds) / labels).sum() / len(labels)
    param = {'max_depth': model_param['depth'],
             'eta': model_param['lr'],
             'silent': 1,
             'objective': 'reg:linear',
             'booster': 'gbtree',
             'subsample': model_param['sample'],
             'seed': model_param['seed'],
             'colsample_bytree': 1,
             'min_child_weight': 1,
             'gamma': 0}
param['eval_metric'] = 'mae'
num_round = model_param['tree']
log_base = np.e
plst = param.items()
dtrain = xgb.DMatrix(train_sample[features], np.log1p(train_sample['volume'])/np.log(log_base))
dtest = xgb.DMatrix(validation_sample[features], validation_sample['volume'])
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
bst = xgb.train(plst, dtrain, num_round, watchlist, feval=evalmape)
xgboost_prob = np.power(log_base, bst.predict(dtest)) - 1
# MAPE
print_mape(validation_sample['volume'], xgboost_prob, 'XGBOOST')
return xgboost_prob
def exrf(train_sample, validation_sample, features, seed):
log_base = np.e
exrf_est = ExtraTreesRegressor(n_estimators=1000,
criterion='mse',
max_features='auto',
max_depth=None,
bootstrap=True,
min_samples_split=4,
min_samples_leaf=1,
min_weight_fraction_leaf=0,
max_leaf_nodes=None,
random_state=seed
).fit(
train_sample[features], np.log1p(train_sample['volume']) / np.log(log_base))
exrf_prob = np.power(log_base, exrf_est.predict(validation_sample[features])) - 1
print_mape(validation_sample['volume'], exrf_prob, 'EXTRA-RF')
return exrf_prob
def test_branch_cuts(self):
# check branch cuts and continuity on them
yield _check_branch_cut, np.log, -0.5, 1j, 1, -1, True
yield _check_branch_cut, np.log2, -0.5, 1j, 1, -1, True
yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True
yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True
yield _check_branch_cut, np.sqrt, -0.5, 1j, 1, -1, True
yield _check_branch_cut, np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True
yield _check_branch_cut, np.arccos, [ -2, 2], [1j, 1j], 1, -1, True
yield _check_branch_cut, np.arctan, [0-2j, 2j], [1, 1], -1, 1, True
yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True
yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True
yield _check_branch_cut, np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True
# check against bogus branch cuts: assert continuity between quadrants
yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1
yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1, 1], 1, 1
yield _check_branch_cut, np.arctan, [ -2, 2], [1j, 1j], 1, 1
yield _check_branch_cut, np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1
yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1
yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1
def test_branch_cuts_complex64(self):
# check branch cuts and continuity on them
yield _check_branch_cut, np.log, -0.5, 1j, 1, -1, True, np.complex64
yield _check_branch_cut, np.log2, -0.5, 1j, 1, -1, True, np.complex64
yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True, np.complex64
yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True, np.complex64
yield _check_branch_cut, np.sqrt, -0.5, 1j, 1, -1, True, np.complex64
yield _check_branch_cut, np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64
yield _check_branch_cut, np.arccos, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64
yield _check_branch_cut, np.arctan, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64
yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64
yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True, np.complex64
yield _check_branch_cut, np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64
# check against bogus branch cuts: assert continuity between quadrants
yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64
yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64
yield _check_branch_cut, np.arctan, [ -2, 2], [1j, 1j], 1, 1, False, np.complex64
yield _check_branch_cut, np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1, False, np.complex64
yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1, False, np.complex64
yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1, False, np.complex64
def log_1minus(x):
"""Computes log(1 - x). More accurate than doing np.log(1-x)."""
return np.log1p(-x)
def log_prob_correct_from_qual(q):
"""Computes the probability of no error given a phred quality."""
return np.log1p(- 10**(-0.1 * q))
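# Quick numeric check, assuming the usual Phred convention P(error) = 10 ** (-q / 10):
# q = 30 gives P(error) = 1e-3, so log P(correct) = log(0.999) ~= -0.0010005.
print(log_prob_correct_from_qual(30))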
def rmsle(y, yp):
    # Root-mean-squared logarithmic error: RMSE computed in log1p space.
    # Negative targets are clipped to 0 so that log1p stays defined.
    y1 = y.copy()
    y1[y1 < 0] = 0
    return rmse(np.log1p(y1), np.log1p(yp))
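# `rmse` is not defined in this snippet; a standard root-mean-squared-error
# stand-in is assumed below so the metric can be exercised end to end.
import numpy as np

def rmse(a, b):
    return np.sqrt(np.mean((a - b) ** 2))  # assumed helper, not from the original module

y_true = np.array([3.0, 5.0, 2.5, 7.0])
y_pred = np.array([2.5, 5.0, 4.0, 8.0])
print(rmsle(y_true, y_pred))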
def log_loss_value_from_scores(weights, total_weights, scores):
"""
    computes the logistic loss value from a vector of scores in a numerically stable way,
    where scores = Z.dot(rho)
    see also: http://stackoverflow.com/questions/20085768/

    this function is used for heuristics (discrete_descent, sequential_rounding);
    to save computation when running the heuristics, we store the scores and
    call this function to compute the loss directly from the scores,
    which avoids recomputing the dot product

    Parameters
    ----------
    weights          numpy.array of sample weights with shape (n_rows,)
    total_weights    scalar = numpy.sum(weights) (passed in only to avoid recomputation)
    scores           numpy.array of scores = Z.dot(rho)

    Returns
    -------
    loss_value       scalar = (1 / total_weights) * sum(weights .* log(1 .+ exp(-scores)))
    """
pos_idx = scores > 0
loss_value = np.empty_like(scores)
loss_value[pos_idx] = np.log1p(np.exp(-scores[pos_idx]))
loss_value[~pos_idx] = -scores[~pos_idx] + np.log1p(np.exp(scores[~pos_idx]))
loss_value = loss_value.dot(weights) / total_weights
return loss_value
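# Why the split on scores > 0 matters: evaluating np.log1p(np.exp(-s)) directly
# overflows for large negative scores, while the rearranged form stays finite.
import numpy as np

s = np.array([-1000.0, -5.0, 0.0, 5.0, 1000.0])

naive = np.log1p(np.exp(-s))   # exp(1000) overflows, so the first entry becomes inf
stable = np.where(s > 0,
                  np.log1p(np.exp(-np.abs(s))),
                  -s + np.log1p(np.exp(-np.abs(s))))
print(naive)    # [inf, 5.0067, 0.6931, 0.0067, 0.]
print(stable)   # [1000., 5.0067, 0.6931, 0.0067, 0.]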
def log_loss_value_and_slope(Z, rho):
"""
computes the value and slope of the logistic loss in a numerically stable way
this function should only be used when generating cuts in cutting-plane algorithms
(computing both the value and the slope at the same time is slightly cheaper)
see also: http://stackoverflow.com/questions/20085768/
Parameters
----------
Z numpy.array containing training data with shape = (n_rows, n_cols)
rho numpy.array of coefficients with shape = (n_cols,)
Returns
-------
    loss_value       scalar = 1/n_rows * sum(log(1 .+ exp(-Z*rho)))
    loss_slope       (n_cols,) vector = 1/n_rows * Z' * (1 ./ (1 .+ exp(-Z*rho)) - 1)
"""
scores = Z.dot(rho)
pos_idx = scores > 0
exp_scores_pos = np.exp(-scores[pos_idx])
exp_scores_neg = np.exp(scores[~pos_idx])
#compute loss value
loss_value = np.empty_like(scores)
loss_value[pos_idx] = np.log1p(exp_scores_pos)
loss_value[~pos_idx] = -scores[~pos_idx] + np.log1p(exp_scores_neg)
loss_value = loss_value.mean()
    # compute loss slope; note that "log_probs" actually holds sigmoid(scores), i.e. probabilities, computed stably
log_probs = np.empty_like(scores)
log_probs[pos_idx] = 1.0 / (1.0 + exp_scores_pos)
log_probs[~pos_idx] = exp_scores_neg / (1.0 + exp_scores_neg)
loss_slope = Z.T.dot(log_probs - 1.0) / Z.shape[0]
return loss_value, loss_slope
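# A finite-difference sanity check of the reported slope on random data (a
# sketch, not part of the original module).
import numpy as np

rng = np.random.RandomState(0)
Z = rng.randn(50, 3)
rho = rng.randn(3)

val, slope = log_loss_value_and_slope(Z, rho)

eps = 1e-6
fd = np.zeros_like(rho)
for j in range(len(rho)):
    step = np.zeros_like(rho)
    step[j] = eps
    fd[j] = (log_loss_value_and_slope(Z, rho + step)[0]
             - log_loss_value_and_slope(Z, rho - step)[0]) / (2 * eps)

assert np.allclose(slope, fd, atol=1e-5)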
Source: two_sigma_financial_modelling.py (project: PortfolioTimeSeriesAnalysis, author: MizioAnd)
def skew_correction(df, numerical_features):
# Skew correction
skewed_feats = df[numerical_features].apply(lambda x: skew(x.dropna())) # compute skewness
skewed_feats = skewed_feats[skewed_feats > 0.75]
skewed_feats = skewed_feats.index
df.loc[:, tuple(skewed_feats)] = np.log1p(np.asarray(df[skewed_feats], dtype=float))
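# Minimal usage sketch with a toy DataFrame; pandas, numpy and scipy.stats.skew
# are assumed to be imported in the original module.
import numpy as np
import pandas as pd
from scipy.stats import skew

df = pd.DataFrame({'volume': np.random.lognormal(mean=2.0, sigma=1.0, size=1000),
                   'flat': np.random.normal(size=1000)})
skew_correction(df, numerical_features=['volume', 'flat'])
# 'volume' is heavily right-skewed, so it is log1p-transformed in place; 'flat' is left alone
print(df['volume'].head())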
def _logcdf(self, samples):
if self.theta == 0:
vals = np.sum(np.log(samples), axis=1)
else:
old_settings = np.seterr(divide='ignore')
vals = np.log(-np.log1p(np.expm1(-self.theta * samples[:, 0])
* np.expm1(-self.theta * samples[:, 1])
/ (np.expm1(-self.theta)))) \
- np.log(self.theta)
np.seterr(**old_settings)
return vals
def _ppcf(self, samples):
if self.theta == 0:
vals = samples[:, 0]
else:
vals = -np.log1p(samples[:, 0] * np.expm1(-self.theta)
/ (np.exp(-self.theta * samples[:, 1])
- samples[:, 0] * np.expm1(-self.theta
* samples[:, 1]))) \
/ self.theta
return vals
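# These two methods look like the Frank copula: _logcdf is the log of its CDF and
# _ppcf inverts the conditional CDF used for sampling. Assuming that reading, a
# standalone sampler would look roughly like this (the owning class and its
# theta attribute are not shown above, so this is illustrative only).
import numpy as np

def frank_conditional_sample(theta, n, seed=0):
    """Draw n pairs (u, v) from a Frank copula by conditional inversion."""
    rng = np.random.RandomState(seed)
    v = rng.uniform(size=n)   # conditioning coordinate
    w = rng.uniform(size=n)   # uniform draw pushed through the inverse conditional CDF
    u = -np.log1p(w * np.expm1(-theta)
                  / (np.exp(-theta * v) - w * np.expm1(-theta * v))) / theta
    return np.column_stack([u, v])

print(frank_conditional_sample(theta=5.0, n=5))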