def decode(self, input_vectors, output):
    tgt_toks = [self.tgt_vocab[tok] for tok in output]

    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)

    s = self.dec_lstm.initial_state()
    s = s.add_input(dynet.concatenate([
        input_vectors[-1],
        dynet.vecInput(self.args.hidden_dim)
    ]))
    loss = []
    for tok in tgt_toks:
        out_vector = dynet.affine_transform([b, w, s.output()])
        probs = dynet.softmax(out_vector)
        loss.append(-dynet.log(dynet.pick(probs, tok.i)))

        embed_vector = self.tgt_lookup[tok.i]
        attn_vector = self.attend(input_vectors, s)
        inp = dynet.concatenate([embed_vector, attn_vector])
        s = s.add_input(inp)

    loss = dynet.esum(loss)
    return loss
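The loop above calls self.attend(input_vectors, s), which is not shown in this snippet. Below is a minimal sketch of what such an MLP-attention helper could look like in DyNet; the parameter attributes (attention_w1, attention_w2, attention_v) are hypothetical names, not taken from the original model.

def attend(self, input_vectors, state):
    # Hypothetical parameters; assumed to have been added to the model elsewhere.
    w1 = dynet.parameter(self.attention_w1)
    w2 = dynet.parameter(self.attention_w2)
    v = dynet.parameter(self.attention_v)
    # One unnormalised score per encoder vector, conditioned on the decoder state.
    scores = [dynet.dot_product(v, dynet.tanh(w1 * iv + w2 * state.output()))
              for iv in input_vectors]
    weights = dynet.softmax(dynet.concatenate(scores))
    # Context vector: encoder vectors weighted by the attention distribution.
    return dynet.concatenate_cols(input_vectors) * weights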
def _factor_target_indices(self, Y_inds, vocab_size=None, base=2):
    if vocab_size is None:
        vocab_size = len(self.dp.word_index)
    print >>sys.stderr, "Factoring targets of vocabulary size: %d"%(vocab_size)
    num_vecs = int(math.ceil(math.log(vocab_size)/math.log(base))) + 1
    base_inds = []
    div_Y_inds = Y_inds
    print >>sys.stderr, "Number of factors: %d"%num_vecs
    for i in range(num_vecs):
        new_inds = div_Y_inds % base
        if i == num_vecs - 1:
            if new_inds.sum() == 0:
                # Most significant "digit" is a zero. Omit it.
                break
        base_inds.append(new_inds)
        div_Y_inds = numpy.copy(div_Y_inds/base)
    base_vecs = [self._make_one_hot(base_inds_i, base) for base_inds_i in base_inds]
    return base_vecs
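To make the factoring concrete, here is the same arithmetic for a single index with illustrative numbers (vocabulary size 1000, base 2); none of this comes from the original class.

import math

vocab_size, base = 1000, 2
num_vecs = int(math.ceil(math.log(vocab_size) / math.log(base))) + 1   # ceil(9.97) + 1 == 11

# Factor one target index into base-2 "digits", least significant first,
# the way the loop above does element-wise over the whole Y_inds array.
idx, digits = 5, []
for _ in range(num_vecs):
    digits.append(idx % base)
    idx //= base
print(num_vecs, digits)   # 11 [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]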
def normalvariate(self, mu, sigma):
    """Normal distribution.

    mu is the mean, and sigma is the standard deviation.
    """
    # mu = mean, sigma = standard deviation

    # Uses Kinderman and Monahan method. Reference: Kinderman,
    # A.J. and Monahan, J.F., "Computer generation of random
    # variables using the ratio of uniform deviates", ACM Trans
    # Math Software, 3, (1977), pp257-260.
    random = self.random
    while 1:
        u1 = random()
        u2 = 1.0 - random()
        z = NV_MAGICCONST*(u1-0.5)/u2
        zz = z*z/4.0
        if zz <= -_log(u2):
            break
    return mu + z*sigma

## -------------------- lognormal distribution --------------------
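The acceptance test above depends on the module-level constant NV_MAGICCONST, which CPython's random module defines as 4*exp(-0.5)/sqrt(2.0). A minimal standalone version of the same sampler, assuming that definition:

import math
import random

NV_MAGICCONST = 4 * math.exp(-0.5) / math.sqrt(2.0)   # same value as in CPython's random module

def sample_normal(mu, sigma):
    # Same Kinderman-Monahan acceptance loop as the method above, standalone.
    while True:
        u1 = random.random()
        u2 = 1.0 - random.random()
        z = NV_MAGICCONST * (u1 - 0.5) / u2
        if z * z / 4.0 <= -math.log(u2):
            return mu + z * sigma

samples = [sample_normal(0.0, 1.0) for _ in range(100000)]
print(sum(samples) / len(samples))   # close to 0.0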
def expovariate(self, lambd):
    """Exponential distribution.

    lambd is 1.0 divided by the desired mean.  It should be
    nonzero.  (The parameter would be called "lambda", but that is
    a reserved word in Python.)  Returned values range from 0 to
    positive infinity if lambd is positive, and from negative
    infinity to 0 if lambd is negative.
    """
    # lambd: rate lambd = 1/mean
    # ('lambda' is a Python reserved word)

    # we use 1-random() instead of random() to preclude the
    # possibility of taking the log of zero.
    return -_log(1.0 - self.random())/lambd

## -------------------- von Mises distribution --------------------
def test_simple_scaling():
    Quantity.set_prefs(spacer=None, show_label=None, label_fmt=None, label_fmt_full=None)

    q = Quantity('1kg')
    assert q.render() == '1 kg'
    assert q.render(scale=0.001, show_units=False) == '1'

    with pytest.raises(KeyError, message="Unable to convert between 'fuzz' and 'g'."):
        q.render(scale='fuzz')

    q = Quantity('1', units='g', scale=1000)
    assert q.render() == '1 kg'
    assert q.render(scale=(0.0022046, 'lbs')) == '2.2046 lbs'

    q = Quantity('1', scale=(1000, 'g'))
    assert q.render() == '1 kg'
    assert q.render(scale=lambda v, u: (0.0022046*v, 'lbs')) == '2.2046 lbs'

    def dB(v, u):
        return 20*math.log(v, 10), 'dB'+u

    def adB(v, u):
        return pow(10, v/20), u[2:] if u.startswith('dB') else u

    q = Quantity('-40 dBV', scale=adB)
    assert q.render() == '10 mV'
    assert q.render(scale=dB) == '-40 dBV'
def score_samples(self, X):
    """Return the log-likelihood of each sample

    See. "Pattern Recognition and Machine Learning"
    by C. Bishop, 12.2.1 p. 574
    or http://www.miketipping.com/papers/met-mppca.pdf

    Parameters
    ----------
    X : array, shape(n_samples, n_features)
        The data.

    Returns
    -------
    ll : array, shape (n_samples,)
        Log-likelihood of each sample under the current model
    """
    check_is_fitted(self, 'mean_')

    X = check_array(X)
    Xr = X - self.mean_
    n_features = X.shape[1]
    log_like = np.zeros(X.shape[0])
    precision = self.get_precision()
    log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
    log_like -= .5 * (n_features * log(2. * np.pi)
                      - fast_logdet(precision))
    return log_like
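The expression above is the standard multivariate normal log-density written in terms of the precision matrix. A small standalone sanity check against scipy, with illustrative data (not from the original test suite):

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.RandomState(0)
X = rng.randn(5, 3)
mean = X.mean(axis=0)
cov = np.cov(X, rowvar=False) + 0.1 * np.eye(3)   # regularise so the covariance is invertible
precision = np.linalg.inv(cov)

Xr = X - mean
log_like = -.5 * (Xr * (Xr @ precision)).sum(axis=1)
log_like -= .5 * (3 * np.log(2. * np.pi) - np.log(np.linalg.det(precision)))

print(np.allclose(log_like, multivariate_normal(mean, cov).logpdf(X)))   # True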
def compute_possibility(self, start_position, seg_index, oov_pattern, oov_dct, oov_ctx):
    weight, j = 0, start_position
    test_word = []
    for tag in oov_pattern:
        word_content = self.words_graph.get_word(seg_index[j][0], seg_index[j][1]).content
        oov_freq = oov_dct.get_frequence(
            word_content,
            self.oov_tag_encode(tag)
        )
        #print('tag:{} word:{} freq:{} start_prob:{}'.format(
        #    tag, word_content, oov_freq, oov_ctx.prob_to_frequence(oov_ctx.start_prob[self.oov_tag_encode(tag)])))
        test_word.append(word_content)
        # Reference formula: dPOSPoss=log((double)(m_context.GetFrequency(0,m_nBestTag[i])+1))-log((double)(nFreq+1));
        poss = math.log(float(oov_ctx.prob_to_frequence(oov_ctx.start_prob[self.oov_tag_encode(tag)]))) \
            - math.log(float(oov_freq + 1))
        weight += poss
        j += 1
    #print('compute_possibility() {} {} = {}'.format(oov_pattern, ''.join(test_word), weight))
    return weight
def __init__(self, get_params_function, try_params_function):
    self.get_params = get_params_function
    self.try_params = try_params_function

    self.max_iter = 81   # maximum iterations per configuration
    self.eta = 3         # defines configuration downsampling rate (default = 3)

    self.logeta = lambda x: log(x) / log(self.eta)
    self.s_max = int(self.logeta(self.max_iter))
    self.B = (self.s_max + 1) * self.max_iter

    self.results = []    # list of dicts
    self.counter = 0
    self.best_loss = np.inf
    self.best_counter = -1

# can be called multiple times
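With the defaults above, the bracket arithmetic works out as follows; a quick standalone illustration (the values mirror the constants in the constructor, nothing here comes from the rest of the class):

import math

max_iter, eta = 81, 3
logeta = lambda x: math.log(x) / math.log(eta)

s_max = int(logeta(max_iter))    # floor(log_3(81)) == 4, so brackets s = 4, 3, 2, 1, 0
B = (s_max + 1) * max_iter       # 5 * 81 == 405 units of resource per Hyperband cycle

# The most exploratory bracket (s = 4) starts 81 configurations at 1 iteration each,
# while the most conservative one (s = 0) runs 5 configurations for the full 81 iterations.
print(s_max, B)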
def calc_mean_lp_scores(log_prob_scores: List[float],
                        lengths: List[int]) -> List[Union[None, float]]:
    r"""
    .. math:
        \frac{%
            \log P_\text{model}\left(\xi\right)
        }{%
            \text{length}\left(\xi\right)
        }

    >>> '{:.3f}'.format(calc_mean_lp_scores([-14.7579], [4])[0])
    '-3.689'
    """
    mean_lp_scores = []
    for score, length in zip(log_prob_scores, lengths):
        x = None \
            if score is None or length == 0 \
            else float(score) / float(length)
        mean_lp_scores.append(x)
    return mean_lp_scores
def calc_norm_lp_div_scores(
        log_prob_scores: List[float],
        unigram_scores: List[float]) -> List[Union[None, float]]:
    r"""
    .. math:
        \frac{%
            \log P_\text{model}\left(\xi\right)
        }{%
            \log P_\text{unigram}\left(\xi\right)
        }

    >>> '{:.3f}'.format(calc_norm_lp_div_scores([-14.7579], [-35.6325])[0])
    '-0.414'
    """
    results = []
    for log_prob, unigram_score in zip(log_prob_scores, unigram_scores):
        if log_prob is None or numpy.isclose(unigram_score, 0.0, rtol=1e-05):
            x = None
        else:
            x = (-1.0) * float(log_prob) / float(unigram_score)
        results.append(x)
    return results
def calc_norm_lp_sub_scores(
        log_prob_scores: List[float],
        unigram_scores: List[float]) -> List[Union[None, float]]:
    r"""
    .. math:
        \log P_\text{model}\left(\xi\right)
        - \log P_\text{unigram}\left(\xi\right)

    >>> '{:.3f}'.format(calc_norm_lp_sub_scores([-14.7579], [-35.6325])[0])
    '20.875'
    """
    results = []
    for log_prob, unigram_score in zip(log_prob_scores, unigram_scores):
        if log_prob is None or numpy.isclose(unigram_score, 0.0, rtol=1e-05):
            x = None
        else:
            x = float(log_prob) - float(unigram_score)
        results.append(x)
    return results
def expovariate(self, lambd):
    """Exponential distribution.

    lambd is 1.0 divided by the desired mean.  It should be
    nonzero.  (The parameter would be called "lambda", but that is
    a reserved word in Python.)  Returned values range from 0 to
    positive infinity if lambd is positive, and from negative
    infinity to 0 if lambd is negative.
    """
    # lambd: rate lambd = 1/mean
    # ('lambda' is a Python reserved word)

    random = self.random
    u = random()
    while u <= 1e-7:
        u = random()
    return -_log(u)/lambd

## -------------------- von Mises distribution --------------------
def randint(minvalue, maxvalue):
    """Returns a random integer x with minvalue <= x <= maxvalue"""

    # Safety - get a lot of random data even if the range is fairly
    # small
    min_nbits = 32

    # The range of the random numbers we need to generate
    range = maxvalue - minvalue

    # Which is this number of bytes
    rangebytes = ceil(math.log(range, 2) / 8.)

    # Convert to bits, but make sure it's always at least min_nbits*2
    rangebits = max(rangebytes * 8, min_nbits * 2)

    # Take a random number of bits between min_nbits and rangebits
    nbits = random.randint(min_nbits, rangebits)

    return (read_random_int(nbits) % range) + minvalue
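For a concrete feel of the bit-sizing above, here is the arithmetic for one illustrative range (the numbers are only for illustration):

import math

minvalue, maxvalue = 1, 1000
range_ = maxvalue - minvalue                        # 999
rangebytes = math.ceil(math.log(range_, 2) / 8.)    # ceil(9.96 / 8) == 2 bytes
rangebits = max(rangebytes * 8, 32 * 2)             # max(16, 64) == 64 bits
print(range_, rangebytes, rangebits)                # 999 2 64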
def randomized_primality_testing(n, k):
    """Calculates whether n is composite (which is always correct) or
    prime (which is incorrect with error probability 2**-k)

    Returns False if the number is composite, and True if it's
    probably prime.
    """

    q = 0.5     # Property of the jacobi_witness function

    # t = int(math.ceil(k / math.log(1/q, 2)))
    t = ceil(k / math.log(1/q, 2))

    for i in range(t+1):
        x = randint(1, n-1)
        if jacobi_witness(x, n):
            return False

    return True
def idf(tf_dic_list, global_idf_dic, silent=1):
    """
    Input:
        global_idf_dic = {}  # word -> idf, which may be updated in place
    """
    if silent == 0:
        print("idf ...")
    doc_len = len(tf_dic_list)
    idf_dic_list = []  # [{word: idf} for each sample]
    for c, tf_dic in enumerate(tf_dic_list):
        idf_dic = {}
        for word in tf_dic:
            if word not in global_idf_dic:
                n_containing = sum([word in d for d in tf_dic_list])
                global_idf_dic[word] = log(doc_len / (1.0 + n_containing))
            idf_dic[word] = global_idf_dic[word]
        idf_dic_list.append(idf_dic)
        if silent == 0 and c > 0 and c % 100 == 0:
            print("{} documents done, total {}, word {}, idf {}".format(
                c, len(tf_dic_list), word, global_idf_dic[word]))
    return idf_dic_list
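A tiny usage sketch, assuming the function above (and its from math import log) is in scope; the term-frequency dictionaries are made up for illustration:

docs_tf = [
    {"cat": 2, "dog": 1},
    {"cat": 1, "fish": 3},
    {"fish": 1},
]
global_idf = {}
idf_dics = idf(docs_tf, global_idf)

# "cat" appears in 2 of 3 documents: log(3 / (1.0 + 2)) == 0.0
# "dog" appears in 1 of 3 documents: log(3 / (1.0 + 1)) == 0.405...
print(round(global_idf["cat"], 3), round(global_idf["dog"], 3))   # 0.0 0.405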
def _ndcg_at(k, label_col):
    def ndcg_at_k(predicted, actual):
        # TODO: Taking in rn and then re-sorting might not be necessary, but i can't
        # find any real guarantee that they would come in order after a groupBy + collect_list,
        # since they were only ordered within the window function.
        predicted = [row[label_col] for row in sorted(predicted, key=lambda r: r.rn)]
        actual = [row[label_col] for row in sorted(actual, key=lambda r: r.rn)]
        dcg = 0.
        for i, label in enumerate(predicted):
            # This form is used to match EvalNDCG in xgboost
            dcg += ((1 << label) - 1) / math.log(i + 2.0, 2)
        idcg = 0.
        for i, label in enumerate(actual):
            idcg += ((1 << label) - 1) / math.log(i + 2.0, 2)
        if idcg == 0:
            return 0
        else:
            return dcg / idcg
    return F.udf(ndcg_at_k, pyspark.sql.types.DoubleType())
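Outside Spark, the gain/discount arithmetic looks like this with made-up relevance labels (this is just the formula from the inner loops, not the original UDF):

import math

def dcg(labels):
    # (2**label - 1) / log2(rank + 1), the xgboost-style form used above
    return sum(((1 << label) - 1) / math.log(i + 2.0, 2) for i, label in enumerate(labels))

predicted_order = [3, 2, 0, 1]                         # labels in the order the model ranked them
ideal_order = sorted(predicted_order, reverse=True)    # [3, 2, 1, 0]
print(dcg(predicted_order) / dcg(ideal_order))         # NDCG, roughly 0.993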
def launch():
    opts, h5_files, motifs_fn = __parseArgs()

    __initLog(opts)

    motifs = np.loadtxt(motifs_fn, dtype="str", ndmin=1)
    motifs, not_found = find_motifs_in_control(opts, motifs)
    if len(not_found) > 0:
        logging.warning("")
        logging.warning(" ******************** Important *********************")
        logging.warning(" Did not find %s motifs in %s:" % (len(not_found), opts.control_pkl_name))
        for nf in not_found:
            logging.warning("     %s" % nf)
        logging.warning(" These motif(s) will be removed from further analysis.")
        logging.warning(" These %s motifs will be kept:" % len(motifs))
        for m in motifs:
            logging.warning("     %s" % m)
        logging.warning(" ****************************************************")
        logging.warning("")
    else:
        logging.info("Found entries for all %s motifs in %s" % (len(motifs), opts.control_pkl_name))

    build_profiles(opts, h5_files, motifs, motifs_fn)

    print >> sys.stderr, "mBin methylation profiling has finished running. See log for details."
def get_differentially_private_std(sensitivity, epsilon, delta,
                                   tol=DEFAULT_SIGMA_TOLERANCE):
    '''
    Determine smallest standard deviation for a normal distribution such that
    the probability of a value violating epsilon-differential privacy is at
    most delta.
    '''
    # std upper bound determined by improving result in literature,
    # Hardt and Roth, "Beating Randomized Response on Incoherent Matrices"
    # Thm. 2.6 (and the Lemma in App. A) can be improved to provide the
    # following upper bound
    std_upper_bound = (float(sensitivity)/epsilon) * (4.0/3.0) *\
        (2 * math.log(1.0/delta))**(0.5)
    std_lower_bound = tol  # use small but non-zero value for std lower-bound

    if satisfies_dp(sensitivity, epsilon, delta, std_lower_bound) is True:
        raise ValueError('Could not find lower bound for std interval.')

    std = interval_boolean_binary_search(
        lambda x: satisfies_dp(sensitivity, epsilon, delta, x), std_lower_bound,
        std_upper_bound, tol, return_true=True)

    return std
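For intuition about the magnitude of the search interval, here is the upper bound for one illustrative parameter set (the classic Gaussian-mechanism scale sensitivity*sqrt(2*ln(1/delta))/epsilon, times the 4/3 factor used above):

import math

sensitivity, epsilon, delta = 1.0, 0.5, 1e-5
std_upper_bound = (sensitivity / epsilon) * (4.0 / 3.0) * (2 * math.log(1.0 / delta)) ** 0.5
print(round(std_upper_bound, 2))   # about 12.8; the binary search then tightens this downward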
def _encode_ratio(inval, outval):
    '''
    Calculate the log ratio between inbound and outbound traffic.
    Positive when outval > inval, and negative when inval > outval.
    Returns a non-infinite floating point value:
    - zero when inval and outval are zero,
    - a large negative number (< -100) when outval is zero, and
    - a large positive number (> 100) when inval is zero, and
    - log(base 2)(outval/inval) otherwise.
    '''
    inval = float(inval)
    outval = float(outval)
    if inval == 0.0 and outval == 0.0:
        return 0.0
    elif inval == 0.0:
        return sys.float_info.max_exp
    elif outval == 0.0:
        return sys.float_info.min_exp
    else:
        return math.log(outval/inval, 2)
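A few example calls, assuming the function above plus its math and sys imports are in scope (on typical IEEE-754 builds sys.float_info.max_exp is 1024 and min_exp is -1021, hence the "> 100" / "< -100" wording in the docstring):

print(_encode_ratio(0, 0))    # 0.0
print(_encode_ratio(2, 8))    # log2(8/2) == 2.0
print(_encode_ratio(8, 2))    # log2(2/8) == -2.0
print(_encode_ratio(0, 5))    # sys.float_info.max_exp, i.e. 1024
print(_encode_ratio(5, 0))    # sys.float_info.min_exp, i.e. -1021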
def calculate_oobatake_dS(seq, temp):
    """Get dS using Oobatake method in units cal/mol.

    Args:
        seq (str, Seq, SeqRecord): Amino acid sequence
        temp (float): Temperature in degrees C

    Returns:
        float: dS in units cal/mol
    """
    seq = ssbio.protein.sequence.utils.cast_to_str(seq)

    dS = 0
    temp += 273.15
    T0 = 298.15
    dCp_sum = _sum_of_dCp(seq)

    for aa in seq:
        S0 = oobatake_dictionary[aa]['dS']
        dS += S0

    return dS + dCp_sum * math.log(temp / T0)
def calculate_dill_dG(seq_len, temp):
    """Get free energy of unfolding (dG) using Dill method in units J/mol.

    Args:
        seq_len (int): Length of amino acid sequence
        temp (float): Temperature in degrees C

    Returns:
        float: Free energy of unfolding dG (J/mol)
    """
    Th = 373.5  # This quantity affects the up-and-down of the dG vs temperature curve (dG values)
    Ts = 385    # This quantity affects the left-and-right

    temp += 273.15
    dH = (4.0 * seq_len + 143) * 1000
    dS = 13.27 * seq_len + 448
    dCp = (0.049 * seq_len + 0.85) * 1000

    dG = dH + dCp * (temp - Th) - temp * dS - temp * dCp * math.log(float(temp) / Ts)

    return dG
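A quick call with illustrative inputs (a 100-residue protein at 25 degrees C), assuming the function above is in scope:

dG = calculate_dill_dG(seq_len=100, temp=25.0)
print(round(dG))   # roughly 1.9e4 J/mol (~19 kJ/mol) of unfolding free energy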
def Rstr(self):
    array2 = []
    prixe = math.log(0.03637 / float(252) + 1)
    ret = self.sharedf
    ret['change'] = ret['change'] - prixe
    rstr = []
    print 1
    if len(ret) > 525:
        for z in range(0, 504):
            array2.append(math.pow(math.pow(float(1) / 2, float(1 / float(126))), (503 - z)))
        for h in range(0, 525):
            rstr.append(numpy.NaN)
        for c in range(525, len(ret)):
            rett = 0
            # NOTE: `duan` is not defined anywhere in this snippet; in the original it
            # presumably holds a rolling window of `ret` ending at row c.
            for f in range(0, len(duan) - 21):
                rett = rett + duan.iloc[f, 16] * array2[f]
            rstr.append(rett)
    print rstr
    ret['rstr'] = rstr
    return ret[['date', 'rstr']]
def Cmra(self):
    df = self.sharedf
    cc = []
    cmra = []
    prixe = math.log(0.03637 / float(12) + 1)
    df = df.set_index('date')
    df1 = df['change']
    for x in range(20, len(df1.index) + 1):
        cc.append(df1[x - 20:x].sum() - prixe)
    dd = []
    for x in range(12, len(cc) + 1):
        dd.append(sum(cc[x - 12:x]))
    for x in range(252, len(dd) + 1):
        cmra.append(max(cc[x - 252:x]) - min(cc[x - 252:x]))
    df = df[281:]
    df['cmra'] = cmra
    df['date'] = df.index
    df = pandas.DataFrame(df.reset_index(drop=True))
    return df[['date', 'cmra']]
def blackcox_pd(equity, extasset, sigma):
    """Compute the probability of default for external assets following a
    Geometric Brownian Motion and the Black and Cox model.

    Parameters:
        equity (float): equity
        extasset (float): external assets
        sigma (float): volatility of the Geometric Brownian Motion

    Returns:
        probability of default
    """
    if equity <= 0.0:
        return 1.0
    if equity >= extasset:
        return 0.0
    else:
        #return 1 + (- 1/2 * (1 + math.erf((-math.log(1 - equity/extasset) - sigma**2/2) /
        #                                  (math.sqrt(2) * sigma)) )
        #            + (extasset/equity)/2 * (1 + math.erf((math.log(1 - equity/extasset) - sigma**2/2) /
        #                                                  (math.sqrt(2) * sigma)) ) )
        return (1/2 * (1 + math.erf((math.log(1 - equity/extasset) + sigma**2/2) /
                                    (math.sqrt(2) * sigma))) +
                (extasset/(extasset - equity))/2 * (1 + math.erf((math.log(1 - equity/extasset) - sigma**2/2) /
                                                                 (math.sqrt(2) * sigma))))
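Example calls with illustrative numbers (10% equity relative to external assets, 20% asset volatility), assuming the function above is in scope:

print(blackcox_pd(equity=10.0, extasset=100.0, sigma=0.2))   # roughly 0.63
print(blackcox_pd(equity=100.0, extasset=100.0, sigma=0.2))  # 0.0, fully capitalised
print(blackcox_pd(equity=0.0, extasset=100.0, sigma=0.2))    # 1.0, no equity buffer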
def ndcg(self, y_true, y_pred, k=20):
    s = 0.
    c = self.zipped(y_true, y_pred)
    c_g = sorted(c, key=lambda x: x[0], reverse=True)
    c_p = sorted(c, key=lambda x: x[1], reverse=True)
    #idcg = [0. for i in range(k)]
    idcg = np.zeros([k], dtype=np.float32)
    dcg = np.zeros([k], dtype=np.float32)
    #dcg = [0. for i in range(k)]
    for i, (g, p) in enumerate(c_g):
        if g > self.rel_threshold:
            idcg[i:] += (math.pow(2., g) - 1.) / math.log(2. + i)
        if i >= k:
            break
    for i, (g, p) in enumerate(c_p):
        if g > self.rel_threshold:
            dcg[i:] += (math.pow(2., g) - 1.) / math.log(2. + i)
        if i >= k:
            break
    for idx, v in enumerate(idcg):
        if v == 0.:
            dcg[idx] = 0.
        else:
            dcg[idx] /= v
    return dcg
def _log_likelihood(Y, gamma, sigma):
    """
    Compute the log-likelihood for the Generalized Pareto Distribution (μ=0)

    Parameters
    ----------
    Y : numpy.array
        observations
    gamma : float
        GPD index parameter
    sigma : float
        GPD scale parameter (>0)

    Returns
    ----------
    float
        log-likelihood of the sample Y to be drawn from a GPD(γ, σ, μ=0)
    """
    n = Y.size
    if gamma != 0:
        tau = gamma/sigma
        L = -n * log(sigma) - (1 + (1/gamma)) * (np.log(1 + tau*Y)).sum()
    else:
        L = n * (1 + log(Y.mean()))
    return L
def _quantile(self, gamma, sigma):
    """
    Compute the quantile at level 1-q

    Parameters
    ----------
    gamma : float
        GPD parameter
    sigma : float
        GPD parameter

    Returns
    ----------
    float
        quantile at level 1-q for the GPD(γ, σ, μ=0)
    """
    r = self.n * self.proba / self.Nt
    if gamma != 0:
        return self.init_threshold + (sigma/gamma) * (pow(r, -gamma) - 1)
    else:
        return self.init_threshold - sigma * log(r)
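In formula form this is the usual peaks-over-threshold quantile estimate, matching the code above with t = init_threshold, n the total number of observations, N_t the number of peaks above t, and q the target probability:

    z_q = t + \frac{\sigma}{\gamma}\left(\left(\frac{q\,n}{N_t}\right)^{-\gamma} - 1\right),
    \qquad
    z_q = t - \sigma\,\log\!\left(\frac{q\,n}{N_t}\right) \quad (\gamma = 0)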