def _log_likelihood(Y, gamma, sigma):
    """
    Compute the log-likelihood for the Generalized Pareto Distribution (μ=0)
    Parameters
    ----------
    Y : numpy.array
        observations
    gamma : float
        GPD index parameter
    sigma : float
        GPD scale parameter (>0)
    Returns
    ----------
    float
        log-likelihood of the sample Y being drawn from a GPD(γ, σ, μ=0)
    """
    n = Y.size
    if gamma != 0:
        tau = gamma / sigma
        L = -n * log(sigma) - (1 + (1 / gamma)) * (np.log(1 + tau * Y)).sum()
    else:
        # Exponential limit (gamma -> 0) with sigma profiled at its MLE Y.mean():
        # log L = -n*log(Y.mean()) - n
        L = -n * (1 + log(Y.mean()))
    return L
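A minimal usage sketch; the data and parameter values below are made up, and `np` and `log` are assumed to be bound to `numpy` and `math.log`, as the function body requires:

import numpy as np
from math import log

# Hypothetical excesses over a threshold (illustration only).
Y = np.array([0.1, 0.5, 1.2, 2.3, 0.7])

# Evaluate candidate (gamma, sigma) pairs, as a maximum-likelihood
# search would, and keep the pair with the larger log-likelihood.
ll_a = _log_likelihood(Y, gamma=0.1, sigma=1.0)
ll_b = _log_likelihood(Y, gamma=0.5, sigma=2.0)
best = max(ll_a, ll_b)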
def _quantile(self, gamma, sigma):
    """
    Compute the quantile at level 1-q
    Parameters
    ----------
    gamma : float
        GPD parameter
    sigma : float
        GPD parameter
    Returns
    ----------
    float
        quantile at level 1-q for the GPD(γ, σ, μ=0)
    """
    r = self.n * self.proba / self.Nt
    if gamma != 0:
        return self.init_threshold + (sigma / gamma) * (pow(r, -gamma) - 1)
    else:
        return self.init_threshold - sigma * log(r)
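The same formula can be checked standalone; here is a sketch with the instance attributes turned into explicit arguments (all names and numbers below are hypothetical):

from math import log

def gpd_quantile(init_threshold, gamma, sigma, n, Nt, proba):
    # r = q * n / Nt, the rescaled risk level used above.
    r = n * proba / Nt
    if gamma != 0:
        return init_threshold + (sigma / gamma) * (pow(r, -gamma) - 1)
    # Exponential limit of the GPD as gamma -> 0.
    return init_threshold - sigma * log(r)

# e.g. risk level 1e-4, 1000 observations, 50 of them above the
# initial threshold 10.0 (made-up numbers):
z = gpd_quantile(10.0, gamma=0.2, sigma=1.5, n=1000, Nt=50, proba=1e-4)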
def get_results(self):
    result = {}
    fn = self.rec_fn()
    with open(fn) as f:
        txt = f.read()
    with open(self.par_fn()) as f:
        params_txt = f.read().splitlines()
    columns = ['parameter', 'value', 'scale', 'offset']
    param_vals = [dict(zip(columns, line.strip().split())) for line in params_txt[1:]]
    params = pd.DataFrame(param_vals)
    params = params.set_index('parameter')
    for col in columns[1:]:
        params[col] = params[col].astype('f')
    result['results_file'] = fn
    result['text'] = txt
    result['parameters'] = params
    if self.detailed_log:
        result['log'] = self.read_logs()
    return result
def get_similarity(word_list1, word_list2):
    """Compute the similarity between two sentences.

    Keyword arguments:
    word_list1, word_list2  --  the two sentences, each given as a list of words
    """
    words = list(set(word_list1 + word_list2))
    vector1 = [float(word_list1.count(word)) for word in words]
    vector2 = [float(word_list2.count(word)) for word in words]
    vector3 = [vector1[x] * vector2[x] for x in range(len(vector1))]
    vector4 = [1 for num in vector3 if num > 0.]
    co_occur_num = sum(vector4)
    if abs(co_occur_num) <= 1e-12:
        return 0.
    denominator = math.log(float(len(word_list1))) + math.log(float(len(word_list2)))  # denominator
    if abs(denominator) < 1e-12:
        return 0.
    return co_occur_num / denominator
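A quick usage sketch with made-up tokenized sentences: the score counts shared word types and normalizes by the log lengths of the two sentences.

s1 = ["the", "cat", "sat", "on", "the", "mat"]
s2 = ["the", "dog", "sat", "on", "the", "rug"]

# Shared types: "the", "sat", "on" -> co_occur_num == 3, so the
# result is 3 / (log(6) + log(6)) ~= 0.84.
sim = get_similarity(s1, s2)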
def lonlat_to_pixel(self, lonlat, zoom):
    "Converts a longitude, latitude coordinate pair to pixel coordinates for the given zoom level."
    # Setting up, unpacking the longitude, latitude values and getting the
    # number of pixels for the given zoom level.
    lon, lat = self.get_lon_lat(lonlat)
    npix = self._npix[zoom]
    # Calculating the pixel x coordinate by multiplying the longitude value
    # by the per-degree pixel resolution at the given zoom level.
    px_x = round(npix + (lon * self._degpp[zoom]))
    # Creating the factor, and clamping it so that the logarithm's argument
    # (1 + fac) / (1 - fac) stays well-defined. Here's why:
    # if fac = -1, the argument is 0, and log(0) is undefined;
    # if fac = 1, the argument divides by 0, also undefined.
    fac = min(max(sin(DTOR * lat), -0.9999), 0.9999)
    # Calculating the pixel y coordinate.
    px_y = round(npix + (0.5 * log((1 + fac) / (1 - fac)) * (-1.0 * self._radpp[zoom])))
    # Returning the pixel x, y to the caller of the function.
    return (px_x, px_y)
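The y computation is the spherical-Mercator formula; a standalone sketch (the constants and zoom parameters below are made up, not the class's actual tables):

from math import sin, log, pi

DTOR = pi / 180.0  # degrees to radians, as the method above assumes

def mercator_y(lat, npix, radpp):
    # Clamp sin(lat) away from +/-1 so the log argument stays finite.
    fac = min(max(sin(DTOR * lat), -0.9999), 0.9999)
    return round(npix + 0.5 * log((1 + fac) / (1 - fac)) * (-1.0 * radpp))

# A 256-pixel world: npix is half the world size in pixels, radpp the
# per-radian pixel scale at that zoom (hypothetical values).
y = mercator_y(52.5, npix=128, radpp=256 / (2 * pi))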
def getPSD(df, dw=0.05, roverlap=0.5, window='hanning', detrend='constant'):
    """
    Compute the power spectral density of a time signal.
    """
    if isinstance(df, pd.Series):
        df = pd.DataFrame(df)
    nfft = int((2 * pi / dw) / dx(df))
    nperseg = 2**int(log(nfft) / log(2))  # largest power of two <= nfft
    noverlap = int(nperseg * roverlap)
    try:
        from scipy.signal import welch
    except ImportError:
        raise Exception("Welch function not found, please install scipy > 0.12")
    data = []
    for iSig in range(df.shape[1]):
        test = welch(df.values[:, iSig], fs=1. / dx(df), window=window, nperseg=nperseg, noverlap=noverlap, nfft=nfft, detrend=detrend, return_onesided=True, scaling='density')
        data.append(test[1] / (2 * pi))
    xAxis = test[0][:] * 2 * pi
    return pd.DataFrame(data=np.transpose(data), index=xAxis, columns=["psd(" + str(x) + ")" for x in df.columns])
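For reference, a minimal direct `scipy.signal.welch` call on a synthetic signal (the `dx` helper used above, presumably the sampling step of the DataFrame index, is not shown here; newer scipy spells the window 'hann' rather than the older 'hanning' alias):

import numpy as np
from scipy.signal import welch

fs = 100.0                        # sampling frequency, Hz
t = np.arange(0, 10, 1 / fs)
x = np.sin(2 * np.pi * 10 * t)    # 10 Hz test tone

# welch returns frequencies in Hz and a one-sided density; getPSD
# above rescales both outputs by 2*pi to work in rad/s.
f, psd = welch(x, fs=fs, window='hann', nperseg=256, noverlap=128)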
def find_top_two_peaks(sdata):
    samples = len(sdata)
    fft_size = 2**int(floor(log(samples) / log(2.0)))  # largest power of two that fits
    freq = fft(sdata[0:fft_size])
    pdata = numpy.abs(freq)
    peak1 = 0
    peak2 = 0
    peak1_index = 0
    peak2_index = 0
    for i in range(fft_size // 2):
        if pdata[i] > peak1:
            peak1 = pdata[i]
            peak1_index = i
    # Second pass: the runner-up must be more than 4 bins away from the first peak.
    for i in range(fft_size // 2):
        if pdata[i] > peak2 and abs(i - peak1_index) > 4:
            peak2 = pdata[i]
            peak2_index = i
    return (peak1, peak1_index, peak2, peak2_index)
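A usage sketch with a synthetic two-tone signal; the imports mirror what the function body requires (`fft` is assumed to come from `numpy.fft`, `floor`/`log` from `math`):

import numpy
from numpy.fft import fft
from math import floor, log

fs = 1000.0
t = numpy.arange(0, 1, 1.0 / fs)
sig = numpy.sin(2 * numpy.pi * 50 * t) + 0.5 * numpy.sin(2 * numpy.pi * 120 * t)

p1, i1, p2, i2 = find_top_two_peaks(sig)
# 1000 samples -> fft_size = 512, so bin i maps to i * fs / 512 Hz:
# i1 should land near bin 26 (50 Hz) and i2 near bin 61 (120 Hz).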
def save_fft(fil, audio_in):
    samples = len(audio_in)
    fft_size = 2**int(floor(log(samples) / log(2.0)))
    freq = fft(audio_in[0:fft_size])
    s_data = numpy.zeros(fft_size // 2)
    x_data = numpy.zeros(fft_size // 2)
    peak = 0
    for j in range(fft_size // 2):
        if abs(freq[j]) > peak:
            peak = abs(freq[j])
    for j in range(fft_size // 2):
        # Log frequency axis, floored at -10.
        x_data[j] = log(2.0 * (j + 1.0) / fft_size)
        if x_data[j] < -10:
            x_data[j] = -10
        # Power relative to the peak, in dB.
        s_data[j] = 10.0 * log(abs(freq[j]) / peak) / log(10.0)
    plt.ylim([-50, 0])
    plt.plot(x_data, s_data)
    plt.title('fft log power')
    plt.grid()
    fields = fil.split('.')
    plt.savefig(fields[0] + '_fft.png', bbox_inches="tight")
    plt.clf()
    plt.close()
def normalvariate(self, mu, sigma):
    """Normal distribution.
    mu is the mean, and sigma is the standard deviation.
    """
    # mu = mean, sigma = standard deviation
    # Uses Kinderman and Monahan method. Reference: Kinderman,
    # A.J. and Monahan, J.F., "Computer generation of random
    # variables using the ratio of uniform deviates", ACM Trans
    # Math Software, 3, (1977), pp257-260.
    random = self.random
    while 1:
        u1 = random()
        u2 = 1.0 - random()
        z = NV_MAGICCONST * (u1 - 0.5) / u2
        zz = z * z / 4.0
        if zz <= -_log(u2):
            break
    return mu + z * sigma
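This appears to be the standard library's `random.normalvariate` (the Kinderman-Monahan ratio-of-uniforms sampler); a quick seeded sanity check that draws from the same method have the requested moments:

import random

rng = random.Random(42)
draws = [rng.normalvariate(5.0, 2.0) for _ in range(100000)]

mean = sum(draws) / len(draws)                          # ~= 5.0
var = sum((d - mean) ** 2 for d in draws) / len(draws)  # ~= 4.0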
## -------------------- exponential distribution --------------------
def expovariate(self, lambd):
    """Exponential distribution.
    lambd is 1.0 divided by the desired mean. It should be
    nonzero. (The parameter would be called "lambda", but that is
    a reserved word in Python.) Returned values range from 0 to
    positive infinity if lambd is positive, and from negative
    infinity to 0 if lambd is negative.
    """
    # lambd: rate lambd = 1/mean
    # ('lambda' is a Python reserved word)
    # we use 1-random() instead of random() to preclude the
    # possibility of taking the log of zero.
    return -_log(1.0 - self.random()) / lambd
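The return line is the inverse-CDF transform: if U ~ Uniform(0, 1), then -log(1 - U)/lambd is exponential with mean 1/lambd. A short empirical check:

import random

rng = random.Random(0)
lambd = 0.5  # rate; the mean should come out near 1 / lambd = 2.0

draws = [rng.expovariate(lambd) for _ in range(100000)]
empirical_mean = sum(draws) / len(draws)  # ~= 2.0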
def get_test_probs(cmd_args, ngrams_test, corpus_files, model):
    """ Get the summed log-probabilities of the test-data ngrams for each language. """
    # Initialize probs
    sumprobs = {}
    for lang in corpus_files:
        sumprobs[lang] = 0.0
    for ngram in ngrams_test:
        for lang in corpus_files:
            sumprobs[lang] += ngrams_test[ngram] * probability.LaplaceProbDist.logprob(model.smoothed[lang], ngram)
    # The population prior is mostly useful for really small test snippets
    if not cmd_args.no_prior:
        for lang in corpus_files:
            # Strip trailing .txt, and check if it's in the population statistics dict
            lang_prefix = lang[:-4]
            if lang_prefix in model.stats:
                # Normalize population counts by approximate total number of people on earth
                sumprobs[lang] += math.log(model.stats[lang_prefix] / 8e9)
            else:
                # If language isn't in the language population statistics,
                # assume median value of all langs, which is about 500K
                sumprobs[lang] += math.log(500000 / 8e9)
    return sumprobs
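The prior terms are just log population shares added to the ngram log-likelihood; a toy illustration with made-up numbers:

import math

loglik = -42.0                           # hypothetical ngram log-likelihood
prior_listed = math.log(8e7 / 8e9)       # language with ~80M speakers
prior_fallback = math.log(500000 / 8e9)  # unlisted language, median fallback

# Unnormalized log-posterior, as accumulated in sumprobs above:
score = loglik + prior_listed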
def construct_pssm(cds, length=90, out_path="", prob=None):
    """
    Construct Position Specific Scoring Matrices with log-likelihood values.
    length: size of the analyzed region from the start, in bp (sequences shorter than this are discarded)
    prob: a dict of bases with a priori expected probabilities
    """
    cds = cds[0]
    if not prob:
        prob = {"a": 0.25, "t": 0.25, "g": 0.25, "c": 0.25}
    m = {"a": [0] * length, "t": [0] * length, "g": [0] * length, "c": [0] * length}
    tot_gene = 0.0
    for gene in cds:
        if len(cds[gene]) >= length:
            tot_gene += 1
            for i in range(length):
                m[cds[gene][i]][i] += 1
    # Log ratio of the observed frequency over the background probability.
    # Note: a base that never occurs at a position yields log(0), so the
    # input should be large enough that every base is seen everywhere.
    for k in m:
        m[k] = [log((v / tot_gene) / prob[k]) for v in m[k]]
    if out_path:
        with open(out_path, "w") as h:
            h.write("," + ",".join([str(i) for i in range(1, length + 1)]) + "\n")
            for b in ["a", "t", "g", "c"]:
                h.write(b + "," + ",".join(["%.2f" % v for v in m[b]]) + "\n")
    return m
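A usage sketch with random synthetic sequences; real input would be a one-element list holding a {gene: sequence} dict. Enough sequences are generated that every base occurs at every position, since a zero count would make the log blow up:

import random

random.seed(1)
toy_cds = [{"gene%d" % i: "".join(random.choice("atgc") for _ in range(90))
            for i in range(200)}]

pssm = construct_pssm(toy_cds, length=90)
# pssm["a"][i] is the log ratio of the observed frequency of "a" at
# position i over the 0.25 background probability.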
def shrink_bgest(r, rvar, theta):
    """Bernoulli-Gaussian MMSE estimator.
    Perform MMSE estimation E[x|r]
    for x ~ BernoulliGaussian(lambda, xvar1)
        r|x ~ Normal(x, rvar)
    The parameters theta[0], theta[1] represent
        the variance of nonzero x[i]:      xvar1 = abs(theta[0])
        the probability of nonzero x[i]:   lambda = 1/(exp(theta[1])+1)
    """
    xvar1 = abs(theta[..., 0])
    loglam = theta[..., 1]  # log(1/lambda - 1)
    beta = 1 / (1 + rvar / xvar1)
    r2scale = r * r * beta / rvar
    rho = tf.exp(loglam - .5 * r2scale) * tf.sqrt(1 + xvar1 / rvar)
    rho1 = rho + 1
    xhat = beta * r / rho1
    dxdr = beta * ((1 + rho * (1 + r2scale)) / tf.square(rho1))
    dxdr = tf.reduce_mean(dxdr, 0)
    return (xhat, dxdr)
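A NumPy transcription can help check values outside a TensorFlow graph; this sketch is an assumption (scalar theta rather than the batched theta[..., k] indexing above, and the derivative term omitted):

import numpy as np

def shrink_bgest_np(r, rvar, theta):
    xvar1 = abs(theta[0])
    loglam = theta[1]  # log(1/lambda - 1)
    beta = 1.0 / (1.0 + rvar / xvar1)
    r2scale = r * r * beta / rvar
    rho = np.exp(loglam - 0.5 * r2scale) * np.sqrt(1 + xvar1 / rvar)
    xhat = beta * r / (rho + 1)  # MMSE estimate E[x|r]
    return xhat

# Small |r| is shrunk hard toward 0, large |r| much less:
xhat = shrink_bgest_np(np.array([0.1, 3.0]), rvar=0.5, theta=(1.0, 2.0))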
def load_trackball_action(self, action):
    cbTracballOutput = self.builder.get_object("cbTracballOutput")
    cbAxisOutput = self.builder.get_object("cbAxisOutput")
    sclFriction = self.builder.get_object("sclFriction")
    self._recursing = True
    if isinstance(action.action, MouseAction):
        self.set_cb(cbTracballOutput, "mouse", 1)
        self.set_cb(cbAxisOutput, "trackball", 2)
    elif isinstance(action.action, XYAction):
        if isinstance(action.action.x, AxisAction):
            if action.action.x.parameters[0] == Axes.ABS_X:
                self.set_cb(cbTracballOutput, "left", 1)
            else:
                self.set_cb(cbTracballOutput, "right", 1)
            self.set_cb(cbAxisOutput, "trackball", 2)
        elif isinstance(action.action.x, MouseAction):
            if self.editor.get_id() in STICKS:
                self.set_cb(cbAxisOutput, "wheel_stick", 2)
            else:
                self.set_cb(cbAxisOutput, "wheel_pad", 2)
    if action.friction <= 0:
        sclFriction.set_value(0)
    else:
        # The friction slider works on a log10 scale.
        sclFriction.set_value(math.log(action.friction * 1000.0, 10))
    self._recursing = False
def decode(self, encoding, input, output):
    """
    Single training example decoding function
    :param encoding: last hidden state from encoder
    :param input: source sentence
    :param output: target sentence
    :return: loss value
    """
    src_toks = [self.src_vocab[tok] for tok in input]
    tgt_toks = [self.tgt_vocab[tok] for tok in output]
    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)
    s = self.dec_lstm.initial_state().add_input(encoding)
    loss = []
    sent = []
    for tok in tgt_toks:
        out_vector = dynet.affine_transform([b, w, s.output()])
        probs = dynet.softmax(out_vector)
        # Cross-entropy: negative log probability of the gold token.
        cross_ent_loss = -dynet.log(dynet.pick(probs, tok.i))
        loss.append(cross_ent_loss)
        embed_vector = self.tgt_lookup[tok.i]
        s = s.add_input(embed_vector)
    loss = dynet.esum(loss)
    return loss
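The per-token term is ordinary cross-entropy, -log of the softmax probability of the gold token; the same computation in plain Python with hypothetical scores:

import math

logits = [2.0, 0.5, -1.0]            # hypothetical decoder scores
exps = [math.exp(v) for v in logits]
probs = [e / sum(exps) for e in exps]

target = 0
loss = -math.log(probs[target])      # small when the gold token is likely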
def beam_search_generate(self, src_seq, beam_n=5):
    dynet.renew_cg()
    embedded = self.embed_seq(src_seq)
    input_vectors = self.encode_seq(embedded)
    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)
    s = self.dec_lstm.initial_state()
    s = s.add_input(input_vectors[-1])
    beams = [{"state": s,
              "out": [],
              "err": 0}]
    completed_beams = []
    while len(completed_beams) < beam_n:
        potential_beams = []
        for beam in beams:
            if len(beam["out"]) > 0:
                embed_vector = self.tgt_lookup[beam["out"][-1].i]
                s = beam["state"].add_input(embed_vector)
            out_vector = dynet.affine_transform([b, w, s.output()])
            probs = dynet.softmax(out_vector)
            probs = probs.vec_value()
            # Beam score is the accumulated negative log probability.
            for potential_next_i in range(len(probs)):
                potential_beams.append({"state": s,
                                        "out": beam["out"] + [self.tgt_vocab[potential_next_i]],
                                        "err": beam["err"] - math.log(probs[potential_next_i])})
        potential_beams.sort(key=lambda x: x["err"])
        beams = potential_beams[:beam_n - len(completed_beams)]
        completed_beams = completed_beams + [beam for beam in beams if beam["out"][-1] == self.tgt_vocab.END_TOK
                                             or len(beam["out"]) > 5 * len(src_seq)]
        beams = [beam for beam in beams if beam["out"][-1] != self.tgt_vocab.END_TOK
                 and len(beam["out"]) <= 5 * len(src_seq)]
    completed_beams.sort(key=lambda x: x["err"])
    return [beam["out"] for beam in completed_beams]
def fEntropy(countByte, countTotal):
    x = float(countByte) / countTotal
    if x > 0:
        return -x * math.log(x, 2)
    else:
        return 0.0
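Summed over all byte counts, fEntropy gives the Shannon entropy of a byte histogram in bits; a toy three-symbol check:

counts = [50, 30, 20]
total = sum(counts)

# -sum(p * log2(p)) over the distribution (0.5, 0.3, 0.2) ~= 1.49 bits
H = sum(fEntropy(c, total) for c in counts)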
def Print(lines, options):
    print(lines)
    filename = None
    if options.scan:
        filename = 'PDFiD.log'
    if options.output != '':
        filename = options.output
    if filename:
        with open(filename, 'a') as logfile:
            logfile.write(lines + '\n')
def Main():
    moredesc = '''
Arguments:
pdf-file and zip-file can be a single file, several files, and/or @file
@file: run PDFiD on each file listed in the text file specified
wildcards are supported
Source code put in the public domain by Didier Stevens, no Copyright
Use at your own risk
https://DidierStevens.com'''
    oParser = optparse.OptionParser(usage='usage: %prog [options] [pdf-file|zip-file|url|@file] ...\n' + __description__ + moredesc, version='%prog ' + __version__)
    oParser.add_option('-s', '--scan', action='store_true', default=False, help='scan the given directory')
    oParser.add_option('-a', '--all', action='store_true', default=False, help='display all the names')
    oParser.add_option('-e', '--extra', action='store_true', default=False, help='display extra data, like dates')
    oParser.add_option('-f', '--force', action='store_true', default=False, help='force the scan of the file, even without proper %PDF header')
    oParser.add_option('-d', '--disarm', action='store_true', default=False, help='disable JavaScript and auto launch')
    oParser.add_option('-p', '--plugins', type=str, default='', help='plugins to load (separate plugins with a comma , ; @file supported)')
    oParser.add_option('-c', '--csv', action='store_true', default=False, help='output csv data when using plugins')
    oParser.add_option('-m', '--minimumscore', type=float, default=0.0, help='minimum score for plugin results output')
    oParser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose (will also raise caught exceptions)')
    oParser.add_option('-S', '--select', type=str, default='', help='selection expression')
    oParser.add_option('-o', '--output', type=str, default='', help='output to log file')
    (options, args) = oParser.parse_args()
    if len(args) == 0:
        if options.disarm:
            print('Option disarm not supported with stdin')
            options.disarm = False
        if options.scan:
            print('Option scan not supported with stdin')
            options.scan = False
        filenames = ['']
    else:
        try:
            filenames = ExpandFilenameArguments(args)
        except Exception as e:
            print(e)
            return
    PDFiDMain(filenames, options)