def cheaptrick(x, fs, temporal_positions, f0_sequence,
               vuv, fftlen="auto", q1=-0.15):
    f0_sequence = f0_sequence.copy()
    f0_low_limit = 71
    default_f0 = 500
    if fftlen == "auto":
        fftlen = int(2 ** np.ceil(np.log2(3. * float(fs) / f0_low_limit + 1)))
    # raise ValueError("Only fftlen auto currently supported")
    fft_size = fftlen
    # recompute the lowest usable f0 implied by the chosen FFT size
    f0_low_limit = fs * 3.0 / (fft_size - 3.0)
    f0_sequence[vuv == 0] = default_f0
    spectrogram = np.zeros((int(fft_size / 2.) + 1, len(f0_sequence)))
    for i in range(len(f0_sequence)):
        if f0_sequence[i] < f0_low_limit:
            f0_sequence[i] = default_f0
        spectrogram[:, i] = cheaptrick_estimate_one_slice(x, fs, f0_sequence[i],
                                                          temporal_positions[i],
                                                          fft_size, q1)
    return temporal_positions, spectrogram.T, fs
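A quick numeric check of the fftlen="auto" rule above (illustrative values only, not from the original code): with fs = 16000 and the 71 Hz floor, 3 * fs / 71 + 1 is about 677, which rounds up to the next power of two, 1024, and the recomputed f0 floor then works out to roughly 47 Hz.

import numpy as np

fs = 16000
f0_low_limit = 71
fft_size = int(2 ** np.ceil(np.log2(3. * float(fs) / f0_low_limit + 1)))
print(fft_size)                        # 1024
print(fs * 3.0 / (fft_size - 3.0))     # ~47.0 Hz, the effective f0 floor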
def d4c_love_train(x, fs, current_f0, current_position, threshold):
    vuv = 0
    if current_f0 == 0:
        return vuv
    lowest_f0 = 40
    current_f0 = max([current_f0, lowest_f0])
    fft_size = int(2 ** np.ceil(np.log2(3. * fs / lowest_f0 + 1)))
    # FFT bin indices at roughly 100 Hz, 4000 Hz, and 7900 Hz
    boundary0 = int(np.ceil(100 / (float(fs) / fft_size)))
    boundary1 = int(np.ceil(4000 / (float(fs) / fft_size)))
    boundary2 = int(np.ceil(7900 / (float(fs) / fft_size)))
    waveform = d4c_get_windowed_waveform(x, fs, current_f0, current_position,
                                         1.5, 2)
    power_spectrum = np.abs(np.fft.fft(waveform, int(fft_size)) ** 2)
    power_spectrum[0:boundary0 + 1] = 0.
    cumulative_spectrum = np.cumsum(power_spectrum)
    if (cumulative_spectrum[boundary1] / cumulative_spectrum[boundary2]) > threshold:
        vuv = 1
    return vuv
def IntToBinVec(x, v=None):
    # If no vector is passed create a new one
    if v is None:
        dim = int(np.log2(x)) + 1
        v = np.zeros([dim], dtype=int)
    # v will contain the binary vector
    c = 0
    while x > 0:
        # If the vector has been filled; return truncating the rest
        if c >= len(v):
            break
        # Test if the LSB is set
        if x & 1 == 1:
            # Set the bits in right-to-left order
            v[c] = 1
        # Onto the next column and bit
        c += 1
        x >>= 1
    return v
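A minimal usage sketch for IntToBinVec (assuming only the function above): 11 decimal is 1011 binary, and the vector stores bits in LSB-to-MSB order; passing a preallocated vector truncates the result to its length.

import numpy as np

print(IntToBinVec(11))                             # [1 1 0 1]
print(IntToBinVec(11, np.zeros(3, dtype=int)))     # [1 1 0], truncated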
def plot_state(rho, method='city'):
    """Plot the quantum state."""
    num = int(np.log2(len(rho)))
    # Need updating to check it's a matrix
    if method == 'city':
        plot_state_city(rho)
    elif method == "paulivec":
        plot_state_paulivec(rho)
    elif method == "qsphere":
        plot_state_qsphere(rho)
    elif method == "bloch":
        for i in range(num):
            bloch_state = list(map(lambda x: np.real(np.trace(
                np.dot(x.to_matrix(), rho))),
                pauli_singles(i, num)))
            plot_bloch_vector(bloch_state, "qubit " + str(i))
    elif method == "wigner":
        plot_wigner_function(rho)
###############################################################
# Plotting Wigner functions
###############################################################
def max_shannon_entropy(n):
    """Return the max possible entropy given "n" mutations.

    The maximum possible entropy is the entropy of the uniform
    distribution. The uniform distribution has entropy equal to
    log(n) (base 2 is used here).

    Parameters
    ----------
    n : int
        total mutation counts

    Returns
    -------
    max possible shannon entropy in bits
    """
    if n <= 0:
        return 0.
    return float(np.log2(n))
def kl_divergence(p, q):
    """Compute the Kullback-Leibler (KL) divergence for discrete distributions.

    Parameters
    ----------
    p : np.array
        "Ideal"/"true" probability distribution
    q : np.array
        Approximation of probability distribution p

    Returns
    -------
    kl : float
        KL divergence of approximating p with the distribution q
    """
    # make sure numpy arrays are floats
    p = p.astype(float)
    q = q.astype(float)
    # compute kl divergence
    kl = np.sum(np.where(p != 0, p * np.log2(p / q), 0))
    return kl
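A short sanity check for the two helpers above (illustrative values only): the entropy of a uniform 8-way choice is 3 bits, and the KL divergence is zero exactly when the two distributions agree.

import numpy as np

print(max_shannon_entropy(8))          # 3.0
p = np.array([0.5, 0.25, 0.25])
q = np.array([1/3., 1/3., 1/3.])
print(kl_divergence(p, p))             # 0.0
print(kl_divergence(p, q))             # ~0.085 bits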
def test_branch_cuts(self):
    # check branch cuts and continuity on them
    yield _check_branch_cut, np.log, -0.5, 1j, 1, -1, True
    yield _check_branch_cut, np.log2, -0.5, 1j, 1, -1, True
    yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True
    yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True
    yield _check_branch_cut, np.sqrt, -0.5, 1j, 1, -1, True
    yield _check_branch_cut, np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True
    yield _check_branch_cut, np.arccos, [ -2, 2], [1j, 1j], 1, -1, True
    yield _check_branch_cut, np.arctan, [0-2j, 2j], [1, 1], -1, 1, True
    yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True
    yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True
    yield _check_branch_cut, np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True
    # check against bogus branch cuts: assert continuity between quadrants
    yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1
    yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1, 1], 1, 1
    yield _check_branch_cut, np.arctan, [ -2, 2], [1j, 1j], 1, 1
    yield _check_branch_cut, np.arcsinh, [ -2, 2, 0], [1j, 1j, 1], 1, 1
    yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1
    yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1
def _hist_bin_sturges(x):
    """
    Sturges histogram bin estimator.

    A very simplistic estimator based on the assumption of normality of
    the data. This estimator has poor performance for non-normal data,
    which becomes especially obvious for large data sets. The estimate
    depends only on size of the data.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    return x.ptp() / (np.log2(x.size) + 1.0)
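As a sketch of what the Sturges estimator above returns (toy data, assuming the function is in scope): for 64 samples it implies log2(64) + 1 = 7 bins, so the bin width is simply the data range divided by 7; numpy exposes the same rule through np.histogram(x, bins='sturges').

import numpy as np

x = np.random.RandomState(0).randn(64)
print(_hist_bin_sturges(x), x.ptp() / 7.0)    # identical values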
def _test_calculate_on_random_set(self, alpha, random_str):
    """
    Test utils.entropy on a random set generated from alphanumerics.

    Note, this method presumes a uniform distribution from the
    stringer.random_string method.
    """
    STRING_LENGTH = len(random_str)
    ALPHA_LEN = len(alpha)
    ent_calc = self.ent.calculate(random_str)
    p = 1.0 / ALPHA_LEN
    # this is the expected Shannon entropy for a uniform distribution
    exp_ent = STRING_LENGTH * p * log2(p)
    accepted_err = 10
    # why the hell is this failing?
    # assert_true(exp_ent-accepted_err <=
    #             ent_calc <=
    #             exp_ent+accepted_err)
Source file: est_rel_entro_MLE.py, project: HJW_KL_divergence_estimator (author: Mathegineer)
def est_entro_MLE(samp):
    """MLE estimate of the Shannon entropy (in bits) of the input sample.

    This function returns a scalar MLE estimate of the entropy of samp when
    samp is a vector, or returns a row vector containing the MLE estimate of
    each column of samp when samp is a matrix.

    Input:
    ----- samp: a vector or matrix which can only contain integers. The
          input data type can be any integer type such as uint8/int8/
          uint16/int16/uint32/int32/uint64/int64, or floating-point
          such as single/double.

    Output:
    ----- est: the entropy (in bits) of the input vector or that of each
          column of the input matrix. The output data type is double.
    """
    samp = formalize_sample(samp)
    [n, wid] = samp.shape
    n = float(n)
    f = fingerprint(samp)
    prob = np.arange(1, f.shape[0] + 1) / n
    prob_mat = -prob * np.log2(prob)
    return prob_mat.dot(f)
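For intuition, the fingerprint-based estimate above is just the plug-in (empirical) entropy. A standalone sketch of the same quantity for a single vector, without the formalize_sample/fingerprint helpers:

import numpy as np

def plugin_entropy(samp):
    # entropy of the empirical distribution, in bits
    _, counts = np.unique(np.asarray(samp), return_counts=True)
    p = counts / float(counts.sum())
    return -np.sum(p * np.log2(p))

print(plugin_entropy([0, 0, 1, 1, 2, 2, 3, 3]))    # 2.0 bits (four equally likely symbols)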
def updateTrace(self, event=None):
    if self.collecting:
        # make sure we're not passing through the current strans filter
        self.flt.stransFilter = None
        self.flt.trainCap = self.getCap()

        if self.rawView:
            data = self.flt.trainCap.data
            chanNames = self.flt.trainCap.getChanNames()
        else:
            data = self.flt.filteredTrain
            chanNames = self.flt.getOutChans()

        data = data - data.mean(axis=0)

        #dFactor = int(np.log2(2*data.size/float(256*8*10))) + 1
        dFactor = (data.size // (256*8*10)) + 1
        if dFactor > 1:
            data = sig.decimate(data, factor=dFactor)  # , lowpassFrac=0.75, order=4)

        t = self.flt.trainCap.getNSec()
        scale = np.max(2*data.std(axis=0))

        self.tracePlot.draw(data, t=t, chanNames=chanNames, scale=scale)
def get_zoom(cls, latitudes, longitudes, size, scale):
    """Compute the level of zoom needed to display all points in a single tile.

    :param pandas.Series latitudes: set of latitudes
    :param pandas.Series longitudes: set of longitudes
    :param int size: size of the tile
    :param int scale: 1 or 2 (free plan), see Google Static Maps API docs
    :return: zoom level
    :rtype: int
    """
    # Extreme pixels
    min_pixel = cls.to_pixel(latitudes.min(), longitudes.min())
    max_pixel = cls.to_pixel(latitudes.max(), longitudes.max())
    # Longitude spans from -180 to +180, latitudes only from -90 to +90
    amplitudes = (max_pixel - min_pixel).abs() * pd.Series([2., 1.], index=['x_pixel', 'y_pixel'])
    return int(np.log2(2 * size / amplitudes.max()))
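A rough arithmetic check of the zoom formula above (made-up pixel spans, since to_pixel is defined elsewhere in the class): if the points span 10 world pixels in x (weighted to 20) and 40 in y, a 640 px tile gives int(log2(2 * 640 / 40)) = 5.

import numpy as np

amplitudes = np.array([10., 40.]) * np.array([2., 1.])    # same x/y weighting as above
print(int(np.log2(2 * 640 / amplitudes.max())))           # 5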
def pwdist_kld(self, seq1idx, seq2idx):
    """Kullback-Leibler discrepancy (KL) between two vectors.

    The KL discrepancy between sequences X and Y is computed from
    their L-tuple (word) frequencies.

    References:
        1. Wu, Hsieh, Li (2001) Biometrics 57: 441-448.
           doi: 10.1111/j.0006-341X.2001.00441.x

    Notes:
        1. KL discrepancy must be computed based on relative
           frequencies (those that sum to 1).
        2. To avoid having an infinite dKL(X, Y) when freqs2 = 0,
           the authors suggest modifying the original formulation
           by adding a unit to both terms of the frequency ratio.
    """
    freqs1 = self[seq1idx] + 1
    freqs2 = self[seq2idx] + 1
    values = freqs1 * np.log2(freqs1 / freqs2)
    value = np.sum(values)
    return value
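The method above pulls the two frequency vectors out of self by index; a self-contained sketch of the same add-one-smoothed computation on toy word counts:

import numpy as np

counts1 = np.array([3., 1., 0., 2.])
counts2 = np.array([1., 1., 1., 1.])
freqs1, freqs2 = counts1 + 1, counts2 + 1          # add a unit to both terms, as in the note above
print(np.sum(freqs1 * np.log2(freqs1 / freqs2)))   # ~4.75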
Source file: merge_vocab.py, project: Sohu-LuckData-Image-Text-Matching-Competition (author: WeitaoVan)
def merge_TFIDF(N, vocab_path, TF_DF_prefix, total_doc):
    t0 = time.time()
    TF = {}
    DF = {}
    total_TF = 1e-10
    for i in range(N):
        new_TF = pk.load(open(TF_DF_prefix + str(i) + 'TF.pkl'))
        new_DF = pk.load(open(TF_DF_prefix + str(i) + 'DF.pkl'))
        total_TF += merge_dict(TF, new_TF)
        merge_dict(DF, new_DF)
        t = time.time() - t0
        print '%d / %d merged. time %fs' % (i+1, N, t)
    pk.dump(TF, open(TF_DF_prefix + 'TF.pkl', 'w'))
    pk.dump(DF, open(TF_DF_prefix + 'DF.pkl', 'w'))
    TFIDF = TF.copy()
    #for word, value in TFIDF.iteritems():
    #    TFIDF[word] = TF[word] * 1.0 / total_TF * np.log2(total_doc*1.0/DF[word])
    save_vocab_txt(TFIDF, vocab_path + '_tfidf.txt')
Source file: tfidf_from_seg.py, project: Sohu-LuckData-Image-Text-Matching-Competition (author: WeitaoVan)
def compute_word2vec(docs, DF, nDoc, model, vecDim=300):
    N = len(docs)
    nonExist_vocab = {}
    feat = np.zeros((N, 300), dtype=np.float32)
    for idx, doc in enumerate(docs):
        nonExist_list = []
        TF = {}
        spt = doc.split(' ')
        nWord = len(spt)
        update_vocab(TF, spt)
        vec = np.zeros(vecDim, dtype=np.float32)
        for word, tf in TF.items():
            try:
                tfidf = 1.0 * tf / nWord * np.log2(1.0 * nDoc / DF[word])
                vec += tfidf * word2vec(model, word)
            except:
                nonExist_list.append(word)
                pass
        feat[idx, :] = vec
        update_vocab(nonExist_vocab, nonExist_list)
        if np.mod(idx, 10000) == 0:
            print '# %d' % idx
            print 'nonExist: %d' % len(nonExist_vocab.keys())
    return feat, nonExist_vocab
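To make the TF-IDF weighting above concrete (toy numbers only): a word occurring twice in a 10-word document and present in 100 of 1,000,000 documents gets weight (2/10) * log2(1e6/100), roughly 2.66, and that scalar scales the word's embedding before the sum.

import numpy as np

tf, nWord, nDoc, df = 2, 10, 1000000, 100
print(1.0 * tf / nWord * np.log2(1.0 * nDoc / df))    # ~2.66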
def save_histogram_with_otsu(name, histograms, otsu):
    plt.clf()
    figure, axarr = plt.subplots(3)
    figure.tight_layout()
    for x, otsu_value in zip(range(3), otsu):
        axarr[x].bar(np.arange(0, histograms[x].size),
                     np.log2(np.where(histograms[x] >= 1,
                                      histograms[x],
                                      1)),
                     1.0)
        axarr[x].grid(True)
        axarr[x].set_ylabel("log2")
        for val in otsu_value:
            axarr[x].axvline(x=val, color="r")
        axarr[x].set_xlim(0, histograms[x].size)
    axarr[0].set_title('Hue')
    axarr[1].set_title('Saturation')
    axarr[2].set_title('Value')
    return plt
def build_classifier(labeled, unlabeled):
    err = np.zeros(self.num_classifiers)
    err_prime = np.zeros(self.num_classifiers)
    s_prime = np.zeros(self.num_classifiers)
    inbags = [None] * self.num_classifiers
    np.random.seed(self.m_seed)
    num_original_labeled_insts = labeled.shape[0]

    # set up the random tree options
    self.num_kvalue = self.num_features
    if self.num_kvalue < 1:
        self.num_kvalue = int(np.log2(labeled.shape[1])) + 1
    self.estimator = self.estimator.set_params(**{"max_features": self.num_kvalue})
    pass
def get_grid(self, max_size=2048):
    import itertools
    possible_combos = self.get_combinations_size()
    #print "possible combos:", possible_combos
    assert possible_combos > 0
    num_columns = int(np.log2(possible_combos))
    if possible_combos > max_size:
        # Grid too large so sample instead
        combo_grid = np.random.binomial(1, 0.5, (max_size, num_columns))
    else:
        # Get entire grid
        combo_grid = list(itertools.product([0, 1], repeat=num_columns))
        assert len(combo_grid) == possible_combos
    combo_grid = np.array(combo_grid)

    # Scale the grid
    cat_mask = ~self.get_numerical_mask()
    X_scaler_cat = copy.deepcopy(self.scaler_)
    X_scaler_cat.mean_ = X_scaler_cat.mean_[cat_mask]
    X_scaler_cat.scale_ = X_scaler_cat.scale_[cat_mask]
    X_scaler_cat.var_ = X_scaler_cat.var_[cat_mask]
    combo_grid = X_scaler_cat.transform(combo_grid)
    return combo_grid