def pyfftw_empty_aligned(shape, dtype, order='C', n=None):
    """
    Create an empty, byte-aligned array for efficient use with
    :mod:`pyfftw`.

    Thin wrapper that forwards every argument, positionally and unchanged,
    to :func:`pyfftw.empty_aligned`.

    Parameters
    ----------
    shape : sequence of ints
      Shape of the array to create
    dtype : dtype
      Element type of the array to create
    order : str, optional (default 'C')
      Passed through as the third argument of
      :func:`pyfftw.empty_aligned`
    n : int, optional (default None)
      The array should be aligned to an n-byte boundary

    Returns
    -------
    a : ndarray
      Empty array with the requested byte alignment
    """

    aligned = pyfftw.empty_aligned(shape, dtype, order, n)
    return aligned
python类mod()的实例源码
def blockcirculant(A):
    """
    Build a block circulant matrix from a tuple of equally shaped 2-D
    arrays, i.e. a block-matrix analogue of
    :func:`scipy.linalg.circulant`.

    Parameters
    ----------
    A : tuple of array_like
      Blocks forming the first block column of the result; the shape and
      dtype of ``A[0]`` determine the block size and the output dtype

    Returns
    -------
    B : ndarray
      Array of shape ``(len(A)*r, len(A)*c)`` where ``(r, c)`` is the
      shape of ``A[0]``
    """

    nblk = len(A)
    rows, cols = A[0].shape
    B = np.zeros((nblk * rows, nblk * cols), dtype=A[0].dtype)
    for col_blk in range(nblk):
        col_slice = slice(cols * col_blk, cols * (col_blk + 1))
        for src, block in enumerate(A):
            # Block ``src`` is shifted down (cyclically) by ``col_blk``
            # block rows in block column ``col_blk``.
            row_blk = (col_blk + src) % nblk
            B[rows * row_blk:rows * (row_blk + 1), col_slice] = block
    return B
def SortByAngle(kNearestPoints, currentPoint, prevPoint):
    '''
    Return the rows of kNearestPoints ordered by ascending angle.

    For each candidate, the angle is the direction currentPoint -> candidate
    minus the direction currentPoint -> prevPoint, converted to degrees and
    wrapped into [0, 360).
    '''
    cx, cy = currentPoint[0], currentPoint[1]
    # Direction towards the previous point is the same for every candidate.
    reference = np.arctan2(prevPoint[1]-cy, prevPoint[0]-cx)
    angles = np.zeros(kNearestPoints.shape[0])
    for idx, candidate in enumerate(kNearestPoints):
        turn = np.arctan2(candidate[1]-cy, candidate[0]-cx) - reference
        # keep only non-negative angles
        angles[idx] = np.mod(np.rad2deg(turn)+360, 360)
    return kNearestPoints[np.argsort(angles)]
def world_synthesis_time_base_generation(temporal_positions, f0, fs, vuv,
                                         time_axis, default_f0):
    """
    Derive pulse positions on ``time_axis`` from a frame-level f0 track.

    ``f0`` and ``vuv`` (given at ``temporal_positions``) are linearly
    interpolated, with extrapolation, onto ``time_axis``.  Samples whose
    interpolated ``vuv`` is not above 0.5 get ``default_f0``.  A pulse is
    placed wherever the accumulated phase, wrapped to [0, 2*pi), changes by
    more than pi/2 between consecutive samples.

    Returns
    -------
    pulse_locations : ndarray
      Entries of ``time_axis[:-1]`` at which a pulse was detected
    pulse_locations_index : ndarray of int32
      ``round(pulse_locations * fs)``
    vuv_interpolated : ndarray of bool
      Interpolated ``vuv`` thresholded at 0.5
    """
    def resample(track):
        return interp1d(temporal_positions, track, kind="linear",
                        fill_value="extrapolate")(time_axis)

    f0_interpolated_raw = resample(f0)
    vuv_interpolated = resample(vuv) > 0.5

    # Zero out f0 where unvoiced, then substitute the default there.
    f0_interpolated = f0_interpolated_raw * vuv_interpolated.astype("float32")
    unvoiced = f0_interpolated == 0
    f0_interpolated[unvoiced] = f0_interpolated[unvoiced] + default_f0

    phase_step = 2 * np.pi * f0_interpolated / float(fs)
    wrapped_phase = np.mod(np.cumsum(phase_step), 2 * np.pi)
    phase_jump = np.abs(np.diff(wrapped_phase))

    # The difference has one sample less than time_axis, hence the [:-1].
    pulse_locations = time_axis[:-1][phase_jump > (np.pi / 2.)]
    pulse_locations_index = np.round(pulse_locations * fs).astype("int32")
    return pulse_locations, pulse_locations_index, vuv_interpolated
def world_synthesis_time_base_generation(temporal_positions, f0, fs, vuv,
                                         time_axis, default_f0):
    """
    Compute pulse locations on ``time_axis`` from frame-level ``f0`` and
    voiced/unvoiced (``vuv``) tracks sampled at ``temporal_positions``.

    Both tracks are linearly interpolated (with extrapolation) onto
    ``time_axis``; the interpolated ``vuv`` is thresholded at 0.5 and
    ``default_f0`` is used wherever the masked f0 is zero.  Pulses are the
    samples where the wrapped cumulative phase differs from its successor
    by more than pi/2.

    Returns the tuple ``(pulse_locations, pulse_locations_index,
    vuv_interpolated)``: the pulse times taken from ``time_axis[:-1]``,
    those times multiplied by ``fs`` and rounded to int32, and the boolean
    interpolated voicing decision.
    """
    interpolate_f0 = interp1d(temporal_positions, f0, kind="linear",
                              fill_value="extrapolate")
    interpolate_vuv = interp1d(temporal_positions, vuv, kind="linear",
                               fill_value="extrapolate")
    f0_interpolated_raw = interpolate_f0(time_axis)
    vuv_interpolated = interpolate_vuv(time_axis) > 0.5

    voiced_weight = vuv_interpolated.astype("float32")
    f0_interpolated = f0_interpolated_raw * voiced_weight
    is_zero = f0_interpolated == 0
    f0_interpolated[is_zero] = f0_interpolated[is_zero] + default_f0

    total_phase = np.cumsum(2 * np.pi * f0_interpolated / float(fs))
    wrapped = np.mod(total_phase, 2 * np.pi)
    delta = np.abs(wrapped[1:] - wrapped[:-1])

    # delta is one element shorter than time_axis, so drop the last time
    # sample before applying the boolean mask.
    is_pulse = delta > (np.pi / 2.)
    pulse_locations = time_axis[:-1][is_pulse]
    pulse_locations_index = np.round(pulse_locations * fs).astype("int32")
    return pulse_locations, pulse_locations_index, vuv_interpolated
def setup(x_shape, resize_ratio):
box_size = 1.0 / resize_ratio
if np.mod(x_shape[1], box_size) != 0 or np.mod(x_shape[2], box_size) != 0:
print "only support width (and height) * resize_ratio is an interger!"
def A_fun(x):
y = box_average(x, int(box_size))
return y
def AT_fun(y):
x = box_repeat(y, int(box_size))
return x
return (A_fun, AT_fun)
def form_sets(samples, labels, max_num, verbose=False):
    """Form sample and label sets.

    Groups are obtained from ``form_set_data(labels, max_num, verbose)``,
    which is expected to return a mapping ``key -> ids``.  Every group with
    at least ``max_num`` ids contributes ``samples[ids]`` and
    ``labels[key]``; smaller groups are skipped.  A dot is written to
    stdout after every 500 kept groups as a progress indicator.

    Returns
    -------
    set_data : list
      ``samples[ids]`` for each kept group
    set_labels : list
      ``labels[key]`` for each kept group
    set_ids : mapping
      The unfiltered result of ``form_set_data``
    """
    # form training set data
    set_ids = form_set_data(labels, max_num, verbose)
    set_data = []
    set_labels = []
    print("forming set samples")
    sys.stdout.flush()
    count = 0
    # .items() works on both Python 2 and 3 (.iteritems() is Python 2 only).
    for key, ids in set_ids.items():
        # ignore small sets
        if len(ids) < max_num:
            continue
        set_data.append(samples[ids])
        set_labels.append(labels[key])
        count += 1
        if count % 500 == 0:
            sys.stdout.write(".")
    sys.stdout.write("\n")
    return set_data, set_labels, set_ids
def test_NotImplemented_not_returned(self):
    """Each listed binary ufunc must raise TypeError for (str array, int)."""
    # See gh-5964 and gh-2091. Some of these functions are not operator
    # related and were fixed for other reasons in the past.
    ufuncs_under_test = [
        np.power, np.add, np.subtract, np.multiply, np.divide,
        np.true_divide, np.floor_divide, np.bitwise_and, np.bitwise_or,
        np.bitwise_xor, np.left_shift, np.right_shift, np.fmax,
        np.fmin, np.fmod, np.hypot, np.logaddexp, np.logaddexp2,
        np.logical_and, np.logical_or, np.logical_xor, np.maximum,
        np.minimum, np.mod,
    ]

    # Not covered here because they still return NotImplemented (to be
    # fixed in the future): np.greater, np.greater_equal, np.less,
    # np.less_equal, np.not_equal.

    string_operand = np.array('1')
    int_operand = 1
    for ufunc in ufuncs_under_test:
        assert_raises(TypeError, ufunc, string_operand, int_operand)
def get_periodic_rvec(data):
    """
    Return ``obtain_rvec(data)`` with its periodic axes wrapped.

    If no axis of ``data.ds`` is periodic the coordinates are returned
    untouched.  Otherwise, for every periodic axis ``i`` the row
    ``coords[i, ...]`` is shifted by the domain left edge and replaced,
    element by element, by whichever of ``mod(x, dw[i])`` and
    ``mod(x, -dw[i])`` has the smaller absolute value.  The array returned
    by ``obtain_rvec`` is modified in place and returned.
    """
    coords = obtain_rvec(data)
    # Nothing to wrap when no axis is periodic.
    if sum(data.ds.periodicity) == 0: return coords
    # Left edge and width of the domain as plain arrays in code_length.
    le = data.ds.domain_left_edge.in_units("code_length").d
    dw = data.ds.domain_width.in_units("code_length").d
    for i in range(coords.shape[0]):
        if not data.ds.periodicity[i]: continue
        coords[i, ...] -= le[i]
        # Figure out which measure is smaller in magnitude: the remainder
        # taken with divisor +dw (index 0) or with divisor -dw (index 1).
        mins = np.argmin([np.abs(np.mod(coords[i, ...], dw[i])),
                          np.abs(np.mod(coords[i, ...], -dw[i]))],
                         axis=0)
        temp_coords = np.mod(coords[i, ...], dw[i])
        # Where the second measure is better, update the temporary coords
        ii = mins==1
        temp_coords[ii] = np.mod(coords[i, ...], -dw[i])[ii]
        # Put the temporary coords into the actual storage
        coords[i, ...] = temp_coords
        # NOTE(review): the next line computes a sum and discards it, so
        # the left-edge shift applied above is never undone.  Confirm
        # whether ``+=`` was intended or whether the line should be removed.
        coords[i, ...] + le[i]
    return coords
def bbox_filter(left, right, domain_width):
    """
    Build a filter selecting particles inside the box ``[left, right)``.

    The returned ``myfilter(chunk, mask=None)`` reads ``chunk['x']``,
    ``chunk['y']`` and ``chunk['z']``, wraps each coordinate periodically
    into ``[left[i], left[i] + domain_width[i])`` and tests it against the
    box.  With ``mask=None`` a new boolean array is returned; otherwise the
    given ``mask`` is AND-ed in place with the result and returned.
    """
    def myfilter(chunk, mask=None):
        pos = np.array([chunk['x'], chunk['y'], chunk['z']]).T

        # This hurts, but is useful for periodicity. Probably should check
        # first if it is even needed for a given left/right
        for axis in range(3):
            offset = pos[:, axis] - left[axis]
            pos[:, axis] = np.mod(offset, domain_width[axis]) + left[axis]

        # Now get all particles that are within the bbox
        at_or_above_left = np.all(pos >= left, axis=1)
        below_right = np.all(pos < right, axis=1)
        if mask is None:
            mask = at_or_above_left
        else:
            np.logical_and(mask, at_or_above_left, out=mask)
        np.logical_and(mask, below_right, out=mask)
        return mask

    return myfilter
def sphere_filter(center, radius, domain_width):
    """
    Build a filter selecting particles strictly inside a sphere.

    The returned ``myfilter(chunk, mask=None)`` reads ``chunk['x']``,
    ``chunk['y']`` and ``chunk['z']``, wraps each coordinate periodically
    into ``[left[i], left[i] + domain_width[i])`` with
    ``left = center - radius``, and keeps particles whose Euclidean
    distance to ``center`` is below ``radius``.  With ``mask=None`` a new
    boolean array is returned; otherwise the given ``mask`` is multiplied
    in place by the per-particle result and returned.
    """
    def myfilter(chunk, mask=None):
        pos = np.array([chunk['x'], chunk['y'], chunk['z']]).T
        left = center-radius

        # This hurts, but is useful for periodicity. Probably should check
        # first if it is even needed for a given left/right
        for i in range(3):
            pos[:, i] = np.mod(pos[:, i] - left[i], domain_width[i]) + left[i]

        # Now get all particles that are within the radius.  The distance
        # is computed per particle (row) for both branches; the previous
        # masked branch called np.linalg.norm without an axis, which yields
        # a single matrix norm for the whole chunk instead.
        inside = ((pos-center)**2).sum(axis=1)**0.5 < radius
        if mask is None:
            mask = inside
        else:
            np.multiply(mask, inside, mask)
        return mask

    return myfilter
def train(self, x, y, learning_rate=1e-3, reg=1e-5, num_iter=1500, batch_size=200):
    """
    Train the weights ``self.W`` with mini-batch gradient descent.

    Parameters
    ----------
    x : ndarray, shape (num_train, num_feature)
      Training samples
    y : ndarray
      Integer labels; the number of classes is taken as ``max(y) + 1``
    learning_rate : float
      Step size of each update ``W -= learning_rate * grad``
    reg : float
      Regularisation strength forwarded to ``self.loss``
    num_iter : int
      Number of update steps
    batch_size : int
      Number of indices drawn per step with ``np.random.choice``
      (its default sampling, i.e. with replacement)

    Returns
    -------
    loss_history, acc_history : list, list
      Per-step batch loss and batch accuracy
    """
    num_train, num_feature = x.shape
    num_classes = np.max(y) + 1
    # Identity test: ``self.W == None`` on an ndarray is an elementwise
    # comparison whose truth value is ambiguous once W has been set.
    if self.W is None:
        self.W = np.random.randn(num_feature, num_classes)

    loss_history = []
    acc_history = []

    for step in range(num_iter):
        indices = np.random.choice(num_train, batch_size)
        x_batch = x[indices]
        y_batch = y[indices]
        loss, grad = self.loss(x_batch, y_batch, reg)
        acc = self.accuracy(x_batch, y_batch)
        loss_history.append(loss)
        acc_history.append(acc)
        self.W += -learning_rate * grad
        if step % 100 == 0:
            print("iteration {}/{} loss: {:.7f}".format(step, num_iter, loss))
    return loss_history, acc_history
def train(self, x, y, learning_rate=1e-3, reg=1e-5, num_iter=1500, batch_size=200):
    """
    Fit ``self.W`` by mini-batch gradient descent.

    ``x`` has shape ``(num_train, num_feature)`` and ``y`` holds integer
    labels (``max(y) + 1`` classes).  If ``self.W`` is ``None`` it is
    initialised with ``np.random.randn(num_feature, num_classes)``.  Each
    of the ``num_iter`` steps draws ``batch_size`` indices with
    ``np.random.choice`` (default sampling, i.e. with replacement),
    evaluates ``self.loss(x_batch, y_batch, reg)`` and
    ``self.accuracy(x_batch, y_batch)``, and updates
    ``W -= learning_rate * grad``.  Progress is printed every 100 steps.

    Returns
    -------
    loss_history, accuracy_history : list, list
      Per-step batch loss and batch accuracy
    """
    num_train, num_feature = x.shape
    num_classes = np.max(y) + 1
    # ``is None`` rather than ``== None``: comparing an ndarray with ``==``
    # is elementwise and cannot be used as an ``if`` condition.
    if self.W is None:
        self.W = np.random.randn(num_feature, num_classes)

    loss_history = []
    accuracy_history = []

    for step in range(num_iter):
        indices = np.random.choice(num_train, batch_size)
        x_batch = x[indices]
        y_batch = y[indices]
        loss, grad = self.loss(x_batch, y_batch, reg)
        acc = self.accuracy(x_batch, y_batch)
        loss_history.append(loss)
        accuracy_history.append(acc)
        self.W += -learning_rate * grad
        if step % 100 == 0:
            print("iteration {}/{} loss: {:.7f}".format(step, num_iter, loss))
    return loss_history, accuracy_history
def survey(self, quantity=None):
    """
    Draw a random batch of stimuli, optionally corrupted by noise.

    ``quantity`` row indices are drawn uniformly from
    ``range(len(self.ascii_vals))``; a falsy ``quantity`` means
    ``len(self.ascii_vals)``.  When ``self.noise`` is truthy, integer
    Gaussian noise (scale ``len(self.character_set) // 2``) is added where
    a Bernoulli(``self.noise``) mask is 1, and the result is taken modulo
    ``len(self.character_set)``.  The reformatted batch is printed.

    Returns ``[stimuli.T, target.T]`` where the target is the clean
    stimuli when ``self.autoencoder`` is truthy and ``self.expected``
    otherwise, both indexed by the drawn rows.
    """
    pool_size = len(self.ascii_vals)
    if not quantity:
        quantity = pool_size

    picks = np.random.randint(pool_size, size=quantity)

    if not self.noise:
        stimuli = self.stimuli[picks]
    else:
        alphabet_size = len(self.character_set)
        batch_shape = self.stimuli[picks].shape
        # Draw the Gaussian noise before the mask to keep the RNG sequence.
        generated_noise = np.random.normal(
            0., scale=alphabet_size // 2, size=batch_shape).astype(int)
        mask = np.random.binomial(1, self.noise, size=batch_shape)
        stimuli = np.mod(self.stimuli[picks] + generated_noise * mask,
                         alphabet_size)

    print(self.reformat(stimuli))

    if self.autoencoder:
        target = self.stimuli[picks]
    else:
        target = self.expected[picks]
    return [stimuli.T, target.T]
tfidf_weighted_word2vec.py 文件源码
项目:Sohu-LuckData-Image-Text-Matching-Competition
作者: WeitaoVan
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def doc2word2vec(data_txt_path, word2vec_model, save_path, dim=300, length=10):
    """
    Convert every line of a text file into a word2vec feature row and save
    the matrix to an HDF5 file.

    Notes from the original author: tf-idf values are not used as
    coefficients, usually because ``data_txt_path`` is a tfidf-sorted
    text; ``length = 1`` means the mean of the vectors, ``length > 1``
    means the vectors are concatenated.

    Parameters
    ----------
    data_txt_path : str
      Text file with one document per line, words separated by spaces
    word2vec_model : str
      Path of a pickled object forwarded to ``create_vec_from_words``
    save_path : str
      Output HDF5 file; the matrix is stored as dataset ``'feature'``
    dim : int
      Passed to ``create_vec_from_words``; each row has ``dim * length``
      entries
    length : int
      Passed to ``create_vec_from_words``
    """
    # NOTE(security): unpickling can execute arbitrary code; only load
    # model files from a trusted source.
    # Pickles are binary data, so open in 'rb'; ``with`` closes the handles.
    with open(word2vec_model, 'rb') as model_file:
        word2vec = pk.load(model_file)
    with open(data_txt_path) as txt_file:
        docs = txt_file.readlines()

    N = len(docs)
    feat = np.zeros((N, dim * length), dtype=np.float32)
    t0 = time.time()
    for idx, doc in enumerate(docs):
        words = doc.strip().split(' ')
        feat[idx, :] = create_vec_from_words(words, word2vec, dim, length)
        if idx % 10000 == 0:
            t = time.time() - t0
            print('# %d, t = %d minutes' % (idx, t / 60))

    # The context manager closes the HDF5 file even if writing fails.
    with h5py.File(save_path, 'w') as h5file:
        h5file.create_dataset('feature', data=feat, dtype=np.float32)
    print('saved to %s' % save_path)
tfidf_weighted_word2vec.py 文件源码
项目:Sohu-LuckData-Image-Text-Matching-Competition
作者: WeitaoVan
项目源码
文件源码
阅读 39
收藏 0
点赞 0
评论 0
def tfidf_cluster_feature(data_txt_path, word2vec_distr_path, save_path, df_path, nDoc):
    """
    Compute one feature row per document with
    ``compute_tfidf_cluster_feat`` and stream the rows into an HDF5 file.

    Parameters
    ----------
    data_txt_path : str
      Text file with one document per line, words separated by spaces
    word2vec_distr_path : str
      Path of a pickled dict whose values are arrays; the length of the
      first value's leading axis defines the feature dimension
    save_path : str
      Output HDF5 file; rows are written to dataset ``'feature'``
    df_path : str
      Path of a pickled object forwarded as ``DF`` to
      ``compute_tfidf_cluster_feat``
    nDoc : int
      Forwarded to ``compute_tfidf_cluster_feat``
    """
    # NOTE(security): unpickling can execute arbitrary code; only load
    # files from a trusted source.
    # Pickles are binary data, so open in 'rb'; ``with`` closes the handles.
    with open(word2vec_distr_path, 'rb') as distr_file:
        word2vec_distr = pk.load(distr_file)
    with open(data_txt_path) as txt_file:
        docs = txt_file.readlines()
    with open(df_path, 'rb') as df_file:
        DF = pk.load(df_file)

    N = len(docs)
    # next(iter(...)) works on Python 2 and 3; dict.values() is not
    # indexable on Python 3.
    DIM = next(iter(word2vec_distr.values())).shape[0]

    # The context manager closes the HDF5 file even if a row fails.
    with h5py.File(save_path, 'w') as h5file:
        feat = h5file.create_dataset('feature', shape=(N, DIM), dtype=np.float32)
        t0 = time.time()
        for idx, doc in enumerate(docs):
            words = doc.strip().split(' ')
            feat[idx, :] = compute_tfidf_cluster_feat(words, DF, nDoc, word2vec_distr)
            if idx % 10000 == 0:
                t = time.time() - t0
                print('#%d, t = %d mins' % (idx, t / 60))
    print('saved to %s' % save_path)
tfidf_from_seg.py 文件源码
项目:Sohu-LuckData-Image-Text-Matching-Competition
作者: WeitaoVan
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def compute_word2vec(docs, DF, nDoc, model, vecDim=300):
    """
    Build one tf-idf weighted word-vector sum per document.

    For each document (words separated by single spaces) the term counts
    are collected with ``update_vocab`` and every distinct word contributes
    ``tf / nWord * log2(nDoc / DF[word]) * word2vec(model, word)`` to the
    document vector.  Words for which that computation fails are skipped
    and counted in the returned vocabulary.

    Parameters
    ----------
    docs : sequence of str
      Documents
    DF : mapping
      Per-word value used as the document-frequency denominator
    nDoc : int
      Numerator of the idf ratio
    model : object
      Forwarded to ``word2vec(model, word)``
    vecDim : int
      Length of each word vector and of each output row

    Returns
    -------
    feat : ndarray of float32, shape (len(docs), vecDim)
      Document vectors
    nonExist_vocab : dict
      Filled by ``update_vocab`` with the skipped words
    """
    N = len(docs)
    nonExist_vocab = {}
    # Size the output by vecDim; a hard-coded width of 300 made every other
    # vecDim fail on the row assignment below.
    feat = np.zeros((N, vecDim), dtype=np.float32)
    for idx, doc in enumerate(docs):
        nonExist_list = []
        TF = {}
        spt = doc.split(' ')
        nWord = len(spt)
        update_vocab(TF, spt)
        vec = np.zeros(vecDim, dtype=np.float32)
        for word, tf in TF.items():
            try:
                tfidf = 1.0 * tf / nWord * np.log2(1.0 * nDoc / DF[word])
                vec += tfidf * word2vec(model, word)
            except Exception:
                # Best effort: a word that cannot be weighted or embedded
                # (presumably missing from DF or the model) is recorded and
                # skipped.  ``Exception`` keeps KeyboardInterrupt and
                # SystemExit propagating, unlike a bare ``except``.
                nonExist_list.append(word)
        feat[idx, :] = vec
        update_vocab(nonExist_vocab, nonExist_list)
        if idx % 10000 == 0:
            print('# %d' % idx)
    print('nonExist: %d' % len(nonExist_vocab))
    return feat, nonExist_vocab
tfidf_from_seg.py 文件源码
项目:Sohu-LuckData-Image-Text-Matching-Competition
作者: WeitaoVan
项目源码
文件源码
阅读 38
收藏 0
点赞 0
评论 0
def tfidf(data_txt_path, df_path, nDoc, word2id_path, save_path):
    """
    Compute a TF-IDF row per document with ``compute_tfidf`` and stream the
    rows into an HDF5 file.

    Parameters
    ----------
    data_txt_path : str
      Text file with one document per line
    df_path : str
      Path of a pickled object forwarded as ``DF`` to ``compute_tfidf``
    nDoc : int
      Forwarded to ``compute_tfidf``
    word2id_path : str
      Path of a pickled mapping; its number of keys is the feature
      dimension
    save_path : str
      Output HDF5 file; rows are written to dataset ``'feature'``
    """
    t0 = time.time()
    with open(data_txt_path) as txt_file:
        docs = txt_file.readlines()
    # NOTE(security): unpickling can execute arbitrary code; only load
    # files from a trusted source.
    # Pickles are binary data, so open in 'rb'; ``with`` closes the handles.
    with open(word2id_path, 'rb') as id_file:
        word2id = pk.load(id_file)
    N = len(docs)
    DIM = len(word2id)

    # The context manager closes the HDF5 file even if a row fails.
    with h5py.File(save_path, 'w') as h5file:
        h5set = h5file.create_dataset('feature', shape=(N, DIM), dtype=np.float32)
        print('word2id loaded from %s' % word2id_path)
        print('dataset created, shape (%d, %d)' % (N, DIM))
        # load DF
        with open(df_path, 'rb') as df_file:
            DF = pk.load(df_file)
        # compute tfidf
        for idx, doc in enumerate(docs):
            # Assigning into the dataset already copies the values, so no
            # explicit .copy() is needed.
            h5set[idx, :] = compute_tfidf(doc, DIM, DF, nDoc, word2id)
            if idx % 10000 == 0:
                t = time.time() - t0
                print('# %d, t = %f hours' % (idx, t / 3600.))
    print('TF-IDF feature saved to %s' % save_path)
def row_col_from_lin(ct, sh):
    """
    Convert a 1D counter into a col and row counter

    The counter is zero-based and runs down the rows first:
    ``row = ct % tot_rows`` and ``col = ct // tot_rows``.

    Parameters
    ----------
    ct : int or ndarray of int
      Linear counter(s)
    sh : sequence of length 2
      ``(tot_rows, tot_cols)``

    Returns
    -------
    [row, col] : list
      Row and column counter(s), or ``None`` (after printing a message)
      when any counter is ``>= tot_rows * tot_cols``
    """
    assert len(sh) == 2, 'Shape must be 2D'
    tot_rows = sh[0]
    tot_cols = sh[1]
    # Valid counters are 0 .. tot_rows*tot_cols - 1, so the total itself is
    # already out of range (it would map to column index tot_cols).
    # _np.any handles scalars and arrays alike.
    if _np.any(_np.asarray(ct) >= tot_rows*tot_cols):
        print('Count is out-of-range. Returning None.')
        return None
    row = _np.mod(ct, tot_rows)
    col = ct//tot_rows
    return [row, col]