def roll_zeropad(a, shift, axis=None):
    a = np.asanyarray(a)
    if shift == 0: return a
    if axis is None:
        n = a.size
        reshape = True
    else:
        n = a.shape[axis]
        reshape = False
    if np.abs(shift) > n:
        res = np.zeros_like(a)
    elif shift < 0:
        shift += n
        zeros = np.zeros_like(a.take(np.arange(n-shift), axis))
        res = np.concatenate((a.take(np.arange(n-shift, n), axis), zeros), axis)
    else:
        zeros = np.zeros_like(a.take(np.arange(n-shift, n), axis))
        res = np.concatenate((zeros, a.take(np.arange(n-shift), axis)), axis)
    if reshape:
        return res.reshape(a.shape)
    else:
        return res
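# A minimal usage sketch (not from the original source): unlike np.roll, which wraps
# values around, roll_zeropad drops the shifted-out elements and pads with zeros.
# Assumes numpy is imported as np and roll_zeropad (above) is in scope.
import numpy as np

a = np.array([1, 2, 3, 4, 5])
assert np.array_equal(roll_zeropad(a, 2), [0, 0, 1, 2, 3])    # shifted right, zero-padded
assert np.array_equal(roll_zeropad(a, -2), [3, 4, 5, 0, 0])   # shifted left, zero-padded
assert np.array_equal(np.roll(a, 2), [4, 5, 1, 2, 3])         # np.roll wraps instead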
# Python concatenate() usage examples (collected source code)
def _create_feat_arr(self, X, prf_crf_pred):
    """ Concatenate the original feature vector with the prediction probabilities
    of a cascade layer.
    :param X: np.array
        Array containing the input samples.
        Must be of shape [n_samples, data] where data is a 1D array.
    :param prf_crf_pred: list
        Prediction probabilities by a cascade layer for X.
    :return: np.array
        Concatenation of X and the predicted probabilities.
        To be used for the next layer in a cascade forest.
    """
    swap_pred = np.swapaxes(prf_crf_pred, 0, 1)
    add_feat = swap_pred.reshape([np.shape(X)[0], -1])
    feat_arr = np.concatenate([add_feat, X], axis=1)
    return feat_arr
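# Hedged shape walkthrough (illustrative, not from the original source): if a cascade
# layer holds 4 forests that each output class probabilities of shape [n_samples, 3],
# the swapaxes/reshape/concatenate sequence widens X by 4 * 3 columns.
import numpy as np

n_samples, n_forests, n_classes, n_features = 5, 4, 3, 7
X = np.random.rand(n_samples, n_features)
prf_crf_pred = [np.random.rand(n_samples, n_classes) for _ in range(n_forests)]

swap_pred = np.swapaxes(prf_crf_pred, 0, 1)        # [n_samples, n_forests, n_classes]
add_feat = swap_pred.reshape([n_samples, -1])      # [n_samples, n_forests * n_classes]
feat_arr = np.concatenate([add_feat, X], axis=1)   # widened feature matrix
assert feat_arr.shape == (5, 4 * 3 + 7)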
def encode_batch_seq(self, src_seq, src_seq_rev):
    forward_states = self.enc_fwd_lstm.initial_state().add_inputs(src_seq)
    backward_states = self.enc_bwd_lstm.initial_state().add_inputs(src_seq_rev)[::-1]
    src_encodings = []
    forward_cells = []
    backward_cells = []
    for forward_state, backward_state in zip(forward_states, backward_states):
        fwd_cell, fwd_enc = forward_state.s()
        bak_cell, bak_enc = backward_state.s()
        src_encodings.append(dynet.concatenate([fwd_enc, bak_enc]))
        forward_cells.append(fwd_cell)
        backward_cells.append(bak_cell)
    decoder_init = dynet.concatenate([forward_cells[-1], backward_cells[0]])
    decoder_all = [dynet.concatenate([fwd, bwd]) for fwd, bwd in zip(forward_cells, list(reversed(backward_cells)))]
    return src_encodings, decoder_all
def _ncc_c(x, y):
    """
    >>> _ncc_c([1,2,3,4], [1,2,3,4])
    array([ 0.13333333,  0.36666667,  0.66666667,  1.        ,  0.66666667,
            0.36666667,  0.13333333])
    >>> _ncc_c([1,1,1], [1,1,1])
    array([ 0.33333333,  0.66666667,  1.        ,  0.66666667,  0.33333333])
    >>> _ncc_c([1,2,3], [-1,-1,-1])
    array([-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005])
    """
    den = np.array(norm(x) * norm(y))
    den[den == 0] = np.Inf
    x_len = len(x)
    fft_size = 1 << (2 * x_len - 1).bit_length()
    cc = ifft(fft(x, fft_size) * np.conj(fft(y, fft_size)))
    cc = np.concatenate((cc[-(x_len-1):], cc[:x_len]))
    return np.real(cc) / den
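# Sanity-check sketch (not from the original source): the FFT-based result should match
# a direct normalized cross-correlation computed with np.correlate. Assumes the imports
# the snippet itself relies on, e.g. `from numpy.fft import fft, ifft` and
# `from numpy.linalg import norm`.
import numpy as np
from numpy.fft import fft, ifft
from numpy.linalg import norm

x, y = np.array([1.0, 2.0, 3.0]), np.array([-1.0, -1.0, -1.0])
direct = np.correlate(x, y, mode='full') / (norm(x) * norm(y))
assert np.allclose(_ncc_c(x, y), direct)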
def test_server_logprob_normalized(N, V, C, M):
    model = generate_fake_model(N, V, C, M)
    config = TINY_CONFIG.copy()
    config['model_num_clusters'] = M
    model['config'] = config
    server = TreeCatServer(model)
    # The total probability of all categorical rows should be 1.
    ragged_index = model['suffstats']['ragged_index']
    factors = []
    for v in range(V):
        C = ragged_index[v + 1] - ragged_index[v]
        factors.append([one_hot(c, C) for c in range(C)])
    data = np.array(
        [np.concatenate(columns) for columns in itertools.product(*factors)],
        dtype=np.int8)
    logprobs = server.logprob(data)
    logtotal = np.logaddexp.reduce(logprobs)
    assert logtotal == pytest.approx(0.0, abs=1e-5)
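# Illustrative sketch (not from the original source) of the enumeration trick above:
# build every possible row of concatenated one-hot blocks with itertools.product.
# `one_hot` here is a stand-in for the helper used by the test.
import itertools
import numpy as np

def one_hot(c, C):
    return np.eye(C, dtype=np.int8)[c]

factors = [[one_hot(c, C) for c in range(C)] for C in (2, 3)]   # a 2-way and a 3-way feature
data = np.array([np.concatenate(cols) for cols in itertools.product(*factors)], dtype=np.int8)
assert data.shape == (2 * 3, 2 + 3)   # 6 possible rows, each 5 columns wide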
def load_features(self, episode_paths):
    features = {}
    labels = None
    for episode_path in episode_paths:
        episode_features, episode_labels = self.load_features_episode(episode_path)
        for key, value in episode_features.items():
            if key not in features:
                features[key] = value
            else:
                features[key] = np.concatenate([features[key], value], axis=0)
        if labels is None:
            labels = episode_labels
        else:
            labels = np.concatenate([labels, episode_labels], axis=0)
        print(episode_path)
    return features, labels
def minutes_for_days_in_range(self, start, end):
    """
    Get all market minutes for the days between start and end, inclusive.
    """
    start_date = self.normalize_date(start)
    end_date = self.normalize_date(end)
    all_minutes = []
    for day in self.days_in_range(start_date, end_date):
        day_minutes = self.market_minutes_for_day(day)
        all_minutes.append(day_minutes)
    # Concatenate all minutes and truncate minutes before start/after end.
    return pd.DatetimeIndex(
        np.concatenate(all_minutes), copy=False, tz='UTC',
    )
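# Standalone sketch (hypothetical 9:31-16:00 trading minutes, not the real calendar API):
# the same pattern of collecting per-day DatetimeIndex objects and concatenating them once.
import numpy as np
import pandas as pd

days = pd.date_range('2017-01-02', periods=2, freq='D')
per_day_minutes = [pd.date_range(day + pd.Timedelta(hours=9, minutes=31), periods=390, freq='min')
                   for day in days]
all_minutes = pd.DatetimeIndex(np.concatenate(per_day_minutes)).tz_localize('UTC')
assert len(all_minutes) == 2 * 390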
def plot_barcode_rank(chart, sample_properties, sample_data):
    """ Generate the RNA counter barcode rank plot """
    if sample_properties.get('genomes') is None or sample_data.barcode_summary is None:
        return None
    if len(sample_properties['genomes']) == 0:
        return None
    counts_per_bc = []
    for genome in sample_properties['genomes']:
        key = cr_utils.format_barcode_summary_h5_key(genome, cr_constants.TRANSCRIPTOME_REGION, cr_constants.CONF_MAPPED_DEDUPED_READ_TYPE)
        if key in sample_data.barcode_summary:
            counts_per_bc.append(sample_data.barcode_summary[key][:])
        else:
            # Not guaranteed to exist, depending on pipeline
            return
    counts_per_bc = np.concatenate(counts_per_bc)
    return _plot_barcode_rank(chart, counts_per_bc, sample_data.num_cells)
def numpy_groupby(values, keys):
    """ Group a collection of numpy arrays by key arrays.
    Yields (key_tuple, view_tuple) where key_tuple is the key grouped on and view_tuple is a tuple of views into the value arrays.
    values: tuple of arrays to group
    keys: tuple of sorted, numeric arrays to group by """
    if len(values) == 0:
        return
    if len(values[0]) == 0:
        return
    for key_array in keys:
        assert len(key_array) == len(keys[0])
    for value_array in values:
        assert len(value_array) == len(keys[0])
    # The indices where any of the keys differ from the previous key become group boundaries
    key_change_indices = np.logical_or.reduce(tuple(np.concatenate(([1], np.diff(key))) != 0 for key in keys))
    group_starts = np.flatnonzero(key_change_indices)
    group_ends = np.roll(group_starts, -1)
    group_ends[-1] = len(keys[0])
    for group_start, group_end in itertools.izip(group_starts, group_ends):
        yield tuple(key[group_start] for key in keys), tuple(value[group_start:group_end] for value in values)
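# Standalone sketch (not from the original source) of the boundary trick used above:
# prepending a sentinel 1 to np.diff marks every index where a sorted key changes.
# (The function itself uses itertools.izip and therefore targets Python 2.)
import numpy as np

key = np.array([1, 1, 2, 2, 2, 5])
changes = np.concatenate(([1], np.diff(key))) != 0
group_starts = np.flatnonzero(changes)                 # -> [0, 2, 5]
group_ends = np.append(group_starts[1:], len(key))     # -> [2, 5, 6]
groups = [(int(key[s]), key[s:e].tolist()) for s, e in zip(group_starts, group_ends)]
assert groups == [(1, [1, 1]), (2, [2, 2]), (5, [5])]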
def concatenate(out_filename, in_filenames, metrics=None):
    # Append each column from each input h5 to the output h5
    out_mc = MoleculeCounter.open(out_filename, mode='w')
    ref_set = False
    for in_filename in in_filenames:
        in_mc = MoleculeCounter.open(in_filename, mode='r')
        # if no metrics specified, copy them from the first file
        if metrics is None:
            metrics = in_mc.get_all_metrics()
        for name, array_tuple in in_mc.columns.iteritems():
            h5_array, _ = array_tuple
            out_mc.add_many(name, h5_array[:])
        if not ref_set:  # only set once
            for name, h5_array in in_mc.ref_columns.iteritems():
                out_mc.set_ref_column(name, h5_array[:])
            ref_set = True
        in_mc.close()
    out_mc.set_all_metrics(metrics)
    out_mc.save()
def concatenate_sort(out_filename, in_filenames, sort_cols, metrics=None):
    in_mcs = [MoleculeCounter.open(f, 'r') for f in in_filenames]
    out_mc = MoleculeCounter.open(out_filename, mode='w')
    if metrics is None:
        metrics = in_mcs[0].get_all_metrics()
    out_mc.set_all_metrics(metrics)
    for col, array in in_mcs[0].ref_columns.iteritems():
        out_mc.set_ref_column(col, array[:])
    sort_array = []
    # reverse sort columns so they get sorted in the right order
    for col in reversed(sort_cols):
        sort_array.append(np.concatenate([mc.get_column(col) for mc in in_mcs]))
    sort_index = np.lexsort(sort_array)
    for col in MOLECULE_INFO_COLUMNS:
        col_sorted = np.concatenate([mc.get_column(col) for mc in in_mcs])[sort_index]
        out_mc.add_many(col, col_sorted)
    for mc in in_mcs:
        mc.close()
    out_mc.save()
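# Generic sketch (hypothetical column dicts, not the MoleculeCounter API) of the
# merge-and-sort pattern above: concatenate each column across inputs, then np.lexsort.
# np.lexsort treats the *last* key as the primary sort key, hence reversed(sort_cols).
import numpy as np

parts = [{'barcode': np.array([3, 1]), 'umi': np.array([7, 9])},
         {'barcode': np.array([2, 1]), 'umi': np.array([5, 4])}]
sort_cols = ['barcode', 'umi']                  # primary key first, as in the snippet
sort_keys = [np.concatenate([p[col] for p in parts]) for col in reversed(sort_cols)]
order = np.lexsort(sort_keys)
merged = {col: np.concatenate([p[col] for p in parts])[order] for col in sort_cols}
assert merged['barcode'].tolist() == [1, 1, 2, 3]
assert merged['umi'].tolist() == [4, 9, 5, 7]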
def get_bc_counts(genomes, genes, molecule_counter):
    genome_ids = molecule_counter.get_column('genome')
    genome_index = cr_reference.get_genome_index(genomes)
    conf_mapped_reads = molecule_counter.get_column('reads')
    barcodes = molecule_counter.get_column('barcode')
    bc_counts = {}
    for genome in genomes:
        genome_id = cr_reference.get_genome_id(genome, genome_index)
        genome_indices = genome_ids == genome_id
        if genome_indices.sum() == 0:
            # edge case - there's no data for this genome (e.g. empty sample, false barnyard sample, or nothing confidently mapped)
            continue
        bcs_for_genome = barcodes[genome_indices]
        # only count UMIs with at least one conf mapped read
        umi_conf_mapped_to_genome = conf_mapped_reads[genome_indices] > 0
        bc_breaks = bcs_for_genome[1:] - bcs_for_genome[:-1]
        bc_breaks = np.concatenate(([1], bc_breaks))  # first row is always a break
        bc_break_indices = np.nonzero(bc_breaks)[0]
        unique_bcs = bcs_for_genome[bc_break_indices]
        umis_per_bc = np.add.reduceat(umi_conf_mapped_to_genome, bc_break_indices)
        cmb_reads_per_bc = np.add.reduceat(conf_mapped_reads[genome_indices], bc_break_indices)
        bc_counts[genome] = (unique_bcs, umis_per_bc, cmb_reads_per_bc)
    return bc_counts
def load(shape, vertex_array):
    destination = vertex_array[gx.VA_PTNMTXIDX.name]
    vertex_index = 0
    matrix_table = numpy.zeros(10, numpy.uint32)
    for batch in shape.batches:
        source = numpy.concatenate([primitive.vertices[gx.VA_PTNMTXIDX.name] for primitive in batch.primitives])
        source //= 3
        for i, index in enumerate(batch.matrix_table):
            if index == 0xFFFF: continue
            matrix_table[i] = index
        length = sum(len(primitive.vertices) for primitive in batch.primitives)
        numpy.take(matrix_table, source, 0, destination[vertex_index:vertex_index + length])
        vertex_index += length
    glEnableVertexAttribArray(MATRIX_INDEX_ATTRIBUTE_LOCATION)
    vertex_type = vertex_array.dtype
    stride = vertex_type.itemsize
    offset = vertex_type.fields[gx.VA_PTNMTXIDX.name][1]
    glVertexAttribIPointer(MATRIX_INDEX_ATTRIBUTE_LOCATION, 1, GL_UNSIGNED_INT, stride, GLvoidp(offset))
def sample_data(data, num_sample):
    """ data is in N x ...
    we want to keep num_sample x C of them.
    if N > num_sample, we will randomly keep num_sample of them.
    if N < num_sample, we will randomly duplicate samples.
    """
    N = data.shape[0]
    if N == num_sample:
        return data, range(N)
    elif N > num_sample:
        sample = np.random.choice(N, num_sample)
        return data[sample, ...], sample
    else:
        sample = np.random.choice(N, num_sample - N)
        dup_data = data[sample, ...]
        return np.concatenate([data, dup_data], 0), range(N) + list(sample)
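# Hedged usage sketch (not from the original source). The snippet's duplication branch
# returns `range(N) + list(sample)`, which is Python 2 syntax; this standalone Python 3
# version shows the same resampling idea with np.random.choice and np.concatenate.
import numpy as np

data = np.arange(3 * 2).reshape(3, 2)          # N = 3 samples, 2 features each
extra = np.random.choice(3, 5 - 3)             # indices to duplicate to reach 5 samples
resampled = np.concatenate([data, data[extra, ...]], 0)
assert resampled.shape == (5, 2)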
def read_data_files(self, subset='train'):
    """Reads from data file and returns images and labels in a numpy array."""
    assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
                           'data')
    if subset == 'train':
        filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i)
                     for i in xrange(1, 6)]
    elif subset == 'validation':
        filenames = [os.path.join(self.data_dir, 'test_batch')]
    else:
        raise ValueError('Invalid data subset "%s"' % subset)
    inputs = []
    for filename in filenames:
        with gfile.Open(filename, 'r') as f:
            inputs.append(cPickle.load(f))
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    all_images = np.concatenate(
        [each_input['data'] for each_input in inputs]).astype(np.float32)
    all_labels = np.concatenate(
        [each_input['labels'] for each_input in inputs])
    return all_images, all_labels
def param_init_gru(params, prefix='gru', nin=None, dim=None, hiero=False):
    if not hiero:
        W = numpy.concatenate([norm_weight(nin, dim),
                               norm_weight(nin, dim)], axis=1)
        params[_p(prefix, 'W')] = W
        params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')
    return params
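# The snippet assumes `norm_weight`, `ortho_weight` and `_p` helpers from the surrounding
# codebase. A common definition in Theano-era NMT code looks like the sketch below; this
# is an assumption for illustration, not necessarily the exact original helpers.
import numpy

def _p(prefix, name):
    return '%s_%s' % (prefix, name)           # parameter name, e.g. 'gru_W'

def ortho_weight(ndim):
    # random orthogonal matrix via SVD of a square Gaussian matrix
    u, _, _ = numpy.linalg.svd(numpy.random.randn(ndim, ndim))
    return u.astype('float32')

def norm_weight(nin, nout=None, scale=0.01, ortho=True):
    nout = nin if nout is None else nout
    if nout == nin and ortho:
        return ortho_weight(nin)
    return (scale * numpy.random.randn(nin, nout)).astype('float32')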
def load_data_and_labels(positive_data_file, negative_data_file):
    """
    Loads MR polarity data from files, splits the data into words and generates labels.
    Returns split sentences and labels.
    """
    # Load data from files
    positive_examples = list(open(positive_data_file, "r").readlines())
    positive_examples = [s.strip() for s in positive_examples]
    negative_examples = list(open(negative_data_file, "r").readlines())
    negative_examples = [s.strip() for s in negative_examples]
    # Split by words
    x_text = positive_examples + negative_examples
    x_text = [clean_str(sent) for sent in x_text]
    # Generate labels
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    return [x_text, y]
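# Tiny illustration (hypothetical sentences, not the MR dataset): the per-class
# list-of-lists labels concatenate into a single (n_examples, 2) one-hot array.
import numpy as np

positive_examples = ["good movie", "great plot"]
negative_examples = ["boring film"]
positive_labels = [[0, 1] for _ in positive_examples]
negative_labels = [[1, 0] for _ in negative_examples]
y = np.concatenate([positive_labels, negative_labels], 0)
assert y.shape == (3, 2) and y[0].tolist() == [0, 1] and y[-1].tolist() == [1, 0]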
def extract_and_save_bin_to(dir_to_bin, dir_to_source):
    sets = [s for s in os.listdir(dir_to_source) if s in SETS]
    for d in sets:
        path = join(dir_to_source, d)
        speakers = [s for s in os.listdir(path) if s in SPEAKERS]
        for s in speakers:
            path = join(dir_to_source, d, s)
            output_dir = join(dir_to_bin, d, s)
            if not tf.gfile.Exists(output_dir):
                tf.gfile.MakeDirs(output_dir)
            for f in os.listdir(path):
                filename = join(path, f)
                print(filename)
                if not os.path.isdir(filename):
                    features = extract(filename)
                    labels = SPEAKERS.index(s) * np.ones(
                        [features.shape[0], 1],
                        np.float32,
                    )
                    b = os.path.splitext(f)[0]
                    features = np.concatenate([features, labels], 1)
                    with open(join(output_dir, '{}.bin'.format(b)), 'wb') as fp:
                        fp.write(features.tostring())
def plot_spectra(results):
    plt.figure(figsize=(10, 4))
    plt.imshow(
        np.concatenate(
            [np.flipud(results['x'].T),
             np.flipud(results['xh'].T),
             np.flipud(results['x_conv'].T)],
            0),
        aspect='auto',
        cmap='jet',
    )
    plt.colorbar()
    plt.title('Upper: Real input; Mid: Reconstruction; Lower: Conversion to target.')
    plt.savefig(
        os.path.join(
            args.logdir,
            '{}.png'.format(
                os.path.split(str(results['f'], 'utf-8'))[-1]
            )
        )
    )
def preprocessExample(self, image, coords, angle, shear_x, shear_y, scale):
    size_in = image.shape[0]
    size_out = self.config['tile_size'] + 2 * self.config['contextual_pad']
    # h = base64.b64encode(struct.pack(">q", hash(image.tostring()))).decode()
    # data_preparation.imshow(image, coords=coords, save=True, title='%s_preprocessExampleA' % h)
    image = self.applyLinearTransformToImage(image, angle, shear_x, shear_y, scale, size_out)
    image = self.applyColorAugmentation(image, self.config['aug_color_std'],
                                        self.config['aug_gamma_factor'])
    coords[:, 1:] = self.applyLinearTransformToCoords(coords[:, 1:], angle, shear_x,
                                                      shear_y, scale, size_in, size_out)
    target = self.generateCountMaps(coords)
    large_target = self.generateLargeCountMaps(coords)
    if self.config['draw_border'] and self.config['contextual_pad'] > 0:
        image = self.draw_border(image, self.config['contextual_pad'], self.config['tile_size'])
    # data_preparation.imshow(image, coords=coords, save=True, title='%s_preprocessExampleB' % h)
    # t = np.concatenate(np.moveaxis(target, -1, 0))
    # data_preparation.imshow(t, normalize=True, save=True, title='%s_preprocessExampleC' % h)
    return image.astype(np.float32), target, large_target
def score(self, profiles, bin_sites):
    """Compute AUC ROC from predictions."""
    app_profiles = list()
    app_true_vals = list()
    for k, profile in profiles.iteritems():
        app_profiles.append(profile)
        true_vals = np.zeros(len(profile))
        bins = bin_sites.get(k, False)
        if bins is not False:
            for s, e, _ in bins:
                true_vals[s:e] = 1
        app_true_vals.append(true_vals)
    vec_profiles = np.concatenate(app_profiles)
    vec_true_vals = np.concatenate(app_true_vals)
    roc_auc = roc_auc_score(vec_true_vals, vec_profiles)
    return roc_auc
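# Hedged standalone sketch (made-up profiles and sites, not the original data structures):
# per-sequence scores and labels are concatenated so one ROC AUC covers all positions.
import numpy as np
from sklearn.metrics import roc_auc_score

profiles = {'seq1': np.array([0.1, 0.9, 0.8]), 'seq2': np.array([0.2, 0.3])}
bin_sites = {'seq1': [(1, 3, '+')]}            # positions 1..2 of seq1 are true sites
app_profiles, app_true_vals = [], []
for k, profile in profiles.items():
    true_vals = np.zeros(len(profile))
    for s, e, _ in bin_sites.get(k, []):
        true_vals[s:e] = 1
    app_profiles.append(profile)
    app_true_vals.append(true_vals)
auc = roc_auc_score(np.concatenate(app_true_vals), np.concatenate(app_profiles))
assert auc == 1.0                              # perfect separation in this toy example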
def load_data_and_labels():
    """
    Loads MR polarity data from files, splits the data into words and generates labels.
    Returns split sentences and labels.
    """
    # Load data from files
    positive_examples = list(open("./data/rt-polaritydata/rt-polarity.pos", "r").readlines())
    positive_examples = [s.strip() for s in positive_examples]
    negative_examples = list(open("./data/rt-polaritydata/rt-polarity.neg", "r").readlines())
    negative_examples = [s.strip() for s in negative_examples]
    # Split by words
    x_text = positive_examples + negative_examples
    x_text = [clean_str(sent) for sent in x_text]
    # Generate labels
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    return [x_text, y]
def load_data_and_labels():
    """
    Loads polarity data from files, splits the data into words and generates labels.
    Returns split sentences and labels.
    """
    # Load data from files
    positive_examples = list(open("./data/rt-polaritydata/rt-polarity.pos", "r").readlines())
    positive_examples = [s.strip() for s in positive_examples]
    negative_examples = list(open("./data/rt-polaritydata/rt-polarity.neg", "r").readlines())
    negative_examples = [s.strip() for s in negative_examples]
    # Split by words
    x_text = positive_examples + negative_examples
    x_text = [clean_str(sent) for sent in x_text]
    # Generate labels
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    # Generate sequence lengths
    seqlen = np.array([len(sent.split(" ")) for sent in x_text])
    return [x_text, y, seqlen]
def load_data_and_labels():
    """
    Loads polarity data from files, splits the data into words and generates labels.
    Returns split sentences and labels.
    """
    # Load data from files
    positive_examples = list(open("./data/rt-polaritydata/rt-polarity.pos", "r").readlines())
    positive_examples = [s.strip() for s in positive_examples]
    negative_examples = list(open("./data/rt-polaritydata/rt-polarity.neg", "r").readlines())
    negative_examples = [s.strip() for s in negative_examples]
    # Split by words
    x_text = positive_examples + negative_examples
    x_text = [clean_str(sent) for sent in x_text]
    # Generate labels
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    return [x_text, y]
def _prepare_image(I):
    assert isinstance(I, np.ndarray), 'plugin error, should pass numpy array here'
    assert I.ndim == 2 or I.ndim == 3 or I.ndim == 4
    if I.ndim == 4:  # NCHW
        if I.shape[1] == 1:  # N1HW
            I = np.concatenate((I, I, I), 1)  # N3HW
        assert I.shape[1] == 3
        I = make_grid(I)  # 3xHxW
    if I.ndim == 3 and I.shape[0] == 1:  # 1xHxW
        I = np.concatenate((I, I, I), 0)  # 3xHxW
    if I.ndim == 2:  # HxW
        I = np.expand_dims(I, 0)  # 1xHxW
        I = np.concatenate((I, I, I), 0)  # 3xHxW
    I = I.transpose(1, 2, 0)
    return I
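# Quick shape check (illustrative, not from the original source): a 2-D grayscale image
# becomes an HxWx3 array by stacking the single channel three times and transposing.
import numpy as np

gray = np.random.rand(32, 48)                              # HxW
chw = np.concatenate([np.expand_dims(gray, 0)] * 3, 0)     # 3xHxW
hwc = chw.transpose(1, 2, 0)                               # HxWx3
assert hwc.shape == (32, 48, 3)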
def combine_img_prediction(data, gt, pred):
    """
    Combines the data, ground truth and the prediction into one rgb image
    :param data: the data tensor
    :param gt: the ground truth tensor
    :param pred: the prediction tensor
    :returns img: the concatenated rgb image
    """
    ny = pred.shape[2]
    ch = data.shape[3]
    img = np.concatenate((to_rgb(crop_to_shape(data, pred.shape).reshape(-1, ny, ch)),
                          to_rgb(crop_to_shape(gt[..., 1], pred.shape).reshape(-1, ny, 1)),
                          to_rgb(pred[..., 1].reshape(-1, ny, 1))), axis=1)
    return img
def testMerge(self, dtype=dtype):
    testarray1 = range(1, 101)
    testarray2 = range(5, 106)
    a = numpy.empty((100, 2), dtype=dtype)
    b = numpy.empty((100, 2), dtype=dtype)
    merged = numpy.empty((200, 2), dtype=dtype)
    incompatible1 = numpy.empty((200, 3), dtype=dtype)
    incompatible2 = numpy.empty(200, dtype=dtype)
    a[:, 0] = numpy.arange(1, 101)
    a[:, 1] = numpy.arange(2, 102)
    b[:, 0] = numpy.arange(5, 105)
    b[:, 1] = numpy.arange(6, 106)
    ref = numpy.concatenate([a, b])
    ref = ref[numpy.argsort(ref[:, 0])]
    self.assertEqual(mapped_struct.index_merge(a, b, merged), 200)
    self.assertTrue((merged == ref).all())
    self.assertRaises(ValueError, mapped_struct.index_merge, a, b, incompatible1)
    self.assertRaises(ValueError, mapped_struct.index_merge, a, incompatible1, merged)
    self.assertRaises(ValueError, mapped_struct.index_merge, a, b, incompatible2)
    self.assertRaises(ValueError, mapped_struct.index_merge, a, incompatible2, merged)
def getArrayRegion(self, arr, img=None, axes=(0, 1), **kwds):
    rgns = []
    for l in self.lines:
        rgn = l.getArrayRegion(arr, img, axes=axes, **kwds)
        if rgn is None:
            continue
            #return None
        rgns.append(rgn)
        #print l.state['size']
    ## make sure orthogonal axis is the same size
    ## (sometimes fp errors cause differences)
    if img.axisOrder == 'row-major':
        axes = axes[::-1]
    ms = min([r.shape[axes[1]] for r in rgns])
    sl = [slice(None)] * rgns[0].ndim
    sl[axes[1]] = slice(0, ms)
    rgns = [r[sl] for r in rgns]
    #print [r.shape for r in rgns], axes
    return np.concatenate(rgns, axis=axes[0])
def getArrayRegion(self, data, img, axes=(0, 1), order=1, **kwds):
    """
    Use the position of this ROI relative to an imageItem to pull a slice
    from an array.

    Since this pulls 1D data from a 2D coordinate system, the return value
    will have ndim = data.ndim-1

    See ROI.getArrayRegion() for a description of the arguments.
    """
    imgPts = [self.mapToItem(img, h['item'].pos()) for h in self.handles]
    rgns = []
    for i in range(len(imgPts)-1):
        d = Point(imgPts[i+1] - imgPts[i])
        o = Point(imgPts[i])
        r = fn.affineSlice(data, shape=(int(d.length()),), vectors=[Point(d.norm())], origin=o, axes=axes, order=order, **kwds)
        rgns.append(r)
    return np.concatenate(rgns, axis=axes[0])
def load_bytes(self, data_blocks, dtype='<i1', start=None, end=None, expected_size=None):
    """
    Return list of bytes contained
    in the specified set of blocks.

    NB : load all data as files cannot exceed 4Gb
         find later other solutions to spare memory.
    """
    chunks = list()
    raw = ''
    # keep only data blocks having
    # a size greater than zero
    blocks = [k for k in data_blocks if k.size > 0]
    for data_block in blocks:
        self.file.seek(data_block.start)
        raw = self.file.read(data_block.size)[0:expected_size]
        databytes = np.frombuffer(raw, dtype=dtype)
        chunks.append(databytes)
    # concatenate all chunks and return
    # the specified slice
    if len(chunks) > 0:
        databytes = np.concatenate(chunks)
        return databytes[start:end]
    else:
        return np.array([])
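# Standalone sketch (in-memory bytes instead of a file) of the frombuffer-then-concatenate
# pattern used above to stitch raw data blocks into a single array.
import numpy as np

blocks = [b'\x01\x02\x03', b'\x04\x05']
chunks = [np.frombuffer(raw, dtype='<i1') for raw in blocks]
databytes = np.concatenate(chunks)
assert databytes.tolist() == [1, 2, 3, 4, 5]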