def plot_sent_trajectories(sents, decode_plot):
    font = {'family': 'normal',
            'size': 14}
    matplotlib.rc('font', **font)
    i = 0
    l = ["Portuguese", "Catalan"]
    axes = plt.gca()
    # axes.set_xlim([xmin, xmax])
    axes.set_ylim([-1, 1])
    for sent, enc in zip(sents, decode_plot):
        if i == 2:
            continue
        i += 1
        # times = np.arange(len(enc))
        times = np.linspace(0, 1, len(enc))
        plt.plot(times, enc, label=l[i - 1])
    plt.title("Hidden Node Trajectories")
    plt.xlabel('timestep')
    plt.ylabel('trajectories')
    plt.legend(loc='best')
    plt.savefig("final_tests/cr_por_cat_hidden_cell_trajectories", bbox_inches="tight")
    plt.close()
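# Hedged usage sketch (not part of the original source): the module is assumed to
# import matplotlib, matplotlib.pyplot as plt and numpy as np, and the figure is
# written into a "final_tests/" directory, so that directory must exist first.
import os
import numpy as np
import matplotlib
matplotlib.use("Agg")            # headless backend; the function only saves to disk
import matplotlib.pyplot as plt

os.makedirs("final_tests", exist_ok=True)
sents = ["uma frase", "una frase"]                  # placeholder sentences
decode_plot = [np.tanh(np.linspace(-3, 3, 20)),     # fake hidden-state trajectory (Portuguese)
               np.sin(np.linspace(0, np.pi, 25))]   # fake hidden-state trajectory (Catalan)
plot_sent_trajectories(sents, decode_plot)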
def roll_zeropad(a, shift, axis=None):
    a = np.asanyarray(a)
    if shift == 0:
        return a
    if axis is None:
        n = a.size
        reshape = True
    else:
        n = a.shape[axis]
        reshape = False
    if np.abs(shift) > n:
        res = np.zeros_like(a)
    elif shift < 0:
        shift += n
        zeros = np.zeros_like(a.take(np.arange(n - shift), axis))
        res = np.concatenate((a.take(np.arange(n - shift, n), axis), zeros), axis)
    else:
        zeros = np.zeros_like(a.take(np.arange(n - shift, n), axis))
        res = np.concatenate((zeros, a.take(np.arange(n - shift), axis)), axis)
    if reshape:
        return res.reshape(a.shape)
    else:
        return res
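# Usage sketch (an illustration, not part of the original source): unlike np.roll,
# which wraps elements around, roll_zeropad fills the vacated positions with zeros.
import numpy as np

a = np.arange(1, 6)         # array([1, 2, 3, 4, 5])
print(np.roll(a, 2))        # [4 5 1 2 3]  -- wrapped
print(roll_zeropad(a, 2))   # [0 0 1 2 3]  -- zero-padded
print(roll_zeropad(a, -2))  # [3 4 5 0 0]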
def _generate_data():
    """
    Generate training data: y(k) = sin(u1(k)) + u2(k).
    (The commented-out variant below instead predicts y(k) from u(k-1) and y(k-1).)
    """
    # u = np.random.uniform(-1, 1, 200)
    # y = []
    # former_y_value = 0
    # for i in np.arange(0, 200):
    #     y.append(former_y_value)
    #     next_y_value = (29.0 / 40) * np.sin(
    #         (16.0 * u[i] + 8 * former_y_value) / (3.0 + 4.0 * (u[i] ** 2) + 4 * (former_y_value ** 2))) \
    #         + (2.0 / 10) * u[i] + (2.0 / 10) * former_y_value
    #     former_y_value = next_y_value
    # return u, y
    u1 = np.random.uniform(-np.pi, np.pi, 200)
    u2 = np.random.uniform(-1, 1, 200)
    y = np.zeros(200)
    for i in range(200):
        y[i] = np.sin(u1[i]) + u2[i]
    return u1, u2, y
def plot_counts(counts, gene_type):
    """Plot expression counts. Return a Figure object"""
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    import seaborn as sns
    import numpy as np

    fig = plt.figure(figsize=((50 + len(counts) * 5) / 25.4, 210 / 25.4))
    matplotlib.rcParams.update({'font.size': 14})
    ax = fig.gca()
    ax.set_title('{} gene usage'.format(gene_type))
    ax.set_xlabel('{} gene'.format(gene_type))
    ax.set_ylabel('Count')
    ax.set_xticks(np.arange(len(counts)) + 0.5)
    ax.set_xticklabels(counts.index, rotation='vertical')
    ax.grid(axis='x')
    ax.set_xlim((-0.25, len(counts)))
    ax.bar(np.arange(len(counts)), counts['count'])
    fig.set_tight_layout(True)
    return fig
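# Usage sketch (illustrative, not from the original source): `counts` is assumed to be
# a pandas DataFrame indexed by gene name with a "count" column, matching how the
# function reads counts.index and counts['count']; matplotlib and seaborn must be installed.
import pandas as pd

counts = pd.DataFrame(
    {"count": [120, 45, 230, 12]},
    index=["IGHV1-2", "IGHV1-18", "IGHV3-23", "IGHV4-34"],
)
fig = plot_counts(counts, gene_type="V")
fig.savefig("v_gene_usage.pdf")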
def remove_artifacts(self, image):
    """
    Remove the connected components that are not within the parameters
    Operates in place
    :param image: sudoku's thresholded image w/o grid
    :return: None
    """
    labeled, features = label(image, structure=CROSS)
    lbls = np.arange(1, features + 1)
    areas = extract_feature(image, labeled, lbls, np.sum,
                            np.uint32, 0)
    sides = extract_feature(image, labeled, lbls, min_side,
                            np.float32, 0, True)
    diags = extract_feature(image, labeled, lbls, diagonal,
                            np.float32, 0, True)
    for index in lbls:
        area = areas[index - 1] / 255
        side = sides[index - 1]
        diag = diags[index - 1]
        if side < 5 or side > 20 \
                or diag < 15 or diag > 25 \
                or area < 40:
            image[labeled == index] = 0
    return None
def next_batch(self, batch_size, fake_data=False):
    """Return the next `batch_size` examples from this data set."""
    if fake_data:
        fake_image = [1.0 for _ in xrange(784)]
        fake_label = 0
        return [fake_image for _ in xrange(batch_size)], [
            fake_label for _ in xrange(batch_size)]
    start = self._index_in_epoch
    self._index_in_epoch += batch_size
    if self._index_in_epoch > self._num_examples:
        # Finished epoch
        self._epochs_completed += 1
        # Shuffle the data
        perm = numpy.arange(self._num_examples)
        numpy.random.shuffle(perm)
        self._images = self._images[perm]
        self._labels = self._labels[perm]
        # Start next epoch
        start = 0
        self._index_in_epoch = batch_size
        assert batch_size <= self._num_examples
    end = self._index_in_epoch
    return self._images[start:end], self._labels[start:end]
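# The shuffling step above relies on a common numpy.arange trick: build the index range,
# permute it once, and fancy-index images and labels with the same permutation so the
# pairs stay aligned. A standalone sketch (illustrative only):
import numpy as np

images = np.arange(12).reshape(6, 2)   # six toy "images", row i is [2*i, 2*i + 1]
labels = np.arange(6)                  # matching labels

perm = np.arange(len(images))
np.random.shuffle(perm)
images, labels = images[perm], labels[perm]   # one permutation keeps pairs aligned
print(labels)                # e.g. [3 0 5 1 4 2]
print(images[:, 0] // 2)     # matches labels row for row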
def rhoA(self):
    # rhoA
    rhoA = pd.DataFrame(0, index=np.arange(1), columns=self.latent)
    for i in range(self.lenlatent):
        weights = pd.DataFrame(self.outer_weights[self.latent[i]])
        weights = weights[(weights.T != 0).any()]
        result = pd.DataFrame.dot(weights.T, weights)
        result_ = pd.DataFrame.dot(weights, weights.T)
        S = self.data_[self.Variables['measurement'][
            self.Variables['latent'] == self.latent[i]]]
        S = pd.DataFrame.dot(S.T, S) / S.shape[0]
        numerador = (
            np.dot(np.dot(weights.T, (S - np.diag(np.diag(S)))), weights))
        denominador = (
            (np.dot(np.dot(weights.T, (result_ - np.diag(np.diag(result_)))), weights)))
        rhoA_ = ((result) ** 2) * (numerador / denominador)
        if np.isnan(rhoA_.values):
            rhoA[self.latent[i]] = 1
        else:
            rhoA[self.latent[i]] = rhoA_.values
    return rhoA.T
def split_dataset(dataset, split_ratio, mode):
    if mode == 'SPLIT_CLASSES':
        nrof_classes = len(dataset)
        class_indices = np.arange(nrof_classes)
        np.random.shuffle(class_indices)
        split = int(round(nrof_classes * split_ratio))
        train_set = [dataset[i] for i in class_indices[0:split]]
        test_set = [dataset[i] for i in class_indices[split:-1]]
    elif mode == 'SPLIT_IMAGES':
        train_set = []
        test_set = []
        min_nrof_images = 2
        for cls in dataset:
            paths = cls.image_paths
            np.random.shuffle(paths)
            split = int(round(len(paths) * split_ratio))
            if split < min_nrof_images:
                continue  # Not enough images for test set. Skip class...
            train_set.append(ImageClass(cls.name, paths[0:split]))
            test_set.append(ImageClass(cls.name, paths[split:-1]))
    else:
        raise ValueError('Invalid train/test split mode "%s"' % mode)
    return train_set, test_set
def quantize_from_probs2(probs, resolution):
    """Quantize multiple non-normalized probs to given resolution.

    Args:
        probs: An [N, M]-shaped numpy array of non-normalized probabilities.
        resolution: Integer total mass to distribute across each row.

    Returns:
        An [N, M]-shaped array of quantized probabilities such that
        np.all(result.sum(axis=1) == resolution).
    """
    assert len(probs.shape) == 2
    N, M = probs.shape
    probs = probs / probs.sum(axis=1, keepdims=True)
    result = np.zeros(probs.shape, np.int8)
    range_N = np.arange(N, dtype=np.int32)
    for _ in range(resolution):
        sample = probs.argmax(axis=1)
        result[range_N, sample] += 1
        probs[range_N, sample] -= 1.0 / resolution
    return result
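# Quick check of the quantization behavior (illustrative, not from the original source):
# every row of the result sums to `resolution`, and larger probabilities receive
# proportionally more of the integer mass. Exact ties may break either way.
import numpy as np

probs = np.array([[0.2, 0.3, 0.5],
                  [4.0, 1.0, 5.0]])           # rows need not be normalized
quantized = quantize_from_probs2(probs, resolution=10)
print(quantized)                              # e.g. [[2 3 5], [4 1 5]]
print(quantized.sum(axis=1))                  # [10 10]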
def get_train_data():
    # define the dataset as two inputs, one output
    DS = SupervisedDataSet(2, 1)
    u1, u2, y = _generate_data()
    # add data elements to the dataset
    for i in np.arange(199):
        DS.addSample([u1[i], u2[i]], [y[i + 1]])
    # you can get your input/output this way
    # X = DS['input']
    # Y = DS['target']
    # split the dataset into train dataset and test dataset
    dataTrain, dataTest = DS.splitWithProportion(0.8)
    return dataTrain, dataTest
def read_chunk(self, idx, chunk_size, padding=(0, 0), nodes=None):
    self._open()
    t_start, t_stop = self._get_t_start_t_stop(idx, chunk_size, padding)
    if self.time_axis == 0:
        local_chunk = self.data[t_start:t_stop, :]
    elif self.time_axis == 1:
        local_chunk = self.data[:, t_start:t_stop].T
    self._close()
    if nodes is not None:
        if not numpy.all(nodes == numpy.arange(self.nb_channels)):
            local_chunk = numpy.take(local_chunk, nodes, axis=1)
    return self._scale_data_to_float32(local_chunk)
def _get_slice_(self, t_start, t_stop):
    x_beg = numpy.int64(t_start // self.SAMPLES_PER_RECORD)
    r_beg = numpy.mod(t_start, self.SAMPLES_PER_RECORD)
    x_end = numpy.int64(t_stop // self.SAMPLES_PER_RECORD)
    r_end = numpy.mod(t_stop, self.SAMPLES_PER_RECORD)
    if x_beg == x_end:
        g_offset = x_beg * self.bytes_per_block_div + self.block_offset_div
        data_slice = numpy.arange(g_offset + r_beg * self.nb_channels, g_offset + r_end * self.nb_channels, dtype=numpy.int64)
        yield data_slice
    else:
        for count, nb_blocks in enumerate(numpy.arange(x_beg, x_end + 1, dtype=numpy.int64)):
            g_offset = nb_blocks * self.bytes_per_block_div + self.block_offset_div
            if count == 0:
                data_slice = numpy.arange(g_offset + r_beg * self.nb_channels, g_offset + self.block_size_div, dtype=numpy.int64)
            elif count == (x_end - x_beg):
                data_slice = numpy.arange(g_offset, g_offset + r_end * self.nb_channels, dtype=numpy.int64)
            else:
                data_slice = numpy.arange(g_offset, g_offset + self.block_size_div, dtype=numpy.int64)
            yield data_slice
def _get_slice_(self, t_start, t_stop):
    x_beg = numpy.int64(t_start // self.SAMPLES_PER_RECORD)
    r_beg = numpy.mod(t_start, self.SAMPLES_PER_RECORD)
    x_end = numpy.int64(t_stop // self.SAMPLES_PER_RECORD)
    r_end = numpy.mod(t_stop, self.SAMPLES_PER_RECORD)
    data_slice = []
    if x_beg == x_end:
        g_offset = x_beg * self.SAMPLES_PER_RECORD + self.OFFSET_PER_BLOCK[0] * (x_beg + 1) + self.OFFSET_PER_BLOCK[1] * x_beg
        data_slice = numpy.arange(g_offset + r_beg, g_offset + r_end, dtype=numpy.int64)
    else:
        for count, nb_blocks in enumerate(numpy.arange(x_beg, x_end + 1, dtype=numpy.int64)):
            g_offset = nb_blocks * self.SAMPLES_PER_RECORD + self.OFFSET_PER_BLOCK[0] * (nb_blocks + 1) + self.OFFSET_PER_BLOCK[1] * nb_blocks
            if count == 0:
                data_slice += numpy.arange(g_offset + r_beg, g_offset + self.SAMPLES_PER_RECORD, dtype=numpy.int64).tolist()
            elif count == (x_end - x_beg):
                data_slice += numpy.arange(g_offset, g_offset + r_end, dtype=numpy.int64).tolist()
            else:
                data_slice += numpy.arange(g_offset, g_offset + self.SAMPLES_PER_RECORD, dtype=numpy.int64).tolist()
    return data_slice
def read_chunk(self, idx, chunk_size, padding=(0, 0), nodes=None):
    t_start, t_stop = self._get_t_start_t_stop(idx, chunk_size, padding)
    local_shape = t_stop - t_start
    if nodes is None:
        nodes = numpy.arange(self.nb_channels)
    local_chunk = numpy.zeros((local_shape, len(nodes)), dtype=self.data_dtype)
    data_slice = self._get_slice_(t_start, t_stop)
    self._open()
    for count, i in enumerate(nodes):
        local_chunk[:, count] = self.data[i][data_slice]
    self._close()
    return self._scale_data_to_float32(local_chunk)
def view_trigger_snippets_bis(trigger_snippets, elec_index, save=None):
    fig = pylab.figure()
    ax = fig.add_subplot(1, 1, 1)
    for n in xrange(0, trigger_snippets.shape[2]):
        y = trigger_snippets[:, elec_index, n]
        x = numpy.arange(-(y.size - 1) / 2, (y.size - 1) / 2 + 1)
        b = 0.5 + 0.5 * numpy.random.rand()
        ax.plot(x, y, color=(0.0, 0.0, b), linestyle='solid')
    ax.grid(True)
    ax.set_xlim([numpy.amin(x), numpy.amax(x)])
    ax.set_xlabel("time")
    ax.set_ylabel("amplitude")
    if save is None:
        pylab.show()
    else:
        pylab.savefig(save)
        pylab.close(fig)
    return
def cost(self, x):
    Rdx = dl.Vector()
    self.Prior.init_vector(Rdx, 0)
    dx = x[PARAMETER] - self.Prior.mean
    self.Prior.R.mult(dx, Rdx)
    reg = .5 * Rdx.inner(dx)
    u = dl.Vector()
    ud = dl.Vector()
    self.Q.init_vector(u, 0)
    self.Q.init_vector(ud, 0)
    misfit = 0
    for t in np.arange(self.t_1, self.t_final + (.5 * self.dt), self.dt):
        x[STATE].retrieve(u, t)
        self.ud.retrieve(ud, t)
        diff = u - ud
        Qdiff = self.Q * diff
        misfit += .5 / self.noise_variance * Qdiff.inner(diff)
    c = misfit + reg
    return [c, reg, misfit]
def _flow_index(self, n, batch_size=32, shuffle=False, seed=None):
    # ensure self.batch_index is 0
    self.reset()
    while 1:
        if seed is not None:
            np.random.seed(seed + self.total_batches_seen)
        if self.batch_index == 0:
            index_array = np.arange(n)
            if shuffle:
                index_array = np.random.permutation(n)
        current_index = (self.batch_index * batch_size) % n
        if n >= current_index + batch_size:
            current_batch_size = batch_size
            self.batch_index += 1
        else:
            current_batch_size = n - current_index
            self.batch_index = 0
        self.total_batches_seen += 1
        yield (index_array[current_index: current_index + current_batch_size],
               current_index, current_batch_size)
def make_split(X_full, Y_full, split):
    N = X_full.shape[0]
    n = int(N * PROPORTION_TRAIN)
    ind = np.arange(N)
    np.random.seed(split + SEED)
    np.random.shuffle(ind)
    train_ind = ind[:n]
    test_ind = ind[n:]
    X = X_full[train_ind]
    Xs = X_full[test_ind]
    Y = Y_full[train_ind]
    Ys = Y_full[test_ind]
    return X, Y, Xs, Ys
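# Usage sketch (illustrative): PROPORTION_TRAIN and SEED are module-level constants in
# the original code, so placeholder values are supplied here.
import numpy as np

PROPORTION_TRAIN = 0.8   # placeholder value
SEED = 0                 # placeholder value

X_full = np.random.randn(100, 5)
Y_full = np.random.randn(100, 1)
X, Y, Xs, Ys = make_split(X_full, Y_full, split=0)
print(X.shape, Xs.shape)   # (80, 5) (20, 5)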
def plot_difference_histogram(group, gene_name, bins=np.arange(20.1)):
    """
    Plot a histogram of percentage differences for a specific gene.
    """
    exact_matches = group[group.V_SHM == 0]
    CDR3s_exact = len(set(s for s in exact_matches.CDR3_nt if s))
    Js_exact = len(set(exact_matches.J_gene))

    fig = Figure(figsize=(100/25.4, 60/25.4))
    ax = fig.gca()
    ax.set_xlabel('Percentage difference')
    ax.set_ylabel('Frequency')
    fig.suptitle('Gene ' + gene_name, y=1.08, fontsize=16)
    ax.set_title('{:,} sequences assigned'.format(len(group)))

    ax.text(0.25, 0.95,
            '{:,} ({:.1%}) exact matches\n {} unique CDR3\n {} unique J'.format(
                len(exact_matches), len(exact_matches) / len(group),
                CDR3s_exact, Js_exact),
            transform=ax.transAxes, fontsize=10,
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.5),
            horizontalalignment='left', verticalalignment='top')

    _ = ax.hist(list(group.V_SHM), bins=bins)
    return fig
def create_decoder(self, helper, mode):
    attention_fn = AttentionLayerDot(
        params={"num_units": self.attention_dim},
        mode=tf.contrib.learn.ModeKeys.TRAIN)
    attention_values = tf.convert_to_tensor(
        np.random.randn(self.batch_size, self.input_seq_len, 32),
        dtype=tf.float32)
    attention_keys = tf.convert_to_tensor(
        np.random.randn(self.batch_size, self.input_seq_len, 32),
        dtype=tf.float32)
    params = AttentionDecoder.default_params()
    params["max_decode_length"] = self.max_decode_length
    return AttentionDecoder(
        params=params,
        mode=mode,
        vocab_size=self.vocab_size,
        attention_keys=attention_keys,
        attention_values=attention_values,
        attention_values_length=np.arange(self.batch_size) + 1,
        attention_fn=attention_fn)
def _create_figure(predictions_dict):
    """Creates and returns a new figure that visualizes
    attention scores for a single model prediction.
    """
    # Find out how long the predicted sequence is
    target_words = list(predictions_dict["predicted_tokens"])
    prediction_len = _get_prediction_length(predictions_dict)

    # Get source words
    source_len = predictions_dict["features.source_len"]
    source_words = predictions_dict["features.source_tokens"][:source_len]

    # Plot
    fig = plt.figure(figsize=(8, 8))
    plt.imshow(
        X=predictions_dict["attention_scores"][:prediction_len, :source_len],
        interpolation="nearest",
        cmap=plt.cm.Blues)
    plt.xticks(np.arange(source_len), source_words, rotation=45)
    plt.yticks(np.arange(prediction_len), target_words, rotation=-45)
    fig.tight_layout()
    return fig
def make_copy(num_examples, min_len, max_len):
    """
    Generates a dataset where the target is equal to the source.
    Sequence lengths are chosen randomly from [min_len, max_len].

    Args:
        num_examples: Number of examples to generate
        min_len: Minimum sequence length
        max_len: Maximum sequence length

    Returns:
        An iterator of (source, target) string tuples.
    """
    for _ in range(num_examples):
        turn_length = np.random.choice(np.arange(min_len, max_len + 1))
        source_tokens = np.random.choice(
            list(VOCABULARY), size=turn_length, replace=True)
        target_tokens = source_tokens
        yield " ".join(source_tokens), " ".join(target_tokens)
def make_reverse(num_examples, min_len, max_len):
    """
    Generates a dataset where the target is equal to the source reversed.
    Sequence lengths are chosen randomly from [min_len, max_len].

    Args:
        num_examples: Number of examples to generate
        min_len: Minimum sequence length
        max_len: Maximum sequence length

    Returns:
        An iterator of (source, target) string tuples.
    """
    for _ in range(num_examples):
        turn_length = np.random.choice(np.arange(min_len, max_len + 1))
        source_tokens = np.random.choice(
            list(VOCABULARY), size=turn_length, replace=True)
        target_tokens = source_tokens[::-1]
        yield " ".join(source_tokens), " ".join(target_tokens)
def update_dividends(self, new_dividends):
    """
    Update our dividend frame with new dividends. @new_dividends should be
    a DataFrame with columns containing at least the entries in
    zipline.protocol.DIVIDEND_FIELDS.
    """
    # Mark each new dividend with a unique integer id. This ensures that
    # we can differentiate dividends whose date/sid fields are otherwise
    # identical.
    new_dividends['id'] = np.arange(
        self._dividend_count,
        self._dividend_count + len(new_dividends),
    )
    self._dividend_count += len(new_dividends)

    self.dividend_frame = sort_values(pd.concat(
        [self.dividend_frame, new_dividends]
    ), ['pay_date', 'ex_date']).set_index('id', drop=False)
def create_test_panel_ohlc_source(sim_params, env):
    start = sim_params.first_open \
        if sim_params else pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
    end = sim_params.last_close \
        if sim_params else pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)
    index = env.days_in_range(start, end)
    price = np.arange(0, len(index)) + 100
    high = price * 1.05
    low = price * 0.95
    open_ = price + .1 * (price % 2 - .5)
    volume = np.ones(len(index)) * 1000
    arbitrary = np.ones(len(index))
    df = pd.DataFrame({'price': price,
                       'high': high,
                       'low': low,
                       'open': open_,
                       'volume': volume,
                       'arbitrary': arbitrary},
                      index=index)
    panel = pd.Panel.from_dict({0: df})
    return DataPanelSource(panel), panel
def test_expect_dtypes_with_tuple(self):
    allowed_dtypes = (dtype('datetime64[ns]'), dtype('float'))

    @expect_dtypes(a=allowed_dtypes)
    def foo(a, b):
        return a, b

    for d in allowed_dtypes:
        good_a = arange(3).astype(d)
        good_b = object()
        ret_a, ret_b = foo(good_a, good_b)
        self.assertIs(good_a, ret_a)
        self.assertIs(good_b, ret_b)

    with self.assertRaises(TypeError) as e:
        foo(arange(3, dtype='uint32'), object())

    expected_message = (
        "{qualname}() expected a value with dtype 'datetime64[ns]' "
        "or 'float64' for argument 'a', but got 'uint32' instead."
    ).format(qualname=qualname(foo))
    self.assertEqual(e.exception.args[0], expected_message)