def min_side(_, pos):
"""
    Given an object's pixel positions, return the minimum side length of its
    bounding box.
    :param _: pixel values (unused)
    :param pos: pixel positions (flat 1-D indices)
:return: minimum bounding box side length
"""
    xs = np.array([i // SSIZE for i in pos])  # row indices from flat row-major positions
    ys = np.array([i % SSIZE for i in pos])   # column indices
minx = np.amin(xs)
miny = np.amin(ys)
maxx = np.amax(xs)
maxy = np.amax(ys)
ct1 = compute_line(np.array([minx, miny]), np.array([minx, maxy]))
ct2 = compute_line(np.array([minx, miny]), np.array([maxx, miny]))
return min(ct1, ct2)
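# Usage sketch (illustrative, not from the original project): min_side relies on flat
# row-major pixel indices, where row = index // SSIZE and column = index % SSIZE.
# SSIZE_DEMO below is a hypothetical grid side; the real SSIZE comes from the module.
import numpy as np

SSIZE_DEMO = 30
demo_pos = np.array([0, 31, 62])   # three pixels on the grid diagonal
print(demo_pos // SSIZE_DEMO)      # rows:    [0 1 2]
print(demo_pos % SSIZE_DEMO)       # columns: [0 1 2]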
def _cascade_evaluation(self, X_test, y_test):
""" Evaluate the accuracy of the cascade using X and y.
:param X_test: np.array
Array containing the test input samples.
Must be of the same shape as training data.
:param y_test: np.array
Test target values.
:return: float
the cascade accuracy.
"""
casc_pred_prob = np.mean(self.cascade_forest(X_test), axis=0)
casc_pred = np.argmax(casc_pred_prob, axis=1)
casc_accuracy = accuracy_score(y_true=y_test, y_pred=casc_pred)
print('Layer validation accuracy = {}'.format(casc_accuracy))
return casc_accuracy
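# Illustration of the averaging/argmax step above, with made-up per-forest probability
# arrays standing in for the output of self.cascade_forest(X_test) (values hypothetical).
import numpy as np
from sklearn.metrics import accuracy_score

forest_probs = [np.array([[0.9, 0.1], [0.4, 0.6], [0.2, 0.8]]),
                np.array([[0.8, 0.2], [0.6, 0.4], [0.3, 0.7]])]
casc_pred_prob = np.mean(forest_probs, axis=0)   # average over forests
casc_pred = np.argmax(casc_pred_prob, axis=1)    # [0 0 1]
print(accuracy_score(y_true=[0, 1, 1], y_pred=casc_pred))  # 0.666...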
def _create_feat_arr(self, X, prf_crf_pred):
""" Concatenate the original feature vector with the predicition probabilities
of a cascade layer.
:param X: np.array
Array containing the input samples.
Must be of shape [n_samples, data] where data is a 1D array.
:param prf_crf_pred: list
Prediction probabilities by a cascade layer for X.
:return: np.array
Concatenation of X and the predicted probabilities.
To be used for the next layer in a cascade forest.
"""
swap_pred = np.swapaxes(prf_crf_pred, 0, 1)
add_feat = swap_pred.reshape([np.shape(X)[0], -1])
feat_arr = np.concatenate([add_feat, X], axis=1)
return feat_arr
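# Shape check of the swap/reshape/concatenate step above, assuming prf_crf_pred is a
# list of per-forest probability arrays of shape [n_samples, n_classes] (toy sizes here).
import numpy as np

n_samples, n_classes, n_forests = 4, 3, 2
X_demo = np.zeros((n_samples, 5))
prf_crf_pred_demo = [np.random.rand(n_samples, n_classes) for _ in range(n_forests)]
swapped = np.swapaxes(prf_crf_pred_demo, 0, 1)   # (n_samples, n_forests, n_classes)
added = swapped.reshape([n_samples, -1])         # (n_samples, n_forests * n_classes)
print(np.concatenate([added, X_demo], axis=1).shape)  # (4, 11)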
def shuffleBlock(self, cells, d, tlx, tly, cols, rows, width, height):
    if tlx + cols < width and tly + rows < height:
        temp = []
        for row in range(rows):
            for col in range(cols):
                temp.append(d[cells[tlx + col][tly + row]])
        temp = np.array(temp)
        oldState = temp.copy()
        np.random.shuffle(temp)
        i = 0
        for row in range(rows):
            for col in range(cols):
                d[cells[tlx + col][tly + row]] = temp[i]
                i += 1
        return oldState
    else:
        return []
def train(self, dataset, train_split=0.8, dense_size=32, learning_rate=0.001, batch_size=32, epochs=50, activation='relu'):
self.__load_dataset(dataset, train_split)
train_x = np.array(self.__train_data[:, 0].tolist())
train_y = to_categorical(self.__train_data[:, 1], 2)
test_x = np.array(self.__test_data[:, 0].tolist())
test_y = to_categorical(self.__test_data[:, 1], 2)
print(train_x.shape)
self.__model = Sequential()
self.__model.add(Dense(dense_size, input_dim=train_x.shape[1], activation=activation, init='glorot_uniform'))
self.__model.add(Dense(train_y.shape[1], activation='softmax', init='glorot_uniform'))
    self.__model.compile(optimizer=Adam(lr=learning_rate), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
self.__model.fit(train_x, train_y, batch_size=batch_size, nb_epoch=epochs, validation_data=(test_x, test_y), verbose=2)
def normalize_array (solution, prediction):
''' Use min and max of solution as scaling factors to normalize prediction,
then threshold it to [0, 1]. Binarize solution to {0, 1}.
This allows applying classification scores to all cases.
In principle, this should not do anything to properly formatted
classification inputs and outputs.'''
# Binarize solution
    sol = np.ravel(solution)  # convert to 1-d array
    maxi = np.nanmax(list(filter(lambda x: x != float('inf'), sol)))   # max except NaN and +Inf
    mini = np.nanmin(list(filter(lambda x: x != float('-inf'), sol)))  # min except NaN and -Inf
if maxi == mini:
print('Warning, cannot normalize')
return [solution, prediction]
diff = maxi - mini
mid = (maxi + mini)/2.
new_solution = np.copy(solution)
new_solution[solution>=mid] = 1
new_solution[solution<mid] = 0
# Normalize and threshold predictions (takes effect only if solution not in {0, 1})
new_prediction = (np.copy(prediction) - float(mini))/float(diff)
new_prediction[new_prediction>1] = 1 # and if predictions exceed the bounds [0, 1]
new_prediction[new_prediction<0] = 0
# Make probabilities smoother
#new_prediction = np.power(new_prediction, (1./10))
return [new_solution, new_prediction]
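# Quick check of normalize_array on toy data: the solution is binarized around its
# midpoint, the prediction is rescaled by the solution's range and clipped to [0, 1].
import numpy as np

sol_demo = np.array([0., 1., 1., 0.])
pred_demo = np.array([-0.2, 0.7, 1.3, 0.1])
new_sol, new_pred = normalize_array(sol_demo, pred_demo)
print(new_sol)   # [0. 1. 1. 0.]
print(new_pred)  # [0.  0.7 1.  0.1]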
def mvmean(R, axis=0):
    ''' Incremental (running) mean to avoid rounding errors. A bit slow, but...
    Computes the mean along the given axis, except if this is a vector, in which case the mean is returned.
    Does NOT flatten.'''
    from functools import reduce  # local import so the snippet also runs on Python 3
    if len(R.shape) == 0: return R
    average = lambda x: reduce(lambda i, j: (0, (j[0] / (j[0] + 1.)) * i[1] + (1. / (j[0] + 1)) * j[1]), enumerate(x))[1]
    R = np.array(R)
    if len(R.shape) == 1: return average(R)
    if axis == 1:
        return np.array(list(map(average, R)))   # list() needed on Python 3, where map is lazy
    else:
        return np.array(list(map(average, R.transpose())))
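# On well-conditioned data the incremental mean above matches np.mean along the same
# axis; a small consistency check assuming mvmean is in scope.
import numpy as np

R_demo = np.arange(12, dtype=float).reshape(3, 4)
print(mvmean(R_demo, axis=0))                                    # [4. 5. 6. 7.]
print(np.allclose(mvmean(R_demo, axis=0), R_demo.mean(axis=0)))  # True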
# ======= All metrics used for scoring in the challenge ========
### REGRESSION METRICS (work on raw solution and prediction)
# These can be computed on all solutions and predictions (classification included)
def data_binary_sparse (filename, nbr_features):
    ''' This function takes as an argument a file representing a binary sparse matrix:
    a value a in row i of the file means matrix[i][a-1] = 1 (feature indices are 1-based).
    It converts it into a scipy CSR sparse matrix and returns it. '''
data = data_converter.file_to_array (filename)
nbr_samples = len(data)
dok_sparse = dok_matrix ((nbr_samples, nbr_features)) # the construction is easier w/ dok_sparse
print ("Converting {} to dok sparse matrix".format(filename))
for row in range (nbr_samples):
for feature in data[row]:
dok_sparse[row, int(feature)-1] = 1
print ("Converting {} to csr sparse matrix".format(filename))
return dok_sparse.tocsr()
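# The dok-to-csr construction on its own, with a hypothetical parsed input standing in
# for data_converter.file_to_array: each row lists the 1-based indices of active features.
from scipy.sparse import dok_matrix

rows_demo = [[1, 3], [2], [1, 2, 4]]
dok_demo = dok_matrix((len(rows_demo), 4))
for i, feats in enumerate(rows_demo):
    for feature in feats:
        dok_demo[i, int(feature) - 1] = 1
print(dok_demo.tocsr().toarray())
# [[1. 0. 1. 0.]
#  [0. 1. 0. 0.]
#  [1. 1. 0. 1.]]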
# ================ Copy results from input to output ==========================
def generate_one_summary(self, review):
"""
Create summary for one review using Encoder Decoder Seq2Seq model
:param review: The input review
:return: Output Summary of the model
"""
review = review.T
review = [np.array([int(x)]) for x in review]
feed_dict_rev = {self.enc_inp[t]: review[t] for t in range(self.seq_length)}
feed_dict_rev.update({self.labels[t]: review[t] for t in range(self.seq_length)})
summary = self.sess.run(self.dec_outputs_tst, feed_dict_rev)
summary = [logits_t.argmax(axis=1) for logits_t in summary]
summary = [x[0] for x in summary]
return summary
def __crawl_review(self):
    """
    Crawl reviews
    :return: reviews [numpy array]
    """
    review_list = []
    print('Crawling Reviews....')
    num_lines = 0
    with open(self.raw_data_file) as infile:
        for line in infile:
            if line.startswith('review/text'):
                if num_lines >= self.num_reviews:
                    break
                num_lines += 1
                _, review = line.split('/text: ')
                review_list.append(review)
    return np.array(review_list)
def __crawl_summary(self):
    """
    Crawl summaries
    :return: summaries [numpy array]
    """
    summary_list = []
    print('Crawling Summary....')
    num_lines = 0
    with open(self.raw_data_file) as infile:
        for line in infile:
            if line.startswith('review/summary'):
                if num_lines >= self.num_reviews:
                    break
                num_lines += 1
                _, summary = line.split('/summary: ')
                summary_list.append(summary)
    return np.array(summary_list)
def reshape_array(array, newsize, pixcombine='sum'):
    """
    Reshape an array to a given size using either the sum, mean or median of the pixels binned.
    Note that the old array dimensions have to be multiples of the new array dimensions.
    --- INPUT ---
    array        Array to reshape (combine pixels)
    newsize      New size of array
    pixcombine   The method to combine the pixels with. Choices are sum, mean and median
    """
    sh = newsize[0], array.shape[0] // newsize[0], newsize[1], array.shape[1] // newsize[1]
    if pixcombine == 'sum':
        reshapedarray = array.reshape(sh).sum(-1).sum(1)
    elif pixcombine == 'mean':
        reshapedarray = array.reshape(sh).mean(-1).mean(1)
    elif pixcombine == 'median':
        # ndarray has no .median() method; use np.median along the binned axes instead
        reshapedarray = np.median(np.median(array.reshape(sh), axis=-1), axis=1)
    else:
        raise ValueError("pixcombine must be 'sum', 'mean' or 'median'")
    return reshapedarray
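# Sanity check of the sum-binning path above: a 4x4 array combined down to 2x2, where
# each output pixel is the sum of a 2x2 block (assumes reshape_array as defined above).
import numpy as np

a_demo = np.arange(16, dtype=float).reshape(4, 4)
print(reshape_array(a_demo, (2, 2), pixcombine='sum'))
# [[10. 18.]
#  [42. 50.]]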
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
def test(path_test, input_size, hidden_size, batch_size, save_dir, model_name, maxlen):
db = read_data(path_test)
X = create_sequences(db[:-maxlen], win_size=maxlen, step=maxlen)
X = np.reshape(X, (X.shape[0], X.shape[1], input_size))
# build the model: 1 layer LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(hidden_size, return_sequences=False, input_shape=(maxlen, input_size)))
model.add(Dense(maxlen))
model.load_weights(save_dir + model_name)
model.compile(loss='mse', optimizer='adam')
prediction = model.predict(X, batch_size, verbose=1)
prediction = prediction.flatten()
# prediction_container = np.array(prediction).flatten()
Y = db[maxlen:]
plt.plot(prediction, label='prediction')
plt.plot(Y, label='true')
plt.legend()
plt.show()
def word_list_to_embedding(words, embeddings, embedding_dimension=50):
'''
:param words: an n x (2*window_size + 1) matrix from data_to_mat
:param embeddings: an embedding dictionary where keys are strings and values
are embeddings; the output from embeddings_to_dict
:param embedding_dimension: the dimension of the values in embeddings; in this
assignment, embedding_dimension=50
:return: an n x ((2*window_size + 1)*embedding_dimension) matrix where each entry of the
words matrix is replaced with its embedding
'''
m, n = words.shape
words = words.reshape((-1))
return np.array([embeddings[w] for w in words], dtype=np.float32).reshape(m, n*embedding_dimension)
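# Toy run of the embedding lookup above with a hypothetical 2-dimensional embedding
# dictionary in place of the 50-dimensional one used in the assignment.
import numpy as np

emb_demo = {'the': np.array([1.0, 0.0]),
            'cat': np.array([0.0, 1.0]),
            'sat': np.array([0.5, 0.5])}
words_demo = np.array([['the', 'cat', 'sat']])   # n = 1 window of width 3
print(word_list_to_embedding(words_demo, emb_demo, embedding_dimension=2))
# [[1.  0.  0.  1.  0.5 0.5]]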
#
# End Twitter Helper Functions
#
def __init__(self, N, L, comm, precision,
communication="Alltoall",
padsize=1.5,
threads=1,
planner_effort=defaultdict(lambda: "FFTW_MEASURE")):
R2C.__init__(self, N, L, comm, precision,
communication=communication,
padsize=padsize, threads=threads,
planner_effort=planner_effort)
    # Reuse all shapes from the R2C transform, simply resizing the final complex z-dimension:
self.Nf = N[2]
self.Nfp = int(self.padsize*self.N[2]) # Independent complex wavenumbers in z-direction for padded array
# Rename since there's no real space
self.original_shape_padded = self.real_shape_padded
self.original_shape = self.real_shape
self.transformed_shape = self.complex_shape
self.original_local_slice = self.real_local_slice
self.transformed_local_slice = self.complex_local_slice
self.ks = (fftfreq(N[2])*N[2]).astype(int)
# Source: eval_output_embeddings.py, project almond-nnparser (Stanford-Mobisocial-IoT-Lab)
def bag_of_tokens(config, labels, label_lengths):
if config.train_output_embeddings:
with tf.variable_scope('embed', reuse=True):
output_embeddings = tf.get_variable('output_embedding')
else:
output_embeddings = tf.constant(config.output_embedding_matrix)
#everything_label_placeholder = tf.placeholder(shape=(None, config.max_length,), dtype=tf.int32)
#everything_label_length_placeholder = tf.placeholder(shape=(None,), dtype=tf.int32)
labels = tf.constant(np.array(labels))
embedded_output = tf.gather(output_embeddings, labels)
print('embedded_output before', embedded_output)
#mask = tf.sequence_mask(label_lengths, maxlen=config.max_length, dtype=tf.float32)
# note: this multiplication will broadcast the mask along all elements of the depth dimension
# (which is why we run the expand_dims to choose how to broadcast)
#embedded_output = embedded_output * tf.expand_dims(mask, axis=2)
#print('embedded_output after', embedded_output)
return tf.reduce_sum(embedded_output, axis=1)
def _compute_process_and_covariance_matrices(self, dt):
"""Computes the transition and covariance matrix of the process model and measurement model.
Args:
dt (float): Timestep of the discrete transition.
Returns:
F (numpy.ndarray): Transition matrix.
Q (numpy.ndarray): Process covariance matrix.
R (numpy.ndarray): Measurement covariance matrix.
"""
F = np.array(np.bmat([[np.eye(3), dt * np.eye(3)], [np.zeros((3, 3)), np.eye(3)]]))
self.process_matrix = F
q_p = self.process_covariance_position
q_v = self.process_covariance_velocity
Q = np.diag([q_p, q_p, q_p, q_v, q_v, q_v]) ** 2 * dt
r = self.measurement_covariance
R = r * np.eye(4)
self.process_covariance = Q
self.measurement_covariance = R
return F, Q, R
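# The same constant-velocity transition matrix can be assembled with np.block, the
# modern counterpart of np.bmat; the covariance values below are placeholders.
import numpy as np

dt_demo = 0.1
F_demo = np.block([[np.eye(3), dt_demo * np.eye(3)],
                   [np.zeros((3, 3)), np.eye(3)]])
q_p_demo, q_v_demo = 0.5, 0.1
Q_demo = np.diag([q_p_demo, q_p_demo, q_p_demo, q_v_demo, q_v_demo, q_v_demo]) ** 2 * dt_demo
print(F_demo.shape, Q_demo.shape)  # (6, 6) (6, 6)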
def sample(self, sample_size=20, text=None):
"""Sample the documents."""
p = 1
    if text is not None:
try:
x, word_idxs = self.reader.get(text)
except Exception as e:
print(e)
return
else:
x, word_idxs = self.reader.random()
print(" [*] Text: %s" % " ".join([self.reader.idx2word[word_idx] for word_idx in word_idxs]))
cur_ps = self.sess.run(self.p_x_i, feed_dict={self.x: x})
word_idxs = np.array(cur_ps).argsort()[-sample_size:][::-1]
ps = cur_ps[word_idxs]
for idx, (cur_p, word_idx) in enumerate(zip(ps, word_idxs)):
print(" [%d] %-20s: %.8f" % (idx+1, self.reader.idx2word[word_idx], cur_p))
p *= cur_p
print(" [*] perp : %8.f" % -np.log(p))
def plot_nucleotide_diversity(ax, fqlists, invert=False):
'''
    Create a FastQC-like "Per base sequence content" plot
Plot fraction of nucleotides per position
zip will stop when shortest read is exhausted
'''
if invert:
fqlists = [list(reversed(read)) for read in fqlists]
numreads = len(fqlists)
sns.set_style("darkgrid")
l_A, = ax.plot(
np.array([pos.count('A') / numreads for pos in zip(*fqlists)]), 'green', label='A')
l_T, = ax.plot(
np.array([pos.count('T') / numreads for pos in zip(*fqlists)]), 'red', label='T')
l_G, = ax.plot(
np.array([pos.count('G') / numreads for pos in zip(*fqlists)]), 'black', label='G')
l_C, = ax.plot(
np.array([pos.count('C') / numreads for pos in zip(*fqlists)]), 'blue', label='C')
if invert:
ax.set_xticklabels(-1 * ax.get_xticks().astype(int))
return [l_A, l_T, l_G, l_C]
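# The per-position fractions plotted above can be computed without matplotlib; a
# minimal sketch on made-up reads (zip stops at the shortest read, as in the plot).
import numpy as np

reads_demo = ["ACGT", "AGGT", "ATGT"]
frac_A = np.array([pos.count('A') / len(reads_demo) for pos in zip(*reads_demo)])
print(frac_A)  # [1. 0. 0. 0.]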
def plot_qual(ax, quallist, invert=False):
'''
    Create a FastQC-like "Per base sequence quality" plot
Plot average quality per position
zip will stop when shortest read is exhausted
'''
sns.set_style("darkgrid")
if invert:
l_Q, = ax.plot(np.array([np.mean(position) for position in zip(
*[list(reversed(read)) for read in quallist])]), 'orange', label="Quality")
ax.set_xlabel('Position in read from end')
ax.set_xticklabels(-1 * ax.get_xticks().astype(int))
else:
l_Q, = ax.plot(np.array([np.mean(position)
for position in zip(*quallist)]), 'orange', label="Quality")
ax.set_xlabel('Position in read from start')
return l_Q
def d_x2(self, factors=None):
"""Creates a sparse matrix for computing the second derivative with respect to x multiplied
by factors given for every point. Uses central difference quotient.
Args:
factors: Factor for each point to be applied after derivation.
Returns:
        Sparse matrix that calculates second derivatives of the field components.
"""
# use ones as factors if none are specified
if factors is None:
factors = np.array(1).repeat(self.num_points)
return sp.dia_matrix((np.array([factors, -2*factors, factors]), [-1, 0, 1]),
shape=(self.num_points, self.num_points))
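# The operator above is the standard [1, -2, 1] second-difference stencil; its dense
# form for a small grid with unit factors looks like this.
import numpy as np
import scipy.sparse as sp

n_demo = 5
f_demo = np.ones(n_demo)
d2_demo = sp.dia_matrix((np.array([f_demo, -2 * f_demo, f_demo]), [-1, 0, 1]),
                        shape=(n_demo, n_demo))
print(d2_demo.toarray())
# [[-2.  1.  0.  0.  0.]
#  [ 1. -2.  1.  0.  0.]
#  [ 0.  1. -2.  1.  0.]
#  [ 0.  0.  1. -2.  1.]
#  [ 0.  0.  0.  1. -2.]]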
def plot_region(self, region):
"""Shows the given region in the field plot.
Args:
region: Region to be plotted.
"""
if type(region) == reg.PointRegion:
self.axes.plot(np.ones(2) * region.point_coordinates / self._x_axis_factor,
np.array([-1, 1]) * self.scale, color='black')
elif type(region) == reg.LineRegion:
self.axes.plot(np.ones(2) * region.line_coordinates[0] / self._x_axis_factor,
np.array([-1, 1]) * self.scale, color='black')
self.axes.plot(np.ones(2) * region.line_coordinates[1] / self._x_axis_factor,
np.array([-1, 1]) * self.scale, color='black')
else:
raise TypeError('Unknown type in region list: {}'.format(type(region)))
def test_accuracy_full_batch(tokens, features, mini_batch_size, word_attn, sent_attn, th=0.5):
p = []
l = []
cnt = 0
g = gen_minibatch1(tokens, features, mini_batch_size, False)
for token, feature in g:
        if cnt % 100 == 0:
            print(cnt)
        cnt += 1
# print token.size()
# y_pred = get_predictions(token, word_attn, sent_attn)
# print y_pred
y_pred = get_predictions(token, feature, word_attn, sent_attn)
# print y_pred
# _, y_pred = torch.max(y_pred, 1)
# y_pred = y_pred[:, 1]
# print y_pred
p.append(np.ndarray.flatten(y_pred.data.cpu().numpy()))
p = [item for sublist in p for item in sublist]
p = np.array(p)
return p
def _ncc_c(x, y):
"""
>>> _ncc_c([1,2,3,4], [1,2,3,4])
array([ 0.13333333, 0.36666667, 0.66666667, 1. , 0.66666667,
0.36666667, 0.13333333])
>>> _ncc_c([1,1,1], [1,1,1])
array([ 0.33333333, 0.66666667, 1. , 0.66666667, 0.33333333])
>>> _ncc_c([1,2,3], [-1,-1,-1])
array([-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005])
"""
den = np.array(norm(x) * norm(y))
    den[den == 0] = np.inf
x_len = len(x)
fft_size = 1<<(2*x_len-1).bit_length()
cc = ifft(fft(x, fft_size) * np.conj(fft(y, fft_size)))
cc = np.concatenate((cc[-(x_len-1):], cc[:x_len]))
return np.real(cc) / den
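# The FFT-based result above matches a direct cross-correlation normalized by
# norm(x) * norm(y); a quick consistency check assuming _ncc_c is in scope.
import numpy as np
from numpy.linalg import norm

x_demo = np.array([1., 2., 3., 4.])
y_demo = np.array([1., 2., 3., 4.])
direct = np.correlate(x_demo, y_demo, mode='full') / (norm(x_demo) * norm(y_demo))
print(np.allclose(_ncc_c(x_demo, y_demo), direct))  # True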
def layout_tree(correlation):
"""Layout tree for visualization with e.g. matplotlib.
Args:
correlation: A [V, V]-shaped numpy array of latent correlations.
Returns:
A [V, 3]-shaped numpy array of spectral positions of vertices.
"""
assert len(correlation.shape) == 2
assert correlation.shape[0] == correlation.shape[1]
assert correlation.dtype == np.float32
laplacian = -correlation
np.fill_diagonal(laplacian, 0)
np.fill_diagonal(laplacian, -laplacian.sum(axis=0))
    evals, evects = scipy.linalg.eigh(laplacian, eigvals=(1, 3))  # eigenvalue indices 1..3 (skip the zero mode)
assert np.all(evals > 0)
assert evects.shape[1] == 3
return evects
def __init__(self, N, V, tree_prior, config):
"""Initialize a model with an empty subsample.
Args:
N (int): Number of rows in the dataset.
V (int): Number of columns (features) in the dataset.
tree_prior: A [K]-shaped numpy array of prior edge log odds, where
K is the number of edges in the complete graph on V vertices.
config: A global config dict.
"""
assert isinstance(N, int)
assert isinstance(V, int)
assert isinstance(tree_prior, np.ndarray)
assert isinstance(config, dict)
K = V * (V - 1) // 2 # Number of edges in complete graph.
assert V <= 32768, 'Invalid # features > 32768: {}'.format(V)
assert tree_prior.shape == (K, )
assert tree_prior.dtype == np.float32
self._config = config.copy()
self._num_rows = N
self._tree_prior = tree_prior
self._tree = TreeStructure(V)
assert self._tree.num_vertices == V
self._program = make_propagation_program(self._tree.tree_grid)
self._added_rows = set()
def sample_tree(self):
"""Samples a random tree.
Returns:
A pair (edges, edge_logits), where:
edges: A list of (vertex, vertex) pairs.
edge_logits: A [K]-shaped numpy array of edge logits.
"""
logger.info('TreeCatTrainer.sample_tree given %d rows',
len(self._added_rows))
SERIES.sample_tree_num_rows.append(len(self._added_rows))
complete_grid = self._tree.complete_grid
edge_logits = self.compute_edge_logits()
assert edge_logits.shape[0] == complete_grid.shape[1]
assert edge_logits.dtype == np.float32
edges = self.get_edges()
edges = sample_tree(complete_grid, edge_logits, edges)
return edges, edge_logits