def compute_edge_logits(self):
"""Compute non-normalized logprob of all V(V-1)/2 candidate edges.
This is used for sampling and estimating the latent tree.
"""
V, E, K, M = self._VEKM
vert_logits = logprob_dc(self._vert_ss, self._vert_prior, axis=1)
if len(self._added_rows) == V:
assignments = self._assignments
else:
assignments = self._assignments[sorted(self._added_rows), :]
assignments = np.array(assignments, order='F')
parallel = self._config['learning_parallel']
result = treecat_compute_edge_logits(M, self._tree.complete_grid,
self._gammaln_table, assignments,
vert_logits, parallel)
result += self._tree_prior
return result
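import numpy as np

def edge_logits_to_probs(edge_logits):
    """Hedged usage sketch (not part of the trainer above): the [K]-shaped
    result of compute_edge_logits() is unnormalized, so turning it into a
    distribution over the V*(V-1)/2 candidate edges takes an explicit,
    numerically stable softmax."""
    edge_logits = np.asarray(edge_logits, dtype=np.float64)
    shifted = edge_logits - edge_logits.max()  # Guard against overflow in exp.
    probs = np.exp(shifted)
    return probs / probs.sum()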
def train(self):
"""Train a TreeCat model using subsample-annealed MCMC.
Returns:
A trained model as a dictionary with keys:
config: A global config dict.
tree: A TreeStructure instance with the learned latent
structure.
edge_logits: A [K]-shaped array of all edge logits.
suffstats: Sufficient statistics of features, vertices, and
edges and a ragged_index for the features array.
assignments: An [N, V]-shaped numpy array of latent cluster
            ids for each cell in the dataset, where N is the number of
data rows and V is the number of features.
"""
model = TreeTrainer.train(self)
model['assignments'] = self._assignments
model['suffstats'] = {
'ragged_index': self._table.ragged_index,
'vert_ss': self._vert_ss,
'edge_ss': self._edge_ss,
'feat_ss': self._feat_ss,
'meas_ss': self._meas_ss,
}
return model
def __init__(self, data, tree_prior, config):
"""Initialize a model with an empty subsample.
Args:
data: An [N, V]-shaped numpy array of real-valued data.
tree_prior: A [K]-shaped numpy array of prior edge log odds, where
K is the number of edges in the complete graph on V vertices.
config: A global config dict.
"""
assert isinstance(data, np.ndarray)
data = np.asarray(data, np.float32)
assert len(data.shape) == 2
N, V = data.shape
D = config['model_latent_dim']
E = V - 1 # Number of edges in the tree.
TreeTrainer.__init__(self, N, V, tree_prior, config)
self._data = data
self._latent = np.zeros([N, V, D], np.float32)
# This is symmetric positive definite.
self._vert_ss = np.zeros([V, D, D], np.float32)
# This is arbitrary (not necessarily symmetric).
self._edge_ss = np.zeros([E, D, D], np.float32)
# This represents (count, mean, covariance).
self._feat_ss = np.zeros([V, D, 1 + 1 + D], np.float32)
def train(self):
"""Train a TreeGauss model using subsample-annealed MCMC.
Returns:
A trained model as a dictionary with keys:
config: A global config dict.
tree: A TreeStructure instance with the learned latent
structure.
edge_logits: A [K]-shaped array of all edge logits.
suffstats: Sufficient statistics of features and vertices.
latent: An [N, V, M]-shaped numpy array of latent states, where
N is the number of data rows, V is the number of features,
and M is the dimension of each latent variable.
"""
model = TreeTrainer.train(self)
model['latent'] = self._latent
model['suffstats'] = {
'vert_ss': self._vert_ss,
'edge_ss': self._edge_ss,
'feat_ss': self._feat_ss,
}
return model
def train_ensemble(table, tree_prior, config):
"""Train a TreeCat ensemble model using subsample-annealed MCMC.
The ensemble size is controlled by config['model_ensemble_size'].
Let N be the number of data rows and V be the number of features.
Args:
table: A Table instance holding N rows of V features of data.
tree_prior: A [K]-shaped numpy array of prior edge log odds, where
K is the number of edges in the complete graph on V vertices.
config: A global config dict.
Returns:
A trained model as a dictionary with keys:
tree: A TreeStructure instance with the learned latent structure.
suffstats: Sufficient statistics of features, vertices, and edges.
assignments: An [N, V] numpy array of latent cluster ids for each
cell in the dataset.
"""
tasks = []
for sub_seed in range(config['model_ensemble_size']):
sub_config = config.copy()
sub_config['seed'] += sub_seed
tasks.append((table, tree_prior, sub_config))
return parallel_map(_train_model, tasks)
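# Hedged usage note (not from the original module): train_ensemble gives each
# task a copy of the config with the seed offset by its index, so ensemble
# members differ only through MCMC randomness. A runnable sketch of that
# seeding scheme:
base_config = {'seed': 42, 'model_ensemble_size': 3}
sub_seeds = [base_config['seed'] + s
             for s in range(base_config['model_ensemble_size'])]
assert sub_seeds == [42, 43, 44]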
def test_server_logprob_normalized(N, V, C, M):
model = generate_fake_model(N, V, C, M)
config = TINY_CONFIG.copy()
config['model_num_clusters'] = M
model['config'] = config
server = TreeCatServer(model)
# The total probability of all categorical rows should be 1.
ragged_index = model['suffstats']['ragged_index']
factors = []
for v in range(V):
C = ragged_index[v + 1] - ragged_index[v]
factors.append([one_hot(c, C) for c in range(C)])
data = np.array(
[np.concatenate(columns) for columns in itertools.product(*factors)],
dtype=np.int8)
logprobs = server.logprob(data)
logtotal = np.logaddexp.reduce(logprobs)
assert logtotal == pytest.approx(0.0, abs=1e-5)
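import itertools
import numpy as np

def one_hot(c, C):
    """Minimal stand-in matching the one_hot(c, C) signature assumed by the
    test above: a length-C int8 vector with a single 1 at index c."""
    value = np.zeros(C, dtype=np.int8)
    value[c] = 1
    return value

# The enumeration trick from the test, standalone: two features with 2 and 3
# categories yield 2 * 3 = 6 one-hot rows; any normalized joint distribution
# over these rows must logaddexp-reduce to 0.0 in log space.
factors = [[one_hot(c, C) for c in range(C)] for C in (2, 3)]
rows = np.array([np.concatenate(cols) for cols in itertools.product(*factors)],
                dtype=np.int8)
assert rows.shape == (6, 5)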
def observed_perplexity(self, counts):
"""Compute perplexity = exp(entropy) of observed variables.
Perplexity is an information theoretic measure of the number of
clusters or latent classes. Perplexity is a real number in the range
[1, M], where M is model_num_clusters.
Args:
counts: A [V]-shaped array of multinomial counts.
Returns:
A [V]-shaped numpy array of perplexity.
"""
V, E, M, R = self._VEMR
    if counts is not None:
        # Broadcast a scalar count up to a [V]-shaped array of counts.
        counts = np.ones(V, dtype=np.int8) * counts
assert counts.shape == (V, )
assert counts.dtype == np.int8
assert np.all(counts > 0)
observed_entropy = np.empty(V, dtype=np.float32)
for v in range(V):
beg, end = self._ragged_index[v:v + 2]
probs = np.dot(self._feat_cond[beg:end, :], self._vert_probs[v, :])
observed_entropy[v] = multinomial_entropy(probs, counts[v])
return np.exp(observed_entropy)
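import numpy as np

# Worked example (standalone): perplexity = exp(entropy). A uniform
# distribution over M clusters has entropy log(M) and hence perplexity
# exactly M, the top of the [1, M] range quoted in the docstring above;
# a point mass has entropy 0 and perplexity 1, the bottom of the range.
M = 8
uniform = np.full(M, 1.0 / M)
entropy = -np.sum(uniform * np.log(uniform))
assert np.isclose(np.exp(entropy), M)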
def observed_perplexity(self, counts):
"""Compute perplexity = exp(entropy) of observed variables.
Perplexity is an information theoretic measure of the number of
clusters or observed classes. Perplexity is a real number in the range
[1, dim[v]], where dim[v] is the number of categories in an observed
categorical variable or 2 for an ordinal variable.
Args:
counts: A [V]-shaped array of multinomial counts.
Returns:
A [V]-shaped numpy array of perplexity.
"""
result = self._ensemble[0].observed_perplexity(counts)
for server in self._ensemble[1:]:
result += server.observed_perplexity(counts)
result /= len(self._ensemble)
return result
def latent_correlation(self):
"""Compute correlation matrix among latent features.
This computes the generalization of Pearson's correlation to discrete
data. Let I(X;Y) be the mutual information. Then define correlation as
rho(X,Y) = sqrt(1 - exp(-2 I(X;Y)))
Returns:
A [V, V]-shaped numpy array of feature-feature correlations.
"""
result = self._ensemble[0].latent_correlation()
for server in self._ensemble[1:]:
result += server.latent_correlation()
result /= len(self._ensemble)
return result
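import numpy as np

def correlation_from_mutual_information(mi):
    """Standalone sketch of the docstring formula above:
    rho(X, Y) = sqrt(1 - exp(-2 * I(X; Y))). It maps I = 0 (independence)
    to rho = 0 and tends to 1 as I grows; for jointly Gaussian variables
    it recovers the absolute Pearson correlation."""
    mi = np.asarray(mi, dtype=np.float64)
    return np.sqrt(1.0 - np.exp(-2.0 * mi))

assert correlation_from_mutual_information(0.0) == 0.0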
def logprob(self, rows, evidence=None):
"""Compute non-normalized log probabilies of many rows of data.
If evidence is specified, compute conditional log probability;
otherwise compute unconditional log probability.
Args:
        rows: A list of rows of data, where each row is a sparse dict
mapping feature name to feature value.
evidence: An optional row of conditioning data, as a sparse dict
mapping feature name to feature value.
Returns:
An [len(rows)]-shaped numpy array of log probabilities.
"""
data = import_rows(self._schema, rows)
if evidence is None:
return self._server.logprob(data)
else:
ragged_evidence = import_rows(self._schema, [evidence])
            # Conditional logprob = logprob(row & evidence) - logprob(evidence).
            return (self._server.logprob(data + ragged_evidence) -
                    self._server.logprob(ragged_evidence))
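import numpy as np

# Hedged numeric sketch of the identity the conditional branch above relies
# on: log P(row | evidence) = log P(row, evidence) - log P(evidence).
log_joint = np.log(np.array([[0.1, 0.2], [0.3, 0.4]]))  # Toy 2x2 joint.
log_p_evidence = np.logaddexp.reduce(log_joint[:, 1])   # Marginalize column 1.
log_p_cond = log_joint[0, 1] - log_p_evidence           # P(row=0 | col=1).
assert np.isclose(np.exp(log_p_cond), 0.2 / 0.6)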
def sample(self, N, evidence=None):
"""Draw N samples from the posterior distribution.
Args:
N: The number of samples to draw.
evidence: An optional single row of conditioning data, as a sparse
dict mapping feature name to feature value.
Returns:
An [N, R]-shaped numpy array of sampled multinomial data.
"""
if evidence is None:
data = None
else:
data = import_rows(self._schema, [evidence])[0]
ragged_samples = self._server.sample(N, self._counts, data)
return export_rows(self._schema, ragged_samples)
def fit(self, X, y):
""" Training the gcForest on input data X and associated target y.
:param X: np.array
Array containing the input samples.
Must be of shape [n_samples, data] where data is a 1D array.
:param y: np.array
1D array containing the target values.
Must be of shape [n_samples]
"""
if np.shape(X)[0] != len(y):
raise ValueError('Sizes of y and X do not match.')
mgs_X = self.mg_scanning(X, y)
_ = self.cascade_forest(mgs_X, y)
def contest(self, b, g, r):
""" Search for biased BGR values
Finds closest neuron (min dist) and updates self.freq
finds best neuron (min dist-self.bias) and returns position
for frequently chosen neurons, self.freq[i] is high and self.bias[i] is negative
self.bias[i] = self.GAMMA*((1/self.NETSIZE)-self.freq[i])"""
i, j = self.SPECIALS, self.NETSIZE
dists = abs(self.network[i:j] - np.array([b,g,r])).sum(1)
bestpos = i + np.argmin(dists)
biasdists = dists - self.bias[i:j]
bestbiaspos = i + np.argmin(biasdists)
self.freq[i:j] *= (1-self.BETA)
self.bias[i:j] += self.BETAGAMMA * self.freq[i:j]
self.freq[bestpos] += self.BETA
self.bias[bestpos] -= self.BETAGAMMA
return bestbiaspos
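import numpy as np

def freq_update_invariant(netsize=16, beta=1.0 / 1024, steps=200):
    """Standalone sketch (illustrative constants, not the class's actual
    BETA/NETSIZE) of the frequency update in contest() above: decaying all
    entries by (1 - beta) and adding beta to the winner preserves
    sum(freq) == 1, so freq stays a probability distribution over neurons."""
    rng = np.random.default_rng(0)
    freq = np.full(netsize, 1.0 / netsize)
    for _ in range(steps):
        freq *= 1.0 - beta
        freq[rng.integers(netsize)] += beta  # Winner chosen at random here.
    assert np.isclose(freq.sum(), 1.0)
    return freq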
def rasterMaskToGrid( rasterMask ):
grid = []
mask = rasterMask['mask']
for y in range(rasterMask['height']):
for x in range(rasterMask['width']):
if mask[y,x]==0:
grid.append([x,y])
    grid = np.array(grid, dtype=float)  # np.float is deprecated; plain float is equivalent.
    if rasterMask['hex'] is True:  # The None check was dead code: rasterMask is dereferenced above.
f = math.sqrt(3.0)/2.0
offset = -0.5
if np.argmin(rasterMask['mask'][0]) > np.argmin(rasterMask['mask'][1]):
offset = 0.5
for i in range(len(grid)):
if (grid[i][1]%2.0==0.0):
grid[i][0]-=offset
grid[i][1] *= f
return grid
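import math

# Hedged geometry check for the hex branch above: shifting alternate rows by
# half a cell and scaling y by sqrt(3)/2 turns a square lattice into a
# hexagonal packing, so a point is equidistant (distance 1, for unit
# horizontal spacing) from neighbours in its own row and in the next row.
f = math.sqrt(3.0) / 2.0
same_row = math.hypot(1.0, 0.0)   # Neighbour in the same row.
next_row = math.hypot(0.5, f)     # Offset neighbour one row down.
assert abs(same_row - 1.0) < 1e-12 and abs(next_row - 1.0) < 1e-12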
def match_matrix(event: Event):
"""Returns a numpy participation matrix for the qualification matches in this event, used for calculating OPR.
Each row in the matrix corresponds to a single alliance in a match, meaning that there will be two rows (one for
red, one for blue) per match. Each column represents a single team, ordered by team number. If a team participated
on a certain alliance, the value at that row and column would be 1, otherwise, it would be 0. For example, an
event with teams 1-7 that featured a match that pitted teams 1, 3, and 5 against 2, 4, and 6 would have a match
matrix that looks like this (sans labels):
#1 #2 #3 #4 #5 #6 #7
qm1_red 1 0 1 0 1 0 0
qm1_blue 0 1 0 1 0 1 0
"""
match_list = []
for match in filter(lambda match: match['comp_level'] == 'qm', event.matches):
matchRow = []
for team in event.teams:
matchRow.append(1 if team['key'] in match['alliances']['red']['teams'] else 0)
match_list.append(matchRow)
matchRow = []
for team in event.teams:
matchRow.append(1 if team['key'] in match['alliances']['blue']['teams'] else 0)
match_list.append(matchRow)
mat = numpy.array(match_list)
sum_matches = numpy.sum(mat, axis=0)
avg_team_matches = sum(sum_matches) / float(len(sum_matches))
return mat[:, numpy.apply_along_axis(numpy.count_nonzero, 0, mat) > avg_team_matches - 2]
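import numpy as np

def compute_opr(participation, alliance_scores):
    """Hedged sketch of the downstream OPR computation this matrix feeds
    (an assumption about usage, not part of the original module): OPR is
    the least-squares solution of participation @ opr ~= alliance_scores,
    where each row of the participation matrix pairs with the red or blue
    alliance score of the corresponding match."""
    opr, *_ = np.linalg.lstsq(participation, alliance_scores, rcond=None)
    return opr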
def get_img(data_path):
# Getting image array from path:
img = imread(data_path)
img = imresize(img, (64, 64))
return img
def get_dataset(dataset_path='Data/Train_Data'):
# Getting all data from data path:
try:
X = np.load('Data/npy_train_data/X.npy')
Y = np.load('Data/npy_train_data/Y.npy')
    except IOError:  # Cached .npy files are missing; rebuild them from images.
        inputs_path = dataset_path + '/input'
        images = listdir(inputs_path)  # Getting images.
X = []
Y = []
for img in images:
img_path = inputs_path+'/'+img
x_img = get_img(img_path).astype('float32').reshape(64, 64, 3)
x_img /= 255.
y_img = get_img(img_path.replace('input/', 'mask/mask_')).astype('float32').reshape(64, 64, 1)
y_img /= 255.
X.append(x_img)
Y.append(y_img)
X = np.array(X)
Y = np.array(Y)
        # Create dataset cache:
if not os.path.exists('Data/npy_train_data/'):
os.makedirs('Data/npy_train_data/')
np.save('Data/npy_train_data/X.npy', X)
np.save('Data/npy_train_data/Y.npy', Y)
X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
return X, X_test, Y, Y_test
def read_groundtruth():
ret = []
with open(
os.path.join(
os.path.abspath(os.path.dirname(__file__)),
'groundtruth.txt'), 'rb') as lines:
for line in lines:
ret.append(line[:-2])
return np.array(ret)
def extract_digits(self, image):
"""
Extract digits from a binary image representing a sudoku
:param image: binary image/sudoku
:return: array of digits and their probabilities
"""
prob = np.zeros(4, dtype=np.float32)
digits = np.zeros((4, 9, 9), dtype=object)
for i in range(4):
labeled, features = label(image, structure=CROSS)
objs = find_objects(labeled)
for obj in objs:
roi = image[obj]
# center of bounding box
cy = (obj[0].stop + obj[0].start) / 2
cx = (obj[1].stop + obj[1].start) / 2
dists = cdist([[cy, cx]], CENTROIDS, 'euclidean')
pos = np.argmin(dists)
            cy, cx = pos % 9, pos // 9  # Integer division so cy, cx index the 9x9 grid.
# 28x28 image, center relative to sudoku
prediction = self.classifier.classify(morph(roi))
            if digits[i, cy, cx] == 0:  # "is 0" relies on int caching; compare with ==.
# Newly found digit
digits[i, cy, cx] = prediction
prob[i] += prediction[0, 0]
elif prediction[0, 0] > digits[i, cy, cx][0, 0]:
# Overlapping! (noise), choose the most probable prediction
prob[i] -= digits[i, cy, cx][0, 0]
digits[i, cy, cx] = prediction
prob[i] += prediction[0, 0]
image = np.rot90(image)
logging.info(prob)
return digits[np.argmax(prob)]
def diagonal(_, pos):
"""
Given an object pixels' positions, return the diagonal length of its
bound box
:param _: pixel values (unused)
:param pos: pixel position (1-D)
:return: diagonal of bounding box
"""
    xs = np.array([i // SSIZE for i in pos])  # Integer division for row indices.
ys = np.array([i % SSIZE for i in pos])
minx = np.amin(xs)
miny = np.amin(ys)
maxx = np.amax(xs)
maxy = np.amax(ys)
return compute_line(np.array([minx, miny]), np.array([maxx, maxy]))
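import numpy as np

def compute_line(p, q):
    """Hedged stand-in for the compute_line helper used above, whose
    definition is not shown in this excerpt: the Euclidean distance between
    two 2-D points, which is what a bounding-box diagonal requires."""
    return float(np.linalg.norm(np.asarray(q, dtype=float) - np.asarray(p, dtype=float)))

assert compute_line(np.array([0, 0]), np.array([3, 4])) == 5.0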
def reconstruct_batch(self, output, batch_id, chosen_labels=None):
""" Create the song associated with the network output
Args:
        output (list[np.Array]): The output of the network (size batch_size*output_dim)
batch_id (int): The batch that we must reconstruct
chosen_labels (list[np.Array[batch_size, int]]): the sampled class at each timestep (useful to reconstruct the generated song)
Return:
Song: The reconstructed song
"""
raise NotImplementedError('Abstract class')
def reconstruct_batch(self, output, batch_id, chosen_labels=None):
""" Create the song associated with the network output
Args:
        output (list[np.Array]): The output of the network (size batch_size*output_dim)
batch_id (int): The batch id
chosen_labels (list[np.Array[batch_size, int]]): the sampled class at each timestep (useful to reconstruct the generated song)
Return:
Song: The reconstructed song
"""
    assert Relative.HAS_EMPTY  # No need to compare "== True".
processed_song = Relative.RelativeSong()
processed_song.first_note = music.Note()
processed_song.first_note.note = 56 # TODO: Define what should be the first note
print('Reconstruct')
for i, note in enumerate(output):
relative = Relative.RelativeNote()
        # If we sampled the output, recover which class was actually selected.
if not chosen_labels or i == len(chosen_labels): # If chosen_labels, the last generated note has not been sampled
            chosen_label = int(np.argmax(note[batch_id, :]))  # Cast np.int64 to int to avoid compatibility issues with mido.
else:
chosen_label = int(chosen_labels[i][batch_id])
print(chosen_label, end=' ') # TODO: Add a text output connector
if chosen_label == 0: # <next> token
relative.pitch_class = None
            #relative.scale =  # Not used
#relative.prev_tick =
else:
relative.pitch_class = chosen_label-1
#relative.scale =
#relative.prev_tick =
processed_song.notes.append(relative)
print()
return self.reconstruct_song(processed_song)
def score(self, xnew):
    """
    Generate scores for new x values.
    xnew should be an array-like object where each row represents a test point.
    Returns the predicted mean and standard deviation [mu, s].
    @param{np.Array} xnew. A numpy array where each row corresponds to an observation
    @output{Array} mu. A list containing predicted mean values
    @output{Array} s. A list containing predicted standard deviations
    """
    self._validate_xnew(xnew)
    # Delegate to the underlying scikit-learn GP; return_std=True also
    # yields the predictive standard deviation.
    mu, sd = self.gp.predict(xnew, return_std=True)
    return {'mu': np.ravel(mu).tolist(), 'sd': sd.tolist()}
    # The commented block below sketches scikit-learn's internal computation
    # of the predictive distribution (kept for reference):
    #K_trans = self.kernel(X, self.xTrain)
    #y_mean = K_trans.dot(self.alpha_)  # Line 4 (y_mean = f_star)
    #y_mean = self.y_train_mean + y_mean  # undo normal.
    # Compute variance of predictive distribution
    #y_var = self.kernel_.diag(X)
    #y_var -= np.einsum("ki,kj,ij->k", K_trans, K_trans, K_inv)
    # Check if any of the variances is negative because of
    # numerical issues. If yes: set the variance to 0.
    #y_var_negative = y_var < 0
    #if np.any(y_var_negative):
    #    warnings.warn("Predicted variances smaller than 0. "
    #                  "Setting those variances to 0.")
    #    y_var[y_var_negative] = 0.0
    #return y_mean, np.sqrt(y_var)
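import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

# Hedged standalone sketch for score() above: the restored
# self.gp.predict(..., return_std=True) call matches scikit-learn's
# GaussianProcessRegressor API, so an equivalent minimal usage looks like
# this (kernel choice and normalization are left at their defaults).
X_train = np.array([[0.0], [1.0], [2.0]])
y_train = np.array([0.0, 1.0, 4.0])
gp = GaussianProcessRegressor().fit(X_train, y_train)
mu, sd = gp.predict(np.array([[1.5]]), return_std=True)
print({'mu': mu.tolist(), 'sd': sd.tolist()})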