def add(self, x, y=None):
    """Append a batch of rows to the memory-mapped arrays and update the running statistics."""
    self.X = np.memmap(
        self.path + "/X.npy", self.X.dtype,
        shape=(self.nrows + x.shape[0], x.shape[1])
    )
    self.X[self.nrows:self.nrows + x.shape[0], :] = x
    if y is not None:
        if x.shape != y.shape:
            raise ValueError("x and y should have the same shape")
        self.Y = np.memmap(
            self.path + "/Y.npy", self.Y.dtype,
            shape=(self.nrows + y.shape[0], y.shape[1])
        )
        self.Y[self.nrows:self.nrows + y.shape[0], :] = y
    # Running statistics: batch update of the mean, sum of squared deviations, min and max.
    delta = x - self.running_mean
    n = self.X.shape[0] + np.arange(x.shape[0]) + 1
    self.running_dev += np.sum(delta * (x - self.running_mean), 0)
    self.running_mean += np.sum(delta / n[:, np.newaxis], 0)
    self.running_max = np.amax(np.vstack((self.running_max, x)), 0)
    self.running_min = np.amin(np.vstack((self.running_min, x)), 0)
    self.nrows += x.shape[0]
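The mean/variance bookkeeping at the end of add follows the running-statistics pattern (a batch variant of Welford's recurrence). Below is a standalone sketch of the per-row recurrence, checked against numpy's batch statistics; welford_update is a hypothetical helper for illustration, not part of the class above.

import numpy as np

def welford_update(mean, m2, count, batch):
    """Per-row Welford update of the running mean and sum of squared deviations."""
    for row in batch:
        count += 1
        delta = row - mean
        mean = mean + delta / count
        m2 = m2 + delta * (row - mean)
    return mean, m2, count

rng = np.random.default_rng(0)
data = rng.normal(size=(100, 3))

mean = np.zeros(3)
m2 = np.zeros(3)
count = 0
# Feed the data in two chunks, as add() would receive them.
mean, m2, count = welford_update(mean, m2, count, data[:40])
mean, m2, count = welford_update(mean, m2, count, data[40:])

assert np.allclose(mean, data.mean(axis=0))
assert np.allclose(m2 / count, data.var(axis=0))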
def transform_mnist_rts(in_data):
img, label = in_data
img = img[0] # Remove channel axis for skimage manipulation
# Rotate
img = transform.rotate(img, angle=np.random.uniform(-45, 45),
resize=True, mode='constant')
# Scale
img = transform.rescale(img, scale=np.random.uniform(0.7, 1.2),
mode='constant')
# Translate
h, w = img.shape
if h >= img_size[0] or w >= img_size[1]:
img = transform.resize(img, output_shape=img_size, mode='constant')
img = img.astype(np.float32)
else:
img_canvas = np.zeros(img_size, dtype=np.float32)
ymin = np.random.randint(0, img_size[0] - h)
xmin = np.random.randint(0, img_size[1] - w)
img_canvas[ymin:ymin+h, xmin:xmin+w] = img
img = img_canvas
img = img[np.newaxis, :]  # Add the channel axis back
return img, label
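A hedged usage sketch, assuming scikit-image is available and that img_size is the module-level constant the function references (its actual value is not shown in this excerpt).

import numpy as np
from skimage import transform

img_size = (64, 64)  # assumed value for the module-level constant used above

# Fake a single-channel 28x28 "MNIST" image in (C, H, W) layout with an integer label.
dummy = (np.random.rand(1, 28, 28).astype(np.float32), 3)
aug_img, aug_label = transform_mnist_rts(dummy)
print(aug_img.shape, aug_label)  # (1, 64, 64) 3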
def predict(self, inputs):
# uses MEMORY_DATA layer for loading images and postprocessing DENSE_CRF layer
img = inputs[0].transpose((2, 0, 1))
img = img[np.newaxis, :].astype(np.float32)
label = np.zeros((1, 1, 1, 1), np.float32)
data_dim = np.zeros((1, 1, 1, 2), np.float32)
data_dim[0][0][0][0] = img.shape[2]
data_dim[0][0][0][1] = img.shape[3]
img = np.ascontiguousarray(img, dtype=np.float32)
label = np.ascontiguousarray(label, dtype=np.float32)
data_dim = np.ascontiguousarray(data_dim, dtype=np.float32)
self.set_input_arrays(img, label, data_dim)
out = self.forward()
predictions = out[self.outputs[0]] # the output layer should be called crf_inf
segm_result = predictions[0].argmax(axis=0).astype(np.uint8)
return segm_result
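The first two lines of predict show a pattern that recurs throughout this page: reorder an (H, W, C) image to (C, H, W) and use np.newaxis to prepend the batch axis. A minimal standalone illustration:

import numpy as np

# A dummy 4x5 BGR image in (H, W, C) layout.
img_hwc = np.arange(4 * 5 * 3, dtype=np.float32).reshape(4, 5, 3)

# Reorder to (C, H, W), then prepend a batch axis with np.newaxis -> (N, C, H, W).
img_nchw = img_hwc.transpose((2, 0, 1))[np.newaxis, :]
print(img_nchw.shape)  # (1, 3, 4, 5)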
def estimate_time_constant(y, p=2, sn=None, lags=5, fudge_factor=1.):
"""
Estimate AR model parameters through the autocovariance function
Parameters
----------
y : array, shape (T,)
One dimensional array containing the fluorescence intensities with
one entry per time-bin.
p : positive integer
order of AR system
sn : float
noise standard deviation, estimated if not provided.
lags : positive integer
number of additional lags where the autocovariance is computed.
fudge_factor : float (0 < fudge_factor <= 1)
shrinkage factor to reduce bias.
Returns
-------
g : estimated coefficients of the AR process
"""
if sn is None:
sn = GetSn(y)
lags += p
xc = axcov(y, lags)
xc = xc[:, np.newaxis]
A = scipy.linalg.toeplitz(xc[lags + np.arange(lags)],
xc[lags + np.arange(p)]) - sn**2 * np.eye(lags, p)
g = np.linalg.lstsq(A, xc[lags + 1:])[0]
gr = np.roots(np.concatenate([np.array([1]), -g.flatten()]))
gr = (gr + gr.conjugate()) / 2.
gr[gr > 1] = 0.95 + np.random.normal(0, 0.01, np.sum(gr > 1))
gr[gr < 0] = 0.15 + np.random.normal(0, 0.01, np.sum(gr < 0))
g = np.poly(fudge_factor * gr)
g = -g[1:]
return g.flatten()
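A standalone sketch of the same autocovariance/Toeplitz idea on a synthetic AR(2) trace. GetSn and axcov from the original module are not shown here, so the autocovariance is computed directly with np.correlate and the noise term is taken as zero; variable names are illustrative only.

import numpy as np
import scipy.linalg

rng = np.random.default_rng(0)
g_true = np.array([1.3, -0.4])                  # true AR(2) coefficients (stable process)
T = 10000
y = np.zeros(T)
eps = rng.normal(size=T)
for t in range(2, T):
    y[t] = g_true @ y[t - 2:t][::-1] + eps[t]

p, extra_lags = 2, 5
lags = extra_lags + p
yc = y - y.mean()
full = np.correlate(yc, yc, mode='full') / T    # empirical autocovariance, lag 0 at the centre
xc = full[len(full) // 2:]                      # lags 0, 1, ..., T-1

# Extended Yule-Walker system, mirroring the Toeplitz construction above (with sn = 0).
A = scipy.linalg.toeplitz(xc[:lags], xc[:p])
g_hat = np.linalg.lstsq(A, xc[1:lags + 1], rcond=None)[0]
print(g_hat)   # close to [1.3, -0.4]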
Source: matrix_factorization.py (project: probabilistic-matrix-factorization, author: aki-nishimura)
def update_per_row(self, y_i, phi_i, J, mu0, c, v, r_prev_i, u_prev_i, phi_r_i, phi_u):
# Params:
# J - column indices
nnz_i = len(J)
residual_i = y_i - mu0 - c[J]
prior_Phi = np.diag(np.concatenate(([phi_r_i], phi_u)))
v_T = np.hstack((np.ones((nnz_i, 1)), v[J, :]))
post_Phi_i = prior_Phi + \
np.dot(v_T.T,
np.tile(phi_i[:, np.newaxis], (1, 1 + self.num_factor)) * v_T) # Weighted sum of v_j * v_j.T
post_mean_i = np.squeeze(np.dot(phi_i * residual_i, v_T))
C, lower = scipy.linalg.cho_factor(post_Phi_i)
post_mean_i = scipy.linalg.cho_solve((C, lower), post_mean_i)
# Generate Gaussian, recycling the Cholesky factorization from the posterior mean computation.
ru_i = math.sqrt(1 - self.relaxation ** 2) * scipy.linalg.solve_triangular(C, np.random.randn(len(post_mean_i)),
lower=lower)
ru_i += post_mean_i + self.relaxation * (post_mean_i - np.concatenate(([r_prev_i], u_prev_i)))
r_i = ru_i[0]
u_i = ru_i[1:]
return r_i, u_i
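The "recycling the Cholesky factorization" comment refers to a standard trick: once the precision matrix has been factored to compute the posterior mean, the same factor yields a draw from N(mu, Phi^-1). A self-contained sketch of that step (the surrounding model state is omitted):

import numpy as np
import scipy.linalg

rng = np.random.default_rng(0)
Phi = np.array([[4.0, 1.0], [1.0, 3.0]])     # posterior precision (symmetric positive definite)
b = np.array([1.0, 2.0])                     # Phi @ posterior_mean

C, lower = scipy.linalg.cho_factor(Phi)      # upper-triangular factor by default
mu = scipy.linalg.cho_solve((C, lower), b)   # posterior mean

# If Phi = C.T @ C, then C^-1 eps (eps ~ N(0, I)) has covariance (C.T C)^-1 = Phi^-1.
samples = mu + np.array([
    scipy.linalg.solve_triangular(C, rng.normal(size=2), lower=lower)
    for _ in range(50000)
])
print(np.cov(samples.T))        # approaches inv(Phi)
print(np.linalg.inv(Phi))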
Source: matrix_factorization.py (project: probabilistic-matrix-factorization, author: aki-nishimura)
def update_per_col(self, y_j, phi_j, I, mu0, r, u, c_prev_j, v_prev_j, phi_c_j, phi_v):
prior_Phi = np.diag(np.concatenate(([phi_c_j], phi_v)))
nnz_j = len(I)
residual_j = y_j - mu0 - r[I]
u_T = np.hstack((np.ones((nnz_j, 1)), u[I, :]))
post_Phi_j = prior_Phi + \
np.dot(u_T.T,
np.tile(phi_j[:, np.newaxis], (1, 1 + self.num_factor)) * u_T) # Weighted sum of u_i * u_i.T
post_mean_j = np.squeeze(np.dot(phi_j * residual_j, u_T))
C, lower = scipy.linalg.cho_factor(post_Phi_j)
post_mean_j = scipy.linalg.cho_solve((C, lower), post_mean_j)
# Generate Gaussian, recycling the Cholesky factorization from the posterior mean computation.
cv_j = math.sqrt(1 - self.relaxation ** 2) * scipy.linalg.solve_triangular(C, np.random.randn(len(post_mean_j)),
lower=lower)
cv_j += post_mean_j + self.relaxation * (post_mean_j - np.concatenate(([c_prev_j], v_prev_j)))
c_j = cv_j[0]
v_j = cv_j[1:]
return c_j, v_j
def weighted_linear_regression(self, x, y, weights):
return np.linalg.inv((weights[:, np.newaxis] * x).T.dot(x)).dot((weights[:, np.newaxis] * x).T.dot(y))
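The one-liner above forms the weighted normal equations and inverts X^T W X explicitly. An equivalent formulation that avoids the explicit inverse (usually better conditioned) is sketched below; it is an alternative, not the project's own code.

import numpy as np

def weighted_linear_regression_solve(x, y, weights):
    """Same normal equations as above, solved without forming an explicit inverse."""
    xtw = (weights[:, np.newaxis] * x).T
    return np.linalg.solve(xtw.dot(x), xtw.dot(y))

# Quick check against the inverse-based version on random data.
rng = np.random.default_rng(0)
x = rng.normal(size=(50, 3))
y = rng.normal(size=50)
w = rng.uniform(0.5, 2.0, size=50)
ref = np.linalg.inv((w[:, np.newaxis] * x).T.dot(x)).dot((w[:, np.newaxis] * x).T.dot(y))
assert np.allclose(weighted_linear_regression_solve(x, y, w), ref)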
def expectation(self, dataSplit, coefficients, variances):
assignment_weights = np.ones(
(len(dataSplit), self.num_components), dtype=float)
self.Q = len(self.endoVar)
for k in range(self.num_components):
coef_ = coefficients[k]
Beta = coef_.loc[self.endoVar, self.endoVar]   # .ix was removed from pandas; .loc is the equivalent
Gamma = coef_.loc[self.endoVar, self.exoVar]
a_ = (np.dot(Beta, self.fscores[
self.endoVar].T) + np.dot(Gamma, self.fscores[self.exoVar].T))
invert_ = np.linalg.inv(np.array(variances[k]))
exponential = np.exp(-0.5 * np.dot(np.dot(a_.T, invert_), a_))
den = (((2 * np.pi)**(self.Q / 2)) *
np.sqrt(np.linalg.det(variances[k])))
probabilities = exponential / den
probabilities = probabilities[0]
assignment_weights[:, k] = probabilities
assignment_weights /= assignment_weights.sum(axis=1)[:, np.newaxis]
# print(assignment_weights)
return assignment_weights
def contract_positions(XY, edges, stepsize):
"""Perturb vertex positions by an L1-minimizing attractive force.
This is used to slightly adjust vertex positions to provide a visual
hint to their grouping.
Args:
XY: A [V, 2]-shaped numpy array of the current positions.
edges: An [E, 2]-shaped numpy array of edges as (vertex,vertex) pairs.
stepsize: A float scaling how far each endpoint is pulled along its edge.
Returns:
A [V, 2]-shaped numpy array of the updated positions.
"""
E = edges.shape[0]
V = E + 1
assert edges.shape == (E, 2)
assert XY.shape == (V, 2)
old = XY
new = old.copy()
heads = edges[:, 0]
tails = edges[:, 1]
diff = old[heads] - old[tails]
distances = (diff**2).sum(axis=1)**0.5
spacing = distances.min()
assert spacing > 0
diff /= distances[:, np.newaxis]
diff *= spacing
new[tails] += stepsize * diff
new[heads] -= stepsize * diff
return new
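A hedged usage example on a three-vertex chain (a tree, so the V = E + 1 assertion holds):

import numpy as np

XY = np.array([[0.0, 0.0], [1.0, 0.0], [3.0, 0.0]])
edges = np.array([[0, 1], [1, 2]])
new_XY = contract_positions(XY, edges, stepsize=0.1)
print(new_XY)
# Vertices 0 and 2 move inward by stepsize * min_edge_length; vertex 1 is pulled
# equally from both sides and stays put.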
def validate_gof(N, V, C, M, server, conditional):
# Generate samples.
expected = C**V
num_samples = 1000 * expected
ones = np.ones(V, dtype=np.int8)
if conditional:
cond_data = server.sample(1, ones)[0, :]
else:
cond_data = server.make_zero_row()
samples = server.sample(num_samples, ones, cond_data)
logprobs = server.logprob(samples + cond_data[np.newaxis, :])
counts = {}
probs = {}
for sample, logprob in zip(samples, logprobs):
key = tuple(sample)
if key in counts:
counts[key] += 1
else:
counts[key] = 1
probs[key] = np.exp(logprob)
assert len(counts) == expected
# Check accuracy using Pearson's chi-squared test.
keys = sorted(counts.keys(), key=lambda key: -probs[key])
counts = np.array([counts[k] for k in keys], dtype=np.int32)
probs = np.array([probs[k] for k in keys])
probs /= probs.sum()
# Truncate bins whose expected counts are too low for the chi-squared test.
truncated = False
valid = (probs * num_samples > 20)
if not valid.all():
T = valid.argmin()
T = max(8, T) # Avoid truncating too much
probs = probs[:T]
counts = counts[:T]
truncated = True
gof = multinomial_goodness_of_fit(
probs, counts, num_samples, plot=True, truncated=truncated)
assert 1e-2 < gof
def latent_correlation(self):
"""Compute correlation matrix among latent features.
This computes the generalization of Pearson's correlation to discrete
data. Let I(X;Y) be the mutual information. Then define correlation as
rho(X,Y) = sqrt(1 - exp(-2 I(X;Y)))
Returns:
A [V, V]-shaped numpy array of feature-feature correlations.
"""
logger.debug('computing latent correlation')
V, E, M, R = self._VEMR
edge_probs = self._edge_probs
vert_probs = self._vert_probs
result = np.zeros([V, V], np.float32)
for root in range(V):
messages = np.empty([V, M, M])
program = make_propagation_program(self._tree.tree_grid, root)
for op, v, v2, e in program:
if op == OP_ROOT:
# Initialize correlation at this node.
messages[v, :, :] = np.diagflat(vert_probs[v, :])
elif op == OP_OUT:
# Propagate correlation outward from parent to v.
trans = edge_probs[e, :, :]
if v > v2:
trans = trans.T
messages[v, :, :] = np.dot( #
trans / vert_probs[v2, np.newaxis, :],
messages[v2, :, :])
for v in range(V):
result[root, v] = correlation(messages[v, :, :])
return result
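A standalone sketch of the rho(X,Y) = sqrt(1 - exp(-2 I(X;Y))) definition from the docstring, evaluated for a 2x2 joint distribution. The correlation helper used inside the loop is not shown in this excerpt, so the computation is written out directly.

import numpy as np

def latent_corr_from_joint(joint):
    """Compute sqrt(1 - exp(-2 * I(X;Y))) from a joint probability table."""
    px = joint.sum(axis=1, keepdims=True)
    py = joint.sum(axis=0, keepdims=True)
    nonzero = joint > 0
    mi = np.sum(joint[nonzero] * np.log(joint[nonzero] / (px * py)[nonzero]))
    return np.sqrt(1.0 - np.exp(-2.0 * mi))

independent = np.array([[0.25, 0.25], [0.25, 0.25]])
dependent = np.array([[0.45, 0.05], [0.05, 0.45]])
print(latent_corr_from_joint(independent))  # 0.0
print(latent_corr_from_joint(dependent))    # about 0.72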
def sample_from_probs2(probs, out=None):
"""Sample from multiple vectors of non-normalized probabilities.
Args:
probs: An [N, M]-shaped numpy array of non-normalized probabilities.
out: An optional destination for the result.
Returns:
An [N]-shaped numpy array of integers in range(M).
"""
# Adapted from https://stackoverflow.com/questions/40474436
assert len(probs.shape) == 2
cdf = probs.cumsum(axis=1)
u = np.random.rand(probs.shape[0], 1) * cdf[:, -1, np.newaxis]
return (u < cdf).argmax(axis=1, out=out)
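A hedged usage example: every row carries the same unnormalized weights, so the empirical frequencies should approach the normalized probabilities.

import numpy as np

np.random.seed(0)
probs = np.tile([1.0, 2.0, 7.0], (100000, 1))   # same unnormalized weights in every row
draws = sample_from_probs2(probs)
print(np.bincount(draws) / len(draws))           # roughly [0.1, 0.2, 0.7]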
def getPosteriorMeanAndVar(self, diagKTestTest, KtrainTest, post, intercept=0):
L = post['L']
if (np.size(L) == 0): raise Exception('L is an empty array') #possible to compute it here
Lchol = np.all((np.all(np.tril(L, -1)==0, axis=0) & (np.diag(L)>0)) & np.isreal(np.diag(L)))
ns = diagKTestTest.shape[0]
nperbatch = 5000
nact = 0
#allocate mem
fmu = np.zeros(ns) #column vector (of length ns) of predictive latent means
fs2 = np.zeros(ns) #column vector (of length ns) of predictive latent variances
while (nact<(ns-1)):
id = np.arange(nact, np.minimum(nact+nperbatch, ns))
kss = diagKTestTest[id]
Ks = KtrainTest[:, id]
if (len(post['alpha'].shape) == 1):
try: Fmu = intercept[id] + Ks.T.dot(post['alpha'])
except: Fmu = intercept + Ks.T.dot(post['alpha'])
fmu[id] = Fmu
else:
try: Fmu = intercept[id][:, np.newaxis] + Ks.T.dot(post['alpha'])
except: Fmu = intercept + Ks.T.dot(post['alpha'])
fmu[id] = Fmu.mean(axis=1)
if Lchol:
V = la.solve_triangular(L, Ks*np.tile(post['sW'], (id.shape[0], 1)).T, trans=1, check_finite=False, overwrite_b=True)
fs2[id] = kss - np.sum(V**2, axis=0) #predictive variances
else:
fs2[id] = kss + np.sum(Ks * (L.dot(Ks)), axis=0) #predictive variances
fs2[id] = np.maximum(fs2[id],0) #remove numerical noise i.e. negative variances
nact = id[-1] #set counter to index of last processed data point
return fmu, fs2
def sq_dist(a, b=None):
#mean-center for numerical stability
D, n = a.shape[0], a.shape[1]
if (b is None):
mu = a.mean(axis=1)
a -= mu[:, np.newaxis]
b = a
m = n
aSq = np.sum(a**2, axis=0)
bSq = aSq
else:
d, m = b.shape[0], b.shape[1]
if (d != D): raise Exception('column lengths must agree')
mu = (float(m)/float(m+n))*b.mean(axis=1) + (float(n)/float(m+n))*a.mean(axis=1)
a -= mu[:, np.newaxis]
b -= mu[:, np.newaxis]
aSq = np.sum(a**2, axis=0)
bSq = np.sum(b**2, axis=0)
C = np.tile(np.column_stack(aSq).T, (1, m)) + np.tile(bSq, (n, 1)) - 2*a.T.dot(b)
C = np.maximum(C, 0) #remove numerical noise
return C
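A hedged usage example; note that sq_dist expects points as columns (a D x n array) and mean-centers its arguments in place, so copies are passed here. The result is checked against scipy's squared Euclidean distances.

import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.default_rng(0)
pts_a = rng.normal(size=(3, 5))    # 5 points in 3 dimensions, one point per column
pts_b = rng.normal(size=(3, 4))    # 4 points in 3 dimensions

C = sq_dist(pts_a.copy(), pts_b.copy())
ref = cdist(pts_a.T, pts_b.T, metric='sqeuclidean')
assert np.allclose(C, ref)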
#evaluate 'power sums' of the individual terms in Z
def generate_markers_3d(image):
#Creation of the internal Marker
marker_internal = image < -400
marker_internal_labels = np.zeros(image.shape).astype(np.int16)
for i in range(marker_internal.shape[0]):
marker_internal[i] = segmentation.clear_border(marker_internal[i])
marker_internal_labels[i] = measure.label(marker_internal[i])
#areas = [r.area for r in measure.regionprops(marker_internal_labels)]
areas = [r.area for i in range(marker_internal.shape[0]) for r in measure.regionprops(marker_internal_labels[i])]
for i in range(marker_internal.shape[0]):
areas = [r.area for r in measure.regionprops(marker_internal_labels[i])]
areas.sort()
if len(areas) > 2:
for region in measure.regionprops(marker_internal_labels[i]):
if region.area < areas[-2]:
for coordinates in region.coords:
marker_internal_labels[i, coordinates[0], coordinates[1]] = 0
marker_internal = marker_internal_labels > 0
#Creation of the external Marker
# 3x3 structuring element with connectivity 1, used by default
struct1 = ndimage.generate_binary_structure(2, 1)
struct1 = struct1[np.newaxis,:,:] # expand by z axis .
external_a = ndimage.binary_dilation(marker_internal, structure=struct1, iterations=10)
external_b = ndimage.binary_dilation(marker_internal, structure=struct1, iterations=55)
marker_external = external_b ^ external_a
#Creation of the Watershed Marker matrix
#marker_watershed = np.zeros((512, 512), dtype=np.int) # origi
marker_watershed = np.zeros(marker_external.shape, dtype=int)
marker_watershed += marker_internal * 255
marker_watershed += marker_external * 128
return marker_internal, marker_external, marker_watershed
def prediction_time(self, X):
start_time = time.time()
dp = {}
for key, value in X.items():
dp[key] = value[np.newaxis, 0]
times = 100
for _ in range(times):
self.predict_proba(dp)
return (time.time() - start_time) / times