def KMO(data):
    # Kaiser-Meyer-Olkin measure of sampling adequacy
    cor_ = pd.DataFrame.corr(data)
    invCor = np.linalg.inv(cor_)
    rows = cor_.shape[0]
    cols = cor_.shape[1]
    A = np.ones((rows, cols))
    for i in range(rows):
        for j in range(i, cols):
            # negative partial correlation between variables i and j
            A[i, j] = -invCor[i, j] / np.sqrt(invCor[i, i] * invCor[j, j])
            A[j, i] = A[i, j]
    # squared off-diagonal correlations vs. squared partial correlations
    num = np.sum(np.sum(cor_**2)) - np.sum(np.diag(cor_**2))
    den = num + (np.sum(A**2) - np.sum(np.diag(A**2)))
    kmo = num / den
    return kmo
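A quick usage sketch (the DataFrame here is illustrative; KMO values near 1 indicate good sampling adequacy, values near 0.5 poor):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randn(200, 4), columns=list('abcd'))
df['e'] = df['a'] + 0.5 * rng.randn(200)  # add one correlated column
print(KMO(df))  # close to 0.5 for mostly uncorrelated columns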
def plot_region(self, region):
    """Shows the given region in the field plot.

    Args:
        region: Region to be plotted.
    """
    if type(region) == reg.PointRegion:
        self.axes.plot(np.ones(2) * region.point_coordinates / self._x_axis_factor,
                       np.array([-1, 1]) * self.scale, color='black')
    elif type(region) == reg.LineRegion:
        self.axes.plot(np.ones(2) * region.line_coordinates[0] / self._x_axis_factor,
                       np.array([-1, 1]) * self.scale, color='black')
        self.axes.plot(np.ones(2) * region.line_coordinates[1] / self._x_axis_factor,
                       np.array([-1, 1]) * self.scale, color='black')
    else:
        raise TypeError('Unknown type in region list: {}'.format(type(region)))
def observed_perplexity(self, counts):
    """Compute perplexity = exp(entropy) of observed variables.

    Perplexity is an information theoretic measure of the number of
    clusters or latent classes. Perplexity is a real number in the range
    [1, M], where M is model_num_clusters.

    Args:
        counts: A [V]-shaped array of multinomial counts.

    Returns:
        A [V]-shaped numpy array of perplexity.
    """
    V, E, M, R = self._VEMR
    if counts is not None:
        # broadcast scalar counts to a [V]-shaped vector
        counts = np.ones(V, dtype=np.int8) * counts
    assert counts.shape == (V, )
    assert counts.dtype == np.int8
    assert np.all(counts > 0)
    observed_entropy = np.empty(V, dtype=np.float32)
    for v in range(V):
        beg, end = self._ragged_index[v:v + 2]
        probs = np.dot(self._feat_cond[beg:end, :], self._vert_probs[v, :])
        observed_entropy[v] = multinomial_entropy(probs, counts[v])
    return np.exp(observed_entropy)
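multinomial_entropy is not defined in this snippet; as a rough, hedged stand-in (the exact multinomial entropy also carries log-factorial terms that matter for counts > 1), one could use:

import numpy as np

def multinomial_entropy(probs, count):
    # Approximation: categorical entropy scaled by the number of trials
    probs = np.asarray(probs, dtype=np.float64)
    probs = probs / probs.sum()  # defensive normalization
    return count * -np.sum(probs * np.log(probs + 1e-20))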
def sparse_to_dense(sp_indices, output_shape, values, default_value=0):
    """Build a dense matrix from sparse representations.

    Args:
        sp_indices: A [0-2]-D array that contains the index to place values.
        output_shape: Shape of the dense matrix.
        values: A [0-1]-D array where each value corresponds row-wise to an
            index in sp_indices.
        default_value: Value to set for indices not specified in sp_indices.

    Returns:
        A dense numpy N-D array with shape output_shape.
    """
    assert len(sp_indices) == len(values), \
        'Length of sp_indices is not equal to length of values'
    array = np.ones(output_shape) * default_value
    for idx, value in zip(sp_indices, values):
        array[tuple(idx)] = value
    return array
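A small usage sketch with illustrative values:

import numpy as np

indices = np.array([[0, 1], [2, 0]])
dense = sparse_to_dense(indices, (3, 3), values=[7, 9], default_value=-1)
print(dense)
# [[-1.  7. -1.]
#  [-1. -1. -1.]
#  [ 9. -1. -1.]]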
def infExact_scipy_post(self, K, covars, y, sig2e, fixedEffects):
    n = y.shape[0]
    # mean vector
    m = covars.dot(fixedEffects)
    if (K.shape[1] < K.shape[0]): K_true = K.dot(K.T)
    else: K_true = K
    if sig2e < 1e-6:
        L = la.cholesky(K_true + sig2e*np.eye(n), overwrite_a=True, check_finite=False)  # Cholesky factor of covariance with noise
        sl = 1
        pL = -self.solveChol(L, np.eye(n))  # L = -inv(K+inv(sW^2))
    else:
        L = la.cholesky(K_true/sig2e + np.eye(n), overwrite_a=True, check_finite=False)  # Cholesky factor of B
        sl = sig2e
        pL = L  # L = chol(eye(n)+sW*sW'.*K)
    alpha = self.solveChol(L, y-m, overwrite_b=False) / sl
    post = dict([])
    post['alpha'] = alpha  # return the posterior parameters
    post['sW'] = np.ones(n) / np.sqrt(sig2e)  # sqrt of noise precision vector
    post['L'] = pL
    return post
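solveChol is referenced but not shown here; a minimal sketch in the spirit of GPML's solve_chol, assuming L is the upper-triangular Cholesky factor returned by scipy's la.cholesky, solving A X = B via two triangular solves:

import scipy.linalg as la

def solveChol(self, L, B, overwrite_b=True):
    # A = L.T @ L, so solve L.T y = B first, then L x = y
    y = la.solve_triangular(L, B, trans=1, check_finite=False,
                            overwrite_b=overwrite_b)
    return la.solve_triangular(L, y, check_finite=False, overwrite_b=True)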
def getTrainTestKernel(self, params, Xtest):
    self.checkParams(params)
    ell = np.exp(params[0])
    p = np.exp(params[1])
    Xtest_scaled = Xtest / np.sqrt(Xtest.shape[1])
    d2 = sq_dist(self.X_scaled.T/ell, Xtest_scaled.T/ell)  # precompute squared distances
    # compute dp
    dp = np.zeros(d2.shape)
    for d in range(self.X_scaled.shape[1]):
        dp += (np.outer(self.X_scaled[:, d], np.ones((1, Xtest_scaled.shape[0])))
               - np.outer(np.ones((self.X_scaled.shape[0], 1)), Xtest_scaled[:, d]))
    dp /= p
    K = np.exp(-d2 / 2.0)
    return np.cos(2*np.pi*dp)*K
def __init__(self, frameSize=1024, imageWidth=1024, imageHeight=1024, zmin=-1, zmax=1, defaultValue=0.0):
    PlotBase.__init__(self)
    self._frameSize = frameSize
    self._sink = None

    # Raster state
    self._imageData = numpy.ones((imageHeight, imageWidth)) * defaultValue
    self._image = self._plot.imshow(self._imageData, extent=(0, 1, 1, 0))
    self._zmin = zmin
    self._zmax = zmax
    norm = self._getNorm(self._zmin, self._zmax)
    self._image.set_norm(norm)
    self._row = 0
    self._xdelta = None

    # Maintain aspect ratio of image
    self._aspect = float(imageHeight)/imageWidth
    self._plot.set_aspect(self._aspect)

    # Add a colorbar
    self._colorbar = self._figure.colorbar(self._image)
def test_with_fixed_inputs(self):
    inputs = tf.random_normal(
        [self.batch_size, self.sequence_length, self.input_depth])
    seq_length = tf.ones(self.batch_size, dtype=tf.int32) * self.sequence_length
    helper = decode_helper.TrainingHelper(
        inputs=inputs, sequence_length=seq_length)
    decoder_fn = self.create_decoder(
        helper=helper, mode=tf.contrib.learn.ModeKeys.TRAIN)
    initial_state = decoder_fn.cell.zero_state(
        self.batch_size, dtype=tf.float32)
    decoder_output, _ = decoder_fn(initial_state, helper)

    # pylint: disable=E1101
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        decoder_output_ = sess.run(decoder_output)

    np.testing.assert_array_equal(
        decoder_output_.logits.shape,
        [self.sequence_length, self.batch_size, self.vocab_size])
    np.testing.assert_array_equal(decoder_output_.predicted_ids.shape,
                                  [self.sequence_length, self.batch_size])
    return decoder_output_
def position_encoding(sentence_size, embedding_size):
    """
    Position Encoding described in section 4.1 of
    End-To-End Memory Networks (https://arxiv.org/abs/1503.08895).

    Args:
        sentence_size: length of the sentence
        embedding_size: dimensionality of the embeddings

    Returns:
        A numpy array of shape [sentence_size, embedding_size] containing
        the fixed position encodings for each sentence position.
    """
    encoding = np.ones((sentence_size, embedding_size), dtype=np.float32)
    ls = sentence_size + 1
    le = embedding_size + 1
    for k in range(1, le):
        for j in range(1, ls):
            encoding[j-1, k-1] = (1.0 - j/float(ls)) - (
                k / float(le)) * (1. - 2. * j/float(ls))
    return encoding
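A quick sanity check of the encoding:

import numpy as np

enc = position_encoding(sentence_size=3, embedding_size=4)
print(enc.shape)  # (3, 4)
print(enc[1])     # the middle position comes out uniform: all entries 0.5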
def create_test_panel_ohlc_source(sim_params, env):
    start = sim_params.first_open \
        if sim_params else pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
    end = sim_params.last_close \
        if sim_params else pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)
    index = env.days_in_range(start, end)
    price = np.arange(0, len(index)) + 100
    high = price * 1.05
    low = price * 0.95
    open_ = price + .1 * (price % 2 - .5)
    volume = np.ones(len(index)) * 1000
    arbitrary = np.ones(len(index))
    df = pd.DataFrame({'price': price,
                       'high': high,
                       'low': low,
                       'open': open_,
                       'volume': volume,
                       'arbitrary': arbitrary},
                      index=index)
    panel = pd.Panel.from_dict({0: df})
    return DataPanelSource(panel), panel
def flow_orientation(orientation):
    """Currently not used anymore."""
    # Boolean maps; the remaining part is where the angle is already
    # inside [-pi/2, pi/2] (note: `|`, not `&` -- an angle cannot be
    # both greater than pi/2 and less than -pi/2)
    _greater_pi = orientation > math.pi/2
    _less_minuspi = orientation < -math.pi/2
    _remaining_part = ~(_greater_pi | _less_minuspi)
    # orientation maps
    greater_pi = orientation*_greater_pi
    less_minuspi = orientation*_less_minuspi
    remaining_part = orientation*_remaining_part
    pi_map = math.pi * np.ones(orientation.shape)
    # converted orientation maps: fold out-of-range angles back into range
    convert_greater_pi = pi_map*_greater_pi - greater_pi
    convert_less_minuspi = -pi_map*_less_minuspi - less_minuspi
    new_orient = remaining_part + convert_greater_pi + convert_less_minuspi
    return new_orient
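An illustration of the folding (2.0 and -2.0 lie outside [-pi/2, pi/2]; values chosen for illustration):

import math
import numpy as np

angles = np.array([0.3, 2.0, -2.0])
print(flow_orientation(angles))
# [ 0.3  1.14159265 -1.14159265]  i.e. pi - 2.0 and -(pi - 2.0)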
def _normalise_data(self):
    self.train_x_mean = np.zeros(self.input_dim)
    self.train_x_std = np.ones(self.input_dim)
    self.train_y_mean = np.zeros(self.output_dim)
    self.train_y_std = np.ones(self.output_dim)
    if self.normalise_data:
        self.train_x_mean = np.mean(self.train_x, axis=0)
        self.train_x_std = np.std(self.train_x, axis=0)
        self.train_x_std[self.train_x_std == 0] = 1.
        self.train_x = (self.train_x - np.full(self.train_x.shape, self.train_x_mean, dtype=np.float32)) / \
            np.full(self.train_x.shape, self.train_x_std, dtype=np.float32)
        self.test_x = (self.test_x - np.full(self.test_x.shape, self.train_x_mean, dtype=np.float32)) / \
            np.full(self.test_x.shape, self.train_x_std, dtype=np.float32)
        self.train_y_mean = np.mean(self.train_y, axis=0)
        self.train_y_std = np.std(self.train_y, axis=0)
        # Guard against zero variance (works for scalar and array std),
        # mirroring the x handling above
        self.train_y_std = np.where(self.train_y_std == 0, 1., self.train_y_std)
        self.train_y = (self.train_y - self.train_y_mean) / self.train_y_std
def main():
    fish = loadmat('./data/fish.mat')

    X1 = np.zeros((fish['X'].shape[0], fish['X'].shape[1] + 1))
    X1[:, :-1] = fish['X']
    X2 = np.ones((fish['X'].shape[0], fish['X'].shape[1] + 1))
    X2[:, :-1] = fish['X']
    X = np.vstack((X1, X2))

    Y1 = np.zeros((fish['Y'].shape[0], fish['Y'].shape[1] + 1))
    Y1[:, :-1] = fish['Y']
    Y2 = np.ones((fish['Y'].shape[0], fish['Y'].shape[1] + 1))
    Y2[:, :-1] = fish['Y']
    Y = np.vstack((Y1, Y2))

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    callback = partial(visualize, ax=ax)
    reg = affine_registration(X, Y)
    reg.register(callback)
    plt.show()
def test_bbs_in_bbs(self):
    bbs_a = np.array([1, 1, 2.0, 3])
    bbs_b = np.array([1, 0, 4, 5])
    bbs_c = np.array([0, 0, 2, 2])
    assert bbs_in_bbs(bbs_a, bbs_b).all()
    # `x.any() is not True` is always true for numpy bools and asserts
    # nothing; `not x.any()` is what these checks mean
    assert not bbs_in_bbs(bbs_b, bbs_c).any()
    assert not bbs_in_bbs(bbs_a, bbs_c).any()
    bbs_d = np.array([
        [0, 0, 5, 5],
        [1, 2, 4, 4],
        [2, 3, 4, 5]
    ])
    assert (bbs_in_bbs(bbs_a, bbs_d) == np.array([1, 0, 0], dtype=bool)).all()
    assert (bbs_in_bbs(bbs_d, bbs_d) == np.ones((3), dtype=bool)).all()
    bbs_a *= 100
    bbs_d *= 100
    assert (bbs_in_bbs(bbs_a, bbs_d) == np.array([1, 0, 0], dtype=bool)).all()
def computeCost(X, y, theta):
    '''YOUR CODE HERE'''
    m = float(len(X))
    d = 0
    for i in range(len(X)):
        h = np.dot(theta.transpose(), X[i])
        c = (h - y[i]) ** 2
        d = d + c
    j = (1.0 / (2 * m)) * d
    return j

# Part 5: Prepare the data so that the input X has two columns: first a column of ones to accommodate theta0 and then a column of city population data
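A sketch of that preparation step; the name `population` and its values are illustrative only:

import numpy as np

population = np.array([6.1101, 5.5277, 8.5186])  # sample city populations
X = np.column_stack((np.ones(len(population)), population))
# column 0 (all ones) multiplies theta0; column 1 is the population data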
def test_write():
    delete_layer()
    cv, data = create_layer(size=(50,50,50,1), offset=(0,0,0))

    replacement_data = np.zeros(shape=(50,50,50,1), dtype=np.uint8)
    cv[0:50,0:50,0:50] = replacement_data
    assert np.all(cv[0:50,0:50,0:50] == replacement_data)

    replacement_data = np.random.randint(255, size=(50,50,50,1), dtype=np.uint8)
    cv[0:50,0:50,0:50] = replacement_data
    assert np.all(cv[0:50,0:50,0:50] == replacement_data)

    # out of bounds
    delete_layer()
    cv, data = create_layer(size=(128,64,64,1), offset=(10,20,0))
    with pytest.raises(ValueError):
        cv[74:150,20:84,0:64] = np.ones(shape=(64,64,64,1), dtype=np.uint8)

    # non-aligned writes
    delete_layer()
    cv, data = create_layer(size=(128,64,64,1), offset=(10,20,0))
    with pytest.raises(ValueError):
        cv[21:85,0:64,0:64] = np.ones(shape=(64,64,64,1), dtype=np.uint8)
def extract_and_save_bin_to(dir_to_bin, dir_to_source):
    sets = [s for s in os.listdir(dir_to_source) if s in SETS]
    for d in sets:
        path = join(dir_to_source, d)
        speakers = [s for s in os.listdir(path) if s in SPEAKERS]
        for s in speakers:
            path = join(dir_to_source, d, s)
            output_dir = join(dir_to_bin, d, s)
            if not tf.gfile.Exists(output_dir):
                tf.gfile.MakeDirs(output_dir)
            for f in os.listdir(path):
                filename = join(path, f)
                print(filename)
                if not os.path.isdir(filename):
                    features = extract(filename)
                    # append the speaker index as a label column
                    labels = SPEAKERS.index(s) * np.ones(
                        [features.shape[0], 1],
                        np.float32,
                    )
                    b = os.path.splitext(f)[0]
                    features = np.concatenate([features, labels], 1)
                    with open(join(output_dir, '{}.bin'.format(b)), 'wb') as fp:
                        fp.write(features.tobytes())  # tostring() is a deprecated alias
def __mul__(self, other):
    """
    Left-multiply this DualQuaternion with another dual quaternion,
    or scale it by a float.

    Two variants:
        DualQuaternion: composition (identical to the oplus operation)
        float: scalar scaling
    (Transforming an [N x 3] point set, X_2 = p_21 * X_1, is sketched in
    the commented-out ndarray branch below.)
    """
    if isinstance(other, DualQuaternion):
        return DualQuaternion.from_dq(other.real * self.real,
                                      other.dual * self.real + other.real * self.dual)
    elif isinstance(other, float):
        return DualQuaternion.from_dq(self.real * other, self.dual * other)
    # elif isinstance(other, np.ndarray):
    #     X = np.hstack([other, np.ones((len(other), 1))]).T
    #     return (np.dot(self.matrix, X).T)[:, :3]
    else:
        raise TypeError('__mul__ typeerror {:}'.format(type(other)))
def effective_sample_size(x, mu, var, logger):
    """
    Calculate the effective sample size of a sequence generated by MCMC.

    :param x: samples, shaped [batch size, time, dimension]
    :param mu: mean of the variable
    :param var: variance of the variable
    :param logger: logger used to report the result
    :return: effective sample size of the sequence

    Make sure that `mu` and `var` are correct!
    """
    # batch size, time, dimension
    b, t, d = x.shape
    ess_ = np.ones([d])
    for s in range(1, t):
        p = auto_correlation_time(x, s, mu, var)
        if np.sum(p > 0.05) == 0:
            break
        else:
            for j in range(0, d):
                if p[j] > 0.05:
                    ess_[j] += 2.0 * p[j] * (1.0 - float(s) / t)
    logger.info('ESS: max [%f] min [%f] / [%d]' % (t / np.min(ess_), t / np.max(ess_), t))
    return t / ess_
From linear_regression.py in the PacktPublishing/Python-Machine-Learning-By-Example repository:
def train_linear_regression(X_train, y_train, max_iter, learning_rate, fit_intercept=False):
    """ Train a linear regression model with gradient descent
    Args:
        X_train, y_train (numpy.ndarray, training data set)
        max_iter (int, number of iterations)
        learning_rate (float)
        fit_intercept (bool, with an intercept w0 or not)
    Returns:
        numpy.ndarray, learned weights
    """
    if fit_intercept:
        intercept = np.ones((X_train.shape[0], 1))
        X_train = np.hstack((intercept, X_train))
    weights = np.zeros(X_train.shape[1])
    for iteration in range(max_iter):
        weights = update_weights_gd(X_train, y_train, weights, learning_rate)
        # Check the cost for every 100 (for example) iterations
        if iteration % 100 == 0:
            print(compute_cost(X_train, y_train, weights))
    return weights
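update_weights_gd and compute_cost are not shown on this page; minimal sketches consistent with how they are called (one full-batch gradient step on a halved mean squared error) might look like:

import numpy as np

def update_weights_gd(X_train, y_train, weights, learning_rate):
    # One full-batch gradient-descent step for linear regression
    predictions = X_train.dot(weights)
    gradient = X_train.T.dot(y_train - predictions) / len(y_train)
    return weights + learning_rate * gradient

def compute_cost(X_train, y_train, weights):
    # Halved mean squared error, matching the gradient above
    residuals = X_train.dot(weights) - y_train
    return (residuals ** 2).mean() / 2.0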
From logistic_regression_from_scratch.py in the same repository:
def train_logistic_regression(X_train, y_train, max_iter, learning_rate, fit_intercept=False):
    """ Train a logistic regression model
    Args:
        X_train, y_train (numpy.ndarray, training data set)
        max_iter (int, number of iterations)
        learning_rate (float)
        fit_intercept (bool, with an intercept w0 or not)
    Returns:
        numpy.ndarray, learned weights
    """
    if fit_intercept:
        intercept = np.ones((X_train.shape[0], 1))
        X_train = np.hstack((intercept, X_train))
    weights = np.zeros(X_train.shape[1])
    for iteration in range(max_iter):
        weights = update_weights_gd(X_train, y_train, weights, learning_rate)
        # Check the cost for every 1000 (for example) iterations
        if iteration % 1000 == 0:
            print(compute_cost(X_train, y_train, weights))
    return weights
Also from logistic_regression_from_scratch.py, a variant that uses stochastic gradient descent:
def train_logistic_regression(X_train, y_train, max_iter, learning_rate, fit_intercept=False):
    """ Train a logistic regression model
    Args:
        X_train, y_train (numpy.ndarray, training data set)
        max_iter (int, number of iterations)
        learning_rate (float)
        fit_intercept (bool, with an intercept w0 or not)
    Returns:
        numpy.ndarray, learned weights
    """
    if fit_intercept:
        intercept = np.ones((X_train.shape[0], 1))
        X_train = np.hstack((intercept, X_train))
    weights = np.zeros(X_train.shape[1])
    for iteration in range(max_iter):
        weights = update_weights_sgd(X_train, y_train, weights, learning_rate)
        # Check the cost for every 2 (for example) iterations
        if iteration % 2 == 0:
            print(compute_cost(X_train, y_train, weights))
    return weights
# Train the SGD model based on 10000 samples
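update_weights_sgd is likewise not shown; a minimal sketch under the assumption that it performs one pass of per-sample logistic-regression updates:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def update_weights_sgd(X_train, y_train, weights, learning_rate):
    # One epoch of SGD: update the weights after every single sample
    for x, y in zip(X_train, y_train):
        prediction = sigmoid(x.dot(weights))
        weights += learning_rate * (y - prediction) * x
    return weights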
def norm(self, x):
    """compute the Mahalanobis norm that is induced by the
    statistical model / sample distribution, specifically by
    covariance matrix ``C``. The expected Mahalanobis norm is
    about ``sqrt(dimension)``.

    Example
    -------
    >>> import cma, numpy as np
    >>> sm = cma.sampler.GaussFullSampler(np.ones(10))
    >>> x = np.random.randn(10)
    >>> d = sm.norm(x)

    `d` is the norm "in" the true sample distribution,
    sampled points have a typical distance of ``sqrt(2*sm.dim)``,
    where ``sm.dim`` is the dimension, and an expected distance of
    close to ``dim**0.5`` to the sample mean zero. In the example,
    `d` is the Euclidean distance, because C = I.
    """
    return sum((np.dot(self.B.T, x) / self.D)**2)**0.5
def __init__(self, dimension,
             constant_trace='None',
             randn=np.random.randn,
             quadratic=False,
             **kwargs):
    try:
        self.dimension = len(dimension)
        standard_deviations = np.asarray(dimension)
    except TypeError:
        self.dimension = dimension
        standard_deviations = np.ones(dimension)
    assert len(standard_deviations) == self.dimension
    self.C = standard_deviations**2
    "covariance matrix diagonal"
    self.constant_trace = constant_trace
    self.randn = randn
    self.quadratic = quadratic
    self.count_tell = 0
def norm(self, x):
    """compute the Mahalanobis norm that is induced by the
    statistical model / sample distribution, specifically by
    covariance matrix ``C``. The expected Mahalanobis norm is
    about ``sqrt(dimension)``.

    Example
    -------
    >>> import cma, numpy as np
    >>> sm = cma.sampler.GaussFullSampler(np.ones(10))
    >>> x = np.random.randn(10)
    >>> d = sm.norm(x)

    `d` is the norm "in" the true sample distribution,
    sampled points have a typical distance of ``sqrt(2*sm.dim)``,
    where ``sm.dim`` is the dimension, and an expected distance of
    close to ``dim**0.5`` to the sample mean zero. In the example,
    `d` is the Euclidean distance, because C = I.
    """
    return sum(np.asarray(x)**2 / self.C)**0.5
def __init__(self, dimension, randn=np.random.randn, debug=False):
    """pass dimension of the underlying sample space
    """
    try:
        self.N = len(dimension)
        std_vec = np.array(dimension, copy=True)
    except TypeError:
        self.N = dimension
        std_vec = np.ones(self.N)
    if self.N < 10:
        print('Warning: Not advised to use VD-CMA for dimension < 10.')
    self.randn = randn
    self.dvec = std_vec
    self.vvec = self.randn(self.N) / math.sqrt(self.N)
    self.norm_v2 = np.dot(self.vvec, self.vvec)
    self.norm_v = np.sqrt(self.norm_v2)
    self.vn = self.vvec / self.norm_v
    self.vnn = self.vn**2
    self.pc = np.zeros(self.N)
    self._debug = debug  # plot covariance matrix
def covariance_matrix(self):
    if self._debug:
        # return None
        ka = self.k_active
        if ka > 0:
            C = np.eye(self.N) + np.dot(self.V[:ka].T * self.S[:ka],
                                        self.V[:ka])
            C = (C * self.D).T * self.D
        else:
            C = np.diag(self.D**2)
        C *= self.sigma**2
    else:
        # Fake covariance matrix for speed
        C = np.ones(1)
        self.B = np.ones(1)
    return C
def initwithsize(self, curshape, dim):
    # DIM-dependent initialization
    if self.dim != dim:
        if self.zerox:
            self.xopt = zeros(dim)
        else:
            self.xopt = compute_xopt(self.rseed, dim)
        scale = max(1, dim ** .5 / 8.)  # nota: different from scales in F8
        self.linearTF = scale * compute_rotation(self.rseed, dim)
        self.xopt = np.hstack(dot(.5 * np.ones((1, dim)), self.linearTF.T)) / scale ** 2
    # DIM- and POPSI-dependent initialisations of DIM*POPSI matrices
    if self.lastshape != curshape:
        self.dim = dim
        self.lastshape = curshape
        self.arrxopt = resize(self.xopt, curshape)
def __imul__(self, factor):
    """define ``self *= factor``.

    As a shortcut for::

        self = self.__imul__(factor)
    """
    try:
        if factor == 1:
            return self
    except: pass
    try:
        if (np.size(factor) == np.size(self.scaling) and
                all(factor == 1)):
            return self
    except: pass
    if self.is_identity and np.size(self.scaling) == 1:
        self.scaling = np.ones(np.size(factor))
        self.is_identity = False
    self.scaling *= factor
    self.dim = np.size(self.scaling)
    return self
def check_string_input(self, input_name, input_value):
    # np.array is a factory function, not a type, so `type(x) is np.array`
    # never matches; test against np.ndarray instead
    if isinstance(input_value, np.ndarray):
        if input_value.size == self.P:
            setattr(self, input_name, input_value)
        elif input_value.size == 1:
            setattr(self, input_name, np.repeat(input_value, self.P))
        else:
            raise ValueError("length of %s is %d; should be %d" % (input_name, input_value.size, self.P))
    elif type(input_value) is str:
        setattr(self, input_name, float(input_value)*np.ones(self.P))
    elif type(input_value) is list:
        if len(input_value) == self.P:
            setattr(self, input_name, np.array([str(x) for x in input_value]))
        elif len(input_value) == 1:
            setattr(self, input_name, np.repeat(input_value, self.P))
        else:
            raise ValueError("length of %s is %d; should be %d" % (input_name, len(input_value), self.P))
    else:
        raise ValueError("user provided %s with an unsupported type" % input_name)