def test_bad_input(self):
data = arange(100).reshape(self.ndates, self.nsids)
baseline = DataFrame(data, index=self.dates, columns=self.sids)
loader = DataFrameLoader(
USEquityPricing.close,
baseline,
)
with self.assertRaises(ValueError):
# Wrong column.
loader.load_adjusted_array(
[USEquityPricing.open], self.dates, self.sids, self.mask
)
with self.assertRaises(ValueError):
# Too many columns.
loader.load_adjusted_array(
[USEquityPricing.open, USEquityPricing.close],
self.dates,
self.sids,
self.mask,
)
def test_baseline(self):
data = arange(100).reshape(self.ndates, self.nsids)
baseline = DataFrame(data, index=self.dates, columns=self.sids)
loader = DataFrameLoader(USEquityPricing.close, baseline)
dates_slice = slice(None, 10, None)
sids_slice = slice(1, 3, None)
[adj_array] = loader.load_adjusted_array(
[USEquityPricing.close],
self.dates[dates_slice],
self.sids[sids_slice],
self.mask[dates_slice, sids_slice],
).values()
for idx, window in enumerate(adj_array.traverse(window_length=3)):
expected = baseline.values[dates_slice, sids_slice][idx:idx + 3]
assert_array_equal(window, expected)
def mypsd(Rates,time_range,bin_w = 5., nmax = 4000):
bins = np.arange(0,len(time_range),1)
#print bins
a,b = np.histogram(Rates, bins)
ff = (1./len(bins))*abs(np.fft.fft(Rates- np.mean(Rates)))**2
Fs = 1./(1*0.001)
    freq2 = np.fft.fftfreq(len(bins))[0:len(bins)//2+1] # d= dt
    freq = np.fft.fftfreq(len(bins))[:len(ff)//2+1]
    px = ff[0:len(ff)//2+1]
max_px = np.max(px[1:])
idx = px == max_px
    corr_freq = freq[np.flatnonzero(idx)]   # frequency bin of the spectral peak
    new_px = px
    max_pow = new_px[np.flatnonzero(idx)]
return new_px,freq,corr_freq[0],freq2, max_pow
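A minimal usage sketch for mypsd; the synthetic 10 Hz signal and the 1 ms sampling step below are illustrative assumptions, and numpy is assumed to be imported as np:

import numpy as np

# One second of rates sampled every 1 ms (matching the hard-coded Fs = 1/0.001
# inside mypsd), with a dominant 10 Hz oscillation.
time_range = np.arange(0, 1.0, 0.001)
rates = 5.0 + np.sin(2 * np.pi * 10 * time_range)

px, freq, corr_freq, freq2, max_pow = mypsd(rates, time_range)
# freq is in cycles per bin; multiply by the sampling rate (1000 Hz) to get Hz.
print(corr_freq * 1000.0)   # ~10.0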
def get_normalized_dispersion(mat_mean, mat_var, nbins=20):
mat_disp = (mat_var - mat_mean) / np.square(mat_mean)
quantiles = np.percentile(mat_mean, np.arange(0, 100, 100 / nbins))
quantiles = np.append(quantiles, mat_mean.max())
# merge bins with no difference in value
quantiles = np.unique(quantiles)
if len(quantiles) <= 1:
# pathological case: the means are all identical. just return raw dispersion.
return mat_disp
# calc median dispersion per bin
(disp_meds, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, mat_disp, statistic='median', bins=quantiles)
# calc median absolute deviation of dispersion per bin
disp_meds_arr = disp_meds[disp_bins-1] # 0th bin is empty since our quantiles start from 0
disp_abs_dev = abs(mat_disp - disp_meds_arr)
(disp_mads, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, disp_abs_dev, statistic='median', bins=quantiles)
# calculate normalized dispersion
disp_mads_arr = disp_mads[disp_bins-1]
disp_norm = (mat_disp - disp_meds_arr) / disp_mads_arr
return disp_norm
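A hedged usage sketch for get_normalized_dispersion; the synthetic per-gene mean/variance vectors are made up, and numpy/scipy imports are assumptions matching the aliases used in the function body:

import numpy as np
import scipy.stats   # required by get_normalized_dispersion

rng = np.random.RandomState(0)
mat_mean = rng.gamma(2.0, 1.0, size=500) + 0.1
mat_var = mat_mean + 0.5 * mat_mean ** 2   # overdispersed, negative-binomial-like

disp_norm = get_normalized_dispersion(mat_mean, mat_var, nbins=20)
print(disp_norm.shape)   # (500,)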
def compute_nearest_neighbors(submatrix, balltree, k, row_start):
""" Compute k nearest neighbors on a submatrix
Args: submatrix (np.ndarray): Data submatrix
balltree: Nearest neighbor index (from sklearn)
          k: number of nearest neighbors to compute
row_start: row offset into larger matrix
Returns a COO sparse adjacency matrix of nearest neighbor relations as (i,j,x)"""
nn_dist, nn_idx = balltree.query(submatrix, k=k+1)
# Remove the self-as-neighbors
nn_idx = nn_idx[:,1:]
nn_dist = nn_dist[:,1:]
# Construct a COO sparse matrix of edges and distances
i = np.repeat(row_start + np.arange(nn_idx.shape[0]), k)
j = nn_idx.ravel().astype(int)
return (i, j, nn_dist.ravel())
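A small usage sketch, assuming the index is an sklearn.neighbors.BallTree built over the full matrix; the random data and chunk size are illustrative:

import numpy as np
from sklearn.neighbors import BallTree

rng = np.random.RandomState(0)
full_matrix = rng.rand(100, 16)
balltree = BallTree(full_matrix)

# Query one 25-row chunk; i/j are global row/column indices, dist the distances.
row_start = 50
i, j, dist = compute_nearest_neighbors(full_matrix[row_start:row_start + 25],
                                        balltree, k=5, row_start=row_start)
print(i.shape, j.shape, dist.shape)   # (125,) (125,) (125,)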
def preprocess_matrix(matrix, num_bcs=None, use_bcs=None, use_genes=None, force_cells=None):
if force_cells is not None:
bc_counts = matrix.get_reads_per_bc()
bc_indices, _, _ = cr_stats.filter_cellular_barcodes_fixed_cutoff(bc_counts, force_cells)
matrix = matrix.select_barcodes(bc_indices)
elif use_bcs is not None:
bc_seqs = cr_utils.load_csv_rownames(use_bcs)
bc_indices = matrix.bcs_to_ints(bc_seqs)
matrix = matrix.select_barcodes(bc_indices)
elif num_bcs is not None and num_bcs < matrix.bcs_dim:
bc_indices = np.sort(np.random.choice(np.arange(matrix.bcs_dim), size=num_bcs, replace=False))
matrix = matrix.select_barcodes(bc_indices)
if use_genes is not None:
gene_ids = cr_utils.load_csv_rownames(use_genes)
gene_indices = matrix.gene_ids_to_ints(gene_ids)
matrix = matrix.select_genes(gene_indices)
matrix, _, _ = matrix.select_nonzero_axes()
return matrix
def get_depth_info(read_iter, chrom, cstart, cend):
depths = np.zeros(cend-cstart, np.int32)
for read in read_iter:
pos = read.pos
rstart = max(pos, cstart)
# Increment to the end of the window or the end of the
# alignment, whichever comes first
rend = min(read.aend, cend)
depths[(rstart-cstart):(rend-cstart)] += 1
positions = np.arange(cstart, cend, dtype=np.int32)
depth_df = pd.DataFrame({"chrom": chrom, "pos": positions, "coverage": depths})
return depth_df
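get_depth_info expects pysam-style reads; a hedged sketch using a namedtuple stand-in that only provides the .pos and .aend attributes actually used above (the numpy/pandas imports match the np/pd aliases in the function):

import numpy as np
import pandas as pd
from collections import namedtuple

Read = namedtuple("Read", ["pos", "aend"])          # stand-in for pysam reads
reads = [Read(5, 20), Read(10, 30), Read(28, 60)]   # 0-based start/end positions

depth_df = get_depth_info(iter(reads), chrom="chr1", cstart=0, cend=50)
print(depth_df.loc[5:12])   # coverage rises to 2 where the first two reads overlap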
def getDataRecorderConfiguration(self):
nRecorders= self.getNumberOfRecorderTables()
sourceBufSize= 256
    source= ctypes.create_string_buffer(b'\000', sourceBufSize)
option= CIntArray(np.zeros(nRecorders, dtype=np.int32))
table=CIntArray(np.arange(1, nRecorders + 1))
self._lib.PI_qDRC.argtypes= [c_int, CIntArray, c_char_p,
CIntArray, c_int, c_int]
self._convertErrorToException(
self._lib.PI_qDRC(self._id, table, source,
option, sourceBufSize, nRecorders))
    sources= [x.strip() for x in source.value.decode().split('\n')]
cfg= DataRecorderConfiguration()
for i in range(nRecorders):
cfg.setTable(table.toNumpyArray()[i],
sources[i],
option.toNumpyArray()[i])
return cfg
def loadLogoSet(path, rows,cols,test_data_rate=0.15):
random.seed(612)
_, imgID = readItems('data.txt')
y, _ = modelDict(path)
nPics = len(y)
faceassset = np.zeros((nPics,rows,cols), dtype = np.uint8) ### gray images
noImg = []
for i in range(nPics):
temp = cv2.imread(path +'logo/'+imgID[i]+'.jpg', 0)
        if temp is None:
noImg.append(i)
elif temp.size < 1000:
noImg.append(i)
else:
temp = cv2.resize(temp,(cols, rows), interpolation = cv2.INTER_CUBIC)
faceassset[i,:,:] = temp
y = np.delete(y, noImg,0); faceassset = np.delete(faceassset, noImg, 0)
nPics = len(y)
    index = random.sample(range(nPics), int(nPics*test_data_rate))
x_test = faceassset[index,:,:]; x_train = np.delete(faceassset, index, 0)
y_test = y[index]; y_train = np.delete(y, index, 0)
return (x_train, y_train), (x_test, y_test)
def batch_iter(data, batch_size, num_epochs, shuffle=True):
"""
Generates a batch iterator for a dataset.
"""
data = np.array(data)
data_size = len(data)
    num_batches_per_epoch = int((data_size - 1)/batch_size) + 1  # ceil(data_size / batch_size)
for epoch in range(num_epochs):
# Shuffle the data at each epoch
if shuffle:
shuffle_indices = np.random.permutation(np.arange(data_size))
shuffled_data = data[shuffle_indices]
else:
shuffled_data = data
for batch_num in range(num_batches_per_epoch):
start_index = batch_num * batch_size
end_index = min((batch_num + 1) * batch_size, data_size)
yield shuffled_data[start_index:end_index]
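Illustrative iteration over a toy dataset; the (feature, label) pairs are made up, and any indexable data works since the generator converts it with np.array:

import numpy as np

data = list(zip(np.arange(10), np.arange(10) * 2))   # ten (x, y) pairs
for batch in batch_iter(data, batch_size=4, num_epochs=1, shuffle=True):
    print(batch.shape)   # (4, 2) for full batches, (2, 2) for the final partial one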
def _gen_centroids():
a = np.arange(SSIZE/18, SSIZE, SSIZE/9)
x, y = np.meshgrid(a, a)
return np.dstack((y, x)).reshape((81, 2))
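_gen_centroids relies on a module-level SSIZE (presumably the side length of the square puzzle image); a hedged sketch that treats the snippet as pasted into the same module, with an assumed value:

import numpy as np

SSIZE = 450   # assumption: a 450x450 board image, so each of the 9x9 cells is 50 px
centroids = _gen_centroids()
print(centroids.shape)   # (81, 2)
print(centroids[0])      # centre of the top-left cell, roughly [25. 25.]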
def classify(self, image):
"""
Given a 28x28 image, returns an array representing the 2 highest
probable prediction
:param image:
:return: array of 2 highest prob-digit tuples
"""
if cv2.__version__[0] == '2':
res = self.model.find_nearest(np.array([self.feature(image)]), k=11)
else:
res = self.model.findNearest(np.array([self.feature(image)]), k=11)
    hist = np.histogram(res[2], bins=9, range=(1, 10), density=True)[0]
zipped = sorted(zip(hist, np.arange(1, 10)), reverse=True)
return np.array(zipped[:2])
def blend2(x1,x2,y, metric, task, x1valid, x2valid, x1test, x2test):
try:
mm = no_transform()
mbest_score = -2
for w1 in np.arange(0.2, 1, 0.1):
w2 = 1- w1
x = mm.fit_transform(x1)*w1 + mm.fit_transform(x2)*w2
            score = getattr(libscores, metric)(y, x, task)
try:
if score <= 0:
                    CVscore_auc = libscores.auc_metric(y, x, task)
score += CVscore_auc/10
except:
pass
if score > mbest_score:
mbest_score = score
mbest_w1 = w1
mbest_x = x
mbest_w2 = 1- mbest_w1
xvalid = mm.fit_transform(x1valid) * mbest_w1 + mm.fit_transform(x2valid)* mbest_w2
xtest = mm.fit_transform(x1test) * mbest_w1 + mm.fit_transform(x2test) * mbest_w2
return mbest_score, xvalid, xtest
except:
return 0.01, x1valid, x1test
def blend3(x1,x2, x3, y, metric, task, x1valid, x2valid, x3valid, x1test, x2test, x3test):
try:
mm = no_transform()
mbest_score = -2
for w1 in np.arange(0.2, 1, 0.2):
for w2 in np.arange(0.1, 0.6, 0.2):
w3 = 1- w1 - w2
if w3 > 0:
x = mm.fit_transform(x1)*w1 + mm.fit_transform(x2)*w2 + mm.fit_transform(x3)*w3
                    score = getattr(libscores, metric)(y, x, task)
try:
if score <= 0:
                            CVscore_auc = libscores.auc_metric(y, x, task)
score += CVscore_auc/10
except:
pass
if score > mbest_score:
mbest_score = score
mbest_w1 = w1
mbest_w2 = w2
mbest_w3 = 1- mbest_w1- mbest_w2
xvalid = mm.fit_transform(x1valid) * mbest_w1 + mm.fit_transform(x2valid)* mbest_w2 + mm.fit_transform(x3valid)* mbest_w3
xtest = mm.fit_transform(x1test) * mbest_w1 + mm.fit_transform(x2test) * mbest_w2 + mm.fit_transform(x3test) * mbest_w3
return mbest_score, xvalid, xtest
except:
return 0.01, x1valid, x1test
def tiedrank(a):
''' Return the ranks (with base 1) of a list resolving ties by averaging.
This works for numpy arrays.'''
m=len(a)
# Sort a in ascending order (sa=sorted vals, i=indices)
i=a.argsort()
sa=a[i]
# Find unique values
uval=np.unique(a)
# Test whether there are ties
R=np.arange(m, dtype=float)+1 # Ranks with base 1
if len(uval)!=m:
# Average the ranks for the ties
oldval=sa[0]
newval=sa[0]
k0=0
for k in range(1,m):
newval=sa[k]
if newval==oldval:
# moving average
R[k0:k+1]=R[k-1]*(k-k0)/(k-k0+1)+R[k]/(k-k0+1)
else:
k0=k;
oldval=newval
# Invert the index
S=np.empty(m)
S[i]=R
return S
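Quick check of tiedrank against scipy's average ranking; scipy is only used here for comparison and is not required by tiedrank itself:

import numpy as np
from scipy.stats import rankdata

a = np.array([10., 20., 20., 30.])
print(tiedrank(a))                    # [1.  2.5 2.5 4. ]
print(rankdata(a, method='average'))  # same ranks, for comparison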
def plot_trajectories(src_sent, src_encoding, idx):
# encoding is (time_steps, hidden_dim)
#pca = PCA(n_components=1)
#pca_result = pca.fit_transform(src_encoding)
times = np.arange(src_encoding.shape[0])
plt.plot(times, src_encoding)
plt.title(" ".join(src_sent))
plt.xlabel('timestep')
plt.ylabel('trajectories')
plt.savefig("misc_hidden_cell_trajectories_"+str(idx), bbox_inches="tight")
plt.close()
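A hedged sketch of a call to plot_trajectories; numpy as np and matplotlib.pyplot as plt are assumed to be imported by the surrounding module, and the random encoding just stands in for real hidden states:

import numpy as np
import matplotlib.pyplot as plt

src_sent = "the cat sat".split()
src_encoding = np.random.randn(12, 8)   # 12 timesteps, hidden dim 8
plot_trajectories(src_sent, src_encoding, idx=0)
# saves a figure named misc_hidden_cell_trajectories_0 (PNG by default),
# with one line per hidden unit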
def dense_to_one_hot(labels_dense, num_classes=10):
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = numpy.arange(num_labels) * num_classes
labels_one_hot = numpy.zeros((num_labels, num_classes))
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
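For example (the labels and class count below are arbitrary):

import numpy
labels = numpy.array([0, 2, 9])
print(dense_to_one_hot(labels, num_classes=10))
# each row has a single 1 in the column given by the corresponding label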
def iterate_minibatches(inputs, targets, batchsize, shuffle=False, augment=False):
assert len(inputs) == len(targets)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
if augment:
# as in paper :
# pad feature arrays with 4 pixels on each side
# and do random cropping of 32x32
padded = np.pad(inputs[excerpt],((0,0),(0,0),(4,4),(4,4)),mode='constant')
random_cropped = np.zeros(inputs[excerpt].shape, dtype=np.float32)
            crops = np.random.randint(0, 9, size=(batchsize, 2))  # top-left corners for 32x32 crops
for r in range(batchsize):
random_cropped[r,:,:,:] = padded[r,:,crops[r,0]:(crops[r,0]+32),crops[r,1]:(crops[r,1]+32)]
inp_exc = random_cropped
else:
inp_exc = inputs[excerpt]
yield inp_exc, targets[excerpt]
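Illustrative use with CIFAR-like shapes; the 20 random 3x32x32 images and integer labels are purely for demonstration:

import numpy as np

inputs = np.random.rand(20, 3, 32, 32).astype(np.float32)
targets = np.random.randint(0, 10, size=20)
for x_batch, y_batch in iterate_minibatches(inputs, targets, batchsize=8,
                                            shuffle=True, augment=True):
    print(x_batch.shape, y_batch.shape)   # (8, 3, 32, 32) (8,)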
def __init__(self, env):
self.env = env
if isinstance(env.observation_space, Discrete):
self.state_size = 1
else:
self.state_size = numel(env.observation_space.shape)
if isinstance(self.env.action_space, Discrete):
self.is_discrete = True
self.action_size = env.action_space.n
self.actions = np.arange(self.action_size)
else:
self.is_discrete = False
self.action_size = numel(env.action_space.sample())