def dump_source_translation(model, source_buckets, vocab_inv_source, vocab_inv_target, beam_width=8, normalization_alpha=0):
    for source_bucket in source_buckets:
        if beam_width == 1:  # greedy
            batchsize = 24
            if len(source_bucket) > batchsize:
                num_sections = len(source_bucket) // batchsize - 1
                if len(source_bucket) % batchsize > 0:
                    num_sections += 1
                indices = [(i + 1) * batchsize for i in range(num_sections)]
                source_sections = np.split(source_bucket, indices, axis=0)
            else:
                source_sections = [source_bucket]

            for source_batch in source_sections:
                translation_batch = translate_greedy(model, source_batch, source_batch.shape[1] * 2, len(vocab_inv_target), beam_width)
                for index in range(len(translation_batch)):
                    source = source_batch[index]
                    translation = translation_batch[index]
                    dump_translation(vocab_inv_source, vocab_inv_target, source, translation)
        else:  # beam search
            for index in range(len(source_bucket)):
                source = source_bucket[index]
                translations = translate_beam_search(model, source, source.size * 2, len(vocab_inv_target), beam_width, normalization_alpha, return_all_candidates=True)
                dump_all_translation(vocab_inv_source, vocab_inv_target, source, translations)
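# Minimal sketch (not from the original project) of the minibatch-splitting
# pattern used above and in the two functions below, assuming a plain NumPy
# array of rows. np.split with a list of cut indices returns len(indices) + 1
# sections, so every row lands in exactly one minibatch.
# `split_minibatches` is a hypothetical helper name, not part of the source.
import numpy as np

def split_minibatches(dataset, batchsize):
    if len(dataset) <= batchsize:
        return [dataset]
    indices = list(range(batchsize, len(dataset), batchsize))
    return np.split(dataset, indices, axis=0)

# e.g. 10 rows with batchsize 4 -> sections of 4, 4 and 2 rows
assert [len(s) for s in split_minibatches(np.arange(10).reshape(10, 1), 4)] == [4, 4, 2]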
def compute_accuracy(model, buckets, batchsize=100):
    result = []
    for bucket_index, dataset in enumerate(buckets):
        acc = []
        # split into minibatches
        if len(dataset) > batchsize:
            num_sections = len(dataset) // batchsize - 1
            if len(dataset) % batchsize > 0:
                num_sections += 1
            indices = [(i + 1) * batchsize for i in range(num_sections)]
            sections = np.split(dataset, indices, axis=0)
        else:
            sections = [dataset]
        # compute accuracy
        for batch_index, batch in enumerate(sections):
            printr("computing accuracy ... bucket {}/{} (batch {}/{})".format(bucket_index + 1, len(buckets), batch_index + 1, len(sections)))
            acc.append(compute_accuracy_batch(model, batch))
        result.append(sum(acc) / len(acc))
    printr("")
    return result
def compute_perplexity(model, buckets, batchsize=100):
    result = []
    for bucket_index, dataset in enumerate(buckets):
        ppl = []
        # split into minibatches
        if len(dataset) > batchsize:
            num_sections = len(dataset) // batchsize - 1
            if len(dataset) % batchsize > 0:
                num_sections += 1
            indices = [(i + 1) * batchsize for i in range(num_sections)]
            sections = np.split(dataset, indices, axis=0)
        else:
            sections = [dataset]
        # compute perplexity
        for batch_index, batch in enumerate(sections):
            sys.stdout.write("\rcomputing perplexity ... bucket {}/{} (batch {}/{})".format(bucket_index + 1, len(buckets), batch_index + 1, len(sections)))
            sys.stdout.flush()
            ppl.append(compute_perplexity_batch(model, batch))
        result.append(sum(ppl) / len(ppl))
    sys.stdout.write("\r" + stdout.CLEAR)
    sys.stdout.flush()
    return result
def __init__(self):
    dict_ = cPickle.load(open(file_path + '/dict_.pkl', "rb"))
    gen_images = dict_['gen_images']
    self.num_ex = 4
    self.row_list = []
    if 'ground_truth' in dict_:
        ground_truth = dict_['ground_truth']
        if not isinstance(ground_truth, list):
            ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
            ground_truth = [np.squeeze(g) for g in ground_truth]
        ground_truth = ground_truth[1:]
        self.row_list.append((ground_truth, 'Ground Truth'))
    self.row_list.append((gen_images, 'Gen Images'))
    self.build_figure()
def save_distrib_visual(self, full_images, use_genimg=True):
    # assumes full_images is already rescaled to [0, 1]
    orig_images = np.split(full_images, full_images.shape[0], axis=0)
    orig_images = [im.reshape(1, 64, 64, 3) for im in orig_images]
    # the first image of corr_gen_images is the first image of the original images!
    file_path = self.policyparams['current_dir'] + '/videos_distrib'
    if use_genimg:
        cPickle.dump([orig_images, self.corr_gen_images, self.rec_input_distrib, self.desig_pix],
                     open(file_path + '/correction.pkl', 'wb'))
        distrib = make_color_scheme(self.rec_input_distrib)
        distrib = add_crosshairs(distrib, self.desig_pix)
        frame_list = assemble_gif([orig_images, self.corr_gen_images, distrib], num_exp=1)
    else:
        cPickle.dump([orig_images, self.rec_input_distrib],
                     open(file_path + '/correction.pkl', 'wb'))
        distrib = make_color_scheme(self.rec_input_distrib)
        distrib = add_crosshairs(distrib, self.desig_pix)
        frame_list = assemble_gif([orig_images, distrib], num_exp=1)
    npy_to_gif(frame_list, self.policyparams['rec_distrib'])
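# Shape note (illustrative, not part of the class above): splitting a
# (T, 64, 64, 3) image stack along axis 0 into T pieces yields one
# (1, 64, 64, 3) array per frame, which appears to be the per-frame list
# format that assemble_gif consumes here.
import numpy as np

full_images = np.zeros((15, 64, 64, 3))
frames = np.split(full_images, full_images.shape[0], axis=0)
assert len(frames) == 15 and frames[0].shape == (1, 64, 64, 3)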
def fft(self, audio, highpass, lowpass):
    """
    Fast Fourier transform conditioning

    Output:
    'output' contains the strength of each frequency in the audio signal;
    each frequency is identified by its position in 'output':
        frequency = index * rate / buffersize
        output.size = buffersize / 2

    Method:
    Use numpy's FFT (numpy.fft.fft).
    Find the magnitude of the complex numbers returned (abs value).
    Split the FFT array in half, because we have mirror frequencies
    (they're the complex conjugates).
    Use just the first half to apply the bandpass filter.
    Great info here: http://stackoverflow.com/questions/4364823/how-to-get-frequency-from-fft-result
    """
    left, right = numpy.split(numpy.abs(numpy.fft.fft(audio)), 2)
    output = left[highpass:lowpass]
    return output
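# Illustrative sketch (not part of the original class): checks the
# frequency = index * rate / buffersize mapping described in the docstring
# above, using a pure 440 Hz tone. The names `rate`, `buffersize` and
# `peak_frequency_demo` are assumptions introduced here for the example.
import numpy

def peak_frequency_demo(rate=8000, buffersize=1024):
    t = numpy.arange(buffersize) / float(rate)
    audio = numpy.sin(2 * numpy.pi * 440.0 * t)            # 440 Hz test tone
    left, _ = numpy.split(numpy.abs(numpy.fft.fft(audio)), 2)
    peak_index = int(numpy.argmax(left[1:])) + 1            # skip the DC bin
    return peak_index * rate / float(buffersize)            # close to 440 Hz

# print(peak_frequency_demo())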
def test_batches_from_two_sets():
    data1 = np.array(['a', 'b'])
    data2 = np.array(['c', 'd', 'e'])

    batch_generator = combine_batches(
        eternal_batches(data1, batch_size=1),
        eternal_batches(data2, batch_size=2)
    )

    first_six_batches = list(islice(batch_generator, 6))
    assert [len(batch) for batch in first_six_batches] == [3, 3, 3, 3, 3, 3]

    batch_portions1 = [batch[:1] for batch in first_six_batches]
    batch_portions2 = [batch[1:] for batch in first_six_batches]
    returned1 = np.concatenate(batch_portions1)
    returned2 = np.concatenate(batch_portions2)
    epochs1 = np.split(returned1, 3)
    epochs2 = np.split(returned2, 4)
    assert all(sorted(items) == ['a', 'b'] for items in epochs1)
    assert all(sorted(items) == ['c', 'd', 'e'] for items in epochs2)
def test_stratified_batches():
    data = np.array([('a', -1), ('b', 0), ('c', 1), ('d', -1), ('e', -1)],
                    dtype=[('x', np.str_, 8), ('y', np.int32)])

    assert list(data['x']) == ['a', 'b', 'c', 'd', 'e']
    assert list(data['y']) == [-1, 0, 1, -1, -1]

    batch_generator = training_batches(data, batch_size=3, n_labeled_per_batch=1)
    first_ten_batches = list(islice(batch_generator, 10))

    labeled_batch_portions = [batch[:1] for batch in first_ten_batches]
    unlabeled_batch_portions = [batch[1:] for batch in first_ten_batches]
    labeled_epochs = np.split(np.concatenate(labeled_batch_portions), 5)
    unlabeled_epochs = np.split(np.concatenate(unlabeled_batch_portions), 4)

    assert ([sorted(items['x'].tolist()) for items in labeled_epochs] ==
            [['b', 'c']] * 5)
    assert ([sorted(items['y'].tolist()) for items in labeled_epochs] ==
            [[0, 1]] * 5)
    assert ([sorted(items['x'].tolist()) for items in unlabeled_epochs] ==
            [['a', 'b', 'c', 'd', 'e']] * 4)
    assert ([sorted(items['y'].tolist()) for items in unlabeled_epochs] ==
            [[-1, -1, -1, -1, -1]] * 4)
def create_batches(self):
    self.num_batches = int(self.train.size / (self.batch_size * self.seq_length))
    self.num_valid_batches = int(self.valid.size / (self.batch_size * self.seq_length))

    # When the data (tensor) is too small, give a better error message
    if self.num_batches == 0:
        assert False, "Not enough data. Make seq_length and batch_size small."

    self.train = self.train[:self.num_batches * self.batch_size * self.seq_length]
    self.valid = self.valid[:self.num_valid_batches * self.batch_size * self.seq_length]
    xdata = self.train
    ydata = np.copy(self.train)
    ydata[:-1] = xdata[1:]
    ydata[-1] = xdata[0]
    x_valid = self.valid
    y_valid = np.copy(self.valid)
    y_valid[:-1] = x_valid[1:]
    y_valid[-1] = x_valid[0]

    self.x_valid = np.split(x_valid.reshape(self.batch_size, -1), self.num_valid_batches, 1)
    self.y_valid = np.split(y_valid.reshape(self.batch_size, -1), self.num_valid_batches, 1)
    self.x_batches = np.split(xdata.reshape(self.batch_size, -1), self.num_batches, 1)
    self.y_batches = np.split(ydata.reshape(self.batch_size, -1), self.num_batches, 1)
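# Shape check (illustrative, assuming batch_size=2 and seq_length=3):
# reshaping the flat token stream to (batch_size, -1) and splitting along
# axis 1 yields num_batches arrays of shape (batch_size, seq_length), which
# is the batch layout the method above builds.
import numpy as np

tokens = np.arange(12)                          # 12 tokens -> 2 batches of 2x3
num_batches = tokens.size // (2 * 3)
batches = np.split(tokens.reshape(2, -1), num_batches, 1)
assert len(batches) == 2 and batches[0].shape == (2, 3)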
def arrange_images(Y):
    concat_image = None
    Y = (Y + 1) / 2
    for yi in np.split(Y, 10):
        image = None
        for y in yi:
            img = cv2.merge((y[0, :, :], y[1, :, :], y[2, :, :]))
            if image is None:
                image = img
            else:
                image = np.concatenate((image, img))
        if concat_image is None:
            concat_image = image
        else:
            concat_image = np.concatenate((concat_image, image), axis=1)
    return concat_image
def make_video(file_path, conf):
    print 'reading files from:', file_path
    ground_truth = cPickle.load(open(file_path + '/ground_truth.pkl', "rb"))
    gen_images = cPickle.load(open(file_path + '/gen_image_seq.pkl', "rb"))
    distrib = cPickle.load(open(file_path + '/output_distrib_list.pkl', "rb"))

    ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
    ground_truth = np.squeeze(ground_truth)
    fused_gif = video_prediction.utils_vpred.create_gif.assemble_gif([ground_truth, gen_images, distrib])

    import re
    itr_vis = re.match('.*?([0-9]+)$', conf['visualize']).group(1)
    video_prediction.utils_vpred.create_gif.npy_to_gif(fused_gif, file_path + '/' + conf['experiment_name'] + '_' + str(itr_vis))
    return fused_gif
def comp_video(file_path, conf, suffix=None):
    print 'reading files from:', file_path
    ground_truth = cPickle.load(open(file_path + '/ground_truth.pkl', "rb"))
    gen_images = cPickle.load(open(file_path + '/gen_image_seq.pkl', "rb"))

    ground_truth = np.split(ground_truth, ground_truth.shape[1], axis=1)
    ground_truth = np.squeeze(ground_truth)
    fused_gif = assemble_gif([ground_truth, gen_images])

    itr_vis = re.match('.*?([0-9]+)$', conf['visualize']).group(1)
    if not suffix:
        name = file_path + '/vid_' + conf['experiment_name'] + '_' + str(itr_vis)
    else:
        name = file_path + '/vid_' + conf['experiment_name'] + '_' + str(itr_vis) + suffix
    npy_to_gif(fused_gif, name)
    return fused_gif
def save_distrib_visual(self, full_images, use_genimg=True):
    # assumes full_images is already rescaled to [0, 1]
    orig_images = np.split(full_images, full_images.shape[0], axis=0)
    orig_images = [im.reshape(1, 64, 64, 3) for im in orig_images]
    # the first image of corr_gen_images is the first image of the original images!
    file_path = self.policyparams['current_dir'] + '/videos_distrib'
    if use_genimg:
        cPickle.dump([orig_images, self.corr_gen_images, self.rec_input_distrib, self.desig_pix],
                     open(file_path + '/correction.pkl', 'wb'))
        distrib = makegif.pix_distrib_video(self.rec_input_distrib)
        distrib = makegif.add_crosshairs(distrib, self.desig_pix)
        frame_list = makegif.assemble_gif([orig_images, self.corr_gen_images, distrib], num_exp=1)
    else:
        cPickle.dump([orig_images, self.rec_input_distrib],
                     open(file_path + '/correction.pkl', 'wb'))
        distrib = makegif.pix_distrib_video(self.rec_input_distrib)
        distrib = makegif.add_crosshairs(distrib, self.desig_pix)
        frame_list = makegif.assemble_gif([orig_images, distrib], num_exp=1)
    makegif.npy_to_gif(frame_list, self.policyparams['rec_distrib'])
def genTrainData(self):
    data = []
    with open('../train-data.csv', 'r') as f:
        data = [list(map(int, rec)) for rec in csv.reader(f, delimiter=',')]
    data = np.array(data)
    labels = data[:, 0]
    data = np.delete(data, 0, 1)
    # split off the last 25% of rows as the test set; np.split with a single
    # index returns both halves, so the test rows never overlap the training rows
    split_at = int(data.shape[0] * .75)
    data, testData = np.split(data, [split_at])
    labels, testLabels = np.split(labels, [split_at])
    return data, labels, testData, testLabels
def run_trial(self, trial_input, t_connectivity=None, use_input=True):
    rnn_inputs = np.split(trial_input, trial_input.shape[0], axis=0)
    state = np.expand_dims(self.init_state[0, :], 0)
    rnn_outputs = []
    rnn_states = []
    for i, rnn_input in enumerate(rnn_inputs):
        if t_connectivity:
            output, state = self.rnn_step(state, rnn_input, t_connectivity[i], use_input)
        else:
            output, state = self.rnn_step(state, rnn_input, np.ones_like(self.W_rec), use_input)
        rnn_outputs.append(output)
        rnn_states.append(state)
    return np.array(rnn_outputs), np.array(rnn_states)
# apply the RNN to a whole batch of inputs
def run_trials(self, trial_input, batch_size, t_connectivity=None, use_input=True):
    rnn_inputs = np.split(trial_input, trial_input.shape[1], axis=1)
    state = np.expand_dims(self.init_state[0, :], 0)
    state = np.repeat(state, batch_size, 0)
    rnn_outputs = []
    rnn_states = []
    # enumerate so t_connectivity can be indexed per timestep, as in run_trial
    for i, rnn_input in enumerate(rnn_inputs):
        if t_connectivity:
            output, state = self.rnn_step(state, rnn_input, t_connectivity[i], use_input)
        else:
            output, state = self.rnn_step(state, rnn_input, np.ones_like(self.W_rec), use_input)
        rnn_outputs.append(output)
        rnn_states.append(state)
    return np.array(rnn_outputs), np.array(rnn_states)
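# Shape sketch (illustrative, input layout assumed): run_trial above splits a
# single (T, n_in) trial along axis=0 into T slices of shape (1, n_in), one per
# timestep, and run_trials splits its batched input along axis=1 the same way.
import numpy as np

trial_input = np.zeros((7, 3))                  # 7 timesteps, 3 input channels
steps = np.split(trial_input, trial_input.shape[0], axis=0)
assert len(steps) == 7 and steps[0].shape == (1, 3)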
def __init__(self, data, target, hidden_layers, model_filename=""):
    """ Must submit either a net configuration, or something to load from """
    if hidden_layers == [] and model_filename == "":
        raise Exception("Must provide a net configuration or a file to load from")

    """ Divide the data into training and test """
    self.trainsize = int(len(data) * 5 / 6)
    self.testsize = len(data) - self.trainsize
    self.x_train, self.x_test = np.split(data, [self.trainsize])
    self.y_train, self.y_test = np.split(target, [self.trainsize])

    """ Create the underlying neural network model """
    self.sizes = [len(data[0])]
    self.sizes.extend(hidden_layers)
    self.sizes.append(len(set(target)))
    self.model = L.Classifier(BaseNetwork(self.sizes))

    """ Create the underlying optimizer """
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self.model)
def _compute_table_rank(self, contained):
    logger.log(logging.DEBUG, "Computing tables relations")

    tables_rank = [([], []) for _ in range(6)]

    indices = [
        set(l) for l in np.split(contained.indices, contained.indptr)[1:-1]
    ]

    for root in self.dictionary.roots:
        for t0, t1 in combinations(self.dictionary.roots[root], 2):
            commons = [self.dictionary.index[i] for i in indices[t0.index] & indices[t1.index]]
            rank = max(map(lambda t: t.rank, commons))
            tables_rank[rank][0].extend((t0.index, t1.index))
            tables_rank[rank][1].extend((t1.index, t0.index))

    return [coo_matrix(([True] * len(i), (i, j)), shape=self.shape, dtype=np.bool)
            for i, j in tables_rank]
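# Side note (illustrative, not from the original project): the expression
# np.split(contained.indices, contained.indptr)[1:-1] relies on the CSR layout,
# where indptr marks where each row starts inside the flat indices array, so
# the split recovers the column indices of every row. A tiny check:
import numpy as np
from scipy.sparse import csr_matrix

m = csr_matrix(np.array([[1, 0, 1],
                         [0, 0, 0],
                         [0, 1, 1]]))
rows = [set(r) for r in np.split(m.indices, m.indptr)[1:-1]]
assert rows == [{0, 2}, set(), {1, 2}]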
def prepare_faces():
    data = sklearn.datasets.fetch_olivetti_faces('../data', shuffle=False)
    X = data.data
    y = data.target

    X = np.split(X, 40)
    y = np.split(y, 40)

    X_train = [x[0:7, :] for x in X]
    X_test = [x[7:, :] for x in X]
    y_train = [a[0:7] for a in y]
    y_test = [a[7:] for a in y]

    X_train = np.concatenate(X_train)
    X_test = np.concatenate(X_test)
    y_train = pd.Series(np.concatenate(y_train))
    y_test = pd.Series(np.concatenate(y_test))

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = pd.DataFrame(scaler.fit_transform(X_train))
    X_test = pd.DataFrame(scaler.transform(X_test))

    return X_train, y_train, X_test, y_test, scaler
def prepare_faces():
    data = sklearn.datasets.fetch_olivetti_faces('../data', shuffle=False)
    X = data.data
    y = data.target

    X = np.split(X, 40)
    y = np.split(y, 40)

    X_train = [x[0:7, :] for x in X]
    X_test = [x[7:, :] for x in X]
    y_train = [a[0:7] for a in y]
    y_test = [a[7:] for a in y]

    X_train = np.concatenate(X_train)
    X_test = np.concatenate(X_test)
    y_train = np.concatenate(y_train)
    y_test = np.concatenate(y_test)

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, y_train, X_test, y_test, scaler
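# Illustrative check of the per-subject split used in both prepare_faces
# variants, with a synthetic stand-in for the Olivetti data (40 subjects x 10
# images): np.split(X, 40) only works because the rows are grouped by subject
# and 400 is divisible by 40, and taking the first 7 rows of each group keeps
# the train/test split balanced per subject.
import numpy as np

X = np.arange(400 * 2).reshape(400, 2)              # 400 "images", 2 features each
y = np.repeat(np.arange(40), 10)                    # subject labels 0..39
X_groups = np.split(X, 40)
y_groups = np.split(y, 40)
X_train = np.concatenate([g[:7] for g in X_groups])     # first 7 images per subject
X_test = np.concatenate([g[7:] for g in X_groups])      # remaining 3 per subject
y_train = np.concatenate([g[:7] for g in y_groups])
assert X_train.shape == (280, 2) and X_test.shape == (120, 2) and y_train.shape == (280,)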