def encode_jpeg(arr):
    assert arr.dtype == np.uint8
    # simulate a multi-channel array for single-channel arrays
    if len(arr.shape) == 3:
        arr = np.expand_dims(arr, 3)  # add a channel axis to the end of x,y,z
    arr = arr.transpose((3, 2, 1, 0))  # channels, z, y, x
    reshaped = arr.reshape(arr.shape[3] * arr.shape[2], arr.shape[1] * arr.shape[0])
    if arr.shape[0] == 1:
        img = Image.fromarray(reshaped, mode='L')
    elif arr.shape[0] == 3:
        img = Image.fromarray(reshaped, mode='RGB')
    else:
        # after the transpose, the channel count lives in arr.shape[0]
        raise ValueError("Number of image channels should be 1 or 3. Got: {}".format(arr.shape[0]))
    f = io.BytesIO()
    img.save(f, "JPEG")
    return f.getvalue()
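A minimal usage sketch for the encoder above, assuming the same imports the snippet relies on; the input is a hypothetical single-channel x,y,z uint8 volume:

import io
import numpy as np
from PIL import Image

volume = np.random.randint(0, 256, size=(64, 64, 4), dtype=np.uint8)  # hypothetical test volume
jpeg_bytes = encode_jpeg(volume)  # all z-slices packed into one 2D grayscale JPEG
with open('volume.jpg', 'wb') as f:
    f.write(jpeg_bytes)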
Source: pruned_box_features.py (project: Deep360Pilot-optical-flow, author: yenchenlin)
def gen_pruned_features(name):
    print(name)
    feature_dir = 'data/feature_' + args.domain + \
        '_' + str(args.n_boxes) + 'boxes/' + name + '/'
    n_clips = len(glob.glob(feature_dir + BOX_FEATURE + '*.npy'))
    for clip in range(1, n_clips + 1):
        pruned_boxes = np.load(feature_dir + BOX_FEATURE + '{:04d}.npy'.format(clip))  # (50, args.n_boxes, 4)
        roisavg = np.load(feature_dir + 'roisavg{:04d}.npy'.format(clip))  # (50, args.n_boxes, 512)

        pruned_roisavg = np.zeros((50, args.n_boxes, 512))
        for frame in range(50):
            for box_id in range(args.n_boxes):
                # keep the feature only for boxes that survived pruning
                if not np.array_equal(pruned_boxes[frame][box_id], np.zeros(4)):
                    pruned_roisavg[frame][box_id] = roisavg[frame][box_id]

        np.save('{}pruned_roisavg{:04d}'.format(feature_dir, clip), pruned_roisavg)
def visualize(self, zv, path):
    self.ax1.clear()
    self.ax2.clear()
    z, v = zv
    if path:
        np.save(path + '/trajectory.npy', z)

    z = np.reshape(z, [-1, 2])
    self.ax1.hist2d(z[:, 0], z[:, 1], bins=400)
    self.ax1.set(xlim=self.xlim(), ylim=self.ylim())

    v = np.reshape(v, [-1, 2])
    self.ax2.hist2d(v[:, 0], v[:, 1], bins=400)
    self.ax2.set(xlim=self.xlim(), ylim=self.ylim())

    if self.display:
        import matplotlib.pyplot as plt
        plt.show()
        plt.pause(0.1)
    elif path:
        self.fig.savefig(path + '/visualize.png')
def load_rec(self):
    # First see if anything with the save data exists, since we don't
    # want to keep loading from the original load location if some work
    # has already been done.
    load = self.load_from_db({'exp_id': self.exp_id},
                             cache_filters=True)
    # If not, try loading from the load location.
    if not load and not self.sameloc:
        load = self.load_from_db(self.load_query,
                                 cache_filters=True,
                                 collfs=self.load_collfs,
                                 collfs_recent=self.load_collfs_recent)
        if load is None:
            raise Exception('You specified load parameters but no '
                            'record was found with the given spec.')
    self.load_data = load
Source: feature_extractor.py (project: video_labelling_using_youtube8m, author: LittleWat)
def get_feature_mat_from_video(video_filename, output_dir='output'):
    yt_vid, extension = video_filename.split('/')[-1].split('.')
    assert extension in ['webm', 'mp4', '3gp']

    mkdir_if_not_exist(output_dir, False)
    output_filename = output_dir + '/' + yt_vid + '.npy'

    vid_reader = imageio.get_reader(video_filename, 'ffmpeg')
    img_list = get_img_list_from_vid_reader(vid_reader, extension)

    base_model = InceptionV3(include_top=True, weights='imagenet')
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)
    feature_mat = get_feature_mat(model, img_list)

    np.save(output_filename, feature_mat)
    return feature_mat
def compute_dt_dist(docs, labels, tags, model, max_len, batch_size, pad_id, idxvocab, output_file):
    # generate batches
    num_batches = int(math.ceil(float(len(docs)) / batch_size))
    dt_dist = []
    t = []
    combined = []
    docid = 0
    for i in range(num_batches):
        x, _, _, t, s = get_batch_doc(docs, labels, tags, i, max_len, cf.tag_len, batch_size, pad_id)
        attention, mean_topic = sess.run([model.attention, model.mean_topic], {model.doc: x, model.tag: t})
        dt_dist.extend(attention[:s])
        if debug:
            for si in range(s):
                d = x[si]
                print("\n\nDoc", docid, "=", " ".join([idxvocab[item] for item in d if (item != pad_id)]))
                sorted_dist = matutils.argsort(attention[si], reverse=True)
                for ti in sorted_dist:
                    print("Topic", ti, "=", attention[si][ti])
                docid += 1
    # np.save writes binary data, so the file must be opened in "wb" mode
    np.save(open(output_file, "wb"), dt_dist)
def predictPL(self):
    B = self.flags.batch_size
    W, H, C = self.flags.width, self.flags.height, self.flags.color
    inputs = tf.placeholder(dtype=tf.float32, shape=[None, H, W, C])
    #with open(self.flags.pred_path, 'w') as f:
    #    pass
    self._build(inputs, resize=False)
    counter = 0
    with tf.Session() as sess:
        self.sess = sess
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        for imgs, imgnames in self.DATA.test_generator():
            pred = sess.run(self.logit, feed_dict={inputs: imgs})
            np.save("%s/%d.npy" % (self.flags.pred_path, counter), {"pred": pred, "name": imgnames})
            counter += len(imgs)
            if counter // B % 10 == 0:  # integer division, as intended in the original Python 2 code
                print_mem_time("%d images predicted" % counter)
# train with placeholders
def show_embedding(self, name, save_model="model.ckpt", meta_path='metadata.tsv'):
    self._build()
    self._write_meta()
    from tensorflow.contrib.tensorboard.plugins import projector

    # Use the same LOG_DIR where you stored your checkpoint.
    with tf.Session() as sess:
        self.sess = sess
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(self.flags.log_path, sess.graph)
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(self.flags.log_path, save_model), 0)

        # Format: tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto
        config = projector.ProjectorConfig()
        # You can add multiple embeddings. Here we add only one.
        embedding = config.embeddings.add()
        embedding.tensor_name = name
        # Link this tensor to its metadata file (e.g. labels).
        embedding.metadata_path = os.path.join(self.flags.log_path, meta_path)
        # Saves a configuration file that TensorBoard will read during startup.
        projector.visualize_embeddings(summary_writer, config)
def split(flags):
    if os.path.exists(flags.split_path):
        # allow_pickle is required to load a pickled dict on NumPy >= 1.16.4
        return np.load(flags.split_path, allow_pickle=True).item()
    folds = flags.folds
    path = flags.input_path
    random.seed(6)
    img_list = ["%s/%s" % (path, img) for img in os.listdir(path)]
    random.shuffle(img_list)
    dic = {}
    n = len(img_list)
    num = (n + folds - 1) // folds
    for i in range(folds):
        s, e = i * num, min(i * num + num, n)
        dic[i] = img_list[s:e]
    np.save(flags.split_path, dic)
    return dic
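Because np.save falls back to pickling non-array objects such as the fold dict above, reading it back on NumPy >= 1.16.4 requires allow_pickle=True, as the cache branch does. A small round-trip sketch:

import numpy as np

folds = {0: ['a.png', 'b.png'], 1: ['c.png']}
np.save('split.npy', folds)  # the dict is stored as a pickled 0-d object array
loaded = np.load('split.npy', allow_pickle=True).item()  # .item() unwraps the dict
assert loaded == folds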
def save(self, filename):
    """Saves the collection to a file.

    Parameters
    ----------
    filename : :obj:`str`
        The file to save the collection to.

    Raises
    ------
    ValueError
        If the file extension is not .npy or .npz.
    """
    file_root, file_ext = os.path.splitext(filename)
    if file_ext == '.npy':
        np.save(filename, self._data)
    elif file_ext == '.npz':
        np.savez_compressed(filename, self._data)
    else:
        raise ValueError('Extension %s not supported for point saves.' % (file_ext))
def get_word_seq(train_ori1, train_ori2, test_ori1, test_ori2):
    # fit the tokenizer
    tk = Tokenizer(num_words=TrainConfig.MAX_NB_WORDS)
    tk.fit_on_texts(train_ori1 + train_ori2 + test_ori1 + test_ori2)
    word_index = tk.word_index

    # q1, q2 training text sequences: (sentence_len, MAX_SEQUENCE_LENGTH)
    train_x1 = tk.texts_to_sequences(train_ori1)
    train_x1 = pad_sequences(train_x1, maxlen=TrainConfig.MAX_SEQUENCE_LENGTH)
    train_x2 = tk.texts_to_sequences(train_ori2)
    train_x2 = pad_sequences(train_x2, maxlen=TrainConfig.MAX_SEQUENCE_LENGTH)

    # q1, q2 testing text sequences
    test_x1 = tk.texts_to_sequences(test_ori1)
    test_x1 = pad_sequences(test_x1, maxlen=TrainConfig.MAX_SEQUENCE_LENGTH)
    test_x2 = tk.texts_to_sequences(test_ori2)
    test_x2 = pad_sequences(test_x2, maxlen=TrainConfig.MAX_SEQUENCE_LENGTH)

    np.save(open(DirConfig.Q1_CACHE_TRAIN, 'wb'), train_x1)
    np.save(open(DirConfig.Q2_CACHE_TRAIN, 'wb'), train_x2)
    np.save(open(DirConfig.Q1_CACHE_TEST, 'wb'), test_x1)
    np.save(open(DirConfig.Q2_CACHE_TEST, 'wb'), test_x2)
    np.save(open(DirConfig.WORD_INDEX_CACHE, 'wb'), word_index)
    return train_x1, train_x2, test_x1, test_x2, word_index
def load_word2vec_matrix(vec_file, word_index, config):
    if os.path.isfile(DirConfig.W2V_CACHE):
        print('---- Load word vectors from cache.')
        embedding_matrix = np.load(open(DirConfig.W2V_CACHE, 'rb'))
        return embedding_matrix

    print('---- loading word2vec ...')
    word2vec = KeyedVectors.load_word2vec_format(
        vec_file, binary=True)
    print('Found %s word vectors of word2vec' % len(word2vec.vocab))

    nb_words = min(config.MAX_NB_WORDS, len(word_index)) + 1
    embedding_matrix = np.zeros((nb_words, config.WORD_EMBEDDING_DIM))
    for word, i in word_index.items():
        if word in word2vec.vocab:
            embedding_matrix[i] = word2vec.word_vec(word)
    print('Null word embeddings: %d' %
          np.sum(np.sum(embedding_matrix, axis=1) == 0))

    # collect the words that have no embedding vector
    not_found_words = []
    for word, i in word_index.items():
        if word not in word2vec.vocab:
            not_found_words.append(word)

    np.save(open(DirConfig.W2V_CACHE, 'wb'), embedding_matrix)
    return embedding_matrix
def af_h5_to_np(input_path, outpath):
    files = tables.open_file(input_path, mode='r+')
    speaker_nodes = files.root._f_list_nodes()
    for spk in speaker_nodes:
        file_nodes = spk._f_list_nodes()
        for fls in file_nodes:
            file_name = fls._v_name
            af_nodes = fls._f_list_nodes()
            af_list = []
            for fts in af_nodes:
                features = fts[:]
                mean = numpy.mean(features, 1)
                normalised_feats = list(numpy.transpose(features) / mean)
                af_list += normalised_feats
            numpy.save(outpath + file_name, numpy.array(af_list))
def save_params(self, weights_file, catched=False):
    """Save the model's parameters."""
    # pickle writes binary data, so open the file in "wb" mode
    f_dump = open(weights_file, "wb")
    params_vls = []
    if catched:
        if self.catched_params != []:
            params_vls = self.catched_params
        else:
            raise ValueError(
                "You asked to save catched params, "
                "but you didn't catch any!")
    else:
        for param in self.params:
            params_vls.append(param.get_value())
    pkl.dump(params_vls, f_dump, protocol=pkl.HIGHEST_PROTOCOL)
    f_dump.close()
def main():
    args = docopt("""
    Usage:
        text2numpy.py <path>
    """)
    path = args['<path>']

    matrix = read_vectors(path)
    iw = sorted(matrix.keys())

    new_matrix = np.zeros(shape=(len(iw), len(matrix[iw[0]])), dtype=np.float32)
    for i, word in enumerate(iw):
        if word in matrix:
            new_matrix[i, :] = matrix[word]

    np.save(path + '.npy', new_matrix)
    save_vocabulary(path + '.vocab', iw)
def main():
    args = docopt("""
    Usage:
        pmi2svd.py [options] <pmi_path> <output_path>

    Options:
        --dim NUM    Dimensionality of eigenvectors [default: 500]
        --neg NUM    Number of negative samples; subtracts its log from PMI [default: 1]
    """)
    pmi_path = args['<pmi_path>']
    output_path = args['<output_path>']
    dim = int(args['--dim'])
    neg = int(args['--neg'])

    explicit = PositiveExplicit(pmi_path, normalize=False, neg=neg)

    ut, s, vt = sparsesvd(explicit.m.tocsc(), dim)
    np.save(output_path + '.ut.npy', ut)
    np.save(output_path + '.s.npy', s)
    np.save(output_path + '.vt.npy', vt)
    save_vocabulary(output_path + '.words.vocab', explicit.iw)
    save_vocabulary(output_path + '.contexts.vocab', explicit.ic)
def worker(proc_num, queue, out_dir, in_dir, count_dir, words, dim, num_words, min_count=100):
    while True:
        if queue.empty():
            break
        year = queue.get()
        print("Loading embeddings for year", year)
        time.sleep(random.random() * 120)
        valid_words = set(words_above_count(count_dir, year, min_count))
        print(len(valid_words))
        words = list(valid_words.intersection(words[year][:num_words]))
        print(len(words))
        base_embed = Explicit.load((in_dir + INPUT_FORMAT).format(year=year), normalize=False)
        base_embed = base_embed.get_subembed(words, restrict_context=True)
        print("SVD for year", year)
        u, s, v = randomized_svd(base_embed.m, n_components=dim, n_iter=5)
        print("Saving year", year)
        np.save((out_dir + OUT_FORMAT).format(year=year, dim=dim) + "-u.npy", u)
        np.save((out_dir + OUT_FORMAT).format(year=year, dim=dim) + "-v.npy", v)
        np.save((out_dir + OUT_FORMAT).format(year=year, dim=dim) + "-s.npy", s)
        write_pickle(base_embed.iw, (out_dir + OUT_FORMAT).format(year=year, dim=dim) + "-vocab.pkl")
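The saved factors can be recombined into a dense embedding matrix. A sketch following the -u.npy/-s.npy naming above; the paths are hypothetical, and scaling u by the singular values is one common convention rather than necessarily what this project does downstream:

import numpy as np

u = np.load('vecs/1990-300-u.npy')  # (vocab_size, dim) left singular vectors
s = np.load('vecs/1990-300-s.npy')  # (dim,) singular values
word_vectors = u * s  # scale each component by its singular value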
def align_years(years, rep_type, in_dir, out_dir, count_dir, min_count, **rep_args):
    first_iter = True
    base_embed = None
    for year in years:
        print("Loading year:", year)
        year_embed = create_representation(rep_type, in_dir + str(year), **rep_args)
        year_words = words_above_count(count_dir, year, min_count)
        year_embed.get_subembed(year_words)
        print("Aligning year:", year)
        if first_iter:
            aligned_embed = year_embed
            first_iter = False
        else:
            aligned_embed = alignment.smart_procrustes_align(base_embed, year_embed)
        base_embed = aligned_embed
        print("Writing year:", year)
        foutname = out_dir + str(year)
        np.save(foutname + "-w.npy", aligned_embed.m)
        write_pickle(aligned_embed.iw, foutname + "-vocab.pkl")
def safeWrite(rdd, outputfile, dvrdump=False):
    """Save the rdd in the given directory.

    Keyword arguments:
    rdd -- given rdd to be saved
    outputfile -- desired directory to save rdd
    dvrdump -- if True, collect the rdd locally and dump it as text lines
    """
    if os.path.isfile(outputfile):
        os.remove(outputfile)
    elif os.path.isdir(outputfile):
        shutil.rmtree(outputfile)

    if dvrdump:
        rdd_list = rdd.collect()
        with open(outputfile, 'w') as f:  # text mode, since str(item) is written
            count = 0
            for item in rdd_list:
                f.write(str(item))
                count = count + 1
                if count < len(rdd_list):
                    f.write("\n")
    else:
        rdd.saveAsTextFile(outputfile)
def save(self):
    if self.index is None:
        self.index = np.array(range(self.X.shape[0]))
    metadata = {
        "index": self.index.tolist(),
        "x_shape": self.X.shape,
        "x_type": str(self.X.dtype),
        "running_mean": self.running_mean.tolist(),
        "running_dev": self.running_dev.tolist(),
        "running_min": self.running_min.tolist(),
        "running_max": self.running_max.tolist(),
    }
    if self.Y is not None:
        metadata["y_shape"] = self.Y.shape
        metadata["y_type"] = str(self.Y.dtype)
    with open(self.path + "/dataset.json", "wt") as f:
        f.write(json.dumps(metadata))
    self.X.flush()
    if self.Y is not None:
        self.Y.flush()
def stop(self):
    audio.say("Stopping Accuracy Test")
    logger.info('Stopping Accuracy_Test')
    self.screen_marker_state = 0
    self.active = False
    self.close_window()

    matched_data = calibrate.closest_matches_monocular(self.gaze_list, self.ref_list)
    pt_cloud = calibrate.preprocess_2d_data_monocular(matched_data)
    logger.info("Collected {} data points.".format(len(pt_cloud)))
    if len(pt_cloud) < 20:
        logger.warning("Did not collect enough data.")
        return

    pt_cloud = np.array(pt_cloud)
    np.save(os.path.join(self.g_pool.user_dir, 'accuracy_test_pt_cloud.npy'), pt_cloud)
    gaze, ref = pt_cloud[:, 0:2], pt_cloud[:, 2:4]
    error_lines = np.array([[g, r] for g, r in zip(gaze, ref)])
    self.error_lines = error_lines.reshape(-1, 2)
    self.pt_cloud = pt_cloud
def main():
    import sys
    save_dir = sys.argv[1]
    all_imgs = []
    all_fet = []
    for line in sys.stdin:
        fet = load_np(line.strip())
        all_fet.append(fet)
        all_imgs.append(line.strip())

    fet = np.vstack(all_fet)
    np.save(osp.join(save_dir, 'c3d.npy'), fet)
    with open(osp.join(save_dir, 'c3d.list'), 'w') as writer:
        for img in all_imgs:
            writer.write('%s\n' % img)
def convert(def_path, caffemodel_path, data_output_path, code_output_path, phase):
    try:
        transformer = TensorFlowTransformer(def_path, caffemodel_path, phase=phase)
        print_stderr('Converting data...')
        if caffemodel_path is not None:
            data = transformer.transform_data()
            print_stderr('Saving data...')
            with open(data_output_path, 'wb') as data_out:
                np.save(data_out, data)
        if code_output_path:
            print_stderr('Saving source...')
            with open(code_output_path, 'wb') as src_out:
                src_out.write(transformer.transform_source())
        print_stderr('Done.')
    except KaffeError as err:
        fatal_error('Error encountered: {}'.format(err))
def batch_works(k):
    # the last process takes whatever remains after the even split
    if k == n_processes - 1:
        paths = all_paths[k * int(len(all_paths) / n_processes):]
    else:
        paths = all_paths[k * int(len(all_paths) / n_processes): (k + 1) * int(len(all_paths) / n_processes)]
    for path in paths:
        o_path = os.path.join(output_path, os.path.basename(path))
        if not os.path.exists(o_path):
            os.makedirs(o_path)
        x, y, z = perturb_patch_locations(base_locs, patch_size / 16)
        probs = generate_patch_probs(path, (x, y, z), patch_size, image_size)
        selections = np.random.choice(range(len(probs)), size=patches_per_image, replace=False, p=probs)
        image = read_image(path)
        for num, sel in enumerate(selections):
            # note: this rebinds k, which is harmless here since paths was computed above
            i, j, k = np.unravel_index(sel, (len(x), len(y), len(z)))
            patch = image[int(x[i] - patch_size / 2): int(x[i] + patch_size / 2),
                          int(y[j] - patch_size / 2): int(y[j] + patch_size / 2),
                          int(z[k] - patch_size / 2): int(z[k] + patch_size / 2), :]
            f = os.path.join(o_path, str(num))
            np.save(f, patch)
def run(self):
    all_file_names = []
    all_labels = []
    for n, folder_name in enumerate(os.listdir(self.in_txtdir().path)):
        full_folder_name = self.in_txtdir().path + '/' + folder_name
        if os.path.isfile(full_folder_name):
            continue
        for file_name in os.listdir(full_folder_name):
            all_labels.append(n)
            all_file_names.append(full_folder_name + '/' + file_name)

    vectorizer = CountVectorizer(input='filename')
    vector = vectorizer.fit_transform(all_file_names)
    numpy.save(self.out_npy().path, vector)
    numpy.save('labels', numpy.array(all_labels))  # Where and how do we want to save this?
#This is just to test the tasks above
def make_check_point(self):
    '''
    Save the solver's current status
    '''
    checkpoints = {
        'model': self.model,
        'epoch': self.epoch,
        'best_params': self.best_params,
        'best_val_acc': self.best_val_acc,
        'loss_history': self.loss_history,
        'val_acc_history': self.val_acc_history,
        'train_acc_history': self.train_acc_history}
    name = 'check_' + str(self.epoch)
    directory = os.path.join(self.path_checkpoints, name)
    if not os.path.exists(directory):
        os.makedirs(directory)
    # np.save takes (file, object): the checkpoint dict is stored as a
    # pickled 0-d object array and must be read back with allow_pickle=True.
    np.save(os.path.join(directory, name + '.pkl'), checkpoints)
def export_histories(self, path):
    if not os.path.exists(path):
        os.makedirs(path)
    i = np.arange(len(self.loss_history)) + 1
    # zip returns an iterator in Python 3, so materialize it for np.array
    z = np.array(list(zip(i, i * self.batch_size, self.loss_history)))
    np.savetxt(path + 'loss_history.csv', z, delimiter=',',
               fmt=['%d', '%d', '%f'], header='iteration, n_images, loss')
    i = np.arange(len(self.train_acc_history), dtype=int)
    z = np.array(list(zip(i, self.train_acc_history)))
    np.savetxt(path + 'train_acc_history.csv', z, delimiter=',',
               fmt=['%d', '%f'], header='epoch, train_acc')
    z = np.array(list(zip(i, self.val_acc_history)))
    np.savetxt(path + 'val_acc_history.csv', z, delimiter=',',
               fmt=['%d', '%f'], header='epoch, val_acc')
    np.save(path + 'loss', self.loss_history)
    np.save(path + 'train_acc_history', self.train_acc_history)
    np.save(path + 'val_acc_history', self.val_acc_history)
Source: test_conv1.py (project: tensorflow-action-conditional-video-prediction, author: williamd4112)
def main(args):
    with tf.Graph().as_default() as graph:
        # Create dataset
        logging.info('Create data flow from %s' % args.data)
        caffe_dataset = CaffeDataset(dir=args.data, num_act=args.num_act, mean_path=args.mean)

        # Config session
        config = get_config(args)

        x = tf.placeholder(dtype=tf.float32, shape=[None, 84, 84, 12])
        op = load_caffe_model(x, args.load)

        init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        # Start session
        with tf.Session(config=config) as sess:
            sess.run(init)
            i = 0
            for s, a in caffe_dataset(5):
                pred_data = sess.run([op], feed_dict={x: [s]})[0]
                print(pred_data.shape)
                np.save('tf-%03d.npy' % i, pred_data)
                i += 1
def test_large_file_support():
    if (sys.platform == 'win32' or sys.platform == 'cygwin'):
        raise SkipTest("Unknown if Windows has sparse filesystems")
    # try creating a large sparse file
    tf_name = os.path.join(tempdir, 'sparse_file')
    try:
        # seek past end would work too, but linux truncate somewhat
        # increases the chances that we have a sparse filesystem and can
        # avoid actually writing 5GB
        import subprocess as sp
        sp.check_call(["truncate", "-s", "5368709120", tf_name])
    except Exception:
        raise SkipTest("Could not create 5GB large file")
    # write a small array to the end
    with open(tf_name, "wb") as f:
        f.seek(5368709120)
        d = np.arange(5)
        np.save(f, d)
    # read it back
    with open(tf_name, "rb") as f:
        f.seek(5368709120)
        r = np.load(f)
    assert_array_equal(r, d)
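Saving through an open file handle, as this test does, also means several arrays can be appended to one file and read back in order. A small self-contained sketch:

import io
import numpy as np

buf = io.BytesIO()
np.save(buf, np.arange(3))  # each np.save call appends one complete .npy record
np.save(buf, np.ones((2, 2)))
buf.seek(0)
first = np.load(buf)  # arrays come back in the order they were written
second = np.load(buf)
assert first.tolist() == [0, 1, 2] and second.shape == (2, 2)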
def dataAsImageDataLayer(voc_dir, tmp_dir, image_set='train', **kwargs):
    from caffe_all import L
    from os import path
    from python_layers import PY
    Py = PY('data')

    voc_data = VOCData(voc_dir, image_set)

    # Create a text file with all the image paths
    source_file = path.join(tmp_dir, image_set + "_images.txt")
    if not path.exists(source_file):
        with open(source_file, 'w') as f:
            for n in voc_data.image_names:
                print('%s 0' % voc_data.image_path[n], file=f)

    # Create a label file
    lbl_file = path.join(tmp_dir, image_set + "_images.lbl")
    if not path.exists(lbl_file):
        np.save(open(lbl_file, 'wb'), [voc_data.labels[n] for n in voc_data.image_names])

    cs = kwargs.get('transform_param', {}).get('crop_size', 0)
    return (L.ImageData(source=source_file, ntop=2, new_width=cs, new_height=cs, **kwargs)[0],
            Py.LabelData(label=lbl_file, batch_size=kwargs.get('batch_size', 1)))