def save_good_coin_ids(data_dir, seed_image_id, cut_off, remove_image_ids):
    # TODO: save_good_coin_ids is not correct; this needs a database.
    good_coin_ids = set()
    filename = data_dir + 'good_coin_ids.pickle'
    if os.path.exists(filename):
        # good_coin_ids = set(pickle.load(open(filename, "rb")))
        pass
    # results_dict is assumed to be a module-level global in the original project
    values = results_dict[seed_image_id].iteritems()
    for test_image_id, test_values in values:
        max_value, angle = test_values
        coin_id = test_image_id / 100
        if max_value > cut_off:
            good_coin_ids.add(test_image_id)
    good_coin_ids.difference_update(remove_image_ids)
    print 'good_coin_ids len: ', len(good_coin_ids)
    pickle.dump(good_coin_ids, open(filename, "wb"))
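# A minimal read-side counterpart to the function above (a hypothetical helper, not part of
# the original project), showing how the pickled set of IDs would be loaded back:
import os
import pickle

def load_good_coin_ids(data_dir):
    """Return the previously saved set of good coin IDs, or an empty set if none exists."""
    filename = data_dir + 'good_coin_ids.pickle'
    if not os.path.exists(filename):
        return set()
    with open(filename, 'rb') as f:
        return set(pickle.load(f))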
def gt_roidb(self):
    """
    return ground truth image regions database
    :return: imdb[image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} gt roidb loaded from {}'.format(self.name, cache_file)
        return roidb
    gt_roidb = [self.load_pascal_annotation(index) for index in self.image_set_index]
    with open(cache_file, 'wb') as fid:
        cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb to {}'.format(cache_file)
    return gt_roidb
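# The load-from-cache-or-compute-and-dump pattern above recurs in every roidb variant below.
# A generic helper along these lines (a sketch, not part of the original code base) captures it:
import os
import cPickle

def cached_pickle(cache_file, compute_fn):
    """Return the object cached in cache_file if it exists; otherwise compute, cache and return it."""
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            return cPickle.load(fid)
    obj = compute_fn()
    with open(cache_file, 'wb') as fid:
        cPickle.dump(obj, fid, cPickle.HIGHEST_PROTOCOL)
    return obj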
def selective_search_roidb(self, gt_roidb):
    """
    get selective search roidb and ground truth roidb
    :param gt_roidb: ground truth roidb
    :return: roidb of selective search (ground truth included)
    """
    cache_file = os.path.join(self.cache_path, self.name + '_ss_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} ss roidb loaded from {}'.format(self.name, cache_file)
        return roidb
    if self.image_set != 'test':
        ss_roidb = self.load_selective_search_roidb(gt_roidb)
        roidb = IMDB.merge_roidbs(gt_roidb, ss_roidb)
    else:
        roidb = self.load_selective_search_roidb(None)
    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote ss roidb to {}'.format(cache_file)
    return roidb
def gt_roidb(self):
    """
    return ground truth image regions database
    :return: imdb[image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} gt roidb loaded from {}'.format(self.name, cache_file)
        return roidb
    gt_roidb = [self.load_annotation(index) for index in self.image_set_index]
    with open(cache_file, 'wb') as fid:
        cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb to {}'.format(cache_file)
    return gt_roidb
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.
    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} gt roidb loaded from {}'.format(self.name, cache_file)
        return roidb
    gt_roidb = [self._load_pascal_annotation(index)
                for index in self.image_index]
    with open(cache_file, 'wb') as fid:
        cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb to {}'.format(cache_file)
    return gt_roidb
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.
    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if osp.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} gt roidb loaded from {}'.format(self.name, cache_file)
        return roidb
    gt_roidb = [self._load_coco_annotation(index)
                for index in self._image_index]
    with open(cache_file, 'wb') as fid:
        cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb to {}'.format(cache_file)
    return gt_roidb
def render_plots(pickle_file, output):
    with open(pickle_file, 'rb') as f:
        results = pickle.load(f)
    C.Util.plot(
        method_labels=results['methods'],
        data_bytes=results['comp_bytes'],
        ratios=results['ratios'],
        com_speed=results['total_comp_speed'],
        com_speed_stderr=results['total_comp_speed_std'],
        dcom_speed=results['total_decomp_speed'],
        dcom_speed_stderr=results['total_decomp_speed_std'],
        save=output,
        dpi=300,
        bw=False
    )
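# render_plots expects a pickle holding a dict with the keys unpacked above. A hypothetical
# example of producing such a file (all values below are made-up placeholders):
import pickle

results = {
    'methods': ['zlib', 'lz4'],
    'comp_bytes': [1024, 2048],
    'ratios': [2.1, 1.7],
    'total_comp_speed': [120.0, 450.0],
    'total_comp_speed_std': [3.0, 9.0],
    'total_decomp_speed': [300.0, 900.0],
    'total_decomp_speed_std': [5.0, 12.0],
}
with open('results.pickle', 'wb') as f:
    pickle.dump(results, f)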
def verify_all(folder_paths):
    """
    Calls verify_one on each folder path. Also checks to make sure all the
    answer vocabularies are the same.
    """
    adict_paths = []
    for folder_path in folder_paths:
        paths = verify_one(folder_path)
        adict_paths.append(paths[2])
    adicts = []
    for path in adict_paths:
        with open(path, 'r') as f:
            adict = json.load(f)
        adicts.append(adict)
    if len(adicts) > 1:
        for a2 in adicts[1:]:
            if set(adicts[0].keys()) != set(a2.keys()):
                print set(adicts[0].keys()) - set(a2.keys())
                print set(a2.keys()) - set(adicts[0].keys())
                raise Exception('Answer vocab mismatch')
    return adicts
def verify_one(folder_path):
    """
    Makes sure all the required files exist in the folder. If so, returns the
    paths to all the files.
    """
    model_path = glob.glob(folder_path + '/*.caffemodel')
    assert len(model_path) == 1, 'one .caffemodel per folder, please'
    model_path = model_path[0]
    proto_path = folder_path + '/proto_test.prototxt'
    adict_path = folder_path + '/adict.json'
    vdict_path = folder_path + '/vdict.json'
    aux_path = folder_path + '/aux.json'
    assert os.path.exists(proto_path), 'proto_test.prototxt missing'
    assert os.path.exists(adict_path), 'adict.json missing'
    assert os.path.exists(vdict_path), 'vdict.json missing'
    assert os.path.exists(aux_path), 'aux.json missing'
    with open(aux_path, 'r') as f:
        aux = json.load(f)
    batch_size = int(aux['batch_size'])
    data_shape = tuple(map(int, aux['data_shape']))
    img_feature_prefix = aux['img_feature_prefix']
    spatial_coord = aux['spatial_coord'] if 'spatial_coord' in aux else False
    glove = aux['glove'] if 'glove' in aux else False
    return model_path, proto_path, adict_path, vdict_path, batch_size, data_shape, img_feature_prefix, spatial_coord, glove
def get_qid_valid_answer_dict(ques_file, adict):
    """
    Returns a dictionary mapping question IDs to valid neuron indices.
    """
    print 'Multiple choice mode: making valid answer dictionary...'
    valid_answer_dict = {}
    with open(ques_file, 'r') as f:
        qdata = json.load(f)
    for q in qdata['questions']:
        valid_answer_dict[q['question_id']] = q['multiple_choices']
    for qid in valid_answer_dict:
        answers = valid_answer_dict[qid]
        valid_indices = []
        for answer in answers:
            if answer in adict:
                valid_indices.append(adict[answer])
        if len(valid_indices) == 0:
            print "we won't be able to answer qid", qid
        valid_answer_dict[qid] = valid_indices
    return valid_answer_dict
def funcion_Open(self):
    fileName = QtGui.QFileDialog.getOpenFileName()
    if len(fileName) > 0:
        self.timer.stop()
        self.variableStats = {}
        self.reqStats = {}
        self.limpiatablas()
        self.comboBox.clear()
        self.comboBox.addItem("All")
        self.comboBox_2.clear()
        self.comboBox_2.addItem("All")
        self.comboBox_3.clear()
        self.comboBox_3.addItem("All")
        Proxynet.clearRequests()
        # open in binary mode so the pickled requests load correctly
        f = open(fileName, 'rb')
        Proxynet.addRequests(pickl.load(f))
        f.close()
        self.numRequests = 0
        self.timerFunc()
        self.timer.start(500)
def register(self, name, serializer):
    """Register ``serializer`` object under ``name``.
    Raises :class:`AttributeError` if ``serializer`` is invalid.
    .. note::
        ``name`` will be used as the file extension of the saved files.
    :param name: Name to register ``serializer`` under
    :type name: ``unicode`` or ``str``
    :param serializer: object with ``load()`` and ``dump()``
        methods
    """
    # Basic validation
    getattr(serializer, 'load')
    getattr(serializer, 'dump')
    self._serializers[name] = serializer
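# register() only duck-type checks that the serializer exposes load() and dump(). A hypothetical
# serializer satisfying that interface, backed by the standard json module:
import json

class JSONSerializer(object):
    """Minimal object exposing the load()/dump() interface that register() validates."""

    @staticmethod
    def load(file_obj):
        return json.load(file_obj)

    @staticmethod
    def dump(obj, file_obj):
        json.dump(obj, file_obj, indent=2)

# manager.register('json', JSONSerializer())   # 'manager' is a hypothetical owner of _serializers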
def from_command_line(cls, *args, **keys):
    params = list()
    for name, param in cls.params():
        if name not in keys:
            params.append((name, param))
    bot_name = inspect.getmodulename(inspect.stack()[1][1])
    if "ABUSEHELPER_CONF_FROM_STDIN" in os.environ:
        defaults = dict(pickle.load(sys.stdin))
        defaults.setdefault("bot_name", bot_name)
        added = cls._from_dict(params, **defaults)
    else:
        added = cls._from_sys_argv(params, bot_name=bot_name)
    added.update(keys)
    return cls(*args, **added)
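# When ABUSEHELPER_CONF_FROM_STDIN is set, the defaults arrive as a pickled mapping on stdin.
# A sketch of a launcher feeding a child bot that way (script name and parameter values are hypothetical):
import os
import pickle
import subprocess
import sys

env = dict(os.environ, ABUSEHELPER_CONF_FROM_STDIN="1")
proc = subprocess.Popen([sys.executable, "mybot.py"], stdin=subprocess.PIPE, env=env)
proc.stdin.write(pickle.dumps({"bot_name": "mybot", "log_file": "mybot.log"}))
proc.stdin.close()
proc.wait()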
def __init__(self, state_file=None):
    self.file = None
    self.sessions = dict()
    self.state = dict()
    if state_file is not None:
        self.file = open_file(state_file)
        try:
            if not lock_file_nonblocking(self.file):
                raise RuntimeError("state file %r already in use" % state_file)
        except:
            self.file.close()
            raise
        try:
            self.state = pickle.load(self.file)
        except EOFError:
            pass
    self.errors = idiokit.consume()
def load_data(self, dataset_path, share=False):
    """Load the data set.
    """
    f = gzip.open(dataset_path, 'rb')
    train_set, valid_set, test_set = pickle.load(f)
    f.close()
    # share the data
    train_set_x, train_set_y = self.shared_dataset(train_set, train=True)
    valid_set_x, valid_set_y = self.shared_dataset(valid_set)
    test_set_x, test_set_y = self.shared_dataset(test_set)
    if share:
        reval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
    else:
        reval = [train_set, valid_set, test_set]  # non-shared data (the Crino reference code does not share the data)
    return reval
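# shared_dataset is not shown in this snippet. A sketch following the standard Theano tutorial
# pattern (assuming the labels are integer class IDs; the train flag of the original is omitted):
import numpy
import theano
import theano.tensor as T

def shared_dataset(data_xy, borrow=True):
    """Copy an (x, y) pair into Theano shared variables so minibatches can live on the GPU."""
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    # y is stored as float for GPU storage but cast back to int for use as labels
    return shared_x, T.cast(shared_y, 'int32')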
def set_params(mo, bparams):
    # each layer is assumed to hold exactly two weight arrays (kernel and bias)
    i = 0
    for la in mo.layers:
        we = bparams[i:i + 2]
        print len(we)
        la.set_weights(we)
        i += 2
    return mo

#with open("best_model_keras.pkl", 'r') as f:
#    b_params = pkl.load(f)
#
#model = set_params(model, b_params)
#out = model.predict(xvl, batch_size=xvl.shape[0], verbose=0)
#error = np.mean(np.mean(np.power(out - yvl, 2), axis=1))
#print "Error vl", error
#sys.exit()
#init_p = get_params(model)
#with open("init_keras_param.pkl", 'w') as f:
#    pkl.dump(init_p, f)
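# get_params, referenced in the commented-out block above, is not shown. A sketch consistent
# with set_params' assumption of two weight arrays (kernel and bias) per layer:
def get_params(mo):
    """Collect every layer's weights into one flat list, the inverse of set_params above."""
    bparams = []
    for la in mo.layers:
        bparams.extend(la.get_weights())
    return bparams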
def loadData(self, filename, verbose=True, replace_missing=True):
    ''' Get the data from a text file in one of 3 formats: matrix, sparse, binary_sparse'''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()
    if self.use_pickle and os.path.exists(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle")):
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
            vprint(verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            return pickle.load(pickle_file)
    if 'format' not in self.info.keys():
        self.getFormatData(filename)
    if 'feat_num' not in self.info.keys():
        self.getNbrFeatures(filename)
    data_func = {'dense': data_io.data, 'sparse': data_io.data_sparse, 'sparse_binary': data_io.data_binary_sparse}
    data = data_func[self.info['format']](filename, self.info['feat_num'])
    # IMPORTANT: when we replace missing values we double the number of variables
    if self.info['format'] == 'dense' and replace_missing and np.any(map(np.isnan, data)):
        vprint(verbose, "Replace missing values by 0 (slow, sorry)")
        data = data_converter.replace_missing(data)
    if self.use_pickle:
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
            vprint(verbose, "Saving pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            p = pickle.Pickler(pickle_file)
            p.fast = True
            p.dump(data)
    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return data
def loadLabel(self, filename, verbose=True):
    ''' Get the solution/truth values'''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()
    if self.use_pickle and os.path.exists(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle")):
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
            vprint(verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            return pickle.load(pickle_file)
    if 'task' not in self.info.keys():
        self.getTypeProblem(filename)
    # IG: Here change to accommodate the new multiclass label format
    if self.info['task'] == 'multilabel.classification':
        label = data_io.data(filename)
    elif self.info['task'] == 'multiclass.classification':
        label = data_converter.convert_to_num(data_io.data(filename))
    else:
        label = np.ravel(data_io.data(filename))  # get a column vector
        # label = np.array([np.ravel(data_io.data(filename))]).transpose()  # get a column vector
    if self.use_pickle:
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
            vprint(verbose, "Saving pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            p = pickle.Pickler(pickle_file)
            p.fast = True
            p.dump(label)
    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return label
def _split_train_tst(self):
    """
    Divide the data into training and testing data.
    Create X_trn and X_tst for both forward and backward passes, and Y_trn and Y_tst.
    Note that only the reviews are changed, and not the summary.
    :return: None
    """
    num_samples = self.Y.shape[0]
    mapper_file = self.checkpointer.get_mapper_file_location()
    if not self.checkpointer.is_mapper_checkpointed():
        print 'No mapper checkpoint found. Fresh loading in progress ...'
        # Now shuffle the data
        sample_id = range(num_samples)
        random.shuffle(sample_id)
        print 'Dumping the mapper shuffle for reuse.'
        Pickle.dump(sample_id, open(mapper_file, 'wb'))
        print 'Dump complete. Moving forward...'
    else:
        print 'Mapper checkpoint found... Reading from mapper dump'
        sample_id = Pickle.load(open(mapper_file, 'rb'))
        print 'Mapper unpickling complete... Moving forward...'
    self.X_fwd = self.X_fwd[sample_id]
    self.X_bwd = self.X_bwd[sample_id]
    self.Y = self.Y[sample_id]
    # Now divide the data into test and train sets
    test_fraction = 0.01
    self.test_size = int(test_fraction * num_samples)
    self.train_size = num_samples - self.test_size
    # Forward review
    self.X_trn_fwd = self.X_fwd[0:self.train_size]
    self.X_tst_fwd = self.X_fwd[self.train_size:num_samples]
    # Backward review
    self.X_trn_bwd = self.X_bwd[0:self.train_size]
    self.X_tst_bwd = self.X_bwd[self.train_size:num_samples]
    # Summary
    self.Y_trn = self.Y[0:self.train_size]
    self.Y_tst = self.Y[self.train_size:num_samples]
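# A minimal illustration of the shuffle-once-and-checkpoint idea used above, outside the class
# (the sample count and file name are made-up placeholders):
import random
import cPickle as Pickle

num_samples = 10
sample_id = list(range(num_samples))
random.shuffle(sample_id)
Pickle.dump(sample_id, open('mapper.pkl', 'wb'))
assert Pickle.load(open('mapper.pkl', 'rb')) == sample_id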