def lcdict_to_pickle(lcdict, outfile=None):
    '''This just writes the lcdict to a pickle.

    If outfile is None, then will try to get the name from
    lcdict['objectid'] and write to <objectid>-hplc.pkl. If that fails, will
    write to a file named hplc.pkl.

    '''

    if not outfile and lcdict['objectid']:
        outfile = '%s-hplc.pkl' % lcdict['objectid']
    elif not outfile and not lcdict['objectid']:
        outfile = 'hplc.pkl'

    with open(outfile, 'wb') as outfd:
        pickle.dump(lcdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

    if os.path.exists(outfile):
        LOGINFO('lcdict for object: %s -> %s OK' % (lcdict['objectid'],
                                                    outfile))
        return outfile
    else:
        LOGERROR('could not make a pickle for this lcdict!')
        return None
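For reference, a minimal round-trip sketch under the same conventions; the lcdict contents below are purely illustrative, real lcdicts also carry the light curve columns.

import pickle

# hypothetical minimal lcdict used only to demonstrate the round trip
lcdict = {'objectid': 'HP1234567', 'mags': [12.10, 12.20, 12.15]}

outfile = '%s-hplc.pkl' % lcdict['objectid']
with open(outfile, 'wb') as outfd:
    pickle.dump(lcdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

with open(outfile, 'rb') as infd:
    restored = pickle.load(infd)
assert restored['objectid'] == lcdict['objectid']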
def register(name):
    # hit api to see if name is already registered
    if check_name(name)['status'] == 'error':
        print('{} already registered.'.format(name))
    else:
        # generate new keypair
        (pub, priv) = rsa.newkeys(512)
        if not os.path.exists(KEY_LOCATION):
            os.mkdir(KEY_LOCATION)
        # save to disk
        with open('{}/.key'.format(KEY_LOCATION), 'wb') as f:
            pickle.dump((pub, priv), f, pickle.HIGHEST_PROTOCOL)
        r = requests.post('{}/names'.format(API_LOCATION),
                          data={'name': name, 'n': pub.n, 'e': pub.e})
        if r.json()['status'] == 'success':
            print('Successfully registered new name: {}'.format(name))
        else:
            print('Error registering name: {}'.format(name))
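A minimal sketch of the matching load step, assuming the same KEY_LOCATION constant and a .key file written by register() above; unpickling the keypair requires the same rsa package to be importable.

import os
import pickle

# assumption: KEY_LOCATION must match the directory used by register()
KEY_LOCATION = os.path.expanduser('~/.example_keys')

def load_keypair():
    # the keypair was stored as a (pub, priv) tuple with pickle.dump()
    with open('{}/.key'.format(KEY_LOCATION), 'rb') as f:
        pub, priv = pickle.load(f)
    return pub, priv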
def get_item_history(self, prior_or_train, reconstruct=False, none_idx=49689):
    filepath = self.cache_dir + './item_history_' + prior_or_train + '.pkl'
    if (not reconstruct) and os.path.exists(filepath):
        with open(filepath, 'rb') as f:
            item_history = pickle.load(f)
    else:
        up = self.get_users_orders(prior_or_train).sort_values(['user_id', 'order_number', 'product_id'], ascending=True)
        item_history = up.groupby(['user_id', 'order_number'])['product_id'].apply(list).reset_index()
        item_history.loc[item_history.order_number == 1, 'product_id'] = item_history.loc[item_history.order_number == 1, 'product_id'] + [none_idx]
        item_history = item_history.sort_values(['user_id', 'order_number'], ascending=True)
        # accumulate
        item_history['product_id'] = item_history.groupby(['user_id'])['product_id'].transform(pd.Series.cumsum)
        # get unique item list
        item_history['product_id'] = item_history['product_id'].apply(set).apply(list)
        item_history = item_history.sort_values(['user_id', 'order_number'], ascending=True)
        # shift each group to make it history
        item_history['product_id'] = item_history.groupby(['user_id'])['product_id'].shift(1)
        for row in item_history.loc[item_history.product_id.isnull(), 'product_id'].index:
            item_history.at[row, 'product_id'] = [none_idx]
        item_history = item_history.sort_values(['user_id', 'order_number'], ascending=True).groupby(['user_id'])['product_id'].apply(list).reset_index()
        item_history.columns = ['user_id', 'history_items']
        with open(filepath, 'wb') as f:
            pickle.dump(item_history, f, pickle.HIGHEST_PROTOCOL)
    return item_history
def render_POST(self, request):
    if 'disconnect' in request.args:
        self._request_disconnection(request)
    else:
        # Save all the _inputdata, so it's the same next time
        conn_params = {x: request.args[x][0] for x in request.args.keys()}
        if request.args['secret_type'][0] == 'key':
            conn_params['secret_key'] = 'checked'
            conn_params['secret_password'] = ''
        else:
            conn_params['secret_key'] = ''
            conn_params['secret_password'] = 'checked'
        # Save both locally and across restarts
        request.sdata.conn_params = conn_params
        try:
            with open(DEFAULT_PATH, 'wb') as f:
                pickle.dump(conn_params, f)
        except Exception as e:
            print('### failed to save defaults: ' + str(e))
        # Do the request
        self._request_connection(request)
Source: generate_new_sample_saved_state.py (project: zipline-chinese, author: zhanghan1990)
def write_state_to_disk(cls, state, emission_rate=None):
    state_dir = cls.__module__ + '.' + cls.__name__

    full_dir = base_state_dir + '/' + state_dir

    if not os.path.exists(full_dir):
        os.makedirs(full_dir)

    if emission_rate is not None:
        name = 'State_Version_' + emission_rate + \
            str(state['obj_state'][VERSION_LABEL])
    else:
        name = 'State_Version_' + str(state['obj_state'][VERSION_LABEL])

    full_path = full_dir + '/' + name

    # pickle needs a binary-mode file handle
    with open(full_path, 'wb') as f:
        pickle.dump(state, f)
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            try:
                roidb = pickle.load(fid)
            except:
                roidb = pickle.load(fid, encoding='bytes')
        print('{} gt roidb loaded from {}'.format(self.name, cache_file))
        return roidb

    gt_roidb = [self._load_pascal_annotation(index)
                for index in self.image_index]
    with open(cache_file, 'wb') as fid:
        pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb to {}'.format(cache_file))

    return gt_roidb
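The try/except above is a common way to read pickles that were written under Python 2 from Python 3. A standalone sketch of that pattern follows; the function name and path are illustrative, and it rewinds the file before retrying, which the snippet above relies on the except branch to tolerate.

import pickle

def load_legacy_pickle(path):
    """Load a pickle that may have been written by Python 2.

    encoding='bytes' tells Python 3 to keep py2 str objects as bytes
    instead of attempting the default ASCII decoding, which is the
    usual failure mode when loading old pickles.
    """
    with open(path, 'rb') as f:
        try:
            return pickle.load(f)
        except UnicodeDecodeError:
            f.seek(0)  # rewind before retrying with a byte-safe encoding
            return pickle.load(f, encoding='bytes')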
def create_hash_district_map_dict():
    file = "cluster_map.csv"
    district_hash_map_path = os.path.join(DATA_DIR, CONCRETE_DIR, CLUSTER_MAP_SHEET_DIR, file)
    hash_data = pd.read_csv(district_hash_map_path)
    ## convert the dataframe into dict
    hash_map_rule = dict(zip(hash_data.district_hash, hash_data.district_map))
    # print(type(hash_map_rule))
    saved_file = "cluster_map.pickle"
    map_save_file = os.path.join(DATA_DIR, CONCRETE_DIR, CLUSTER_MAP_SHEET_DIR, saved_file)
    ## save into same dir as file
    with open(map_save_file, "wb") as f:
        pickle.dump(hash_map_rule, f)
    # print(hash_map_rule)

# map the district features in the input data_frame into value
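As the trailing comment suggests, the pickled mapping is later used to translate district hashes in a DataFrame. A minimal sketch of that step, with the path and column name as illustrative assumptions:

import pickle
import pandas as pd

def map_district_hashes(df, map_save_file, column="district_hash"):
    # load the hash -> district-id dict written by create_hash_district_map_dict()
    with open(map_save_file, "rb") as f:
        hash_map_rule = pickle.load(f)
    # replace the hash column with the mapped district ids
    df[column] = df[column].map(hash_map_rule)
    return df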
def _pre_process_all(self):
    if self._pre_processed_exists():
        self._load_norm_parameters()
        print("Mean = ", self._mean, ", STD = ", self._std)
        return

    print("No pre-processed dataset found, pre-processing now...")
    if not os.path.exists(self._target_directory):
        os.makedirs(self._target_directory)

    size = len(self._all_series)
    for idx, patient in enumerate(self._all_series):
        print(patient[1], str(idx + 1) + "/" + str(size))
        p.dump(self._pre_process(patient),
               open(os.path.join(self._target_directory, patient[1] + ".pick"), "wb"),
               protocol=2)

    print("Mean = ", self._mean, ", STD = ", self._std)
    p.dump((self._mean, self._std),
           open(os.path.join(self._target_directory, "norm_parameters.pick"), "wb"),
           protocol=2)
    print("Pre-processing Done!")
def __save(self):
    if self.__asynchronous == 0:
        state = {
            "version": _BobState.CUR_VERSION,
            "byNameDirs": self.__byNameDirs,
            "results": self.__results,
            "inputs": self.__inputs,
            "jenkins": self.__jenkins,
            "dirStates": self.__dirStates,
            "buildState": self.__buildState,
        }
        tmpFile = self.__path + ".new"
        try:
            with open(tmpFile, "wb") as f:
                pickle.dump(state, f)
                f.flush()
                os.fsync(f.fileno())
            os.replace(tmpFile, self.__path)
        except OSError as e:
            raise ParseError("Error saving workspace state: " + str(e))
        self.__dirty = False
    else:
        self.__dirty = True
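The write-to-temp, fsync, then os.replace() sequence above is the standard recipe for an atomic on-disk update: readers either see the old file or the complete new one. A self-contained sketch of the same pattern, assuming nothing beyond the standard library:

import os
import pickle

def atomic_pickle_dump(obj, path):
    """Write obj to path so readers never see a partially written file."""
    tmp_path = path + ".new"
    with open(tmp_path, "wb") as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)
        f.flush()
        os.fsync(f.fileno())      # force the bytes to disk
    os.replace(tmp_path, path)    # atomic rename over the old file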
def register(self, name, serializer):
    """Register ``serializer`` object under ``name``.

    Raises :class:`AttributeError` if ``serializer`` is invalid.

    .. note::

        ``name`` will be used as the file extension of the saved files.

    :param name: Name to register ``serializer`` under
    :type name: ``unicode`` or ``str``
    :param serializer: object with ``load()`` and ``dump()``
        methods

    """
    # Basic validation
    getattr(serializer, 'load')
    getattr(serializer, 'dump')

    self._serializers[name] = serializer
def dump(cls, obj, file_obj):
    """Serialize object ``obj`` to open pickle file.

    .. versionadded:: 1.8

    :param obj: Python object to serialize
    :type obj: Python object
    :param file_obj: file handle
    :type file_obj: ``file`` object

    """
    return pickle.dump(obj, file_obj, protocol=-1)


# Set up default manager and register built-in serializers
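Taken together, register() and the pickle-backed dump() above form a small serializer registry: anything with load() and dump() methods can be registered under a name. A hedged sketch of how such a registry might be wired up and used; the class and instance names here are illustrative, not necessarily the library's actual API. Note that protocol=-1 selects the highest pickle protocol available.

import pickle

class PickleSerializer(object):
    """Minimal serializer: load()/dump() wrappers around pickle."""

    @classmethod
    def load(cls, file_obj):
        return pickle.load(file_obj)

    @classmethod
    def dump(cls, obj, file_obj):
        return pickle.dump(obj, file_obj, protocol=-1)

class SerializerManager(object):
    def __init__(self):
        self._serializers = {}

    def register(self, name, serializer):
        # basic validation, as in the snippet above
        getattr(serializer, 'load')
        getattr(serializer, 'dump')
        self._serializers[name] = serializer

    def serializer(self, name):
        return self._serializers.get(name)

manager = SerializerManager()
manager.register('pickle', PickleSerializer)

with open('data.pickle', 'wb') as f:
    manager.serializer('pickle').dump({'answer': 42}, f)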
Source: useful_functions.py (project: scientific-paper-summarisation, author: EdCo95)
def write_summary(location, summary_as_list, filename):
    """
    Writes a generated summary to the specified location, writing both a pickle file and a text file; the pickle file
    for easy program reading, and a text file for easy human and ROUGE reading.
    :param location: the location to write the summary
    :param summary_as_list: the summary to write, as a list of tuples with each tuple of the form
                            (sentence, sentence_index_into_paper)
    :param filename: the name of the file to write.
    """
    with open(location + "Pickles/" + filename + ".pkl", "wb") as f:
        pickle.dump(summary_as_list, f)

    raw_sentences = [x for x, _ in summary_as_list]

    # text output: open in text mode ("w"), since the sentences are strings
    with open(location + "Text/" + filename + ".txt", "w") as f:
        for sentence in raw_sentences:
            f.write(sentence)
            f.write("\n")
def set(self, key, value, timeout=None):
    if timeout is None:
        timeout = int(time() + self.default_timeout)
    elif timeout != 0:
        timeout = int(time() + timeout)
    filename = self._get_filename(key)
    self._prune()
    try:
        fd, tmp = tempfile.mkstemp(suffix=self._fs_transaction_suffix,
                                   dir=self._path)
        with os.fdopen(fd, 'wb') as f:
            pickle.dump(timeout, f, 1)
            pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
        rename(tmp, filename)
        os.chmod(filename, self._mode)
    except (IOError, OSError):
        return False
    else:
        return True
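A cache entry written this way contains two pickles back to back: first the expiry timestamp, then the stored value. A minimal sketch of the matching read side, a simplified version of what the cache's get() has to do (filename handling and expiry semantics are assumptions based on the snippet above, where a timeout of 0 means no expiry):

import pickle
from time import time

def read_cache_entry(filename):
    """Return the cached value, or None if the entry has expired."""
    with open(filename, 'rb') as f:
        timeout = pickle.load(f)   # first pickle: expiry timestamp
        value = pickle.load(f)     # second pickle: the stored value
    if timeout != 0 and timeout < time():
        return None                # expired
    return value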
def build_hash_to_coord(paths):
    if os.path.exists("comps/mobike/sol_carl/data/h2c.p") and os.path.exists("comps/mobike/sol_carl/data/c2h.p"):
        return
    h2c, c2h = {}, {}
    for path in paths:
        for c, row in enumerate(csv.DictReader(open(path))):
            for tag in ["geohashed_end_loc", "geohashed_start_loc"]:
                if tag not in row:
                    continue
                h = row[tag]
                if h not in h2c:
                    coord = str_coord(decode(h))
                    h2c[h] = coord
                    # lat,lon = int(lat+0.5),int(lon+0.5)
                    if coord not in c2h:
                        c2h[coord] = set()
                    c2h[coord].add(h)
            if c > 0 and c % 100000 == 0:
                print(path, c)
    print(len(h2c), len(c2h))
    pickle.dump(h2c, open("comps/mobike/sol_carl/data/h2c.p", "wb"))
    pickle.dump(c2h, open("comps/mobike/sol_carl/data/c2h.p", "wb"))
def get_per_sample_tf(self, texts, field, silent=0):
    """
    Each sample is a document.
    Input:
        texts: ["train", "test"]
    """
    if self.sample_tf is not None:
        return
    self.sample_tf = {}
    self.get_per_sample_words_count(texts, field, 1)
    for text in texts:
        name = "{}/{}_sample_tf_{}.p".format(self.flags.data_path, self.name, text)
        if os.path.exists(name):
            self.sample_tf[text] = pickle.load(open(name, 'rb'))
        else:
            print("gen", name)
            tf_list = tf(self.sample_words_count[text], 0)
            pickle.dump(tf_list, open(name, 'wb'))
            self.sample_tf[text] = tf_list
        if silent == 0:
            print("\n{} sample tf done".format(text))
def mean_target_rate(name, out, idcol, ycol):
    if os.path.exists(out):
        return pickle.load(open(out, 'rb'))
    yc, cc = defaultdict(float), defaultdict(float)
    for c, row in enumerate(csv.DictReader(open(name))):
        y = float(row[ycol])
        for i in row:
            if i in [idcol, ycol]:
                continue
            v = "%s-%s" % (i, row[i])
            yc[v] += y
            cc[v] += 1.0
        if c > 0 and c % 100000 == 0:
            print("rows %d len_cc %d" % (c, len(cc)))
    for i in yc:
        yc[i] = yc[i] / cc[i]
    pickle.dump(yc, open(out, 'wb'))
    return yc
def tutor_fpout():
    pklout = os.path.join(RESDIR, TUTORPKL)
    if os.path.exists(pklout):
        with open(pklout, 'rb') as f:
            fpout = pickle.load(f)
    else:
        print('re-creating fp results ... this could take a few minutes')
        zip_archive = os.path.join(DATADIR, ZIPFILE)
        with zipfile.ZipFile(zip_archive, 'r') as zfile:
            zfile.extractall(DATADIR)
        fpout = tutor_example()
        make_clean_dat()
        os.makedirs(RESDIR, exist_ok=True)
        with open(pklout, 'wb') as f:
            pickle.dump(fpout, f)
    return fpout
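Several of the snippets on this page (tutor_fpout, gt_roidb, mean_target_rate, get_item_history) share the same compute-or-load-from-pickle caching shape. A generic, hedged sketch of that pattern as a reusable helper; the helper name and signature are this sketch's own, not taken from any of the projects above:

import os
import pickle

def cached_pickle(path, compute_fn, *args, **kwargs):
    """Return pickled results from path, computing and caching them on a miss."""
    if os.path.exists(path):
        with open(path, 'rb') as f:
            return pickle.load(f)
    result = compute_fn(*args, **kwargs)
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(result, f, protocol=pickle.HIGHEST_PROTOCOL)
    return result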
def sync(self):
    'Write dict to disk'
    if self.flag == 'r':
        return
    filename = self.filename
    tempname = filename + '.tmp'
    fileobj = open(tempname, 'wb' if self.format == 'pickle' else 'w')
    try:
        self.dump(fileobj)
    except Exception:
        os.remove(tempname)
        raise
    finally:
        fileobj.close()
    shutil.move(tempname, self.filename)    # atomic commit
    if self.mode is not None:
        os.chmod(self.filename, self.mode)
def save_pickle(self, dumpfile=DUMPFILE):
    if not self.changed:
        self.note(0, "\nNo need to save checkpoint")
    elif not dumpfile:
        self.note(0, "No dumpfile, won't save checkpoint")
    else:
        self.note(0, "\nSaving checkpoint to %s ...", dumpfile)
        newfile = dumpfile + ".new"
        f = open(newfile, "wb")
        pickle.dump(self, f)
        f.close()
        try:
            os.unlink(dumpfile)
        except os.error:
            pass
        os.rename(newfile, dumpfile)
        self.note(0, "Done.")
        return 1
def _dump_cache_data(self, simstate, dump_fp=None):
    if self.tracer.predecessors[-1] is not None:
        state = self.tracer.predecessors[-1]
    else:
        state = None

    if dump_fp:
        proj = state.project
        state.project = None
        state.history.trim()
        try:
            pickle.dump((self.tracer.bb_cnt, self.tracer.cgc_flag_bytes, state, claripy.ast.base.var_counter),
                        dump_fp, pickle.HIGHEST_PROTOCOL)
        except RuntimeError as e:  # maximum recursion depth can be reached here
            l.error("unable to cache state, '%s' during pickling", e.message)
        finally:
            state.project = proj

    # unhook receive
    receive.cache_hook = None

    # add preconstraints to tracer
    self.tracer._preconstrain_state(simstate)
def build_control_IPD_dict(self, motifs, bi_motifs):
    """
    """
    control_ipds_fn = glob.glob("control_ipds.tmp")
    control_ipds_N_fn = glob.glob("control_ipdsN.tmp")
    control_kmers_fn = glob.glob("control_ipdskmers.tmp")

    if (len(control_ipds_fn) > 1 or len(control_ipds_N_fn) > 1 or len(control_kmers_fn) > 1):
        raise Exception("*** Double check the control files. There should not be multiples for a file type.")

    control_means, not_found = self.chunk_control_matrices(control_ipds_fn[0], control_ipds_N_fn[0], control_kmers_fn[0])

    if not_found > 0:
        logging.info("")
        logging.warning("WARNING: could not find sufficient instances (>=%s) for %s motifs (out of %s total) in control data!" % (self.opts.min_motif_count, not_found, (len(motifs) + len(bi_motifs))))
        logging.warning("   * If this is alarming, try reducing --min_motif_count or increasing --N_reads, although you just might not have those motifs in your reference sequence.")
        logging.info("")

    logging.info("Writing control data to a pickled file: %s" % self.opts.control_pkl_name)
    pickle.dump(control_means, open(self.opts.control_pkl_name, "wb"))

    return control_means
def feat_ann(c=0):
    batch_size = 700
    feats_eeg = scipy.stats.zscore(tools.feat_eeg(data[:, :, 0]))
    feats_emg = scipy.stats.zscore(tools.feat_emg(data[:, :, 1]))
    feats_eog = scipy.stats.zscore(tools.feat_eog(data[:, :, 2]))
    feats_all = np.hstack([feats_eeg, feats_emg, feats_eog])

    results = dict()
    r = cv(feats_eeg, target, groups, models.ann, name='eeg', stop_after=15, batch_size=batch_size, counter=c, plot=plot)
    results.update(r)
    r = cv(np.hstack([feats_eeg, feats_eog]), target, groups, models.ann, name='eeg+eog', batch_size=batch_size, stop_after=15, counter=c, plot=plot)
    results.update(r)
    r = cv(np.hstack([feats_eeg, feats_emg]), target, groups, models.ann, name='eeg+emg', batch_size=batch_size, stop_after=15, counter=c, plot=plot)
    results.update(r)
    r = cv(feats_all, target, groups, models.ann, name='all', batch_size=batch_size, stop_after=15, counter=c, plot=plot)
    results.update(r)

    with open('results_electrodes_feat.pkl', 'wb') as f:
        pickle.dump(results, f)
def inference(self, g):
    """We load a graph-tool graph-object and fit an hsbm:
        - hierarchical
        - nonoverlapping
        - degree-corrected
    We get a state-object which is a 'NestedBlockState'.
    We save it as 'state.pkl'.
    """
    if self.args.state is None:
        state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True, overlap=False)
        ## save state
        with open(os.path.join(self.out_path, 'state.pkl'), 'wb') as f:
            pickle.dump(state, f)
    ## if the state already exists, we just load
    else:
        with open(self.args.state, 'rb') as f:
            state = pickle.load(f)
    return state
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--caption_file', type=str, default='Data/text.txt',
                        help='caption file')
    parser.add_argument('--data_dir', type=str, default='Data',
                        help='Data Directory')

    args = parser.parse_args()

    model = skipthoughts.load_model()
    encoded_captions = {}
    file_path = os.path.join(args.caption_file)
    dump_path = os.path.join(args.data_dir, 'enc_text.pkl')

    with open(file_path) as f:
        str_captions = f.read()
        captions = str_captions.split('\n')
        print(captions)

    encoded_captions['features'] = skipthoughts.encode(model, captions)

    pickle.dump(encoded_captions,
                open(dump_path, "wb"))
    print('Finished extracting Skip-Thought vectors of the given text '
          'descriptions')
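A hedged sketch of consuming the pickle written above, assuming only that it holds a dict whose 'features' entry is an array-like of encoded captions; the function name and default path are this sketch's own.

import pickle

def load_encoded_captions(dump_path='Data/enc_text.pkl'):
    with open(dump_path, 'rb') as f:
        encoded_captions = pickle.load(f)
    features = encoded_captions['features']
    print('loaded {} encoded captions'.format(len(features)))
    return features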
def get_clusters_for_project(project_id, video_names):
    embs = []
    filenames = []
    for video_name in video_names:
        filename_to_embedding = pickle.load(open(os.path.join('temp', project_id, video_name, 'filename_to_emb.pkl')))  # TODO: call get_inception_embeddings on frame dir, but for now just use the pickle
        for filename, embedding in filename_to_embedding.iteritems():
            embs.append(embedding)
            filenames.append(filename)
    labels = cluster(embs, eps=12, min_pts=3)
    d = {}
    for video_name in video_names:
        d[video_name] = {}
    for i in range(len(filenames)):
        video_name = video_name_from_filename(filenames[i])
        d[video_name][filenames[i]] = labels[i]
    with open(os.path.join('temp', project_id, 'filename_to_clust.pkl'), 'w') as pickle_file:
        pickle.dump(d, pickle_file)
    for video_name in d:
        for filename in d[video_name]:
            mkdir_p(os.path.join('temp', project_id, 'clusters', str(d[video_name][filename])))
            copy(filename, os.path.join('temp', project_id, 'clusters', str(d[video_name][filename]), os.path.basename(filename)))
    '''filenames = [filename[filename.rindex('/')+1:] for filename in filenames]
    embs = np.array(embs)
    candidates = [(11, 6)]
    candidates = [(eps, min_pts) for eps in range(7, 15) for min_pts in range(2, 10)]'''
def main_sim_multi(cor=0.75, rs=0.5):
    """
    multitask simulated data
    """
    dic1, rel1, turk1, dic2, rel2, turk2 = simulate_multitask(cor)
    lc1 = crowd_model.labels_collection(turk1, rel1)
    lc2 = crowd_model.labels_collection(turk2, rel2)
    for rs in [0.1, 0.2, 0.3, 0.4, 0.5]:
        res = main_multitask([lc1, lc2], [dic1, dic2], rs)
        import pickle
        f = open('simult_' + str(cor) + '.pkl', 'w')
        pickle.dump(res, f)
        f.close()

################################################
################################################
# multitask on simulated data
Source: make_datasets.py (project: PersonalizedMultitaskLearning, author: mitmedialab)
def getWellbeingTaskListFromDataset(datafile, data_path=PATH_TO_DATASETS, subdivide_phys=True):
    df = pd.DataFrame.from_csv(data_path + datafile)
    wanted_labels = [x for x in df.columns.values if '_Label' in x and 'tomorrow_' in x and 'Evening' in x and 'Alertness' not in x and 'Energy' not in x]
    wanted_feats = [x for x in df.columns.values if x != 'user_id' and x != 'timestamp' and x != 'dataset' and x != 'Cluster' and '_Label' not in x]
    core_name = getDatasetCoreName(datafile)
    modality_dict = getModalityDict(wanted_feats, subdivide_phys=subdivide_phys)

    for dataset in ['Train', 'Val', 'Test']:
        task_dict_list = []
        for target_label in wanted_labels:
            mini_df = helper.normalizeAndFillDataDf(df, wanted_feats, [target_label], suppress_output=True)
            mini_df.reindex(np.random.permutation(mini_df.index))
            X, y = helper.getTensorFlowMatrixData(mini_df, wanted_feats, [target_label], dataset=dataset, single_output=True)
            task_dict = dict()
            task_dict['X'] = X
            task_dict['Y'] = y
            task_dict['Name'] = target_label
            task_dict['ModalityDict'] = modality_dict
            task_dict_list.append(task_dict)
        pickle.dump(task_dict_list, open(data_path + "datasetTaskList-" + core_name + "_" + dataset + ".p", "wb"))
def add_scrape_data(symbol, scrape_data, complete):
    """Add data regarding scrape to scrape log."""
    if complete:
        complete_key = 'complete'
    else:
        complete_key = 'incomplete'
    data_log = pickle.load(open(settings.SCRAPE_LOG_FILE_PATH, "rb"))
    try:
        data_log[symbol]
        data_log[symbol][complete_key] = scrape_data
    except KeyError:
        data_log[symbol] = {}
        data_log[symbol]['complete'] = None
        data_log[symbol]['incomplete'] = None
        data_log[symbol][complete_key] = scrape_data
    pickle.dump(data_log, open(settings.SCRAPE_LOG_FILE_PATH, "wb"))
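add_scrape_data() assumes the log pickle already exists on disk. A hedged sketch of a bootstrap helper that creates an empty log the first time; the helper name is this sketch's own, and settings.SCRAPE_LOG_FILE_PATH is taken from the snippet above.

import os
import pickle

def ensure_scrape_log(path):
    """Create an empty scrape log pickle if one does not exist yet."""
    if not os.path.exists(path):
        with open(path, "wb") as f:
            pickle.dump({}, f)

# usage, assuming the same settings module as above:
# ensure_scrape_log(settings.SCRAPE_LOG_FILE_PATH)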