def load_models(models_dir):
"""
Load saved models from disk. This will attempt to unpickle all files in a
directory; any files that give errors on unpickling (such as README.txt) will
be skipped.
Inputs:
- models_dir: String giving the path to a directory containing model files.
Each model file is a pickled dictionary with a 'model' field.
Returns:
A dictionary mapping model file names to models.
"""
models = {}
for model_file in os.listdir(models_dir):
with open(os.path.join(models_dir, model_file), 'rb') as f:
try:
models[model_file] = pickle.load(f)['model']
except pickle.UnpicklingError:
continue
return models
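# A minimal usage sketch for load_models (the "saved_models" directory name is
# an assumption): pickled model files end up in a dict keyed by file name, and
# files that fail to unpickle (e.g. a README.txt) are skipped.
def demo_load_models():
    models = load_models('saved_models')
    for file_name, model in sorted(models.items()):
        print('%s -> %s' % (file_name, type(model).__name__))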
def get_data(name):
    """Load saved data for the given name (handles both the new and the old on-disk format)"""
    # New version
    if os.path.isfile(name + 'data.pickle'):
        with open(name + 'data.pickle', 'rb') as current_f:
            d2 = cPickle.load(current_f)
    # Old version
    else:
        with open(name, 'rb') as current_f:
            d1 = cPickle.load(current_f)
        data1 = d1[0]
        data = np.array([data1[:, :, :, :, :, 0], data1[:, :, :, :, :, 1]])
        # Convert information values from nats (natural log) to bits (log base 2)
        normalization_factor = 1 / np.log2(np.e)
        epochsInds = np.arange(0, data.shape[4])
        d2 = {}
        d2['epochsInds'] = epochsInds
        d2['information'] = data / normalization_factor
    return d2
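# The conversion above uses log2(x) = ln(x) * log2(e): dividing by
# normalization_factor (= 1 / log2(e)) turns information measured in nats into
# bits. A small numeric sanity check of that identity:
import numpy as np

def demo_nats_to_bits():
    value_in_nats = np.log(8.0)                    # ln(8) ~= 2.079 nats
    value_in_bits = value_in_nats * np.log2(np.e)  # ~= 3.0 bits, i.e. log2(8)
    print(value_in_nats, value_in_bits)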
def __init__(self, filename='word2vec.pklz'):
    """
    Python Word2vec wrapper: loads a gzipped, pickled word -> vector mapping.
    """
    super().__init__()
    self.name = 'word2vec'
    self.load(filename)
    self.vocab_cnt = len(self)
    self.dims = self[list(self.keys())[0]].shape[0]
    print('Vocabulary size: ' + str(self.vocab_cnt))
    print('Vector dimensions: ' + str(self.dims))
    self.word2idx = {w: i for i, w in enumerate(self.keys())}
    self.idx2word = {i: w for i, w in enumerate(self.keys())}
    self._matrix = np.array(list(self.values()))
    print(self._matrix.shape)
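# A sketch of how the cached word2idx / _matrix members could be used for a
# cosine-similarity lookup; most_similar is not part of the original class and
# is shown here only as an illustration.
import numpy as np

def most_similar(w2v, word, topn=5):
    query = w2v._matrix[w2v.word2idx[word]]
    norms = np.linalg.norm(w2v._matrix, axis=1) * np.linalg.norm(query)
    scores = w2v._matrix.dot(query) / np.maximum(norms, 1e-12)
    best = np.argsort(-scores)[1:topn + 1]  # index 0 is the query word itself
    return [(w2v.idx2word[i], float(scores[i])) for i in best]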
def load_caltech101(folder=CALTECH101_DIR, one_hot=True, partitions=None, filters=None, maps=None):
path = folder + "/caltech101.pickle"
with open(path, "rb") as input_file:
X, target_name, files = cpickle.load(input_file)
    list_of_targets = sorted(set(target_name))
    dict_name_ID = {name: ID for ID, name in enumerate(list_of_targets)}
    dict_ID_name = {ID: name for name, ID in dict_name_ID.items()}
    Y = [dict_name_ID[name_y] for name_y in target_name]
if one_hot:
Y = to_one_hot_enc(Y)
dataset = Dataset(data=X, target=Y, info={'dict_name_ID': dict_name_ID, 'dict_ID_name': dict_ID_name},
sample_info=[{'target_name': t, 'files': f} for t, f in zip(target_name, files)])
if partitions:
res = redivide_data([dataset], partitions, filters=filters, maps=maps, shuffle=True)
res += [None] * (3 - len(res))
return Datasets(train=res[0], validation=res[1], test=res[2])
return dataset
def load_cifar10(folder=CIFAR10_DIR, one_hot=True, partitions=None, filters=None, maps=None, balance_classes=False):
path = folder + "/cifar-10.pickle"
with open(path, "rb") as input_file:
X, target_name, files = cpickle.load(input_file)
X = np.array(X)
    list_of_targets = sorted(set(target_name))
    dict_name_ID = {name: ID for ID, name in enumerate(list_of_targets)}
    dict_ID_name = {ID: name for name, ID in dict_name_ID.items()}
    Y = [dict_name_ID[name_y] for name_y in target_name]
if one_hot:
Y = to_one_hot_enc(Y)
dataset = Dataset(data=X, target=Y, info={'dict_name_ID': dict_name_ID, 'dict_ID_name': dict_ID_name},
sample_info=[{'target_name': t, 'files': f} for t, f in zip(target_name, files)])
if partitions:
        res = redivide_data([dataset], partitions, filters=filters, maps=maps, shuffle=True, balance_classes=balance_classes)
res += [None] * (3 - len(res))
return Datasets(train=res[0], validation=res[1], test=res[2])
return dataset
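# A minimal usage sketch; the 0.8/0.1/0.1 split is only an example. With
# `partitions` given, the function returns a Datasets object with train /
# validation / test members instead of a single Dataset.
def demo_load_cifar10():
    datasets = load_cifar10(one_hot=True, partitions=(0.8, 0.1, 0.1))
    print(datasets.train.data.shape, datasets.validation.data.shape)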
def load_codex_list():
'''
Loads codex list from file
to show in select menu
Parameters
------------
None
Returns
------------
* codex_list (string[]): Array of codex
'''
codex_list = None
with open(os.path.join(ROOT_DIR, 'data/res/codex_list'), 'rb') as f_in:
codex_list = pickle.load(f_in)
return codex_list
def load_pickle(f):
"""
loads and returns the content of a pickled file
it handles the inconsistencies between the pickle packages available in Python 2 and 3
"""
try:
import cPickle as thepickle
except ImportError:
import _pickle as thepickle
try:
ret = thepickle.load(f, encoding='latin1')
except TypeError:
ret = thepickle.load(f)
return ret
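# Usage sketch for load_pickle: it expects an already opened binary file
# object, so the caller remains responsible for opening and closing it
# (the file name below is hypothetical).
def demo_load_pickle(path='embeddings.pkl'):
    with open(path, 'rb') as f:
        return load_pickle(f)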
def setUp(self):
self.seed = 0
file_path = os.path.join(TEST_DATA_DIR, ML_100K_RATING_PKL)
with gzip.open(file_path, 'rb') as f:
if sys.version_info[0] == 3:
ratings = cPickle.load(f, encoding='latin1')
else:
ratings = cPickle.load(f)
self.n_user = 943
self.n_item = 1682
self.assertEqual(ratings.shape[0], 100000)
self.assertEqual(ratings[:, 0].min(), 1)
self.assertEqual(ratings[:, 0].max(), self.n_user)
self.assertEqual(ratings[:, 1].min(), 1)
self.assertEqual(ratings[:, 1].max(), self.n_item)
# let user_id / item_id start from 0
ratings[:, 0] = ratings[:, 0] - 1
ratings[:, 1] = ratings[:, 1] - 1
self.ratings = ratings
def get(fn, *args, **kwargs):
    """Fetch the result of fn from the Redis cache, computing and storing it first on a cache miss.
    fn: the function whose result is cached; any positional/keyword arguments are forwarded to it
    return: the data returned by fn
    NOTE: the cache key is derived from the function name only, not from its arguments."""
    key = gen_keyname(fn)
    r = createRedis()
    #r.flushall()
    if not r.exists(key):
        o = fn(*args, **kwargs)
        # Serialize the result to a byte string before writing it to Redis
        f = cStringIO.StringIO()
        cPickle.dump(o, f)
        s = f.getvalue()
        f.close()
        r.set(key, s)
    s = r.get(key)
    f = cStringIO.StringIO(s)
    o = cPickle.load(f)
    f.close()
    return o
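# A usage sketch for the cache helper above, assuming a Redis server is
# reachable through createRedis(); slow_query is a hypothetical expensive
# function whose result is pickled into Redis on the first call and read back
# on later calls with the same arguments.
def slow_query(n):
    return [i * i for i in range(n)]

def demo_cached_call():
    first = get(slow_query, 1000)   # computed, then stored under the fn's key
    second = get(slow_query, 1000)  # served from the Redis cache
    assert first == second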
def load_transformer_list(config_data):
output_directory = config_data['embeddings_directory']
output_basename = config_data['embeddings_basename']
path = os.path.join(output_directory, output_basename)
config_fname = os.path.join(path, 'config.json')
with open(config_fname, 'r') as json_data:
wemb_config = json.load(json_data)
ngrams = wemb_config['ngrams']
transformers = []
for i in range(ngrams - 1):
phrase_model = Phrases.load(os.path.join(path, '{}gram'.format(i)))
transformers.append(phrase_model)
return transformers
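# A sketch of applying the loaded gensim Phrases transformers to one tokenised
# sentence (the sample tokens are made up); each transformer pass merges
# detected n-grams such as "new york" into a single "new_york" token.
def apply_transformers(transformers, tokens):
    for phrase_model in transformers:
        tokens = phrase_model[tokens]
    return list(tokens)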
def load_mnist(path, num_training=50000, num_test=10000, cnn=True, one_hot=False):
f = gzip.open(path, 'rb')
training_data, validation_data, test_data = cPickle.load(
f, encoding='iso-8859-1')
f.close()
X_train, y_train = training_data
X_validation, y_validation = validation_data
X_test, y_test = test_data
if cnn:
shape = (-1, 1, 28, 28)
X_train = X_train.reshape(shape)
X_validation = X_validation.reshape(shape)
X_test = X_test.reshape(shape)
if one_hot:
y_train = one_hot_encode(y_train, 10)
y_validation = one_hot_encode(y_validation, 10)
y_test = one_hot_encode(y_test, 10)
X_train, y_train = X_train[range(
num_training)], y_train[range(num_training)]
X_test, y_test = X_test[range(num_test)], y_test[range(num_test)]
return (X_train, y_train), (X_test, y_test)
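# Minimal usage sketch (the path to the gzipped MNIST pickle is an
# assumption); with cnn=True the images come back shaped (N, 1, 28, 28).
def demo_load_mnist():
    (X_train, y_train), (X_test, y_test) = load_mnist(
        'data/mnist.pkl.gz', num_training=1000, num_test=1000, one_hot=True)
    print(X_train.shape, y_train.shape)  # (1000, 1, 28, 28) (1000, 10)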
def load_cifar10(path, num_training=1000, num_test=1000):
Xs, ys = [], []
for batch in range(1, 6):
f = open(os.path.join(path, "data_batch_{0}".format(batch)), 'rb')
data = cPickle.load(f, encoding='iso-8859-1')
f.close()
X = data["data"].reshape(10000, 3, 32, 32).astype("float64")
y = np.array(data["labels"])
Xs.append(X)
ys.append(y)
    f = open(os.path.join(path, "test_batch"), 'rb')
data = cPickle.load(f, encoding='iso-8859-1')
f.close()
X_train, y_train = np.concatenate(Xs), np.concatenate(ys)
X_test = data["data"].reshape(10000, 3, 32, 32).astype("float")
y_test = np.array(data["labels"])
X_train, y_train = X_train[range(
num_training)], y_train[range(num_training)]
X_test, y_test = X_test[range(num_test)], y_test[range(num_test)]
mean = np.mean(X_train, axis=0)
std = np.std(X_train)
X_train /= 255.0
X_test /= 255.0
return (X_train, y_train), (X_test, y_test)
def __init__(self, filepath):
"""
Args:
filepath (string): path to data file
Data format - list of characters, list of images, (row, col, ch) numpy array normalized between (0.0, 1.0)
Omniglot dataset - Each language contains a set of characters; Each character is defined by 20 different images
"""
with open(filepath, "rb") as f:
processed_data = pickle.load(f)
self.data = dict()
for image, label in zip(processed_data['images'], processed_data['labels']):
if label not in self.data:
self.data[label] = list()
img = np.expand_dims(image, axis=0).astype('float32')
#img /= 255.0
self.data[label].append(img)
self.num_categories = len(self.data)
self.category_size = len(self.data[processed_data['labels'][0]])
def __init__(self, filepath):
"""
Args:
filepath (string): path to data file
Data format - list of characters, list of images, (row, col, ch) numpy array normalized between (0.0, 1.0)
Omniglot dataset - Each language contains a set of characters; Each character is defined by 20 different images
"""
with open(filepath, "rb") as f:
processed_data = pickle.load(f)
self.data = dict()
for image, label in zip(processed_data['images'], processed_data['labels']):
if label not in self.data:
self.data[label] = list()
img = np.expand_dims(image, axis=0).astype('float32')
img /= 255.0
self.data[label].append(img)
self.num_categories = len(self.data)
self.category_size = len(self.data[processed_data['labels'][0]])
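# A sketch of drawing an N-way, K-shot episode from the structure built above;
# sample_episode is not part of the original class and only illustrates how
# self.data maps each label to a list of (1, row, col, ch) image arrays.
import random
import numpy as np

def sample_episode(dataset, n_way=5, k_shot=1):
    labels = random.sample(list(dataset.data.keys()), n_way)
    images, targets = [], []
    for class_idx, label in enumerate(labels):
        for img in random.sample(dataset.data[label], k_shot):
            images.append(img)
            targets.append(class_idx)
    return np.concatenate(images, axis=0), np.array(targets)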
def generate_random_hyperparams(lr_min, lr_max, K_min, K_max, num_layers_min, num_layers_max,load_hparams):
"""This function generates random hyper-parameters for hyperparameter search"""
#this is for new random parameters
if not load_hparams[0]:
lr_exp = np.random.uniform(lr_min, lr_max)
lr = 10**(lr_exp)
K = np.random.choice(np.arange(K_min, K_max+1),1)[0]
num_layers = np.random.choice(np.arange(num_layers_min, num_layers_max + 1),1)[0]
#momentum_exp = np.random.uniform(-8,0)
momentum = np.random.uniform(0,1) #(2**momentum_exp)
#this loads hyperparameters from an existing file
else:
exp_data = np.load('experiment_data/nmf_data_timit_model_bi_mod_lstm_diag_to_full_device_cpu:0_1490813245.npy')[load_hparams[1]]
lr = exp_data['LR']
K = exp_data['K']
num_layers = exp_data['num_layers']
        try:
            # Older experiment files may not record momentum
            momentum = exp_data['momentum']
        except KeyError:
            momentum = None
return lr, K, num_layers, momentum
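# A sketch of a random-search loop around the helper above; train_and_evaluate
# is hypothetical and the sampling ranges are examples only (lr_min/lr_max are
# exponents, since lr = 10 ** lr_exp inside the helper).
def demo_random_search(num_trials=10):
    results = []
    for _ in range(num_trials):
        lr, K, num_layers, momentum = generate_random_hyperparams(
            lr_min=-5, lr_max=-2, K_min=32, K_max=256,
            num_layers_min=1, num_layers_max=3, load_hparams=(False, None))
        # score = train_and_evaluate(lr, K, num_layers, momentum)  # hypothetical
        results.append((lr, K, num_layers, momentum))
    return results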
def load_model():
"""
Load the model with saved tables
"""
# Load model options
print('Loading model parameters...')
with open('%s.pkl'%path_to_umodel, 'rb') as f:
uoptions = pkl.load(f)
with open('%s.pkl'%path_to_bmodel, 'rb') as f:
boptions = pkl.load(f)
# Load parameters
uparams = init_params(uoptions)
uparams = load_params(path_to_umodel, uparams)
utparams = init_tparams(uparams)
bparams = init_params_bi(boptions)
bparams = load_params(path_to_bmodel, bparams)
btparams = init_tparams(bparams)
# Extractor functions
print('Compiling encoders...')
embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')
# Tables
print('Loading tables...')
utable, btable = load_tables()
# Store everything we need in a dictionary
print('Packing up...')
model = {}
model['uoptions'] = uoptions
model['boptions'] = boptions
model['utable'] = utable
model['btable'] = btable
model['f_w2v'] = f_w2v
model['f_w2v2'] = f_w2v2
return model
def load_tables():
"""
Load the tables
"""
words = []
utable = numpy.load(path_to_tables + 'utable.npy', encoding='bytes')
btable = numpy.load(path_to_tables + 'btable.npy', encoding='bytes')
f = open(path_to_tables + 'dictionary.txt', 'rb')
for line in f:
words.append(line.decode('utf-8').strip())
f.close()
utable = OrderedDict(zip(words, utable))
btable = OrderedDict(zip(words, btable))
return utable, btable
def preprocess(text):
"""
Preprocess text for encoder
"""
X = []
sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
for t in text:
sents = sent_detector.tokenize(t)
result = ''
for s in sents:
tokens = word_tokenize(s)
result += ' ' + ' '.join(tokens)
X.append(result)
return X
def load_params(path, params):
"""
load parameters
"""
pp = numpy.load(path)
for kk, vv in params.items():
if kk not in pp:
warnings.warn('%s is not in the archive'%kk)
continue
params[kk] = pp[kk]
return params
# layers: 'name': ('parameter initializer', 'feedforward')
def load_CIFAR_batch(filename):
""" load single batch of cifar """
with open(filename, 'rb') as f:
datadict = pickle.load(f, encoding='latin1')
X = datadict['data']
Y = datadict['labels']
X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
Y = np.array(Y)
return X, Y
def load_CIFAR10(ROOT):
""" load all of cifar """
xs = []
ys = []
for b in range(1, 6):
f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
X, Y = load_CIFAR_batch(f)
xs.append(X)
ys.append(Y)
Xtr = np.concatenate(xs)
Ytr = np.concatenate(ys)
del X, Y
Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
return Xtr, Ytr, Xte, Yte
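# Usage sketch: the directory name matches the standard extracted CIFAR-10
# python archive, but the local path is an assumption.
def demo_load_CIFAR10():
    Xtr, Ytr, Xte, Yte = load_CIFAR10('data/cifar-10-batches-py')
    print(Xtr.shape, Ytr.shape)  # (50000, 32, 32, 3) (50000,)
    print(Xte.shape, Yte.shape)  # (10000, 32, 32, 3) (10000,)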
def load(self, filename='word2vec.pklz'):
    # Read pickled chunks from the gzipped file until EOF and merge each one
    # into this dict-like object.
    with gzip.open(filename, 'rb') as fil:
        while True:
            try:
                tmp = cPickle.load(fil)
                self.update(tmp)
            except EOFError:
                break
# Source: download_cifar10.py, from the TensorFlow-Machine-Learning-Cookbook project by PacktPublishing
def load_batch_from_file(file):
file_conn = open(file, 'rb')
image_dictionary = cPickle.load(file_conn, encoding='latin1')
file_conn.close()
    return image_dictionary
def load_cifar100(folder=CIFAR100_DIR, one_hot=True, partitions=None, filters=None, maps=None):
path = folder + "/cifar-100.pickle"
with open(path, "rb") as input_file:
X, target_ID_fine, target_ID_coarse, fine_ID_corr, coarse_ID_corr, files = cpickle.load(input_file)
    X = np.array(X)
target_ID_fine = target_ID_fine[:len(X)]
target_ID_coarse = target_ID_coarse[:len(X)]
fine_ID_corr = {v: k for v, k in zip(range(len(fine_ID_corr)), fine_ID_corr)}
coarse_ID_corr = {v: k for v, k in zip(range(len(coarse_ID_corr)), coarse_ID_corr)}
fine_label_corr = {v: k for k, v in fine_ID_corr.items()}
coarse_label_corr = {v: k for k, v in coarse_ID_corr.items()}
    Y = np.array(target_ID_fine)
if one_hot:
Y = to_one_hot_enc(Y)
    superY = np.array(target_ID_coarse)
if one_hot:
superY = to_one_hot_enc(superY)
print(len(X))
print(len(Y))
dataset = Dataset(data=X, target=Y,
info={'dict_name_ID_fine': fine_label_corr, 'dict_name_ID_coarse': coarse_label_corr,
'dict_ID_name_fine': fine_ID_corr, 'dict_ID_name_coarse': coarse_ID_corr},
sample_info=[{'Y_coarse': yc, 'files': f} for yc, f in zip(superY, files)])
if partitions:
res = redivide_data([dataset], partitions, filters=filters, maps=maps, shuffle=True)
res += [None] * (3 - len(res))
return Datasets(train=res[0], validation=res[1], test=res[2])
return dataset
def load_obj(name, root_dir=None, notebook_mode=True):
if root_dir is None: root_dir = os.getcwd()
directory = check_or_create_dir(join_paths(root_dir, FOLDER_NAMINGS['OBJ_DIR']),
notebook_mode=notebook_mode, create=False)
filename = join_paths(directory, name if name.endswith('.pkgz') else name + '.pkgz')
with gzip.open(filename, 'rb') as f:
return pickle.load(f)
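# A hypothetical save_obj counterpart to load_obj above, written as a sketch
# under the same path conventions (.pkgz = gzip-compressed pickle); it is not
# part of the original module.
def save_obj(obj, name, root_dir=None, notebook_mode=True):
    if root_dir is None: root_dir = os.getcwd()
    directory = check_or_create_dir(join_paths(root_dir, FOLDER_NAMINGS['OBJ_DIR']),
                                    notebook_mode=notebook_mode, create=True)
    filename = join_paths(directory, name if name.endswith('.pkgz') else name + '.pkgz')
    with gzip.open(filename, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)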
def __init__(self, recompute_statistics=False):
"""
        Initialization of the module.
Parameters
------------
*recompute_statistics(boolean): if True, statistics are calculated again,
if False, statistics are loaded from file
"""
self.get_article_statistics(recompute_statistics=recompute_statistics)
# list which users see
self.cur_articles_list = list(self.article_index.values())
self.cur_articles_list = [a for a in self.cur_articles_list if a.questions_cnt > 0 and a.official_article]
# ranking list of articles without filters - for fast execution
# self.articles_list_all = list(self.article_index.values())
# self.articles_list_all = [a for a in self.articles_list_all if a.questions_cnt > 0]
# filters list
self.filters_type = []
self.filters_data = []
# with open("../data/guide_articles/guide_article_ID", "rb") as f:
# self.ids_in_guides = cPickle.load(f)
with open("../data/guide_articles/guides_list", "rb") as f:
self.guides_list = cPickle.load(f)
# print (len(self.ids_in_guides))
# default - ranked by cnt_questions, no filters
# self.ranking_articles(rank_type='by_cnt_questions')
def get_article_statistics(self, recompute_statistics=True):
"""
        Aggregate statistics from both forums.
"""
if recompute_statistics:
self.get_article_index()
data_generator = loadDataGenerator()
cnt_not_match_links = 0
links_cnt = 0
l2a = Link2Article()
# log = open("./logs", "w")
# error_link = []
for question_batch in data_generator:
for question in question_batch:
links = LinksSearcher(question.get_all_text()).get_simple_links()
for link in links:
# log.write(link.link_text + "\n")
# log.flush()
# function from Alexandrina
article = l2a.link2article(link)
# print (article)
if article:
# print (article.article_ID)
links_cnt += 1
self.article_index[article.article_ID].add_question(question, link)
else:
cnt_not_match_links += 1
sys.stderr.write("\r\t\t\t\t\tALL LINKS: %d; CAN't MATCH: %d" % (links_cnt, cnt_not_match_links))
with open("./../data/statistics/article_statistics", "wb") as f:
cPickle.dump(self.article_index, f, protocol=pickle.HIGHEST_PROTOCOL)
else:
with open("./../data/statistics/article_statistics", "rb") as f:
self.article_index = cPickle.load(f)