def predict():
    fnn = joblib.load(PKL)
    data_dir = 'E:/????/??????/1 ???/captcha_master1/captcha_master/worddata/'
    predictValue = []
    for fr in os.listdir(data_dir):
        dataset = []
        f = data_dir + fr
        if f.rfind(u'.DS_Store') == -1 and f.rfind(u'Thumbs.db') == -1:
            data = np.loadtxt(f, delimiter=',')
            # data.reshape((1, 2500))
            for item in data:
                dataset.append(int(item))
            # print(len(dataset))
            out = fnn.activate(dataset)
            out = out.argmax()
            iconset = ['3', 'c', 'd', 'e', 'f', 'h', 'j', 'k', 'l', 'm', 'n', 'w', 'x', 'y']
            for y, word in enumerate(iconset):
                if out == y:
                    print(word)
                    predictValue.append(word)
    print(u'Predicted captcha: %s' % (''.join(predictValue)))
def test():
    DS = loadPybrainData()
    train, test = DS.splitWithProportion(0.1)
    fnn = joblib.load(PKL)
    # Run the trained network on the whole test set
    output = fnn.activateOnDataset(test)
    # ann.activate(onedata) would give the activation for a single sample instead
    outputs = []
    target = []
    count = 0
    for out in output:
        outs = out.argmax()
        outputs.append(outs)
    for tar in test['target']:
        ta = tar.argmax()
        target.append(ta)
    for i in range(0, len(target)):
        if outputs[i] == target[i]:
            count += 1
    right = count / len(target)  # per-character accuracy
    rate = right ** 4  # a captcha has four characters, so whole-captcha accuracy is the fourth power
    print("Whole-captcha accuracy: %.4f%%" % (rate * 100))
    v = Validator()
    print(u'Mean squared error:', v.MSE(output, test['target']))  # MSE between network outputs and test targets
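# Both functions above assume a network has already been trained and dumped to PKL.
# The following is a minimal, hypothetical sketch of that training step, assuming a
# PyBrain feed-forward network and the same loadPybrainData() helper; the layer sizes
# (2500 inputs for 50x50 pixel glyphs, 14 outputs for the 14-character iconset),
# learning rate and epoch count are illustrative guesses, not values from the original project.
def train_and_save(pkl_path=PKL, hidden=100, epochs=50):
    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.supervised.trainers import BackpropTrainer
    ds = loadPybrainData()
    fnn = buildNetwork(2500, hidden, 14, bias=True)
    trainer = BackpropTrainer(fnn, ds, learningrate=0.01, momentum=0.1)
    trainer.trainUntilConvergence(maxEpochs=epochs)
    # Persist the trained network so predict()/test() can joblib.load(PKL) it later.
    joblib.dump(fnn, pkl_path)
    return fnn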
def load_trained_model(self, classifier):
    filename = '{}.pkl'.format(classifier.__name__.lower())
    path = os.path.join(self.data_path, filename)
    # palliative: this outputs a model too large for joblib
    if classifier.__name__ == 'MonthlySubquotaLimitClassifier':
        model = classifier()
        model.fit(self.dataset)
    else:
        if os.path.isfile(path):
            model = joblib.load(path)
        else:
            model = classifier()
            model.fit(self.dataset)
            joblib.dump(model, path)
    return model
def loadcolumn(filename, col=4, skip=1, floats=True):
    pred = []
    op = open(filename, 'r')
    if skip == 1:
        op.readline()  # skip the header line
    for line in op:
        line = line.replace('\n', '')
        sps = line.split(',')
        # load the requested column from every row
        if floats:
            pred.append(float(sps[col]))
        else:
            pred.append(str(sps[col]))
    op.close()
    return pred

# functions to manipulate pickles
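# The comment above announces pickle helpers that are cut off in this excerpt. As a
# purely hypothetical illustration of what such helpers commonly look like, a pair of
# thin wrappers around joblib (the names dump_pickle/load_pickle are mine, not from
# the original file):
from sklearn.externals import joblib

def dump_pickle(obj, path):
    # Persist any picklable object (model, array, list of predictions) to disk.
    joblib.dump(obj, path)

def load_pickle(path):
    # Load an object previously saved with dump_pickle.
    return joblib.load(path)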
def is_target(screen_name, disable_targeting, model_file='cluster.pkl'):
    """
    Returns a boolean for whether the user should be selected, according
    to the label returned by a prediction from a pretrained clustering
    algorithm.
    """
    if disable_targeting:
        return True
    else:
        auth = tweepy.OAuthHandler(credentials.consumer_key,
                                   credentials.consumer_secret)
        auth.set_access_token(credentials.access_token,
                              credentials.access_token_secret)
        api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())
        user_array = numpy.array([api.get_user(screen_name=screen_name)])
        model = joblib.load(model_file)
        # predict() returns an array of labels; take the single element
        cluster_label = model.predict(user_array)
        return cluster_label[0] == 1
def get_cache_file(model_id, index, cache_dir='', suffix='csv'):
    # Index identification trick:
    # if the sums of the first 20 index values match, treat them as the same index.
    if index is None:
        raise IOError
    if len(index) < 20:
        sum_index = sum(index)
    else:
        sum_index = sum(index[:20])
    return "{0}{1}_{2}.{3}".format(cache_dir,
                                   model_id,
                                   sum_index,
                                   suffix)
##def saving_fit(learner, X, y, index):
##    import os
##    pkl_file = "{0}_{1}_{2}.pkl".format(learner.id, min(index), max(index))
##    try:
##        learner = joblib.load(pkl_file)
##        print("**** learner is loaded from {0} ****".format(pkl_file))
##    except IOError:
##        learner.fit(X, y)
##        joblib.dump(learner, pkl_file)
##    return learner
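# A short, hypothetical usage sketch of the index trick described above: two index
# sequences that share the same first 20 values resolve to the same cache file, so
# cached out-of-fold predictions can be reused (model id and cache_dir are made up).
idx_a = list(range(100))           # full training index
idx_b = list(range(20)) + [999]    # different tail, same first 20 values
path_a = get_cache_file('xgb1', idx_a, cache_dir='cache/')
path_b = get_cache_file('xgb1', idx_b, cache_dir='cache/')
assert path_a == path_b            # both resolve to 'cache/xgb1_190.csv'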
def KmeansWrapper(true_k, data, load=False):
    from sklearn.externals import joblib
    modelName = 'doc_cluster.%s.plk' % true_k
    if load:
        km = joblib.load(modelName)
        labels = km.labels_
    else:
        km = KMeans(n_clusters=true_k,
                    init='k-means++',
                    # max_iter=1000,
                    n_init=10,
                    n_jobs=-1,
                    random_state=0,
                    verbose=0)
        km.fit_predict(data)
        labels = km.labels_
        joblib.dump(km, modelName)
    return labels, km.cluster_centers_
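# Brief usage sketch, assuming X is a precomputed document-term matrix (the cluster
# count is arbitrary): the first call fits KMeans and dumps it to 'doc_cluster.5.plk',
# a later call with load=True reuses that pickled model instead of refitting.
labels, centers = KmeansWrapper(true_k=5, data=X)
labels, centers = KmeansWrapper(true_k=5, data=X, load=True)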
def __init__(self, is_training=True, is_skip=False, batch_size=100, is_TopKloss=True,
             word2vec_model='./model/word2vec/ourword2vec.pkl'):
    # word2vec_model='/media/wwt/860G/model/word2vec/cn.cbow.bin'
    # self.model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model, binary=True, unicode_errors='ignore')
    self.word2vec = pkl.load(open(word2vec_model, 'r'))
    self.batch_size = batch_size
    self.weight_decay = 0.0005
    self.endpoint = {}
    self.is_skip = is_skip
    self.is_TopKloss = is_TopKloss
    self.is_training = is_training
    self.keep_prob = 0.5 if is_training else 1.0
    self.build_input()
    # self.build_matchnet()
    # self.build_classify()
    # self.build_crossEnt_class()
    # self.loss_weight = 0.
    self.build_unite()
    if is_training:
        # self.build_summary()
        # self.build_summary_crossEnt()
        self.build_summary_unite()
def generate_fishervector(sample_set, ica_model='./model/ICA/ica_ourword2vec.model',
                          gmm_model_path='./model/GMM/gmm_ourword2vec.model', max_num=30000):
    ica = joblib.load(ica_model)
    gmm_model = pkl.load(open(gmm_model_path, 'r'))
    centers = gmm_model[0].shape[0]
    dims = gmm_model[1].shape[1]
    fishervector = np.zeros([len(sample_set), centers * dims * 2]) + 0.00001
    for i, v in enumerate(sample_set):
        words = v.strip().split(' ')
        words = words[:min(len(words), max_num + 200)]
        vectors = []
        for j in words:
            try:
                vectors.append(word2vec_model[j])
            except:
                pass  # print 'Not found %s' % j
        if len(vectors) > 0:
            vectors = vectors[:min(len(vectors), max_num)]
            fishervector[i] = yael.ynumpy.fisher(gmm_model,
                                                 ica.transform(np.array(vectors)).astype(np.float32),
                                                 include='mu sigma')
    print 'mean vectors is', fishervector.mean(0)
    return fishervector
def init_state(indata, test=False):
    close = indata['close'].values
    diff = np.diff(close)
    diff = np.insert(diff, 0, 0)
    sma15 = SMA(indata, timeperiod=15)
    sma60 = SMA(indata, timeperiod=60)
    rsi = RSI(indata, timeperiod=14)
    atr = ATR(indata, timeperiod=14)
    #--- Preprocess data
    xdata = np.column_stack((close, diff, sma15, close - sma15, sma15 - sma60, rsi, atr))
    xdata = np.nan_to_num(xdata)
    if test == False:
        scaler = preprocessing.StandardScaler()
        xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1)
        joblib.dump(scaler, 'data/scaler.pkl')
    elif test == True:
        scaler = joblib.load('data/scaler.pkl')
        # Reuse the scaler fitted on training data; transform() (not fit_transform())
        # keeps the test features on the same scale as training.
        xdata = np.expand_dims(scaler.transform(xdata), axis=1)
    state = xdata[0:1, 0:1, :]
    return state, xdata, close

# Take Action
def model_loader(model_dict, adv=None, rd=None, rev=None):
    """
    Returns a classifier object if it already exists. Returns None otherwise.
    """
    if adv is None:
        adv_mag = None
    # Note: adv_mag is only assigned when adv is None; if adv is given, the NameError
    # raised below is swallowed by the broad except and None is returned.
    print('Loading model...')
    abs_path_m = resolve_path_m(model_dict)
    try:
        clf = joblib.load(abs_path_m + get_svm_model_name(model_dict, adv, adv_mag, rd, rev) +
                          '.pkl')
    except BaseException:
        clf = None
    return clf
#------------------------------------------------------------------------------#
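# A hypothetical usage sketch of the load-or-train pattern this helper enables; the
# train_svm name and the training data are illustrative, only model_loader,
# resolve_path_m and get_svm_model_name come from the snippet above.
clf = model_loader(model_dict)
if clf is None:
    clf = train_svm(model_dict, X_train, y_train)   # hypothetical training helper
    abs_path_m = resolve_path_m(model_dict)
    # Dump under the same name the loader expects, so the next run finds it.
    joblib.dump(clf, abs_path_m + get_svm_model_name(model_dict, None, None, None, None) + '.pkl')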
def vect_(self):
    if not hasattr(self, '_vect') or self._vect is None:
        mid = self.dsid
        mid_dir = self.cache_dir / mid
        if not mid_dir.exists():
            raise ValueError(('Vectorizer model id {} ({}) '
                              'not found in the cache {}!')
                             .format(mid, mid_dir, self.cache_dir))
        fname = mid_dir / 'vectorizer'
        if self.pars_['use_hashing']:
            self._vect = joblib.load(str(fname))
        else:
            # this is much faster in Python 3 as cPickle is used
            # (only works if no numpy arrays are used)
            with fname.open('rb') as fh:
                self._vect = pickle.load(fh)
    return self._vect
def _show(args):
    cache_dir = _parse_cache_dir(args.cache_dir)
    p = PipelineFinder.by_id(mid=args.mid, cache_dir=cache_dir)
    print(p)
    print(' * model_id: {}'.format(args.mid))
    print(' * model_type: {}'.format(list(p.keys())[-1]))
    print(' * file_path: {}'.format(p.get_path()))
    try:
        pars = joblib.load(os.path.join(p.get_path(), 'pars'))
        for key, val in pars.items():
            val_str = str(val)
            if len(val_str) > 30 and not isinstance(val, dict):
                continue
            print(' * {}: {}'.format(key, val_str))
    except:
        pass
def test_get_feature_extraction(app, hashed, weighting):
    norm_alpha = 0.5
    dsid, _, _ = get_features_cached(app, hashed=hashed, weighting=weighting,
                                     norm_alpha=norm_alpha)
    method = V01 + "/feature-extraction/{}".format(dsid)
    data = app.get_check(method)
    assert dict2type(data, collapse_lists=True) == {
        'analyzer': 'str',
        'ngram_range': ['int'], 'stop_words': 'str',
        'n_jobs': 'int', 'chunk_size': 'int',
        'data_dir': 'str', 'n_samples': 'int',
        'n_features': 'int', 'weighting': 'str',
        'norm_alpha': 'float', 'use_hashing': 'bool',
        'filenames': ['str'], 'max_df': 'float', 'min_df': 'float',
        'parse_email_headers': 'bool', 'n_samples_processed': 'int',
        'preprocess': []}
    assert data['use_hashing'] == hashed
    assert data['weighting'] == weighting
    assert data['norm_alpha'] == norm_alpha
    vect = joblib.load(os.path.join(CACHE_DIR, 'ediscovery_cache', dsid, 'vectorizer'))
    assert (data['use_hashing'] is True) == ('hashing' in type(vect).__name__.lower())
def predict_function():
    x_list = []
    line_list = []
    line_dict = {}
    predict_doc = joblib.load('logreg.pkl')
    feature_doc = joblib.load("word_vec.pkl")
    y_train, x_train = get_feature()
    line = "bad bad good good"
    line_list = line.split()
    for line in x_train:
        for key in line:
            line_dict[key] = 0
    line_dict.update(dict(Counter(line_list)))
    for a in sorted(line_dict.items(), key=lambda x: x[1]):
        print(a)
    x_list.append(line_dict)
    print(x_list)
    exit()
    X = DictVectorizer().fit_transform(x_list)
    pred = predict_doc.predict(X)
    prob = predict_doc.predict_proba(X)
    for pred, prob in zip(pred, prob):
        print(pred, prob)
def from_file(cls, path):
    """
    Load a classifier from file.
    :param path: path to the serialized vector classifier
    :return: the new classifier instance
    """
    # Create a new classifier instance
    classifier = cls()
    # Load the classifier
    classifier.vector_classifier = joblib.load(path)
    # Return the classifier
    return classifier

# -----------------------------------------------------------------
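# For symmetry, a hypothetical to_file counterpart showing how the vector_classifier
# would be persisted in the first place; this method is a sketch meant to live in the
# same class, not taken from the original code.
def to_file(self, path):
    """
    Save the underlying vector classifier so it can later be restored with from_file().
    :param path: destination path for the serialized classifier
    """
    from sklearn.externals import joblib
    joblib.dump(self.vector_classifier, path)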
# Source: BidirectionNet_lstm.py, project Sohu-LuckData-Image-Text-Matching-Competition (author: WeitaoVan)
def __init__(self, is_training=True, is_skip=False, batch_size=100, is_TopKloss=True,
             word2vec_model='/media/wwt/860G/data/souhu_data/fusai/train/word2vec_11w.pkl'):
    # word2vec_model='/media/wwt/860G/model/word2vec/cn.cbow.bin'
    # self.model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model, binary=True, unicode_errors='ignore')
    self.word2vec = pkl.load(open(word2vec_model, 'r'))
    self.batch_size = batch_size
    self.weight_decay = 0.000001
    self.endpoint = {}
    self.is_skip = is_skip
    self.is_TopKloss = is_TopKloss
    self.is_training = is_training
    self.keep_prob = 0.5 if is_training else 1.0
    self.build_input()
    # self.build_matchnet()
    # self.build_classify()
    # self.build_crossEnt_class()
    self.loss_weight = 0.
    self.build_unite(self.loss_weight)
    if is_training:
        # self.build_summary()
        # self.build_summary_crossEnt()
        self.build_summary_unite()
def load_movielens(version):
    data_home = get_data_dirs()[0]
    if version == "100k":
        path = os.path.join(data_home, "movielens100k", "movielens100k.pkl")
    elif version == "1m":
        path = os.path.join(data_home, "movielens1m", "movielens1m.pkl")
    elif version == "10m":
        path = os.path.join(data_home, "movielens10m", "movielens10m.pkl")
    else:
        raise ValueError("Invalid version of movielens.")
    # FIXME: make downloader
    if not os.path.exists(path):
        raise ValueError("Download dataset using 'make download-movielens%s' at"
                         " project root." % version)
    X = load(path)
    return X
def predict_song(wavfile):
    sampling_rate, song_array = scipy.io.wavfile.read(wavfile)
    song_array[song_array == 0] = 1
    ceps, mspec, spec = mfcc(song_array)
    base_wav, ext = os.path.splitext(wavfile)
    data_wav = base_wav + ".ceps"
    np.save(data_wav, ceps)
    # features
    X = []
    Y = []
    ceps = np.load(data_wav + ".npy")
    num_ceps = len(ceps)
    X.append(np.mean(ceps[int(num_ceps * 1 / 10): int(num_ceps * 9 / 10)], axis=0))
    # prediction
    # print predict_file(X)
    genre_list = ["country", "hiphop", "metal", "pop", "reggae", "rock"]
    clf = joblib.load('./analyser/ml_utils/genre_classify/model_ceps.pkl')
    index = clf.predict(X)
    return genre_list[index[0]]
def backtesting_with_lstm():
    model = get_loaded_model()
    df = pd.read_csv('btc_etc.csv').rename(columns={
        'Close': 'close',
        'Date time': 'datetime',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Volume': 'volume'
    })
    ds = DataSeries(df)
    scaler = joblib.load(open('scaler.sav', 'rb'))
    look_back = 1
    bt = NNBT(ds, 1000.0, model, look_back, scaler)
    bt.run()
    print('Profit: ${:.2f}'.format(bt.get_profit()))
def use_model_to_predict(test_df, model):
    test_df.drop(['label'], axis=1, inplace=True)
    print 'Fix Missing App Count Value...'
    model_miss = joblib.load('XGB_missing.model')
    test_df = fix_missing_appcounts(test_df, model_miss)
    '''print 'Fix Missing Age Value...'
    model_age = joblib.load('XGB_age.model')
    test_df = fix_missing_age(test_df, model_age)'''
    test_df.drop(['marriageStatus', 'haveBaby', 'sitesetID', 'positionType'], axis=1, inplace=True)
    print 'Done'
    print test_df.info()
    print test_df.describe()
    print test_df.isnull().sum()
    test_np = test_df.as_matrix()
    X = test_np[:, 1:]
    print 'Use Model To Predict...'
    predicts = model.predict(X)
    result = pd.DataFrame({'instanceID': test_df['instanceID'].as_matrix(), 'prob': predicts})
    # print predicts  #, predicts.min(axis=0), predicts.max(axis=0), predicts.sum(axis=1)
    return result
def generate_submission(test_dataset, sess, eval_prediction, eval_data_node):
    test_labels = eval_in_batches(test_dataset, sess, eval_prediction, eval_data_node)
    test_labels *= 96.0
    test_labels = test_labels.clip(0, 96)
    lookup_table = pd.read_csv(FLOOKUP)
    values = []
    cols = joblib.load('data/cols.pkl')
    for index, row in lookup_table.iterrows():
        values.append((
            row['RowId'],
            test_labels[row.ImageId - 1][np.where(cols == row.FeatureName)[0][0]],
        ))
    submission = pd.DataFrame(values, columns=('RowId', 'Location'))
    submission.to_csv('data/submission.csv', index=False)
def make_submission(test_labels):
    test_labels *= 96.0
    test_labels = test_labels.clip(0, 96)
    lookup_table = pd.read_csv(FLOOKUP)
    values = []
    cols = joblib.load('data/cols.pkl')
    for index, row in lookup_table.iterrows():
        values.append((
            row['RowId'],
            test_labels[row.ImageId - 1][np.where(cols == row.FeatureName)[0][0]],
        ))
    submission = pd.DataFrame(values, columns=('RowId', 'Location'))
    submission.to_csv('data/submission.csv', index=False)
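# Both submission helpers above rely on a 'data/cols.pkl' array that maps each output
# column to a keypoint name. A hypothetical sketch of how that array is typically
# produced during training (the CSV layout, keypoint columns followed by an 'Image'
# column, is assumed from the Kaggle facial-keypoints format this code appears to use):
import pandas as pd
from sklearn.externals import joblib

train_df = pd.read_csv('data/training.csv')
cols = train_df.columns[:-1].values      # keypoint column names, e.g. 'left_eye_center_x'
joblib.dump(cols, 'data/cols.pkl')       # read back later by the submission code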
def splity(im):
    s = ''
    w, h = im.size
    pix = im.load()
    for j in xrange(h):
        num = 0
        for i in xrange(w):
            # print pix[i,j]
            if pix[i, j] == BLACK:
                num += 1
        if num > 0:
            s += '1'
        else:
            s += '0'
    # print s
    start = s.find('1')
    end = s.rfind('1')
    return im.crop((0, start, w, end))
def __init__(self, s_date):
    prev_bd = int(s_date[:6]) - 1
    prev_ed = int(s_date[9:15]) - 1
    if prev_bd % 100 == 0: prev_bd -= 98
    if prev_ed % 100 == 0: prev_ed -= 98
    pred_s_date = "%d01_%d01" % (prev_bd, prev_ed)
    prev_model = '../model/tflearn/lstm/%s' % pred_s_date
    self.model_dir = '../model/tflearn/lstm/%s' % s_date
    tf.reset_default_graph()
    tflearn.init_graph(gpu_memory_fraction=0.1)
    input_layer = tflearn.input_data(shape=[None, 30, 23], name='input')
    lstm1 = tflearn.lstm(input_layer, 23, dynamic=True, name='lstm1')
    dense1 = tflearn.fully_connected(lstm1, 1, name='dense1')
    output = tflearn.single_unit(dense1)
    regression = tflearn.regression(output, optimizer='adam', loss='mean_square',
                                    metric='R2', learning_rate=0.001)
    self.estimators = tflearn.DNN(regression)
    if os.path.exists('%s/model.tfl' % prev_model):
        self.estimators.load('%s/model.tfl' % prev_model)
def load_vocab(self, vocab):
    self.vocab = self.load('model/vocab.pkl')
    self.max_length = self.load('model/max_length.pkl')
    if self.vocab != None and self.max_length != None:
        return
    vocab_temp, self.max_length = utils.load_data2list_string(vocab)
    # vocab_temp = sorted(vocab_temp, key=lambda s: len(s.split()), reverse=True)
    vocab_temp = filter(lambda s: len(s.split()) > 1, vocab_temp)  # remove words that have only one syllable
    vocab_temp_clone = map(lambda s: s.replace(u' ', u'_'), vocab_temp)
    self.vocab = {i: {} for i in xrange(1, self.max_length + 1)}
    for i in xrange(len(vocab_temp)):
        s = vocab_temp[i]
        ss = vocab_temp_clone[i]
        w = s.split()[0]
        length = vocab_temp[i].count(u' ')
        try:
            self.vocab[length][w].update({s: ss})
        except:
            self.vocab[length].update({w: {s: ss}})
    print('size of vocab = %d' % (len(vocab_temp)))
    self.save_model(self.vocab, 'model/vocab.pkl')
    self.save_model(self.max_length, 'model/max_length.pkl')
def classify_comment(comment):
    """Classify the comment.

    :param comment: should have a message attribute.
    """
    global _comment_pipeline
    from sklearn.externals import joblib
    model_is_not_loaded = _comment_pipeline is None
    if model_is_not_loaded:
        import pygameweb.comment.classifier_train
        import pygameweb.config
        model_fname = pygameweb.config.Config.COMMENT_MODEL
        _comment_pipeline = joblib.load(model_fname)
    return _comment_pipeline.predict([comment.message])[0]
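# Short usage sketch: any object with a message attribute will do; the FakeComment
# class below is illustrative only.
class FakeComment:
    def __init__(self, message):
        self.message = message

label = classify_comment(FakeComment('Buy cheap widgets now!!!'))
print(label)   # whatever label the persisted pipeline predicts, e.g. spam / not spam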