def load_model(self):
    self.scaler = joblib.load("data_analysis/scaler.pkl")
    self.model = joblib.load("data_analysis/model.pkl")
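For context, a minimal sketch of how the two pickle files this method expects could be produced, assuming a scikit-learn scaler and classifier (the training data here is a placeholder, not from the original project):

import os
import joblib
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Placeholder data; the real features and labels come from the project's pipeline.
X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)

scaler = StandardScaler().fit(X)
model = LogisticRegression().fit(scaler.transform(X), y)

# Persist both artifacts where load_model() expects to find them.
os.makedirs("data_analysis", exist_ok=True)
joblib.dump(scaler, "data_analysis/scaler.pkl")
joblib.dump(model, "data_analysis/model.pkl")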
Example source code for Python load()
# From AUC_Geo_Rank_Weighted_Average.py (project: ensemble_amazon, author: kaz-Anova)
def create_ranklist(data):
    for j in range(len(data[0])):
        putcolumn(data, ranking(select_column(data, j)), j)
# method to load a specific column
def loadcolumn(filename, col=4, skip=1, floats=True):
    """Load a single column from a comma-separated file."""
    pred = []
    op = open(filename, 'r')
    if skip == 1:
        op.readline()  # skip the header row
    for line in op:
        line = line.replace('\n', '')
        sps = line.split(',')
        # cast the requested column to float, or keep it as a string
        if floats:
            pred.append(float(sps[col]))
        else:
            pred.append(str(sps[col]))
    op.close()
    return pred
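A quick usage sketch for loadcolumn, assuming a small CSV with a header row (the file name and contents are made up for illustration):

# Build a throwaway CSV with columns id,name,score.
with open("scores.csv", "w") as f:
    f.write("id,name,score\n1,a,0.71\n2,b,0.64\n")

scores = loadcolumn("scores.csv", col=2, skip=1, floats=True)
print(scores)  # [0.71, 0.64]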
def load_datas(filename):
    return joblib.load(filename)
def load(self, path):
    """
    Load a model and its classes with joblib and pickle.

    Parameters
    ----------
    path: string
        The location of the persistence directory from which model and classes will be loaded.

    Returns
    ----------
    None
    """
    self.model = joblib.load(path + 'tree.pkl')
    self.classes = joblib.load(path + 'classes.pkl')
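A plausible save counterpart, assuming the same attribute names and file layout; this method is not part of the original source:

def save(self, path):
    """Persist the model and its classes with joblib (hypothetical counterpart to load)."""
    joblib.dump(self.model, path + 'tree.pkl')
    joblib.dump(self.classes, path + 'classes.pkl')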
def build(self, environment, ml_definitions):
    self._messages.clear()
    # read the received definitions and configure them
    field_manager = FieldManager.read_definitions(ml_definitions)
    field_manager.init(environment)
    # load the dataset and evaluate it
    dataset = DataSet.load_dataset(environment, field_manager=field_manager)
    self._merge_and_check_messages(dataset.evaluate())
    # build the features from the fields and the dataset
    f_builder = FeatureBuilder(field_manager)
    f_builder.build(dataset)
    self._merge_and_check_messages(f_builder.evaluate())
    # adjust the dataset to the features
    adjusted = f_builder.field_manager.adjust(dataset)
    # make & train the model
    m_builder = ModelBuilder(f_builder.field_manager)
    m_builder.build(adjusted)
    self._merge_and_check_messages(m_builder.evaluate())
    self.field_manager = f_builder.field_manager
    self.model = m_builder.model
    self.model_score = m_builder.model_score
@classmethod
def load(cls, app_id):
    home_dir = cls.__home_dir(app_id)
    if not os.path.isdir(home_dir):
        raise Exception("Model files for application {} have not been created yet.".format(app_id))
    path_fieldm = os.path.join(home_dir, cls.FIELD_MANAGER_FILE)
    with open(path_fieldm, mode="r", encoding="utf-8") as md:
        serialized = json.load(md)
        field_manager = FieldManager.load(serialized)
    trained_model = joblib.load(os.path.join(home_dir, cls.MODEL_FILE))
    model_manager = ModelManager(field_manager, trained_model)
    return model_manager
# From raw_to_mask.py (project: kaggle-dstl-satellite-imagery-feature-detection, author: u1234x1234)
def mask_to_poly(image_id):
    preds = joblib.load('raw_preds/raw_blend5/{}.pkl'.format(image_id))
    size = preds.shape[1]
    if n_out == 10:
        # preds = (preds > 0.3).astype(np.uint8)
        thresholds = np.array([0.4, 0.4, 0.4, 0.4, 0.8,
                               0.4, 0.4, 0.4, 0.1, 0.1]).reshape((10, 1))
        preds = (preds.reshape((10, -1)) > thresholds).reshape((10, size, size))
        preds = preds.astype(np.uint8)
    else:
        preds = np.argmax(preds, axis=0)
        preds = unsoft(preds)
    rg = colorize_raster(preds.transpose((1, 2, 0)))
    # cv2.imwrite('1.png', rg)
    size = 900
    rg = cv2.resize(rg, (size, size))
    # cv2.imshow('mask', rg)
    # cv2.waitKey()
    im = get_rgb_image(image_id, size, size)
    rg = np.hstack([rg, im])
    cv2.imwrite('raw_temp5_1/{}.png'.format(image_id), rg)
    shs = []
    for i in range(10):
        mask = preds[i]
        y_sf, x_sf = get_scale_factor(image_id, mask.shape[0], mask.shape[1])
        y_sf = 1. / y_sf
        x_sf = 1. / x_sf
        sh = polygonize_cv(mask)
        # sh = polygonize_sk((mask > 0) * 255, 0)
        # sh = (sh1.buffer(0).intersection(sh2.buffer(0))).buffer(0)
        # if not sh.is_valid:
        #     sh = sh.buffer(0)
        sh = affinity.scale(sh, xfact=x_sf, yfact=y_sf, origin=(0, 0, 0))
        shs.append(sh)
    return shs
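The per-class thresholding above leans on NumPy broadcasting: reshaping the predictions to (10, N) lets a (10, 1) threshold vector apply row-wise, one threshold per class. A self-contained illustration with shapes shrunk for readability:

import numpy as np

preds = np.array([[0.35, 0.45],   # class 0 scores
                  [0.05, 0.20]])  # class 1 scores
thresholds = np.array([0.4, 0.1]).reshape((2, 1))

# (2, 2) > (2, 1) broadcasts each row's threshold across its columns.
binary = (preds > thresholds).astype(np.uint8)
print(binary)  # [[0 1], [0 1]]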
def _load_credentials(self):
    with open(self.loc.format('../../config/twitter_creds.json')) as fp:
        config = json.load(fp)
    self.logger.info('Twitter credentials loaded')
    return config
def _load_model(self):
    self.logger.info('Loading serialized model')
    # hardcoded path
    path = self.loc.format('../saved_models/model.pkl')
    return joblib.load(path)
def transform(self):
    tweet_df = json_normalize(self.tweet)
    # drop all columns from tweet_df that are not listed in extract_fields
    with open(self.loc.format('../../etl/extract/extract_fields.json')) as fp:
        fields_dict = json.load(fp)
    fields_subset = fields_dict.get('fields')
    tweet_df = tweet_df.loc[:, fields_subset]
    # transform the DF into the same form as the DB table
    tweet_df.loc[:, 'retweets_to_faves'] = 0
    # this feature isn't scaled properly since we're pulling from the stream
    # tweet_df.loc[:, 'retweets_to_faves'] = tweet_df.loc[:, 'retweet_count'] / tweet_df.loc[:, 'favorite_count']
    tweet_df.loc[:, 'num_characters'] = tweet_df.text.apply(lambda x: len(x))
    tweet_df.loc[:, 'num_exclamation_points'] = tweet_df.text.apply(lambda x: x.count('!'))
    tweet_df.loc[:, 'is_tweetstorm'] = 0
    tweet_df.loc[:, 'is_trump_retweet'] = tweet_df.text.apply(lambda x: is_retweet(x))
    tweet_df.loc[:, 'num_uppercase_strings'] = tweet_df.text.apply(lambda x: count_uppercase_substrings(x))
    tweet_df.loc[:, 'source'] = tweet_df.source.apply(lambda x: normalize_tweet_sources(x))
    tweet_df.rename(columns={
        'favorite_count': 'favorites',
        'quoted_status.text': 'quoted_status_text',
        'retweet_count': 'retweets',
        'source': 'tweet_source',
        'user.id_str': 'user_id_str',
        'user.name': 'user_name',
        'user.followers_count': 'followers',
        'user.screen_name': 'user_screen_name',
        'user.statuses_count': 'num_statuses'
    }, inplace=True)
    self.tweet_df = tweet_df
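The dotted column names in the rename map ('user.id_str', 'quoted_status.text') come from json_normalize flattening nested JSON into dot-separated columns; a minimal illustration with a made-up payload:

from pandas import json_normalize

tweet = {
    'text': 'hello!',
    'retweet_count': 2,
    'user': {'id_str': '123', 'followers_count': 42},
}
df = json_normalize(tweet)
print(list(df.columns))
# ['text', 'retweet_count', 'user.id_str', 'user.followers_count']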
def load_current_checkpoints(self):
    ''' Return the number and contents of the most recent checkpoint. '''
    checkpoints = os.listdir(self.path_checkpoints)
    num = max([int(f.split('_')[1]) for f in checkpoints])
    name = 'check_' + str(num)
    return num, joblib.load(os.path.join(self.path_checkpoints, name, name + '.pkl'))
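A hedged sketch of the saving side implied by this layout; the directory and file naming mirror the loader above, but the method itself is an assumption, not part of the original source:

def save_checkpoint(self, num, state):
    # Mirrors the check_<num>/check_<num>.pkl layout read by load_current_checkpoints.
    name = 'check_' + str(num)
    checkpoint_dir = os.path.join(self.path_checkpoints, name)
    os.makedirs(checkpoint_dir, exist_ok=True)
    joblib.dump(state, os.path.join(checkpoint_dir, name + '.pkl'))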
def _classify(df):
    """Classification
    - Args:
        df(pandas.dataframe): candidate qa pairs with extracted features
    - Returns:
        question_answers(pandas.dataframe): Question, Answer, Prediction (label)
    """
    model_path = os.path.dirname(os.path.abspath(__file__)) + '/models/clf.pkl'
    clf = joblib.load(model_path)
    # copy to avoid a SettingWithCopyWarning when adding the Prediction column
    question_answers = df[['Question', 'Answer']].copy()
    # as_matrix() was removed in pandas 1.0; to_numpy() is the replacement
    X = df.drop(['Answer', 'Question', 'Sentence'], axis=1).to_numpy()
    y = clf.predict(X)
    question_answers['Prediction'] = y
    return question_answers
def test(feats_test, estimator=None, model='model.pkl'):
    """
    Run a trained ML model on test data and return its predictions.
    @param feats_test: test features (obtained from data)
    @param estimator: the trained ML model/estimator; loaded from disk if None
    @param model: filename of the persisted estimator, relative to cfg.PATH_RESOURCES
    """
    if estimator is None:
        estimator = joblib.load(cfg.PATH_RESOURCES + model)
    return estimator.predict(feats_test)
def load(self):
    self.classifier = joblib.load(self.model_path + "banana.pkl")
    self.scaler = joblib.load(self.model_path + "banana_scaler.pkl")
    with open(self.model_path + "banana_list.txt") as f:
        self.features = f.readline().split()
    return self