def combine_rmse_gpvar(grid_scores, w_rmse=0.8, w_gpvar=0.2):
    """Blend RMSE and GP-variance scores into a single weighted score.

    Each entry of ``grid_scores`` must expose ``mean_scores``; element 0 is
    used as the RMSE and element 1 as the GP variance.

    Args:
        grid_scores: sized sequence of score objects.
        w_rmse: weight applied to the min-max scaled RMSE column.
        w_gpvar: weight applied to the min-max scaled GP-variance column.

    Returns:
        A ``CombinedScore`` built positionally from: the weighted scores, the
        scaled (n, 2) score matrix, and the ascending argsort indices of the
        combined, RMSE and GP-variance scores respectively.
    """
    from sklearn.preprocessing import minmax_scale

    # Gather the raw (rmse, gpvar) pairs, one row per grid entry.
    raw = np.empty((len(grid_scores), 2))
    for row, entry in enumerate(grid_scores):
        raw[row, 0], raw[row, 1] = entry.mean_scores[0], entry.mean_scores[1]

    # Rankings of the individual metrics are taken on the unscaled values.
    rmse_order = np.argsort(raw[:, 0])
    gpvar_order = np.argsort(raw[:, 1])

    # Bring both columns onto the default [0, 1] range before weighting them.
    scaled = minmax_scale(raw)
    weighted = w_rmse * scaled[:, 0] + w_gpvar * scaled[:, 1]
    weighted_order = np.argsort(weighted)

    return CombinedScore(weighted,
                         scaled,
                         weighted_order,
                         rmse_order,
                         gpvar_order)
# Example source code for Python's minmax_scale()
def make_drop_duplicate(self, _df_csv_read_ori, _drop_duplicate, _label):
    """Drop rows whose non-label columns duplicate an earlier row.

    Args:
        _df_csv_read_ori: pandas DataFrame to de-duplicate.
        _drop_duplicate: switch for the operation. ``None``, ``'null'`` or any
            value equal to ``False`` disables de-duplication.
        _label: name of the label column; it is excluded from the columns
            compared when looking for duplicates.

    Returns:
        The input DataFrame itself when de-duplication is disabled; otherwise
        a new DataFrame keeping only the first row of each duplicate group.

    Raises:
        ValueError: if de-duplication is enabled and ``_label`` is not one of
            the DataFrame's columns.

    Side effects:
        When de-duplication is enabled, the result is also written as CSV to
        ``<self.data_src_path>/backup/<UTC timestamp>_dup.csvbk``.
    """
    # ``== False`` (rather than ``not ...``) is kept on purpose so that only
    # values equal to False disable the step, exactly as before.
    if (_drop_duplicate is None
            or _drop_duplicate == 'null'
            or _drop_duplicate == False):  # noqa: E712
        logging.info("No Duplicate")
        return _df_csv_read_ori

    # Compare every column except the label when detecting duplicates.
    cell_features = _df_csv_read_ori.columns.tolist()
    cell_features.remove(_label)
    result_df = _df_csv_read_ori.drop_duplicates(subset=cell_features, keep="first")
    logging.info("duplicated row delete {0}".format(len(_df_csv_read_ori.index)-len(result_df.index)))

    # Keep a timestamped backup copy of the de-duplicated data.
    temp_duplicate_filename = strftime("%Y-%m-%d-%H:%M:%S", gmtime()) + "_dup.csvbk"
    result_df.to_csv(self.data_src_path + "/backup/" + temp_duplicate_filename)
    return result_df
def cell_fd_extention(fname_org='sheet.gz/cell_db.cvs.gz', camera_bit_resolution=14):
    """Add Fresnel-diffraction columns to the cell table read from a CSV file.

    Args:
        fname_org: path of the CSV file to load.
        camera_bit_resolution: the scaled magnitude is multiplied by
            ``2 ** camera_bit_resolution`` before being cast to ``int``.

    Returns:
        A copy of the loaded DataFrame with two extra columns:
        ``'freznel image'`` (flattened output of ``cell_fd_conv``) and
        ``'mag freznel image'`` (its min-max scaled magnitude as integers).
    """
    source_df = pd.read_csv(fname_org)
    n_img, n_x, n_y = cell_fd_info(source_df)
    extended_df = source_df.copy()

    # Fresnel diffraction
    diffracted = cell_fd_conv(source_df)
    extended_df['freznel image'] = diffracted.reshape(-1)

    # One row per image, so minmax_scale works on an (n_img, pixels) matrix.
    magnitude = np.abs(diffracted.reshape(n_img, -1))
    full_scale = 2**camera_bit_resolution
    scaled_magnitude = preprocessing.minmax_scale(magnitude) * full_scale

    # Restore the (image, x, y) layout for the integer cast, then flatten.
    as_int_images = scaled_magnitude.reshape(n_img, n_x, n_y).astype(int)
    extended_df['mag freznel image'] = as_int_images.reshape(-1)
    return extended_df
def cell_fd_extention(fname_org='sheet.gz/cell_db.cvs.gz', camera_bit_resolution=14):
    """Add Fresnel-diffraction columns to the cell table read from a CSV file.

    Args:
        fname_org: path of the CSV file to load.
        camera_bit_resolution: the scaled magnitude is multiplied by
            ``2 ** camera_bit_resolution`` before being cast to ``int``.

    Returns:
        A copy of the loaded DataFrame with two extra columns:
        ``'freznel image'`` (flattened output of ``cell_fd_conv``) and
        ``'mag freznel image'`` (its min-max scaled magnitude as integers).
    """
    source_df = pd.read_csv(fname_org)
    n_img, n_x, n_y = cell_fd_info(source_df)
    extended_df = source_df.copy()

    # Fresnel diffraction
    diffracted = cell_fd_conv(source_df)
    extended_df['freznel image'] = diffracted.reshape(-1)

    # One row per image, so minmax_scale works on an (n_img, pixels) matrix.
    magnitude = np.abs(diffracted.reshape(n_img, -1))
    full_scale = 2**camera_bit_resolution
    scaled_magnitude = preprocessing.minmax_scale(magnitude) * full_scale

    # Restore the (image, x, y) layout for the integer cast, then flatten.
    as_int_images = scaled_magnitude.reshape(n_img, n_x, n_y).astype(int)
    extended_df['mag freznel image'] = as_int_images.reshape(-1)
    return extended_df
def process(file_in=PATH_FILE_IN, file_out=PATH_FILE_FINAL):
    """Build lagged samples and targets from a pickled ``'close'`` series.

    The ``'close'`` column of the object unpickled from ``file_in`` is
    min-max scaled and reshaped into rows of 24 values; only the first value
    of each row is used. A sample holds 5 such values taken 24 rows apart,
    and its target is the value 5 * 24 rows after the sample's first row.

    Args:
        file_in: path of the pickle to read.
        file_out: indexable pair of paths; samples are saved to
            ``file_out[0]`` and targets to ``file_out[1]`` with ``np.save``.
    """
    step = 24
    n_lags = 5
    horizon = n_lags * step

    closes = preprocessing.minmax_scale(pd.read_pickle(file_in)['close'])
    row_heads = closes.reshape(-1, step)[:, 0]
    n_rows = len(row_heads)

    samples = np.array([[row_heads[start + lag * step] for lag in range(n_lags)]
                        for start in range(n_rows - horizon + 1)])
    samples = samples.reshape(-1, n_lags)
    targets = np.array([row_heads[start + horizon]
                        for start in range(n_rows - horizon)])

    # There is one more window than there are targets; drop the last window.
    np.save(file_out[0], samples[:-1])
    np.save(file_out[1], targets)
def process(file_in=PATH_FILE_IN, file_out=PATH_FILE_FINAL):
    """Build lagged samples and targets from a pickled ``'close'`` series.

    The ``'close'`` column of the object unpickled from ``file_in`` is
    min-max scaled and reshaped into rows of 24 values; only the first value
    of each row is used. A sample holds the values of 5 consecutive rows, and
    its target is the value of the row that follows them.

    Args:
        file_in: path of the pickle to read.
        file_out: indexable pair of paths; samples are saved to
            ``file_out[0]`` and targets to ``file_out[1]`` with ``np.save``.
    """
    row_width = 24
    n_lags = 5

    closes = preprocessing.minmax_scale(pd.read_pickle(file_in)['close'])
    row_heads = closes.reshape(-1, row_width)[:, 0]
    n_rows = len(row_heads)

    samples = np.array([[row_heads[start + lag] for lag in range(n_lags)]
                        for start in range(n_rows - n_lags + 1)])
    samples = samples.reshape(-1, n_lags)
    targets = np.array([row_heads[start + n_lags]
                        for start in range(n_rows - n_lags)])

    # There is one more window than there are targets; drop the last window.
    np.save(file_out[0], samples[:-1])
    np.save(file_out[1], targets)
def process(file_in=PATH_FILE_IN, file_out=PATH_FILE_FINAL):
    """Build lagged samples and targets from a pickled ``'close'`` series.

    The ``'close'`` column of the object unpickled from ``file_in`` is
    min-max scaled and used as a flat sequence. A sample holds 6 values taken
    24 * 24 positions apart, and its target is the value 6 * 24 * 24
    positions after the sample's first value.

    Args:
        file_in: path of the pickle to read.
        file_out: indexable pair of paths; samples are saved to
            ``file_out[0]`` and targets to ``file_out[1]`` with ``np.save``.
    """
    stride = 24 * 24
    n_lags = 6
    horizon = n_lags * stride

    closes = preprocessing.minmax_scale(pd.read_pickle(file_in)['close'])
    n_values = len(closes)

    samples = np.array([[closes[start + lag * stride] for lag in range(n_lags)]
                        for start in range(n_values - horizon + 1)])
    samples = samples.reshape(-1, n_lags)
    targets = np.array([closes[start + horizon]
                        for start in range(n_values - horizon)])

    # There is one more window than there are targets; drop the last window.
    np.save(file_out[0], samples[:-1])
    np.save(file_out[1], targets)
def scale_feature(self, col=None, scaling=None, scaling_parms=None):
    '''
    Scales a given set of numerical columns. This only works for columns
    with numerical values.

    Parameters
    ----------
    col : a string of a column name, or a list of many columns names or
            None (default). If col is None, all numerical columns will
            be used.
    scaling : {'zscore', 'minmax_scale' (default), 'scale', 'maxabs_scale',
                'robust_scale'}
        User-defined scaling functions can also be used through self.transform_feature
    scaling_parms : dictionary
        any additional parameters to be used for sklearn's scaling functions.
        When None, a default parameter set for the chosen scaler is used
        (no defaults are applied for 'zscore').

    Raises
    ------
    TypeError
        if `scaling` is not one of the supported names.
    '''
    self._validate_params(params_list={'col': col, 'scaling': scaling},
                          expected_types={'col': [str, list, type(None)],
                                          'scaling': [str, type(None)]})
    if scaling is None:
        scaling = 'minmax_scale'

    if scaling == 'zscore':
        # z-score is passed on as a lambda expression string.
        scaling = 'lambda x: (x - x.mean()) / x.std()'
    else:
        # Built on every call so callers never share a mutable default dict.
        default_parms = {
            'minmax_scale': {'feature_range': (0, 1), 'axis': 0},
            'scale': {'with_mean': True, 'with_std': True, 'axis': 0},
            'maxabs_scale': {'axis': 0},
            # 'quantile_range': (25.0, 75.0) is deliberately left out.
            'robust_scale': {'with_centering': True, 'with_scaling': True,
                             'axis': 0},
        }
        # Validate the scaler name independently of scaling_parms: previously
        # a supported scaler with caller-supplied scaling_parms fell through
        # to the "unsupported" error.
        if scaling not in default_parms:
            raise TypeError('UNSUPPORTED scaling TYPE')
        if scaling_parms is None:
            scaling_parms = default_parms[scaling]

    self.transform_feature(col=col, func_str=scaling, addtional_params=scaling_parms)
def convert(image):
    """Rescale an image matrix to 0-255 and return it as unsigned bytes.

    Intended for handing a scikit-image binary image to OpenCV.
    ``DataConversionWarning`` raised while scaling is suppressed.
    """
    with warnings.catch_warnings(record=True):
        warnings.filterwarnings('ignore', category=DataConversionWarning)
        rescaled = minmax_scale(image, (0, 255))
        return rescaled.astype(np.ubyte)
def make_preprocessing_pandas(self, _df_csv_read_ori, _preprocessing_type, _label):
    """Apply sklearn preprocessing functions to the numeric, non-label columns.

    Args:
        _df_csv_read_ori: pandas DataFrame to preprocess. Its numeric columns
            are overwritten in place.
        _preprocessing_type: ``None`` or ``'null'`` to skip preprocessing;
            otherwise a single name or a collection of names out of
            ``'scale'``, ``'minmax_scale'``, ``'robust_scale'``,
            ``'normalize'``, ``'maxabs_scale'``.
        _label: label column name, or a collection of label column names;
            label columns are never preprocessed.

    Returns:
        The same DataFrame object that was passed in.
    """
    if _preprocessing_type is None or _preprocessing_type == 'null':
        logging.info("No Preprocessing")
        return _df_csv_read_ori

    logging.info("Preprocessing type : {0}".format(_preprocessing_type))

    # A bare string must be treated as ONE name. Using ``in`` directly on a
    # string is a substring test, so 'minmax_scale' would also trigger 'scale',
    # and a column called 'a' would count as part of a label called 'label'.
    if isinstance(_preprocessing_type, str):
        requested = [_preprocessing_type]
    else:
        requested = _preprocessing_type
    if _label is None:
        labels = []
    elif isinstance(_label, str):
        labels = [_label]
    else:
        labels = _label

    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    # Applied in this fixed order when several types are requested.
    # NOTE(review): sklearn's ``normalize`` is documented for 2-D input, so it
    # presumably fails on a single column - confirm before relying on it.
    scaler_names = ('scale', 'minmax_scale', 'robust_scale', 'normalize',
                    'maxabs_scale')

    # ``Series.items()`` replaces ``iteritems()``, which pandas 2.0 removed.
    for column, dtype in _df_csv_read_ori.dtypes.items():
        if dtype not in numerics or column in labels:
            continue
        for scaler_name in scaler_names:
            if scaler_name in requested:
                scaler = getattr(preprocessing, scaler_name)
                # Missing values are replaced by 0.0 before scaling.
                _df_csv_read_ori[column] = scaler(_df_csv_read_ori[column].fillna(0.0))
    return _df_csv_read_ori
def extention(self, camera_bit_resolution=14):
    """Add Fresnel-diffraction columns to the cell table read from a CSV file.

    Reads ``self.fname_org`` (CSV path) and ``self.h2d`` (passed through to
    ``cell_fd_conv``).

    Args:
        camera_bit_resolution: the scaled magnitude is multiplied by
            ``2 ** camera_bit_resolution`` before being cast to ``int``.

    Returns:
        A copy of the loaded DataFrame with two extra columns:
        ``'freznel image'`` (flattened output of ``cell_fd_conv``) and
        ``'mag freznel image'`` (its min-max scaled magnitude as integers).
    """
    csv_path = self.fname_org
    kernel = self.h2d

    source_df = pd.read_csv(csv_path)
    n_img, n_x, n_y = cell_fd_info(source_df)
    extended_df = source_df.copy()

    # Fresnel diffraction
    diffracted = cell_fd_conv(source_df, kernel)
    extended_df['freznel image'] = diffracted.reshape(-1)

    # One row per image, so minmax_scale works on an (n_img, pixels) matrix.
    magnitude = np.abs(diffracted.reshape(n_img, -1))
    full_scale = 2**camera_bit_resolution
    scaled_magnitude = preprocessing.minmax_scale(magnitude) * full_scale

    # Restore the (image, x, y) layout for the integer cast, then flatten.
    as_int_images = scaled_magnitude.reshape(n_img, n_x, n_y).astype(int)
    extended_df['mag freznel image'] = as_int_images.reshape(-1)
    return extended_df
#Deep Learning
def extention(self, camera_bit_resolution=14):
    """Add Fresnel-diffraction columns to the cell table read from a CSV file.

    Reads ``self.fname_org`` (CSV path) and ``self.h2d`` (passed through to
    ``cell_fd_conv``).

    Args:
        camera_bit_resolution: the scaled magnitude is multiplied by
            ``2 ** camera_bit_resolution`` before being cast to ``int``.

    Returns:
        A copy of the loaded DataFrame with two extra columns:
        ``'freznel image'`` (flattened output of ``cell_fd_conv``) and
        ``'mag freznel image'`` (its min-max scaled magnitude as integers).
    """
    csv_path = self.fname_org
    kernel = self.h2d

    source_df = pd.read_csv(csv_path)
    n_img, n_x, n_y = cell_fd_info(source_df)
    extended_df = source_df.copy()

    # Fresnel diffraction
    diffracted = cell_fd_conv(source_df, kernel)
    extended_df['freznel image'] = diffracted.reshape(-1)

    # One row per image, so minmax_scale works on an (n_img, pixels) matrix.
    magnitude = np.abs(diffracted.reshape(n_img, -1))
    full_scale = 2**camera_bit_resolution
    scaled_magnitude = preprocessing.minmax_scale(magnitude) * full_scale

    # Restore the (image, x, y) layout for the integer cast, then flatten.
    as_int_images = scaled_magnitude.reshape(n_img, n_x, n_y).astype(int)
    extended_df['mag freznel image'] = as_int_images.reshape(-1)
    return extended_df
#Deep Learning
def normalise_and_centre_score(strategy_score, up_threshold, low_threshold):
    """Normalise and centre a score for threshold fitting.

    The score is min-max scaled and shifted down by 0.5. Values beyond
    ``+/- up_threshold`` are capped, values whose magnitude is below
    ``low_threshold`` are set to 0, and the result is shifted back up by 0.5.
    """
    centred = minmax_scale(strategy_score) - 0.5

    # Cap both tails at the upper threshold.
    lower_cap = -up_threshold
    centred[centred > up_threshold] = up_threshold
    centred[centred < lower_cap] = lower_cap

    # Flatten everything inside the dead band around the centre.
    centred[abs(centred) < low_threshold] = 0

    return centred + 0.5
def features_processing(file_in, file_out):
    """Min-max scale a saved feature array along axis 1 and save it as float32.

    Args:
        file_in: path passed to ``np.load``.
        file_out: path passed to ``np.save`` for the scaled array.
    """
    scaled = preprocessing.minmax_scale(np.load(file_in), axis=1)
    np.save(file_out, scaled.astype('float32'))
def features_processing(file_in, file_out):
    """Min-max scale a saved feature array along axis 1 and save it as float32.

    Args:
        file_in: path passed to ``np.load``.
        file_out: path passed to ``np.save`` for the scaled array.
    """
    scaled = preprocessing.minmax_scale(np.load(file_in), axis=1)
    np.save(file_out, scaled.astype('float32'))