from collections import OrderedDict
from operator import itemgetter

from sklearn.preprocessing import maxabs_scale


def query(self, query, k=1, sort=True):
    models = self.retrieval_models
    # Scale the model weights so the largest absolute weight becomes 1.
    weights = maxabs_scale(self.weights) if self.weights is not None else None
    agg_fn = self.aggregation_fn
    # We only need to sort in the final run.
    combined = [m.query(query, k=k, sort=False) for m in models]
    if weights is not None:
        # Rescale each model's scores by that model's weight.
        combined = [{doc: score * w for doc, score in r.items()}
                    for r, w in zip(combined, weights)]
    # aggregate_dicts is an external helper that merges the per-model score dicts.
    combined = aggregate_dicts(combined, agg_fn=agg_fn, sort=True)
    if sort:
        # Only cut off at k if this is the final (sorted) output.
        combined = OrderedDict(sorted(combined.items(), key=itemgetter(1),
                                      reverse=True)[:k])
    return combined
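A minimal sketch of what the weight scaling does, with made-up weights: maxabs_scale divides by the largest absolute value, so the strongest retrieval model ends up with weight 1.0 and the others are scaled proportionally.

from sklearn.preprocessing import maxabs_scale

weights = maxabs_scale([2.0, 4.0, 1.0])
print(weights)  # -> [0.5, 1.0, 0.25]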
import logging
from time import gmtime, strftime


def make_drop_duplicate(self, _df_csv_read_ori, _drop_duplicate, _label):
    """ Drop duplicate rows, comparing every column except the label column.
    Args:
        params:
            * _df_csv_read_ori : pandas dataframe
            * _drop_duplicate : whether duplicate rows should be dropped
            * _label : name of the label column (excluded from the comparison)
    Returns:
        Preprocessed DataFrame
    """
    if _drop_duplicate is None or _drop_duplicate == 'null' or _drop_duplicate is False:
        logging.info("No Duplicate")
        result_df = _df_csv_read_ori
    else:
        cell_features = _df_csv_read_ori.columns.tolist()
        cell_features.remove(_label)
        result_df = _df_csv_read_ori.drop_duplicates(cell_features, keep="first")
        logging.info("deleted {0} duplicated rows".format(
            len(_df_csv_read_ori.index) - len(result_df.index)))
        # Back up the de-duplicated frame with a timestamped file name.
        temp_duplicate_filename = strftime("%Y-%m-%d-%H:%M:%S", gmtime()) + "_dup.csvbk"
        result_df.to_csv(self.data_src_path + "/backup/" + temp_duplicate_filename)
    return result_df
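An illustrative call, assuming a hypothetical `handler` instance that exposes data_src_path with an existing backup/ directory. The two rows that share identical features count as duplicates even though their labels differ, because the label column is excluded from the comparison.

import pandas as pd

df = pd.DataFrame({"f1": [1, 1, 2], "f2": [3, 3, 4], "label": [0, 1, 1]})
deduped = handler.make_drop_duplicate(df, _drop_duplicate=True, _label="label")
print(len(deduped.index))  # -> 2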
from sklearn.preprocessing import maxabs_scale

def sk_abs_scale(X):
    # Scale each feature of X by its maximum absolute value.
    return maxabs_scale(X)
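A quick check of the wrapper with made-up data: each column is divided by its maximum absolute value, so signs are preserved and zeros stay zero.

import numpy as np

X = np.array([[1.0, -2.0],
              [2.0,  4.0]])
print(sk_abs_scale(X))
# [[ 0.5 -0.5]
#  [ 1.   1. ]]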
def scale_feature(self, col=None, scaling=None, scaling_parms=None):
    '''
    Scales a given set of numerical columns. This only works for columns
    with numerical values.
    Parameters
    ----------
    col : a string with a single column name, a list of column names, or
          None (default). If col is None, all numerical columns will be used.
    scaling : {'zscore', 'minmax_scale' (default), 'scale', 'maxabs_scale',
               'robust_scale'}
        User-defined scaling functions can also be used through
        self.transform_feature.
    scaling_parms : dictionary
        Any additional parameters to pass to sklearn's scaling functions.
    '''
    self._validate_params(params_list={'col': col, 'scaling': scaling},
                          expected_types={'col': [str, list, type(None)],
                                          'scaling': [str, type(None)]})
    if scaling is None:
        scaling = 'minmax_scale'
    # Validate up front so user-supplied scaling_parms do not fall through
    # to the error branch.
    if scaling not in ('zscore', 'minmax_scale', 'scale', 'maxabs_scale',
                       'robust_scale'):
        raise TypeError('UNSUPPORTED scaling TYPE')
    if scaling == 'zscore':
        scaling = 'lambda x: (x - x.mean()) / x.std()'
    elif scaling == 'minmax_scale' and scaling_parms is None:
        scaling_parms = {'feature_range': (0, 1), 'axis': 0}
    elif scaling == 'scale' and scaling_parms is None:
        scaling_parms = {'with_mean': True, 'with_std': True, 'axis': 0}
    elif scaling == 'maxabs_scale' and scaling_parms is None:
        scaling_parms = {'axis': 0}
    elif scaling == 'robust_scale' and scaling_parms is None:
        scaling_parms = {'with_centering': True, 'with_scaling': True,
                         'axis': 0}  # 'quantile_range': (25.0, 75.0),
    self.transform_feature(col=col, func_str=scaling,
                           addtional_params=scaling_parms)
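An illustrative call, assuming `ds` is an instance of the (hypothetical) dataset class that defines scale_feature and transform_feature:

ds.scale_feature(col=['age', 'income'], scaling='maxabs_scale')
# Equivalent to applying sklearn.preprocessing.maxabs_scale column-wise,
# i.e. dividing each column by its maximum absolute value.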
from sklearn import preprocessing

def preprocess_features(X, ips):
    '''
    Scale the feature vectors using scikit-learn preprocessing.
    '''
    assert len(X.shape) == 2  # X must be 2-D: (n_samples, n_features).
    X = preprocessing.maxabs_scale(X, copy=False)
    return X, ips
def preprocess_features(X, Y):
    '''
    Scale the feature vectors using scikit-learn preprocessing.
    '''
    assert len(X.shape) == 2  # X must be 2-D: (n_samples, n_features).
    X = preprocessing.maxabs_scale(X, copy=False)
    return X, Y
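A sanity check with made-up data; with copy=False, maxabs_scale is allowed to modify the input array in place, which avoids a copy for large feature matrices.

import numpy as np

X = np.array([[1.0, -10.0],
              [3.0,   5.0]])
X_scaled, Y = preprocess_features(X, np.array([0, 1]))
print(X_scaled)
# [[ 0.33333333 -1.        ]
#  [ 1.          0.5       ]]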
def make_preprocessing_pandas(self, _df_csv_read_ori, _preprocessing_type, _label):
    """ Preprocess a pandas DataFrame with scikit-learn scalers.
    The label column is excluded from preprocessing.
    Args:
        params:
            * _preprocessing_type: ['scale', 'minmax_scale', 'robust_scale', 'normalize', 'maxabs_scale']
            * _df_csv_read_ori : pandas dataframe
            * _label : name of the label column
    Returns:
        Preprocessed DataFrame
    """
    if _preprocessing_type is None or _preprocessing_type == 'null':
        logging.info("No Preprocessing")
        result_df = _df_csv_read_ori
    else:
        logging.info("Preprocessing type : {0}".format(_preprocessing_type))
        if isinstance(_preprocessing_type, str):
            # Wrap a single type name in a list; a bare string would make the
            # membership tests below match substrings (e.g. 'scale' in 'maxabs_scale').
            _preprocessing_type = [_preprocessing_type]
        numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        for i, v in _df_csv_read_ori.dtypes.items():
            if v in numerics and i != _label:
                if 'scale' in _preprocessing_type:
                    _df_csv_read_ori[i] = preprocessing.scale(_df_csv_read_ori[i].fillna(0.0))
                if 'minmax_scale' in _preprocessing_type:
                    _df_csv_read_ori[i] = preprocessing.minmax_scale(_df_csv_read_ori[i].fillna(0.0))
                if 'robust_scale' in _preprocessing_type:
                    _df_csv_read_ori[i] = preprocessing.robust_scale(_df_csv_read_ori[i].fillna(0.0))
                if 'normalize' in _preprocessing_type:
                    # normalize expects a 2-D array: reshape the column, scale
                    # it to unit norm, then flatten back.
                    _df_csv_read_ori[i] = preprocessing.normalize(
                        _df_csv_read_ori[i].fillna(0.0).values.reshape(-1, 1), axis=0).ravel()
                if 'maxabs_scale' in _preprocessing_type:
                    _df_csv_read_ori[i] = preprocessing.maxabs_scale(_df_csv_read_ori[i].fillna(0.0))
        result_df = _df_csv_read_ori
    return result_df
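An illustrative call with a made-up frame and a hypothetical `handler` instance; after maxabs_scale every numeric feature column lies in [-1, 1] while the label column is untouched.

import pandas as pd

df = pd.DataFrame({"f1": [1.0, -2.0, 4.0], "label": [0, 1, 1]})
out = handler.make_preprocessing_pandas(df, ['maxabs_scale'], 'label')
print(out["f1"].tolist())  # -> [0.25, -0.5, 1.0]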