def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)
    rearranged_data = raw_data[num_epochs_per_subj:] + raw_data[0:num_epochs_per_subj]
    rearranged_labels = labels[num_epochs_per_subj:] + labels[0:num_epochs_per_subj]
    clf.fit(list(zip(rearranged_data, rearranged_data)), rearranged_labels,
            num_training_samples=num_epochs_per_subj*(num_subjects-1))
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[0:num_epochs_per_subj]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portions, the test data is already
    # cached in the classifier, so no test samples need to be passed to score()
    print(clf.score(None, test_labels))
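A note on the accuracy bookkeeping above: scipy.spatial.distance.hamming returns the fraction of positions that disagree, so multiplying by the number of test epochs recovers the count of incorrect predictions. A minimal standalone sketch with made-up labels:

import numpy as np
from scipy.spatial.distance import hamming

predicted = np.array([0, 1, 1, 0, 1, 0])     # hypothetical predictions
true_labels = np.array([0, 1, 0, 0, 1, 1])   # hypothetical ground truth
num_test_epochs = len(true_labels)
incorrect = hamming(predicted, true_labels) * num_test_epochs   # 2 mismatches
print('accuracy is %d / %d = %.2f' %
      (num_test_epochs - incorrect, num_test_epochs,
       (num_test_epochs - incorrect) / num_test_epochs))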
def example_of_correlating_two_components(raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
    # correlate data from two different masks (components) for classification
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj*(num_subjects-1)
    clf.fit(list(zip(raw_data[0:num_training_samples], raw_data2[0:num_training_samples])),
            labels[0:num_training_samples])
    X = list(zip(raw_data[num_training_samples:], raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when correlating two components, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # here the test data is passed in explicitly, so give it to score()
    print(clf.score(X, test_labels))
def example_of_correlating_two_components_aggregating_sim_matrix(raw_data, raw_data2, labels,
                                                                 num_subjects, num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(svm_clf, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj*(num_subjects-1)
    clf.fit(list(zip(raw_data, raw_data2)), labels,
            num_training_samples=num_training_samples)
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[num_training_samples:]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portions, the test data is already
    # cached in the classifier, so no test samples need to be passed to score()
    print(clf.score(None, test_labels))
# python3 classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy
def compute_clients_dist(self, client_data):
    client_categorical_feats = [client_data.get(specified_key) for specified_key in CATEGORICAL_FEATURES]
    client_continuous_feats = [client_data.get(specified_key) for specified_key in CONTINUOUS_FEATURES]
    # Compute the distances between the user and the cached continuous
    # and categorical features.
    cont_features = distance.cdist(self.continuous_features,
                                   np.array([client_continuous_feats]),
                                   'canberra')
    # The lambda trick is needed to prevent |cdist| from force-casting the
    # string features to double.
    cat_features = distance.cdist(self.categorical_features,
                                  np.array([client_categorical_feats]),
                                  lambda x, y: distance.hamming(x, y))
    # Take the product of similarities to attain a univariate similarity score.
    # Addition of 0.001 to the continuous features avoids a zero value from the
    # categorical variables, allowing categorical features precedence.
    return (cont_features + 0.001) * cat_features
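A toy sketch of the same two-metric scheme, with a small cached feature matrix invented for illustration: cdist with the 'canberra' metric handles the numeric columns, while the lambda keeps cdist from casting the string columns to float before handing them to hamming().

import numpy as np
from scipy.spatial import distance

cached_continuous = np.array([[1.0, 200.0], [3.0, 150.0]])   # hypothetical numeric features
cached_categorical = np.array([['en-US', 'Linux'], ['de', 'Darwin']], dtype=object)
client_continuous = np.array([[1.0, 180.0]])
client_categorical = np.array([['en-US', 'Darwin']], dtype=object)

cont = distance.cdist(cached_continuous, client_continuous, 'canberra')
cat = distance.cdist(cached_categorical, client_categorical,
                     lambda x, y: distance.hamming(x, y))
print((cont + 0.001) * cat)   # one combined distance per cached row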
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])
    w = np.array([1, 3])
    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, 1 - y2), 1)
    assert_equal(hamming_loss(y1, 1 - y1), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)
    assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12)
    assert_equal(hamming_loss(y1, 1-y2, sample_weight=w), 11. / 12)
    assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3)
    # sp_hamming only works with 1-D arrays
    assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0]))
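The last assertion relies on the fact that, for 1-D binary vectors, sklearn's hamming_loss and scipy's hamming both report the fraction of differing positions. A short check of that equivalence:

import numpy as np
from scipy.spatial.distance import hamming as sp_hamming
from sklearn.metrics import hamming_loss

a = np.array([0, 1, 1, 0])
b = np.array([0, 0, 1, 1])
# both metrics give 2 mismatches out of 4 positions
assert hamming_loss(a, b) == sp_hamming(a, b) == 0.5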
def example_of_cross_validation_with_detailed_info(raw_data, labels, num_subjects, num_epochs_per_subj):
    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    for i in range(num_subjects):
        leave_start = i * num_epochs_per_subj
        leave_end = (i+1) * num_epochs_per_subj
        training_data = raw_data[0:leave_start] + raw_data[leave_end:]
        test_data = raw_data[leave_start:leave_end]
        training_labels = labels[0:leave_start] + labels[leave_end:]
        test_labels = labels[leave_start:leave_end]
        clf.fit(list(zip(training_data, training_data)), training_labels)
        # joblib can be used for saving and loading models
        #joblib.dump(clf, 'model/logistic.pkl')
        #clf = joblib.load('model/svm.pkl')
        predict = clf.predict(list(zip(test_data, test_data)))
        print(predict)
        print(clf.decision_function(list(zip(test_data, test_data))))
        incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
        logger.info(
            'when leaving subject %d out for testing, the accuracy is %d / %d = %.2f' %
            (i, num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
             (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
        )
        print(clf.score(list(zip(test_data, test_data)), test_labels))
def similarity_function(x, y):
    """ Similarity function for comparing user features.
    This really should be implemented in taar.similarity_recommender
    and then imported here for consistency.
    """
    def safe_get(field, row, default_value):
        # Safely get a value from the Row. If the value is None, return the
        # default value.
        return row[field] if row[field] is not None else default_value
    # Extract the values of the categorical and continuous features for both
    # the x and y samples. Use an empty string as the default value for missing
    # categorical fields and 0 for the continuous ones.
    x_categorical_features = [safe_get(k, x, "") for k in CATEGORICAL_FEATURES]
    x_continuous_features = [safe_get(k, x, 0) for k in CONTINUOUS_FEATURES]
    y_categorical_features = [safe_get(k, y, "") for k in CATEGORICAL_FEATURES]
    y_continuous_features = [safe_get(k, y, 0) for k in CONTINUOUS_FEATURES]
    # Here a larger distance indicates a poorer match between categorical variables.
    j_d = distance.hamming(x_categorical_features, y_categorical_features)
    j_c = distance.canberra(x_continuous_features, y_continuous_features)
    # Take the product of similarities to attain a univariate similarity score.
    # Add a minimal constant to prevent zero values from categorical features.
    # Note: since both distance functions return a NumPy type, we need to
    # call the |item| function to get the underlying Python type. If we don't
    # do that this job will fail when performing KDE due to SPARK-20803 on
    # Spark 2.2.0.
    return abs((j_c + 0.001) * j_d).item()
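A hypothetical invocation of similarity_function, assuming the module defines CATEGORICAL_FEATURES and CONTINUOUS_FEATURES and that the rows support item access; the field names and values below are invented for illustration.

# hypothetical feature lists; the real module defines its own
CATEGORICAL_FEATURES = ["locale", "os"]
CONTINUOUS_FEATURES = ["total_uri", "active_hours"]

row_x = {"locale": "en-US", "os": "Linux", "total_uri": 120, "active_hours": 4.5}
row_y = {"locale": "en-US", "os": "Darwin", "total_uri": 100, "active_hours": None}
# one categorical mismatch out of two, times the Canberra distance (plus 0.001)
print(similarity_function(row_x, row_y))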
def check_distance_funciton_input(distance_func_name, netinfo):
    """
    Function returns distance_func_name given netinfo.
    """
    if distance_func_name == 'default' and netinfo['nettype'][0] == 'b':
        print('Default distance function specified. As network is binary, using Hamming')
        distance_func_name = 'hamming'
    elif distance_func_name == 'default' and netinfo['nettype'][0] == 'w':
        distance_func_name = 'euclidean'
        print(
            'Default distance function specified. '
            'As network is weighted, using Euclidean')
    return distance_func_name
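A quick hypothetical call showing the dispatch: a nettype starting with 'b' (binary) falls back to Hamming, one starting with 'w' (weighted) to Euclidean, and any explicit name passes through untouched. The netinfo dicts here are invented.

print(check_distance_funciton_input('default', {'nettype': 'bu'}))   # 'hamming'
print(check_distance_funciton_input('default', {'nettype': 'wd'}))   # 'euclidean'
print(check_distance_funciton_input('jaccard', {'nettype': 'bu'}))   # 'jaccard'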
def plot_trajectory_uncertainty(true, gen, filter, smooth, filename):
    sequences, timesteps, h, w = true.shape
    errors = dict(Generated=list(), Filtered=list(), Smoothed=list())
    for label, var in zip(('Generated', 'Filtered', 'Smoothed'), (gen, filter, smooth)):
        for step in range(timesteps):
            errors[label].append(hamming(true[:, step].ravel() > 0.5, var[:, step].ravel() > 0.5))
        plt.plot(np.linspace(1, timesteps, num=timesteps).astype(int), errors[label], linewidth=3, ms=20, label=label)
    plt.xlabel('Steps', fontsize=20)
    plt.ylabel('Hamming distance', fontsize=20)
    plt.legend(fontsize=20)
    plt.savefig(filename)
    plt.close()
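The per-step error plotted above is simply the Hamming distance between thresholded frames. A self-contained sketch with random arrays whose shapes and 0.5 threshold are chosen only for illustration:

import numpy as np
from scipy.spatial.distance import hamming

rng = np.random.default_rng(0)
true = rng.random((8, 5, 16, 16))   # sequences x timesteps x height x width
gen = rng.random((8, 5, 16, 16))
# fraction of pixels that disagree after binarizing, one value per timestep
per_step_error = [hamming(true[:, t].ravel() > 0.5, gen[:, t].ravel() > 0.5)
                  for t in range(true.shape[1])]
print(per_step_error)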
def sample(config, model=None):
    """ Create sampler and sample per options in configuration file.
    If there is a configuration option 'load_model_from_pickle' in
    section 'sampling' the function tries to load that model, ignoring
    the data argument (the option value should be the path to a pickle
    file containing a single LogisticRuleModel object.) If the model
    argument is None, and that configuration option is not present, an
    exception results.
    """
    if config.has_option('sampling', 'load_model_from_pickle'):
        with open(config.get('sampling', 'load_model_from_pickle'), 'rb') as f:
            model = pickle.load(f)
    if model is None:
        raise ValueError('Model must be passed as argument if not specified in config file.')
    l = [hamming(model.data.y, t) for t in model.rule_population.flat_truth]
    arbitrary_rl = rules.RuleList(
        [[model.rule_population.flat_rules[np.argmin(l)]]]
    )
    sampler = logit_rules.LogisticRuleSampler(model,
                                              arbitrary_rl)
    if config.has_option('sampling', 'sampling_time'):
        sampling_time = config.getfloat('sampling', 'sampling_time')
        logger.info('Starting sampling: will continue for %.1f seconds' %
                    sampling_time)
        sampler.sample_for(sampling_time)
    elif config.has_option('sampling', 'total_samples'):
        total_samples = config.getint('sampling', 'total_samples')
        logger.info('Starting to draw %d samples' % total_samples)
        sampler.sample(total_samples)
    else:
        raise ValueError('Either number of samples or sampling time must be specified.')
    if config.has_option('sampling', 'pickle_sampler'):
        prefix = config.get('description', 'tag')
        if config.getboolean('sampling', 'pickle_sampler'):
            filename = prefix + '_sampler_object.pickle'
            with open(filename, 'wb') as f:
                pickle.dump(sampler, f)
            logger.info('Sampler written to %s' % filename)
    return sampler
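For reference, a minimal configuration object that would drive sample() down the fixed-duration branch, built with configparser; the section and option names mirror those read above, while the tag and values are placeholders.

import configparser

config = configparser.ConfigParser()
config['description'] = {'tag': 'example_run'}
config['sampling'] = {
    'sampling_time': '60.0',    # seconds; triggers sampler.sample_for(...)
    'pickle_sampler': 'true',   # afterwards writes <tag>_sampler_object.pickle
}
# sampler = sample(config, model=trained_model)  # a model must be supplied here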
def getDistanceFunction(requested_metric):
    """
    This function returns a specified distance function.
    **PARAMETERS**
    :'requested_metric': can be 'hamming', 'euclidean' or any of the functions in https://docs.scipy.org/doc/scipy/reference/spatial.distance.html which only require u and v as input.
    **OUTPUT**
    returns distance function (as function)
    **HISTORY**
    :Created: Dec 2016, WHT
    :Updated (v0.2.1): Aug 2017, WHT. Changed from distance functions being in misc to using scipy.
    """
    distance_options = {
        'braycurtis': distance.braycurtis,
        'canberra': distance.canberra,
        'chebyshev': distance.chebyshev,
        'cityblock': distance.cityblock,
        'correlation': distance.correlation,
        'cosine': distance.cosine,
        'euclidean': distance.euclidean,
        'sqeuclidean': distance.sqeuclidean,
        'dice': distance.dice,
        'hamming': distance.hamming,
        'jaccard': distance.jaccard,
        'kulsinski': distance.kulsinski,
        'matching': distance.matching,
        'rogerstanimoto': distance.rogerstanimoto,
        'russellrao': distance.russellrao,
        'sokalmichener': distance.sokalmichener,
        'sokalsneath': distance.sokalsneath,
        'yule': distance.yule,
    }
    if requested_metric in distance_options:
        return distance_options[requested_metric]
    else:
        raise ValueError('Distance function cannot be found.')
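Typical usage of getDistanceFunction: look the metric up by name and apply it to two vectors (the vectors here are arbitrary examples).

import numpy as np

dist_func = getDistanceFunction('hamming')
u = np.array([1, 0, 1, 1])
v = np.array([1, 1, 1, 0])
print(dist_func(u, v))   # 0.5, the fraction of mismatching entries
# getDistanceFunction('not-a-metric')  # would raise ValueError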