def main():
# model_file = "../data/word2vec/character.model"
model_file = "../data/word2vec_new/word.model"
checkSimilarity(model_file, "?")
# character_wv_file = '../data/word2vec/character_model.txt'
# word_wv_file = '../data/word2vec/word_model.txt'
#
# embeddings_file = word_wv_file
# wv, vocabulary = load_embeddings(embeddings_file)
#
# tsne = TSNE(n_components=2, random_state=0)
# np.set_printoptions(suppress=True)
# Y = tsne.fit_transform(wv[:1000, :])
#
# plt.scatter(Y[:, 0], Y[:, 1])
# for label, x, y in zip(vocabulary, Y[:, 0], Y[:, 1]):
# plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
# plt.show()
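# The commented-out block above sketches a t-SNE projection of the first 1000
# word vectors. For reference, a minimal load_embeddings matching that usage
# might look like the sketch below; the one-token-plus-vector-per-line file
# format (with no header line) is an assumption, not something the original
# code confirms.
import numpy as np

def load_embeddings(path):
    # Assumed format: one line per word, the token followed by its vector.
    vocabulary, vectors = [], []
    with open(path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split()
            if len(parts) < 2:
                continue  # skip blank lines
            vocabulary.append(parts[0])
            vectors.append([float(x) for x in parts[1:]])
    return np.array(vectors), vocabulary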
Example source code for Python's set_printoptions()
def test_formatter_reset(self):
x = np.arange(3)
np.set_printoptions(formatter={'all':lambda x: str(x-1)})
assert_equal(repr(x), "array([-1, 0, 1])")
np.set_printoptions(formatter={'int':None})
assert_equal(repr(x), "array([0, 1, 2])")
np.set_printoptions(formatter={'all':lambda x: str(x-1)})
assert_equal(repr(x), "array([-1, 0, 1])")
np.set_printoptions(formatter={'all':None})
assert_equal(repr(x), "array([0, 1, 2])")
np.set_printoptions(formatter={'int':lambda x: str(x-1)})
assert_equal(repr(x), "array([-1, 0, 1])")
np.set_printoptions(formatter={'int_kind':None})
assert_equal(repr(x), "array([0, 1, 2])")
x = np.arange(3.)
np.set_printoptions(formatter={'float':lambda x: str(x-1)})
assert_equal(repr(x), "array([-1.0, 0.0, 1.0])")
np.set_printoptions(formatter={'float_kind':None})
assert_equal(repr(x), "array([ 0., 1., 2.])")
def grad_nan_report(grads, tparams):
numpy.set_printoptions(precision=3)
D = OrderedDict()
i = 0
NaN_keys = []
magnitude = []
assert len(grads) == len(tparams)
    for k, v in tparams.items():  # was .iteritems(), which is Python 2 only
grad = grads[i]
magnitude.append(numpy.abs(grad).mean())
if numpy.isnan(grad.sum()):
NaN_keys.append(k)
#assert v.get_value().shape == grad.shape
D[k] = grad
i += 1
#norm = [numpy.sqrt(numpy.sum(grad**2)) for grad in grads]
    # print('\tgrad mean(abs(x))', numpy.array(magnitude))
return D, NaN_keys
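# Hypothetical usage of grad_nan_report: tparams maps parameter names to
# (unused here) shared variables, and grads holds the gradient arrays in the
# same order.
from collections import OrderedDict
import numpy

tparams = OrderedDict([('W', None), ('b', None)])
grads = [numpy.array([[0.1, numpy.nan]]), numpy.array([0.2, 0.3])]
D, nan_keys = grad_nan_report(grads, tparams)
print(nan_keys)  # ['W'] -- only W's gradient contains a NaN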
def pformat(obj, indent=0, depth=3):
if 'numpy' in sys.modules:
import numpy as np
print_options = np.get_printoptions()
np.set_printoptions(precision=6, threshold=64, edgeitems=1)
else:
print_options = None
out = pprint.pformat(obj, depth=depth, indent=indent)
if print_options:
np.set_printoptions(**print_options)
return out
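# The version above restores the print options only on the success path: if
# pprint.pformat raises, the temporary precision/threshold settings leak.
# A minimal exception-safe variant (an assumption, not the project's API):
def pformat_safe(obj, indent=0, depth=3):
    import pprint
    import sys
    if 'numpy' not in sys.modules:
        return pprint.pformat(obj, depth=depth, indent=indent)
    import numpy as np
    saved = np.get_printoptions()
    np.set_printoptions(precision=6, threshold=64, edgeitems=1)
    try:
        return pprint.pformat(obj, depth=depth, indent=indent)
    finally:
        np.set_printoptions(**saved)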
###############################################################################
# class `Logger`
###############################################################################
def plot_swap_subject_cm(swappy,title,name):
from sklearn.metrics import confusion_matrix
import numpy as np
subs = swappy.exportSubjectData()
labs = getLabelReal(subs)
# Compute confusion matrix
cnf_matrix = confusion_matrix(labs['actual'], labs['predicted'])
np.set_printoptions(precision=2)
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=['Bogus','Real'],
normalize=False,
title=title)
plt.savefig(name)
plt.show()
#import pandas as pd
#ps = pd.Series([(labs['actual'][x],labs['predicted'][x]) for x in range(0, len(labs['actual']))])
#counts = ps.value_counts()
#counts
@contextlib.contextmanager  # assumes `import contextlib` at module level
def np_printoptions(**kwargs):
    """Context manager to temporarily set numpy print options."""
    old = np.get_printoptions()
    np.set_printoptions(**kwargs)
    try:
        yield
    finally:
        np.set_printoptions(**old)
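# Usage sketch for the context manager above:
import numpy as np

arr = np.array([1 / 3, 2 / 3])
with np_printoptions(precision=2):
    print(arr)   # [0.33 0.67]
print(arr)       # [0.33333333 0.66666667] -- defaults restored on exit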
def _nn_pose_fill(valid):
"""
Looks up closest True for each False and returns
indices for fill-in-lookup
In: [True, False, True, ... , False, True]
Out: [0, 0, 2, ..., 212, 212]
"""
valid_inds, = np.where(valid)
invalid_inds, = np.where(~valid)
all_inds = np.arange(len(valid))
all_inds[invalid_inds] = -1
for j in range(10):
fwd_inds = valid_inds + j
bwd_inds = valid_inds - j
# Forward fill
invalid_inds, = np.where(all_inds < 0)
fwd_fill_inds = np.intersect1d(fwd_inds, invalid_inds)
all_inds[fwd_fill_inds] = all_inds[fwd_fill_inds-j]
# Backward fill
invalid_inds, = np.where(all_inds < 0)
if not len(invalid_inds): break
bwd_fill_inds = np.intersect1d(bwd_inds, invalid_inds)
all_inds[bwd_fill_inds] = all_inds[bwd_fill_inds+j]
# Check if any missing
invalid_inds, = np.where(all_inds < 0)
if not len(invalid_inds): break
    # np.set_printoptions(threshold=sys.maxsize)  # np.nan is rejected by modern NumPy
    # print(valid.astype(int))
    # print(np.array_str(all_inds))
    # print(np.where(all_inds < 0))
return all_inds
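# Worked example matching the docstring:
import numpy as np

valid = np.array([True, False, True, False, False, True])
print(_nn_pose_fill(valid))  # [0 0 2 2 5 5]
# Index 3 is forward-filled from index 2; index 4 is back-filled from index 5.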
def save_results(save_dict=None, **kwargs):
    old_opts = np.get_printoptions()
    # threshold=np.nan raises in modern NumPy; sys.maxsize (assumes `import sys`)
    # is the documented way to force an untruncated repr.
    np.set_printoptions(precision=2, threshold=sys.maxsize)
    if save_dict is None:
        save_dict = kwargs
    for key in save_dict.keys():
        save_dict[key] = str(save_dict[key])
    np.set_printoptions(**old_opts)
append_json('experiments.json', save_dict)
jsondict2csv('experiments.json', 'experiments.csv')
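# Hypothetical call (relies on the append_json/jsondict2csv helpers the
# snippet assumes): every value is stringified before being logged.
import numpy as np

save_results(accuracy=0.93, class_weights=np.linspace(0.0, 1.0, 3))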
def run_model(model):
'''Train model'''
# Call global variables
x_train, x_test, y_train, y_test = X_TRAIN, X_TEST, Y_TRAIN, Y_TEST
model.fit(x_train, y_train)
# make predictions for test data
y_pred = model.predict(x_test)
# Accuracy
acc = metrics.accuracy_score(y_test, y_pred)
print('Accuracy: %.2f%%' % (acc * 100.0))
# F1_score
# f1_score = metrics.f1_score(y_test, y_pred)
# print("F1_score: %.2f%%" % (f1_score * 100.0))
# AUC of ROC
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
auc = metrics.auc(fpr, tpr)
print('AUC: %.3f' % (auc))
# Logs for each fold
crossvalidation_acc.append(acc)
crossvalidation_auc.append(auc)
if ARGS.m:
cnf_matrix = confusion_matrix(y_test, y_pred)
print(cnf_matrix)
np.set_printoptions(precision=2)
if ARGS.t == '2':
            classes = np.asarray(['Spliced', 'Non-spliced'])
plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True)
elif ARGS.t == '3':
classes = np.asarray(['Low', 'Medium', 'High'])
plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True)
plt.show()
if ARGS.f:
feature_selection(imp=IMP, model=model)
print()
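# Caveat, worth noting rather than changing the code above: roc_curve is fed
# hard 0/1 predictions, so the reported "AUC" comes from a two-point ROC
# curve. A hedged sketch of the conventional score-based AUC inside run_model,
# assuming the estimator exposes predict_proba (not true of every model):
if hasattr(model, 'predict_proba'):
    y_score = model.predict_proba(x_test)[:, 1]
    fpr, tpr, _ = metrics.roc_curve(y_test, y_score)
    print('AUC (from scores): %.3f' % metrics.auc(fpr, tpr))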
def __init__(self, model, filename, mcmc=default_mcmc, headers=1,
ph_units="mrad", cc_modes=2, decomp_poly=4, c_exp=1.0,
log_min_tau=-3, guess_noise=False, keep_traces=False,
ccdt_priors='auto', ccdt_cfg=None):
self.model = model
self.filename = filename
self.mcmc = mcmc
self.headers = headers
self.ph_units = ph_units
self.cc_modes = cc_modes
self.decomp_poly = decomp_poly
self.c_exp = c_exp
self.log_min_tau = log_min_tau
self.guess_noise = guess_noise
self.keep_traces = keep_traces
self.ccd_priors = ccdt_priors
self.ccdtools_config = ccdt_cfg
if model == "CCD":
if self.ccd_priors == 'auto':
self.ccd_priors = self.get_ccd_priors(config=self.ccdtools_config)
print("\nUpdated CCD priors with new data")
self.start()
# def print_resul(self):
# #==============================================================================
# # Print the results
# pm, model, filename = self.pm, self.model, self.filename
# print('\n\nInversion success!')
# print('Name of file:', filename)
# print('Model used:', model)
# e_keys = sorted([s for s in list(pm.keys()) if "_std" in s])
# v_keys = [e.replace("_std", "") for e in e_keys]
# labels = ["{:<8}".format(x+":") for x in v_keys]
# np.set_printoptions(formatter={'float': lambda x: format(x, '6.3E')})
# for l, v, e in zip(labels, v_keys, e_keys):
# print(l, pm[v], '+/-', pm[e], np.char.mod('(%.2f%%)',abs(100*pm[e]/pm[v])))
def plot_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
# markers = ["D", "p", "*", "s", "d", "8", "^", "H", "v", ">", "<", "h", "|"]
markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]
plt.rc('legend',**{'fontsize':30})
classes_to_visual = list(set(classes_to_visual))
C = len(classes_to_visual)
while True:
if C <= len(markers):
break
markers += markers
class_ids = dict(zip(classes_to_visual, range(C)))
if isinstance(doc_codes, dict) and isinstance(doc_labels, dict):
codes, labels = zip(*[(code, doc_labels[doc]) for doc, code in doc_codes.items() if doc_labels[doc] in classes_to_visual])
else:
codes, labels = doc_codes, doc_labels
X = np.r_[list(codes)]
tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
np.set_printoptions(suppress=True)
X = tsne.fit_transform(X)
plt.figure(figsize=(10, 10), facecolor='white')
for c in classes_to_visual:
idx = np.array(labels) == c
# idx = get_indices(labels, c)
plt.plot(X[idx, 0], X[idx, 1], linestyle='None', alpha=1, marker=markers[class_ids[c]],
markersize=10, label=c)
legend = plt.legend(loc='upper right', shadow=True)
# plt.title("tsne")
# plt.savefig(save_file)
plt.savefig(save_file, format='eps', dpi=2000)
plt.show()
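# Hypothetical smoke test for plot_tsne: 100 random 50-d "documents" in two
# classes (t-SNE with perplexity=30 needs more samples than the perplexity).
import numpy as np

rng = np.random.RandomState(0)
doc_codes = {i: rng.rand(50) for i in range(100)}
doc_labels = {i: 'A' if i < 50 else 'B' for i in range(100)}
plot_tsne(doc_codes, doc_labels, ['A', 'B'], 'tsne.eps')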
def plot_tsne_3d(doc_codes, doc_labels, classes_to_visual, save_file, marker_size=None, opaque=None):
markers = ["D", "p", "*", "s", "d", "8", "^", "H", "v", ">", "<", "h", "|"]
plt.rc('legend',**{'fontsize':20})
colors = ['r', 'b', 'g', 'c', 'm', 'y', 'k']
C = len(classes_to_visual)
while True:
if C <= len(markers):
break
markers += markers
while True:
if C <= len(colors):
break
colors += colors
class_ids = dict(zip(classes_to_visual, range(C)))
if isinstance(doc_codes, dict) and isinstance(doc_labels, dict):
codes, labels = zip(*[(code, doc_labels[doc]) for doc, code in doc_codes.items() if doc_labels[doc] in classes_to_visual])
else:
codes, labels = doc_codes, doc_labels
X = np.r_[list(codes)]
tsne = TSNE(perplexity=30, n_components=3, init='pca', n_iter=5000)
np.set_printoptions(suppress=True)
X = tsne.fit_transform(X)
fig = plt.figure(figsize=(10, 10), facecolor='white')
ax = fig.add_subplot(111, projection='3d')
    # matplotlib's legend doesn't support the artists returned by a 3D scatter,
    # so create "dummy" 2D proxies with the same style and pass those instead.
scatter_proxy = []
for i in range(C):
cls = classes_to_visual[i]
idx = np.array(labels) == cls
        ax.scatter(X[idx, 0], X[idx, 1], X[idx, 2], c=colors[i], alpha=opaque[i] if opaque else 1, s=marker_size[i] if marker_size else 20, marker=markers[i], label=cls)
scatter_proxy.append(mpl.lines.Line2D([0],[0], linestyle="none", c=colors[i], marker=markers[i], label=cls))
ax.legend(scatter_proxy, classes_to_visual, numpoints=1)
plt.savefig(save_file)
plt.show()
def DBN_plot_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]
C = len(classes_to_visual)
while True:
if C <= len(markers):
break
markers += markers
class_ids = dict(zip(classes_to_visual.keys(), range(C)))
codes, labels = doc_codes, doc_labels
X = np.r_[list(codes)]
tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
np.set_printoptions(suppress=True)
X = tsne.fit_transform(X)
plt.figure(figsize=(10, 10), facecolor='white')
for c in classes_to_visual.keys():
idx = np.array(labels) == c
# idx = get_indices(labels, c)
plt.plot(X[idx, 0], X[idx, 1], linestyle='None', alpha=0.6, marker=markers[class_ids[c]],
markersize=6, label=classes_to_visual[c])
legend = plt.legend(loc='upper center', shadow=True)
plt.title("tsne")
plt.savefig(save_file)
plt.show()
def reuters_visualize_tsne(doc_codes, doc_labels, classes_to_visual, save_file):
"""
Visualize the input data on a 2D PCA plot. Depending on the number of components,
the plot will contain an X amount of subplots.
@param doc_codes:
@param number_of_components: The number of principal components for the PCA plot.
"""
# markers = ["p", "s", "h", "H", "+", "x", "D"]
markers = ["o", "v", "8", "s", "p", "*", "h", "H", "+", "x", "D"]
C = len(classes_to_visual)
while True:
if C <= len(markers):
break
markers += markers
class_names = classes_to_visual.keys()
class_ids = dict(zip(class_names, range(C)))
class_names = set(class_names)
codes, labels = zip(*[(code, doc_labels[doc]) for doc, code in doc_codes.items() if class_names.intersection(set(doc_labels[doc]))])
X = np.r_[list(codes)]
tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
np.set_printoptions(suppress=True)
X = tsne.fit_transform(X)
plt.figure(figsize=(10, 10), facecolor='white')
for c in classes_to_visual.keys():
idx = get_indices(labels, c)
plt.plot(X[idx, 0], X[idx, 1], linestyle='None', alpha=0.6, marker=markers[class_ids[c]],
markersize=6, label=classes_to_visual[c])
legend = plt.legend(loc='upper center', shadow=True)
plt.title("tsne")
plt.savefig(save_file)
plt.show()
def main():
# Collect the user arguments and hyper parameters
args, hyper_params = get_args_and_hyperparameters()
    np.set_printoptions(precision=8, suppress=True, edgeitems=6, threshold=2048)
    # setup the CPU or GPU backend
    be = gen_backend(**extract_valid_args(args, gen_backend))
    # load the training dataset. This will download the dataset from the web and cache it
    # locally for subsequent use.
    train_set = MultiscaleSampler('trainval', '2007', samples_per_img=hyper_params.samples_per_img,
                                  sample_height=224, path=args.data_dir,
                                  samples_per_batch=hyper_params.samples_per_batch,
                                  max_imgs=hyper_params.max_train_imgs,
                                  shuffle=hyper_params.shuffle)
    # create the model by replacing the classification layer of AlexNet with
    # new adaptation layers
    model, opt = create_model(args, hyper_params)
    # Seed the AlexNet conv layers with pre-trained weights
    if args.model_file is None and hyper_params.use_pre_trained_weights:
        load_imagenet_weights(model, args.data_dir)
    train(args, hyper_params, model, opt, train_set)
    # Load the test dataset. This will download the dataset from the web and cache it
    # locally for subsequent use.
    test_set = MultiscaleSampler('test', '2007', samples_per_img=hyper_params.samples_per_img,
                                 sample_height=224, path=args.data_dir,
                                 samples_per_batch=hyper_params.samples_per_batch,
                                 max_imgs=hyper_params.max_test_imgs,
                                 shuffle=hyper_params.shuffle)
    test(args, hyper_params, model, test_set)
    return
# parse the command line arguments
def trainer(model_params):
"""Train a sketch-rnn model."""
np.set_printoptions(precision=8, edgeitems=6, linewidth=200, suppress=True)
tf.logging.info('sketch-rnn')
tf.logging.info('Hyperparams:')
  for key, val in model_params.values().items():  # was .iteritems(), Python 2 only
tf.logging.info('%s = %s', key, str(val))
tf.logging.info('Loading data files.')
datasets = load_dataset(FLAGS.data_dir, model_params)
train_set = datasets[0]
valid_set = datasets[1]
test_set = datasets[2]
model_params = datasets[3]
eval_model_params = datasets[4]
reset_graph()
model = sketch_rnn_model.Model(model_params)
eval_model = sketch_rnn_model.Model(eval_model_params, reuse=True)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
if FLAGS.resume_training:
load_checkpoint(sess, FLAGS.log_root)
# Write config file to json file.
tf.gfile.MakeDirs(FLAGS.log_root)
with tf.gfile.Open(
os.path.join(FLAGS.log_root, 'model_config.json'), 'w') as f:
json.dump(model_params.values(), f, indent=True)
train(sess, model, eval_model, train_set, valid_set, test_set)
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.Blues):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
    np.set_printoptions(precision=2)
    if normalize:
        # Normalize rows before drawing, so the image and the cell text agree.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        logger.info("Normalized confusion matrix")
    else:
        logger.info('Confusion matrix, without normalization')
    logger.info(cm)
    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
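# Usage sketch with hand-made counts (hypothetical numbers); relies on the
# module-level np/plt/itertools/logger the function above assumes:
cm = np.array([[50, 10],
               [5, 35]])
plot_confusion_matrix(cm, classes=['Bogus', 'Real'], normalize=True,
                      title='Normalized confusion matrix')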
def __init__(self, n_hidden=10, max_iter=10000, tol=1e-5, anneal=True, missing_values=None,
discourage_overlap=True, gaussianize='standard', gpu=False,
verbose=False, seed=None):
self.m = n_hidden # Number of latent factors to learn
self.max_iter = max_iter # Number of iterations to try
self.tol = tol # Threshold for convergence
self.anneal = anneal
self.eps = 0 # If anneal is True, it's adjusted during optimization to avoid local minima
self.missing_values = missing_values
self.discourage_overlap = discourage_overlap # Whether or not to discourage overlapping latent factors
self.gaussianize = gaussianize # Preprocess data: 'standard' scales to zero mean and unit variance
self.gpu = gpu # Enable GPU support for some large matrix multiplications.
if self.gpu:
cm.cublas_init()
self.yscale = 1. # Can be arbitrary, but sets the scale of Y
np.random.seed(seed) # Set seed for deterministic results
self.verbose = verbose
if verbose:
np.set_printoptions(precision=3, suppress=True, linewidth=160)
        print('Linear CorEx with {:d} latent factors'.format(n_hidden))
# Initialize these when we fit on data
self.n_samples, self.nv = 0, 0 # Number of samples/variables in input data
self.ws = np.zeros((0, 0)) # m by nv array of weights
self.moments = {} # Dictionary of moments
self.theta = None # Parameters for preprocessing each variable
self.history = {} # Keep track of values for each iteration
self.last_update = 0 # Used for momentum methods