import numpy as np
from sklearn.metrics import roc_auc_score, precision_score, recall_score

def classification_metrics(y, y_pred, threshold):
    # threshold_from_predictions is a helper defined elsewhere in the project.
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    # False positive rate: fraction of negatives scored at or above the threshold.
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    # AUC is only defined when both classes are present.
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
    y_pred_bool = y_pred >= threshold
    if any(y_pred_bool) and not all(y_pred_bool):
        metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
        metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
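A minimal usage sketch, with a hypothetical stand-in for the project's threshold_from_predictions helper (the real one derives a cutoff from the predictions; here it just returns a fixed value):

import numpy as np

def threshold_from_predictions(y, y_pred, fpr_target):
    # Hypothetical stub for the project's helper.
    return 0.5

y = np.array([0, 0, 1, 1, 0, 1])
y_pred = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7])
print(classification_metrics(y, y_pred, threshold=0.5))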
Python std() example source code
import numpy as np

def clipped_linscale_img(img_array,
                         cap=255.0,
                         lomult=2.0,
                         himult=2.0):
    '''
    This clips the image between the values:
    [median(img_array) - lomult*stdev(img_array),
     median(img_array) + himult*stdev(img_array)]
    and returns a linearly scaled image using the cap given.
    '''
    img_med, img_stdev = np.median(img_array), np.std(img_array)
    clipped_linear_img = np.clip(img_array,
                                 img_med - lomult*img_stdev,
                                 img_med + himult*img_stdev)
    return cap*clipped_linear_img/(img_med + himult*img_stdev)
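A quick sketch of a call on a synthetic image: values are clipped to median plus or minus two standard deviations, and the upper clip limit maps to cap:

import numpy as np

img = np.random.normal(loc=100.0, scale=10.0, size=(64, 64))
scaled = clipped_linscale_img(img, cap=255.0)
print(scaled.min(), scaled.max())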
import numpy as np
import pywt

def __SubDoWavelets(self, waveforms):
    scales = 4
    dimensions = 10
    nspk, ls = waveforms.shape
    # Multi-level Haar wavelet decomposition of each spike waveform.
    cc = pywt.wavedec(waveforms, "haar", mode="symmetric", level=scales, axis=-1)
    cc = np.hstack(cc)
    sd = list()
    for i in range(ls):
        test_data = cc[:, i]
        # Drop outliers beyond 3 standard deviations before testing for normality.
        thr_dist = np.std(test_data, ddof=1) * 3
        thr_dist_min = np.mean(test_data) - thr_dist
        thr_dist_max = np.mean(test_data) + thr_dist
        aux = test_data[(test_data > thr_dist_min) & (test_data < thr_dist_max)]
        if aux.size > 10:
            sd.append(self.__test_ks(aux))  # __test_ks is defined further below
        else:
            sd.append(0)
    # Keep the `dimensions` coefficients whose distributions deviate most from normal.
    ind = np.argsort(sd)[::-1]
    coeff = ind[:dimensions]
    waveletspk = cc[:, coeff]
    return waveletspk
Source: data_preprocessing_video.py, from project AVSR-Deep-Speech (author: pandeydivesh15).
import json
import numpy as np

def encode_and_store(batch_x, output_dir, file_name):
    """
    Args:
        1. batch_x: Batch of 32*32 images which will go inside our autoencoder.
        2. output_dir: Dir path for storing all encoded features for given `batch_x`.
                       Features will be stored in the form of a JSON file.
        3. file_name: File name of the JSON file.
    """
    # AUTO_ENCODER and load_AE() are defined elsewhere in the project.
    global AUTO_ENCODER
    if AUTO_ENCODER is None:
        load_AE()
    # Per-image standardisation: zero mean, unit standard deviation.
    norm_batch = np.zeros(batch_x.shape)
    for i in range(len(batch_x)):
        norm_batch[i] = (batch_x[i] - np.mean(batch_x[i])) / np.std(batch_x[i])
    output_dict = {
        'name': file_name,
        'encoded': AUTO_ENCODER.transform(norm_batch).tolist()}
    with open(output_dir + file_name + '.json', 'w') as f:
        json.dump(output_dict, f)
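As an aside, the per-image loop can be vectorised; a sketch, assuming the axes after the first are the image axes:

import numpy as np

def normalise_batch(batch_x):
    # Per-image standardisation without an explicit Python loop.
    axes = tuple(range(1, batch_x.ndim))
    mean = batch_x.mean(axis=axes, keepdims=True)
    std = batch_x.std(axis=axes, keepdims=True)
    return (batch_x - mean) / std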
import cv2
import numpy as np

def get_color_medio(self, roi, a, b, imprimir=False):
    xl, yl, ch = roi.shape
    roiyuv = cv2.cvtColor(roi, cv2.COLOR_RGB2YUV)
    roihsv = cv2.cvtColor(roi, cv2.COLOR_RGB2HSV)
    # Wrap low hues (reds) to the top of the hue range.
    h, s, v = cv2.split(roihsv)
    mask = (h < 5)
    h[mask] = 200
    roihsv = cv2.merge((h, s, v))
    std = np.std(roiyuv.reshape(xl * yl, 3), axis=0)
    media = np.mean(roihsv.reshape(xl * yl, 3), axis=0) - 60
    mediayuv = np.mean(roiyuv.reshape(xl * yl, 3), axis=0)
    if std[0] < 12 and std[1] < 12 and std[2] < 12:
        # if (std[0]<15 and std[2]<15) or ((media[0]>100 or media[0]<25) and (std[0]>10)):
        media = np.mean(roihsv.reshape(xl * yl, 3), axis=0)
        # yellow has saturation around 65 and value above 200
        if media[1] < 60:  # and (abs(media[0]-30)>10):
            # white
            return [-10, 0, 0]
        else:
            return media
    else:
        return None
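A hedged sketch of a call: a uniform gray ROI passes the low-variance test, its mean saturation is below 60, and the function returns the "white" marker (the self, a, b parameters are unused here, so placeholders suffice):

import numpy as np

roi = np.full((20, 20, 3), 128, dtype=np.uint8)
print(get_color_medio(None, roi, 0, 0))   # -> [-10, 0, 0], i.e. classified as white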
import numpy as np
from sklearn.metrics import roc_auc_score, precision_score, recall_score

def metrics(self, X, y):
    metrics = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # from softmax pair to probability of catastrophe
    metrics['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    metrics['threshold'] = threshold
    metrics['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:  # guard against division by zero when there are no negative labels
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
    y_pred_bool = y_pred >= threshold
    if any(y_pred_bool) and not all(y_pred_bool):
        metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
        metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
import numpy as np
import tensorflow as tf

def fit(self, X_train, y_train, X_valid, y_valid, X_test, y_test, steps=400):
    # FDRedirector / STDERR are utilities from the same project for capturing stderr.
    tf.global_variables_initializer().run()
    redirect = FDRedirector(STDERR)
    for i in range(steps):
        redirect.start()
        feed_dict = {self.labels: y_train}
        for key, tensor in self.features.items():
            feed_dict[tensor] = X_train[key]
        # Assumes the tf.Session lives on the instance; a bare `sess` was undefined here.
        predictions, loss = self.sess.run([self.prediction, self.train_op],
                                          feed_dict=feed_dict)
        if i % 10 == 0:
            print("step:{} loss:{:.3g} np.std(predictions):{:.3g}".format(
                i, loss, np.std(predictions)))
            self.threshold = float(min(self.threshold_from_data(X_valid, y_valid),
                                       self.threshold_from_data(X_train, y_train)))
            tf.get_collection_ref("threshold")[0] = self.threshold
            self.print_metrics(X_train, y_train, "Training")
            self.print_metrics(X_valid, y_valid, "Validation")
        errors = redirect.stop()
        if errors:
            print(errors)
    self.print_metrics(X_test, y_test, "Test")
import numpy as np

def information_ratio(algorithm_returns, benchmark_returns):
    """
    http://en.wikipedia.org/wiki/Information_ratio
    Args:
        algorithm_returns (np.array-like):
            All returns during algorithm lifetime.
        benchmark_returns (np.array-like):
            All benchmark returns during algo lifetime.
    Returns:
        float. Information ratio.
    """
    # zp_math refers to zipline's math utilities, imported elsewhere in the project.
    relative_returns = algorithm_returns - benchmark_returns
    relative_deviation = relative_returns.std(ddof=1)
    if zp_math.tolerant_equals(relative_deviation, 0) or \
            np.isnan(relative_deviation):
        return 0.0
    return np.mean(relative_returns) / relative_deviation
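A small self-check with synthetic returns; zp_math comes from zipline, so a hypothetical stand-in namespace is used here:

import numpy as np

class zp_math:  # hypothetical stand-in for zipline's math utilities
    @staticmethod
    def tolerant_equals(a, b, atol=1e-10):
        return abs(a - b) <= atol

algo = np.array([0.011, 0.022, -0.004, 0.016])
bench = np.array([0.008, 0.012, -0.002, 0.010])
print(information_ratio(algo, bench))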
import numpy as np

def _normalise_data(self):
    self.train_x_mean = np.zeros(self.input_dim)
    self.train_x_std = np.ones(self.input_dim)
    self.train_y_mean = np.zeros(self.output_dim)
    self.train_y_std = np.ones(self.output_dim)
    if self.normalise_data:
        self.train_x_mean = np.mean(self.train_x, axis=0)
        self.train_x_std = np.std(self.train_x, axis=0)
        # Avoid division by zero for constant features.
        self.train_x_std[self.train_x_std == 0] = 1.
        self.train_x = (self.train_x - np.full(self.train_x.shape, self.train_x_mean, dtype=np.float32)) / \
            np.full(self.train_x.shape, self.train_x_std, dtype=np.float32)
        self.test_x = (self.test_x - np.full(self.test_x.shape, self.train_x_mean, dtype=np.float32)) / \
            np.full(self.test_x.shape, self.train_x_std, dtype=np.float32)
        self.train_y_mean = np.mean(self.train_y, axis=0)
        self.train_y_std = np.std(self.train_y, axis=0)
        # train_y_std is an array, so guard elementwise; `if train_y_std == 0`
        # raises an ambiguity error for arrays.
        self.train_y_std[self.train_y_std == 0] = 1.
        self.train_y = (self.train_y - self.train_y_mean) / self.train_y_std
import os
import h5py
import numpy as np

def test(self, input_path, output_path):
    if not self.load()[0]:
        raise Exception("No model is found, please train first")
    mean, std = self.sess.run([self.mean, self.std])
    images = np.empty((1, self.im_size[0], self.im_size[1], self.im_size[2], 1), dtype=np.float32)
    #labels = np.empty((1, self.im_size[0], self.im_size[1], self.im_size[2], self.nclass), dtype=np.float32)
    for f in input_path:
        # read_testing_inputs and restore_labels are helpers from the same project.
        images[0, ..., 0], read_info = read_testing_inputs(f, self.roi[0], self.im_size, output_path)
        probs = self.sess.run(self.probs, feed_dict={self.images: (images - mean) / std,
                                                     self.is_training: True,
                                                     self.keep_prob: 1})
        #print(self.roi[1] + os.path.basename(f) + ":" + str(dice))
        output_file = os.path.join(output_path, self.roi[1] + '_' + os.path.basename(f))
        f_h5 = h5py.File(output_file, 'w')
        if self.roi[0] < 0:
            f_h5['predictions'] = restore_labels(np.argmax(probs[0], 3), self.roi[0], read_info)
        else:
            f_h5['probs'] = restore_labels(probs[0, ..., 1], self.roi[0], read_info)
        f_h5.close()
import numpy as np

def lowpass_random(n_samples, cutoff, n_dim=None, rng=None, normalize=False, slope=0):
    """
    Return a random lowpass-filtered signal.
    :param n_samples: Number of samples in the signal.
    :param cutoff: Cutoff frequency, as a fraction of the sampling frequency.
    :param rng: A random number generator or seed.
    :return: The lowpass-filtered random signal.
    """
    # get_rng and lowpass are helpers from the same package.
    rng = get_rng(rng)
    assert 0 <= cutoff <= 1, "Cutoff must be in the range 0 (pure DC) to 1 (sample frequency)"
    base_signal = rng.randn(n_samples) if n_dim is None else rng.randn(n_samples, n_dim)
    lowpass_signal = lowpass(base_signal, cutoff)
    if normalize:
        lowpass_signal = lowpass_signal / np.std(lowpass_signal)
    if slope != 0:
        ramp = slope * np.arange(len(lowpass_signal))
        lowpass_signal = lowpass_signal + (ramp if n_dim is None else ramp[:, None])
    return lowpass_signal
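get_rng and lowpass are not shown in this listing; a minimal sketch of plausible stand-ins (a seed-or-generator helper and an FFT brick-wall filter), just to make the snippet runnable:

import numpy as np

def get_rng(rng):
    # Accept None, an integer seed, or an existing RandomState.
    if isinstance(rng, np.random.RandomState):
        return rng
    return np.random.RandomState(rng)

def lowpass(sig, cutoff):
    # Brick-wall filter: zero every FFT bin above the cutoff frequency.
    freqs = np.fft.rfftfreq(sig.shape[0])          # cycles/sample, 0 to 0.5
    spectrum = np.fft.rfft(sig, axis=0)
    spectrum[freqs > cutoff * 0.5] = 0
    return np.fft.irfft(spectrum, n=sig.shape[0], axis=0)

sig = lowpass_random(n_samples=1000, cutoff=0.05, rng=0, normalize=True)
print(sig.shape, np.std(sig))   # (1000,) and approximately 1.0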
import numpy as np
from scipy.special import erfc

def __test_ks(self, x):
    # One-sample Kolmogorov-Smirnov statistic against a standard normal CDF.
    x = x[~np.isnan(x)]
    n = x.size
    x.sort()
    yCDF = np.arange(1, n + 1) / float(n)
    # Keep only the last occurrence of duplicated values.
    notdup = np.hstack([np.diff(x, 1), [1]])
    notdup = notdup > 0
    x_expcdf = x[notdup]
    y_expcdf = np.hstack([[0], yCDF[notdup]])
    # Z-score the empirical points, then evaluate the theoretical normal CDF.
    zScores = (x_expcdf - np.mean(x)) / np.std(x, ddof=1)
    mu = 0
    sigma = 1
    theocdf = 0.5 * erfc(-(zScores - mu) / (np.sqrt(2) * sigma))
    # Maximum distance between the empirical and theoretical CDFs,
    # evaluated just before and just after each jump.
    delta1 = y_expcdf[:-1] - theocdf
    delta2 = y_expcdf[1:] - theocdf
    deltacdf = np.abs(np.hstack([delta1, delta2]))
    KSmax = deltacdf.max()
    return KSmax
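This KS statistic is what __SubDoWavelets uses to rank coefficients: it is small for roughly normal samples and large for multimodal ones. A small check, run after the definition above (self is unused, so None suffices):

import numpy as np

rng = np.random.RandomState(0)
normal = rng.randn(1000)
bimodal = np.hstack([rng.randn(500) - 3, rng.randn(500) + 3])
print(__test_ks(None, normal))    # small: close to normal
print(__test_ks(None, bimodal))   # larger: far from normal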
import numpy as np

def zscore(x):
    """Computes the Z-score of a vector x. Removes the mean and divides by the
    standard deviation. Has a fallback if std is 0 to return all zeroes.
    Parameters
    ----------
    x: list of int
        Input time-series
    Returns
    -------
    z: list of float
        Z-score normalized time-series
    """
    mean = np.mean(x)
    sd = np.std(x)
    if sd == 0:
        z = np.zeros_like(x)
    else:
        z = (x - mean) / sd
    return z
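Two quick checks: a short ramp normalises to zero mean and unit variance, and a constant series hits the zero-std fallback:

import numpy as np

print(zscore(np.array([1, 2, 3, 4, 5])))
print(zscore(np.ones(5)))   # constant input, returns all zeros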
import numpy as np
import scipy.stats

def process(dic, p, s=0, normalize=1.0):
    #x = [5000, 10000, 20000, 40000, 80000, 150000]
    #x = [1000, 5000, 10000]
    a = ['vs_true', 'vs_false', 'tc', 'mv']
    data = {}
    for algo in a:
        y = list(zip(*dic[(p, algo)]))[s]
        m = np.mean(y)
        sd = np.std(y)
        print(p, algo, "%.4f" % (m / normalize))  # , "%.2f" % sd
        data[algo] = np.asarray(y) * 1.0 / normalize
        #print data[algo]
    #print data['mv']
    print('vsfalse', scipy.stats.ttest_1samp(data['tc'] - data['vs_false'], 0))
    print('tc', scipy.stats.ttest_1samp(data['tc'] - data['vs_true'], 0))
    print('mv', scipy.stats.ttest_1samp(data['mv'] - data['vs_true'], 0))
import shutil
import h5py
import numpy as np

def mean_variance_normalisation(h5f, mvn_h5f, vad=None):
    """Do mean variance normalization. Optionally use a vad.
    Parameters:
    ----------
    h5f: str. h5features file name
    mvn_h5f: str, h5features output name
    """
    dset = list(h5py.File(h5f, 'r').keys())[0]
    if vad is not None:
        raise NotImplementedError
    else:
        data = h5py.File(h5f, 'r')[dset]['features'][:]
        features = data
    # Epsilon keeps the division safe when the standard deviation is (near) zero.
    epsilon = np.finfo(data.dtype).eps
    mean = np.mean(data)
    std = np.std(data)
    mvn_features = (features - mean) / (std + epsilon)
    shutil.copy(h5f, mvn_h5f)
    # 'r+' is needed: recent h5py versions open files read-only by default.
    h5py.File(mvn_h5f, 'r+')[dset]['features'][:] = mvn_features
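A hedged round-trip sketch, assuming the layout the function expects (the file's first key holds a 'features' dataset):

import h5py
import numpy as np

with h5py.File('feats.h5', 'w') as f:
    f.create_dataset('utt1/features', data=np.random.randn(100, 13))
mean_variance_normalisation('feats.h5', 'feats_mvn.h5')
with h5py.File('feats_mvn.h5', 'r') as f:
    feats = f['utt1']['features'][:]
print(feats.mean(), feats.std())   # approximately 0 and 1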
import numpy as np

def update_summary(var_up, var, start, end):
    diff = np.abs(var_up - var)
    reldiff = diff / var
    # Filter out NaNs (e.g. 0/0 entries); scalar inputs cannot be indexed.
    try:
        reldiff = reldiff[~np.isnan(reldiff)]
    except TypeError:
        pass
    return (np.mean(diff), np.std(diff), np.mean(reldiff),
            np.std(reldiff), (end - start).microseconds)
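A usage sketch: the 0/0 entry below yields a NaN relative difference, which the filter removes before averaging:

import numpy as np
from datetime import datetime

start = datetime.now()
var = np.array([1.0, 2.0, 0.0])
var_up = np.array([1.1, 2.2, 0.0])
end = datetime.now()
print(update_summary(var_up, var, start, end))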
import numpy as np

def apply_metric_results_macro_average(results, metric,
                                       print_full_result=False):
    for method in results.keys():
        max_train = max(results[method].keys())
        for train_perc in sorted(results[method].keys()):
            samples = len(results[method][train_perc])
            # Evaluate the metric once per (prediction, truth) pair.
            vals = [metric(a, b, train_perc=train_perc, max_train=max_train)
                    for (a, b) in results[method][train_perc]]
            if print_full_result:
                print(':'.join(map(str, [train_perc, method])) + ','
                      + ','.join('{:.2f}'.format(v) for v in vals))
            # Report mean +/- standard deviation (LaTeX \pm).
            metric_val = ' '.join(map(str, ['%.2f' % np.mean(vals), "\pm",
                                            '%.2f' % np.std(vals)]))
            results[method][train_perc] = (metric_val, samples)
import numpy as np

def test_bootstrap_replicate_1d(data, seed):
    # dcst / original are the module under test and the reference implementation;
    # atol is a module-level tolerance.
    np.random.seed(seed)
    x = dcst.bootstrap_replicate_1d(data, np.mean)
    np.random.seed(seed)
    x_correct = original.bootstrap_replicate_1d(data[~np.isnan(data)], np.mean)
    # np.isnan takes no tolerance arguments; closeness is checked by np.isclose.
    assert (np.isnan(x) and np.isnan(x_correct)) \
        or np.isclose(x, x_correct, atol=atol, equal_nan=True)
    np.random.seed(seed)
    x = dcst.bootstrap_replicate_1d(data, np.median)
    np.random.seed(seed)
    x_correct = original.bootstrap_replicate_1d(data[~np.isnan(data)], np.median)
    assert (np.isnan(x) and np.isnan(x_correct)) \
        or np.isclose(x, x_correct, atol=atol, equal_nan=True)
    np.random.seed(seed)
    x = dcst.bootstrap_replicate_1d(data, np.std)
    np.random.seed(seed)
    x_correct = original.bootstrap_replicate_1d(data[~np.isnan(data)], np.std)
    assert (np.isnan(x) and np.isnan(x_correct)) \
        or np.isclose(x, x_correct, atol=atol, equal_nan=True)
import numpy as np

def inspect(self, output=True):
    ''' short function that returns the image values: mean,
    standard deviation, max, min and size of image
    if output is True, it prints to the console the string containing the
    formatted value
    '''
    m = np.mean(self.data)
    s = np.std(self.data)
    u = np.max(self.data)
    l = np.min(self.data)
    d = self.data.shape
    if output:
        # Use a separate name for the summary string so it does not shadow the std.
        msg = ("Mean: {0:.2f} | Std: {1:.2f} | Max: {2:.2f} | Min: {3:.2f} | "
               "Dim: {4[0]}x{4[1]}".format(m, s, u, l, d))
        print(msg)
        return msg
    return (m, s, u, l, d)
import os
import csv

def csvwrite(_imagefile, _feature_data, write_dir):
    print("Writing FEATURE.CSV file...")
    feature_file = os.path.splitext(_imagefile)[0]
    feature_file = feature_file.replace("IR", "Features")
    name = feature_file + '.csv'
    with open(name, 'w') as csvfile:
        # getHistFeatureKeys is a helper defined elsewhere in the project.
        fieldnames = ['mean_value', 'euler_number', 'major_axis', 'area', 'solidity', 'std',
                      'eccentricity', 'eq_diameter', 'minor_axis']
        fieldnames.extend(getHistFeatureKeys())
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for cluster in _feature_data:
            data = {key: value for key, value in cluster.items() if key in fieldnames}
            writer.writerow(data)
    print(write_dir)
    os.rename(name, os.path.join(write_dir, "output.csv"))
    #copy2(outpu, _junk)
    #os.rename(_junk, "output.csv")
    print("FEATURE.CSV file is Written")
import numpy as np
import matplotlib.pyplot as plt

def updatePlot(self, data):
    """ Update the plot """
    plt.figure(self.fig.number)
    #assert (data.shape[1] == self.nbCh), 'new data does not have the same number of channels'
    #assert (data.shape[0] == self.nbPoints), 'new data does not have the same number of points'
    # Standardise each channel (guarding against zero variance), then scale to the channel range.
    data = data - np.mean(data, axis=0)
    std_data = np.std(data, axis=0)
    std_data[np.where(std_data == 0)] = 1
    data = data / std_data * self.chRange / 5.0
    for i, chName in enumerate(self.chNames):
        self.chLinesDict[chName].set_ydata(data[:, i] + self.offsets[i])
    plt.draw()