def split_data(self, n, v=5):
    '''Split the data into v folds, regardless of the number of samples per class.
    Input:
        n : the number of samples
        v : the number of folds
    Output: None
    '''
    step = n // v  # Number of samples in each fold
    sp.random.seed(1)  # Set the random generator to the same initial state
    t = sp.random.permutation(n)  # Random permutation of the indices
    indices = []
    for i in range(v - 1):  # Group the indices into v folds
        indices.append(t[i * step:(i + 1) * step])
    indices.append(t[(v - 1) * step:n])  # The last fold absorbs the remainder
    for i in range(v):
        self.iT.append(sp.asarray(indices[i]))  # Testing indices for fold i
        l = list(range(v))  # list() needed under Python 3
        l.remove(i)
        temp = sp.empty(0, dtype=sp.int64)
        for j in l:
            temp = sp.concatenate((temp, sp.asarray(indices[j])))
        self.it.append(temp)  # Training indices: all folds except fold i
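A minimal usage sketch, assuming split_data is attached as a method of a small cross-validation holder that owns the it/iT lists; the CV class below is an assumption, not part of the original code.

import scipy as sp

class CV(object):
    def __init__(self):
        self.it = []  # training indices, one array per fold
        self.iT = []  # testing indices, one array per fold

CV.split_data = split_data  # attach the function above as a method

cv = CV()
cv.split_data(10, v=5)
print(cv.iT[0])  # 2 test indices for fold 0 (10 samples // 5 folds)
print(cv.it[0])  # the remaining 8 indices, used for training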
import scipy as sp
import libmr

def main():
    posscores = sp.asarray([0.245, 0.2632, 0.3233, 0.3573, 0.4014, 0.4055, 0.4212, 0.5677])
    test_distances = sp.asarray([0.05, 0.1, 0.25, 0.4, 0.75, 1., 1.5, 2.])
    mr = libmr.MR()
    # Since higher is worse and we want to fit the higher tail, use fit_high()
    mr.fit_high(posscores, posscores.shape[0])
    wscores = mr.w_score_vector(test_distances)
    for i in range(wscores.shape[0]):
        print("%.2f %.2f %.2f" % (test_distances[i], wscores[i], mr.inv(wscores[i])))
    # wscores are the ones to be used in the equation s_i * (1 - rho_i)
    print("Low wscore --> low probability that the score is an outlier, i.e. the sample is NOT an outlier")
    print("High wscore --> high probability that the score is an outlier, i.e. the sample IS an outlier")
    print("posscores: ", posscores)
    print("test_distances: ", test_distances)
    print("wscores: ", wscores)
import glob
import scipy as sp
from scipy.io import loadmat, savemat

def compute_mean_vector(category_name, labellist, layer='fc8'):
    print(category_name)
    # featurefilepath is assumed to be a module-level global
    featurefile_list = glob.glob('%s/%s/*.mat' % (featurefilepath, category_name))
    # Gather all the training samples for which the predicted category
    # was the category under consideration
    correct_features = []
    for featurefile in featurefile_list:
        try:
            img_arr = loadmat(featurefile)
            predicted_category = labellist[img_arr['scores'].argmax()]
            if predicted_category == category_name:
                correct_features += [img_arr[layer]]
        except TypeError:
            continue
    # Now compute the channel-wise mean vector
    channel_mean_vec = []
    for channelid in range(correct_features[0].shape[0]):
        channel = []
        for feature in correct_features:
            channel += [feature[channelid, :]]
        channel = sp.asarray(channel)
        assert len(correct_features) == channel.shape[0]
        # Take the mean over each channel to get the mean channel vector
        channel_mean_vec += [sp.mean(channel, axis=0)]
    # This vector contains the mean computed over correct classifications
    # for each channel separately
    channel_mean_vec = sp.asarray(channel_mean_vec)
    savemat('%s.mat' % category_name, {'%s' % category_name: channel_mean_vec})
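A hypothetical invocation; featurefilepath (the module-level global the function reads) and the synset ids are placeholders, not values from the original project.

featurefilepath = '/path/to/features'   # global read by the function above
labellist = ['n01440764', 'n01443537']  # placeholder synset ids, one per class
compute_mean_vector('n01440764', labellist, layer='fc8')
# writes n01440764.mat containing the channel-wise mean activation vector (MAV)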
def computeOpenMaxProbability(openmax_fc8, openmax_score_u):
    """ Convert the scores into probability values using OpenMax
    Input:
    ---------------
    openmax_fc8 : modified FC8 layer from the Weibull-based computation
    openmax_score_u : degree of uncertainty per channel and class
    Output:
    ---------------
    modified_scores : probability values modified using the OpenMax framework,
        by incorporating the degree of uncertainty/openness for a given class
    """
    # NCHANNELS and NCLASSES are module-level globals (10 crop channels and
    # 1000 classes in the original OpenMax code, per the assert below)
    prob_scores, prob_unknowns = [], []
    for channel in range(NCHANNELS):
        channel_scores = []
        for category in range(NCLASSES):
            channel_scores += [sp.exp(openmax_fc8[channel, category])]
        # Denominator: mass of the known classes plus the "unknown" mass
        total_denominator = sp.sum(sp.exp(openmax_fc8[channel, :])) + sp.exp(sp.sum(openmax_score_u[channel, :]))
        prob_scores += [sp.asarray(channel_scores) / total_denominator]
        prob_unknowns += [sp.exp(sp.sum(openmax_score_u[channel, :])) / total_denominator]
    prob_scores = sp.asarray(prob_scores)
    prob_unknowns = sp.asarray(prob_unknowns)
    # Average the per-channel probabilities
    scores = sp.mean(prob_scores, axis=0)
    unknowns = sp.mean(prob_unknowns, axis=0)
    modified_scores = scores.tolist() + [unknowns]
    assert len(modified_scores) == 1001  # NCLASSES known classes + 1 unknown
    return modified_scores
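A toy invocation sketch; the random inputs below are placeholders, and sp.random is assumed to proxy NumPy's random module as in older SciPy releases.

import scipy as sp

NCHANNELS, NCLASSES = 10, 1000
openmax_fc8 = sp.random.randn(NCHANNELS, NCLASSES)
openmax_score_u = sp.random.randn(NCHANNELS, NCLASSES) * 0.01
probs = computeOpenMaxProbability(openmax_fc8, openmax_score_u)
print(len(probs))  # 1001 = NCLASSES known classes + 1 unknown
print(sum(probs))  # ~1.0: each channel's masses share the same denominator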
#---------------------------------------------------------------------------------
def frame_to_vect(frame):
    # Transform an RGB image into a channels-first CNN input tensor
    H, W = frame.shape[:2]
    frame = sp.asarray(frame, dtype=sp.float16) / 255.0  # scale to [0, 1]
    features = frame.transpose(2, 0, 1).reshape(3, H, W)
    return features
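A quick sketch with a fake frame; sp.random.randint is assumed to proxy NumPy's randint.

import scipy as sp

frame = sp.random.randint(0, 256, size=(4, 4, 3))  # fake 4x4 RGB image
x = frame_to_vect(frame)
print(x.shape, x.dtype)  # (3, 4, 4) float16, values scaled into [0, 1]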
def initialize(self, sample_from_prior, distance_to_ground_truth_function):
    super().initialize(sample_from_prior,
                       distance_to_ground_truth_function)
    eps_logger.debug("calc initial epsilon")
    # Calculate the initial epsilon if not given
    if self._initial_epsilon == 'from_sample':
        distances = sp.asarray([distance_to_ground_truth_function(x)
                                for x in sample_from_prior])
        eps_t0 = sp.median(distances) * self.median_multiplier
        self._look_up = {0: eps_t0}
    else:
        self._look_up = {0: self._initial_epsilon}
    eps_logger.info("initial epsilon is {}".format(self._look_up[0]))
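A standalone sketch of the 'from_sample' rule above: the initial acceptance threshold is the median of the prior-predictive distances, scaled by median_multiplier (assumed to be 1 here); the distances are made-up values.

import scipy as sp

distances = sp.asarray([0.3, 1.2, 0.7, 2.5, 0.9])  # placeholder distances
eps_t0 = sp.median(distances) * 1.0  # median_multiplier assumed to be 1
print(eps_t0)  # 0.9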
def _dict_to_to_vect(self, x):
    return sp.asarray([x[key] for key in self.measures_to_use])
def _calculate_whitening_transformation_matrix(self, sample_from_prior):
    samples_vec = sp.asarray([self._dict_to_to_vect(x)
                              for x in sample_from_prior])
    # samples_vec is an array of shape nr_samples x nr_features
    means = samples_vec.mean(axis=0)
    centered = samples_vec - means
    covariance = centered.T.dot(centered)
    w, v = la.eigh(covariance)
    # ZCA-style whitening matrix: V diag(1/sqrt(w)) V^T
    self._whitening_transformation_matrix = (
        v.dot(sp.diag(1. / sp.sqrt(w))).dot(v.T))
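A sanity-check sketch of the eigendecomposition-based whitening: with C = V diag(w) V^T and W = V diag(1/sqrt(w)) V^T, W C W^T is the identity; the random samples are placeholders.

import scipy as sp
from scipy import linalg as la

samples = sp.random.randn(200, 3)
centered = samples - samples.mean(axis=0)
C = centered.T.dot(centered)  # unnormalized covariance, as above
w, v = la.eigh(C)
W = v.dot(sp.diag(1. / sp.sqrt(w))).dot(v.T)
print(sp.allclose(W.dot(C).dot(W.T), sp.eye(3)))  # True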
def split_data_class(self, y, v=5):
    '''Split the data into v folds. The samples of each class are split approximately evenly across the v folds.
    Input:
        y : the labels of the samples
        v : the number of folds
    Output: None
    '''
    # Get parameters
    n = y.size
    C = y.max().astype('int')  # labels are assumed to be 1..C
    for j in range(v):
        tempit = []
        tempiT = []
        for i in range(C):
            # Get all samples of class i
            t = sp.where(y == (i + 1))[0]
            nc = t.size
            stepc = nc // v  # Step size for each class
            if stepc == 0:
                print("Not enough samples to build " + str(v) + " folds in class " + str(i))
            sp.random.seed(i)  # Same seed per class, so every fold sees the same permutation
            tc = t[sp.random.permutation(nc)]  # Random sampling of the indices of class i
            # Set testing and training samples
            if j < (v - 1):
                start, end = j * stepc, (j + 1) * stepc
            else:
                start, end = j * stepc, nc
            tempiT.extend(sp.asarray(tc[start:end]))  # Testing
            k = list(range(v))  # list() needed under Python 3
            k.remove(j)
            for l in k:
                if l < (v - 1):
                    start, end = l * stepc, (l + 1) * stepc
                else:
                    start, end = l * stepc, nc
                tempit.extend(sp.asarray(tc[start:end]))  # Training
        self.it.append(tempit)
        self.iT.append(tempiT)
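As with split_data, a hedged usage sketch; it reuses the hypothetical CV holder from the earlier sketch and two balanced classes labelled 1 and 2.

y = sp.asarray([1] * 6 + [2] * 6)  # two classes, six samples each
CV.split_data_class = split_data_class  # attach to the hypothetical holder
cv = CV()
cv.split_data_class(y, v=3)
print(cv.iT[0])       # fold 0: two test samples per class, four in total
print(len(cv.it[0]))  # 8: the remaining samples used for training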
import os
from os import path
import caffe
import scipy as sp
from scipy.io import savemat
# rgb2gray is assumed to come from e.g. skimage.color

def compute_features(imgname, args):
    """
    Instantiate a classifier class, pass the image through the network and save its features.
    Features are saved in .mat format.
    """
    image_dims = [int(s) for s in args.images_dim.split(',')]
    if args.force_grayscale:
        channel_swap = None
        mean_file = None
    else:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]
        mean_file = args.mean_file
    # Make classifier
    classifier = caffe.Classifier(args.model_def, args.pretrained_model,
                                  image_dims=image_dims, gpu=args.gpu, mean_file=mean_file,
                                  input_scale=args.input_scale, channel_swap=channel_swap)
    if args.gpu:
        print('GPU mode')
    outfname = imgname.replace('imageNetForWeb', 'imageNetForWeb_Features') + ".mat"
    print(outfname)
    if not path.exists(path.dirname(outfname)):
        os.makedirs(path.dirname(outfname))
    inputs = [caffe.io.load_image(imgname)]
    if args.force_grayscale:
        inputs = [rgb2gray(img) for img in inputs]
    print("Classifying %d inputs." % len(inputs))
    scores = classifier.predict(inputs, not args.center_only)
    # Now save the features
    feature_dict = {}
    feature_dict['IMG_NAME'] = path.join(path.dirname(imgname), path.basename(imgname))
    feature_dict['fc7'] = sp.asarray(classifier.blobs['fc7'].data.squeeze(axis=(2, 3)))
    feature_dict['fc8'] = sp.asarray(classifier.blobs['fc8'].data.squeeze(axis=(2, 3)))
    feature_dict['prob'] = sp.asarray(classifier.blobs['prob'].data.squeeze(axis=(2, 3)))
    feature_dict['scores'] = sp.asarray(scores)
    savemat(outfname, feature_dict)
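A hypothetical invocation; every path, flag, and file name below is a placeholder, and the Namespace merely mimics the argparse arguments the function reads.

from argparse import Namespace

args = Namespace(images_dim='256,256', force_grayscale=False,
                 channel_swap='2,1,0', mean_file='ilsvrc_2012_mean.npy',
                 model_def='deploy.prototxt', pretrained_model='weights.caffemodel',
                 gpu=False, input_scale=255.0, center_only=True)
compute_features('/data/imageNetForWeb/n01440764/img.jpg', args)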
import scipy as sp
import scipy.spatial.distance as spd

def compute_channel_distances(mean_train_channel_vector, features, category_name):
    """
    Input:
    ---------
    mean_train_channel_vector : mean activation vector for a given class.
                                It can be computed using the MAV_Compute.py file
    features : features for the category under consideration
    category_name : synset_id
    Output:
    ---------
    channel_distances : dict of distance distributions from the MAV for each channel.
                        The distances considered are eucos, cosine and euclidean
    """
    eucos_dist, eu_dist, cos_dist = [], [], []
    for channel in range(features[0].shape[0]):
        eu_channel, cos_channel, eu_cos_channel = [], [], []
        # Compute channel-specific distances
        for feat in features:
            eu_channel += [spd.euclidean(mean_train_channel_vector[channel, :], feat[channel, :])]
            cos_channel += [spd.cosine(mean_train_channel_vector[channel, :], feat[channel, :])]
            # eucos: scaled Euclidean distance plus cosine distance
            eu_cos_channel += [spd.euclidean(mean_train_channel_vector[channel, :], feat[channel, :]) / 200. +
                               spd.cosine(mean_train_channel_vector[channel, :], feat[channel, :])]
        eu_dist += [eu_channel]
        cos_dist += [cos_channel]
        eucos_dist += [eu_cos_channel]
    # Convert all lists to scipy arrays
    eucos_dist = sp.asarray(eucos_dist)
    eu_dist = sp.asarray(eu_dist)
    cos_dist = sp.asarray(cos_dist)
    # Shape checks: 10 channels, one distance per feature
    assert eucos_dist.shape[0] == 10
    assert eu_dist.shape[0] == 10
    assert cos_dist.shape[0] == 10
    assert eucos_dist.shape[1] == len(features)
    assert eu_dist.shape[1] == len(features)
    assert cos_dist.shape[1] == len(features)
    channel_distances = {'eucos': eucos_dist, 'cosine': cos_dist, 'euclidean': eu_dist}
    return channel_distances
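A toy sketch with random activations: 10 channels and five features of dimension 8, matching the 10-channel assertions in the function; the synset id is a placeholder.

import scipy as sp

mav = sp.random.randn(10, 8)                         # fake mean activation vector
feats = [sp.random.randn(10, 8) for _ in range(5)]   # five fake features
dists = compute_channel_distances(mav, feats, 'n01440764')
print(dists['eucos'].shape)  # (10, 5): one distance per channel and feature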
#------------------------------------------------------------------------------------------