def get_weather_dict(self,data_dir):
    """Build, or load from cache, a lookup table of weather features.

    The returned dict has two kinds of keys:

    * every ``time_slotid`` maps to ``(row_index, weather, temparature, pm25)``
      taken from the matching row of the weather dataframe;
    * every ``time_date`` maps to ``(-1, weather, temparature, pm25)`` where
      each value is the mode of that column over the whole date.

    The result is cached through ``DumpLoad`` in
    ``../data_raw/<dir>_weather.csv.dict.pickle``, where ``<dir>`` is the
    second-to-last ``/``-separated component of ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory path passed on to ``self.load_weatherdf``.

    Returns
    -------
    dict
        The lookup table described above.
    """
    import numpy as np  # local import: this block did not reference numpy before

    def _modal(values):
        # scipy.stats.mode returns a length-1 array on older SciPy and a
        # scalar on SciPy >= 1.11; ravel()[0] handles both.
        return np.ravel(mode(values)[0])[0]

    t0 = time()
    filename = '../data_raw/' + data_dir.split('/')[-2] + '_weather.csv.dict.pickle'
    dumpload = DumpLoad(filename)
    if dumpload.isExisiting():
        return dumpload.load()

    resDict = {}
    df = self.load_weatherdf(data_dir)
    for index, row in df.iterrows():
        resDict[row['time_slotid']] = (index, row['weather'], row['temparature'], row['pm25'])
    for name, group in df.groupby('time_date'):
        resDict[name] = (-1,
                         _modal(group['weather']),
                         _modal(group['temparature']),
                         _modal(group['pm25']))
    dumpload.dump(resDict)
    # Single pre-formatted argument prints identically on Python 2 and 3.
    print("dump weather dict: %s s" % round(time() - t0, 3))
    return resDict
python类mode()的实例源码
def find_history_data(self, row, history_dict=None):
    """Summarise the history of one (district, time slot) pair.

    Parameters
    ----------
    row : pandas.Series
        Position 0 holds the start district id, position 1 the time id.
    history_dict : object, optional
        Passed unchanged to ``self.__get_historylist_from_dict``.

    Returns
    -------
    pandas.Series
        Mean, median and mode of the history list for ``time_id``
        (``history_*``), followed by the same statistics over the lists for
        ``time_id - 1``, ``time_id - 2`` and ``time_id`` concatenated
        (``history_plus_*``).
    """
    def _modal(values):
        # Works for both the legacy (length-1 array) and the SciPy >= 1.11
        # (scalar) return shape of scipy.stats.mode.
        return np.ravel(mode(values)[0])[0]

    start_district_id = row.iloc[0]
    time_id = row.iloc[1]
    index = ['history_mean', 'history_median', 'history_mode',
             'history_plus_mean', 'history_plus_median', 'history_plus_mode']

    min_list = self.__get_historylist_from_dict(history_dict, start_district_id, time_id)
    plus_list1 = self.__get_historylist_from_dict(history_dict, start_district_id, time_id-1)
    plus_list2 = self.__get_historylist_from_dict(history_dict, start_district_id, time_id-2)

    # The helper results are concatenated with "+", so they are expected to
    # be plain lists here.
    plus_list = np.array(plus_list1 + plus_list2 + min_list)
    min_list = np.array(min_list)

    return pd.Series([min_list.mean(), np.median(min_list), _modal(min_list),
                      plus_list.mean(), np.median(plus_list), _modal(plus_list)],
                     index=index)
def predict_proba(self,X):
    '''
    Return, per sample and per output, the voted value and its vote share.

    The result has shape N x L x 2 (not N x L x K): ``Y[i, j, 0]`` is the
    modal prediction of the ``self.M`` models in ``self.h`` for output j of
    sample i, and ``Y[i, j, 1]`` is the fraction of models that voted for it.
    For example, in
        [[ 0.  1. ]
         [ 0.  0.9]
         [ 0.  1. ]
         [ 0.  1. ]
         [ 0.  1. ]
         [ 1.  0.9]]
    the sixth output takes value 1 with confidence 0.9.

    X is an N x D array; each row is predicted separately by every model.
    '''
    N, _ = X.shape
    Y = zeros((N, self.L, 2))
    # Float denominator: with integer counts, "/" would floor to 0 or 1
    # under Python 2 division.
    n_models = float(self.M)
    for i in range(N):
        V = zeros((self.M, self.L))
        for m in range(self.M):
            V[m, :] = self.h[m].predict(array([X[i, :]]))
        k = mode(V)[0]
        Y[i, :, 0] = k
        # Array method instead of a star-imported numpy ``sum``.
        Y[i, :, 1] = (V == k).sum(axis=0) / n_models
    return Y
classification.py 文件源码
项目:decoding-brain-challenge-2016
作者: alexandrebarachant
项目源码
文件源码
阅读 33
收藏 0
点赞 0
评论 0
def predict(self, covtest):
    """Predict one label per trial by majority vote of the nearest classes.

    Parameters
    ----------
    covtest : ndarray
        Test data handed to ``self._predict_distances`` (the original
        docstring describes it as SPD matrices of shape
        (n_trials, n_channels, n_channels)).

    Returns
    -------
    pred : ndarray
        Flattened modal class among the ``self.n_neighbors`` closest entries
        for each trial.
    """
    distances = self._predict_distances(covtest)
    # Class labels of every candidate, ordered from closest to farthest.
    ranked_labels = self.classes_[numpy.argsort(distances)]
    nearest_labels = ranked_labels[:, 0:self.n_neighbors]
    vote = stats.mode(nearest_labels, axis=1)
    return vote[0].ravel()
def predict(self, X):
    """Predict the class labels for the provided data

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Test samples.

    Returns
    -------
    ndarray
        One label per test sample: the plain mode of the neighbours' labels
        when ``_get_weights`` returns ``None``, otherwise their weighted mode.
    """
    X_ = to_time_series_dataset(X)
    neigh_dist, neigh_ind = self.kneighbors(X_)
    weights = _get_weights(neigh_dist, self.weights)
    if weights is None:
        votes, _ = stats.mode(self._fit_y[neigh_ind], axis=1)
    else:
        votes, _ = weighted_mode(self._fit_y[neigh_ind], weights, axis=1)
    # scipy.stats.mode keeps the reduced axis on older SciPy (n, 1) but drops
    # it on SciPy >= 1.11 (n,); only index the column when it exists.
    if votes.ndim == 1:
        return votes
    return votes[:, 0]
def mode(data=None):
    """
    Compute mode of given numpy array or pandas series.

    Mode is just a wrapper around scipy.stats.mode which returns
    the mode of a given numpy array or pandas series. Missing
    values are omitted before the mode is computed.

    Args:
        data: A numpy array or pandas series. Objects exposing ``dropna``
            are cleaned with it; anything else is flattened with numpy and
            NaNs are removed when the dtype is float or complex.

    Returns:
        The mode of x as scalar value.

    Raises:
        ValueError: If no data is specified or if all values are missing.
    """
    import numpy as np  # local import: only ``stats`` was referenced before

    if data is None:
        raise ValueError('No data specified.')

    if hasattr(data, 'dropna'):
        valid = np.asarray(data.dropna())
    else:
        # Plain arrays have no dropna(); filter NaNs by hand.
        valid = np.ravel(np.asarray(data))
        if valid.dtype.kind in 'fc':
            valid = valid[~np.isnan(valid)]

    if not len(valid):
        raise ValueError('No valid data specified.')

    mode_val = stats.mode(valid, nan_policy='omit')[0]
    # Length-1 array on older SciPy, scalar on SciPy >= 1.11.
    return np.ravel(mode_val)[0]
classification.py 文件源码
项目:decoding_challenge_cortana_2016_3rd
作者: kingjr
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def predict(self, covtest):
    """Return the majority class among each trial's nearest neighbours.

    Parameters
    ----------
    covtest : ndarray
        Input forwarded to ``self._predict_distances`` (described by the
        original docstring as SPD matrices, shape
        (n_trials, n_channels, n_channels)).

    Returns
    -------
    pred : ndarray
        Flattened array holding, for each trial, the mode of the classes of
        its ``self.n_neighbors`` smallest-distance entries.
    """
    order = numpy.argsort(self._predict_distances(covtest))
    k = self.n_neighbors
    # Labels sorted by increasing distance, truncated to the k closest.
    closest = self.classes_[order][:, 0:k]
    winners = stats.mode(closest, axis=1)[0]
    return winners.ravel()
def __init__(self, model_param):
self.interface_layer = model_param['interface_layer']
self.middle_layers = model_param['middle_layers']
self.cost_layer = model_param['cost_layer']
self.last_n = model_param['last_n']
self.outputs = model_param.get('outputs', None)
self.errors = model_param.get('errors', None)
self.name = model_param["name"]
self.problem_type = model_param["problem_type"]
self.mode = "train"
self.param = []
for layer in self.middle_layers:
self.param += layer.param
self.param += self.cost_layer.param
self.set_mode(self.mode)
self.grad = self.get_grad()
self.cost_func = self.get_cost_func()
self.output_func_dict = self.get_output_func_dict()
self.error_func_dict = self.get_error_func_dict()
KNNclassLearner.py 文件源码
项目:machine-learning-for-trading
作者: arjun-joshua
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def query(self, points):
    """
    @summary: Estimate a set of test points given the model we built.
    @param points: should be a numpy array with each row corresponding to a specific query.
    @returns the estimated values according to the saved model: for each
             query, the modal label of the self.k training rows with the
             smallest squared Euclidean distance.
    """
    n_train = self.dataX.shape[0]
    # Labels as a flat vector; accepts dataY shaped (n, 1) or (n,).
    labels = np.asarray(self.dataY).reshape(n_train, -1)[:, 0]

    pred = np.zeros(points.shape[0])  # initialize prediction vector
    for i in range(points.shape[0]):  # iterate over each test example
        sqDist = np.zeros(n_train)
        for j in range(self.dataX.shape[1]):
            sqDist += (points[i, j] - self.dataX[:, j]) ** 2
        # Stable sort keeps the training-set order among equal distances.
        nearest = np.argsort(sqDist, kind='mergesort')[:self.k]
        # classify: modal label of the k nearest rows. ravel()[0] copes with
        # both the array and the scalar return shape of scipy.stats.mode.
        pred[i] = np.ravel(stats.mode(labels[nearest])[0])[0]
    return pred
BagClassLearner.py 文件源码
项目:machine-learning-for-trading
作者: arjun-joshua
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def query(self, testX):
    """
    @summary: Add test data to query individual learners in BagLearner
    @param testX: ndarray, X test data with examples in rows & features in columns
    Returns pred: 1Darray, the predicted labels (mode over all learners)
    """
    # rows = test examples, columns = individual learners
    pred = np.empty((testX.shape[0], self.bags))
    for col in range(pred.shape[1]):
        # each learner fills one column with its predictions
        pred[:, col] = self.learnerList[col].query(testX)
    # mode across learners for every test example
    modeValue, binCount = stats.mode(pred, axis=1)
    # Older SciPy returns a column vector, SciPy >= 1.11 a flat vector;
    # ravel gives the same 1-D result for both.
    return np.ravel(modeValue)
def ensure_no_stuckbits(F, args, fn):
    """Check that the overscan region has no gaps around its modal value.

    The region is read from ``F[0].data`` using the ``BIASSEC`` header of
    ``F[0]``, written as ``[x1:x2,y1:y2]``. Each integer from ``mode - 8`` to
    ``mode + 8`` that never occurs in that region is reported through
    ``args.log.warning``.

    Parameters
    ----------
    F : sequence
        ``F[0]`` must expose ``header`` (mapping with ``BIASSEC``) and
        ``data`` (2-D array).
    args : object
        Must expose ``log.warning``.
    fn : str
        Name used in the warning text.

    Returns
    -------
    bool
        True when every value in the window is present, False otherwise.
    """
    # Pull the four bounds directly; unlike splitting on the delimiters this
    # tolerates spaces inside the section string.
    bounds = re.findall(r'\d+', F[0].header['BIASSEC'])
    # Start bounds (even positions) are lowered by one, end bounds are kept,
    # which turns the 1-based inclusive section into Python slice limits.
    biassec = [int(t) - ((i + 1) % 2) for i, t in enumerate(bounds)]
    data = F[0].data[biassec[2]:biassec[3], biassec[0]:biassec[1]]

    # ravel()[0] handles both the array and scalar result of stats.mode.
    mode_value = int(np.ravel(mode(data.ravel())[0])[0])

    missing_values = [i for i in np.arange(mode_value - 8, mode_value + 9)
                      if (data == i).sum() == 0]
    for missing in missing_values:
        args.log.warning('The value %i is not represented '
                         'in the overscan region for %s' % (missing, fn))
    return not missing_values
def test_random_weights():
    """Weighted mode of rows whose first five columns are forced to 6."""
    # Every row gets five entries equal to 6 carrying weight > 1, while the
    # remaining entries are drawn below 6 with weight < 1, so the weighted
    # mode is 6 and its score is the sum of the five boosted weights.
    expected_mode = 6
    n_forced = 5

    rng = np.random.RandomState(0)
    x = rng.randint(expected_mode, size=(100, 10))
    w = rng.random_sample(x.shape)

    x[:, :n_forced] = expected_mode
    w[:, :n_forced] += 1

    mode, score = weighted_mode(x, w, axis=1)

    assert_array_equal(mode, expected_mode)
    expected_score = w[:, :n_forced].sum(1)
    assert_array_almost_equal(score.ravel(), expected_score)
def getNDValue(self):
    """
    Get value of not defined depth value distances

    Whichever is more numerous, the pixels of ``self.dpt`` below
    ``self.minDepth`` or those above ``self.maxDepth``, supplies the
    candidates (the upper group wins ties); their mode is returned.
    :return:value of not defined depth value
    """
    import numpy  # local import: only ``stats`` was referenced here before

    below = self.dpt[self.dpt < self.minDepth]
    above = self.dpt[self.dpt > self.maxDepth]
    candidates = below if below.shape[0] > above.shape[0] else above
    # stats.mode yields a length-1 array on older SciPy and a scalar on
    # SciPy >= 1.11; ravel()[0] extracts the value from either.
    return numpy.ravel(stats.mode(candidates)[0])[0]
def getCrop(self, dpt, xstart, xend, ystart, yend, zstart, zend, thresh_z=True):
    """
    Crop patch from image

    The requested window is clipped to the image, copied, and zero-padded
    on the sides where it extended past the image so the aspect ratio is
    kept.
    :param dpt: depth image to crop from (2-D, or 3-D with trailing channels)
    :param xstart: start x
    :param xend: end x
    :param ystart: start y
    :param yend: end y
    :param zstart: start z
    :param zend: end z
    :param thresh_z: threshold z values
    :return: cropped image
    """
    n_dims = len(dpt.shape)
    if n_dims not in (2, 3):
        raise NotImplementedError()

    top, bottom = max(ystart, 0), min(yend, dpt.shape[0])
    left, right = max(xstart, 0), min(xend, dpt.shape[1])
    # amount of the window lying outside the image on each side
    pad_width = ((abs(ystart) - top, abs(yend) - bottom),
                 (abs(xstart) - left, abs(xend) - right))

    if n_dims == 2:
        cropped = dpt[top:bottom, left:right].copy()
    else:
        cropped = dpt[top:bottom, left:right, :].copy()
        pad_width = pad_width + ((0, 0),)  # never pad the channel axis

    cropped = numpy.pad(cropped, pad_width, mode='constant', constant_values=0)

    if thresh_z is True:
        # Both masks are taken before either assignment.
        too_near = numpy.bitwise_and(cropped < zstart, cropped != 0)
        too_far = numpy.bitwise_and(cropped > zend, cropped != 0)
        cropped[too_near] = zstart
        cropped[too_far] = 0.  # backface is at 0, it is set later
    return cropped
def __init__(self, data_block):
    """Store ``data_block`` and cache its target and available data.

    ``data_block`` is first passed to ``self.check_datatype`` together with
    the ``DataBlock`` class (presumably a type check; confirm against
    ``check_datatype``).
    """
    self.check_datatype(data_block, 'data_block', DataBlock)
    self.datablock = data_block
    # redundant alias, kept to make the rest of the code more readable
    self.target = data_block.target
    # values of the mapping returned by data_present()
    self.dp = data_block.data_present().values()
def __init__(self, data_block):
    """Keep a reference to ``data_block`` plus shortcuts to its contents.

    ``self.check_datatype`` is called with ``data_block`` and the
    ``DataBlock`` class before anything is stored (presumably to validate
    the argument type; confirm against ``check_datatype``).
    """
    self.check_datatype(data_block, 'data_block', DataBlock)

    block = data_block
    self.datablock = block
    self.target = block.target  # redundant, but makes code more readable
    # values of whatever data_present() reports as available
    available = block.data_present()
    self.dp = available.values()
def voting(self, y_true, pred):
    """Collapse per-segment labels into one voted label per song.

    Both arrays are cut into consecutive chunks of ``self.augment_factor``
    entries; the mode of each chunk becomes the label for that song.

    Parameters
    ----------
    y_true : ndarray
        Ground-truth label of every segment.
    pred : ndarray
        Predicted label of every segment; same length as ``y_true``.

    Returns
    -------
    (ndarray, ndarray)
        Voted ground truth and voted prediction.

    Raises
    ------
    ValueError
        If the arrays differ in length (also raised by ``np.split`` when the
        length is not a multiple of ``self.augment_factor``).
    """
    if y_true.shape[0] != pred.shape[0]:
        raise ValueError('Both arrays should have the same size!')

    def _modal(values):
        # Handles both the array and scalar return shape of stats.mode.
        return np.ravel(mode(values)[0])[0]

    # np.split needs an integer section count; "/" gives a float on Python 3.
    n_songs = int(y_true.shape[0] // self.augment_factor)
    pred_chunks = np.split(pred, n_songs)
    truth_chunks = np.split(y_true, n_songs)

    voting_truth = [_modal(chunk) for chunk in truth_chunks]
    voting_ans = [_modal(chunk) for chunk in pred_chunks]
    return np.array(voting_truth), np.array(voting_ans)
# @Class: MusicDataGenerator
# @Description:
# featurewise_center: set input mean to 0 over the dataset.
# samplewise_center: set each sample mean to 0.
# featurewise_std_normalization: divide inputs by std of the dataset.
# samplewise_std_normalization: divide each input by its std.
# zca_whitening: apply ZCA whitening.
def predict(self, X):
    '''
    Return predictions for X, an N x D array.

    Every model in ``self.h`` predicts each row on its own; the value
    reported for each of the ``self.L`` outputs is the mode across the
    ``self.M`` models.
    '''
    n_samples, _ = X.shape
    n_models, n_outputs = self.M, self.L
    Y = zeros((n_samples, n_outputs))
    for row in range(n_samples):
        # one row of votes per model
        votes = zeros((n_models, n_outputs))
        for idx in range(n_models):
            votes[idx, :] = self.h[idx].predict(array([X[row, :]]))
        winners = mode(votes)
        Y[row, :] = winners[0]
    return Y
def _most_frequent(array, extra_value, n_repeat):
"""Compute the most frequent value in a 1d array extended with
[extra_value] * n_repeat, where extra_value is assumed to be not part
of the array."""
# Compute the most frequent value in array only
if array.size > 0:
mode = stats.mode(array)
most_frequent_value = mode[0][0]
most_frequent_count = mode[1][0]
else:
most_frequent_value = 0
most_frequent_count = 0
# Compare to array + [extra_value] * n_repeat
if most_frequent_count == 0 and n_repeat == 0:
return np.nan
elif most_frequent_count < n_repeat:
return extra_value
elif most_frequent_count > n_repeat:
return most_frequent_value
elif most_frequent_count == n_repeat:
# Ties the breaks. Copy the behaviour of scipy.stats.mode
if most_frequent_value < extra_value:
return most_frequent_value
else:
return extra_value
def summarize(self, x, summary_func, missing_data_cond, in_place=False):
    """ Substitutes missing values with a statistical summary of each
    feature vector

    Parameters
    ----------
    x : numpy.array
        2-D array, one feature per column.
    summary_func : function
        Summarization function to be used for imputation
        (mean, median, mode, max, min...). It receives the non-missing
        values of one column.
    missing_data_cond : function
        Takes one column and returns a boolean array that is True where
        the entry represents missing data.
    in_place : bool
        When True, ``x`` itself is modified and returned; otherwise a copy
        is filled in and ``x`` is left untouched.

    Returns
    -------
    numpy.array
        The data with missing entries replaced column by column.
    """
    data = x if in_place else np.copy(x)

    # replace missing values with the summarization function
    for col in range(x.shape[1]):  # range works on Python 2 and 3
        nan_ids = missing_data_cond(x[:, col])
        if np.any(nan_ids):
            data[nan_ids, col] = summary_func(x[~nan_ids, col])
    return data
def getNDValue(self):
    """
    Get value of not defined depth value distances

    Compares how many pixels of ``self.dpt`` lie below ``self.minDepth``
    with how many lie above ``self.maxDepth`` and returns the mode of the
    larger group (the upper group when the counts are equal).
    :return:value of not defined depth value
    """
    import numpy  # local import: only ``stats`` was referenced here before

    too_close = self.dpt[self.dpt < self.minDepth]
    too_far = self.dpt[self.dpt > self.maxDepth]
    if too_close.shape[0] > too_far.shape[0]:
        chosen = too_close
    else:
        chosen = too_far
    # The mode comes back as a length-1 array (older SciPy) or a scalar
    # (SciPy >= 1.11); ravel()[0] works for both.
    return numpy.ravel(stats.mode(chosen)[0])[0]
def getCrop(self, dpt, xstart, xend, ystart, yend, zstart, zend, thresh_z=True):
    """
    Crop patch from image

    Takes the part of the window that lies inside the image and pads the
    remainder with zeros so the patch keeps the requested aspect ratio.
    :param dpt: depth image to crop from (2-D, or 3-D with trailing channels)
    :param xstart: start x
    :param xend: end x
    :param ystart: start y
    :param yend: end y
    :param zstart: start z
    :param zend: end z
    :param thresh_z: threshold z values
    :return: cropped image
    """
    rank = len(dpt.shape)
    if rank != 2 and rank != 3:
        raise NotImplementedError()

    height, width = dpt.shape[0], dpt.shape[1]
    y0, y1 = max(ystart, 0), min(yend, height)
    x0, x1 = max(xstart, 0), min(xend, width)

    # add pixels that are out of the image in order to keep aspect ratio
    padding = [(abs(ystart) - y0, abs(yend) - y1),
               (abs(xstart) - x0, abs(xend) - x1)]
    if rank == 3:
        patch = dpt[y0:y1, x0:x1, :].copy()
        padding.append((0, 0))  # channel axis is left as is
    else:
        patch = dpt[y0:y1, x0:x1].copy()
    cropped = numpy.pad(patch, tuple(padding), mode='constant', constant_values=0)

    if thresh_z is True:
        # masks are evaluated on the untouched crop before any write
        in_front = numpy.bitwise_and(cropped < zstart, cropped != 0)
        behind = numpy.bitwise_and(cropped > zend, cropped != 0)
        cropped[in_front] = zstart
        cropped[behind] = 0.  # backface is at 0, it is set later
    return cropped
def build_tree(self, dataX, dataY):
    """Recursively build a random classification tree as a 2-D array.

    Each row is ``[feature, split_value, left_offset, right_offset]``; a
    leaf is ``[-1, label, -1, -1]``. A random feature is split at the
    midpoint of two randomly drawn rows.

    Parameters
    ----------
    dataX : ndarray
        Training features, one example per row.
    dataY : ndarray
        Labels aligned with the rows of ``dataX``.

    Returns
    -------
    ndarray
        Empty array for no data, a 1-D leaf row, or the stacked
        root/left/right rows.
    """
    # Single pre-formatted argument prints identically on Python 2 and 3.
    if self.verbose:
        print("build_tree %s" % (self.leaf_size,))
    if self.verbose:
        print("data shape %s" % (dataX.shape,))

    # if no elements in subtree, return empty subtree
    if dataX.shape[0] == 0:
        return np.array([])

    # if there is only 1 item left or if fewer than leaf size, return mode of data
    if dataX.ndim == 1 or dataX.shape[0] <= self.leaf_size:
        # ravel()[0] handles both the array and scalar result of stats.mode.
        leaf_value = np.ravel(stats.mode(dataY)[0])[0]
        return np.array([-1, leaf_value, -1, -1])

    if self.verbose:
        print("passed conditions")

    # NOTE(review): randint's upper bound is exclusive, so the last feature
    # column is never selected here - confirm this is intended.
    i = np.random.randint(dataX.shape[1]-1)
    d = np.random.randint(dataX.shape[0], size=2)
    # Redraw the second row until the two rows differ on feature i.
    for j in range(11):
        if dataX[d[0], i] != dataX[d[1], i]:
            break
        else:
            d[1] = np.random.randint(dataX.shape[0])
    # NOTE(review): this also fires when the break happened on the last
    # attempt (j == 10), giving up on a valid split - confirm.
    if j == 10:
        return np.array([-1, dataY[d[0]], -1, -1])

    splitVal = (dataX[d[0], i] + dataX[d[1], i]) / 2.0
    indices = dataX[:, i] <= splitVal
    leftTree = self.build_tree(dataX[indices, :], dataY[indices])
    indices = dataX[:, i] > splitVal
    rightTree = self.build_tree(dataX[indices, :], dataY[indices])

    # a 1-D result is a single leaf row
    leftTreeSize = leftTree.shape[0] if leftTree.ndim != 1 else 1
    if leftTree.shape[0] == 0 or rightTree.shape[0] == 0:
        leftTreeSize = 0
    root = [i, splitVal, 1, leftTreeSize + 1]
    if leftTree.shape[0] != 0:
        root = np.vstack((root, leftTree))
    if rightTree.shape[0] != 0:
        root = np.vstack((root, rightTree))
    return np.array(root)
def _most_common(img):
"""
Subtract the most common value from the whole image
"""
common = mode(img, axis=None).mode[0]
res = img - common
res[res < 0] = 0
return res
def compute_class_averages(self):
    """
    Computes the class average of each node in the tree.

    Class average is the mode of training data that partitions to the node.
    Nodes 2..``self.nodes`` are visited in order; a node whose parent has no
    cutoff inherits the parent's ``classval``.
    """
    import numpy as np  # local import: numpy was not referenced here before

    for i in range(2, self.nodes + 1):
        # next(iter(...)) accepts both a list and an iterator of predecessors.
        parent = next(iter(self.graph.predecessors(i)))
        # NOTE(review): ``graph.node`` was removed in networkx 2.4 in favour
        # of ``graph.nodes`` - confirm the pinned networkx version.
        if self.graph.node[parent]['cutoff'] is None:
            self.graph.node[i]['classval'] = self.graph.node[parent]['classval']
        else:
            node_indices = self.partition_data(i)
            # ravel()[0] handles both the array and scalar mode result.
            classval = np.ravel(mode(self.y[node_indices])[0])[0]
            self.graph.node[i]['classval'] = classval
def predict(self, x, k=1, model='regression'):
    """
    Note: currenly only works on single vector and not matrices

    Args:
        x (np.ndarray): Query vector of shape[1, n_features]
        k (int): number of nearest neighbor to consider
        model: {'regression', 'classification'}
            K nearest neighbor classification or regression.
            Choice most likely depends on the type of data the
            model was fit with.

    Returns:
        float: Returns predicted value - the mean of the k nearest stored
        values for regression, their mode for classification.

    Raises:
        NameError: if model has not been fit
        ValueError: if ``model`` is not one of the two supported names
    """
    if not self.learned:
        raise NameError('Fit model first')
    if model not in ('regression', 'classification'):
        # Previously this fell through to an unbound ``prediction``.
        raise ValueError("model must be 'regression' or 'classification'")

    # Distance from x to every stored sample, built in one pass instead of
    # repeatedly re-allocating with np.append.
    distances = np.array([np.linalg.norm(x - self.samples[row, :])
                          for row in range(np.shape(self.samples)[0])])
    nearestneighbors = distances.argsort()[:k]

    if model == 'regression':
        return self.values[nearestneighbors].mean()
    # ravel()[0] returns a scalar for both the array (older SciPy) and
    # scalar (SciPy >= 1.11) result of stats.mode.
    return np.ravel(stats.mode(self.values[nearestneighbors])[0])[0]
def predict(self, estimator_args, with_prob=False):
    """Combine the predictions of all estimators for a batch.

    With ``self.voting == 'hard'`` every estimator's ``predict_func`` output
    is collected and the per-item mode is returned; the optional probability
    is the share of estimators that voted for it. Otherwise the
    ``predict_prob_func`` outputs are averaged and the arg-max class is
    returned, with its mean probability.

    Parameters
    ----------
    estimator_args : sequence
        Positional arguments forwarded to each estimator function.
    with_prob : bool
        When True, return ``(prediction, probability)``.
    """
    if self.voting == 'hard':
        # sub_res -> (estimator_dim, batch_dim)
        sub_res = np.array([estimator.predict_func(*estimator_args) for estimator in self.estimators],
                           dtype=theano.config.floatX)
        mode_res, count = mode(sub_res, axis=0)
        # Older SciPy keeps the reduced axis (1, batch); SciPy >= 1.11 drops
        # it. Reshaping to the non-estimator dims gives the same result for
        # both, whereas indexing [0] would pick one batch element on new
        # SciPy. The trailing [()] turns a 0-d array back into a scalar.
        out_shape = sub_res.shape[1:]
        mode_res = np.asarray(mode_res).reshape(out_shape)[()]
        if not with_prob:
            return mode_res
        count = np.asarray(count).reshape(out_shape)[()]
        # float() avoids integer floor division on Python 2.
        return mode_res, count / float(self.n_estimators)
    else:
        # sub_res -> (estimator_dim, batch_dim, target_dim)
        sub_res = np.array([estimator.predict_prob_func(*estimator_args) for estimator in self.estimators],
                           dtype=theano.config.floatX)
        sub_res = sub_res.mean(axis=0)
        max_res = np.argmax(sub_res, axis=1)
        mean_prob = sub_res[np.arange(sub_res.shape[0]), max_res]
        return (max_res, mean_prob) if with_prob else max_res
def predict_sent(self, sent, with_prob=False):
    """Combine the predictions of all estimators for one sentence.

    With ``self.voting == 'hard'`` the modal label across estimators is
    returned, optionally with the share of estimators that chose it.
    Otherwise the per-class probabilities are averaged and the arg-max
    class is returned, optionally with its mean probability.

    Parameters
    ----------
    sent : object
        Sentence forwarded to each estimator's ``predict_sent``.
    with_prob : bool
        When True, return ``(prediction, probability)``.
    """
    if self.voting == 'hard':
        # sub_res -> (estimator_dim, )
        sub_res = np.array([estimator.predict_sent(sent) for estimator in self.estimators],
                           dtype=np.float32)
        mode_res, count = mode(sub_res)
        # Length-1 arrays on older SciPy, scalars on SciPy >= 1.11.
        label = np.ravel(mode_res)[0]
        if not with_prob:
            return label
        # float() avoids integer floor division on Python 2.
        return label, np.ravel(count)[0] / float(self.n_estimators)
    else:
        # sub_res -> (estimator_dim, target_dim)
        sub_res = np.array([estimator.predict_sent(sent, with_prob=True) for estimator in self.estimators],
                           dtype=np.float32)
        sub_res = sub_res.mean(axis=0)
        max_res = np.argmax(sub_res)
        mean_prob = sub_res[max_res]
        return (max_res, mean_prob) if with_prob else max_res
realtime-emotion.py 文件源码
项目:Realtime-EEG-Based-Emotion-Recognition
作者: nadzeri
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def predict_emotion(self,feature):
    """
    Get arousal and valence class from feature.

    For each of arousal and valence, the Canberra distance from ``feature``
    to every training vector is computed and the three nearest are kept. If
    the ratio nearest / second-nearest distance is at most 0.97 the class of
    the nearest vector is used; otherwise the most frequent class among the
    three nearest is used.

    Input: Feature (standard deviation and mean) from all frequency bands and channels with dimension 1 x M (number of feature).
    Output: Class of emotion between 1 to 3 from each arousal and valence. 1 denotes low category, 2 denotes normal category, and 3 denotes high category.
    """
    # NOTE(review): the original comment quoted a 0.7 threshold while the
    # code compared against 0.97; the code's value is kept.
    ratio_threshold = 0.97

    def _classify(train_set, class_row):
        # List comprehension instead of map(): on Python 3 map() returns an
        # iterator that np.argsort cannot sort.
        distances = np.array([ss.distance.canberra(x, feature) for x in train_set])
        # Indices and distances of the 3 nearest training vectors.
        idx_nearest = np.argsort(distances)[:3]
        val_nearest = distances[idx_nearest]
        comp = val_nearest[0] / val_nearest[1]
        if comp <= ratio_threshold:
            return class_row[0, idx_nearest[0]]
        result = sst.mode(class_row[0, idx_nearest])
        # ravel()[0] handles both the array and scalar mode result.
        return float(np.ravel(result[0])[0])

    result_ar = _classify(self.train_arousal, self.class_arousal)
    result_va = _classify(self.train_valence, self.class_valence)
    return result_ar,result_va
def getNDValue(self):
    """
    Get value of not defined depth value distances

    The pixels of ``self.dpt`` below ``self.minDepth`` are used when they
    outnumber those above ``self.maxDepth``; otherwise the latter are used.
    The mode of the chosen pixels is returned.
    :return:value of not defined depth value
    """
    import numpy  # local import: only ``stats`` was referenced here before

    depth = self.dpt
    low_values = depth[depth < self.minDepth]
    high_values = depth[depth > self.maxDepth]
    use_low = low_values.shape[0] > high_values.shape[0]
    selected = low_values if use_low else high_values
    # ravel()[0] extracts the mode whether scipy returns a length-1 array
    # (older releases) or a scalar (SciPy >= 1.11).
    return numpy.ravel(stats.mode(selected)[0])[0]