def blend(img_in: np.ndarray, img_layer: np.ndarray, blend_op=None, opacity: float = 1.0):
    # sanity check of inputs
    assert img_in.dtype == np.float64, 'Input variable img_in should be of numpy.float64 type.'
    assert img_layer.dtype == np.float64, 'Input variable img_layer should be of numpy.float64 type.'
    assert img_in.shape[2] == 4, 'Input variable img_in should be of shape [:, :, 4].'
    assert img_layer.shape[2] == 4, 'Input variable img_layer should be of shape [:, :, 4].'
    assert 0.0 <= opacity <= 1.0, 'Opacity needs to be between 0.0 and 1.0.'

    ratio = _compose_alpha(img_in, img_layer, opacity)

    # Resolve the blend operation: default to screen, or look it up by name.
    if blend_op is None:
        blend_op = BlendOp.screen
    elif isinstance(blend_op, str):
        if hasattr(BlendOp, blend_op):
            blend_op = getattr(BlendOp, blend_op)
        else:
            raise ValueError('Invalid blend mode: %s' % blend_op)

    comp = blend_op(img_in, img_layer)
    ratio_rs = np.reshape(np.repeat(ratio, 3), [comp.shape[0], comp.shape[1], comp.shape[2]])
    img_out = comp * ratio_rs + img_in[:, :, :3] * (1.0 - ratio_rs)
    img_out = np.nan_to_num(np.dstack((img_out, img_in[:, :, 3])))  # add alpha channel and replace NaNs
    return img_out
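A minimal usage sketch, assuming `BlendOp` exposes a `screen` mode and `_compose_alpha` follows the usual opacity-times-layer-alpha convention; the random RGBA images are hypothetical placeholders:

import numpy as np

base = np.random.rand(64, 64, 4)    # hypothetical background image, float RGBA in [0, 1]
layer = np.random.rand(64, 64, 4)   # hypothetical layer to composite on top

out = blend(base, layer, blend_op='screen', opacity=0.5)
print(out.shape, out.dtype)         # (64, 64, 4) float64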
def load_arff(arff_file, one_hot=True, normalize=True):
    with open(arff_file, 'r') as f:
        obj = arff.load(f, encode_nominal=True)

    data = obj['data']
    labels = [row[-1] for row in data]
    data = np.array(data)
    data = data[:, :-1]

    if normalize:
        # min-max scale each column; constant columns give 0/0 = NaN, so clean up afterwards
        data = (data - data.min(axis=0)) / data.ptp(axis=0)
        data = np.nan_to_num(data)

    if one_hot:
        label_binarizer = sklearn.preprocessing.LabelBinarizer()
        label_binarizer.fit(range(max(labels) + 1))
        labels = label_binarizer.transform(labels)
        labels = np.array(labels, dtype=np.float32)

    return data, labels
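A short usage example (the file name is hypothetical; the snippet assumes the `liac-arff` package imported as `arff` and scikit-learn imported as `sklearn`):

data, labels = load_arff('iris.arff', one_hot=True, normalize=True)
print(data.shape, labels.shape)   # features min-max scaled, labels one-hot encoded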
def _normalize_for_correlation(data, axis):
    """normalize the data before computing correlation

    The data will be z-scored and divided by sqrt(n)
    along the assigned axis

    Parameters
    ----------
    data: 2D array

    axis: int
        specify which dimension of the data should be normalized

    Returns
    -------
    data: 2D array
        the normalized data
    """
    shape = data.shape
    data = zscore(data, axis=axis, ddof=0)
    # if zscore fails (standard deviation is zero),
    # set all values to be zero
    data = np.nan_to_num(data)
    data = data / math.sqrt(shape[axis])
    return data
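After this normalization a plain matrix product yields Pearson correlations directly, which is the point of dividing by sqrt(n). A minimal sketch, assuming `zscore` behaves like `scipy.stats.zscore`:

import math
import numpy as np
from scipy.stats import zscore

rng = np.random.default_rng(0)
x = rng.standard_normal((100, 3))                               # 100 samples, 3 variables
xn = np.nan_to_num(zscore(x, axis=0, ddof=0)) / math.sqrt(x.shape[0])
print(np.allclose(xn.T @ xn, np.corrcoef(x, rowvar=False)))     # True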
def divide_safezero(a, b):
    '''
    Divides a by b, then turns nans and infs into 0, so all division by 0
    becomes 0.

    Args:
        a (np.ndarray)
        b (np.ndarray|int|float)

    Returns:
        np.ndarray
    '''
    # deal with divide-by-zero: turn x/0 (+/-inf) into 0, and turn 0/0 (nan)
    # into 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        c = a / b
    c[np.isinf(c)] = 0.0
    c = np.nan_to_num(c)
    return c
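A quick demonstration of the intended behaviour:

import numpy as np

a = np.array([1.0, -2.0, 0.0])
b = np.array([0.0, 0.0, 0.0])
print(divide_safezero(a, b))   # [0. 0. 0.]  (1/0, -2/0 and 0/0 all mapped to 0)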
# Classes
# -----------------------------------------------------------------------------
def set_data(self, y, variance, x=None):
    """
    update a gauss_1D with new data
    :param y:
    :param variance:
    :param x:
    :return:
    """
    n_points = len(y)
    if x is None:
        x = np.arange(n_points)
    self._handle.set_data(x, y)  # Update mean

    new_percentiles = []
    out = self.distribution.split("+")
    n_percentiles = len(out)
    sub_alpha = self.alpha / n_percentiles  # Normalize w.r.t. the number of percentiles
    for i, percentile in enumerate(self._percentiles):
        percentile.remove()
        percentile = float(out[i])
        assert 0 <= percentile <= 100, 'Percentile must be between 0 and 100. Instead is %f' % percentile
        interval = scipy.stats.norm.interval(percentile / 100, loc=y, scale=np.sqrt(variance))
        interval = np.nan_to_num(interval)  # norm.interval(0) returns nan, so replace it with 0
        new_percentiles.append(plt.fill_between(x, interval[0], interval[1],
                                                color=self._handle.get_color(), alpha=sub_alpha))
# TODO: not implemented yet
pass
def __init__(self, z, w, name='', quadrature=None, cfg=None):
    self.t0 = datetime.datetime.now()
    # If given a namespace of configuration settings, use it.
    # Otherwise fall back to whatever is in `constants.py`.
    self.cfg = cfg or constants
    # If w or z are DataFrames, convert them to ndarrays.
    self.w = w.values if hasattr(w, 'values') else w
    self.z = z.values if hasattr(z, 'values') else z
    self.w2 = np.nan_to_num(self.w)
    self.num2 = np.nan_to_num(self.z)
    self.name, self.n = name, w.shape[0]
    self.ests_init = np.array(pack(self.cfg.INITIAL_LVM_PARAMS, w.shape[1]))
    if quadrature or (cfg is not None and cfg.QUADRATURE):
        self.ests_ll = self.ests_ll_quad
        self.ests_bounds = pack(self.cfg.QUAD_BOUNDS, w.shape[1])
    else:
        self.ests_ll = self.ests_ll_exact
        self.ests_bounds = pack(self.cfg.EXACT_BOUNDS, w.shape[1])
def ests_ll_quad(self, params):
    """
    Calculate the loglikelihood given model parameters `params`.

    This method uses Gaussian quadrature, and thus returns an *approximate*
    integral.
    """
    mu0, gamma0, err0 = np.split(params, 3)
    x = np.tile(self.z, (self.cfg.QCOUNT, 1, 1))  # (QCOUNT x nhosp x nmeas)
    loc = mu0 + np.outer(QC1, gamma0)
    loc = np.tile(loc, (self.n, 1, 1))
    loc = np.transpose(loc, (1, 0, 2))
    scale = np.tile(err0, (self.cfg.QCOUNT, self.n, 1))
    zs = lpdf_3d(x=x, loc=loc, scale=scale)
    w2 = np.tile(self.w, (self.cfg.QCOUNT, 1, 1))
    wted = np.nansum(w2 * zs, axis=2).T  # (nhosp x QCOUNT)
    qh = np.tile(QC1, (self.n, 1))  # (nhosp x QCOUNT)
    combined = wted + norm.logpdf(qh)  # (nhosp x QCOUNT)
    return logsumexp(np.nan_to_num(combined), b=QC2, axis=1)  # (nhosp,)
def __model_form(self, tri_array):
    w = np.nan_to_num(self.weights / tri_array[:, :, :-1]**(2 - self.alpha))
    x = np.nan_to_num(tri_array[:, :, :-1] * (tri_array[:, :, 1:] * 0 + 1))
    y = np.nan_to_num(tri_array[:, :, 1:])
    LDF = np.sum(w * x * y, axis=1) / np.sum(w * x * x, axis=1)
    # Chainladder (alpha=1/delta=1)
    # LDF = np.sum(np.nan_to_num(tri_array[:,:,1:]),axis=1) / np.sum(np.nan_to_num((tri_array[:,:,1:]*0+1)*tri_array[:,:,:-1]),axis=1)
    # print(LDF.shape)
    # assumes no tail
    CDF = np.append(np.cumprod(LDF[:, ::-1], axis=1)[:, ::-1],
                    np.array([1] * tri_array.shape[0]).reshape(tri_array.shape[0], 1), axis=1)
    latest = np.flip(tri_array, axis=1).diagonal(axis1=1, axis2=2)
    ults = latest * CDF
    lu = list(ults)
    lc = list(CDF)
    exp_cum_triangle = np.array([np.flipud(lu[num].reshape(tri_array.shape[2], 1)
                                           .dot(1 / lc[num].reshape(1, tri_array.shape[2])))
                                 for num in range(tri_array.shape[0])])
    exp_incr_triangle = np.append(exp_cum_triangle[:, :, 0, np.newaxis], np.diff(exp_cum_triangle), axis=2)
    return LDF, CDF, ults, exp_incr_triangle
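For intuition, a small hedged sketch (toy 3x3 triangle, assuming unit weights and alpha = 1) showing that the weighted form above reduces to the volume-weighted chain-ladder factor sum(y)/sum(x):

import numpy as np

tri = np.array([[[100., 150., 165.],
                 [110., 160., np.nan],
                 [120., np.nan, np.nan]]])                      # (1, origin, development)
x = np.nan_to_num(tri[:, :, :-1] * (tri[:, :, 1:] * 0 + 1))    # lagged values with an observed successor
y = np.nan_to_num(tri[:, :, 1:])                                # the successors themselves
w = np.nan_to_num(1.0 / np.where(x == 0, np.nan, x))            # alpha = 1  =>  w = 1 / x
LDF = np.sum(w * x * y, axis=1) / np.sum(w * x * x, axis=1)
print(LDF)   # [[310/210, 165/150]] = [[1.476..., 1.1]]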
def init_state(indata, test=False):
    close = indata['close'].values
    diff = np.diff(close)
    diff = np.insert(diff, 0, 0)
    sma15 = SMA(indata, timeperiod=15)
    sma60 = SMA(indata, timeperiod=60)
    rsi = RSI(indata, timeperiod=14)
    atr = ATR(indata, timeperiod=14)

    #--- Preprocess data
    xdata = np.column_stack((close, diff, sma15, close - sma15, sma15 - sma60, rsi, atr))
    xdata = np.nan_to_num(xdata)
    if not test:
        scaler = preprocessing.StandardScaler()
        xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1)
        joblib.dump(scaler, 'data/scaler.pkl')
    else:
        # reuse the scaler fitted on the training data; refitting here would leak test statistics
        scaler = joblib.load('data/scaler.pkl')
        xdata = np.expand_dims(scaler.transform(xdata), axis=1)
    state = xdata[0:1, 0:1, :]
    return state, xdata, close
#Take Action
def init_state(data):
    close = data
    diff = np.diff(data)
    diff = np.insert(diff, 0, 0)

    #--- Preprocess data
    xdata = np.column_stack((close, diff))
    xdata = np.nan_to_num(xdata)
    scaler = preprocessing.StandardScaler()
    xdata = scaler.fit_transform(xdata)
    state = xdata[0:1, :]
    return state, xdata

# Take Action
def test_validateTerms():
    data = {'b1': "np.nan_to_num(self.layerdict['friction'].getSlice("
                  "rowstart, rowend, colstart, colend, name='friction'))",
            'b2': "self.layerdict['slope'].getSlice(rowstart, rowend, "
                  "colstart, colend, name='slope')/100.",
            'b3': "np.log(self.layerdict['vs30'].getSlice(rowstart, rowend, "
                  "colstart, colend, name='vs30'))",
            'b4': "self.layerdict['cti1'].getSlice(rowstart, rowend, "
                  "colstart, colend, name='cti1')",
            'b5': "self.layerdict['precip'].getSlice(rowstart, rowend, "
                  "colstart, colend, name='precip')"}
    timeField = 'MONTH'
    coeff = LM.validateCoefficients(cmodel)
    layers = LM.validateLayers(cmodel)
    terms, time = LM.validateTerms(cmodel, coeff, layers)
    assert time == timeField
    assert data == terms
def V_short(self, eta):
    sum0 = np.zeros(7, dtype=float)
    sum1 = np.zeros(7, dtype=float)
    for n1, n2 in product(range(self.N1 + 1), range(self.N2 + 1)):
        wdo = comb(self.N1, n1, exact=True) * comb(self.N2, n2, exact=True)
        wdox10 = comb(self.N1 - 1, n1, exact=True) * comb(self.N2, n2, exact=True)
        wdox11 = comb(self.N1 - 1, n1 - 1, exact=True) * comb(self.N2, n2, exact=True)
        wdox20 = comb(self.N1, n1, exact=True) * comb(self.N2 - 1, n2, exact=True)
        wdox21 = comb(self.N1, n1, exact=True) * comb(self.N2 - 1, n2 - 1, exact=True)
        w = np.asarray([wdox10, wdox20, wdox11, wdox21, wdo, wdo, wdo])
        pz0, pz1 = self.p_n_given_z(n1, n2)
        counts = [self.N1 - n1, self.N2 - n2, n1, n2, 1, 1, 1]
        Q = (eta * pz0 * counts * (1 - self.pZgivenA) + eta * pz1 * counts * self.pZgivenA).sum()
        ratio = np.nan_to_num(np.true_divide(pz0 * (1 - self.pZgivenA) + pz1 * self.pZgivenA, Q))
        sum0 += np.asfarray(w * pz0 * ratio)
        sum1 += np.asfarray(w * pz1 * ratio)
    result = self.pZgivenA * sum1 + (1 - self.pZgivenA) * sum0
    return result
def stetsonJ(mag, magerr):
    """The variability index J was first suggested by Peter B. Stetson and measures
    the correlation between pairs of successive residuals of the magnitude distribution.
    See: (P. B. Stetson, Publications of the Astronomical Society of the Pacific 108, 851 (1996)).

    :param mag: the time-varying intensity of the object. Must be an array.
    :param magerr: photometric error for the intensity. Must be an array.

    :rtype: float
    """
    mag, magerr = remove_bad(mag, magerr)
    n = float(len(mag))
    # mean = meanMag(mag, magerr)
    mean = np.median(mag)

    delta_list = []
    for i in range(0, len(mag)):
        delta = np.sqrt(n / (n - 1.)) * ((mag[i] - mean) / magerr[i])
        delta_list.append(delta)

    val = np.nan_to_num([x * y for x, y in zip(delta_list[0:int(n) - 1], delta_list[1:int(n)])])
    sign = np.sign(val)
    stetj = sum(sign * np.sqrt(np.abs(val)))
    return stetj
def stetsonK(mag, magerr):
    """The variability index K was first suggested by Peter B. Stetson and serves as a
    measure of the kurtosis of the magnitude distribution.
    See: (P. B. Stetson, Publications of the Astronomical Society of the Pacific 108, 851 (1996)).

    :param mag: the time-varying intensity of the object. Must be an array.
    :param magerr: photometric error for the intensity. Must be an array.

    :rtype: float
    """
    mag, magerr = remove_bad(mag, magerr)
    n = float(len(mag))
    # mean = meanMag(mag, magerr)
    mean = np.median(mag)

    delta = np.sqrt((n / (n - 1.))) * ((mag - mean) / magerr)
    stetsonK = ((1. / n) * sum(abs(delta))) / (np.sqrt((1. / n) * sum(delta**2)))
    return np.nan_to_num(stetsonK)
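For intuition, a minimal standalone sketch of the K statistic on a synthetic light curve (the data are made up; for pure Gaussian scatter K approaches sqrt(2/pi) ~ 0.798):

import numpy as np

rng = np.random.default_rng(0)
mag = 15.0 + 0.05 * rng.standard_normal(500)   # synthetic magnitudes
magerr = np.full_like(mag, 0.05)               # constant photometric error

n = float(len(mag))
delta = np.sqrt(n / (n - 1.0)) * (mag - np.median(mag)) / magerr
K = (np.sum(np.abs(delta)) / n) / np.sqrt(np.sum(delta**2) / n)
print(K)   # ~0.8 for Gaussian noise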
def Salton(MatrixAdjacency_Train):
    similarity_StartTime = time.perf_counter()

    similarity = np.dot(MatrixAdjacency_Train, MatrixAdjacency_Train)

    deg_row = sum(MatrixAdjacency_Train)
    deg_row.shape = (deg_row.shape[0], 1)
    deg_row_T = deg_row.T
    tempdeg = np.dot(deg_row, deg_row_T)
    temp = np.sqrt(tempdeg)

    np.seterr(divide='ignore', invalid='ignore')
    Matrix_similarity = np.nan_to_num(similarity / temp)
    # print(np.isnan(Matrix_similarity))
    # Matrix_similarity = np.nan_to_num(Matrix_similarity)
    # print(np.isnan(Matrix_similarity))

    similarity_EndTime = time.perf_counter()
    print(" SimilarityTime: %f s" % (similarity_EndTime - similarity_StartTime))
    return Matrix_similarity
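The Salton index is the number of common neighbours divided by sqrt(k_i * k_j). A small self-contained example on a toy graph:

import numpy as np

# Tiny undirected graph with edges 0-1, 0-2, 1-2, 2-3
A = np.array([[0, 1, 1, 0],
              [1, 0, 1, 0],
              [1, 1, 0, 1],
              [0, 0, 1, 0]], dtype=float)
common = A @ A                        # common-neighbour counts
deg = A.sum(axis=0)
with np.errstate(divide='ignore', invalid='ignore'):
    salton = np.nan_to_num(common / np.sqrt(np.outer(deg, deg)))
print(salton[0, 1])                   # nodes 0 and 1 share neighbour 2 -> 1/sqrt(2*2) = 0.5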
def ACT(MatrixAdjacency_Train):
    similarity_StartTime = time.perf_counter()

    Matrix_D = np.diag(sum(MatrixAdjacency_Train))
    Matrix_Laplacian = Matrix_D - MatrixAdjacency_Train
    INV_Matrix_Laplacian = np.linalg.pinv(Matrix_Laplacian)
    Array_Diag = np.diag(INV_Matrix_Laplacian)

    Matrix_ONE = np.ones([MatrixAdjacency_Train.shape[0], MatrixAdjacency_Train.shape[0]])
    Matrix_Diag = Array_Diag * Matrix_ONE
    # commute time c(x, y) is proportional to L+_xx + L+_yy - 2 * L+_xy,
    # so the subtraction uses the Laplacian pseudoinverse, not the Laplacian itself
    Matrix_similarity = Matrix_Diag + Matrix_Diag.T - (2 * INV_Matrix_Laplacian)
    print(Matrix_similarity)
    Matrix_similarity = Matrix_ONE / Matrix_similarity
    Matrix_similarity = np.nan_to_num(Matrix_similarity)

    similarity_EndTime = time.perf_counter()
    print(" SimilarityTime: %f s" % (similarity_EndTime - similarity_StartTime))
    return Matrix_similarity
def load_from_hdf5(self, path):
    """load model in compressed sparse row format from hdf5 file

    hdf5 file should contain row_ptr, col_ind and data array

    Args:
        path: path to the embeddings folder
    """
    self.load_metadata(path)
    f = tables.open_file(os.path.join(path, 'cooccurrence_csr.h5p'), 'r')
    row_ptr = np.nan_to_num(f.root.row_ptr.read())
    col_ind = np.nan_to_num(f.root.col_ind.read())
    data = np.nan_to_num(f.root.data.read())
    dim = row_ptr.shape[0] - 1
    self.matrix = scipy.sparse.csr_matrix(
        (data, col_ind, row_ptr), shape=(dim, dim), dtype=np.float32)
    f.close()
    self.vocabulary = Vocabulary_cooccurrence()
    self.vocabulary.load(path)
    self.name += os.path.basename(os.path.normpath(path))
def load_with_alpha(self, path, power=0.6):
    # self.load_provenance(path)
    f = tables.open_file(os.path.join(path, 'vectors.h5p'), 'r')
    # left = np.nan_to_num(f.root.vectors.read())
    left = f.root.vectors.read()
    sigma = f.root.sigma.read()
    logger.info("loaded left singular vectors and sigma")
    sigma = np.power(sigma, power)
    self.matrix = np.dot(left, np.diag(sigma))
    logger.info("computed the product")
    self.metadata["pow_sigma"] = power
    self.metadata["size_dimensions"] = int(self.matrix.shape[1])
    f.close()
    self.vocabulary = Vocabulary_simple()
    self.vocabulary.load(path)
    self.name += os.path.basename(os.path.normpath(path)) + "_a" + str(power)
def macro_accuracy(P, Y, n_classes, bg_class=None, return_all=False, **kwargs):
    def macro_(P, Y, n_classes=None, bg_class=None, return_all=False):
        conf_matrix = sm.confusion_matrix(Y, P, labels=np.arange(n_classes))
        conf_matrix = conf_matrix / (conf_matrix.sum(0)[:, None] + 1e-5)
        conf_matrix = np.nan_to_num(conf_matrix)
        diag = conf_matrix.diagonal() * 100.

        # Remove background score
        if bg_class is not None:
            diag = np.array([diag[i] for i in range(n_classes) if i != bg_class])

        macro = diag.mean()
        if return_all:
            return macro, diag
        else:
            return macro

    if isinstance(P, list):
        out = [macro_(P[i], Y[i], n_classes=n_classes, bg_class=bg_class, return_all=return_all)
               for i in range(len(P))]
        if return_all:
            return (np.mean([o[0] for o in out]), np.mean([o[1] for o in out], 0))
        else:
            return np.mean(out)
    else:
        return macro_(P, Y, n_classes=n_classes, bg_class=bg_class, return_all=return_all)
def calQnt(self, price, volume, method, fixedFraction):
    Equity = self.queryCapital()
    proportion = 0.15
    maxDrawDown = 3800

    if method == 'FixedFraction':
        # TradeRisk = maxDrawDown(data)
        TradeRisk = maxDrawDown
        N = fixedFraction * Equity / abs(TradeRisk)
        if N >= volume * proportion:
            return math.trunc(volume * proportion)
        else:
            return int(np.nan_to_num(N))
        # return int(N)

    if method == 'MaxDrawDown':
        margin = 0.65
        # allocation = maxDrawDown(data) * 1.5 + margin * price
        allocation = maxDrawDown * 1.5 + margin * price
        N = Equity / allocation
        if N >= volume * proportion:
            return math.trunc(volume * proportion)
        else:
            return int(np.nan_to_num(N))
        # return int(N)

# query capital of self strategy
Source: Phase4.py (project: Collaborative-Filtering-recommendation, author: LunaBlack)
def userSim(data_matrix):
    n = np.shape(data_matrix)[0]  # number of users
    userSimArr = np.zeros(shape=(n, n))  # user-user similarity matrix, initialised with zeros
    for i in range(n):
        for j in range(i + 1, n):
            # items rated (value > 0) by both user i and user j
            overLap = np.nonzero(np.logical_and(data_matrix[i, :] > 0, data_matrix[j, :] > 0))[0]
            if len(overLap) > 1:
                sim = computeSim(data_matrix[i, overLap], data_matrix[j, overLap])
            else:
                sim = 0
            userSimArr[i][j] = sim
            userSimArr[j][i] = sim
    userSimArr = np.nan_to_num(userSimArr)
    return userSimArr
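A minimal usage sketch on a toy rating matrix. `computeSim` is not shown in this snippet, so the sketch supplies a hypothetical Pearson-style helper (similar in spirit to the `pearsSim` used in the later snippets):

import numpy as np

def computeSim(a, b):
    # hypothetical helper: Pearson correlation mapped to [0, 1]
    return 0.5 + 0.5 * np.corrcoef(a, b)[0, 1]

ratings = np.array([[5, 4, 0, 1],
                    [4, 5, 0, 2],
                    [1, 0, 5, 4]], dtype=float)   # rows = users, 0 = unrated

sims = userSim(ratings)
print(np.round(sims, 3))   # users 0 and 1 come out far more similar than users 0 and 2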
Source: Phase4.py (project: Collaborative-Filtering-recommendation, author: LunaBlack)
def itemSim(data_matrix):
    n = np.shape(data_matrix)[1]  # number of items
    itemSimArr = np.zeros(shape=(n, n))  # item-item similarity matrix, initialised with zeros
    for i in range(n):
        for j in range(i + 1, n):
            # users who rated (value > 0) both item i and item j
            overLap = np.nonzero(np.logical_and(data_matrix[:, i] > 0, data_matrix[:, j] > 0))[0]
            if len(overLap) > 1:
                sim = computeSim(data_matrix[overLap, i], data_matrix[overLap, j])
            else:
                sim = 0
            itemSimArr[i][j] = sim
            itemSimArr[j][i] = sim
    itemSimArr = np.nan_to_num(itemSimArr)
    return itemSimArr
Source: Phase4.py (project: Collaborative-Filtering-recommendation, author: LunaBlack)
def newUserSim(data_matrix):
    n = np.shape(data_matrix)[0]  # number of users
    userSimArr = np.zeros(shape=(n, n))  # user-user similarity matrix, initialised with zeros
    userCommon = np.zeros(shape=(n, n))  # number of items co-rated by each pair of users
    for i in range(n):
        for j in range(i + 1, n):
            overLap = np.nonzero(np.logical_and(data_matrix[i, :] > 0, data_matrix[j, :] > 0))[0]
            if len(overLap) > 1:
                sim = computeSim(data_matrix[i, overLap], data_matrix[j, overLap])
            else:
                sim = 0
            userSimArr[i][j] = sim
            userSimArr[j][i] = sim
            userCommon[i][j] = len(overLap)
            userCommon[j][i] = len(overLap)
    coef = np.exp((userCommon * 1.0 / userCommon.max(axis=0)) - 1)
    newUserSim = coef * userSimArr  # down-weight similarities that rest on few co-rated items
    newUserSim = np.nan_to_num(newUserSim)  # replace NaN from division by zero with 0
    return newUserSim, userCommon
Source: Phase4.py (project: Collaborative-Filtering-recommendation, author: LunaBlack)
def newItemSim(data_matrix):
    n = np.shape(data_matrix)[1]  # number of items
    itemSimArr = np.zeros(shape=(n, n))  # item-item similarity matrix, initialised with zeros
    itemCommon = np.zeros(shape=(n, n))  # number of users co-rating each pair of items
    for i in range(n):
        for j in range(i + 1, n):
            overLap = np.nonzero(np.logical_and(data_matrix[:, i] > 0, data_matrix[:, j] > 0))[0]
            if len(overLap) > 1:
                sim = computeSim(data_matrix[overLap, i], data_matrix[overLap, j])
            else:
                sim = 0
            itemSimArr[i][j] = sim
            itemSimArr[j][i] = sim
            itemCommon[i][j] = len(overLap)
            itemCommon[j][i] = len(overLap)
    coef = np.exp((itemCommon * 1.0 / itemCommon.max(axis=0)) - 1)
    newItemSim = coef * itemSimArr  # down-weight similarities that rest on few co-rating users
    newItemSim = np.nan_to_num(newItemSim)  # replace NaN from division by zero with 0
    return newItemSim, itemCommon
Source: Phase3.py (project: Collaborative-Filtering-recommendation, author: LunaBlack)
def newUserSim(dataSet):
    n = np.shape(dataSet)[0]  # number of users
    userSimArr = np.zeros(shape=(n, n))  # user-user similarity matrix, initialised with zeros
    userCommon = np.zeros(shape=(n, n))  # number of items co-rated by each pair of users
    newUserSim = np.zeros(shape=(n, n))  # weighted similarity matrix, initialised with zeros
    for i in range(n):
        for j in range(i + 1, n):
            overLap = np.nonzero(np.logical_and(dataSet[i, :] > 0, dataSet[j, :] > 0))[0]
            if len(overLap) > 1:
                sim = pearsSim(dataSet[i, overLap], dataSet[j, overLap])
            else:
                sim = 0
            userSimArr[i][j] = sim
            userSimArr[j][i] = sim
            userCommon[i][j] = len(overLap)
            userCommon[j][i] = len(overLap)
    coef = np.exp((userCommon * 1.0 / userCommon.max(axis=0)) - 1)
    newUserSim = coef * userSimArr  # down-weight similarities that rest on few co-rated items
    newUserSim = np.nan_to_num(newUserSim)
    return newUserSim, userCommon
Source: RewriteCF.py (project: Collaborative-Filtering-recommendation, author: LunaBlack)
def newUserSim(dataSet):
    n = np.shape(dataSet)[0]  # number of users
    userSimArr = np.zeros(shape=(n, n))  # user-user similarity matrix, initialised with zeros
    userCommon = np.zeros(shape=(n, n))  # number of items co-rated by each pair of users
    newUserSim = np.zeros(shape=(n, n))  # weighted similarity matrix, initialised with zeros
    for i in range(n):
        for j in range(i + 1, n):
            sim = 0
            overLap = np.nonzero(np.logical_and(dataSet[i, :] > 0, dataSet[j, :] > 0))[0]
            if len(overLap) > 0:
                sim = pearsSim(dataSet[i, overLap], dataSet[j, overLap])
            userSimArr[i][j] = sim
            userSimArr[j][i] = sim
            userCommon[i][j] = len(overLap)
            userCommon[j][i] = len(overLap)
    coef = np.exp((userCommon * 1.0 / userCommon.max(axis=0)) - 1)
    newUserSim = coef * userSimArr  # down-weight similarities that rest on few co-rated items
    newUserSim = np.nan_to_num(newUserSim)
    return newUserSim
Source: RewriteCF.py (project: Collaborative-Filtering-recommendation, author: LunaBlack)
def newItemSim(dataSet):
    n = np.shape(dataSet)[1]  # number of items
    itemSimArr = np.zeros(shape=(n, n))  # item-item similarity matrix, initialised with zeros
    itemCommon = np.zeros(shape=(n, n))  # number of users co-rating each pair of items
    newItemSim = np.zeros(shape=(n, n))  # weighted similarity matrix, initialised with zeros
    for i in range(n):
        for j in range(i + 1, n):
            sim = 0
            overLap = np.nonzero(np.logical_and(dataSet[:, i] > 0, dataSet[:, j] > 0))[0]
            if len(overLap) > 0:
                sim = pearsSim(dataSet[overLap, i], dataSet[overLap, j])
            itemSimArr[i][j] = sim
            itemSimArr[j][i] = sim
            itemCommon[i][j] = len(overLap)
            itemCommon[j][i] = len(overLap)
    coef = np.exp((itemCommon * 1.0 / itemCommon.max(axis=0)) - 1)
    newItemSim = coef * itemSimArr  # down-weight similarities that rest on few co-rating users
    newItemSim = np.nan_to_num(newItemSim)
    return newItemSim
def perform(self, a, y):
    """Perform costOperation

    Parameters
    ----------
    a : np.array
        Predictions
    y : np.array
        Data labels

    Returns
    -------
    np.array
        Output data
    """
    predLog = np.nan_to_num(-np.log(a))
    cEntropyMat = np.multiply(y, predLog)
    return (1.0 / self.nExamples) * np.sum(cEntropyMat)
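A small worked example of the same cross-entropy computation outside the class (the averaging over `self.nExamples` is replicated by hand; `nan_to_num` is what keeps log(0) terms from producing inf):

import numpy as np

y = np.array([[1, 0, 0],
              [0, 1, 0]], dtype=float)          # one-hot labels, 2 examples
a = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.8, 0.1]], dtype=float)    # predicted class probabilities

nExamples = y.shape[0]
cost = (1.0 / nExamples) * np.sum(y * np.nan_to_num(-np.log(a)))
print(cost)   # (-ln 0.7 - ln 0.8) / 2 = 0.290 approximately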
def zscore(a, axis=0, ddof=0):
    a = np.asanyarray(a)
    mns = a.mean(axis=axis)
    sstd = a.std(axis=axis, ddof=ddof)
    if axis and mns.ndim < a.ndim:
        res = ((a - np.expand_dims(mns, axis=axis)) /
               np.expand_dims(sstd, axis=axis))
    else:
        res = (a - mns) / sstd
    return np.nan_to_num(res)
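A short usage sketch: a constant column has zero standard deviation, so its raw z-score would be NaN, and `nan_to_num` maps it to 0.

import numpy as np

x = np.array([[1.0, 5.0],
              [2.0, 5.0],
              [3.0, 5.0]])        # second column is constant
with np.errstate(invalid='ignore'):
    print(zscore(x, axis=0))
# [[-1.2247  0.]
#  [ 0.      0.]
#  [ 1.2247  0.]]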