def predict_one(self, _id, duration, pred_year):
    """Forecast yearly values for one patent for the years after ``pred_year``.

    The per-patent entry is read from ``self.params['patent'][str(_id)]``;
    only events with a time not later than the cut point
    (``pred_year - publication year``) are passed on to
    ``self.predict_year_by_year``.

    :param _id: patent identifier; converted with ``str`` for the lookup.
    :param duration: number of years to forecast.
    :param pred_year: calendar year after which the forecast starts.
    :returns: list of ``(year, prediction)`` tuples in ascending year order,
        starting at ``pred_year + 1``, or ``None`` when the lookup raises
        ``KeyError``.
    """
    try:
        patent = self.params['patent'][str(_id)]
    except KeyError:
        # Unknown patent: nothing to predict.
        return None

    w1 = self.params['w1']
    alpha = patent['alpha']
    w2 = self.params['w2']
    # numpy.mat was removed in NumPy 2.0; asmatrix is the function it aliased.
    fea = numpy.asmatrix(patent['fea'])
    ti = patent['cite']
    beta = numpy.asmatrix(self.params['beta'])

    # The stored year may be a float-like string, hence int(float(...)).
    cut_point = pred_year - int(float(patent['year']))
    tr = numpy.asmatrix([x for x in ti if x <= cut_point])

    pred = self.predict_year_by_year(tr, cut_point, duration,
                                     beta * fea.T, w1, alpha, w2)

    # Years are generated in increasing order, so the list is already sorted.
    return [(pred_year + offset, value) for offset, value in enumerate(pred, 1)]
python类mat()的实例源码
def predict_year_by_year(self, tr, cut_point, duration, spontaneous, w1, alpha, w2):
    """Roll the cumulative count forward one year at a time.

    For each year ``t`` in ``cut_point + 1 .. cut_point + duration`` the
    expected increment is the sum of an exponentially decaying background
    term (scaled by ``spontaneous``) and a term driven by the event times in
    ``tr``. ``int(increment)`` copies of ``t`` are then appended to ``tr`` so
    that later years also react to the predicted events.

    :param tr: 1 x N matrix of event times observed so far.
    :param cut_point: last observed year (relative time).
    :param duration: number of years to predict.
    :param spontaneous: matrix whose ``[0, 0]`` entry scales the background term.
    :param w1: decay rate of the background term.
    :param alpha: weight of the event-driven term.
    :param w2: decay rate of the event-driven term.
    :returns: list of ``duration`` cumulative values, the first one starting
        from the number of columns of ``tr``.
    """
    observed = tr.shape[1]
    pred = []
    for t in range(cut_point + 1, cut_point + duration + 1):
        background = spontaneous / w1 * (numpy.exp(-w1 * (t - 1)) - numpy.exp(-w1 * t))
        triggered = alpha / w2 * (numpy.sum(numpy.exp(-w2 * ((t - 1) - tr)))
                                  - numpy.sum(numpy.exp(-w2 * (t - tr))))
        # `spontaneous` is a matrix, so the sum is one too; take the scalar.
        delta_ct = (background + triggered)[0, 0]

        previous = pred[-1] if pred else observed
        pred.append(previous + delta_ct)

        # Feed the whole-number part of the increment back in as events at t.
        events = tr.tolist()[0]
        events.extend([t] * int(delta_ct))
        # numpy.mat was removed in NumPy 2.0; asmatrix is the function it aliased.
        tr = numpy.asmatrix(events)
    return pred
def update_sequence_weights(self, pids, Alpha, features, sequences, publish_years,
                            predict_year, beta, W1, W2):
    """Store one parameter record per sequence in ``self.sequence_weights``.

    Each record is a dict with the keys ``seq_id``, ``paper_id``, ``theta``
    (from ``W1``), ``w`` (from ``W2``), ``alpha``, ``fea``, ``beta`` and
    ``spont`` (first row of ``features[i] * beta``, as a list).

    ``sequences``, ``publish_years`` and ``predict_year`` are accepted but not
    used by this method.

    :returns: ``None``; the result is written to ``self.sequence_weights``.
    """
    # beta is the same for every sequence, so convert it once.
    # numpy.mat was removed in NumPy 2.0; asmatrix is the function it aliased.
    beta = numpy.asmatrix(beta)
    beta_as_list = beta.tolist()

    result = []
    for i, pid in enumerate(pids):
        fea = numpy.asmatrix(features[i])
        result.append({
            'seq_id': i,
            'paper_id': pid,
            'theta': W1[i],
            'w': W2[i],
            'alpha': Alpha[i],
            'fea': features[i],
            'beta': beta_as_list,
            'spont': (fea * beta).tolist()[0],
        })
    self.sequence_weights = result
def simhawkes(trseq, T1, T2, v, w1, alpha, w2):
    """Extend an event sequence by simulation on the interval ``(T1, T2)``.

    The intensity used is
    ``v * exp(-w1 * t) + sum(alpha * exp(-w2 * (t - trseq)))``.
    Starting at ``T1``, the time is advanced by an exponential step with the
    current intensity as rate; the new time is appended to ``trseq`` when it
    is still below ``T2`` and a uniform draw ``u`` satisfies
    ``u * lam < lam2`` (``lam2`` being the intensity at the new time).
    Simulation also stops once ``trseq`` has more than 1000 columns.

    :param trseq: 1 x N matrix of existing event times.
    :param T1: start time of the simulation.
    :param T2: end time (exclusive) of the simulation.
    :returns: ``trseq`` with the accepted event times appended as columns.
    """
    def intensity(at, events):
        return v * numpy.exp(-w1 * at) + numpy.sum(alpha * numpy.exp(-w2 * (at - events)))

    t = T1
    while t < T2:
        lam = intensity(t, trseq)
        # Draw order (u first, then the step) is kept so seeded runs reproduce.
        u = numpy.random.random()
        t = t + (-numpy.log(numpy.random.random()) / float(lam))
        lam2 = intensity(t, trseq)
        if t < T2 and u * lam < lam2:
            # numpy.mat was removed in NumPy 2.0; asmatrix is what it aliased.
            trseq = numpy.concatenate((trseq, numpy.asmatrix(t)), axis=1)
        if trseq.shape[1] > 1e3:
            break
    return trseq
def cal_obj(v, w1, alpha, w2, events, trainT):
    """Evaluate the objective for an event sequence over ``[0, trainT]``.

    The value is the sum of ``log(intensity)`` at every event minus the two
    compensator terms ``sum(1 - exp(-w2 * (trainT - s))) * alpha / w2`` and
    ``v / w1 * (1 - exp(-w1 * trainT))``. The intensity at event ``i`` is
    ``v * exp(-w1 * s[i])`` plus an excitation term that is updated
    recursively from the previous event.

    :param events: indexable sequence of event times; must not be empty
        (``events[0]`` is read unconditionally).
    :param trainT: end of the observation window.
    :returns: the objective value.
    """
    s = events
    count = len(s)

    obj = numpy.log(v * numpy.exp(-w1 * s[0]))
    excitation = 0
    for i in range(1, count):
        # Recursive update avoids re-summing over all earlier events.
        excitation = (excitation + alpha) * numpy.exp(-w2 * (s[i] - s[i - 1]))
        obj = obj + numpy.log(v * numpy.exp(-w1 * s[i]) + excitation)

    decay_to_end = numpy.exp(-w2 * (trainT - numpy.asarray(s)))
    obj = obj - numpy.sum(1 - decay_to_end) * alpha / float(w2)
    obj = obj - (v / w1) * (1 - numpy.exp(-w1 * trainT))
    return obj
def rbf_kernel(self, x, y, gamma):
    """
    Radial basis function kernel between the rows of ``x`` and the rows of ``y``.

    Entry ``[i, j]`` of the result is
    ``exp(-||x_i - y_j||^2 / (2 * gamma^2))``, computed from the expansion
    ``||x_i||^2 + ||y_j||^2 - 2 * x_i . y_j``.

    :param x: array of input vectors (one per row)
    :param y: array of input vectors (one per row)
    :param gamma: reach factor
    :returns:
        - rbfk: ``numpy.ndarray`` with one row per vector in ``x`` and one
          column per vector in ``y``
    """
    # np.mat was removed in NumPy 2.0; asmatrix is the function it aliased.
    mat1 = np.asmatrix(x)
    mat2 = np.asmatrix(y)
    # Squared norm of every row, as column vectors.
    trnorms1 = np.asmatrix([(v * v.T)[0, 0] for v in mat1]).T
    trnorms2 = np.asmatrix([(v * v.T)[0, 0] for v in mat2]).T
    # Spread the norms over the full grid; the float64 ones also force a
    # float result so the in-place scaling below is valid.
    k1 = trnorms1 * np.asmatrix(np.ones((mat2.shape[0], 1), dtype=np.float64)).T
    k2 = np.asmatrix(np.ones((mat1.shape[0], 1), dtype=np.float64)) * trnorms2.T
    rbfk = k1 + k2
    rbfk -= 2 * np.asmatrix(mat1 * mat2.T)
    rbfk *= -1. / (2 * np.power(gamma, 2))
    np.exp(rbfk, rbfk)  # exponentiate in place
    return np.array(rbfk)
# Build a table of candidate ad rows, pickle it to "f_origin_8features.pkl"
# and return it as a pandas DataFrame.
#
# dict_ad_info: mapping whose values are indexable ad records; fields 0, 1, 4
#               and 5 are read as ad id, advertiser id, ad tag and price.
# user_behavior: indexable; user_behavior[0][0] and user_behavior[0][1] are
#               copied into every row as user tag and user sex.
# Returns: DataFrame with the columns listed in `name` below.
#
# NOTE(review): indentation was lost in this copy of the file, so it is not
# clear whether the second append (position 2) runs once per ad or only once
# after the loop — confirm against the upstream source before reformatting.
def load_ad_info(dict_ad_info, user_behavior):
list_ad_info = []
for ad in dict_ad_info:
ad_id = dict_ad_info[ad][0]
position = 1
advertiser_id = int(dict_ad_info[ad][1])
price = int(dict_ad_info[ad][5])
ad_tag = dict_ad_info[ad][4]
# The user fields do not depend on the ad; the same values go in every row.
user_tag = user_behavior[0][0]
user_sex = user_behavior[0][1]
# The leading -1 fills the 'click' column.
list_ad_info.append([-1, ad_id, position, advertiser_id, price, ad_tag, user_tag, user_sex])
# print list_ad_info
# Same row again, but with position 2 instead of 1.
list_ad_info.append([-1, ad_id, 2, advertiser_id, price, ad_tag, user_tag, user_sex])
name = ['click', 'ad_id', 'position', 'advertiser_id', 'price', 'ad_tag', 'user_tag', 'user_sex']
# np_ad_info = np.mat(list_ad_info)
df_ad_info = pd.DataFrame(list_ad_info, columns=name)
# The output path is fixed and relative to the current working directory.
file = open("f_origin_8features.pkl", 'wb')
pickle.dump(df_ad_info, file)
file.close()
#print 'DONE'
return df_ad_info
def Back_Propagation():
    """Update the module-level network parameters in place.

    Runs 50000 passes over the module-level ``traits`` with a fixed learning
    rate of 0.1. For every sample the hidden and output activations are
    computed with ``sigmoid``, then ``In_param``, ``Out_param``, ``thres_in``
    and ``thres_out`` are adjusted from the output error
    ``judge[idx] - fin_Out``.

    Reads the globals ``traits``, ``judge`` and ``sigmoid``; modifies the
    globals ``In_param``, ``Out_param``, ``thres_in`` and ``thres_out``.
    """
    global In_param
    global Out_param
    global thres_in
    global thres_out
    rate = 0.1
    for _ in range(50000):
        for idx, item in enumerate(traits):
            # Forward pass.
            # np.mat was removed in NumPy 2.0; asmatrix is what it aliased.
            hid_In = np.array(np.asmatrix(item) * np.asmatrix(In_param))
            hid_Out = sigmoid(hid_In - thres_in)
            fin_In = np.array(np.asmatrix(hid_Out) * np.asmatrix(Out_param))
            fin_Out = sigmoid(fin_In - thres_out)

            # Error terms for the output layer (g) and the hidden layer (e).
            g = fin_Out * (1.0 - fin_Out) * (judge[idx] - fin_Out)
            e = hid_Out * (1.0 - hid_Out) * np.array([np.dot(x, g) for x in Out_param])

            # Parameter updates; the thresholds move against the error terms.
            In_param += np.array(rate * np.matrix(item).T * np.matrix(e))
            Out_param += np.array(rate * np.matrix(hid_Out).T * np.matrix(g))
            thres_in -= rate * e
            thres_out -= rate * g
def PCA(dataset, topFeatNum=2):
    """Project ``dataset`` onto its ``topFeatNum`` leading principal components.

    Steps performed:
      1. standardise every column (subtract the mean, divide by the
         standard deviation);
      2. plot the first two standardised columns and show the figure;
      3. compute the covariance matrix of the standardised data;
      4. eigen-decompose it and pass the eigenvalues to ``draw``;
      5. keep the eigenvectors of the ``topFeatNum`` largest eigenvalues and
         project the standardised data onto them.

    :param dataset: 2-D data, one sample per row.
    :param topFeatNum: number of components to keep.
    :returns: matrix with one row per sample and ``topFeatNum`` columns.
    """
    # np.mat was removed in NumPy 2.0; asmatrix is the function it aliased.
    datasetMat = np.asmatrix(dataset)
    meanValues = np.mean(datasetMat, axis=0)
    stds = np.std(datasetMat, axis=0)
    adjustedDatasetMat = (datasetMat - meanValues) / stds

    plt.plot(adjustedDatasetMat[:, 0], adjustedDatasetMat[:, 1], "r^")
    plt.show()

    covMat = np.cov(adjustedDatasetMat, rowvar=0)
    eigenVals, eigenVecs = np.linalg.eig(np.asmatrix(covMat))
    draw(eigenVals)

    # argsort is ascending; the reversed slice picks the largest first.
    eigenValsIndex = np.argsort(eigenVals)[:-(topFeatNum + 1):-1]
    eigenVecs = eigenVecs[:, eigenValsIndex]
    return adjustedDatasetMat * eigenVecs
def mds(d, dimensions=2):
    """
    Multidimensional Scaling - Given a matrix of interpoint distances,
    find a set of low dimensional points that have similar interpoint
    distances.

    :param d: square array of interpoint distances (squared element-wise).
    :param dimensions: number of coordinates to return per point.
    :returns: tuple ``(Y, S)`` where ``Y`` holds the first ``dimensions``
        columns of ``U * sqrt(S)`` and ``S`` are the singular values of the
        double-centred matrix.
    """
    E = (-0.5 * d ** 2)

    # Row and column means as 2-D objects so they broadcast as a column and a
    # row. np.mat was removed in NumPy 2.0; asmatrix is what it aliased.
    Er = np.asmatrix(np.mean(E, 1))
    Es = np.asmatrix(np.mean(E, 0))

    # From Principles of Multivariate Analysis: A User's Perspective (page 107).
    F = np.array(E - np.transpose(Er) - Es + np.mean(E))

    U, S, V = svd(F)

    Y = U * np.sqrt(S)

    return Y[:, 0:dimensions], S
project1.py 文件源码
项目:South-African-Heart-Disease-data-analysis-using-python
作者: khushi4tiwari
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def plotPrincipalComponents(principal1, principal2, X, y, classNames):
    """Scatter-plot the data projected onto two principal components.

    ``X`` is centred column-wise, decomposed with ``linalg.svd`` and projected
    onto the right singular vectors; one series is drawn per class index.

    :param principal1: index of the component shown on the x axis.
    :param principal2: index of the component shown on the y axis.
    :param X: data matrix, one sample per row.
    :param y: matrix of class indices (its ``.A`` attribute is used).
    :param classNames: class labels; each is passed through
        ``convertToWord`` for the legend.
    """
    C = len(classNames)
    Y = X - np.ones((len(X), 1)) * X.mean(0)
    U, S, V = linalg.svd(Y, full_matrices=False)
    # The bare `mat` alias was removed in NumPy 2.0; asmatrix is what it aliased.
    V = np.asmatrix(V).T
    Z = Y * V

    # Plot PCA of the data.
    # The former `f.hold()` call is dropped: the hold API was removed in
    # Matplotlib 3.0 and successive plot() calls already share the axes.
    figure()
    title('Data projected onto Principal Components')
    labels = y.A.ravel()
    for c in range(C):
        class_mask = labels == c
        plot(Z[class_mask, principal1], Z[class_mask, principal2], 'o')
    legend([convertToWord(i) for i in classNames])
    xlabel('PC{0}'.format(principal1 + 1))
    ylabel('PC{0}'.format(principal2 + 1))
    show()
# Gets the direction of a certain principal component
project1.py 文件源码
项目:South-African-Heart-Disease-data-analysis-using-python
作者: khushi4tiwari
项目源码
文件源码
阅读 35
收藏 0
点赞 0
评论 0
def plot3DPrincipalComponents(X, y, classNames, prin1, prin2, prin3, attributeNames):
    """Draw a 3-D scatter of the data projected onto three principal components.

    ``X`` is centred column-wise, decomposed with ``linalg.svd`` and projected
    onto the right singular vectors; one scatter series is drawn per class
    index. The figure is not shown by this function.

    :param X: data matrix, one sample per row.
    :param y: matrix of class indices.
    :param classNames: class labels; only their count is used.
    :param prin1: index of the component on the x axis.
    :param prin2: index of the component on the y axis.
    :param prin3: index of the component on the z axis.
    :param attributeNames: entries passed to ``legend``.
    """
    C = len(classNames)
    Y = X - np.ones((len(X), 1)) * X.mean(0)
    U, S, V = linalg.svd(Y, full_matrices=False)
    # The bare `mat` alias was removed in NumPy 2.0; asmatrix is what it aliased.
    V = np.asmatrix(V).T
    Z = Y * V

    # The former `hold(True)` call is dropped: the hold API was removed in
    # Matplotlib 3.0 and successive scatter() calls already share the axes.
    f = figure()
    colors = ['blue', 'green']
    ax = f.add_subplot(111, projection='3d')
    for c in range(C):
        class_mask = (y == c).A.ravel()
        # Cycle through the palette so more than two classes do not fail.
        ax.scatter(Z[class_mask, prin1].A, Z[class_mask, prin2].A,
                   Z[class_mask, prin3].A, c=colors[c % len(colors)])
    ax.set_xlabel('PC{0}'.format(prin1 + 1))
    ax.set_ylabel('PC{0}'.format(prin2 + 1))
    ax.set_zlabel('PC{0}'.format(prin3 + 1))
    title("3D plot of principal components")
    legend(attributeNames)
#Using CHD as attribute
def learn(fName, features, nRows=-1):
    """Fit an ``svm.SVC`` on three feature columns and dump it under ``bin/``.

    The training array is loaded from ``bin/train.bin``. The three columns
    selected through ``timbreVector[features[k]]`` form the inputs and
    column 0 modulo ``minYear`` is the target.

    :param fName: file name (inside ``bin/``) for the dumped classifier.
    :param features: sequence whose first three entries are keys of
        ``timbreVector``.
    :param nRows: row limit used as ``train[:nRows]``; with the default of -1
        this slice leaves out the last row.
    """
    # Binary mode: np.load reads raw bytes.
    with open('bin/train.bin', 'rb') as f:
        train = np.load(f)

    # np.mat was removed in NumPy 2.0; asmatrix is the function it aliased.
    columns = tuple(
        np.asmatrix(train[:nRows, timbreVector[features[k]]]).reshape(nRows, 1)
        for k in range(3)
    )
    X = np.concatenate(columns, axis=1)
    Y = train[:nRows, 0] % minYear

    clf = svm.SVC(verbose=3)
    clf.fit(X, Y)
    # Single-argument print calls behave the same on Python 2 and 3.
    print("[SUCCESS] Fitted training data to SVM (kernel: rbf).")
    print("[STARTED] Dumping classifier.")
    joblib.dump(clf, 'bin/%s' % fName)
    # Two spaces reproduce the output of the old `print "... ", fName`.
    print("[SUCCESS] Dumped to  %s" % (fName,))
def test(fName, features, nRows):
    """Evaluate a dumped classifier on the first ``nRows`` rows and plot residuals.

    The data array is loaded from ``bin/train.bin``. The three columns
    selected through ``timbreVector[features[k]]`` are passed to
    ``predict(fName, X)`` and compared with column 0. Two error figures are
    printed and a scatter of ``prediction - actual`` against the actual
    values is shown.

    :param fName: classifier file name forwarded to ``predict``.
    :param features: sequence whose first three entries are keys of
        ``timbreVector``.
    :param nRows: number of leading rows to evaluate.
    """
    # Binary mode: np.load reads raw bytes.
    with open('bin/train.bin', 'rb') as f:
        data = np.load(f)

    # np.mat was removed in NumPy 2.0; asmatrix is the function it aliased.
    columns = tuple(
        np.asmatrix(data[:nRows, timbreVector[features[k]]]).reshape(nRows, 1)
        for k in range(3)
    )
    X = np.concatenate(columns, axis=1)
    Y = data[:nRows, 0]

    pred = predict(fName, X)
    residual = pred - Y
    # Formatted single-argument prints behave the same on Python 2 and 3; the
    # two spaces reproduce the output of the old `print "label ", value`.
    # Note the first figure is the mean of 0.5 * squared error.
    print("Mean Square Error:  %s" % (np.mean(0.5 * np.square(residual)),))
    print("Absolute Error:  %s" % (np.mean(np.absolute(residual)),))

    plt.scatter(Y, residual, marker='o')
    plt.xlabel('Actual')
    plt.ylabel('Difference')
    plt.show()
def learn(X, Y, datapoint):
    """Solve a weighted least-squares system around ``datapoint``.

    ``getWeights(X, datapoint)`` supplies the weight matrix; the result is
    ``((X * W) * Y.T) * ((X * W) * X.T).I``.

    :param X: input data, converted to a matrix.
    :param Y: targets, converted to a matrix.
    :param datapoint: query point, converted to a matrix.
    :returns: the product above, or ``None`` when the inverse cannot be
        computed or the shapes do not line up.
    """
    # np.mat was removed in NumPy 2.0; asmatrix is the function it aliased.
    datapoint = np.asmatrix(datapoint)
    Y = np.asmatrix(Y)
    X = np.asmatrix(X)

    weights = getWeights(X, datapoint)
    weighted = X * weights
    den = weighted * X.T
    num = weighted * Y.T
    try:
        return num * den.I
    except (np.linalg.LinAlgError, ValueError):
        # Singular system or incompatible shapes: keep the best-effort None,
        # but no longer swallow KeyboardInterrupt/SystemExit.
        return None
def filter(self, X, Y):
if self.interpolate:
X, Y = self.simplefill(X, Y)
else:
X, Y = self.sortxy(X, Y)
order_range = list(range(self.order+1))
half_window = (self.window_size - 1) // 2
# precompute coefficients
b = np.mat([[k**i for i in order_range]
for k in range(-half_window, half_window+1)])
m = np.linalg.pinv(b).A[self.deriv]
# pad the signal at the extremes with
# values taken from the signal itself
firstvals = Y[0] - np.abs(Y[1:half_window+1][::-1] - Y[0])
lastvals = Y[-1] + np.abs(Y[-half_window-1:-1][::-1] - Y[-1])
Y1 = np.concatenate((firstvals, Y, lastvals))
Y2 = np.convolve(m, Y1, mode='valid')
return X, Y2
def gs_numpy(method, X, Y, alphas_log=(-1, 1, 9), n_splits=5, n_jobs=-1, disp=True):
    """
    Run a cross-validated grid search over ``alpha`` for a linear model.

    ``method`` names a class in ``linear_model``; the candidate ``alpha``
    values come from ``np.logspace(*alphas_log)``. Folds are produced by a
    shuffled ``KFold`` with ``n_splits`` splits and candidates are scored
    with r2.

    Works on plain numpy arrays for ``X`` and ``Y`` (np.mat was used
    previously for compatibility with Matlab notation).

    Returns the fitted ``GridSearchCV`` object.
    """
    if disp:
        print(X.shape, Y.shape)

    estimator = getattr(linear_model, method)()
    alpha_grid = {'alpha': np.logspace(*alphas_log)}
    folds = model_selection.KFold(n_splits=n_splits, shuffle=True)

    search = model_selection.GridSearchCV(
        estimator,
        alpha_grid,
        scoring='r2',
        cv=folds,
        n_jobs=n_jobs,
    )
    search.fit(X, Y)
    return search
def mlr_show(clf, RMv, yEv, disp=True, graph=True):
    """Score a fitted model on validation data and optionally plot it.

    Predictions from ``clf.predict(RMv)`` are turned into a column matrix
    when ``yEv`` is 2-D and the prediction is 1-D, then scored with
    ``jchem.estimate_accuracy``. With ``graph`` set, experiment is plotted
    against prediction together with a pink identity line.

    :returns: tuple ``(r_sqr, RMSE)`` as returned by
        ``jchem.estimate_accuracy``.
    """
    yEv_calc = clf.predict(RMv)

    if np.ndim(yEv) == 2 and np.ndim(yEv_calc) == 1:
        # np.mat was removed in NumPy 2.0; asmatrix is what it aliased.
        yEv_calc = np.asmatrix(yEv_calc).T

    r_sqr, RMSE = jchem.estimate_accuracy(yEv, yEv_calc, disp=disp)

    if graph:
        plt.figure()
        # Marker size shrinks with the sample count, clamped to [1, 8].
        ms_sz = max(min(4000 / yEv.shape[0], 8), 1)
        plt.plot(yEv.tolist(), yEv_calc.tolist(), '.', ms=ms_sz)
        ax = plt.gca()
        lims = [
            np.min([ax.get_xlim(), ax.get_ylim()]),  # min of both axes
            np.max([ax.get_xlim(), ax.get_ylim()]),  # max of both axes
        ]
        # Now plot both limits against each other.
        ax.plot(lims, lims, '-', color='pink')
        plt.xlabel('Experiment')
        plt.ylabel('Prediction')
        plt.title('$r^2$ = {0:.2e}, RMSE = {1:.2e}'.format(r_sqr, RMSE))
        plt.show()

    return r_sqr, RMSE
def mlr_show3(clf, RMv, yEv, disp=True, graph=True):
    """Score a fitted model on validation data with three metrics and optionally plot it.

    Predictions from ``clf.predict(RMv)`` are turned into a column matrix
    when ``yEv`` is 2-D and the prediction is 1-D, then scored with
    ``jchem.estimate_accuracy3``. With ``graph`` set, experiment is plotted
    against prediction together with a pink identity line.

    :returns: tuple ``(r_sqr, RMSE, aae)`` as returned by
        ``jchem.estimate_accuracy3``.
    """
    yEv_calc = clf.predict(RMv)

    if np.ndim(yEv) == 2 and np.ndim(yEv_calc) == 1:
        # np.mat was removed in NumPy 2.0; asmatrix is what it aliased.
        yEv_calc = np.asmatrix(yEv_calc).T

    r_sqr, RMSE, aae = jchem.estimate_accuracy3(yEv, yEv_calc, disp=disp)

    if graph:
        plt.figure()
        # Marker size shrinks with the sample count, clamped to [1, 8].
        ms_sz = max(min(4000 / yEv.shape[0], 8), 1)
        plt.plot(yEv.tolist(), yEv_calc.tolist(), '.', ms=ms_sz)
        ax = plt.gca()
        lims = [
            np.min([ax.get_xlim(), ax.get_ylim()]),  # min of both axes
            np.max([ax.get_xlim(), ax.get_ylim()]),  # max of both axes
        ]
        # Now plot both limits against each other.
        ax.plot(lims, lims, '-', color='pink')
        plt.xlabel('Experiment')
        plt.ylabel('Prediction')
        plt.title('$r^2$={0:.2e}, RMSE={1:.2e}, AAE={2:.2e}'.format(r_sqr, RMSE, aae))
        plt.show()

    return r_sqr, RMSE, aae
def ann_val_post(yE, disp=True, graph=True, rate=2, more_train=True, center=None):
    """
    After ann_pre and shell command, ann_post can be used.

    Reads the ``out`` column of ``ann_out.csv`` as the calculated values,
    splits experimental and calculated values into training and validation
    parts with ``jchem.get_valid_mode_data`` and reports both through
    ``ann_show``.

    :returns: tuple ``(r_sqr, RMSE)`` from ``ann_show`` on the validation part.
    """
    df_ann = pd.read_csv('ann_out.csv')
    # Column vector of calculated values. np.mat was removed in NumPy 2.0;
    # asmatrix is the function it aliased.
    yE_c = np.asmatrix(df_ann['out'].tolist()).T

    yEt, yEt_c, yEv, yEv_c = jchem.get_valid_mode_data(
        yE, yE_c, rate=rate, more_train=more_train, center=center)

    print('Trainig result')
    ann_show(yEt, yEt_c, disp=disp, graph=graph)

    print('Validation result')
    r_sqr, RMSE = ann_show(yEv, yEv_c, disp=disp, graph=graph)

    return r_sqr, RMSE