import numpy as np
from scipy.spatial.distance import euclidean

def gap(data, refs=None, nrefs=20, ks=range(1, 11), method=None):
    """Compute the gap statistic of Tibshirani et al. for each k in ks."""
    shape = data.shape
    if refs is None:
        # Draw nrefs uniform reference datasets over the bounding box of the data
        tops = data.max(axis=0)
        bots = data.min(axis=0)
        rands = np.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            rands[:, :, i] = rands[:, :, i]*(tops - bots) + bots
    else:
        rands = refs
    gaps = np.zeros((len(ks),))
    for (i, k) in enumerate(ks):
        # Within-cluster dispersion of the real data
        g1 = method(n_clusters=k).fit(data)
        (kmc, kml) = (g1.cluster_centers_, g1.labels_)
        disp = sum(euclidean(data[m, :], kmc[kml[m], :]) for m in range(shape[0]))
        # Within-cluster dispersion of each reference dataset
        refdisps = np.zeros((rands.shape[2],))
        for j in range(rands.shape[2]):
            g2 = method(n_clusters=k).fit(rands[:, :, j])
            (kmc, kml) = (g2.cluster_centers_, g2.labels_)
            refdisps[j] = sum(euclidean(rands[m, :, j], kmc[kml[m], :]) for m in range(shape[0]))
        gaps[i] = np.log(np.mean(refdisps)) - np.log(disp)
    return gaps
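A minimal usage sketch (mine, not from the source): the snippet leaves the clustering `method` to the caller, so pass something with scikit-learn's estimator interface, e.g. `KMeans`, and pick the k with the largest gap.

# Hypothetical usage: choose k by the largest gap value.
import numpy as np
from sklearn.cluster import KMeans

data = np.random.rand(200, 2)             # toy dataset
ks = range(1, 11)
gaps = gap(data, nrefs=20, ks=ks, method=KMeans)
print(list(ks)[int(np.argmax(gaps))])     # estimated number of clusters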
import numpy as np
import matplotlib.pyplot as plt

def cvglmnetPlot(cvobject, sign_lambda=1.0, **options):
    """Plot the cross-validation curve produced by cvglmnet, with error bars."""
    sloglam = sign_lambda*np.log(cvobject['lambdau'])

    fig = plt.gcf()
    ax1 = plt.gca()

    # CV curve with one-standard-deviation error bars
    plt.errorbar(sloglam, cvobject['cvm'], cvobject['cvsd'],
                 ecolor=(0.5, 0.5, 0.5), **options)
    plt.plot(sloglam, cvobject['cvm'], linestyle='dashed',
             marker='o', markerfacecolor='r')

    xlim1 = ax1.get_xlim()
    ylim1 = ax1.get_ylim()

    # Vertical lines at lambda_min and (if different) lambda_1se
    xval = sign_lambda*np.log(np.array([cvobject['lambda_min'], cvobject['lambda_min']]))
    plt.plot(xval, ylim1, color='b', linestyle='dashed', linewidth=1)
    if cvobject['lambda_min'] != cvobject['lambda_1se']:
        xval = sign_lambda*np.log([cvobject['lambda_1se'], cvobject['lambda_1se']])
        plt.plot(xval, ylim1, color='b', linestyle='dashed', linewidth=1)

    # Secondary x-axis on top showing the number of non-zero coefficients
    ax2 = ax1.twiny()
    ax2.xaxis.tick_top()
    atdf = ax1.get_xticks()
    indat = np.ones(atdf.shape, dtype=int)
    if sloglam[-1] >= sloglam[0]:
        for j in range(len(sloglam)-1, -1, -1):
            indat[atdf <= sloglam[j]] = j
    else:
        for j in range(len(sloglam)):
            indat[atdf <= sloglam[j]] = j
    prettydf = cvobject['nzero'][indat]
    ax2.set(xlim=xlim1, xticks=atdf, xticklabels=prettydf)
    ax2.grid()
    ax1.yaxis.grid()
    ax2.set_xlabel('Degrees of Freedom')

    if sign_lambda < 0:
        ax1.set_xlabel('-log(Lambda)')
    else:
        ax1.set_xlabel('log(Lambda)')
    ax1.set_ylabel(cvobject['name'])
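A hedged usage sketch, assuming the glmnet_python package this helper appears to come from (function and argument names are that package's, shown commented out):

# from glmnet_python import cvglmnet
# fit = cvglmnet(x=x.copy(), y=y.copy(), ptype='mse', nfolds=10)
# cvglmnetPlot(fit)
# plt.show()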
import numpy as np

def devi(yy, eta):
    # Poisson deviance contributions for responses yy and linear predictor eta
    deveta = yy*eta - np.exp(eta)
    devy = yy*np.log(yy) - yy
    devy[yy == 0] = 0  # 0*log(0) is taken as 0
    result = 2*(devy - deveta)
    return result
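A small sanity check (my example): at the saturated fit eta = log(yy), the Poisson deviance vanishes.

yy = np.array([1.0, 2.0, 3.0])
eta = np.log(yy)        # saturated model: fitted mean equals the response
print(devi(yy, eta))    # ~[0. 0. 0.]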
import numpy as np
from scipy import stats

def tstat(beta, var, sigma, q, N, log=False):
    """
    Calculates a t-statistic and associated p-value given the estimate of beta
    and its standard error. This is actually an F-test, but when only one
    hypothesis is being tested, it reduces to a t-test.
    """
    ts = beta / np.sqrt(var * sigma)
    print(ts)
    # sf == survival function; this is more accurate than 1 - cdf, and logsf
    # can be used if even that precision is not good enough
    if log:
        ps = np.log(2.0) + stats.t.logsf(np.abs(ts), N - q)  # log(2*sf)
    else:
        ps = 2.0 * stats.t.sf(np.abs(ts), N - q)
    print(ps)
    return ts.sum(), ps.sum()
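A hedged call with made-up values, treating var * sigma as the squared standard error of beta:

beta = np.array([2.5])
t, p = tstat(beta, var=np.array([1.0]), sigma=0.25, q=1, N=100)
# t = 2.5 / sqrt(0.25) = 5.0; p is the two-sided tail probability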
Source: c13_08_KMF_function.py (project: Python-for-Finance-Second-Edition, author: PacktPublishing)
import numpy as np
from numpy import log, exp, sqrt
from scipy import stats

N = stats.norm.cdf  # standard normal CDF (the snippet assumes this alias)

def KMV_f(E, D, T, r, sigmaE):
    """Grid search of the KMV model for asset value A and asset volatility sigmaA."""
    n = 10000
    m = 2000
    diffOld = 1e6  # a very big number
    for i in np.arange(1, 10):
        for j in np.arange(1, m):
            A = E + D/2 + i*D/n                  # candidate asset value
            sigmaA = 0.05 + j*(1.0-0.001)/m      # candidate asset volatility
            d1 = (log(A/D) + (r + sigmaA*sigmaA/2.)*T)/(sigmaA*sqrt(T))
            d2 = d1 - sigmaA*sqrt(T)
            diff4E = (A*N(d1) - D*exp(-r*T)*N(d2) - E)/A  # scale by assets
            diff4A = A/E*N(d1)*sigmaA - sigmaE            # a small number already
            diffNew = abs(diff4E) + abs(diff4A)
            if diffNew < diffOld:
                diffOld = diffNew
                output = (round(A, 2), round(sigmaA, 4), round(diffNew, 5))
    return output
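A hedged call with made-up inputs (market value of equity, face value of debt, horizon in years, risk-free rate, equity volatility):

# Hypothetical numbers; returns (A, sigmaA, residual) for the best grid point.
print(KMV_f(E=110000.0, D=100000.0, T=1.0, r=0.01, sigmaE=0.2))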
Source: c10_37_volatility_smile.py (project: Python-for-Finance-Second-Edition, author: PacktPublishing)
def implied_vol_call_min(S, X, T, r, c):
    """Grid search of the Black-Scholes implied volatility of a European call."""
    from numpy import log, exp, sqrt
    from scipy import stats
    implied_vol = 1.0
    min_value = 1000
    for i in range(10000):
        sigma = 0.0001*(i+1)
        d1 = (log(S/X) + (r + sigma*sigma/2.)*T)/(sigma*sqrt(T))
        d2 = d1 - sigma*sqrt(T)
        c2 = S*stats.norm.cdf(d1) - X*exp(-r*T)*stats.norm.cdf(d2)
        abs_diff = abs(c2 - c)
        if abs_diff < min_value:
            min_value = abs_diff
            implied_vol = sigma  # best volatility found so far
    return implied_vol
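A quick check with made-up numbers: an at-the-money call (S = X = 40, T = 0.5, r = 5%) is worth roughly 2.75 under sigma = 0.2, so the search should recover a volatility near 0.2.

print(implied_vol_call_min(40., 40., 0.5, 0.05, 2.75))  # ~0.2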
# Step 3: get call option data
Source: c10_33_implied_vol_EuropeanPut_min.py (project: Python-for-Finance-Second-Edition, author: PacktPublishing)
def implied_vol_put_min(S, X, T, r, p):
    """Grid search of the Black-Scholes implied volatility of a European put."""
    from numpy import log, exp, sqrt
    from scipy import stats
    implied_vol = 1.0
    min_value = 100.0
    for i in range(1, 10000):
        sigma = 0.0001*(i+1)
        d1 = (log(S/X) + (r + sigma*sigma/2.)*T)/(sigma*sqrt(T))
        d2 = d1 - sigma*sqrt(T)
        put = X*exp(-r*T)*stats.norm.cdf(-d2) - S*stats.norm.cdf(-d1)
        abs_diff = abs(put - p)
        if abs_diff < min_value:
            min_value = abs_diff
            implied_vol = sigma
            k = i
            put_out = put
    print('k, implied_vol, put, abs_diff')
    return k, implied_vol, put_out, min_value
import numpy as np

def shannon_entropy(freq, unit='bit'):
    """Calculates the Shannon entropy (H) of a frequency.

    Arguments:
      - freq (``numpy.ndarray``) A ``Freq`` instance or ``numpy.ndarray`` with
        frequency vectors along the last axis.
      - unit (``str``) The unit of the returned entropy: one of 'bit', 'digit',
        or 'nat'.
    """
    log = get_base(unit)  # log2, log10, or natural log, depending on the unit
    shape = freq.shape  # keep the shape to return the result in the right shape
    Hs = np.ndarray(freq.size // shape[-1])  # place to keep the entropies
    # collapse to an array of frequency vectors
    freq = freq.reshape((-1, shape[-1]))
    # make sure we have an array of vectors of frequencies
    freq = np.atleast_2d(freq)
    # fancy indexing to keep only non-zero entries (0*log(0) is taken as 0)
    positives = freq != 0.
    for i, (f, idx) in enumerate(zip(freq, positives)):
        f = f[idx]         # keep only non-zero frequencies
        logs = log(f)      # logarithms of the non-zero frequencies
        Hs[i] = -np.sum(f * logs)
    return Hs.reshape(shape[:-1])
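``get_base`` belongs to the source module and is not shown; below is a minimal stand-in (an assumption inferred from the unit names) plus a sanity check.

import numpy as np

def get_base(unit):
    # assumed mapping: bits -> log2, digits -> log10, nats -> natural log
    return {'bit': np.log2, 'digit': np.log10, 'nat': np.log}[unit]

# A uniform distribution over four symbols carries exactly 2 bits.
print(shannon_entropy(np.array([0.25, 0.25, 0.25, 0.25])))  # ~2.0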
def mutual_information(jointfreq, rowfreq=None, colfreq=None, unit='bit'):
    """
    Calculates the Mutual Information (I) of a joint frequency. The marginal
    frequencies can be given or are calculated from the joint frequency.

    Arguments:
      - jointfreq (``numpy.ndarray``) A normalized ``JointFreq`` instance or
        rank-2 ``numpy.ndarray``: the joint probability distribution of two
        random variables.
      - rowfreq (``numpy.ndarray``) [default: ``None``] A normalized marginal
        probability distribution for the variable along axis=0.
      - colfreq (``numpy.ndarray``) [default: ``None``] A normalized marginal
        probability distribution for the variable along axis=1.
      - unit (``str``) [default: ``"bit"``] Unit of the returned information.
    """
    log = get_base(unit)
    # ``or`` is ambiguous on arrays, so test for None explicitly
    if rowfreq is None:
        rowfreq = np.sum(jointfreq, axis=1)
    if colfreq is None:
        colfreq = np.sum(jointfreq, axis=0)
    indfreq = np.dot(rowfreq[None].transpose(), colfreq[None])  # outer product p(x)p(y)
    non_zero = jointfreq != 0.
    jntf = jointfreq[non_zero]
    indf = indfreq[non_zero]
    return np.sum(jntf * log(jntf/indf))
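Two quick checks using the ``get_base`` stand-in above: an independent joint distribution gives I = 0, and a perfectly correlated binary pair gives I = 1 bit.

import numpy as np
indep = np.outer([0.5, 0.5], [0.5, 0.5])       # p(x, y) = p(x) p(y)
print(mutual_information(indep))               # ~0.0
paired = np.array([[0.5, 0.0], [0.0, 0.5]])    # y determined by x
print(mutual_information(paired))              # ~1.0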
def tstat(beta, var, sigma, q, N, log=False):
    """
    Calculates a t-statistic and associated p-value given the estimate of beta
    and its standard error. This is actually an F-test, but when only one
    hypothesis is being tested, it reduces to a t-test.
    """
    ts = beta / np.sqrt(var * sigma)
    # sf == survival function; more accurate than 1 - cdf, with logsf as the
    # log-space variant when the precision of sf is not good enough
    if log:
        ps = np.log(2.0) + stats.t.logsf(np.abs(ts), N - q)  # log(2*sf)
    else:
        ps = 2.0 * stats.t.sf(np.abs(ts), N - q)
    if not len(ts) == 1 or not len(ps) == 1:
        raise Exception("Something bad happened :(")
    return ts.sum(), ps.sum()
import numpy as np
from numpy import log, pi
from numpy.fft import fft

doublenp = np.float64  # double-precision alias used by the source module

def kernel_fredriksen(n):
    """
    Generates the kernel for a Hilbert transform using FFT.

    Parameters
    ----------
    n : int
        Number of equidistant grid points.

    Returns
    -------
    array
        Kernel used when performing the Hilbert transform using FFT.
    """
    # Table of x*log(x); aux[0] stays 0, matching the limit x*log(x) -> 0
    aux = np.zeros(n+1, dtype=doublenp)
    for i in range(1, n+1):
        aux[i] = i*log(i)
    # Second differences of x*log(x) give the convolution kernel; the second
    # half carries the odd-symmetric tail
    m = 2*n
    ker = np.zeros(m, dtype=doublenp)
    for i in range(1, n):
        ker[i] = aux[i+1] - 2*aux[i] + aux[i-1]
        ker[m-i] = -ker[i]
    return fft(ker)/pi
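A hypothetical application sketch (my assumption, not code from the source project): zero-pad the sampled function to length 2n, convolve with the kernel via FFT, and keep the first n points as the approximate Hilbert transform.

import numpy as np
from numpy.fft import fft, ifft

n = 256
ker = kernel_fredriksen(n)
f = np.exp(-np.linspace(-5, 5, n)**2)        # sample function on the grid
hilb = ifft(fft(f, 2*n)*ker)[:n].real        # approximate Hilbert transform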
import numpy as np

def binary_logloss(p, y):
    # clip predictions away from 0 and 1 so the logs stay finite
    epsilon = 1e-15
    p = np.maximum(epsilon, p)
    p = np.minimum(1-epsilon, p)
    res = sum(y * np.log(p) + np.subtract(1, y) * np.log(np.subtract(1, p)))
    res *= -1.0/len(y)
    return res

def multiclass_logloss(P, Y):
    # mean negative log-probability assigned to the true class (1-based labels)
    npreds = [P[i][Y[i]-1] for i in range(len(Y))]
    score = -(1. / len(Y)) * np.sum(np.log(npreds))
    return score
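A quick worked check with made-up numbers: three labels with fairly confident, mostly correct predictions.

import numpy as np
y = np.array([1, 0, 1])
p = np.array([0.9, 0.1, 0.8])
# -(log(0.9) + log(0.9) + log(0.8)) / 3 ~ 0.1446
print(binary_logloss(p, y))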
from numpy import log, exp, sqrt
from scipy.stats import norm

def _BlackScholesCall(S, K, T, sigma, r, q):
    # European call with continuous dividend yield q
    d1 = (log(S/K) + (r - q + (sigma**2)/2)*T)/(sigma*sqrt(T))
    d2 = d1 - sigma*sqrt(T)
    return S*exp(-q*T)*norm.cdf(d1) - K*exp(-r*T)*norm.cdf(d2)

def _BlackScholesPut(S, K, T, sigma, r, q):
    # European put with continuous dividend yield q
    d1 = (log(S/K) + (r - q + (sigma**2)/2)*T)/(sigma*sqrt(T))
    d2 = d1 - sigma*sqrt(T)
    return K*exp(-r*T)*norm.cdf(-d2) - S*exp(-q*T)*norm.cdf(-d1)
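A sanity check via put-call parity (my example): for any sigma, C - P must equal S*exp(-qT) - K*exp(-rT), which both formulas satisfy exactly.

S, K, T, sigma, r, q = 100.0, 95.0, 1.0, 0.2, 0.03, 0.01
lhs = _BlackScholesCall(S, K, T, sigma, r, q) - _BlackScholesPut(S, K, T, sigma, r, q)
rhs = S*exp(-q*T) - K*exp(-r*T)
print(abs(lhs - rhs) < 1e-9)  # True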
def _fprime(self, sigma):
    # Method of the source option class; assumes log, sqrt, exp and
    # scipy.stats' norm at module level. Derivative of the price with respect
    # to sigma (vega-style), e.g. for a Newton solver for implied volatility.
    logSoverK = log(self.S/self.K)
    n12 = (self.r + sigma**2/2)*self.T
    numerd1 = logSoverK + n12
    d1 = numerd1/(sigma*sqrt(self.T))
    return self.S*sqrt(self.T)*norm.pdf(d1)*exp(-self.r*self.T)
def predict(self, xt, tau=None, proba=None):
    '''
    Predict the labels of samples xt using the learned model.

    Inputs:
        xt: the samples to be classified
    Outputs:
        yp: the predicted class
        K: the decision value for each class
    '''
    # Method of the source classifier; "sp" is the project's module-level scipy import
    ## Get information from the data
    nt = xt.shape[0]      # Number of testing samples
    C = self.ni.shape[0]  # Number of classes

    ## Initialization
    K = sp.empty((nt, C))
    if tau is None:
        TAU = self.tau
    else:
        TAU = tau

    ## Quadratic discriminant score of each class
    for c in range(C):
        invCov, logdet = self.compute_inverse_logdet(c, TAU)
        cst = logdet - 2*sp.log(self.prop[c])  # Pre-compute the constant term
        xtc = xt - self.mean[c, :]
        temp = sp.dot(invCov, xtc.T).T
        K[:, c] = sp.sum(xtc*temp, axis=1) + cst
        del temp, xtc

    ## Assign the label saved in classnum to the minimum value of K
    yp = self.classnum[sp.argmin(K, 1)]
    if proba is None:
        return yp
    else:
        return yp, K
def compute_inverse_logdet(self, c, tau):
    Lr = self.L[c, :] + tau                   # Regularized eigenvalues
    temp = self.Q[c, :, :]*(1/Lr)
    invCov = sp.dot(temp, self.Q[c, :, :].T)  # Pre-compute the inverse
    logdet = sp.sum(sp.log(Lr))               # Compute the log-determinant
    return invCov, logdet
def BIC(self, x, y, tau=None):
    '''
    Computes the Bayesian Information Criterion of the model.
    '''
    ## Get information from the data
    C, d = self.mean.shape
    n = x.shape[0]

    ## Initialization
    if tau is None:
        TAU = self.tau
    else:
        TAU = tau

    ## Penalization: d means plus d(d+1)/2 covariance terms per class,
    ## i.e. d(d+3)/2 parameters, plus C-1 free class proportions
    P = C*(d*(d+3)/2) + (C-1)
    P *= sp.log(n)

    ## Compute the log-likelihood term
    L = 0
    for c in range(C):
        j = sp.where(y == (c+1))[0]
        xi = x[j, :]
        invCov, logdet = self.compute_inverse_logdet(c, TAU)
        cst = logdet - 2*sp.log(self.prop[c])  # Pre-compute the constant term
        xi -= self.mean[c, :]
        temp = sp.dot(invCov, xi.T).T
        K = sp.sum(xi*temp, axis=1) + cst
        L += sp.sum(K)
        del K, xi
    return L + P
def loglossl(act, pred):
    epsilon = 1e-15
    pred = np.maximum(epsilon, pred)
    pred = np.minimum(1-epsilon, pred)
    ll = sum(act*np.log(pred) + np.subtract(1, act)*np.log(np.subtract(1, pred)))
    ll = ll * -1.0/len(act)
    return ll
def my_logloss(act, pred):
    # K is the Keras backend (e.g. "from keras import backend as K")
    epsilon = 1e-15
    pred = K.maximum(epsilon, pred)
    pred = K.minimum(1 - epsilon, pred)
    ll = K.sum(act * K.log(pred) + (1 - act) * K.log(1 - pred))
    ll = ll * -1.0 / K.shape(act)[0]
    return ll
def logloss(act, pred):
    '''
    Binary log loss between actual labels and predicted probabilities.
    :param act: actual labels
    :param pred: predicted probabilities
    :return: the log loss
    '''
    epsilon = 1e-15
    pred = np.maximum(epsilon, pred)
    pred = np.minimum(1 - epsilon, pred)
    ll = sum(act * np.log(pred) + np.subtract(1, act) * np.log(np.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
def logloss(actual, preds):
    epsilon = 1e-15
    ll = 0
    for act, pred in zip(actual, preds):
        pred = max(epsilon, pred)      # clip away from 0
        pred = min(1-epsilon, pred)    # clip away from 1
        ll += act*np.log(pred) + (1-act)*np.log(1-pred)
    return -ll / len(actual)