def compute_log_lik_exp(self, m, v, y):
if m.ndim == 2:
gh_x, gh_w = self._gh_points(GH_DEGREE)
gh_x = gh_x[:, np.newaxis, np.newaxis]
gh_w = gh_w[:, np.newaxis, np.newaxis] / np.sqrt(np.pi)
v_expand = v[np.newaxis, :, :]
m_expand = m[np.newaxis, :, :]
ts = gh_x * np.sqrt(2 * v_expand) + m_expand
logcdfs = norm.logcdf(ts * y)
prods = gh_w * logcdfs
loglik = np.sum(prods)
pdfs = norm.pdf(ts * y)
cdfs = norm.cdf(ts * y)
grad_cdfs = y * gh_w * pdfs / cdfs
dts_dm = 1
dts_dv = 0.5 * gh_x * np.sqrt(2 / v_expand)
dm = np.sum(grad_cdfs * dts_dm, axis=0)
dv = np.sum(grad_cdfs * dts_dv, axis=0)
else:
gh_x, gh_w = self._gh_points(GH_DEGREE)
gh_x = gh_x[:, np.newaxis, np.newaxis, np.newaxis]
gh_w = gh_w[:, np.newaxis, np.newaxis, np.newaxis] / np.sqrt(np.pi)
v_expand = v[np.newaxis, :, :, :]
m_expand = m[np.newaxis, :, :, :]
ts = gh_x * np.sqrt(2 * v_expand) + m_expand
logcdfs = norm.logcdf(ts * y)
prods = gh_w * logcdfs
prods_mean = np.mean(prods, axis=1)
loglik = np.sum(prods_mean)
pdfs = norm.pdf(ts * y)
cdfs = norm.cdf(ts * y)
grad_cdfs = y * gh_w * pdfs / cdfs
dts_dm = 1
dts_dv = 0.5 * gh_x * np.sqrt(2 / v_expand)
dm = np.sum(grad_cdfs * dts_dm, axis=0) / m.shape[0]
dv = np.sum(grad_cdfs * dts_dv, axis=0) / m.shape[0]
return loglik, dm, dv
python类cdf()的实例源码
def calculate_dv01(forward, strike, implied_sigma, annuity, days_to_maturity, pay_rec):
from scipy.stats import norm
if pay_rec == "Payer":
extra_addend = 0
else:
if pay_rec == "Receiver":
extra_addend = -1
else:
raise NotImplementedError()
d1 = calculate_d1(forward, strike, implied_sigma, days_to_maturity)
dv01 = annuity * (norm.cdf(d1) + extra_addend)
return dv01
def __calculate_price(underlying, strike, annuity, domestic_short_rate, foreign_short_rate,
implied_sigma, days_to_maturity, sign):
""" P_t = A_t * (S_t * N(d_1) - k * e^(- (r_d - r_f) * T) * N(d_2))
:param underlying:
:param strike:
:param annuity:
:param implied_sigma:
:param days_to_maturity:
:param sign:
:return:
"""
from scipy.stats import norm
from math import exp
if sign != 1 and sign != -1:
raise ValueError("Invalid Sign")
d1 = calculate_d1(underlying, strike, domestic_short_rate, foreign_short_rate, implied_sigma, days_to_maturity)
d2 = calculate_d2(underlying, strike, domestic_short_rate, foreign_short_rate, implied_sigma, days_to_maturity)
r = domestic_short_rate - foreign_short_rate
year_fraction = float(days_to_maturity) / 365
df = exp(-r * year_fraction)
price = annuity * sign * (underlying * norm.cdf(sign * d1) - strike * df * norm.cdf(sign * d2))
return price
def phi(self, x):
"""Normalized Downwind and Upwind Distribution.
In order to predict upwind fallout and at the same time preserve normalization, a
function phi is empirically inserted."""
w = (self.L_0 / self.L) * (x / (self.s_x * self.a_1))
return norm.cdf(w)
def D_Hplus1(self, x, y, dunits='km', doseunits='Sv'):
"""Returns dose rate at x, y at 1 hour after burst. This value includes dose rate from all activity that WILL be deposited at that location, not just that that has arrived by H+1 hr."""
rx, ry = self.translation * (convert_units(x, dunits, 'mi'), convert_units(y, dunits, 'mi')) * ~Affine.rotation(-self.wd + 270)
f_x = self.yld * 2e6 * self.phi(rx) * self.g(rx) * self.ff
s_y = np.sqrt(self.s_02 + ((8 * abs(rx + 2 * self.s_x) * self.s_02) / self.L) + (2 * (self.s_x * self.T_c * self.s_h * self.shear)**2 / self.L_2) + (((rx + 2 * self.s_x) * self.L_0 * self.T_c * self.s_h * self.shear)**2 / self.L**4))
a_2 = 1 / (1 + ((0.001 * self.H_c * self.wind) / self.s_0) * (1 - norm.cdf(2 * x / self.wind)))
f_y = np.exp(-0.5 * (ry / (a_2 * s_y))**2) / (2.5066282746310002 * s_y)
return convert_units(f_x * f_y, 'Roentgen', doseunits)
def theta_fn(self, proj, libors, paydate): # theta of one coupon period
ts, te, cov = self.__parse_libors(libors)
eta = (te - paydate.t) / (te - ts)
p = 1.0 + np.array([l.forward(proj) for l in libors]) * cov #p = proj(ts) / proj(te) # = (1 + libor * cov)
xi = self.__xi(proj.t0, ts, te) # proj.t0 is spotdate of curve
def capfloor(k, s): # forward/undiscounted caplet(s=1)/floorlet(s=-1) price
zstar = np.log((1 + k) / p) / xi - xi / 2
return s * (p * norm.cdf(-zstar * s) - (1 + k) * norm.cdf(-(zstar + xi) * s))
def digital(k, s): # s=1 ==> 1 for above k; s=-1 ==> 1 for below k
km = cov * (k - self.digi_gap * s)
kp = cov * (k + self.digi_gap * s)
return (1 + eta * kp) * capfloor(km,s) - (1 + eta * km) * capfloor(kp,s)
denom = 2 * self.digi_gap * cov #* (1 + eta * (p - 1)) # p - 1 = libor * cov
if self.rng_type == 'between': # inside; == (digital(rng_high,-1) - digital(rng_low,-1)) / denom
ratio = (digital(self.rng_low,1) - digital(self.rng_high,1)) / denom
elif self.rng_type == 'outside': # outside
ratio = (digital(self.rng_low,-1) + digital(self.rng_high,1)) / denom
elif self.rng_type == 'above': # above rate
ratio = digital(self.rng_rate,1) / denom
elif self.rng_type == 'below': # below rate
ratio = digital(self.rng_rate,-1) / denom
if self.lk_type == 'skipped':
ratio = ratio[:self.lk_days]
elif self.lk_type == 'crystallized':
ratio[self.lk_days:] = ratio[self.lk_days]
return np.mean(ratio)
def value(self, opt=Option.Call, strike=None):
if strike is None:
strike = self.forward
d1 = self.__d1(strike)
asset_value = opt * self.forward * norm.cdf(opt * d1)
cash_value = opt * strike * norm.cdf(opt * (d1 - self.stdev))
return self.discount * (asset_value - cash_value)
def value(self, opt=Option.Call, strike=None):
if strike is None:
strike = self.forward
dr = opt * (self.forward - strike)
d1 = dr / self.stdev
d2 = self.stdev * norm.pdf(d1)
return self.discount * (dr * norm.cdf(d1) + d2)
def compute_EI(ni, alpha, lr, lr_time, X, y, ei_xi, x):
var = np.var(lr.predict(X) - y)
m = np.dot(X.T, X)
inv = np.linalg.inv(m + alpha * np.eye(sum(ni)))
x_flat = np.array(x)
mu_x = lr.predict([x_flat])
var_x = var * (1 + np.dot(np.dot(x_flat, inv), x_flat.T))
sigma_x = np.sqrt(var_x)
u = (np.min(y) - ei_xi - mu_x) / sigma_x
EI = sigma_x * (u*norm.cdf(u) + norm.pdf(u))
estimated_time = lr_time.predict([x_flat])[0]
EIPS = EI / estimated_time
return EIPS
def get_next_by_EI(ni, alpha, lr, lr_time, X, y, ei_xi):
'''
Args:
ni: number of units in the each layer
alpha: lambda for Ridge regression
lr: fitted performance model in burning period
lr_time: fitted time model in burning period
X: all previous inputs x
y: all previous observations corresponding to X
ei_xi: parameter for EI exploitation-exploration trade-off
Returns:
x_next: a nested list [[0,1,0], [1,0,0,0], ...] as the next input x to run a specified pipeline
'''
var = np.var(lr.predict(X) - y)
m = np.dot(X.T, X)
inv = np.linalg.inv(m + alpha * np.eye(sum(ni)))
maxEI = float('-inf')
x_next = None
for i in range(np.prod(ni)):
x = [[0]*n for n in ni]
x_flat = []
pipeline = get_pipeline_by_flatten_index(ni, i)
for layer in range(len(ni)):
x[layer][pipeline[layer]] = 1
x_flat += x[layer]
x_flat = np.array(x_flat)
mu_x = lr.predict([x_flat])
var_x = var * (1 + np.dot(np.dot(x_flat, inv), x_flat.T))
sigma_x = np.sqrt(var_x)
u = (np.min(y) - ei_xi - mu_x) / sigma_x
EI = sigma_x * (u*norm.cdf(u) + norm.pdf(u))
estimated_time = lr_time.predict([x_flat])[0]
EIPS = EI / estimated_time
if EIPS > maxEI:
maxEI = EIPS
x_next = x
return x_next
def _get_eis(self, points, score_optimum):
"""
Calculate the expected improvements for all points.
Parameters
----------
points: list
List of parameter settings for the GP to predict on
score_optimum: float
The score optimum value to use for calculating the difference against the expected value
Returns
-------
Returns the Expected Improvement
"""
# Predict mu's and sigmas for each point
mu, sigma = self.score_regressor.predict(points, return_std=True)
# Subtract each item in list by score_optimum
# We subtract 0.01 because http://haikufactory.com/files/bayopt.pdf
# (2.3.2 Exploration-exploitation trade-of)
diff = mu - (score_optimum - 0.01)
# Divide each diff by each sigma
Z = diff / sigma
# Calculate EI's
ei = diff * norm.cdf(Z) + sigma * norm.pdf(Z)
# Make EI zero when sigma is zero (but then -1 when sigma <= 1e-05 to be more sure that everything goes well)
for index, value in enumerate(sigma):
if value <= 1e-05:
ei[index] = -1
return ei
def calc_log_normalizer(mu, sigma, l, h):
return np.log(
norm.cdf(h, loc=mu, scale=sigma)
- norm.cdf(l, loc=mu, scale=sigma))
def PI(params, means, stand_devi, parms_done, y, n_iterations, k):
"""
Probability of Improvement acquisition function
:param params: test data
:param means: GP posterior mean
:param stand_devi: standard deviation
:param parms_done: training data
:param y: training targets
:return: next point that need to pick up
"""
s = 0.0005 # small value
stop_threshold = 0.001
max_mean = np.max(y)
f_max = max_mean + s
z = (means - f_max) / stand_devi
cumu_gaussian = norm.cdf(z)
# early stop criterion, if there are less than 1% to get the greater cumulative Gaussian, then stop.
if cumu_gaussian.sum() <= stop_threshold or np.max(cumu_gaussian) <= stop_threshold:
print "all elements of cumulative are alost zeros!!!"
return True
indices = np.where(cumu_gaussian == np.max(cumu_gaussian))[0]
indices = np.asarray(indices)
# since there are several 1, random pick one of them as the next point except for parms_done
rand_index = random.randint(0, len(indices) - 1)
next_point = params[indices[rand_index]]
condition = next_point in parms_done.tolist()
# early stop criterion, if there is no other point that can maximize the objective then stop
while condition:
rand_index = random.randint(0, len(indices) - 1)
next_point = params[indices[rand_index]]
condition = next_point in parms_done.tolist()
if len(next_point) == 1 and condition:
return True
if k == n_iterations-1:
plt.subplot(2, 1, 2)
plt.plot(params, cumu_gaussian, label='PI')
return next_point
def _get_scaler_function(self, scaler_algo):
scaler = None
if scaler_algo == 'percentile':
scaler = lambda x: rankdata(x).astype(np.float64) / len(x)
elif scaler_algo == 'normcdf':
# scaler = lambda x: ECDF(x[cat_word_counts != 0])(x)
scaler = lambda x: norm.cdf(x, x.mean(), x.std())
elif scaler_algo == 'none':
scaler = lambda x: x
else:
raise InvalidScalerException("Invalid scaler alogrithm. Must be either percentile or normcdf.")
return scaler
def _get_scaler_function(scaler_algo):
scaler = None
if scaler_algo == 'normcdf':
scaler = lambda x: norm.cdf(x, x.mean(), x.std())
elif scaler_algo == 'percentile':
scaler = lambda x: rankdata(x).astype(np.float64) / len(x)
elif scaler_algo == 'none':
scaler = lambda x: x
else:
raise InvalidScalerException("Invalid scaler alogrithm. Must be either percentile or normcdf.")
return scaler
def EI(self, x, gp, ymax):
mean, var = gp.predict(x, eval_MSE = True)
if var == 0:
return 0
else:
Z = (mean - ymax)/sqrt(var)
return (mean - ymax) * norm.cdf(Z) + sqrt(var) * norm.pdf(Z)
def PoI(self, x, gp, ymax):
mean, var = gp.predict(x, eval_MSE = True)
if var == 0:
return 1
else:
Z = (mean - ymax)/sqrt(var)
return norm.cdf(Z)
################################################################################
################################## Print Info ##################################
################################################################################
def dependent_corr(xy, xz, yz, n, twotailed=True, conf_level=0.95, method='steiger'):
"""
Calculates the statistic significance between two dependent correlation coefficients
@param xy: correlation coefficient between x and y
@param xz: correlation coefficient between x and z
@param yz: correlation coefficient between y and z
@param n: number of elements in x, y and z
@param twotailed: whether to calculate a one or two tailed test, only works for 'steiger' method
@param conf_level: confidence level, only works for 'zou' method
@param method: defines the method uses, 'steiger' or 'zou'
@return: t and p-val
"""
if method == 'steiger':
d = xy - xz
determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
av = (xy + xz)/2
cube = (1 - yz) * (1 - yz) * (1 - yz)
t2 = d * np.sqrt((n - 1) * (1 + yz)/(((2 * (n - 1)/(n - 3)) * determin + av * av * cube)))
p = 1 - t.cdf(abs(t2), n - 3)
if twotailed:
p *= 2
return t2, p
elif method == 'zou':
L1 = rz_ci(xy, n, conf_level=conf_level)[0]
U1 = rz_ci(xy, n, conf_level=conf_level)[1]
L2 = rz_ci(xz, n, conf_level=conf_level)[0]
U2 = rz_ci(xz, n, conf_level=conf_level)[1]
rho_r12_r13 = rho_rxy_rxz(xy, xz, yz)
lower = xy - xz - pow((pow((xy - L1), 2) + pow((U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5)
upper = xy - xz + pow((pow((U1 - xy), 2) + pow((xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5)
return lower, upper
else:
raise Exception('Wrong method!')
def _ei(x, gp, y_max, xi):
"""
Expected Improvement, (Mockus, 1978)
"""
mean, var = gp.predict(x, eval_MSE=True)
var = np.maximum(var, 1e-9 + 0 * var) # ????????
z = (mean - y_max - xi) / np.sqrt(var)
return (mean - y_max - xi) * norm.cdf(z) + np.sqrt(var) * norm.pdf(z)
def _poi(x, gp, y_max, xi):
"""
Probability of Improvement, (Kushner, 1964)
"""
mean, var = gp.predict(x, eval_MSE=True)
var = np.maximum(var, 1e-9 + 0 * var) # ????????
z = (mean - y_max - xi) / np.sqrt(var)
return norm.cdf(z)