def _zero_one_normalize(predictions, epsilon=1e-7):
"""Normalize the predictions to the range between 0.0 and 1.0.
For some predictions like SVM predictions, we need to normalize them before
calculate the interpolated average precision. The normalization will not
change the rank in the original list and thus won't change the average
precision.
Args:
predictions: a numpy 1-D array storing the sparse prediction scores.
epsilon: a small constant to avoid denominator being zero.
Returns:
The normalized prediction.
"""
denominator = numpy.max(predictions) - numpy.min(predictions)
ret = (predictions - numpy.min(predictions)) / numpy.max(denominator,
epsilon)
return ret
python类min()的实例源码
def selectThreshold(yval,pval):
    '''Pick the threshold on pval that gives the best F1 score against yval.

    A sample is flagged as positive when its score is strictly below the
    threshold. 1000 evenly spaced thresholds between min(pval) and max(pval)
    are tried.

    :param yval: array of ground-truth labels (1 = positive, 0 = negative).
    :param pval: array of scores, same shape as yval.
    :return: (bestEpsilon, bestF1); (0., 0.) when no threshold yields a true
             positive or when all scores are identical.
    '''
    yval = np.asarray(yval)
    pval = np.asarray(pval)
    bestEpsilon = 0.
    bestF1 = 0.
    lowest, highest = np.min(pval), np.max(pval)
    step = (highest - lowest)/1000
    if not step > 0:
        # All scores are equal (or NaN): there is no range to scan and
        # np.arange cannot work with a zero step.
        return bestEpsilon,bestF1
    for epsilon in np.arange(lowest, highest, step):
        flagged = pval < epsilon
        tp = float(np.sum(flagged & (yval == 1)))
        fp = float(np.sum(flagged & (yval == 0)))
        # False negatives are positives that were NOT flagged.
        fn = float(np.sum(~flagged & (yval == 1)))
        if tp == 0:
            # Precision/recall are 0 or 0/0 here; F1 cannot beat bestF1.
            continue
        precision = tp/(tp+fp)
        recall = tp/(tp+fn)
        F1 = (2*precision*recall)/(precision+recall)
        if F1 > bestF1:
            bestF1 = F1
            bestEpsilon = epsilon
    return bestEpsilon,bestF1
# ???
two_sigma_financial_modelling.py 文件源码
项目:PortfolioTimeSeriesAnalysis
作者: MizioAnd
项目源码
文件源码
阅读 33
收藏 0
点赞 0
评论 0
def check_timestamps_left_part(self, df, midway_timestamps, amin, id):
    '''
    Check whether the timestamps of one id are contiguous in the left part,
    i.e. in the closed interval [amin, midway_timestamps].

    :param df: data frame with at least the columns 'id' and 'timestamp'.
    :param midway_timestamps: upper bound (inclusive) of the left part.
    :param amin: object whose ``.values[0]`` is the lower bound (inclusive).
    :param id: the id whose rows are inspected (shadows the builtin; the name
               is kept for callers).
    :return: tuple (is_contiguous, amin_left, amax_left, lenght_left) where
             is_contiguous is True when max - min == row count - 1.
    '''
    df = df[df.id == id]
    in_left_part = (df.timestamp >= amin.values[0]) & (df.timestamp <= midway_timestamps)
    # Aggregate only the timestamp column and label the results explicitly:
    # the label that agg() derives from np.min/np.max ('amin' vs 'min')
    # depends on the installed NumPy/pandas version.
    timestamps_by_id = df[in_left_part].groupby('id')['timestamp']
    amin_left = timestamps_by_id.min().rename(('timestamp', 'amin'))
    amax_left = timestamps_by_id.max().rename(('timestamp', 'amax'))
    lenght_left = timestamps_by_id.size().rename(('timestamp', 'len'))
    is_timestamp_diff_equal_len_left = (amax_left - amin_left).values == (lenght_left - 1)
    return is_timestamp_diff_equal_len_left, amin_left, amax_left, lenght_left
two_sigma_financial_modelling.py 文件源码
项目:PortfolioTimeSeriesAnalysis
作者: MizioAnd
项目源码
文件源码
阅读 30
收藏 0
点赞 0
评论 0
def check_timestamps_right_part(self, df, midway_timestamps, amax, id):
    '''
    Check whether the timestamps of one id are contiguous in the right part,
    i.e. in the half-open interval (midway_timestamps, amax].

    :param df: data frame with at least the columns 'id' and 'timestamp'.
    :param midway_timestamps: lower bound (exclusive) of the right part.
    :param amax: object whose ``.values[0]`` is the upper bound (inclusive).
    :param id: the id whose rows are inspected (shadows the builtin; the name
               is kept for callers).
    :return: tuple (is_contiguous, amin_right, amax_right, lenght_right) where
             is_contiguous is True when max - min == row count - 1.
    '''
    df = df[df.id == id]
    in_right_part = (df.timestamp > midway_timestamps) & (df.timestamp <= amax.values[0])
    # Aggregate only the timestamp column and label the results explicitly:
    # the label that agg() derives from np.min/np.max ('amin' vs 'min')
    # depends on the installed NumPy/pandas version.
    timestamps_by_id = df[in_right_part].groupby('id')['timestamp']
    amin_right = timestamps_by_id.min().rename(('timestamp', 'amin'))
    amax_right = timestamps_by_id.max().rename(('timestamp', 'amax'))
    lenght_right = timestamps_by_id.size().rename(('timestamp', 'len'))
    is_timestamp_diff_equal_len_right = (amax_right - amin_right).values == (lenght_right - 1)
    return is_timestamp_diff_equal_len_right, amin_right, amax_right, lenght_right
two_sigma_financial_modelling.py 文件源码
项目:PortfolioTimeSeriesAnalysis
作者: MizioAnd
项目源码
文件源码
阅读 38
收藏 0
点赞 0
评论 0
def predicted_vs_actual_y_xgb(self, xgb, best_nrounds, xgb_params, x_train_split, x_test_split, y_train_split,
                              y_test_split, title_name):
    """Train a booster on the train split and scatter-plot predicted vs. actual y.

    The splits are supplied by the caller. The figure title carries
    ``title_name`` and the value of ``self.rmse(predicted, actual)``; a
    min-to-max diagonal of the actual values is drawn for reference.
    Nothing is returned and the figure is not shown here.
    """
    train_matrix = xgb.DMatrix(x_train_split, label=y_train_split)
    test_matrix = xgb.DMatrix(x_test_split)
    x_train_shape = np.shape(x_train_split)
    x_test_shape = np.shape(x_test_split)
    y_train_shape = np.shape(y_train_split)
    y_test_shape = np.shape(y_test_split)
    print(x_train_shape, x_test_shape, y_train_shape, y_test_shape)

    booster = xgb.train(xgb_params, train_matrix, best_nrounds)
    y_predicted = booster.predict(test_matrix)

    plt.figure(figsize=(10, 5))
    plt.scatter(y_test_split, y_predicted, s=20)
    rmse_pred_vs_actual = self.rmse(y_predicted, y_test_split)
    title_parts = (title_name, ', Predicted vs. Actual.', ' rmse = ', str(rmse_pred_vs_actual))
    plt.title(''.join(title_parts))
    plt.xlabel('Actual y')
    plt.ylabel('Predicted y')
    # Reference line from (min, min) to (max, max) of the actual values.
    diagonal = [min(y_test_split), max(y_test_split)]
    plt.plot(diagonal, [min(y_test_split), max(y_test_split)])
    plt.tight_layout()
def inspect(self, output = True):
    ''' Summarise self.data: mean, standard deviation, max, min and shape.

    If output is True, a formatted one-line summary is printed to the
    console and that string is returned; otherwise the raw values are
    returned as the tuple (mean, std, max, min, shape).
    '''
    pixels = self.data
    mean_val = np.mean(pixels)
    std_val = np.std(pixels)
    max_val = np.max(pixels)
    min_val = np.min(pixels)
    dims = pixels.shape
    if not output:
        return (mean_val, std_val, max_val, min_val, dims)
    # The template indexes the first two entries of the shape.
    template = ("Mean: {0:.2f} | Std: {1:.2f} | Max: {2:.2f}|Min: {3:.2f} | "
                "Dim: {4[0]}x{4[1]}")
    summary = template.format(mean_val, std_val, max_val, min_val, dims)
    print(summary)
    return summary
def resize(im, target_size, max_size):
    """
    Rescale an image uniformly and report the scale that was applied.

    The scale maps the shorter of the first two dimensions to target_size,
    unless that would make the longer one (after rounding) exceed max_size,
    in which case the longer one is mapped to max_size instead.
    :param im: image array as used by opencv
    :param target_size: desired length of the short side
    :param max_size: upper limit for the long side
    :return: (resized image, scale factor)
    """
    height_width = im.shape[0:2]
    short_side = np.min(height_width)
    long_side = np.max(height_width)
    im_scale = float(target_size) / float(short_side)
    # Fall back to the long-side constraint when the short-side scale overshoots.
    if np.round(im_scale * long_side) > max_size:
        im_scale = float(max_size) / float(long_side)
    resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
    return resized, im_scale
def resize(im, target_size, max_size):
    """
    Uniformly rescale an image; return it together with the scale used.

    The short side (of the first two dimensions) is brought to target_size;
    if the long side would then round to more than max_size, the long side
    is brought to max_size instead.
    :param im: image array as used by opencv
    :param target_size: desired length of the short side
    :param max_size: upper limit for the long side
    :return: (resized image, scale factor)
    """
    rows_cols = im.shape[0:2]
    smaller, larger = np.min(rows_cols), np.max(rows_cols)
    scale = float(target_size) / float(smaller)
    exceeds_limit = np.round(scale * larger) > max_size
    if exceeds_limit:
        scale = float(max_size) / float(larger)
    out = cv2.resize(im, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
    return out, scale
def test2():
    """Print per-patient pixel spacing and slice counts, then summary stats.

    For every path returned by utils_lung.get_patient_data_paths, takes one
    entry of the patient's metadata mapping, asserts that its two
    'PixelSpacing' entries are equal, and records the spacing and the number
    of metadata entries. Finally prints max/min/mean of the slice counts and
    the spacings ordered by how often each value occurs (most frequent first).

    Written with single-argument print calls and next(iter(...)) so that it
    runs, with the same output, on both Python 2 and Python 3.
    """
    patient_data_paths = utils_lung.get_patient_data_paths(pathfinder.DATA_PATH)
    print(len(patient_data_paths))
    pixel_spacings_xy = []
    n_slices = []
    for p in patient_data_paths:
        pid = utils_lung.extract_pid_dir(p)
        _, sid2metadata = utils_lung.get_patient_data(p)
        # Any single metadata entry is used as representative for the patient.
        mtd = next(iter(sid2metadata.values()))
        assert mtd['PixelSpacing'][0] == mtd['PixelSpacing'][1]
        pixel_spacings_xy.append(mtd['PixelSpacing'][0])
        n_slices.append(len(sid2metadata))
        print('%s %s %s' % (pid, pixel_spacings_xy[-1], n_slices[-1]))
    print('nslices %s %s %s' % (np.max(n_slices), np.min(n_slices), np.mean(n_slices)))
    counts = collections.Counter(pixel_spacings_xy)
    new_list = sorted(pixel_spacings_xy, key=counts.get, reverse=True)
    print('spacing %s' % (new_list,))
def draw2dsurface(X, Y, zf):
    """Draw the surface z = zf([x, y]) over the grid spanned by X and Y.

    X and Y are 1-D coordinate vectors that are expanded with np.meshgrid;
    zf is called once per grid point with a two-element list [x, y].
    The figure is created but not shown; the caller decides when to call
    plt.show(). Nothing is returned.
    """
    fig = plt.figure()
    # add_subplot(..., projection='3d') replaces fig.gca(projection='3d'),
    # which current Matplotlib releases no longer accept.
    ax = fig.add_subplot(111, projection='3d')
    X, Y = np.meshgrid(X, Y)
    # Explicit float buffer: X*0 would inherit an integer dtype from integer
    # coordinates and silently truncate the values returned by zf.
    Z = np.zeros(X.shape, dtype=float)
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            Z[i, j] = zf([X[i, j], Y[i, j]])
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm,
                           linewidth=0, antialiased=False)
    ax.set_zlim(np.min(Z), np.max(Z))
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    fig.colorbar(surf, shrink=0.5, aspect=5)
def getCircularBounds(fitCloud=None,width=64,height=64,smoothing=0.01):
    """Sample a circle and split it into four boundary arcs with spline fits.

    The circle is sampled at 2*(width+height) points starting at angle -pi/2.
    Without fitCloud the radius is chosen so that the circumference equals
    the number of samples and the centre is (r, r); with fitCloud the centre
    is the mean of its first two columns and the radius is half the larger
    of their extents.

    Returns a dict with the point arrays 'top', 'right', 'bottom', 'left'
    and, under 's_top', 's_right', 's_bottom', 's_left', the first value
    returned by interpolate.splprep for each arc.
    """
    circumference = 2*(width+height)
    if fitCloud is None:
        r = circumference /(2.0*math.pi)
        cx = cy = r
    else:
        xs = fitCloud[:,0]
        ys = fitCloud[:,1]
        cx = np.mean(xs)
        cy = np.mean(ys)
        r = 0.5* max(np.max(xs) - np.min(xs), np.max(ys) - np.min(ys))

    perimeterPoints = np.zeros((circumference,2),dtype=float)
    for idx in range(circumference):
        theta = (2.0*math.pi)*float(idx) / circumference - math.pi * 0.5
        perimeterPoints[idx][0] = cx + r * math.cos(theta)
        perimeterPoints[idx][1] = cy + r * math.sin(theta)

    # Consecutive arcs share their end points, hence the -1/-2/-3 offsets.
    bounds = {}
    bounds['top'] = perimeterPoints[0:width]
    bounds['right'] = perimeterPoints[width-1:width+height-1]
    bounds['bottom'] = perimeterPoints[width+height-2:2*width+height-2]
    bounds['left'] = perimeterPoints[2*width+height-3:]

    for side in ('top', 'right', 'bottom', 'left'):
        arc = bounds[side]
        spline, _ = interpolate.splprep([arc[:,0], arc[:,1]], s=smoothing)
        bounds['s_' + side] = spline
    return bounds
def swapBlock(self,cells,d,tlx1,tly1,tlx2,tly2,cols,rows,width,height):
    """Swap the entries of d addressed by two cols x rows blocks of cells.

    Block k has its top-left corner at (tlxk, tlyk); cells[x][y] yields the
    key into d. The swap happens only if both blocks end strictly before
    width/height and the corners are at least cols apart in x or rows apart
    in y. Returns True when the swap was performed, False otherwise
    (d is then left untouched).
    """
    far_x = max(tlx1, tlx2)
    far_y = max(tly1, tly2)
    inside = far_x + cols < width and far_y + rows < height
    apart = far_x - min(tlx1, tlx2) >= cols or far_y - min(tly1, tly2) >= rows
    if not (inside and apart):
        return False

    offsets = [(col, row) for row in range(rows) for col in range(cols)]
    # Pass 1: remember block-1 values while overwriting them with block 2.
    saved = []
    for col, row in offsets:
        saved.append(d[cells[tlx1+col][tly1+row]])
        d[cells[tlx1+col][tly1+row]] = d[cells[tlx2+col][tly2+row]]
    # Pass 2: write the remembered values into block 2, in the same order.
    for value, (col, row) in zip(saved, offsets):
        d[cells[tlx2+col][tly2+row]] = value
    return True
def plot_data(self):
    """Draw self.raw_data as an image on self.data_ax and set up its overlays.

    Rows are reordered by self.sort_idcs (refreshed via update_sort_idcs),
    colour limits are set to half of the data minimum/maximum, and an empty
    marker collection plus a zero-height white rectangle are created and
    stored on self for later use. Finishes by redrawing
    self.ui.data_overview.
    """
    # Right: raw data
    raw = self.raw_data
    cmax = 0.5*raw.max()
    cmin = 0.5*raw.min()
    self.update_sort_idcs()
    ordered = raw[self.sort_idcs, :]
    first_lag, last_lag = self.raw_lags[0], self.raw_lags[-1]

    axes = self.data_ax
    self.data_image = axes.imshow(ordered,
                                  interpolation='nearest', cmap='coolwarm',
                                  extent=(first_lag, last_lag,
                                          0, len(self.sort_idcs)),
                                  origin='lower')
    axes.set_aspect('auto')
    for side in ('right', 'left', 'top'):
        axes.spines[side].set_visible(False)
    self.data_image.set_clim(cmin, cmax)

    self.inspect_markers = axes.scatter([], [], marker='<',
                                        clip_on=False, s=40)
    # Starts with zero height, i.e. nothing is covered initially.
    self.data_selection = mpl.patches.Rectangle((first_lag, 0),
                                                width=last_lag - first_lag,
                                                height=0,
                                                color='white', alpha=0.75)
    axes.add_patch(self.data_selection)
    axes.set_xlim(first_lag, last_lag)
    axes.set_ylim(0, len(self.sort_idcs)+1)
    axes.set_yticks([])
    self.ui.data_overview.draw()
def update_time(self):
    """Move the displayed time window to the value of self.get_time and refresh.

    NOTE(review): the source had lost its indentation; every statement is
    assumed to belong to the ``if self.show_fit`` branch — confirm against
    the original file.
    """
    if self.show_fit:
        # Start is capped at maxtime; the window is one unit long.
        self.t_start = min(self.maxtime, self.get_time.value())
        self.t_stop = self.t_start + 1
        # Keep the end of the window inside the available range as well.
        if self.t_stop > self.maxtime:
            self.t_stop = self.maxtime
        self.get_data()
        self.update_data_plot()
def on_mouse_press(self, event):
    """Dispatch a mouse press on the electrode axes to the active tool.

    Presses outside self.electrode_ax are ignored. With the lasso tool a
    lasso selection is started; the rectangle tool is handled elsewhere;
    with the picker tool the point closest to the cursor is selected and
    passed to self.update_inspect, with 'add'/'remove' when shift/control
    is held.

    Raises:
        AssertionError: if none of the three tool buttons is checked.
    """
    if not event.inaxes == self.electrode_ax:
        return

    if self.ui.btn_lasso.isChecked():
        # Select multiple points
        self.start_lasso_select(event)
    elif self.ui.btn_rectangle.isChecked():
        pass  # handled already by rect selector
    elif self.ui.btn_picker.isChecked():
        # Select a single point for display
        # Transform data coordinates to display coordinates
        x = self.x_position
        y = self.y_position
        # Materialise the pairs: on Python 3 zip() is a lazy iterator,
        # which cannot be converted to an (N, 2) coordinate array.
        data = event.inaxes.transData.transform(list(zip(x, y)))
        # Find the closest point (squared distances in display units)
        distances = ((data[:, 0] - event.x)**2 +
                     (data[:, 1] - event.y)**2)
        min_idx, min_value = np.argmin(distances), np.min(distances)
        if min_value > 50:
            # Don't select anything if the cursor is too far from every
            # point. NOTE(review): the value compared is a squared
            # distance, so 50 means about 7 pixels, not 50 — confirm
            # which was intended.
            selection = set()  # empty set, same type as the branch below
        else:
            selection = {min_idx}
        add_or_remove = None
        if event.key == 'shift':
            add_or_remove = 'add'
        elif event.key == 'control':
            add_or_remove = 'remove'
        self.update_inspect(selection, add_or_remove)
    else:
        raise AssertionError('No tool active')
def compute_log_sum(val):
    """Return the mean over columns of min - log(mean(exp(min - val))).

    The minimum along axis 0 is added inside the exponential and again
    outside the logarithm, which keeps every exponent non-positive;
    algebraically each term equals -log(mean(exp(-val))) along axis 0.
    """
    column_min = np.min(val, axis=0, keepdims=True)
    shifted = np.exp(-val + column_min)
    log_of_mean = np.log(np.mean(shifted, axis=0))
    return np.mean(column_min - log_of_mean)
def eigenDecompose(self, X, K, normalize=True):
    """Return eigenvalues (descending) and matching eigenvectors.

    When X has at least as many columns as rows, K is decomposed with
    la.eigh. Otherwise the thin SVD of X is used: the singular values are
    zero-padded to the number of columns of U, squared and, if normalize is
    set, divided by the number of columns of X.

    Eigenvalues below -1e-10 raise an Exception; remaining negative values
    are set to zero.
    """
    n_rows, n_cols = X.shape[0], X.shape[1]
    if n_cols < n_rows:
        U, s, _ = la.svd(X, check_finite=False, full_matrices=False)
        missing = U.shape[1] - s.shape[0]
        if missing > 0:
            # note: can use low-rank formulas here
            s = np.concatenate((s, np.zeros(missing)))
        s = s**2
        if normalize:
            s /= float(n_cols)
    else:
        s, U = la.eigh(K)

    if np.min(s) < -1e-10:
        raise Exception('Negative eigenvalues found')
    # Clamp tiny negative values left over from round-off.
    s[s<0] = 0
    order = np.argsort(s)[::-1]
    return s[order], U[:, order]
def random_channel_shift(x, intensity, channel_axis=0):
    """Add an independent random offset to every channel of x.

    Each channel gets one offset drawn uniformly from
    [-intensity, intensity]; the shifted values are clipped to the minimum
    and maximum of the whole input array.

    Args:
        x: input array.
        intensity: half-width of the uniform offset range.
        channel_axis: index of the channel axis; negative indices are
            supported.

    Returns:
        Array of the same shape and axis order as x.
    """
    # moveaxis (unlike the rollaxis round trip) also restores the layout
    # correctly when channel_axis is negative, e.g. -1 for channels-last.
    channels_first = np.moveaxis(x, channel_axis, 0)
    min_x, max_x = np.min(channels_first), np.max(channels_first)
    channel_images = [np.clip(x_channel + np.random.uniform(-intensity, intensity), min_x, max_x)
                      for x_channel in channels_first]
    shifted = np.stack(channel_images, axis=0)
    return np.moveaxis(shifted, 0, channel_axis)
def cut_out_non_lungs_z (images3, pmasks3, images3_seg, uid, dim):
    """Trim leading/trailing slices (axis 0) that do not appear to contain lungs.

    For every index j in the central 40%-60% band of axis 3, the voxels at
    [:, 0, dim // 2, j] are thresholded to the lung intensity window, counted
    per slice, and find_lungs_range turns the counts into a [zmin, zmax]
    range. The widest range over all j, padded by dim // 2 on both sides and
    clamped to the volume, is applied to all three arrays - unless it would
    keep less than half of the slices, in which case nothing is cut.

    Returns the (possibly trimmed) images3, pmasks3, images3_seg.
    """
    HU_LUNGS_MIN = -900  # the algo is sensitive to this value -- keep it 900 unless retested
    HU_LUNGS_MAX = -400
    pix_lungs_min = hu_to_pix(HU_LUNGS_MIN)
    pix_lungs_max = hu_to_pix(HU_LUNGS_MAX)

    mid = dim // 2
    n_slices = images3.shape[0]
    # Both ends of the band are visited; ymax itself must be a valid index.
    ymin = int(0.4 * images3.shape[3])
    ymax = int(0.6 * images3.shape[3])

    zmin_new = n_slices // 2
    zmax_new = n_slices // 2
    for j in range(ymin, ymax+1):
        img_cut = images3[:,0,mid, j]
        in_lung_window = (img_cut > pix_lungs_min) & (img_cut < pix_lungs_max)
        lungs_across = np.sum(in_lung_window, axis = 1)
        noise_bottom_some = np.mean(lungs_across[0:10])
        # experimental -- could fail if the scan has only the central part of
        # the lungs and no borders at all
        noise = np.max([3*np.min(lungs_across), 0.05 * np.max(lungs_across), noise_bottom_some])
        zmin, zmax = find_lungs_range(lungs_across, noise)
        zmin_new = min(zmin_new, zmin)
        zmax_new = max(zmax_new, zmax)

    # Do not cut too fine: keep a margin of `mid` slices on each side.
    zmin_new = np.max([0, zmin_new-mid])
    zmax_new = np.min([n_slices, zmax_new+mid])
    print("cut_out_non_lungs_z from to:", images3.shape[0], zmin_new, zmax_new, uid )

    kept_fraction = (zmax_new-zmin_new)/images3.shape[0]
    if (kept_fraction < 0.5):
        print ("SUSPICSIOUS large cut of > 50%, NOT executing ...")
    else:
        keep = slice(zmin_new, zmax_new)
        images3 = images3[keep]
        pmasks3 = pmasks3[keep]
        images3_seg = images3_seg[keep]
    return images3, pmasks3, images3_seg
def threshold_from_data(self, X, y):
    """Return the lowest predicted column-1 probability among samples with y == 1.

    If no sample is labelled 1, the maximum over the whole predict_proba
    output is returned instead.
    NOTE(review): that fallback spans all columns, not only column 1 —
    confirm this is intended.

    Args:
        X: samples, passed unchanged to self.predict_proba.
        y: labels; entries equal to 1 mark the positive samples.
    """
    y_bool = np.asarray(y) == 1.
    y_pred = self.predict_proba(X)
    # Test the mask itself rather than count_nonzero(y): labels such as -1
    # are non-zero but select nothing, and np.min of an empty array raises.
    if not np.any(y_bool):
        return np.max(y_pred)
    return np.min(y_pred[y_bool][:,1])