import numpy as np
import matplotlib.pyplot as plt


def viz_textbb(fignum, text_im, bb_list, alpha=1.0):
    """
    text_im : image containing text
    bb_list : list of 2x4xn_i bounding-box matrices
    """
    plt.close(fignum)
    plt.figure(fignum)
    plt.imshow(text_im)
    H, W = text_im.shape[:2]
    for i in range(len(bb_list)):
        bbs = bb_list[i]
        ni = bbs.shape[-1]
        for j in range(ni):
            bb = bbs[:, :, j]
            bb = np.c_[bb, bb[:, 0]]  # close the polygon by repeating the first corner
            plt.plot(bb[0, :], bb[1, :], 'r', linewidth=2, alpha=alpha)
    plt.gca().set_xlim([0, W - 1])
    plt.gca().set_ylim([H - 1, 0])
    plt.show(block=False)
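# Hypothetical usage sketch: a blank image and one axis-aligned box, passed
# as a single 2x4x1 matrix of (x, y) corner coordinates.
img = np.ones((100, 200, 3))
box = np.array([[10, 60, 60, 10],    # x-coordinates of the four corners
                [20, 20, 50, 50]])   # y-coordinates of the four corners
viz_textbb(1, img, [box[:, :, np.newaxis]])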
def bbox(self, image_coords=True):
    """
    Return a 3 by 2 matrix, corresponding to the bounding box of the
    annotation within the scan. If `scan_slice` is a numpy array
    containing a slice of the scan, each slice of the annotation is
    contained within the box:

        bbox[0,0]:bbox[0,1]+1, bbox[1,0]:bbox[1,1]+1

    If `image_coords` is `False` then each annotation slice is
    instead contained within:

        bbox[1,0]:bbox[1,1]+1, bbox[0,0]:bbox[0,1]+1

    The last row of `bbox` gives the inclusive lower and upper
    bounds of the `image_z_position`.
    """
    matrix = self.contours_to_matrix()
    bbox = np.c_[matrix.min(axis=0), matrix.max(axis=0)]
    return bbox if not image_coords else bbox[[1, 0, 2]]
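# The np.c_ idiom above, in isolation: stack per-axis minima and maxima of a
# toy 3-D point cloud into a 3-by-2 bounds matrix.
pts = np.array([[1.0, 5.0, 9.0],
                [3.0, 2.0, 9.5],
                [2.0, 7.0, 8.5]])
print(np.c_[pts.min(axis=0), pts.max(axis=0)])
# [[1.  3. ]
#  [2.  7. ]
#  [8.5 9.5]]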
def plot_and_save(x, func, xvv_inst, plot=False, fname=False):
    mat = x.T
    for m in range(xvv_inst.nsites):
        for n in range(m + 1):
            if fname:
                mat = np.c_[mat, func[:, m, n].T]
            if plot:
                plt.plot(x, func[:, m, n],
                         label='{}-{}'.format(xvv_inst.atom_names[m],
                                              xvv_inst.atom_names[n]))
    if fname:
        np.savetxt(fname, mat)
    if plot:
        plt.legend()
        plt.savefig('graph.png', dpi=300)
        plt.show()
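# The column-accumulation idiom above, in isolation: start from the x grid
# and append one column per curve, then write everything with savetxt
# ('columns.dat' is a throwaway output name).
x_demo = np.linspace(0, 1, 5)
mat_demo = x_demo
for col in (np.sin(x_demo), np.cos(x_demo)):
    mat_demo = np.c_[mat_demo, col]
np.savetxt('columns.dat', mat_demo)   # 5 rows, 3 columns: x, sin(x), cos(x)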
import csv


def read_csv(filename, skip_lines=0):
    csvfile = open(filename, 'r', newline='')
    reader = csv.reader(csvfile)
    data = np.empty(0, dtype=object)
    last_count = np.nan
    for line in reader:
        if skip_lines > 0:
            skip_lines = skip_lines - 1
            continue
        if data.size > 0:
            if len(line) != last_count:
                raise Exception('unequal column counts found')
            data = np.c_[data, line]  # append each row as a new column
            last_count = len(line)
        else:
            data = np.array(line, dtype=object)
            data = data.reshape(len(data), 1)
            last_count = len(line)
    csvfile.close()
    return data.T
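# A minimal usage sketch for read_csv ('example.csv' is a throwaway name):
# write a small file, read it back; rows of the result are rows of the file.
with open('example.csv', 'w', newline='') as f:
    csv.writer(f).writerows([['a', '1'], ['b', '2'], ['c', '3']])
print(read_csv('example.csv').shape)   # (3, 2), dtype=object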
def print_evaluation_result(clf, bags_test, args):
    pred_score = np.array([clf(B.data()) for B in bags_test])
    pred_label = np.array([1 if score >= 0 else -1 for score in pred_score])
    true_label = np.array([B.y for B in bags_test])
    a = accuracy(pred_label, true_label)    # accuracy
    p = precision(pred_label, true_label)   # precision
    r = recall(pred_label, true_label)      # recall
    f = f_score(pred_label, true_label)     # F-score
    auc = metrics.roc_auc_score((true_label + 1) / 2, pred_score)
    header = "# accuracy,precision,recall,f-score,ROC-AUC\n"
    if not args.aucplot:
        sys.stdout.write(header + "{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n".format(a, p, r, f, auc))
        sys.stdout.flush()
    else:
        sys.stdout.write(header + "# {:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n".format(a, p, r, f, auc))
        sys.stdout.flush()
        np.savetxt(sys.stdout.buffer, np.c_[pred_score, true_label])
def trotx(theta, unit="rad", xyz=[0, 0, 0]):
    """
    TROTX Rotation about X axis

    :param theta: rotation in radians or degrees
    :param unit: "rad" or "deg" to indicate unit being used
    :param xyz: the xyz translation, if blank defaults to [0,0,0]
    :return: homogeneous transform matrix

    trotx(THETA) is a homogeneous transformation (4x4) representing a rotation
    of THETA radians about the x-axis.
    trotx(THETA, 'deg') as above but THETA is in degrees
    trotx(THETA, 'rad', [x,y,z]) as above with translation of [x,y,z]
    """
    check_args.unit_check(unit)
    tm = rotx(theta, unit)
    tm = np.r_[tm, np.zeros((1, 3))]
    mat = np.c_[tm, np.array([[xyz[0]], [xyz[1]], [xyz[2]], [1]])]
    mat = np.asmatrix(mat.round(15))
    return mat
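# The r_/c_ assembly used by trotx/troty/trotz, shown in isolation with an
# explicit 3x3 rotation block standing in for rotx() (90 degrees about x):
R_demo = np.array([[1, 0,  0],
                   [0, 0, -1],
                   [0, 1,  0]])
T_demo = np.r_[R_demo, np.zeros((1, 3))]                # stack the bottom row
T_demo = np.c_[T_demo, np.array([[1], [2], [3], [1]])]  # append translation column
print(T_demo)   # 4x4 homogeneous transform translating by [1, 2, 3]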
# ---------------------------------------------------------------------------------------#
def troty(theta, unit="rad", xyz=[0, 0, 0]):
    """
    TROTY Rotation about Y axis

    :param theta: rotation in radians or degrees
    :param unit: "rad" or "deg" to indicate unit being used
    :param xyz: the xyz translation, if blank defaults to [0,0,0]
    :return: homogeneous transform matrix

    troty(THETA) is a homogeneous transformation (4x4) representing a rotation
    of THETA radians about the y-axis.
    troty(THETA, 'deg') as above but THETA is in degrees
    troty(THETA, 'rad', [x,y,z]) as above with translation of [x,y,z]
    """
    check_args.unit_check(unit)
    tm = roty(theta, unit)
    tm = np.r_[tm, np.zeros((1, 3))]
    mat = np.c_[tm, np.array([[xyz[0]], [xyz[1]], [xyz[2]], [1]])]
    mat = np.asmatrix(mat.round(15))
    return mat
# ---------------------------------------------------------------------------------------#
def trotz(theta, unit="rad", xyz=[0, 0, 0]):
    """
    TROTZ Rotation about Z axis

    :param theta: rotation in radians or degrees
    :param unit: "rad" or "deg" to indicate unit being used
    :param xyz: the xyz translation, if blank defaults to [0,0,0]
    :return: homogeneous transform matrix

    trotz(THETA) is a homogeneous transformation (4x4) representing a rotation
    of THETA radians about the z-axis.
    trotz(THETA, 'deg') as above but THETA is in degrees
    trotz(THETA, 'rad', [x,y,z]) as above with translation of [x,y,z]
    """
    check_args.unit_check(unit)
    tm = rotz(theta, unit)
    tm = np.r_[tm, np.zeros((1, 3))]
    mat = np.c_[tm, np.array([[xyz[0]], [xyz[1]], [xyz[2]], [1]])]
    mat = np.asmatrix(mat.round(15))
    return mat
# ---------------------------------------------------------------------------------------#
def trot2(theta, unit='rad'):
    """
    TROT2 SE2 rotation matrix

    :param theta: rotation in radians or degrees
    :param unit: "rad" or "deg" to indicate unit being used
    :return: homogeneous transform matrix (3x3)

    TROT2(THETA) is a homogeneous transformation (3x3) representing a rotation of
    THETA radians.
    TROT2(THETA, 'deg') as above but THETA is in degrees.

    Notes::
    - Translational component is zero.
    """
    tm = rot2(theta, unit)
    tm = np.r_[tm, np.zeros((1, 2))]
    mat = np.c_[tm, np.array([[0], [0], [1]])]
    return mat
# ---------------------------------------------------------------------------------------#
def _scipy_bivariate_kde(x, y, bw, gridsize, cut, clip):
    """Compute a bivariate kde using scipy."""
    data = np.c_[x, y]
    kde = stats.gaussian_kde(data.T)
    data_std = data.std(axis=0, ddof=1)
    if isinstance(bw, string_types):
        bw = "scotts" if bw == "scott" else bw
        bw_x = getattr(kde, "%s_factor" % bw)() * data_std[0]
        bw_y = getattr(kde, "%s_factor" % bw)() * data_std[1]
    elif np.isscalar(bw):
        bw_x, bw_y = bw, bw
    else:
        msg = ("Cannot specify a different bandwidth for each dimension "
               "with the scipy backend. You should install statsmodels.")
        raise ValueError(msg)
    x_support = _kde_support(data[:, 0], bw_x, gridsize, cut, clip[0])
    y_support = _kde_support(data[:, 1], bw_y, gridsize, cut, clip[1])
    xx, yy = np.meshgrid(x_support, y_support)
    z = kde([xx.ravel(), yy.ravel()]).reshape(xx.shape)
    return xx, yy, z
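# A self-contained version of the same scipy pipeline, with a fixed grid in
# place of _kde_support (which is not shown in this snippet):
from scipy import stats
rng = np.random.RandomState(0)
x_kde, y_kde = rng.randn(200), rng.randn(200)
kde_demo = stats.gaussian_kde(np.c_[x_kde, y_kde].T)
xx_demo, yy_demo = np.meshgrid(np.linspace(-3, 3, 100), np.linspace(-3, 3, 100))
z_demo = kde_demo([xx_demo.ravel(), yy_demo.ravel()]).reshape(xx_demo.shape)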
def spiral(num_cls, dim, points_per_cls, rnd_state=1024):
    # Note: only dim == 2 is supported by the np.c_ construction below;
    # the original version shadowed all three arguments with hard-coded
    # values (3 classes, 2 dimensions, 100 points), which made them dead.
    np.random.seed(rnd_state)
    X_data = np.zeros((points_per_cls * num_cls, dim))
    y_data = np.zeros(points_per_cls * num_cls, dtype='uint8')
    for j in range(num_cls):
        ix = range(points_per_cls * j, points_per_cls * (j + 1))
        r = np.linspace(0.0, 1, points_per_cls)   # radius
        t = (np.linspace(j * 4, (j + 1) * 4, points_per_cls)
             + np.random.randn(points_per_cls) * 0.2)   # theta with noise
        X_data[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
        y_data[ix] = j
    y_data_encoded = np.zeros((points_per_cls * num_cls, num_cls))
    y_data_encoded[range(points_per_cls * num_cls), y_data] = 1
    return X_data, y_data, y_data_encoded
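# Hypothetical usage: three interleaved spiral arms in 2-D, 100 points each,
# plotted with one colour per class.
X_sp, y_sp, _ = spiral(num_cls=3, dim=2, points_per_cls=100)
plt.scatter(X_sp[:, 0], X_sp[:, 1], c=y_sp, s=15)
plt.show()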
def gradient(x0, X, y, alpha):
    # Gradient of the L2-regularised logistic loss; x0 packs [intercept, weights].
    # The original hard-coded the slice x0[1:137] and returned
    # np.c_[([grad_c], grad_w)], which fails for 1-D grad_w because np.c_
    # promotes its arguments to mismatched column shapes.
    w, c = x0[1:], x0[0]
    z = X.dot(w) + c
    z = phi(y * z)
    z0 = (z - 1) * y
    grad_w = np.matmul(z0, X) / X.shape[0] + alpha * w
    grad_c = z0.sum() / X.shape[0]
    return np.r_[grad_c, grad_w]   # gradient in the same [c, w] layout as x0
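# A minimal smoke test for gradient(). phi is not shown in this source; the
# logistic sigmoid below is an assumption consistent with the logistic loss.
def phi(t):
    return 1.0 / (1.0 + np.exp(-t))

rng = np.random.RandomState(0)
X_gd = rng.randn(20, 4)                  # 20 samples, 4 features
y_gd = rng.choice([-1.0, 1.0], size=20)  # binary labels in {-1, +1}
x0_gd = np.zeros(5)                      # [c, w1, ..., w4]
print(gradient(x0_gd, X_gd, y_gd, alpha=0.1).shape)   # (5,)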
##### Stochastic Gradient Descent Optimiser ######
def average_ndcg(labels, query_ids, predicted_labels):
    ndcg_list = np.zeros(len(set(query_ids)))
    k = 0
    for i in set(query_ids):
        idx = query_ids == i   # boolean mask for this query
        orders = np.c_[labels[idx], predicted_labels[idx]]
        sorted_orders = orders[orders[:, 1].argsort()[::-1]][:, 0]
        ndcg_list[k] = ndcg(sorted_orders)
        k += 1
        if k % 2000 == 0:
            print(str(k) + " queries calculated")
            print("mean ndcg so far: " + str(np.mean(ndcg_list[0:k])))
    return np.mean(ndcg_list)
# average ndcg is 0.26333
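# The sort-by-score idiom from average_ndcg, in isolation: pair true labels
# with predicted scores, then read the labels back in descending score order.
labels_demo = np.array([3, 1, 2, 0])
scores_demo = np.array([0.2, 0.9, 0.5, 0.1])
orders_demo = np.c_[labels_demo, scores_demo]
print(orders_demo[orders_demo[:, 1].argsort()[::-1]][:, 0])   # [1. 2. 3. 0.]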
def computeGaussianWidthCandidates(self, referenceSamples=None, testSamples=None):
    """
    Compute a candidate list of Gaussian kernel widths. The best width will be
    selected via cross-validation.
    """
    allSamples = numpy.c_[referenceSamples, testSamples]
    medianDistance = self.getMedianDistanceBetweenSamples(allSamples.T)
    return medianDistance * numpy.array([0.6, 0.8, 1, 1.2, 1.4])
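# A self-contained sketch of the median-distance heuristic this method relies
# on, using scipy's pdist in place of getMedianDistanceBetweenSamples (an
# assumption; the original helper is not shown here):
import numpy
from scipy.spatial.distance import pdist
samples = numpy.random.RandomState(0).randn(50, 3)   # 50 samples, 3 features
median_distance = numpy.median(pdist(samples))
print(median_distance * numpy.array([0.6, 0.8, 1, 1.2, 1.4]))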
def _fit(self, X, y=None):
    if isinstance(X, pyisc.DataObject) and y is None:
        # The class labels, if any, are contained in the data object itself.
        self.class_column = X.class_column
        if self.class_column >= 0:
            self.classes_ = X.classes_
        self._anomaly_detector._SetParams(
            0,
            -1 if X.class_column is None else X.class_column,
            self.anomaly_threshold,
            1 if self.is_clustering else 0
        )
        self._anomaly_detector._TrainData(X)
        return self
    if isinstance(X, ndarray):
        class_column = -1
        data_object = None
        assert X.ndim <= 2
        if X.ndim == 2:
            max_class_column = X.shape[1]
        else:
            max_class_column = 1
        if isinstance(y, list) or isinstance(y, ndarray):
            assert len(X) == len(y)
            class_column = max_class_column
            data_object = pyisc.DataObject(numpy.c_[X, y], class_column=class_column)
        elif y is None or (int(y) == y and y > -1 and y <= max_class_column):
            self.class_column = y
            data_object = pyisc.DataObject(X, class_column=y)
        if data_object is not None:
            return self._fit(data_object)
    raise ValueError("Unknown type of data to fit X, y:", type(X), type(y))
def _convert_to_data_object_in_scoring(self, X, y):
    data_object = None
    if isinstance(y, list) or isinstance(y, ndarray):
        assert (X.ndim == 2 and self.class_column == X.shape[1]) or \
               (X.ndim == 1 and self.class_column == 1)
        data_object = pyisc.DataObject(numpy.c_[X, y], class_column=self.class_column,
                                       classes=self.classes_)
    else:
        assert self.class_column == y
        data_object = pyisc.DataObject(X, class_column=self.class_column,
                                       classes=self.classes_ if y is not None else None)
    return data_object
def loglikelihood(self, X, y=None):
    assert isinstance(X, ndarray) and \
           ((self.class_column is None and y is None) or len(y) == len(X))
    if y is not None:
        return self._anomaly_detector._LogProbabilityOfData(
            pyisc.DataObject(c_[X, y], class_column=len(X[0])), len(X)).sum()
    else:
        return self._anomaly_detector._LogProbabilityOfData(
            pyisc.DataObject(X), len(X)).sum()
def test_dataobject_set_column_values(self):
    X = array([norm(1.0).rvs(10) for _ in range(1000)])
    y = [None] * 1000
    DO = DataObject(c_[X, y], class_column=len(X[0]))
    assert_equal(len(X[0]), DO.class_column)
    assert_equal(unique(y), DO.classes_)
    classes = [None] + ['1', '2', '3', '4', '5']
    DO = DataObject(c_[X, y], class_column=len(X[0]), classes=classes)
    assert_equal(len(X[0]), DO.class_column)
    assert_equal(classes, DO.classes_)
    X2 = DO.as_2d_array()
    assert_allclose(X2.T[:-1].T.astype(float), X)
    assert_equal(X2.T[-1], y)
    new_y = ["%i" % (divmod(i, 5)[1] + 1) for i in range(len(X))]
    DO.set_column_values(len(X[0]), new_y)
    assert_equal(len(X[0]), DO.class_column)
    assert_equal([None] + list(unique(new_y)), DO.classes_)
    X2 = DO.as_2d_array()
    assert_allclose(X2.T[:-1].T.astype(float), X)
    assert_equal(X2.T[-1], new_y)
def test_outlier_detection(self):
    print("Start of test")
    n_samples = 1000
    norm_dist = stats.norm(0, 1)
    truth = np.ones((n_samples,))
    truth[-100:] = -1
    X0 = norm_dist.rvs(n_samples)
    X = np.c_[X0 * 5, X0 + norm_dist.rvs(n_samples) * 2]
    uniform_dist = stats.uniform(-10, 10)
    X[-100:] = np.c_[uniform_dist.rvs(100), uniform_dist.rvs(100)]
    outlier_detector = pyisc.SklearnOutlierDetector(
        100.0 / n_samples,
        pyisc.P_Gaussian([0, 1])
    )
    outlier_detector.fit(X, np.array([1] * len(X)))
    self.assertLess(outlier_detector.threshold_, 0.35)
    self.assertGreater(outlier_detector.threshold_, 0.25)
    predictions = outlier_detector.predict(X, np.array([1] * len(X)))
    accuracy = sum(truth == predictions) / float(n_samples)
    print("accuracy", accuracy)
    self.assertGreater(accuracy, 0.85)