def get_Surface_Potentials(mtrue, survey, src, field_obj):
    # mesh, xmax, Utils and get_Surface come from the enclosing module scope
    phi = field_obj['phi']
    CCLoc = mesh.gridCC
    XLoc = np.unique(mesh.gridCC[:, 0])
    surfaceInd, zsurfaceLoc = get_Surface(mtrue, XLoc)
    phiSurface = phi[surfaceInd]
    phiScale = 0.
    if survey in ("Pole-Dipole", "Pole-Pole"):
        # reference the potential to a point far from the electrodes
        refInd = Utils.closestPoints(mesh, [xmax + 60., 0.], gridLoc='CC')
        phiScale = phi[refInd]
        phiSurface = phiSurface - phiScale
    return XLoc, phiSurface, phiScale
# Python numpy.unique() usage examples
def unique(eq):
    eq = eqsize(eq)  # eqsize is a project helper defined elsewhere
    # hash every element so items can be compared cheaply
    c1 = [None] * eq.size
    for i in range(0, eq.size):
        c1[i] = hash(eq[i])
    c1 = np.asarray(c1)
    if c1.ndim == 1:
        _, ia, ic = np.unique(c1, return_index=True, return_inverse=True)
        u = eq[ia]
    else:
        # view each row as one opaque item so np.unique works row-wise
        a = c1
        b = np.ascontiguousarray(a).view(
            np.dtype((np.void, a.dtype.itemsize * a.shape[1])))
        _, ia, ic = np.unique(b, return_index=True, return_inverse=True)
        u = eq[ia]
    return u, ia, ic
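# A minimal standalone sketch of the row-wise trick used above: viewing each
# row as a single opaque (void) item lets np.unique deduplicate whole rows.
# On NumPy >= 1.13 the same result is available directly via np.unique(a, axis=0).
import numpy as np

a = np.array([[1, 2], [3, 4], [1, 2]])
b = np.ascontiguousarray(a).view(
    np.dtype((np.void, a.dtype.itemsize * a.shape[1])))
_, ia, ic = np.unique(b, return_index=True, return_inverse=True)
print(a[ia])  # unique rows: [[1 2], [3 4]]
print(ic)     # inverse mapping back to the original rows: [0 1 0]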
def getTypeProblem(self, solution_filename):
    ''' Get the type of problem directly from the solution file (in case we do not have an info file).'''
    if 'task' not in self.info.keys():
        solution = np.array(data_converter.file_to_array(solution_filename))
        target_num = solution.shape[1]
        self.info['target_num'] = target_num
        if target_num == 1:  # if we have only one column
            solution = np.ravel(solution)  # flatten
            nbr_unique_values = len(np.unique(solution))
            if nbr_unique_values < len(solution) / 8:
                # Classification
                self.info['label_num'] = nbr_unique_values
                if nbr_unique_values == 2:
                    self.info['task'] = 'binary.classification'
                    self.info['target_type'] = 'Binary'
                else:
                    self.info['task'] = 'multiclass.classification'
                    self.info['target_type'] = 'Categorical'
            else:
                # Regression
                self.info['label_num'] = 0
                self.info['task'] = 'regression'
                self.info['target_type'] = 'Numerical'
        else:
            # Multilabel or multiclass
            self.info['label_num'] = target_num
            self.info['target_type'] = 'Binary'
            if any(item > 1 for item in map(np.sum, solution.astype(int))):
                self.info['task'] = 'multilabel.classification'
            else:
                self.info['task'] = 'multiclass.classification'
    return self.info['task']
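# Hedged toy illustration of the heuristic above, with no info file or
# data_converter involved: a single column with few distinct values
# (fewer than len(solution)/8) is treated as classification, otherwise
# as regression.
import numpy as np

y_class = np.array([0, 1, 1, 0] * 25)  # 2 unique values out of 100
y_regr = np.linspace(0.0, 1.0, 100)    # 100 unique values
for y in (y_class, y_regr):
    n_unique = len(np.unique(y))
    print(n_unique, 'classification' if n_unique < len(y) / 8 else 'regression')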
def tiedrank(a):
    ''' Return the ranks (with base 1) of a list resolving ties by averaging.
    This works for numpy arrays.'''
    m = len(a)
    # Sort a in ascending order (sa=sorted vals, i=indices)
    i = a.argsort()
    sa = a[i]
    # Find unique values
    uval = np.unique(a)
    # Test whether there are ties
    R = np.arange(m, dtype=float) + 1  # Ranks with base 1
    if len(uval) != m:
        # Average the ranks for the ties
        oldval = sa[0]
        newval = sa[0]
        k0 = 0
        for k in range(1, m):
            newval = sa[k]
            if newval == oldval:
                # moving average
                R[k0:k + 1] = R[k - 1] * (k - k0) / (k - k0 + 1) + R[k] / (k - k0 + 1)
            else:
                k0 = k
                oldval = newval
    # Invert the index
    S = np.empty(m)
    S[i] = R
    return S
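# Quick check of tiedrank on a small array: the two tied values share the
# average of ranks 2 and 3.
import numpy as np
print(tiedrank(np.array([10., 20., 20., 30.])))  # -> [1.  2.5 2.5 4. ]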
def binarization(array):
    ''' Takes a binary-class datafile and turns the max value (positive class) into 1 and the min into 0.'''
    array = np.array(array, dtype=float)  # conversion needed to use np.inf below
    if len(np.unique(array)) > 2:
        raise ValueError("The argument must be a binary-class datafile. {} classes detected".format(len(np.unique(array))))
    # two-step swap that avoids errors in data with e.g. classes '1' and '2'
    array[array == np.amax(array)] = np.inf
    array[array == np.amin(array)] = 0
    array[array == np.inf] = 1
    return np.array(array, dtype=int)
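# Example: classes encoded as '1' and '2' map to 0 and 1.
import numpy as np
print(binarization(np.array([1, 2, 2, 1])))  # -> [0 1 1 0]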
def __init__(self, images, labels, fake_data=False):
    if fake_data:
        self._num_examples = 10000
    else:
        assert images.shape[0] == labels.shape[0], (
            "images.shape: %s labels.shape: %s" % (images.shape,
                                                   labels.shape))
        self._num_examples = images.shape[0]
        # Convert shape from [num examples, rows, columns, depth]
        # to [num examples, rows*columns*depth]
        self.imageShape = images.shape[1:]
        self.imageChannels = self.imageShape[2]
        images = images.reshape(images.shape[0],
                                images.shape[1] * images.shape[2] * images.shape[3])
        # Convert from [0, 255] -> [0.0, 1.0].
        images = images.astype(numpy.float32)
        images = numpy.multiply(images, 1.0 / 255.0)
    self._images = images
    self._labels = labels
    try:
        if len(numpy.shape(self._labels)) == 1:
            self._labels = dense_to_one_hot(self._labels, len(numpy.unique(self._labels)))
    except Exception:
        traceback.print_exc()
    self._epochs_completed = 0
    self._index_in_epoch = 0
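# dense_to_one_hot is assumed to come from the surrounding project (it is
# not shown above); a minimal sketch compatible with the call, assuming
# labels are 0-based class indices:
def dense_to_one_hot(labels_dense, num_classes):
    # one row per example, a single 1.0 in the column of its class index
    one_hot = numpy.zeros((labels_dense.shape[0], num_classes), dtype=numpy.float32)
    one_hot[numpy.arange(labels_dense.shape[0]), labels_dense.astype(int)] = 1.0
    return one_hot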
def cluster_service(path, service, cluster_size, prev_metadata=None):
    filename = os.path.join(path, service["preprocessed_filename"])
    df = pd.read_csv(filename, sep="\t", index_col='time', parse_dates=True)
    initial_idx = None
    if prev_metadata:
        initial_idx = get_initial_clustering(service["name"], prev_metadata, df.columns)
    # adjust cluster_size if an initial assignment has been found
    if initial_idx is not None:
        cluster_size = len(np.unique(initial_idx))
    prefix = "%s/%s-cluster-%d" % (path, service["name"], cluster_size)
    if os.path.exists(prefix + "_1.png"):
        print("skip " + prefix)
        return (None, None)
    cluster_metrics, score, filenames = do_kshape(prefix, df, cluster_size, initial_idx)
    if cluster_size < 2:
        # no silhouette_score for cluster size 1
        return (None, None)
    print("silhouette_score: %f" % score)
    # protect the write access to the metadata file
    metadata_lock.acquire()
    with metadata.update(path) as data:
        for srv in data["services"]:
            if srv["name"] == service["name"]:
                if "clusters" not in srv:
                    srv["clusters"] = {}
                d = dict(silhouette_score=score, filenames=filenames, metrics=cluster_metrics)
                srv["clusters"][cluster_size] = d
    metadata_lock.release()
    return (service["name"], cluster_size)
def view_waveforms_clusters(data, halo, threshold, templates, amps_lim, n_curves=200, save=False):
    nb_templates = templates.shape[1]
    n_panels = int(numpy.ceil(numpy.sqrt(nb_templates)))
    mask = numpy.where(halo > -1)[0]
    clust_idx = numpy.unique(halo[mask])
    fig = pylab.figure()
    square = True
    center = (len(data[0]) - 1) // 2
    for count, i in enumerate(xrange(nb_templates)):
        if square:
            pylab.subplot(n_panels, n_panels, count + 1)
            if (numpy.mod(count, n_panels) != 0):
                pylab.setp(pylab.gca(), yticks=[])
            if (count < n_panels * (n_panels - 1)):
                pylab.setp(pylab.gca(), xticks=[])
        subcurves = numpy.where(halo == clust_idx[count])[0]
        for k in numpy.random.permutation(subcurves)[:n_curves]:
            pylab.plot(data[k], '0.5')
        pylab.plot(templates[:, count], 'r')
        pylab.plot(amps_lim[count][0] * templates[:, count], 'b', alpha=0.5)
        pylab.plot(amps_lim[count][1] * templates[:, count], 'b', alpha=0.5)
        xmin, xmax = pylab.xlim()
        pylab.plot([xmin, xmax], [-threshold, -threshold], 'k--')
        pylab.plot([xmin, xmax], [threshold, threshold], 'k--')
        ymin, ymax = pylab.ylim()
        pylab.plot([center, center], [ymin, ymax], 'k--')
        pylab.title('Cluster %d' % i)
    if nb_templates > 0:
        pylab.tight_layout()
    if save:
        pylab.savefig(os.path.join(save[0], 'waveforms_%s' % save[1]))
        pylab.close()
    else:
        pylab.show()
    del fig
def check_consistent_length(*arrays):
    """Check that all arrays have consistent first dimensions.

    Checks whether all objects in arrays have the same shape or length.

    Parameters
    ----------
    *arrays : list or tuple of input objects.
        Objects that will be checked for consistent length.
    """
    uniques = np.unique([_num_samples(X) for X in arrays if X is not None])
    if len(uniques) > 1:
        raise ValueError("Found arrays with inconsistent numbers of samples: "
                         "%s" % str(uniques))
# From project pytorch-semseg (author: meetshah1995), file mit_sceneparsing_benchmark_loader.py
def transform(self, img, lbl):
    """Resize and normalize an image/label pair.

    :param img: input image (H x W x C, RGB)
    :param lbl: segmentation label map
    """
    img = img[:, :, ::-1]  # RGB -> BGR
    img = img.astype(np.float64)
    img -= self.mean
    img = m.imresize(img, (self.img_size[0], self.img_size[1]))
    # Resize scales images from 0 to 255, thus we need
    # to divide by 255.0
    img = img.astype(float) / 255.0
    # HWC -> CHW
    img = img.transpose(2, 0, 1)
    classes = np.unique(lbl)
    lbl = lbl.astype(float)
    lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
    lbl = lbl.astype(int)
    if not np.all(classes == np.unique(lbl)):
        print("WARN: resizing labels yielded fewer classes")
    if not np.all(np.unique(lbl) < self.n_classes):
        raise ValueError("Segmentation map contained invalid class values")
    img = torch.from_numpy(img).float()
    lbl = torch.from_numpy(lbl).long()
    return img, lbl
def fit(self, X, C, y, regions, kernelType, reml=True, maxiter=100):
    # construct a list of kernel names (one for each region)
    if kernelType == 'adapt':
        kernelNames = self.buildKernelAdapt(X, C, y, regions, reml, maxiter)
    else:
        kernelNames = [kernelType] * len(regions)
    # perform optimization
    kernelObj, hyp_kernels, sig2e, fixedEffects = self.optimize(X, C, y, kernelNames, regions, reml, maxiter)
    # compute posterior distribution
    Ktraintrain = kernelObj.getTrainKernel(hyp_kernels)
    post = self.infExact_scipy_post(Ktraintrain, C, y, sig2e, fixedEffects)
    # fix intercept if phenotype is binary
    if len(np.unique(y)) == 2:
        controls = (y < y.mean())
        cases = ~controls
        meanVec = C.dot(fixedEffects)
        mu, var = self.getPosteriorMeanAndVar(np.diag(Ktraintrain), Ktraintrain, post, meanVec)
        fixedEffects[0] -= optimize.minimize_scalar(self.getNegLL, args=(mu, np.sqrt(sig2e + var), controls, cases), method='brent').x
    # construct trainObj
    trainObj = dict([])
    trainObj['sig2e'] = sig2e
    trainObj['hyp_kernels'] = hyp_kernels
    trainObj['fixedEffects'] = fixedEffects
    trainObj['kernelNames'] = kernelNames
    return trainObj
def load_scan(path):
    slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
    acquisitions = [x.AcquisitionNumber for x in slices]
    vals, counts = np.unique(acquisitions, return_counts=True)
    vals = vals[::-1]  # reverse so later acquisitions come first (np.unique returns values in ascending order)
    counts = counts[::-1]
    # take the acquisition with the most slices; on a tie, take the later one
    acq_val_sel = vals[np.argmax(counts)]
    if len(vals) > 1:
        print("WARNING ##########: MULTIPLE acquisitions & counts, acq_val_sel, path: ", vals, counts, acq_val_sel, path)
    slices2 = [x for x in slices if x.AcquisitionNumber == acq_val_sel]
    slices = slices2
    # One path can include 2 acquisitions (2 sets); take the later acquisition only,
    # which typically is better than the previous ones.
    # Example: '../input/stage1/b8bb02d229361a623a4dc57aa0e5c485'
    slices.sort(key=lambda x: float(x.ImagePositionPatient[2]))  # float, not int (int was a bug in v8, fixed in v9)
    try:
        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
    except Exception:
        slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
    for s in slices:
        s.SliceThickness = slice_thickness
    return slices
def largest_label_volume(im, bg=-1):
    vals, counts = np.unique(im, return_counts=True)
    counts = counts[vals != bg]
    vals = vals[vals != bg]
    if len(counts) > 0:
        return vals[np.argmax(counts)]
    else:
        return None
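# Example: with background label 0 excluded, labels 1 and 2 tie at two
# voxels each and np.argmax returns the first one, i.e. label 1.
import numpy as np
im = np.array([[0, 0, 1], [1, 2, 2]])
print(largest_label_volume(im, bg=0))  # -> 1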
def get_chunks_by_gem_group(self):
    """ Return exactly one chunk per gem group."""
    gem_group_arr = self.get_column('gem_group')
    # verify gem groups are sorted
    assert np.all(np.diff(gem_group_arr) >= 0)
    unique_ggs = np.unique(gem_group_arr)
    gg_key = lambda i: gem_group_arr[i]
    chunk_iter = self.get_chunks_from_partition(unique_ggs, gg_key)
    for (gg, chunk) in zip(unique_ggs, chunk_iter):
        yield (gg, chunk[0], chunk[1])
def compute_readpairs_per_umi_threshold(reads, subsample_rate):
    ''' Compute a threshold above which the UMIs are unlikely to be PCR off-products.
    reads (np.array(int)) - Read pairs for each UMI
    subsample_rate (float) - Subsample reads to this fraction.
    Returns threshold (int) - The RPPU threshold in the subsampled space '''
    if len(np.unique(reads)) < 2:
        print 'Skipping RPPU threshold calculation.'
        return 1
    print 'RPPU subsample rate: %0.4f' % subsample_rate
    reads = np.random.binomial(reads, subsample_rate)
    reads = reads[reads > 0]
    if len(np.unique(reads)) < 2:
        print 'Subsampling gave a degenerate distribution of RPPU. Skipping RPPU threshold calculation.'
        return 1
    new_n50 = tk_stats.NX(reads, 0.5)
    print 'New N50: %d' % new_n50
    # Log-transform counts
    log_reads = np.log(reads)
    # Run K-Means. Reshape necessary because kmeans takes a matrix.
    kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape((-1, 1)))
    kmeans.predict(log_reads.reshape((-1, 1)))
    # Take the cluster with the smallest mean
    min_cluster = np.argsort(np.ravel(kmeans.cluster_centers_))[0]
    print 'RPPU component means: ' + str(list(iter(np.exp(kmeans.cluster_centers_))))
    print 'RPPU component members: ' + str(np.bincount(kmeans.labels_))
    # Take the max element in the min-cluster
    threshold = np.max(reads[kmeans.labels_ == min_cluster])
    return threshold
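# Standalone Python 3 sketch (synthetic data, scikit-learn only) of the
# thresholding idea above: fit 2-means on log counts and take the largest
# count inside the lower-mean component.
import numpy as np
from sklearn.cluster import KMeans

rng = np.random.RandomState(0)
reads = np.concatenate([rng.poisson(2, 500), rng.poisson(50, 500)]) + 1
log_reads = np.log(reads)
kmeans = KMeans(n_clusters=2, n_init=10).fit(log_reads.reshape(-1, 1))
min_cluster = np.argmin(np.ravel(kmeans.cluster_centers_))
threshold = reads[kmeans.labels_ == min_cluster].max()
print(threshold)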
def append_data_column(ds, column):
    # Extend the dataset to fit the new data
    new_count = column.shape[0]
    existing_count = ds.shape[0]
    ds.resize((existing_count + new_count,))
    levels = get_levels(ds)
    if levels is not None:
        # update levels if we have new unique values
        if type(column.values) == p.Categorical:
            added_levels = set(column.values.categories) - set(levels)
        elif len(column) == 0:
            # Workaround for bug in pandas - get a crash in .unique() for an empty series
            added_levels = set([])
        else:
            added_levels = set(column.unique()) - set(levels)
        new_levels = list(levels)
        new_levels.extend(added_levels)
        # Check if the new categorical column has more levels
        # than the current bit width supports.
        # If so, rewrite the existing column data w/ more bits
        if len(new_levels) > np.iinfo(ds.dtype).max:
            new_dtype = pick_cat_dtype(len(new_levels))
            ds = widen_cat_column(ds, new_dtype)
        new_levels = np.array(new_levels, dtype=np.object)
        new_data = make_index_array(new_levels, column.values, ds.dtype)
        clear_levels(ds)
        create_levels(ds, new_levels)
    else:
        new_data = column
    # Append new data
    ds[existing_count:(existing_count + new_count)] = new_data
def _label2rgb_avg(label_field, image, bg_label=0, bg_color=(0, 0, 0)):
    """Visualise each segment in `label_field` with its mean color in `image`.

    Parameters
    ----------
    label_field : array of int
        A segmentation of an image.
    image : array, shape ``label_field.shape + (3,)``
        A color image of the same spatial shape as `label_field`.
    bg_label : int, optional
        A value in `label_field` to be treated as background.
    bg_color : 3-tuple of int, optional
        The color for the background label

    Returns
    -------
    out : array, same shape and type as `image`
        The output visualization.
    """
    out = np.zeros_like(image)
    labels = np.unique(label_field)
    bg = (labels == bg_label)
    if bg.any():
        labels = labels[labels != bg_label]
        out[label_field == bg_label] = bg_color
    for label in labels:
        mask = (label_field == label).nonzero()
        color = image[mask].mean(axis=0)
        out[mask] = color
    return out
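# Example: background stays bg_color; each segment gets its mean color.
import numpy as np
label_field = np.array([[0, 1], [2, 2]])
image = np.array([[[9., 9., 9.], [10., 20., 30.]],
                  [[0., 0., 100.], [0., 0., 200.]]])
out = _label2rgb_avg(label_field, image, bg_label=0)
print(out[0, 0])  # background -> [0. 0. 0.]
print(out[1, 0])  # mean of segment 2 -> [0. 0. 150.]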
def stan_map(vector):
    """ Create a map of vector items : id. """
    unique_items = np.unique(vector)
    return {item: id_ for id_, item in enumerate(unique_items, start=1)}
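# Example: ids are 1-based because Stan indexes arrays from 1.
import numpy as np
print(stan_map(np.array(['b', 'a', 'b'])))  # maps 'a' to 1 and 'b' to 2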