def project_shapes(self, set_y, bbox, window):
"""Project shapes from original plane to a new one [-1,+1].
"""
nbr = set_y.shape[0]
nfids = int(set_y.shape[1]/2)
    std_w = window[0]
std_h = window[1]
for i in xrange(nbr):
XY=set_y[i,:]
X = XY[:nfids]
Y = XY[nfids:]
bb = bbox[i,:]
center_x = bb[0] + bb[2]/2 # x_up + w/2
center_y = bb[1] + bb[3]/2 # y_up + h/2
X = (X - center_x) / (std_w/2)
Y = (Y - center_y) / (std_h /2)
XY=np.concatenate((X, Y))
set_y[i] = XY
return set_y
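# --- Usage sketch (not from the original source): project_shapes is written as a
# method but never touches self, so it can be exercised directly by passing None.
# Assumes numpy is imported as np and a Python 2 interpreter (the body uses xrange);
# all data below is made up.
import numpy as np

set_y = np.array([[10., 20., 30., 15., 25., 35.]])  # 3 landmarks: x1..x3 then y1..y3
bbox = np.array([[5., 10., 40., 40.]])              # [x_up, y_up, w, h]
window = (40, 40)                                    # (std_w, std_h)
projected = project_shapes(None, set_y.copy(), bbox, window)
# Coordinates are now centered on each bounding box and scaled to roughly [-1, +1].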
def GenCR(MCMCPar,pCR):
if type(pCR) is np.ndarray:
p=np.ndarray.tolist(pCR)[0]
else:
p=pCR
CR=np.zeros((MCMCPar.seq * MCMCPar.steps),dtype=np.float)
L = np.random.multinomial(MCMCPar.seq * MCMCPar.steps, p, size=1)
L2 = np.concatenate((np.zeros((1),dtype=np.int), np.cumsum(L)),axis=0)
r = np.random.permutation(MCMCPar.seq * MCMCPar.steps)
for zz in range(0,MCMCPar.nCR):
i_start = L2[zz]
i_end = L2[zz+1]
idx = r[i_start:i_end]
CR[idx] = np.float(zz+1)/MCMCPar.nCR
CR = np.reshape(CR,(MCMCPar.seq,MCMCPar.steps))
return CR, L
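# --- Usage sketch (hypothetical parameter object, not from the original source).
# GenCR draws one crossover value per chain per step from a multinomial over pCR.
# Assumes numpy is imported as np and an older NumPy where the np.float/np.int aliases exist.
import numpy as np

class MCMCPar:        # minimal stand-in for the sampler's real parameter container
    seq = 3           # number of chains
    steps = 10        # steps per chain between CR adaptations
    nCR = 3           # number of candidate crossover values

pCR = [1.0 / MCMCPar.nCR] * MCMCPar.nCR   # equal selection probabilities
CR, L = GenCR(MCMCPar, pCR)
print(CR.shape)   # (3, 10): one value from {1/3, 2/3, 1} per chain per step
print(L)          # multinomial counts of how often each CR value was drawn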
def parse_args():
"""
Parse input arguments
"""
parser = argparse.ArgumentParser(description='Train SVMs (old skool)')
parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
default=0, type=int)
parser.add_argument('--def', dest='prototxt',
help='prototxt file defining the network',
default=None, type=str)
parser.add_argument('--net', dest='caffemodel',
help='model to test',
default=None, type=str)
parser.add_argument('--cfg', dest='cfg_file',
help='optional config file', default=None, type=str)
parser.add_argument('--imdb', dest='imdb_name',
help='dataset to train on',
default='voc_2007_trainval', type=str)
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args()
return args
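# --- Hypothetical invocation of the script that defines parse_args() (the script name
# and file paths below are placeholders; the flags match the argparse options above):
#
#   python train_svms.py --gpu 0 \
#       --def models/vgg16/test.prototxt \
#       --net output/vgg16.caffemodel \
#       --imdb voc_2007_trainval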
def binarization (array):
    ''' Takes a binary-class datafile and turns the max value (positive class) into 1 and the min value into 0'''
array = np.array(array, dtype=float) # conversion needed to use np.inf after
if len(np.unique(array)) > 2:
raise ValueError ("The argument must be a binary-class datafile. {} classes detected".format(len(np.unique(array))))
    # manipulation that avoids errors in data whose classes are, for example, '1' and '2'
array[array == np.amax(array)] = np.inf
array[array == np.amin(array)] = 0
array[array == np.inf] = 1
return np.array(array, dtype=int)
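# --- Usage sketch (made-up labels; assumes numpy is imported as np):
y = [2, 2, 1, 2, 1]          # binary problem whose classes happen to be '1' and '2'
print(binarization(y))       # -> [1 1 0 1 0]: max class mapped to 1, min class to 0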
def sparse_file_to_sparse_list (filename, verbose=True):
''' Converts a sparse data file to a sparse list, so that :
sparse_list[i][j] = (a,b) means matrix[i][a]=b'''
data_file = open(filename, "r")
if verbose: print ("Reading {}...".format(filename))
lines = data_file.readlines()
if verbose: print ("Converting {} to correct array")
data = [lines[i].split(' ') for i in range (len(lines))]
del lines #djajetic 11.11.2015 questionable
if verbose: print ("Converting {} to sparse list".format (filename))
return [[tuple(map(int, data[i][j].rstrip().split(':'))) for j in range(len(data[i])) if data[i][j] != '\n'] for i in range (len(data))]
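# --- Usage sketch (writes a tiny throwaway file in the expected "index:value" format;
# the file name is arbitrary):
with open("toy_sparse.data", "w") as f:
    f.write("1:3 4:1\n")
    f.write("2:5\n")
print(sparse_file_to_sparse_list("toy_sparse.data", verbose=False))
# -> [[(1, 3), (4, 1)], [(2, 5)]]   i.e. row 0 holds 3 in column 1 and 1 in column 4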
def convert_to_bin(Ycont, nval, verbose=True):
''' Convert numeric vector to binary (typically classification target values)'''
if verbose: print ("\t_______ Converting to binary representation")
Ybin=[[0]*nval for x in xrange(len(Ycont))]
for i in range(len(Ybin)):
line = Ybin[i]
line[np.int(Ycont[i])]=1
Ybin[i] = line
return Ybin
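# --- Usage sketch (Python 2, since the body uses xrange; assumes numpy is imported as np):
Ycont = [0, 2, 1]
print(convert_to_bin(Ycont, 3, verbose=False))
# -> [[1, 0, 0], [0, 0, 1], [0, 1, 0]]   one-hot rows, one per target value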
def galfit_getcentralcoordinate(modelfile,coordorigin=1,verbose=True):
"""
Return the central coordinates of a GALFIT model extracted using the reference image WCS and the FITSECT keyword
--- INPUT ---
modelfile Path and name to GALFIT model fits file to retrieve central coordinates for
coordorigin Origin of coordinates in reference image to use when converting pixels to degrees (skycoord)
verbose Toggle verbosity
--- EXAMPLE OF USE ---
fileG = '/Volumes/DATABCKUP2/TDOSEextractions/models_cutouts/model8685multicomponent/model_acs_814w_candels-cdfs-02_cut_v1.0_id8685_cutout7p0x7p0arcsec.fits' # Gauss components
fileS = '/Volumes/DATABCKUP2/TDOSEextractions/models_cutouts/model8685multicomponent/model_acs_814w_candels-cdfs-02_cut_v1.0_id9262_cutout2p0x2p0arcsec.fits' # Sersic components
xpix, ypix, ra_model, dec_model = tu.galfit_getcentralcoordinate(fileG,coordorigin=1)
"""
if verbose: print ' - Will extract central coordinates from '+modelfile
refimg_hdr = pyfits.open(modelfile)[1].header
model_hdr = pyfits.open(modelfile)[2].header
imgwcs = wcs.WCS(tu.strip_header(refimg_hdr.copy()))
fit_region = model_hdr['FITSECT']
cutrange_low_x = int(float(fit_region.split(':')[0].split('[')[-1]))
cutrange_low_y = int(float(fit_region.split(',')[-1].split(':')[0]))
xsize = model_hdr['NAXIS1']
ysize = model_hdr['NAXIS2']
xpix = cutrange_low_x + int(xsize/2.)
ypix = cutrange_low_y + int(ysize/2.)
if verbose: print ' - Converting pixel position to coordinates using a pixel origin='+str(coordorigin)
skycoord = wcs.utils.pixel_to_skycoord(xpix,ypix,imgwcs,origin=coordorigin)
ra_model = skycoord.ra.value
dec_model = skycoord.dec.value
return xpix,ypix,ra_model,dec_model
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
def pad_batch(mini_batch):
mini_batch_size = len(mini_batch)
# print mini_batch.shape
# print mini_batch
max_sent_len1 = int(np.max([len(x[0]) for x in mini_batch]))
max_sent_len2 = int(np.max([len(x[1]) for x in mini_batch]))
# print max_sent_len1, max_sent_len2
# max_token_len = int(np.mean([len(val) for sublist in mini_batch for val in sublist]))
main_matrix1 = np.zeros((mini_batch_size, max_sent_len1), dtype= np.int)
main_matrix2 = np.zeros((mini_batch_size, max_sent_len2), dtype= np.int)
for idx1, i in enumerate(mini_batch):
for idx2, j in enumerate(i[0]):
try:
                main_matrix1[idx1, idx2] = j
except IndexError:
pass
for idx1, i in enumerate(mini_batch):
for idx2, j in enumerate(i[1]):
try:
                main_matrix2[idx1, idx2] = j
except IndexError:
pass
main_matrix1_t = Variable(torch.from_numpy(main_matrix1))
main_matrix2_t = Variable(torch.from_numpy(main_matrix2))
# print main_matrix1_t.size()
# print main_matrix2_t.size()
return [main_matrix1_t, main_matrix2_t]
# return [Variable(torch.cat((main_matrix1_t, main_matrix2_t), 0))
# def pad_batch(mini_batch):
# # print mini_batch
# # print type(mini_batch)
# # print mini_batch.shape
# # for i, _ in enumerate(mini_batch):
# # print i, _
# return [Variable(torch.from_numpy(np.asarray(_))) for _ in mini_batch[0]]
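# --- Toy usage sketch for pad_batch (made-up token ids; assumes numpy as np, torch,
# and `from torch.autograd import Variable` as in the snippet's original imports, plus
# an older NumPy where the np.int alias still exists):
import numpy as np
import torch
from torch.autograd import Variable

mini_batch = [
    ([4, 7, 2], [3, 9]),        # (sentence-1 token ids, sentence-2 token ids)
    ([5, 1],    [6, 2, 8, 3]),
]
m1, m2 = pad_batch(mini_batch)
print(m1.size(), m2.size())     # zero-padded to the longest sentence on each side: (2, 3) and (2, 4)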
def do_kshape(name_prefix, df, cluster_size, initial_clustering=None):
columns = df.columns
matrix = []
for c in columns:
matrix.append(zscore(df[c]))
res = kshape(matrix, cluster_size, initial_clustering)
labels, score = silhouette_score(np.array(matrix), res)
# keep a reference of which metrics are in each cluster
cluster_metrics = defaultdict(list)
    # we keep it in a dict: cluster_metrics[<cluster_nr>] = [<metric_a>, <metric_b>, ...]
for i, col in enumerate(columns):
cluster_metrics[int(labels[i])].append(col)
filenames = []
for i, (centroid, assigned_series) in enumerate(res):
d = {}
for serie in assigned_series:
d[columns[serie]] = pd.Series(matrix[serie], index=df.index)
d["centroid"] = pd.Series(centroid, index=df.index)
df2 = pd.DataFrame(d)
figure = df2.plot()
figure.legend(loc='center left', bbox_to_anchor=(1, 0.5))
name = "%s_%d" % (name_prefix, (i+1))
filename = name + ".tsv.gz"
print(filename)
df2.to_csv(filename, sep="\t", compression='gzip')
filenames.append(os.path.basename(filename))
graphs.write(df2, name + ".png")
return cluster_metrics, score, filenames
def get_initial_clustering(service, metadata, metrics):
s_score, cluster_metrics = msu.get_cluster_metrics(metadata, service)
if not cluster_metrics:
return None
common_metrics_all = set()
initial_idx = np.zeros(len(metrics), dtype=np.int)
for key, value in cluster_metrics.iteritems():
other_metrics = value['other_metrics']
common_metrics = set(metrics) & set(other_metrics)
for metric in list(common_metrics):
initial_idx[list(metrics).index(metric)] = int(key)
common_metrics_all = (common_metrics_all | common_metrics)
# assign remaining metrics to a new cluster (if any)
remaining_metrics = list(metrics - common_metrics_all)
remaining_cluster = max(initial_idx) + 1
if remaining_metrics:
for metric in remaining_metrics:
initial_idx[list(metrics).index(metric)] = remaining_cluster
return initial_idx
def deriveKernel(self, params, i):
self.checkParamsI(params, i)
params_kernels = params[len(self.kernels):]
#sf2 derivatives
if (i < len(self.kernels)):
EE = self.getEE(params_kernels)
#if (i==2): Z = 2*np.exp(2*params[i]) * EE[:,:,i+1]; print i, Z[:5, :5]; sys.exit(0)
return 2*np.exp(2*params[i]) * EE[:,:,i+1]
#params_kernel derivatives
else:
params_ind = len(self.kernels)
for k_i, k in enumerate(self.kernels):
numHyp = k.getNumParams()
if (i not in xrange(params_ind, params_ind+numHyp)):
params_ind += numHyp
continue
#now we found our kernel
dKj = k.deriveKernel(params[params_ind:params_ind+numHyp], i-params_ind)
Kd = self.Kdim(params_kernels)
range1 = np.array(xrange(0,k_i), dtype=np.int)
range2 = np.array(xrange(k_i+1, len(self.kernels)), dtype=np.int)
Kd_nocov = Kd[:, :, np.concatenate((range1, range2))]
            E = elsympol(Kd_nocov, len(self.kernels)-1)   # (R-1)-th elementary symmetric polynomial
K=0
for ii in xrange(len(self.kernels)):
K += E[:,:,ii]*np.exp(2*params[ii])
#if (i==5): Z = dKj * K; print i, Z[:5, :5]; sys.exit(0)
return dKj * K
raise Exception('Invalid parameter')
def load_scan(path):
slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
#slices.sort(key = lambda x: int(x.InstanceNumber))
acquisitions = [x.AcquisitionNumber for x in slices]
vals, counts = np.unique(acquisitions, return_counts=True)
    vals = vals[::-1]  # reverse the order so the later acquisitions come first (np.unique seems to always return values in ascending order, e.g. 1 2 ...)
counts = counts[::-1]
    ## take the acquisition that has more entries; if the counts are identical, take the later entry
acq_val_sel = vals[np.argmax(counts)]
##acquisitions = sorted(np.unique(acquisitions), reverse=True)
if len(vals) > 1:
print ("WARNING ##########: MULTIPLE acquisitions & counts, acq_val_sel, path: ", vals, counts, acq_val_sel, path)
slices2= [x for x in slices if x.AcquisitionNumber == acq_val_sel]
slices = slices2
    ## One path may include 2 acquisitions (2 sets); take only the later acquisition, which is typically better than the earlier one(s).
    ## example: '../input/stage1/b8bb02d229361a623a4dc57aa0e5c485'
#slices.sort(key = lambda x: int(x.ImagePositionPatient[2])) # from v 8, BUG should be float
slices.sort(key = lambda x: float(x.ImagePositionPatient[2])) # from v 9
try:
slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
except:
slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
for s in slices:
s.SliceThickness = slice_thickness
return slices
def generate_markers_3d(image):
#Creation of the internal Marker
marker_internal = image < -400
marker_internal_labels = np.zeros(image.shape).astype(np.int16)
for i in range(marker_internal.shape[0]):
marker_internal[i] = segmentation.clear_border(marker_internal[i])
marker_internal_labels[i] = measure.label(marker_internal[i])
#areas = [r.area for r in measure.regionprops(marker_internal_labels)]
areas = [r.area for i in range(marker_internal.shape[0]) for r in measure.regionprops(marker_internal_labels[i])]
for i in range(marker_internal.shape[0]):
areas = [r.area for r in measure.regionprops(marker_internal_labels[i])]
areas.sort()
if len(areas) > 2:
for region in measure.regionprops(marker_internal_labels[i]):
if region.area < areas[-2]:
for coordinates in region.coords:
marker_internal_labels[i, coordinates[0], coordinates[1]] = 0
marker_internal = marker_internal_labels > 0
#Creation of the external Marker
# 3x3 structuring element with connectivity 1, used by default
struct1 = ndimage.generate_binary_structure(2, 1)
struct1 = struct1[np.newaxis,:,:] # expand by z axis .
external_a = ndimage.binary_dilation(marker_internal, structure=struct1, iterations=10)
external_b = ndimage.binary_dilation(marker_internal, structure=struct1, iterations=55)
marker_external = external_b ^ external_a
#Creation of the Watershed Marker matrix
    #marker_watershed = np.zeros((512, 512), dtype=np.int)  # original
marker_watershed = np.zeros((marker_external.shape), dtype=np.int)
marker_watershed += marker_internal * 255
marker_watershed += marker_external * 128
return marker_internal, marker_external, marker_watershed
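# --- Synthetic sketch for generate_markers_3d (assumes numpy as np, scipy.ndimage as
# ndimage and skimage's measure/segmentation imports from the original module; the fake
# volume below is made up):
import numpy as np

image = np.zeros((3, 64, 64), dtype=np.float32)   # 3-slice "CT": soft tissue everywhere...
image[:, 20:44, 20:44] = -1000.0                  # ...plus one air-filled (< -400) blob per slice

internal, external, watershed = generate_markers_3d(image)
print(internal.sum(), np.unique(watershed))       # lung-candidate voxel count and marker levels {0, 128, 255}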
def seq(start, stop, step=1):
n = int(round((stop - start)/float(step)))
if n > 1:
return([start + step*i for i in range(n+1)])
else:
return([])
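# --- Usage sketch: seq() builds an inclusive range with a (possibly fractional) step.
print(seq(-2, 2, 1))      # -> [-2, -1, 0, 1, 2]
print(seq(0, 1, 0.25))    # -> [0.0, 0.25, 0.5, 0.75, 1.0]
print(seq(0, 0.1, 1))     # -> []  (fewer than two steps)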
def draw_circles(image,cands,origin,spacing):
#make empty matrix, which will be filled with the mask
image_mask = np.zeros(image.shape, dtype=np.int16)
#run over all the nodules in the lungs
for ca in cands.values:
        #get middle x-, y- and z- world coordinates of the nodule
        #radius = np.ceil(ca[4])/2     ## original: replaced the ceil with a very minor increase of 1% ....
        radius = (ca[4])/2 + 0.51 * spacing[0]  # increase by roughly half of the z-direction spacing (trying to capture a wider region/border for learning and to address the coarse net)
coord_x = ca[1]
coord_y = ca[2]
coord_z = ca[3]
image_coord = np.array((coord_z,coord_y,coord_x))
#determine voxel coordinate given the worldcoordinate
image_coord = world_2_voxel(image_coord,origin,spacing)
#determine the range of the nodule
#noduleRange = seq(-radius, radius, RESIZE_SPACING[0]) # original, uniform spacing
noduleRange_z = seq(-radius, radius, spacing[0])
noduleRange_y = seq(-radius, radius, spacing[1])
noduleRange_x = seq(-radius, radius, spacing[2])
#x = y = z = -2
#create the mask
for x in noduleRange_x:
for y in noduleRange_y:
for z in noduleRange_z:
coords = world_2_voxel(np.array((coord_z+z,coord_y+y,coord_x+x)),origin,spacing)
                    #if (np.linalg.norm(image_coord-coords) * RESIZE_SPACING[0]) < radius:  ### original (constrained to a uniform RESIZE)
if (np.linalg.norm((image_coord-coords) * spacing)) < radius:
image_mask[int(np.round(coords[0])),int(np.round(coords[1])),int(np.round(coords[2]))] = int(1)
return image_mask
def _unpack_header_adjunct(hdr):
"""
Unpacks the adjunct from a BLUE file header dictionary. Expects
the 'adjunct' field to be a 256 byte raw string, or None (in which
case it returns with default values).
"""
# Convert the adjunct (file type specific) part of the header
file_class = int(hdr['type'] / 1000)
endian = _rep_tran[hdr['head_rep']]
hdr.update(_unpack_blue_struct(hdr['adjunct'],
_bluestructs['T%dADJUNCT' % file_class],
endian))
del hdr['adjunct']
if file_class == 3:
hdr['subr'] = _unpack_blue_struct_array(hdr['subr'],
_bluestructs['SUBRECSTRUCT'],
hdr['subrecords'], endian)
elif file_class == 5:
hdr['comp'] = _unpack_blue_struct_array(hdr['comp'],
_bluestructs['COMPSTRUCT'],
hdr['components'], endian)
hdr['quadwords'] = _unpack_blue_struct(hdr['quadwords'],
_bluestructs['T5QUADWORDS'],
endian)
elif file_class == 6:
# For Type 6000 files, the hdr['subr'] field gets filled in from
# information in the extended header, which is not unpacked
# until after the main header and header adjunct are unpacked.
# See _open_t6subrecords(), which gets called by readheader().
# Just initialize it to an empty list here.
hdr['subr'] = []
def pack_header(hdr, structured=0):
"""
Returns the header in the form of a string.
Pack the given BLUE header dictionary to its binary format for
storage on disk. All keywords, main and extended, are packed
and updated in the hdr dict that is passed in.
If you intend on writing this header out to disk, do not forget to
write out the extended header as well. It should be written at
512*hdr['ext_start'] bytes offset from the beginning of the file
and the extended header itself should be padded out to a multiple
of 512 bytes.
If there is an extended header present, setting the optional
<structured> argument to true (default is false) will cause any
Python dictionaries, lists, and tuples to be packed with their
structures intact. See the doc string for pack_keywords() for
more details.
"""
# Leave the user's copy alone
hdr = hdr.copy()
# Pack the main header keywords
pack_main_header_keywords(hdr)
# We have to pack the adjunct part explicitly because it is a union
endian = _rep_tran[hdr['head_rep']]
# Recalculate class from current type (user may have changed)
file_class = int(hdr['type'] / 1000)
hdr['adjunct'] = _pack_blue_struct(hdr, _bluestructs['T%dADJUNCT' %
file_class], endian)
# Pack the extended header if present
if hdr.has_key('ext_header'):
pack_ext_header(hdr, structured=structured)
return _pack_blue_struct(hdr, _bluestructs['HEADER'], endian)