def test_object_array_to_fixed_string(self):
# Ticket #1235.
a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_)
b = np.array(a, dtype=(np.str_, 8))
assert_equal(a, b)
c = np.array(a, dtype=(np.str_, 5))
assert_equal(c, np.array(['abcde', 'ijklm']))
d = np.array(a, dtype=(np.str_, 12))
assert_equal(a, d)
e = np.empty((2, ), dtype=(np.str_, 8))
e[:] = a[:]
assert_equal(a, e)
python类str_()的实例源码
def set_format(self, data, digits, scientific):
    """Pick the printf-style cell format for ``data`` and store it on the model.

    Parameters
    ----------
    data : object with a ``dtype`` attribute
        Only ``data.dtype.type`` is inspected.
    digits : int
        Number of digits after the decimal point for numeric data.
    scientific : bool
        Use exponent notation (``e``) when true, fixed-point (``f``) otherwise.

    Notes
    -----
    This does not call ``model_data.reset()``, so it should be called by the
    caller.
    """
    scalar_type = data.dtype.type
    # The ``np.str`` / ``np.bool`` aliases of the builtins were removed in
    # NumPy 1.24, so the builtins are referenced directly.
    if scalar_type in (str, np.str_, np.bool_, bool, np.object_):
        fmt = '%s'
    else:
        # XXX: use self.digits_spinbox.getValue() instead?
        format_letter = 'e' if scientific else 'f'
        fmt = '%%.%d%s' % (digits, format_letter)
    self.model_data._set_format(fmt)
def to_excel(self):
    """View selection in Excel.

    Shows an error dialog and returns when xlwings is not available, and
    returns silently when there is no selection data.
    """
    if xw is None:
        QMessageBox.critical(self, "Error", "to_excel() is not available because xlwings is not installed")
        # Nothing more can be done without xlwings; falling through would
        # fail on ``xw.view`` below.
        return
    data = self._selection_data()
    if data is None:
        return
    # convert (row) generators to lists then array
    # TODO: the conversion to array is currently necessary even though xlwings will translate it back to a list
    #       anyway. The problem is that our lists contains numpy types and especially np.str_ crashes xlwings.
    #       unsure how we should fix this properly: in xlwings, or change _selection_data to return only standard
    #       Python types.
    xw.view(np.array([list(r) for r in data]))
def test_object_array_to_fixed_string(self):
# Ticket #1235.
a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_)
b = np.array(a, dtype=(np.str_, 8))
assert_equal(a, b)
c = np.array(a, dtype=(np.str_, 5))
assert_equal(c, np.array(['abcde', 'ijklm']))
d = np.array(a, dtype=(np.str_, 12))
assert_equal(a, d)
e = np.empty((2, ), dtype=(np.str_, 8))
e[:] = a[:]
assert_equal(a, e)
def get_data(lst,preproc):
    """Preprocess every DICOM file in ``lst`` and save each result as a JPEG.

    Each file's pixel array is divided by its maximum, passed through
    ``preproc``, and written next to the source file with the extension
    replaced by ``.64x64.jpg``.

    Returns a two-item list ``[data, result]``: ``data`` is a flat string
    array built from all processed images (cast to ``uint8`` first) and
    ``result`` is the list of written JPEG paths.
    """
    processed_images = []
    saved_paths = []
    for src_path in lst:
        dcm = dicom.read_file(src_path)
        scaled = dcm.pixel_array.astype(float) / np.max(dcm.pixel_array)
        processed = preproc(scaled)
        jpeg_path = src_path.rsplit(".", 1)[0] + ".64x64.jpg"
        scipy.misc.imsave(jpeg_path, processed)
        saved_paths.append(jpeg_path)
        processed_images.append(processed)

    # Cast to uint8 and flatten, then convert the numbers to strings.
    flat_pixels = np.array(processed_images, dtype=np.uint8)
    flat_pixels = flat_pixels.reshape(flat_pixels.size)
    pixel_text = np.array(flat_pixels, dtype=np.str_)
    pixel_text = pixel_text.reshape(pixel_text.size)
    return [pixel_text, saved_paths]
def test_object_array_to_fixed_string(self):
# Ticket #1235.
a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_)
b = np.array(a, dtype=(np.str_, 8))
assert_equal(a, b)
c = np.array(a, dtype=(np.str_, 5))
assert_equal(c, np.array(['abcde', 'ijklm']))
d = np.array(a, dtype=(np.str_, 12))
assert_equal(a, d)
e = np.empty((2, ), dtype=(np.str_, 8))
e[:] = a[:]
assert_equal(a, e)
def test_structure_format(self):
dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))])
x = np.array([('Sarah', (8.0, 7.0)), ('John', (6.0, 7.0))], dtype=dt)
assert_equal(np.array2string(x),
"[('Sarah', [ 8., 7.]) ('John', [ 6., 7.])]")
# for issue #5692
A = np.zeros(shape=10, dtype=[("A", "M8[s]")])
A[5:].fill(np.nan)
assert_equal(np.array2string(A),
"[('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) " +
"('1970-01-01T00:00:00',)\n ('1970-01-01T00:00:00',) " +
"('1970-01-01T00:00:00',) ('NaT',) ('NaT',)\n " +
"('NaT',) ('NaT',) ('NaT',)]")
# See #8160
struct_int = np.array([([1, -1],), ([123, 1],)], dtype=[('B', 'i4', 2)])
assert_equal(np.array2string(struct_int),
"[([ 1, -1],) ([123, 1],)]")
struct_2dint = np.array([([[0, 1], [2, 3]],), ([[12, 0], [0, 0]],)],
dtype=[('B', 'i4', (2, 2))])
assert_equal(np.array2string(struct_2dint),
"[([[ 0, 1], [ 2, 3]],) ([[12, 0], [ 0, 0]],)]")
# See #8172
array_scalar = np.array(
(1., 2.1234567890123456789, 3.), dtype=('f8,f8,f8'))
assert_equal(np.array2string(array_scalar), "( 1., 2.12345679, 3.)")
def store_data(self, store_loc, **kwargs):
    """Create group ``store_loc`` in the store and write each keyword argument to it.

    Every keyword becomes one dataset named after the keyword, written with
    the compression settings held in ``self.clib`` / ``self.clev``. A non-empty
    list whose first element is a ``str`` or ``np.str_`` is UTF-8 encoded
    element by element before being written.
    """
    group = self.store.create_group(store_loc)
    for name, values in kwargs.items():
        # Only the first element is inspected to decide whether to encode.
        holds_text = (
            type(values) == list
            and len(values) != 0
            and (type(values[0]) is np.str_ or type(values[0]) is str)
        )
        if holds_text:
            values = [item.encode('utf8') for item in values]
        group.create_dataset(name, data=values, compression=self.clib, compression_opts=self.clev)
def test_object_array_to_fixed_string(self):
# Ticket #1235.
a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_)
b = np.array(a, dtype=(np.str_, 8))
assert_equal(a, b)
c = np.array(a, dtype=(np.str_, 5))
assert_equal(c, np.array(['abcde', 'ijklm']))
d = np.array(a, dtype=(np.str_, 12))
assert_equal(a, d)
e = np.empty((2, ), dtype=(np.str_, 8))
e[:] = a[:]
assert_equal(a, e)
def get_datinfo(cutoutid,setupdic):
    """
    Function returning information on file names etc. for both default run and cutout run

    --- INPUT ---
    cutoutid        ID to return information for; -9999 selects the default (non-cutout) run
    setupdic        Dictionary containing the setup parameters read from the TDOSE setup file

    --- OUTPUT ---
    cutstr          None for the default run, otherwise the string inserted before '.fits'
                    in the cutout file names
    imgsize         setupdic['cutout_sizes'], or the sizes read from that file for cutoutid
    refimg          Reference image; a [image, weight image] list when 'wht_image' is set
    datacube        Data cube file name
    variancecube    Variance cube file name
    sourcecat       Source catalog file name

    Calls sys.exit() when a cutout-size file has no entry, or several entries, for cutoutid.
    """
    if cutoutid == -9999:
        # NOTE(review): this branch reads 'noise_cube' while the cutout branch
        # reads 'variance_cube' -- confirm both keys exist in the setup.
        cutstr       = None
        imgsize      = setupdic['cutout_sizes']
        refimg       = setupdic['ref_image']
        datacube     = setupdic['data_cube']
        variancecube = setupdic['noise_cube']
        sourcecat    = setupdic['source_catalog']
        return cutstr, imgsize, refimg, datacube, variancecube, sourcecat

    # A string is the name of a file listing per-object sizes. isinstance() also
    # accepts plain ``str`` (np.str_ is a str subclass), which an exact-type
    # comparison with np.str_ rejected and then indexed character by character.
    if isinstance(setupdic['cutout_sizes'], str):
        sizeinfo = np.genfromtxt(setupdic['cutout_sizes'],dtype=None,comments='#')
        objent   = np.where(sizeinfo[:,0] == cutoutid)[0]
        if len(objent) > 1:
            sys.exit(' ---> More than one match in '+setupdic['cutout_sizes']+' for object '+str(cutoutid))
        elif len(objent) == 0:
            sys.exit(' ---> No match in '+setupdic['cutout_sizes']+' for object '+str(cutoutid))
        else:
            imgsize = sizeinfo[objent,1:][0].astype(float).tolist()
    else:
        imgsize = setupdic['cutout_sizes']

    # '.' is replaced by 'p' so the sizes can be embedded in a file name.
    cutstr     = ('_id'+str(int(cutoutid))+'_cutout'+str(imgsize[0])+'x'+str(imgsize[1])+'arcsec').replace('.','p')
    cut_suffix = cutstr+'.fits'
    cutout_dir = setupdic['cutout_directory']

    img_init_base  = setupdic['ref_image'].split('/')[-1]
    cube_init_base = setupdic['data_cube'].split('/')[-1]
    var_init_base  = setupdic['variance_cube'].split('/')[-1]

    cut_img      = cutout_dir+img_init_base.replace('.fits',cut_suffix)
    datacube     = cutout_dir+cube_init_base.replace('.fits',cut_suffix)
    variancecube = cutout_dir+var_init_base.replace('.fits',cut_suffix)
    # The catalog keeps its own directory; only the file name gets the suffix.
    sourcecat    = setupdic['source_catalog'].replace('.fits',cut_suffix)

    if setupdic['wht_image'] is None:
        refimg = cut_img
    else:
        wht_init_base = setupdic['wht_image'].split('/')[-1]
        wht_img       = cutout_dir+wht_init_base.replace('.fits',cut_suffix)
        refimg        = [cut_img,wht_img]

    return cutstr, imgsize, refimg, datacube, variancecube, sourcecat
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
common.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def _infer_dtype_from_scalar(val):
""" interpret the dtype from a scalar """
dtype = np.object_
# a 1-element ndarray
if isinstance(val, np.ndarray):
if val.ndim != 0:
raise ValueError(
"invalid ndarray passed to _infer_dtype_from_scalar")
dtype = val.dtype
val = val.item()
elif isinstance(val, compat.string_types):
# If we create an empty array using a string to infer
# the dtype, NumPy will only allocate one character per entry
# so this is kind of bad. Alternately we could use np.repeat
# instead of np.empty (but then you still don't want things
# coming out as np.str_!
dtype = np.object_
elif isinstance(val, (np.datetime64,
datetime)) and getattr(val, 'tzinfo', None) is None:
val = lib.Timestamp(val).value
dtype = np.dtype('M8[ns]')
elif isinstance(val, (np.timedelta64, timedelta)):
val = tslib.convert_to_timedelta(val, 'ns')
dtype = np.dtype('m8[ns]')
elif is_bool(val):
dtype = np.bool_
elif is_integer(val):
if isinstance(val, np.integer):
dtype = type(val)
else:
dtype = np.int64
elif is_float(val):
if isinstance(val, np.floating):
dtype = type(val)
else:
dtype = np.float64
elif is_complex(val):
dtype = np.complex_
return dtype, val
def make_dataset(dir, train=True):
    """Load image paths and poses for the "KingsCollege" entry under ``dir``.

    Reads ``dataset_train.txt`` (or ``dataset_test.txt`` when ``train`` is
    false) from the ``KingsCollege`` sub-directory, skipping three header
    lines. Column 0 holds the image path and columns 1-7 the pose values.

    The rows are sorted by path, then the same rows are appended in descending
    order without the last sorted one, so ``n`` input rows give ``2 * n - 1``
    output rows. Every path is prefixed with ``"KingsCollege/"``.

    Returns
    -------
    (paths, poses)
        String array of paths and float32 array of poses, or ``(None, None)``
        when ``dir`` has no ``KingsCollege`` entry.
    """
    paths = None
    poses = None
    split_file = 'dataset_train.txt' if train else 'dataset_test.txt'

    for target in os.listdir(dir):
        # Only the KingsCollege scene is loaded; every other entry is skipped.
        if target != "KingsCollege":
            continue

        listing = os.path.join(dir, target, split_file)
        path = np.genfromtxt(listing, dtype=np.str_, delimiter=' ',
                             skip_header=3, usecols=[0])
        pose = np.genfromtxt(listing, dtype=np.float32, delimiter=' ',
                             skip_header=3, usecols=[1, 2, 3, 4, 5, 6, 7])

        # Ascending by path, followed by descending from the second-to-last.
        order = path.argsort()
        backward = order[-2::-1]
        path = np.hstack((path[order], path[backward]))
        pose = np.vstack((pose[order], pose[backward]))

        # np.char.add is the public name of np.core.defchararray.add; the
        # np.core namespace is private and deprecated in NumPy 2.
        path = np.char.add(target + '/', path)

        if paths is None:
            paths = path
            poses = pose
        else:
            paths = np.hstack((paths, path))
            poses = np.vstack((poses, pose))

    return paths, poses
def data(self, index, role=Qt.DisplayRole):
    """Return the content of the cell at ``index`` for the requested Qt ``role``.

    An invalid index, or a role that no branch below answers, yields an empty
    ``to_qvariant()``.
    """
    if not index.isValid():
        return to_qvariant()
    # if role == Qt.DecorationRole:
    #     return ima.icon('editcopy')
    # if role == Qt.DisplayRole:
    #     return ""

    # Alignment and font are answered before the cell value is fetched.
    if role == Qt.TextAlignmentRole:
        return to_qvariant(int(Qt.AlignRight | Qt.AlignVCenter))
    elif role == Qt.FontRole:
        return self.font

    value = self.get_value(index)
    if role == Qt.DisplayRole:
        if value is np.ma.masked:
            return ''
        # for headers
        elif isinstance(value, str) and not isinstance(value, np.str_):
            return value
        else:
            # Every other value goes through the model's format string.
            return to_qvariant(self._format % value)

    elif role == Qt.BackgroundColorRole:
        # When this condition is false, control falls through to the final
        # empty return.
        if self.bgcolor_possible and self.bg_gradient is not None and value is not np.ma.masked:
            if self.bg_value is None:
                # No precomputed background values: derive one from the cell
                # value, widening vmin/vmax when a finite value lies outside.
                try:
                    v = self.color_func(value) if self.color_func is not None else value
                    if -np.inf < v < self.vmin:
                        # TODO: this is suboptimal, as it can reset many times (though in practice, it is usually
                        # ok). When we get buffering, we will need to compute vmin/vmax on the whole buffer
                        # at once, eliminating this problem (and we could even compute final colors directly
                        # all at once)
                        self.vmin = v
                        self.reset()
                    elif self.vmax < v < np.inf:
                        self.vmax = v
                        self.reset()
                    v = scale_to_01range(v, self.vmin, self.vmax)
                except TypeError:
                    # A TypeError anywhere above falls back to NaN.
                    v = np.nan
            else:
                # Precomputed background values are looked up by row/column.
                i, j = index.row(), index.column()
                v = self.bg_value[i, j]
            return self.bg_gradient[v]
    # elif role == Qt.ToolTipRole:
    #     return to_qvariant("{}\n{}".format(repr(value),self.get_labels(index)))
    return to_qvariant()
def main():
    """Convert the ``.mat`` file named on the command line to a CSV file.

    The CSV is written next to the input, with the final ``.mat`` extension
    replaced by ``.csv``, and holds one row per entry of
    ``data['classification_id'][0]``.

    Raises
    ------
    FileNotFoundError
        If the given path is not an existing file.
    ValueError
        If the given path does not end in ``.mat``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('file')
    args = parser.parse_args()
    print("Using file %s" % args.file)

    if not os.path.isfile(args.file):
        raise FileNotFoundError("Couldn't find file at '%s'" % args.file)
    if args.file.split('.')[-1] != 'mat':
        raise ValueError("File '%s' not a valid mat file" % args.file)

    mat_path = args.file
    # Strip only the final extension: splitting on the first '.' mangles paths
    # such as './run.mat' or 'data.v2/run.mat'.
    outfile = os.path.splitext(mat_path)[0] + '.csv'

    data = sio.loadmat(mat_path)
    keys = ['classification_id', 'user_name','user_id',
            'annotation','gold_label','machine_score',
            'diff','object_id','subject_id','mag','mag_err']

    n_records = len(data['classification_id'][0])
    # newline='' leaves line endings to the csv module, as its docs require.
    with open(outfile, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=keys)
        writer.writeheader()
        for index in range(n_records):
            row = {}
            for key in keys:
                column = data[key]
                # A column whose first element is a string is indexed directly
                # by record; any other column is indexed through its first row.
                if type(column[0]) is numpy.str_:
                    row[key] = column[index].strip()
                else:
                    row[key] = column[0][index]
            writer.writerow(row)
            # Report the number of rows written so far (1-based).
            sys.stdout.write("%d records processed\r" % (index + 1))
            sys.stdout.flush()
def get_wqp_results(self, service, **kwargs):
    """Bring data from WQP site into a Pandas DataFrame for analysis.

    The URL of ``self.get_response(service, **kwargs)`` is printed and then
    read with ``pd.read_csv``. The columns named below are typed as strings,
    except four depth/height measure values typed as ``np.float16``; the
    columns at positions 6, 56 and 61 are parsed as dates.
    """
    # Columns read as half-precision floats.
    float_columns = {
        "ActivityDepthHeightMeasure/MeasureValue",
        "ActivityTopDepthHeightMeasure/MeasureValue",
        "ActivityBottomDepthHeightMeasure/MeasureValue",
        "ResultDepthHeightMeasure/MeasureValue",
    }
    # Every column that gets an explicit dtype.
    typed_columns = (
        "OrganizationIdentifier",
        "OrganizationFormalName",
        "ActivityIdentifier",
        "ActivityStartTime/Time",
        "ActivityTypeCode",
        "ActivityMediaName",
        "ActivityMediaSubdivisionName",
        "ActivityStartDate",
        "ActivityStartTime/TimeZoneCode",
        "ActivityEndDate",
        "ActivityEndTime/Time",
        "ActivityEndTime/TimeZoneCode",
        "ActivityDepthHeightMeasure/MeasureValue",
        "ActivityDepthHeightMeasure/MeasureUnitCode",
        "ActivityDepthAltitudeReferencePointText",
        "ActivityTopDepthHeightMeasure/MeasureValue",
        "ActivityTopDepthHeightMeasure/MeasureUnitCode",
        "ActivityBottomDepthHeightMeasure/MeasureValue",
        "ActivityBottomDepthHeightMeasure/MeasureUnitCode",
        "ProjectIdentifier",
        "ActivityConductingOrganizationText",
        "MonitoringLocationIdentifier",
        "ActivityCommentText",
        "SampleAquifer",
        "HydrologicCondition",
        "HydrologicEvent",
        "SampleCollectionMethod/MethodIdentifier",
        "SampleCollectionMethod/MethodIdentifierContext",
        "SampleCollectionMethod/MethodName",
        "SampleCollectionEquipmentName",
        "ResultDetectionConditionText",
        "CharacteristicName",
        "ResultSampleFractionText",
        "ResultMeasureValue",
        "ResultMeasure/MeasureUnitCode",
        "MeasureQualifierCode",
        "ResultStatusIdentifier",
        "StatisticalBaseCode",
        "ResultValueTypeName",
        "ResultWeightBasisText",
        "ResultTimeBasisText",
        "ResultTemperatureBasisText",
        "ResultParticleSizeBasisText",
        "PrecisionValue",
        "ResultCommentText",
        "USGSPCode",
        "ResultDepthHeightMeasure/MeasureValue",
        "ResultDepthHeightMeasure/MeasureUnitCode",
        "ResultDepthAltitudeReferencePointText",
        "SubjectTaxonomicName",
        "SampleTissueAnatomyName",
        "ResultAnalyticalMethod/MethodIdentifier",
        "ResultAnalyticalMethod/MethodIdentifierContext",
        "ResultAnalyticalMethod/MethodName",
        "MethodDescriptionText",
        "LaboratoryName",
        "AnalysisStartDate",
        "ResultLaboratoryCommentText",
        "DetectionQuantitationLimitTypeName",
        "DetectionQuantitationLimitMeasure/MeasureValue",
        "DetectionQuantitationLimitMeasure/MeasureUnitCode",
        "PreparationStartDate",
        "ProviderName",
    )
    column_dtypes = {
        column: (np.float16 if column in float_columns else np.str_)
        for column in typed_columns
    }

    # define date field indices
    date_positions = [6, 56, 61]

    csv_url = self.get_response(service, **kwargs).url
    print(csv_url)

    # read csv into DataFrame
    return pd.read_csv(csv_url, dtype=column_dtypes, parse_dates=date_positions)