def test_default_type_conversion(self):
df = sql.read_sql_table("types_test_data", self.conn)
self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
"FloatCol loaded with incorrect type")
self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
"IntCol loaded with incorrect type")
self.assertTrue(issubclass(df.BoolCol.dtype.type, np.bool_),
"BoolCol loaded with incorrect type")
# Int column with NA values stays as float
self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
"IntColWithNull loaded with incorrect type")
# Bool column with NA values becomes object
self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.object),
"BoolColWithNull loaded with incorrect type")
python类integer()的实例源码
test_sql.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 32
收藏 0
点赞 0
评论 0
test_sql.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def test_default_type_conversion(self):
df = sql.read_sql_table("types_test_data", self.conn)
self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
"FloatCol loaded with incorrect type")
self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
"IntCol loaded with incorrect type")
# sqlite has no boolean type, so integer type is returned
self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer),
"BoolCol loaded with incorrect type")
# Int column with NA values stays as float
self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
"IntColWithNull loaded with incorrect type")
# Non-native Bool column with NA values stays as float
self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating),
"BoolColWithNull loaded with incorrect type")
test_sql.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def test_default_type_conversion(self):
df = sql.read_sql_table("types_test_data", self.conn)
self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating),
"FloatCol loaded with incorrect type")
self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer),
"IntCol loaded with incorrect type")
# MySQL has no real BOOL type (it's an alias for TINYINT)
self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer),
"BoolCol loaded with incorrect type")
# Int column with NA values stays as float
self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating),
"IntColWithNull loaded with incorrect type")
# Bool column with NA = int column with NA values => becomes float
self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating),
"BoolColWithNull loaded with incorrect type")
parsers.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def __init__(self, f, colspecs, delimiter, comment):
self.f = f
self.buffer = None
self.delimiter = '\r\n' + delimiter if delimiter else '\n\r\t '
self.comment = comment
if colspecs == 'infer':
self.colspecs = self.detect_colspecs()
else:
self.colspecs = colspecs
if not isinstance(self.colspecs, (tuple, list)):
raise TypeError("column specifications must be a list or tuple, "
"input was a %r" % type(colspecs).__name__)
for colspec in self.colspecs:
if not (isinstance(colspec, (tuple, list)) and
len(colspec) == 2 and
isinstance(colspec[0], (int, np.integer, type(None))) and
isinstance(colspec[1], (int, np.integer, type(None)))):
raise TypeError('Each column specification must be '
'2 element tuple or list of integers')
pytables.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 39
收藏 0
点赞 0
评论 0
def set_atom_categorical(self, block, items, info=None, values=None):
# currently only supports a 1-D categorical
# in a 1-D block
values = block.values
codes = values.codes
self.kind = 'integer'
self.dtype = codes.dtype.name
if values.ndim > 1:
raise NotImplementedError("only support 1-d categoricals")
if len(items) > 1:
raise NotImplementedError("only support single block categoricals")
# write the codes; must be in a block shape
self.ordered = values.ordered
self.typ = self.get_atom_data(block, kind=codes.dtype.name)
self.set_data(_block_shape(codes))
# write the categories
self.meta = 'category'
self.set_metadata(block.values.categories)
# update the info
self.update_info(info)
sql.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def _handle_date_column(col, format=None):
if isinstance(format, dict):
return to_datetime(col, errors='ignore', **format)
else:
if format in ['D', 's', 'ms', 'us', 'ns']:
return to_datetime(col, errors='coerce', unit=format, utc=True)
elif (issubclass(col.dtype.type, np.floating) or
issubclass(col.dtype.type, np.integer)):
# parse dates as timestamp
format = 's' if format is None else format
return to_datetime(col, errors='coerce', unit=format, utc=True)
elif com.is_datetime64tz_dtype(col):
# coerce to UTC timezone
# GH11216
return (to_datetime(col, errors='coerce')
.astype('datetime64[ns, UTC]'))
else:
return to_datetime(col, errors='coerce', format=format, utc=True)
sql.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def _get_dtype(self, sqltype):
from sqlalchemy.types import (Integer, Float, Boolean, DateTime,
Date, TIMESTAMP)
if isinstance(sqltype, Float):
return float
elif isinstance(sqltype, Integer):
# TODO: Refine integer size.
return np.dtype('int64')
elif isinstance(sqltype, TIMESTAMP):
# we have a timezone capable type
if not sqltype.timezone:
return datetime
return DatetimeTZDtype
elif isinstance(sqltype, DateTime):
# Caution: np.datetime64 is also a subclass of np.number.
return datetime
elif isinstance(sqltype, Date):
return date
elif isinstance(sqltype, Boolean):
return bool
return object
sql.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 45
收藏 0
点赞 0
评论 0
def _sql_type_name(self, col):
dtype = self.dtype or {}
if col.name in dtype:
return dtype[col.name]
col_type = self._get_notnull_col_dtype(col)
if col_type == 'timedelta64':
warnings.warn("the 'timedelta' type is not supported, and will be "
"written as integer values (ns frequency) to the "
"database.", UserWarning, stacklevel=8)
col_type = "integer"
elif col_type == "datetime64":
col_type = "datetime"
elif col_type == "empty":
col_type = "string"
elif col_type == "complex":
raise ValueError('Complex datatypes not supported')
if col_type not in _SQL_TYPES:
col_type = "string"
return _SQL_TYPES[col_type][self.pd_sql.flavor]
series.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def __getitem__(self, key):
"""
"""
try:
return self._get_val_at(self.index.get_loc(key))
except KeyError:
if isinstance(key, (int, np.integer)):
return self._get_val_at(key)
raise Exception('Requested index not in this series!')
except TypeError:
# Could not hash item, must be array-like?
pass
# is there a case where this would NOT be an ndarray?
# need to find an example, I took out the case for now
key = _values_from_object(key)
dataSlice = self.values[key]
new_index = Index(self.index.view(ndarray)[key])
return self._constructor(dataSlice, index=new_index).__finalize__(self)
test_core.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 43
收藏 0
点赞 0
评论 0
def test_allclose(self):
# Tests allclose on arrays
a = np.random.rand(10)
b = a + np.random.rand(10) * 1e-8
self.assertTrue(allclose(a, b))
# Test allclose w/ infs
a[0] = np.inf
self.assertTrue(not allclose(a, b))
b[0] = np.inf
self.assertTrue(allclose(a, b))
# Test all close w/ masked
a = masked_array(a)
a[-1] = masked
self.assertTrue(allclose(a, b, masked_equal=True))
self.assertTrue(not allclose(a, b, masked_equal=False))
# Test comparison w/ scalar
a *= 1e-8
a[0] = 0
self.assertTrue(allclose(a, 0, masked_equal=True))
# Test that the function works for MIN_INT integer typed arrays
a = masked_array([np.iinfo(np.int_).min], dtype=np.int_)
self.assertTrue(allclose(a, a))
def check_random_state(seed):
"""Turn seed into a np.random.RandomState instance
Parameters
----------
seed : None | int | instance of RandomState
If seed is None, return the RandomState singleton used by np.random.
If seed is an int, return a new RandomState instance seeded with seed.
If seed is already a RandomState instance, return it.
Otherwise raise ValueError.
"""
if seed is None or seed is np.random:
return np.random.mtrand._rand
if isinstance(seed, (numbers.Integral, np.integer)):
return np.random.RandomState(seed)
if isinstance(seed, np.random.RandomState):
return seed
raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
' instance' % seed)
def guessCfgType( value ):
# For guessing the data type (bool, integer, float, or string only) from ConfigParser
if value.lower() == 'true':
return True
if value.lower() == 'false':
return False
try:
value = np.int( value )
return value
except:
pass
try:
value = np.float32( value )
return value
except:
pass
return value
def check_window_length(window_length):
"""
Ensure the window length provided to a transform is valid.
"""
if window_length is None:
raise InvalidWindowLength("window_length must be provided")
if not isinstance(window_length, Integral):
raise InvalidWindowLength(
"window_length must be an integer-like number")
if window_length == 0:
raise InvalidWindowLength("window_length must be non-zero")
if window_length < 0:
raise InvalidWindowLength("window_length must be positive")
def _extract_field_names(self, event):
# extract field names from sids (price, volume etc), make sure
# every sid has the same fields.
sid_keys = []
for sid in itervalues(event.data):
keys = set([name for name, value in sid.items()
if isinstance(value,
(int,
float,
numpy.integer,
numpy.float,
numpy.long))
])
sid_keys.append(keys)
# with CUSTOM data events, there may be different fields
# per sid. So the allowable keys are the union of all events.
union = set.union(*sid_keys)
unwanted_fields = {
'portfolio',
'sid',
'dt',
'type',
'source_id',
'_initial_len',
}
return union - unwanted_fields
def RATWriteArray(rat, array, field, start=0):
"""
Pure Python implementation of writing a chunk of the RAT
from a numpy array. Type of array is coerced to one of the types
(int, double, string) supported. Called from RasterAttributeTable.WriteArray
"""
if array is None:
raise ValueError("Expected array of dim 1")
# if not the array type convert it to handle lists etc
if not isinstance(array, numpy.ndarray):
array = numpy.array(array)
if array.ndim != 1:
raise ValueError("Expected array of dim 1")
if (start + array.size) > rat.GetRowCount():
raise ValueError("Array too big to fit into RAT from start position")
if numpy.issubdtype(array.dtype, numpy.integer):
# is some type of integer - coerce to standard int
# TODO: must check this is fine on all platforms
# confusingly numpy.int 64 bit even if native type 32 bit
array = array.astype(numpy.int32)
elif numpy.issubdtype(array.dtype, numpy.floating):
# is some type of floating point - coerce to double
array = array.astype(numpy.double)
elif numpy.issubdtype(array.dtype, numpy.character):
# cast away any kind of Unicode etc
array = array.astype(numpy.character)
else:
raise ValueError("Array not of a supported type (integer, double or string)")
return RATValuesIONumPyWrite(rat, field, start, array)
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
elif isinstance(obj, np.floating):
return float(obj)
else:
return super(MyEncoder, self).default(obj)
def writeHDF5Meta(self, root, name, data, **dsOpts):
if isinstance(data, np.ndarray):
dsOpts['maxshape'] = (None,) + data.shape[1:]
root.create_dataset(name, data=data, **dsOpts)
elif isinstance(data, list) or isinstance(data, tuple):
gr = root.create_group(name)
if isinstance(data, list):
gr.attrs['_metaType_'] = 'list'
else:
gr.attrs['_metaType_'] = 'tuple'
#n = int(np.log10(len(data))) + 1
for i in range(len(data)):
self.writeHDF5Meta(gr, str(i), data[i], **dsOpts)
elif isinstance(data, dict):
gr = root.create_group(name)
gr.attrs['_metaType_'] = 'dict'
for k, v in data.items():
self.writeHDF5Meta(gr, k, v, **dsOpts)
elif isinstance(data, int) or isinstance(data, float) or isinstance(data, np.integer) or isinstance(data, np.floating):
root.attrs[name] = data
else:
try: ## strings, bools, None are stored as repr() strings
root.attrs[name] = repr(data)
except:
print("Can not store meta data of type '%s' in HDF5. (key is '%s')" % (str(type(data)), str(name)))
raise
def writeHDF5Meta(self, root, name, data, **dsOpts):
if isinstance(data, np.ndarray):
dsOpts['maxshape'] = (None,) + data.shape[1:]
root.create_dataset(name, data=data, **dsOpts)
elif isinstance(data, list) or isinstance(data, tuple):
gr = root.create_group(name)
if isinstance(data, list):
gr.attrs['_metaType_'] = 'list'
else:
gr.attrs['_metaType_'] = 'tuple'
#n = int(np.log10(len(data))) + 1
for i in range(len(data)):
self.writeHDF5Meta(gr, str(i), data[i], **dsOpts)
elif isinstance(data, dict):
gr = root.create_group(name)
gr.attrs['_metaType_'] = 'dict'
for k, v in data.items():
self.writeHDF5Meta(gr, k, v, **dsOpts)
elif isinstance(data, int) or isinstance(data, float) or isinstance(data, np.integer) or isinstance(data, np.floating):
root.attrs[name] = data
else:
try: ## strings, bools, None are stored as repr() strings
root.attrs[name] = repr(data)
except:
print("Can not store meta data of type '%s' in HDF5. (key is '%s')" % (str(type(data)), str(name)))
raise
def repeat(self, nr_sites):
"""Construct a longer MP-POVM by repetition
The resulting POVM will have length `nr_sites`. If `nr_sites`
is not an integer multiple of `len(self)`, `self` must
factorize (have leg dimension one) at the position where it
will be cut. For example, consider the tensor product MP-POVM
of Pauli X and Pauli Y. Calling `repeat(nr_sites=5)` will
construct the tensor product POVM XYXYX:
>>> import mpnum as mp
>>> import mpnum.povm as mpp
>>> x, y = (mpp.MPPovm.from_local_povm(lp(3), 1) for lp in
... (mpp.x_povm, mpp.y_povm))
>>> xy = mp.chain([x, y])
>>> xyxyx = mp.chain([x, y, x, y, x])
>>> mp.norm(xyxyx - xy.repeat(5)) <= 1e-10
True
"""
n_repeat, n_last = nr_sites // len(self), nr_sites % len(self)
if n_last > 0:
assert self.ranks[n_last - 1] == 1, \
"Partial repetition requires factorizing MP-POVM"
return mp.chain([self] * n_repeat
+ ([MPPovm(self.lt[:n_last])] if n_last > 0 else []))
def est_pmf(self, samples, normalize=True, eps=1e-10):
"""Estimate probability mass function from samples
:param np.ndarray samples: `(n_samples, len(self.nsoutdims))`
array of samples
:param bool normalize: True: Return normalized probability
estimates (default). False: Return integer outcome counts.
:returns: Estimated probabilities as ndarray `est_pmf` with
shape `self.nsoutdims`
`n_samples * est_pmf[i1, ..., ik]` provides the number of
occurences of outcome `(i1, ..., ik)` in `samples`.
"""
n_samples = samples.shape[0]
n_out = np.prod(self.nsoutdims)
if samples.ndim > 1:
samples = self.pack_samples(samples)
counts = np.bincount(samples, minlength=n_out)
assert counts.shape == (n_out,)
counts = counts.reshape(self.nsoutdims)
assert counts.sum() == n_samples
if normalize:
return counts / n_samples
else:
return counts