def _join_non_unique(self, other, how='left', return_indexers=False):
    """
    Join this index with ``other`` using the generic join indexers.

    Parameters
    ----------
    other : index-like object exposing ``_values``
    how : join type forwarded to ``_get_join_indexers`` (default 'left')
    return_indexers : bool, default False
        When True, also return the left and right indexers.

    Returns
    -------
    The joined index, or ``(joined index, left indexer, right indexer)``
    when ``return_indexers`` is True.
    """
    from pandas.tools.merge import _get_join_indexers

    lidx, ridx = _get_join_indexers([self.values], [other._values],
                                    how=how, sort=True)
    lidx = com._ensure_platform_int(lidx)
    ridx = com._ensure_platform_int(ridx)

    # Take values from this index first; positions whose left indexer is -1
    # are then overwritten with the value taken from ``other``.
    joined = self.values.take(lidx)
    np.putmask(joined, lidx == -1, other._values.take(ridx))
    joined = self._wrap_joined_index(joined, other)

    if not return_indexers:
        return joined
    return joined, lidx, ridx
python类putmask()的实例源码
base.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 30
收藏 0
点赞 0
评论 0
groupby.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 42
收藏 0
点赞 0
评论 0
def decons_group_index(comp_labels, shape):
    """
    Reconstruct one label array per level from combined group labels.

    Parameters
    ----------
    comp_labels : array of combined labels; negative entries are reported
        as -1 in every returned array
    shape : sequence giving the size of each level

    Returns
    -------
    list of label arrays, in the same order as ``shape``

    Raises
    ------
    ValueError
        If ``_int64_overflow_possible(shape)`` is true.
    """
    if _int64_overflow_possible(shape):
        # at some point group indices are factorized,
        # and may not be deconstructed here! wrong path!
        raise ValueError('cannot deconstruct factorized group indices!')

    negative = comp_labels < 0
    stride = 1
    offset = 0
    per_level = []
    # Walk the levels from last to first, peeling off one level per pass.
    for size in shape[::-1]:
        level_labels = (comp_labels - offset) % (stride * size) // stride
        np.putmask(level_labels, negative, -1)
        per_level.append(level_labels)
        offset = level_labels * stride
        stride *= size

    # Levels were collected last-to-first; restore the order of ``shape``.
    per_level.reverse()
    return per_level
nanops.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 36
收藏 0
点赞 0
评论 0
def _get_counts_nanvar(mask, axis, ddof, dtype=float):
    """
    Return ``(count, d)`` where ``d`` is ``count - ddof``.

    ``count`` comes from ``_get_counts(mask, axis, dtype=...)``. Wherever
    ``count <= ddof`` both values are replaced by NaN, so a later division
    yields NaN rather than inf.
    """
    dtype = _get_dtype(dtype)
    count = _get_counts(mask, axis, dtype=dtype)
    d = count - dtype.type(ddof)

    # always return NaN, never inf
    if lib.isscalar(count):
        if count <= ddof:
            return np.nan, np.nan
        return count, d

    too_few = count <= ddof
    if too_few.any():
        for target in (d, count):
            np.putmask(target, too_few, np.nan)
    return count, d
nanops.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 36
收藏 0
点赞 0
评论 0
def make_nancomp(op):
    """
    Wrap the comparison ``op`` so that positions where either operand is
    null come back as NaN.

    A boolean result is cast to object dtype before NaN is written into it.
    """
    def f(x, y):
        null_either = isnull(x) | isnull(y)
        result = op(x, y)

        if not null_either.any():
            return result

        if is_bool_dtype(result):
            result = result.astype('O')
        np.putmask(result, null_either, np.nan)
        return result

    return f
test_multilevel.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def test_frame_getitem_setitem_boolean(self):
    """Boolean-frame indexing: getitem matches ``where``; setitem matches
    the same masked assignment done on the raw ndarray."""
    frame = self.frame.T.copy()
    raw = frame.values

    # getitem with a boolean frame behaves like where()
    selected = frame[frame > 0]
    assert_frame_equal(selected, frame.where(frame > 0))

    # setitem with a boolean frame, mirrored on the raw values
    frame[frame > 0] = 5
    raw[raw > 0] = 5
    assert_almost_equal(frame.values, raw)

    frame[frame == 5] = 0
    raw[raw == 5] = 0
    assert_almost_equal(frame.values, raw)

    # a df that needs alignment first
    frame[frame[:-1] < 0] = 2
    head = raw[:-1]
    np.putmask(head, head < 0, 2)
    assert_almost_equal(frame.values, raw)

    # a non-boolean frame is rejected as a key
    with assertRaisesRegexp(TypeError, 'boolean values only'):
        frame[frame * 0] = 2
merge.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 40
收藏 0
点赞 0
评论 0
def _sort_labels(uniques, left, right):
    """
    Relabel ``left`` and ``right`` so labels follow the sorted order of
    ``uniques``.

    Entries equal to -1 stay -1. Returns ``(new_left, new_right)``.
    """
    if not isinstance(uniques, np.ndarray):
        # tuplesafe
        uniques = Index(uniques).values

    sorter = uniques.argsort()
    n_uniques = len(sorter)

    # reverse_indexer[old_label] -> position of that label after sorting
    reverse_indexer = np.empty(n_uniques, dtype=np.int64)
    reverse_indexer.put(sorter, np.arange(n_uniques))

    def relabel(labels):
        relabeled = reverse_indexer.take(com._ensure_platform_int(labels))
        np.putmask(relabeled, labels == -1, -1)
        return relabeled

    new_left = relabel(left)
    new_right = relabel(right)
    return new_left, new_right
def cloud_shadow_mask_array(image_array, image_difference_array, solar_zenith, solar_azimuth, resolution):
    '''
    Build a mask array that marks clouds and cloud shadows.

    The mask starts from outside_mask_array(image_array, outside_value=255).
    Dilated cloud regions are written first with codes
    FMASK_CLOUD * 10 + 3, + 2 and + 1 for dilation sizes 250, 150 and 50,
    then the cloud-shadow region (FMASK_CLOUD_SHADOW), and finally the
    clouds themselves (FMASK_CLOUD). Later writes overwrite earlier ones.
    '''
    clouds = cloud_mask_array(image_difference_array)
    shadows = shadow_mask_array(image_difference_array)
    inbetween = calculate_cloud_shadow(clouds, shadows, solar_zenith, solar_azimuth, resolution)
    image_mask_array = outside_mask_array(image_array, outside_value=255)

    pixel_sizes = [250, 150, 50]
    suffixes = range(len(pixel_sizes), 0, -1)
    for suffix, pixel_size in zip(suffixes, pixel_sizes):
        dilated = morph_dilation(clouds, pixel_size)
        numpy.putmask(image_mask_array, dilated == 1, FMASK_CLOUD * 10 + suffix)

    numpy.putmask(image_mask_array, inbetween == 1, FMASK_CLOUD_SHADOW)
    numpy.putmask(image_mask_array, clouds == 1, FMASK_CLOUD)
    return image_mask_array
def filter_bytes(self, databytes, bit_mask):
    """
    Keep the entries of ``databytes`` selected by ``bit_mask``.

    ``bit_mask`` is repeated as needed to cover ``len(databytes)``
    positions; an entry is kept where the repeated mask is > 0.
    """
    keep_flags = np.ones(len(databytes), dtype=int)
    # Every flag is truthy, so putmask overwrites all of them with the
    # (repeated) bit mask.
    np.putmask(keep_flags, keep_flags, bit_mask)
    selected = np.flatnonzero(keep_flags > 0)
    return databytes.take(selected)
def get_tag_data(self, episode, tag_channel):
    """
    Read the tag samples of ``tag_channel`` for ``episode`` from the file.

    Returns a record array with fields ``x`` (sample index scaled by the
    episode block's ``dX`` and offset by ``X0``) and ``y`` (sample value
    masked with 0b01 for tag channel 1, 0b10 otherwise).
    """
    # memorise some useful properties
    block = self.episode_block(episode)
    sample_size = self.sample_size(episode, tag_channel)
    sample_symbol = self.sample_symbol(episode, tag_channel)

    # create a bit mask to define which
    # sample to keep from the file
    channel_mask = self.create_channel_mask(episode)
    bit_mask = self.create_bit_mask(channel_mask, 1)

    # get bytes from the file
    data_block = self.data_blocks[episode - 1]
    n_bytes = data_block.size
    self.file.seek(data_block.start)
    databytes = np.frombuffer(self.file.read(n_bytes), '<i1')

    # detect which bits keep to recompose the tag
    ep_mask = np.ones(n_bytes, dtype=int)
    np.putmask(ep_mask, ep_mask, bit_mask)
    to_keep = np.where(ep_mask > 0)[0]
    raw = databytes.take(to_keep)
    # Floor division: reshape needs integer dimensions, and ``/`` yields a
    # float under Python 3 true division.
    raw = raw.reshape([len(raw) // sample_size, sample_size])

    # create a recarray containing data
    # newbyteorder returns a new dtype rather than modifying in place, so
    # the result has to be kept for the little-endian request to apply.
    dt = np.dtype(numpy_map[sample_symbol]).newbyteorder('<')
    tag_mask = 0b01 if (tag_channel == 1) else 0b10
    y_data = np.frombuffer(raw, dt) & tag_mask
    x_data = np.arange(0, len(y_data)) * block.dX + block.X0
    data = np.recarray(len(y_data), dtype=[('x', b_float), ('y', b_int)])
    data['x'] = x_data
    data['y'] = y_data
    return data
def filter_bytes(self, databytes, bit_mask):
    """
    Return the entries of ``databytes`` whose position is switched on in
    ``bit_mask``.

    The mask is cycled over all ``len(databytes)`` positions; positions
    where the cycled mask is greater than zero are kept.
    """
    total = len(databytes)
    flags = np.ones(total, dtype=int)
    # All flags start truthy, hence every one is replaced by the cycled mask.
    np.putmask(flags, flags, bit_mask)
    kept_positions = np.nonzero(flags > 0)[0]
    return databytes.take(kept_positions)
def get_tag_data(self, episode, tag_channel):
    """
    Read the tag samples of ``tag_channel`` for ``episode`` from the file.

    Returns a record array with fields ``x`` (sample index scaled by the
    episode block's ``dX`` and offset by ``X0``) and ``y`` (sample value
    masked with 0b01 for tag channel 1, 0b10 otherwise).
    """
    # memorise some useful properties
    block = self.episode_block(episode)
    sample_size = self.sample_size(episode, tag_channel)
    sample_symbol = self.sample_symbol(episode, tag_channel)

    # create a bit mask to define which
    # sample to keep from the file
    channel_mask = self.create_channel_mask(episode)
    bit_mask = self.create_bit_mask(channel_mask, 1)

    # get bytes from the file
    data_block = self.data_blocks[episode - 1]
    n_bytes = data_block.size
    self.file.seek(data_block.start)
    databytes = np.frombuffer(self.file.read(n_bytes), '<i1')

    # detect which bits keep to recompose the tag
    ep_mask = np.ones(n_bytes, dtype=int)
    np.putmask(ep_mask, ep_mask, bit_mask)
    to_keep = np.where(ep_mask > 0)[0]
    raw = databytes.take(to_keep)
    # Floor division: reshape needs integer dimensions, and ``/`` yields a
    # float under Python 3 true division.
    raw = raw.reshape([len(raw) // sample_size, sample_size])

    # create a recarray containing data
    # newbyteorder returns a new dtype rather than modifying in place, so
    # the result has to be kept for the little-endian request to apply.
    dt = np.dtype(numpy_map[sample_symbol]).newbyteorder('<')
    tag_mask = 0b01 if (tag_channel == 1) else 0b10
    y_data = np.frombuffer(raw, dt) & tag_mask
    x_data = np.arange(0, len(y_data)) * block.dX + block.X0
    data = np.recarray(len(y_data), dtype=[('x', b_float), ('y', b_int)])
    data['x'] = x_data
    data['y'] = y_data
    return data
def tst_basic(self, x, T, mask, val):
    """Apply ``np.putmask`` to ``x`` and check that the masked entries
    equal ``T(val)`` and that the dtype of ``x`` is still ``T``."""
    np.putmask(x, mask, val)
    expected = T(val)
    masked_ok = np.all(x[mask] == expected)
    assert_(masked_ok)
    assert_(x.dtype == T)
def test_mask_size(self):
    """``np.putmask`` raises ValueError for a one-element mask on a
    three-element array."""
    target = np.array([1, 2, 3])
    with assert_raises(ValueError):
        np.putmask(target, [True], 5)
def tst_byteorder(self, dtype):
    """Check ``np.putmask`` on a three-element array of the given dtype
    (callers pass dtypes of differing byte order)."""
    arr = np.array([1, 2, 3], dtype)
    selector = [True, False, True]
    np.putmask(arr, selector, -1)
    assert_array_equal(arr, [-1, 2, -1])
def test_record_array(self):
    """``np.putmask`` on one field of a mixed-byteorder record array
    changes only that field."""
    # Note mixed byteorder.
    rec = np.array([(-5, 2.0, 3.0), (5.0, 4.0, 3.0)],
                   dtype=[('x', '<f8'), ('y', '>f8'), ('z', '<f8')])

    def check_fields(x, y, z):
        assert_array_equal(rec['x'], x)
        assert_array_equal(rec['y'], y)
        assert_array_equal(rec['z'], z)

    first_only = [True, False]

    np.putmask(rec['x'], first_only, 10)
    check_fields([10, 5], [2, 4], [3, 3])

    np.putmask(rec['y'], first_only, 11)
    check_fields([10, 5], [11, 4], [3, 3])
def oppnorm_convert(arr, threshold=0.1):
    """
    Convert a 3-channel image array into two normalised opponent channels.

    Parameters
    ----------
    arr : array indexed as ``arr[:, :, channel]`` with at least 3 channels
    threshold : pixels whose channel maximum is below this value are set
        to 0 in both output channels

    Returns
    -------
    float32 array of shape ``arr.shape[:2] + (2,)``:
    channel 0 is ``(c0 - c1) / max``, channel 1 is
    ``(c2 - min(c0, c1)) / max``, where ``max`` is the per-pixel maximum
    over the channels.
    """
    arr = arr.astype('float32')
    out = np.empty(arr.shape[:2] + (2,), dtype='float32')
    # Function-call form keeps the original diagnostic output and is valid
    # on both Python 2 and Python 3.
    print(out.shape)

    # red-green
    out[:, :, 0] = arr[:, :, 0] - arr[:, :, 1]
    # blue-yellow
    out[:, :, 1] = arr[:, :, 2] - arr[:, :, [0, 1]].min(2)

    # intensity
    denom = arr.max(2)
    mask = denom < threshold

    # Pixels with a zero maximum divide by zero here; with the default
    # threshold they are zeroed right below, so the warnings are noise.
    with np.errstate(divide='ignore', invalid='ignore'):
        out[:, :, 0] /= denom
        out[:, :, 1] /= denom

    np.putmask(out[:, :, 0], mask, 0)
    np.putmask(out[:, :, 1], mask, 0)
    return out
# ------------------------------------------------------------------------------
def radian2degree(x):
    """
    Convert angles in radians to torsion angles in degrees.

    The input is first wrapped into [0, 2*pi); values above pi are then
    shifted down by 2*pi, so the result lies in (-180, 180].

    Scalars and sequences are accepted as well as arrays.

    @param x: radian angle(s)
    @return: torsion angle(s) of x, in degrees
    """
    wrapped = numpy.asarray(x) % (2 * numpy.pi)
    # numpy.where works on 0-d input too, unlike numpy.putmask, which
    # requires an ndarray target.
    wrapped = numpy.where(wrapped > numpy.pi, wrapped - 2 * numpy.pi, wrapped)
    return wrapped * 180. / numpy.pi
def degree2radian(x):
    """
    Convert torsion angles in degrees to radians.

    Negative angles are shifted up by 360 degrees before the conversion.
    The input is not modified; scalars and sequences are accepted as well
    as arrays.

    @param x: torsion angle(s), in degrees
    @return: radian angle(s) of x
    """
    degrees = numpy.asarray(x)
    # numpy.where builds a new array, so the caller's data stays intact
    # (an in-place numpy.putmask would overwrite it).
    degrees = numpy.where(degrees < 0., degrees + 360., degrees)
    return degrees * numpy.pi / 180.
def tst_basic(self, x, T, mask, val):
    """Run ``np.putmask(x, mask, val)`` and verify that the masked entries
    compare equal to ``T(val)`` and that ``x`` keeps dtype ``T``."""
    np.putmask(x, mask, val)
    wanted = T(val)
    written = x[mask]
    assert_(np.all(written == wanted))
    assert_(x.dtype == T)
def test_mask_size(self):
    """A mask shorter than the target array makes ``np.putmask`` raise
    ValueError."""
    short_mask = [True]
    with assert_raises(ValueError):
        np.putmask(np.array([1, 2, 3]), short_mask, 5)
def tst_byteorder(self, dtype):
    """Write -1 into the first and last element of a ``dtype`` array via
    ``np.putmask`` and check the outcome."""
    data = np.array([1, 2, 3], dtype)
    np.putmask(data, [True, False, True], -1)
    expected = [-1, 2, -1]
    assert_array_equal(data, expected)
def test_record_array(self):
    """Masked writes into single fields of a mixed-byteorder record array
    leave the other fields untouched."""
    # Note mixed byteorder.
    rec = np.array([(-5, 2.0, 3.0), (5.0, 4.0, 3.0)],
                   dtype=[('x', '<f8'), ('y', '>f8'), ('z', '<f8')])
    mask = [True, False]

    # (field to write, value, expected x / y / z afterwards)
    steps = [
        ('x', 10, ([10, 5], [2, 4], [3, 3])),
        ('y', 11, ([10, 5], [11, 4], [3, 3])),
    ]
    for field, value, expected in steps:
        np.putmask(rec[field], mask, value)
        for name, wanted in zip(('x', 'y', 'z'), expected):
            assert_array_equal(rec[name], wanted)
def resize_save(path, size):
    """
    Threshold, dilate and resize every file in ``path`` (relative to the
    current working directory), overwriting each file in place.

    Pixels below 100 are set to 0, the image is binary-dilated with a
    2x2 structuring element, resized to ``(size, size)`` and saved as
    uint8. Files that fail at any step are reported and skipped.
    """
    path = os.path.join(os.getcwd(), path)
    structure = np.array([[0, 1], [1, 1]]).astype(np.float32)

    for name in os.listdir(path):
        image_file = os.path.join(path, name)
        try:
            pixels = ndimage.imread(image_file).astype(np.float32)
            np.putmask(pixels, pixels < 100, 0)
            dilated = ndimage.binary_dilation(pixels, structure=structure)
            resized = misc.imresize(dilated, (size, size)).astype(np.uint8)
            Image.fromarray(resized).save(image_file)
        except Exception as e:
            # Deliberate best effort: report and move on to the next file.
            print('Could not read:', image_file, ':', e, "- it's ok, skipping.")
base.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 17
收藏 0
点赞 0
评论 0
def putmask(self, mask, value):
    """
    return a new Index of the values set with the mask

    The values are copied before writing. If the write or the re-wrapping
    raises ValueError or TypeError, the index is cast to object dtype and
    the operation is retried on that.

    See also
    --------
    numpy.ndarray.putmask
    """
    new_values = self.values.copy()
    try:
        converted = self._convert_for_op(value)
        np.putmask(new_values, mask, converted)
        return self._shallow_copy(new_values)
    except (ValueError, TypeError):
        # coerces to object
        as_object = self.astype(object)
        return as_object.putmask(mask, value)
base.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def fillna(self, value=None, downcast=None):
    """
    Return an index with the positions flagged by ``self._isnan`` replaced
    by ``value``.

    When the index has no NaNs a shallow copy is returned.
    NOTE(review): when ``downcast`` is not None the filled result is not
    returned and a shallow copy is returned instead - confirm intended.
    """
    self._assert_can_do_op(value)

    if not self.hasnans:
        return self._shallow_copy()

    filled = self.putmask(self._isnan, value)
    if downcast is None:
        # only the name is carried over to the new Index
        return Index(filled, name=self.name)
    return self._shallow_copy()
ops.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 44
收藏 0
点赞 0
评论 0
def _comp_method_PANEL(op, name, str_rep=None, masker=False):
    """
    Build the comparison method ``name`` for panel-like objects from ``op``.

    The returned method compares against another object of the same
    constructor or against a constant; sliced objects, DataFrames and
    Series are rejected because they would need alignment.
    """
    def na_op(x, y):
        try:
            result = expressions.evaluate(op, str_rep, x, y,
                                          raise_on_error=True)
        except TypeError:
            # Fallback: compare element-wise on the non-null positions only.
            x_flat = x.ravel()
            result = np.empty(x.size, dtype=bool)
            if isinstance(y, np.ndarray):
                y_flat = y.ravel()
                mask = notnull(x_flat) & notnull(y_flat)
                lhs = np.array(list(x_flat[mask]))
                rhs = np.array(list(y_flat[mask]))
                result[mask] = op(lhs, rhs)
            else:
                mask = notnull(x_flat)
                result[mask] = op(np.array(list(x_flat[mask])), y)

            # Null positions compare True for "!=" and False otherwise.
            if op == operator.ne:  # pragma: no cover
                null_fill = True
            else:
                null_fill = False
            np.putmask(result, ~mask, null_fill)
            result = result.reshape(x.shape)

        return result

    @Appender('Wrapper for comparison method %s' % name)
    def f(self, other):
        if isinstance(other, self._constructor):
            return self._compare_constructor(other, na_op)

        needs_alignment = (self._constructor_sliced, pd.DataFrame, ABCSeries)
        if isinstance(other, needs_alignment):
            raise Exception("input needs alignment for this object [%s]" %
                            self._constructor)

        return self._combine_const(other, na_op)

    f.__name__ = name
    return f
generic.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
               **kwargs):
    """
    Return the change relative to the value ``periods`` steps earlier,
    computed as ``data / data.shift(...) - 1``.

    Unless ``fill_method`` is None, NaNs are first filled with
    ``fillna(method=fill_method, limit=limit)``. When ``freq`` is None,
    positions that were null in the original object are set back to NaN
    in the result. An ``axis`` keyword is accepted through ``kwargs``.
    """
    # TODO: Not sure if above is correct - need someone to confirm.
    axis = self._get_axis_number(kwargs.pop('axis', self._stat_axis_name))

    if fill_method is not None:
        data = self.fillna(method=fill_method, limit=limit, axis=axis)
    else:
        data = self

    shifted = data.shift(periods=periods, freq=freq, axis=axis, **kwargs)
    rs = data.div(shifted) - 1

    if freq is None:
        originally_null = com.isnull(_values_from_object(self))
        np.putmask(rs.values, originally_null, np.nan)
    return rs
generic.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 21
收藏 0
点赞 0
评论 0
def _make_cum_function(name, name1, name2, axis_descr, desc, accum_func,
                       mask_a, mask_b):
    """
    Build the cumulative method ``name`` on top of ``accum_func``.

    With ``skipna`` and a non-integer, non-boolean dtype, null positions
    are replaced by ``mask_a`` before accumulating and by ``mask_b``
    afterwards; for datetime64/timedelta64 data they are set to iNaT after
    accumulating.
    """
    @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
                  axis_descr=axis_descr)
    @Appender("Return cumulative {0} over requested axis.".format(name) +
              _cnum_doc)
    def func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs):
        _validate_kwargs(name, kwargs, 'out', 'dtype')
        if axis is None:
            axis = self._stat_axis_number
        else:
            axis = self._get_axis_number(axis)

        y = _values_from_object(self).copy()
        scalar_type = y.dtype.type

        if skipna and issubclass(scalar_type,
                                 (np.datetime64, np.timedelta64)):
            result = accum_func(y, axis)
            np.putmask(result, isnull(self), pd.tslib.iNaT)
        elif skipna and not issubclass(scalar_type, (np.integer, np.bool_)):
            null_mask = isnull(self)
            np.putmask(y, null_mask, mask_a)
            result = accum_func(y, axis)
            np.putmask(result, null_mask, mask_b)
        else:
            result = accum_func(y, axis)

        axes = self._construct_axes_dict()
        axes['copy'] = False
        return self._constructor(result, **axes).__finalize__(self)

    func.__name__ = name
    return func
nanops.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def nanvar(values, axis=None, skipna=True, ddof=1):
    """
    Variance of ``values`` along ``axis`` with ``ddof`` delta degrees of
    freedom.

    Null positions contribute nothing to the sum of squared deviations;
    with ``skipna`` they are also zeroed before the mean is taken. Float
    input keeps its dtype in the result.
    """
    input_dtype = values.dtype
    mask = isnull(values)
    if is_any_int_dtype(values):
        values = values.astype('f8')
        values[mask] = np.nan

    # Pass the values dtype through only for float data.
    count_args = (mask, axis, ddof)
    if is_float_dtype(values):
        count_args += (values.dtype,)
    count, d = _get_counts_nanvar(*count_args)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    total = values.sum(axis=axis, dtype=np.float64)
    avg = _ensure_numeric(total) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)

    sqr = _ensure_numeric((avg - values)**2)
    np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(input_dtype):
        result = result.astype(input_dtype)
    return _wrap_results(result, values.dtype)
strings.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object):
    """
    Apply ``f`` to every element of ``arr`` and return the results as an
    array.

    Empty input yields an empty array of ``dtype``. With ``na_mask`` the
    null positions are passed to ``lib.map_infer_mask`` as a mask and, if
    ``na_value`` is not NaN, overwritten with ``na_value`` afterwards. If
    the mapping raises TypeError or AttributeError, it is retried with a
    wrapper that returns ``na_value`` for elements on which ``f`` fails.
    """
    from pandas.core.series import Series

    if not len(arr):
        return np.ndarray(0, dtype=dtype)

    if isinstance(arr, Series):
        arr = arr.values
    if not isinstance(arr, np.ndarray):
        arr = np.asarray(arr, dtype=object)

    if not na_mask:
        return lib.map_infer(arr, f)

    mask = isnull(arr)
    try:
        result = lib.map_infer_mask(arr, f, mask.view(np.uint8))
    except (TypeError, AttributeError):
        def g(x):
            try:
                return f(x)
            except (TypeError, AttributeError):
                return na_value

        return _map(g, arr, dtype=dtype)

    if na_value is not np.nan:
        np.putmask(result, mask, na_value)
        if result.dtype == object:
            result = lib.maybe_convert_objects(result)
    return result