def transform(self, X):
    """Map ``X`` onto bin codes (or bin labels) using the stored cut points.

    Each value is replaced by the index ``numpy.searchsorted`` returns for
    it against the cut points; NaN values are replaced by ``self.fill_na``.

    Parameters
    ----------
    X : numpy.ndarray, pandas.Series or pandas.DataFrame
        Data to convert. It is copied first, so the caller's object is not
        modified. One-dimensional input is binned with ``self.cuts``
        directly; otherwise ``self.cuts`` is iterated as a mapping
        ``feature -> cut points`` and only those features are converted.

    Returns
    -------
    Same container type as ``X``, or the underlying ``.values`` array when
    ``self.return_array`` is true and the result is a pandas object.

    Raises
    ------
    TypeError
        If ``X`` is a numpy array and ``self.fill_na`` is a string.

    Notes
    -----
    When ``self.return_numeric`` is false, codes are translated through the
    mapping returned by ``self.get_label()``. This translation is only
    applied to pandas input; numpy arrays always keep numeric codes (a
    warning is emitted in that case).
    """
    data = X.copy()
    is_array = isinstance(data, np.ndarray)

    if is_array:
        if isinstance(self.fill_na, str):
            raise TypeError(
                'numpy arrays cannot be filled with a string fill_na value')
        if not self.return_numeric:
            warnings.warn(
                'numpy arrays are always returned as numeric codes; '
                'pass a DataFrame or Series to get labels')

    if not self.return_numeric:
        newlabel = self.get_label()

    if len(data.shape) == 1:
        codes = np.searchsorted(self.cuts, data).astype(int)
        # searchsorted places NaN after the last cut point, so mark missing
        # values with the sentinel -1 before they are replaced below.
        result = np.where(np.isnan(data), -1, codes)
        if not self.return_numeric and not is_array:
            to_label = np.frompyfunc(
                lambda code: newlabel.get(code, self.fill_na), 1, 1)
            result = to_label(result)
        if not is_array:
            # Restore the pandas index (including its name) and series name.
            result = pd.Series(result, index=data.index, name=data.name)
        result[result == -1] = self.fill_na
        data = result
    else:
        is_frame = isinstance(data, pd.DataFrame)
        for feature in self.cuts:
            cut_points = self.cuts[feature]
            if not is_frame:
                # 2-D numpy input: `feature` is used as a column position.
                column = data[:, feature]
                codes = np.searchsorted(cut_points, column).astype(int)
                data[:, feature] = np.where(
                    np.isnan(column), self.fill_na, codes)
                continue

            codes = np.searchsorted(cut_points, data[feature]).astype(int)
            data[feature] = np.where(np.isnan(data[feature]), -1, codes)
            if not self.return_numeric:
                labels = newlabel[feature]
                # Codes without a label (including the -1 sentinel, unless
                # it is labelled) fall back to fill_na.
                to_label = np.frompyfunc(
                    lambda code: labels.get(code, self.fill_na), 1, 1)
                data[feature] = to_label(data[feature])
            else:
                data.loc[data[feature] == -1, feature] = self.fill_na

    if self.return_array and isinstance(data, (pd.Series, pd.DataFrame)):
        return data.values
    return data
# Example source code using Python's frompyfunc()
def prepare_node(self, node, storage_map, compute_map, impl):
    """Attach the Python ufunc, dtype signature and scalar fake node to ``node``.

    ``storage_map`` and ``compute_map`` are accepted for interface
    compatibility but are not used here.

    Side effects visible in this method:

    * may build a ufunc with ``numpy.frompyfunc`` and store it on
      ``self.ufunc`` or ``node.tag.ufunc``;
    * may set ``node.tag.sig``;
    * always sets ``node.tag.fake_node`` and forwards it to
      ``self.scalar_op.prepare_node`` together with ``impl``.
    """
    # Postpone the ufunc building to the last minutes
    # NumPy ufunc support only up to 31 inputs.
    # But our c code support more.
    if (len(node.inputs) < 32 and
            (self.nfunc is None or
             self.scalar_op.nin != len(node.inputs)) and
            self.ufunc is None and
            impl == 'py'):
        ufunc = numpy.frompyfunc(self.scalar_op.impl,
                                 len(node.inputs),
                                 self.scalar_op.nout)
        if self.scalar_op.nin > 0:
            # We can reuse it for many nodes
            self.ufunc = ufunc
        else:
            node.tag.ufunc = ufunc

    # Numpy ufuncs will sometimes perform operations in
    # float16, in particular when the input is int8.
    # This is not something that we want, and we do not
    # do it in the C code, so we specify that the computation
    # should be carried out in the returned dtype.
    # This is done via the "sig" kwarg of the ufunc, its value
    # should be something like "ff->f", where the characters
    # represent the dtype of the inputs and outputs.

    # NumPy 1.10.1 raise an error when giving the signature
    # when the input is complex. So add it only when inputs is int.
    out_dtype = node.outputs[0].dtype
    if (out_dtype in float_dtypes and
            isinstance(self.nfunc, numpy.ufunc) and
            node.inputs[0].dtype in discrete_dtypes):
        # numpy.sctype2char was removed in NumPy 2.0; dtype(...).char gives
        # the same one-character type code on every NumPy version.
        char = numpy.dtype(out_dtype).char
        node.tag.sig = char * node.nin + '->' + char * node.nout

    # Scalar-typed stand-ins for every input and output of the node.
    scalar_inputs = [
        get_scalar_type(dtype=in_var.type.dtype).make_variable()
        for in_var in node.inputs]
    scalar_outputs = [
        get_scalar_type(dtype=out_var.type.dtype).make_variable()
        for out_var in node.outputs]
    node.tag.fake_node = Apply(self.scalar_op, scalar_inputs, scalar_outputs)

    self.scalar_op.prepare_node(node.tag.fake_node, None, None, impl)
def perform(self, node, inp, out):
    """Reduce the single input along ``self.axis`` and store the result.

    Parameters
    ----------
    node
        Node being evaluated; ``node.outputs[0].type.dtype`` is the dtype
        of the stored result.
    inp
        One-element sequence holding the input array.
    out
        One-element sequence holding the output storage cell; the result is
        written to index 0 of that cell.

    Axes are reduced from the highest to the lowest so that the remaining
    axis numbers stay valid. ``self.axis is None`` means every axis.
    When there is nothing to reduce, a copy of the input with the output
    dtype is stored.

    Raises
    ------
    ValueError
        If a reduced axis has size zero and ``self.scalar_op`` has no
        ``identity`` attribute.
    """
    x, = inp
    output, = out

    axis = self.axis
    if axis is None:
        axis = list(range(x.ndim))
    # A real list (not a `reversed` iterator, which is always truthy) so
    # that the emptiness check below actually works.
    to_reduce = sorted(axis, reverse=True)

    acc_dtype = getattr(self, 'acc_dtype', None)
    if acc_dtype is None:
        acc_dtype = node.outputs[0].type.dtype

    if not to_reduce:
        # Force a copy
        output[0] = numpy.array(x, copy=True,
                                dtype=node.outputs[0].type.dtype)
        return

    variable = x
    for dimension in to_reduce:
        # If it's a zero-size array, use scalar_op.identity
        # if available
        if variable.shape[dimension] == 0:
            if not hasattr(self.scalar_op, 'identity'):
                raise ValueError((
                    "Input (%s) has zero-size on axis %s, but "
                    "self.scalar_op (%s) has no attribute 'identity'"
                    % (variable, dimension, self.scalar_op)))
            # Compute the shape of the output
            v_shape = list(variable.shape)
            del v_shape[dimension]
            variable = numpy.empty(tuple(v_shape), dtype=acc_dtype)
            variable.fill(self.scalar_op.identity)
        else:
            # Numpy 1.6 has a bug where you sometimes have to specify
            # "dtype='object'" in reduce for it to work, if the ufunc
            # was built with "frompyfunc". We need to find out if we
            # are in one of these cases (only "object" is supported in
            # the output).
            if ((self.ufunc.ntypes == 1) and
                    (self.ufunc.types[0][-1] == 'O')):
                reduce_dtype = 'object'
            else:
                reduce_dtype = acc_dtype
            variable = self.ufunc.reduce(variable, dimension,
                                         dtype=reduce_dtype)

    variable = numpy.asarray(variable)
    if numpy.may_share_memory(variable, x):
        # perhaps numpy is clever for reductions of size 1?
        # We don't want this.
        variable = variable.copy()
    output[0] = theano._asarray(variable,
                                dtype=node.outputs[0].type.dtype)