def transform(self, x):
    """
    Transform array | series x
    """
    try:
        return self.trans.transform(x)
    except TypeError:
        return np.array([self.trans.transform(val) for val in x])
def inverse(self, x):
    """
    Inverse transform array | series x
    """
    try:
        return self.trans.inverse(x)
    except TypeError:
        return np.array([self.trans.inverse(val) for val in x])
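# Minimal sketch of the try/except fallback used by transform()/inverse() above:
# when the wrapped trans object only accepts scalars, the TypeError branch applies
# it element-wise. _ScalarSqrt is a hypothetical scalar-only transform used purely
# for illustration; numpy as np and pandas as pd are assumed imported.
class _ScalarSqrt:
    """Hypothetical scalar-only transform used for illustration."""
    def transform(self, val):
        if np.ndim(val) != 0:            # reject arrays/Series to force the fallback
            raise TypeError('scalar expected')
        return float(val) ** 0.5

_trans = _ScalarSqrt()
_x = pd.Series([1.0, 4.0, 9.0])
try:
    _result = _trans.transform(_x)
except TypeError:
    # same element-wise fallback as in transform() above
    _result = np.array([_trans.transform(val) for val in _x])
# _result -> array([1., 2., 3.])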
def image_identification_datasetup(df1, df2, sample=30000):
    '''
    Function:
        - takes two dataframes (each should be an output dataframe
          from "feature_engineering_CNN" of feature_engineering_func.py) and
          combines them into one.
        - it also creates a label pd.Series for CNN image recognition.
    filter applied:
        - "sample" determines the number of rows extracted from each dataframe.
          For instance, if sample = 30000, 30000 rows are randomly chosen
          from df1 and from df2.
        - it also drops the countrycode and word columns.
    inputs:
        2 dataframes
        sample = number of rows you want to extract from each dataframe
    outputs:
        dataframe and a label
    '''
    # sample rows without replacement from each dataframe
    random_index1 = np.random.choice(list(df1.index), sample, replace=False)
    random_index2 = np.random.choice(list(df2.index), sample, replace=False)
    df1 = df1.loc[list(random_index1)]
    df2 = df2.loc[list(random_index2)]
    # stack the two samples and drop the non-image columns
    df_test = pd.concat([df1, df2], axis=0)
    df_test = df_test.drop(['countrycode', 'word'], axis=1)
    # 1 = df1, 0 = df2
    label = [1] * sample + [0] * sample
    label = np.array(label)
    label = pd.Series(label)
    label.index = df_test.index
    return df_test, label
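# Minimal usage sketch for image_identification_datasetup (assumes numpy as np
# and pandas as pd are imported). The px0/px1 columns are hypothetical
# placeholders standing in for the real feature_engineering_CNN output.
df_a = pd.DataFrame({'countrycode': 'US', 'word': 'cat',
                     'px0': np.random.rand(100), 'px1': np.random.rand(100)})
df_b = pd.DataFrame({'countrycode': 'GB', 'word': 'dog',
                     'px0': np.random.rand(100), 'px1': np.random.rand(100)})
X, y = image_identification_datasetup(df_a, df_b, sample=50)
# X: 100 rows with only the pixel columns; y: 50 ones (from df_a) then 50 zeros (from df_b)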
def construct_empty_hist(self, columns):
    """Create an (empty) histogram of the right type

    Create a multi-dim histogram by iterating through the columns in
    reverse order and passing a single-dim hist as input to the next
    column.

    :param list columns: histogram columns
    :returns: created histogram
    :rtype: histogrammar.Count
    """
    hist = hg.Count()

    # create a multi-dim histogram by iterating through the columns in reverse order
    # and passing a single-dim hist as input to the next column
    for col in reversed(columns):
        # histogram type depends on the data type
        dt = np.dtype(self.var_dtype[col])
        # processing function, e.g. only accept booleans during filling
        f = self.quantity[col] if col in self.quantity else hf.QUANTITY[dt.type]
        if len(columns) == 1:
            # df[col] is a pd.Series
            quant = lambda x, fnc=f: fnc(x)
        else:
            # df[columns] is a pd.DataFrame
            # fix column to col
            quant = lambda x, fnc=f, clm=col: fnc(x[clm])

        is_number = isinstance(dt.type(), np.number)
        is_timestamp = isinstance(dt.type(), np.datetime64)
        if is_number or is_timestamp:
            # numbers and timestamps are put in a sparse binned histogram
            bs = self.bin_specs.get(col, self._unit_bin_specs if is_number else self._unit_timestamp_specs)
            hist = hg.SparselyBin(binWidth=bs['bin_width'], origin=bs['bin_offset'], quantity=quant, value=hist)
        else:
            # strings and booleans are treated as categories
            hist = hg.Categorize(quantity=quant, value=hist)

    # FIXME stick data types and number of dimensions to histogram
    dta = [self.var_dtype[col] for col in columns]
    hist.datatype = dta[0] if len(columns) == 1 else dta
    hist.n_dim = len(columns)

    @property
    def n_bins(self):
        if hasattr(self, 'num'):
            return self.num
        elif hasattr(self, 'size'):
            return self.size
        else:
            raise RuntimeError('Cannot retrieve number of bins from hgr hist')

    hist.n_bins = n_bins
    return hist
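# Standalone sketch of the same nesting idea used by construct_empty_hist:
# the innermost aggregator is built first, then wrapped outward per column.
# Assumes the histogrammar package is installed; column names, bin width and
# the printed attribute are illustrative only, without the class bookkeeping
# (var_dtype, bin_specs, quantity).
import histogrammar as hg
import pandas as pd

df = pd.DataFrame({'age': [23, 35, 35, 61], 'country': ['NL', 'NL', 'US', 'US']})

hist = hg.Count()
hist = hg.SparselyBin(binWidth=10.0, origin=0.0,
                      quantity=lambda x: x['age'], value=hist)      # inner dim: age
hist = hg.Categorize(quantity=lambda x: x['country'], value=hist)   # outer dim: country

for _, row in df.iterrows():
    hist.fill(row)            # each row is passed through the quantity lambdas
# hist.bins now maps each observed country to a SparselyBin of ages, e.g. 'NL', 'US'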
def continuum_correct(spectrum, nodes=None, method='linear'):
    """
    Apply a continuum correction to a given spectrum

    Parameters
    ==========
    spectrum : pd.Series
        A pandas Series or Spectrum object

    nodes : list
        A list of the nodes between which the piecewise continuum
        will be fit

    method : {'linear', 'regression', 'cubic'}
        The type of regression to be fit, where 'linear' is a piecewise
        linear fit, 'regression' is an Ordinary Least Squares fit, and
        'cubic' is a 2nd order polynomial fit.

    Returns
    =======
     : pd.Series
        The continuum corrected spectrum

     : pd.Series
        The continuum line
    """
    x = spectrum.index
    y = spectrum

    if not nodes:
        nodes = [x[0], x[-1]]

    return_length = len(y)
    corrected = np.empty(return_length)
    continuum = np.empty(return_length)

    start = 0
    nlist = list(zip(nodes, nodes[1:]))
    for i, n in enumerate(nlist):
        # Define indices into the sub-series
        ny = y[n[0]:n[1]]
        nx = ny.index
        if i == 0:
            # first segment: include everything up to the second node
            stop = start + len(y[:n[1]])
            c = correction_methods[method](nx, ny, ex=y[:n[1]].index.values)
            ey = y[:n[1]]
        elif i == len(nlist) - 1:
            # last segment: include everything from the first node onward
            stop = start + len(y[n[0]:])
            c = correction_methods[method](nx, ny, ex=y[n[0]:].index.values)
            ey = y[n[0]:]
        else:
            stop = start + len(ny)
            c = correction_methods[method](nx, ny)
            ey = ny
        continuum[start:stop] = c
        corrected[start:stop] = ey / c
        start = stop

    return pd.Series(corrected, index=x), pd.Series(continuum, index=x)
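# Minimal usage sketch for continuum_correct (assumes numpy as np and pandas as pd
# are imported). correction_methods is assumed to be defined elsewhere in this
# module; _linear below is a hypothetical stand-in that fits a straight line
# through the segment end points and evaluates it at ex.
def _linear(nx, ny, ex=None):
    ex = nx.values if ex is None else ex
    slope = (ny.iloc[-1] - ny.iloc[0]) / (nx[-1] - nx[0])
    return ny.iloc[0] + slope * (ex - nx[0])

correction_methods = {'linear': _linear}

wavelengths = np.linspace(400.0, 700.0, 31)
spectrum = pd.Series(1.0 + 0.002 * (wavelengths - 400.0), index=wavelengths)
corrected, continuum = continuum_correct(spectrum, method='linear')
# continuum is the straight line between the end points;
# corrected is spectrum / continuum (~1.0 everywhere for this toy spectrum)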