def data_handlemissing(dataframe, pipeline):
    """Drop or impute missing values in ``dataframe`` as directed by ``pipeline``.

    ``pipeline['options']['type']`` selects the operation:

    * ``"dropcolumns"`` / ``"droprows"`` -- drop columns / rows **in place**
      according to ``pipeline['options']['thresh']``:

      - ``-1``: drop those whose values are all NA,
      - ``0``: drop those containing any NA,
      - ``> 0``: keep only those with at least ``thresh`` non-NA values.

      Any other ``thresh`` value leaves the frame untouched.
    * ``"fillmissing"`` -- impute with ``Imputer`` using
      ``pipeline['options']['strategy']`` and return a *new* DataFrame built
      from the imputed array with the original column labels.

    Any other ``type`` returns ``dataframe`` unchanged.

    Args:
        dataframe: the pandas DataFrame to clean.
        pipeline: mapping with an ``'options'`` mapping holding ``'type'``
            plus ``'thresh'`` (drop modes) or ``'strategy'`` (fill mode).

    Returns:
        The cleaned DataFrame: the same object for the drop modes, a new
        object for ``"fillmissing"``.

    Raises:
        Exception: any failure is re-raised as a plain ``Exception`` whose
            message is prefixed with ``"data_handlemissing: "``.
    """
    try:
        options = pipeline['options']
        kind = options['type']
        if kind in ("dropcolumns", "droprows"):
            # Both drop modes share the same thresh rules; only the axis
            # differs (1 = columns, 0 = rows).
            axis = 1 if kind == "dropcolumns" else 0
            thresh = options['thresh']
            if thresh == -1:
                dataframe.dropna(axis=axis, how="all", inplace=True)
            elif thresh == 0:
                dataframe.dropna(axis=axis, how="any", inplace=True)
            elif thresh > 0:
                # inplace=True is required here as well: without it the
                # filtered copy is discarded and nothing is dropped.
                dataframe.dropna(axis=axis, thresh=thresh, inplace=True)
        elif kind == "fillmissing":
            strategy = options['strategy']
            imp = Imputer(missing_values='NaN', strategy=strategy, axis=0)
            array = imp.fit_transform(dataframe.values)
            # NOTE(review): no index is passed, so the rebuilt frame gets a
            # default index rather than the original one -- confirm intended.
            dataframe = pandas.DataFrame(array, columns=dataframe.columns)
        return dataframe
    except Exception as e:
        raise Exception("data_handlemissing: " + str(e))
评论列表
文章目录