def analyze_pd_dataframe(dataframe, target_attributes):
    """Turn a pandas DataFrame into the internal array representation.

    The work is delegated to three module-level helpers, applied in order:
    ``_normalize_pd_column_names``, ``_get_pd_attribute_types`` and
    ``_replace_objects_by_integers``. The ``.values`` of the resulting
    frame is returned together with the attribute-type description.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Input data; can contain float, int and object columns.
    target_attributes : int, str or list
        Identifies the target attribute(s). If this is

        * an int, it is used as an index (only positive indices work),
        * a str, it is compared with the column values,
        * a list (which must consist of all ints or all strs), every
          element that matches is assumed to be a target.

    Returns
    -------
    np.ndarray
        The data. All columns are converted to type float. Categorical
        data is encoded by positive integers.
    dict
        Attribute types. Contains the following keys:

        * ``type``: ``categorical`` or ``numerical``
        * ``name``: column name of the dataframe
        * ``is_target``: whether this column was designated as a target
          column
    """
    # Each stage gets its own name so the caller's argument is never rebound.
    normalized = _normalize_pd_column_names(dataframe)
    attribute_types = _get_pd_attribute_types(normalized, target_attributes)
    encoded = _replace_objects_by_integers(normalized, attribute_types)
    return encoded.values, attribute_types
object_analyzer.py 文件源码
python
阅读 17
收藏 0
点赞 0
评论 0
评论列表
文章目录