fix_pandas_dataframe.py 文件源码

python
阅读 26 收藏 0 点赞 0 评论 0

项目:Eskapade 作者: KaveIO 项目源码 文件源码
def initialize(self):
        """Validate the link's settings and normalize them before execution.

        Steps performed, in order:

        1. Argument checks are delegated to ``self.check_arg_types`` and
           ``self.check_arg_vals`` for ``read_key``, ``store_key`` and
           ``original_columns``.
        2. ``cleanup_string_columns`` must be a list or a boolean.
        3. ``store_key`` is resolved: it equals ``read_key`` when operating
           in place, and defaults to ``read_key + '_fix'`` when left empty.
        4. Every column named in ``var_dtype`` is added to
           ``contaminated_columns`` (if missing) and its requested dtype is
           replaced by the corresponding NumPy scalar type, with NumPy
           string/object types mapped to the builtin ``str``.

        :returns: ``StatusCode.Success``
        :raises AssertionError: if ``cleanup_string_columns`` is neither a
            list nor a boolean
        :raises TypeError: if a value in ``var_dtype`` cannot be interpreted
            by ``np.dtype``
        """
        self.check_arg_types(read_key=str, store_key=str)
        self.check_arg_types(recurse=True, allow_none=True, original_columns=str)
        self.check_arg_vals('read_key')

        if not isinstance(self.cleanup_string_columns, (list, bool)):
            raise AssertionError('cleanup_string_columns should be a list of column names or boolean.')

        # Identical keys imply the caller wants the input overwritten.
        if self.read_key == self.store_key:
            self.inplace = True
            self.log().debug('store_key equals read_key; inplace has been set to "True"')

        if self.inplace:
            self.store_key = self.read_key
            self.log().debug('store_key has been set to read_key "%s"', self.store_key)

        # No explicit output key and not in place: derive one from read_key.
        if not self.store_key:
            self.store_key = self.read_key + '_fix'
            self.log().debug('store_key has been set to "%s"', self.store_key)

        # check data types
        # Only existing keys are reassigned below, so the mapping's size does
        # not change while it is being iterated.
        for k in self.var_dtype.keys():
            if k not in self.contaminated_columns:
                self.contaminated_columns.append(k)
            try:
                # convert to consistent types
                dt = np.dtype(self.var_dtype[k]).type
            except Exception as exc:
                # Catch Exception rather than BaseException so that
                # KeyboardInterrupt/SystemExit are not masked as a TypeError;
                # chain the cause to keep NumPy's own message visible.
                raise TypeError('unknown assigned datatype to variable "%s"' % k) from exc
            if dt is np.str_ or dt is np.object_:
                dt = str
            self.var_dtype[k] = dt

        return StatusCode.Success
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号