def initialize(self):
    """Initialize FixPandasDataFrame.

    Checks the link's configuration, settles which key the result is
    stored under, and normalizes the requested column data types.

    The steps, in order:

    1. ``check_arg_types`` and ``check_arg_vals`` are called for
       ``read_key``, ``store_key`` and ``original_columns``. Those helpers
       are defined outside this method; presumably they raise on a wrong
       type or a missing value -- confirm against their implementation.
    2. ``cleanup_string_columns`` must be a ``list`` or a ``bool``.
    3. ``store_key`` is resolved:

       * if it equals ``read_key``, ``inplace`` is switched on;
       * if ``inplace`` is on, ``store_key`` is set to ``read_key``;
       * if ``store_key`` is still empty, it becomes ``read_key + '_fix'``.
    4. Every key of ``var_dtype`` is appended to ``contaminated_columns``
       when not already present, and its value is replaced by the numpy
       scalar type that ``np.dtype`` resolves it to. ``np.str_`` and
       ``np.object_`` are both mapped to the builtin ``str``.

    :returns: ``StatusCode.Success``
    :raises AssertionError: if ``cleanup_string_columns`` is neither a list
        nor a bool
    :raises TypeError: if a value in ``var_dtype`` cannot be interpreted by
        ``np.dtype``
    """
    self.check_arg_types(read_key=str, store_key=str)
    self.check_arg_types(recurse=True, allow_none=True, original_columns=str)
    self.check_arg_vals('read_key')

    # AssertionError is kept (rather than TypeError) because callers may
    # already catch it.
    if not isinstance(self.cleanup_string_columns, (list, bool)):
        raise AssertionError('cleanup_string_columns should be a list of column names or boolean.')

    # The three checks below are deliberately sequential, not elif: the
    # first may switch inplace on, which the second then acts upon.
    if self.read_key == self.store_key:
        self.inplace = True
        self.log().debug('store_key equals read_key; inplace has been set to "True"')
    if self.inplace:
        self.store_key = self.read_key
        self.log().debug('store_key has been set to read_key "%s"', self.store_key)
    if not self.store_key:
        self.store_key = self.read_key + '_fix'
        self.log().debug('store_key has been set to "%s"', self.store_key)

    # check data types
    # Only values of existing keys are reassigned below, so the dict does
    # not change size while it is being iterated.
    for column, requested in self.var_dtype.items():
        if column not in self.contaminated_columns:
            self.contaminated_columns.append(column)
        try:
            # convert to consistent types
            scalar_type = np.dtype(requested).type
        except Exception as exc:
            # Exception, not BaseException: KeyboardInterrupt and SystemExit
            # must propagate untouched instead of being reported as a
            # datatype problem.
            raise TypeError('unknown assigned datatype to variable "%s"' % column) from exc
        if scalar_type is np.str_ or scalar_type is np.object_:
            scalar_type = str
        self.var_dtype[column] = scalar_type

    return StatusCode.Success
# Web-page navigation residue (not part of the code): "Comment list", "Table of contents".