def sanitize_dataframe(df):
    """Return a sanitized copy of a DataFrame, ready for serialization.

    The input frame is never modified; all conversions are applied to a copy.

    Conversions performed column by column:

    * Categorical columns are converted to strings.
    * Datetime columns (naive or timezone-aware) are converted to strings,
      with missing values (``NaT``) rendered as the empty string.
    * NumPy integer columns are converted to ``object`` dtype so the values
      are plain Python ints.
    * NumPy floating columns are converted to ``object`` dtype with NaN
      replaced by ``None``.
    * Columns of any other dtype are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to sanitize.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with the conversions above applied.

    Raises
    ------
    ValueError
        If either the row index or the columns are a hierarchical
        (``MultiIndex``) index.
    """
    import pandas as pd
    import numpy as np

    df = df.copy()

    # Use the public ``pd.MultiIndex`` name: the private ``pd.core.index``
    # module path is not available in current pandas releases.
    if isinstance(df.index, pd.MultiIndex):
        raise ValueError('Hierarchical indices not supported')
    if isinstance(df.columns, pd.MultiIndex):
        raise ValueError('Hierarchical indices not supported')

    # ``Series.items`` replaces ``iteritems``, which was removed in pandas 2.0.
    for col_name, dtype in df.dtypes.items():
        dtype_name = str(dtype)
        if dtype_name == 'category':
            # XXXX: work around bug in to_json for categorical types
            # https://github.com/pydata/pandas/issues/10778
            df[col_name] = df[col_name].astype(str)
        elif dtype_name.startswith('datetime'):
            # Checked before the NumPy dtype tests so that timezone-aware
            # columns (a pandas extension dtype) reach this branch instead of
            # making np.issubdtype raise TypeError.
            # astype(str) picks the string resolution; NaT becomes ''.
            df[col_name] = df[col_name].astype(str).replace('NaT', '')
        elif not isinstance(dtype, np.dtype):
            # Other pandas extension dtypes cannot be interpreted by
            # np.issubdtype; leave those columns as they are.
            continue
        elif np.issubdtype(dtype, np.integer):
            # convert integers to objects; np.int is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif np.issubdtype(dtype, np.floating):
            # For floats, convert nan->None: np.float is not JSON serializable
            col = df[col_name].astype(object)
            df[col_name] = col.where(col.notnull(), None)
    return df
评论列表
文章目录