def append_data_column(ds, column):
    """Append the values of ``column`` to the end of the dataset ``ds``.

    The dataset is first grown by ``column.shape[0]`` entries.  What is
    written into the new tail depends on whether ``get_levels(ds)`` reports
    levels for the dataset:

    * No levels (``None``): ``column`` is written as-is.
    * Levels present: any values of ``column`` that are not yet known levels
      are appended to the level list, the dataset is widened when the level
      count exceeds ``np.iinfo(ds.dtype).max``, the column is encoded with
      ``make_index_array`` and the stored levels are replaced.

    Args:
        ds: Resizable one-dimensional dataset exposing ``shape``, ``dtype``,
            ``resize`` and slice assignment (presumably an h5py-style
            dataset -- confirm against the callers).
        column: Column to append; must provide ``shape``, ``values``,
            ``unique()`` and ``len()`` (presumably a pandas Series -- confirm).

    Returns:
        None.  NOTE(review): when the dataset is widened, ``ds`` is rebound
        only locally; the caller's reference is not updated by this function.
    """
    # Extend the dataset to fit the new data
    new_count = column.shape[0]
    existing_count = ds.shape[0]
    ds.resize((existing_count + new_count,))

    levels = get_levels(ds)
    if levels is None:
        # Dataset has no levels: store the raw values.
        new_data = column
    else:
        # Update levels if we have new unique values
        if isinstance(column.values, p.Categorical):
            added_levels = set(column.values.categories) - set(levels)
        elif len(column) == 0:
            # Workaround for bug in pandas - get a crash in .unique()
            # for an empty series
            added_levels = set()
        else:
            added_levels = set(column.unique()) - set(levels)

        # Existing levels keep their positions so indices already stored
        # stay valid; new ones go at the end (in set iteration order).
        new_levels = list(levels)
        new_levels.extend(added_levels)

        # Check if the new categorical column has more levels
        # than the current bit width supports.
        # If so, rewrite the existing column data w/ more bits
        if len(new_levels) > np.iinfo(ds.dtype).max:
            new_dtype = pick_cat_dtype(len(new_levels))
            ds = widen_cat_column(ds, new_dtype)

        # ``np.object`` was removed in NumPy 1.24; the builtin ``object`` is
        # the exact type that alias used to refer to.
        new_levels = np.array(new_levels, dtype=object)
        new_data = make_index_array(new_levels, column.values, ds.dtype)

        clear_levels(ds)
        create_levels(ds, new_levels)

    # Append new data
    ds[existing_count:existing_count + new_count] = new_data
# 评论列表 (comment list)
# 文章目录 (table of contents)