# hdf5.py source file - Python code snippet

def append_data_column(ds, column):
    """Append the values of ``column`` to the end of the 1-D dataset ``ds``.

    The dataset is first grown by ``column.shape[0]`` entries.  If
    ``get_levels(ds)`` reports that the dataset carries a level table
    (i.e. it is stored as a categorical column), any values in ``column``
    that are not yet known levels are added to the table, the dataset is
    widened to a larger index dtype when required, and the new values are
    written as indices into the updated level table.  Otherwise the column
    is written as-is.

    Args:
        ds: Resizable 1-D dataset supporting ``shape``, ``dtype``,
            ``resize`` and slice assignment (presumably an h5py dataset --
            confirm against callers).
        column: The data to append; must expose ``shape``, ``values``,
            ``unique()`` and ``len()`` (presumably a pandas Series --
            confirm against callers).

    Returns:
        None.  Note that when the column is widened, the data is written to
        the dataset returned by ``widen_cat_column``, not necessarily to the
        object the caller passed in.
    """
    # Extend the dataset to fit the new data
    new_count = column.shape[0]
    existing_count = ds.shape[0]
    ds.resize((existing_count + new_count,))

    levels = get_levels(ds)

    if levels is not None:
        # Update levels if we have new unique values
        if isinstance(column.values, p.Categorical):
            # Categorical data already knows its distinct values.
            added_levels = set(column.values.categories) - set(levels)
        elif len(column) == 0:
            # Workaround for bug in pandas - get a crash in .unique() for
            # an empty series
            added_levels = set()
        else:
            added_levels = set(column.unique()) - set(levels)

        # Existing levels keep their positions so indices already stored in
        # the dataset remain valid; new levels are appended after them.
        new_levels = list(levels)
        new_levels.extend(added_levels)

        # Check if the new categorical column has more levels
        # than the current bit width supports.
        # If so, rewrite the existing column data w/ more bits
        if len(new_levels) > np.iinfo(ds.dtype).max:
            new_dtype = pick_cat_dtype(len(new_levels))
            ds = widen_cat_column(ds, new_dtype)

        # Use the builtin ``object``: the ``np.object`` alias was deprecated
        # in NumPy 1.20 and removed in NumPy 1.24.
        new_levels = np.array(new_levels, dtype=object)
        new_data = make_index_array(new_levels, column.values, ds.dtype)

        # Replace the stored level table with the extended one.
        clear_levels(ds)
        create_levels(ds, new_levels)
    else:
        new_data = column

    # Append new data
    ds[existing_count:(existing_count + new_count)] = new_data