def combine_dataframe_into_pickle_file(dataframe, outfile, overwrite=False):
    """
    Save the provided pandas dataframe as a pickle at the provided file path.

    If a file is already present at that location (and ``overwrite`` is false), it is unpickled, combined with the
    new dataframe, and the result is pickled back to the same path (replacing the file but keeping its data). The
    merge uses ``combine_first`` called on the new dataframe, so new data is prioritized and data from before is
    kept where the new dataframe has nothing to offer.

    The merge is only meaningful when the stored dataframe and the new one share compatible indices; mismatched
    indices will still merge, but the result is unlikely to be what you want.

    :param pandas.DataFrame dataframe: input dataframe
    :param str outfile: output file
    :param bool overwrite: if true, skip the merge and replace whatever is at ``outfile`` with ``dataframe``
    :raises TypeError: if the existing file at ``outfile`` does not unpickle to a pandas DataFrame; the file is
        left untouched in that case
    :return None:
    """
    # Nothing to merge with (or the caller asked us to discard it): just write the new data.
    if overwrite or not os.path.exists(outfile):
        dataframe.to_pickle(outfile)
        return

    # NOTE: unpickling can execute arbitrary code, so outfile must come from a trusted source.
    existing_df = pandas.read_pickle(outfile)

    # Fail fast with a clear message *before* writing anything, so a foreign pickle is never clobbered.
    if not isinstance(existing_df, pandas.DataFrame):
        raise TypeError(
            "Cannot combine with existing pickle at {!r}: expected a pandas DataFrame, found {}".format(
                outfile, type(existing_df).__name__
            )
        )

    # combine_first is called on the new frame so that its values win over the stored ones.
    dataframe.combine_first(existing_df).to_pickle(outfile)
# Comment list
# Article table of contents