def preprocess_day(a, b):
    """Clean the restaurant train/test text and round-trip it through HDF5.

    The function reads the train and test TSV files, applies ``clean`` to
    the ``text`` column of each, writes both frames to HDF5 under the key
    ``'table'``, reads them back, and parses every ``text`` value with
    ``ast.literal_eval``. The ``text`` value at label 10 of the train frame
    is printed before cleaning and again after the reload.

    Args:
        a: Ignored. The name is rebound to the train frame read from disk.
        b: Ignored. The name is rebound to the test frame read from disk.

    Returns:
        None. The reloaded frames are local to this function and are not
        handed back to the caller.
    """
    train_tsv = 'data/restaurants_train_data.tsv'
    test_tsv = 'data/restaurants_test_data.tsv'
    train_h5 = 'data/restaurants_train_data_processed.h5'
    test_h5 = 'data/restaurants_test_data_processed.h5'
    hdf_key = 'table'

    # NOTE(review): the incoming arguments are discarded here; both frames
    # always come from the hard-coded TSV paths above.
    a = pd.read_csv(train_tsv, delimiter='\t')
    b = pd.read_csv(test_tsv, delimiter='\t')
    # Label-based lookup of entry 10 in the 'text' column, shown pre-cleaning.
    print(a['text'][10])

    a['text'] = a['text'].apply(clean)
    b['text'] = b['text'].apply(clean)

    # Save the pre-processed data as HDF5 files. The key is passed by
    # keyword because newer pandas releases no longer accept it positionally.
    a.to_hdf(train_h5, key=hdf_key)
    b.to_hdf(test_h5, key=hdf_key)

    # Load the pre-processed data back from HDF5.
    # literal_eval parses each stored value as a Python literal; presumably
    # clean() yields the string form of a list -- TODO confirm against clean().
    a = pd.read_hdf(train_h5, key=hdf_key)
    a['text'] = a['text'].apply(ast.literal_eval)
    b = pd.read_hdf(test_h5, key=hdf_key)
    b['text'] = b['text'].apply(ast.literal_eval)
    # Same entry as above, now after cleaning, the HDF5 round trip and parsing.
    print(a['text'][10])
preprocess.py 文件源码
python
阅读 35
收藏 0
点赞 0
评论 0
评论列表
文章目录