def get_comments_product_feat(start_date, end_date):
    """Return per-product comment features, cached on disk as a pickle.

    The rows kept are those whose ``dt`` falls in
    ``[comment_date_begin, end_date)``, where ``comment_date_begin`` is the
    last entry of ``comment_date`` that is strictly smaller than ``end_date``
    (or ``comment_date[0]`` when no entry qualifies). ``comment_num`` is
    one-hot encoded into ``comment_num_<value>`` columns.

    Relies on ``comment_path`` (CSV location) and ``comment_date`` (sequence
    of dates) being defined at module level outside this function.
    NOTE(review): scanning ``reversed(comment_date)`` only yields the latest
    earlier date if ``comment_date`` is sorted ascending -- confirm.

    Args:
        start_date: Only used to build the cache file name; it does not
            affect which rows are selected.
        end_date: Exclusive upper bound for the ``dt`` column; also part of
            the cache file name.

    Returns:
        The object stored in the cache file when it exists; otherwise a
        DataFrame with the columns ``sku_id``, ``has_bad_comment``,
        ``bad_comment_rate`` and ``comment_num_1`` .. ``comment_num_4``.

    Raises:
        KeyError: If one of the selected columns is missing, e.g. because a
            ``comment_num`` value never occurs in the filtered rows.
    """
    dump_path = './cache/comments_accumulate_%s_%s.pkl' % (start_date, end_date)

    if os.path.exists(dump_path):
        # Pickle data is binary: it must be read in 'rb' mode, and the handle
        # is closed deterministically by the context manager.
        # NOTE: unpickling can execute arbitrary code -- the cache directory
        # must only contain files written by this program.
        with open(dump_path, 'rb') as cache_file:
            return pickle.load(cache_file)

    comments = pd.read_csv(comment_path)
    comment_date_end = end_date
    # First date (scanning from the end) that is strictly before the end
    # date; fall back to the first known date when none qualifies.
    comment_date_begin = next(
        (date for date in reversed(comment_date) if date < comment_date_end),
        comment_date[0],
    )

    in_window = (comments.dt >= comment_date_begin) & (comments.dt < comment_date_end)
    comments = comments[in_window]

    dummies = pd.get_dummies(comments['comment_num'], prefix='comment_num')
    comments = pd.concat([comments, dummies], axis=1)  # type: pd.DataFrame
    comments = comments[
        ['sku_id', 'has_bad_comment', 'bad_comment_rate', 'comment_num_1',
         'comment_num_2', 'comment_num_3', 'comment_num_4']]

    # Binary mode is required for pickle output; 'with' guarantees the data
    # is flushed and the handle closed before returning.
    with open(dump_path, 'wb') as cache_file:
        pickle.dump(comments, cache_file)
    return comments
评论列表
文章目录