def load_data(filename, columns, separator):
    ''' Load a delimited interaction file and sort it chronologically.

    Args:
        filename: path or file-like object handed to ``pd.read_csv``.
        columns: sequence of column names, one per leading field of the
            file (e.g. the string ``'uirt'`` yields the columns ``u``,
            ``i``, ``r`` and ``t``). Only the first ``len(columns)``
            fields of each line are read.
        separator: field delimiter handed to ``pd.read_csv``.

    Returns:
        A dataframe holding the requested columns. If ``'r'`` is not among
        them, an ``r`` column filled with 1 is added. If ``'t'`` is among
        them, it is converted to datetimes and the rows are sorted by it
        in ascending order.
    '''
    print('Load data...')
    data = pd.read_csv(
        filename,
        sep=separator,
        names=list(columns),
        index_col=False,
        usecols=list(range(len(columns))),
    )

    if 'r' not in columns:
        # Add a column of default ratings
        data['r'] = 1

    if 't' in columns:
        timestamps = data['t']
        # Any numeric column is treated as epoch seconds. Checking only for
        # int64 let float timestamps (for instance an integer column that
        # became float because of missing values) fall through to the
        # generic parser, which reads bare numbers as nanoseconds.
        is_epoch = (
            pd.api.types.is_integer_dtype(timestamps)
            or pd.api.types.is_float_dtype(timestamps)
        )
        if is_epoch:
            data['t'] = pd.to_datetime(timestamps, unit='s')
        else:
            data['t'] = pd.to_datetime(timestamps)
        print('Sort data in chronological order...')
        data.sort_values('t', inplace=True)

    return data
preprocess.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录