def _split_train_tst(self, test_fraction=0.01):
    """
    Shuffle the dataset and split it into training and testing partitions.

    One permutation of the sample indices is applied to ``self.X_fwd``,
    ``self.X_bwd`` and ``self.Y`` alike, so the forward review, backward
    review and summary rows stay aligned after shuffling. The reviews exist
    in a forward and a backward variant; the summary has a single variant.

    When the checkpointer reports no mapper checkpoint, a fresh permutation
    is generated and pickled to the mapper file; otherwise the permutation is
    read back from that file so the earlier shuffle is reused.

    Sets ``self.test_size``, ``self.train_size``, ``self.X_trn_fwd``,
    ``self.X_tst_fwd``, ``self.X_trn_bwd``, ``self.X_tst_bwd``,
    ``self.Y_trn`` and ``self.Y_tst``. The leading ``train_size`` shuffled
    samples form the training set and the remainder the test set.

    :param test_fraction: fraction of the samples held out for testing
        (defaults to 0.01, the value that used to be hard-coded)
    :return: None
    """
    num_samples = self.Y.shape[0]
    mapper_file = self.checkpointer.get_mapper_file_location()

    # Single-argument print(...) yields the same output under the Python 2
    # print statement and the Python 3 print function.
    if not self.checkpointer.is_mapper_checkpointed():
        print('No mapper checkpoint found. Fresh loading in progress ...')
        # Now shuffle the data. list(...) is required because shuffle works
        # in place and a Python 3 range object is immutable.
        sample_id = list(range(num_samples))
        random.shuffle(sample_id)
        print('Dumping the mapper shuffle for reuse.')
        # The context manager guarantees the dump is flushed and the handle
        # is closed even if pickling raises.
        with open(mapper_file, 'wb') as mapper_out:
            Pickle.dump(sample_id, mapper_out)
        print('Dump complete. Moving Forward...')
    else:
        print('Mapper Checkpoint found... Reading from mapper dump')
        # NOTE: unpickling executes whatever the file describes; the mapper
        # file must come from a trusted location.
        with open(mapper_file, 'rb') as mapper_in:
            sample_id = Pickle.load(mapper_in)
        print('Mapping unpickling complete.. Moving forward...')

    # NOTE(review): a stored mapper is not checked against num_samples; a
    # stale checkpoint from a differently sized dataset would misalign the
    # split below -- confirm the checkpointer invalidates it.
    # Indexing with a list of positions assumes numpy-style arrays -- TODO confirm.
    self.X_fwd = self.X_fwd[sample_id]
    self.X_bwd = self.X_bwd[sample_id]
    self.Y = self.Y[sample_id]

    # Now divide the data into test and train sets
    self.test_size = int(test_fraction * num_samples)
    self.train_size = num_samples - self.test_size

    # Forward review
    self.X_trn_fwd = self.X_fwd[0:self.train_size]
    self.X_tst_fwd = self.X_fwd[self.train_size:num_samples]
    # Backward review
    self.X_trn_bwd = self.X_bwd[0:self.train_size]
    self.X_tst_bwd = self.X_bwd[self.train_size:num_samples]
    # Summary
    self.Y_trn = self.Y[0:self.train_size]
    self.Y_tst = self.Y[self.train_size:num_samples]
stacked_bidirectional.py 文件源码
python
阅读 23
收藏 0
点赞 0
评论 0
评论列表
文章目录