def main():
    """Train a Word2Vec model on retail transaction patterns and report a
    hit-rate-style accuracy on a held-out test split.

    Relies on the module-level ``load_sequence`` populating the global
    ``patterns`` (a sequence of item lists) -- TODO confirm against the
    rest of the file.
    """
    load_sequence('/home/beki/Documents/2nd Year/BD & DM Project/retail_dataset.csv')

    # Split patterns into disjoint 80/20 train/test sets.
    # The original used np.random.choice, which (a) received a float `size`
    # (np.floor returns a float, raising TypeError) and (b) samples with
    # replacement, so train and test could overlap and miss items.
    # A shuffle + slice yields a proper disjoint split.
    shuffled = list(patterns)
    np.random.shuffle(shuffled)
    cut = int(len(shuffled) * 0.8)
    train_patterns = shuffled[:cut]
    test_patterns = shuffled[cut:]

    # Word vector representation learning.
    # NOTE(review): `size`/`iter` are the pre-4.0 gensim keyword names
    # (`vector_size`/`epochs` in gensim >= 4.0) -- confirm installed version.
    model = Word2Vec(train_patterns, size=15, window=3, min_count=1,
                     workers=1, iter=3, sample=1e-4, negative=20)

    # Evaluate: for each test pattern, hide its last item and check whether
    # the model predicts it from the remaining items.
    test_size = float(len(test_patterns))
    hit = 0.0
    for current_pattern in test_patterns:
        if len(current_pattern) < 2:
            # Nothing left to predict from; exclude from the denominator.
            test_size -= 1.0
            continue
        # Remove the last item from the pattern -- this is the target.
        last_item = current_pattern.pop()
        # Keep only the reduced pattern's items present in the model vocabulary.
        items = [it for it in current_pattern if it in model.vocab]
        if len(items) <= 2:
            # Too few known items to build a query; exclude from denominator.
            # (Threshold kept from the original; arguably `< 1` would suffice.)
            test_size -= 1.0
            continue
        # Predict the items most similar to the reduced pattern.
        prediction = model.most_similar(positive=items)
        # Hit if the hidden item appears among the predictions.
        for predicted_item, score in prediction:
            if predicted_item == last_item:
                hit += 1.0
                break  # count each test pattern at most once

    # Guard the division: every pattern may have been excluded above.
    if test_size > 0:
        print('Accuracy like measure: {}'.format(hit / test_size))
    else:
        print('Accuracy like measure: n/a (no evaluable test patterns)')
# Source listing: frequent_pattern Item.py (page metadata from the original
# posting -- language: python; reads: 17, bookmarks: 0, likes: 0, comments: 0)