# Required module-level imports for this method:
# from sklearn import preprocessing, svm
# from sklearn.multiclass import OneVsRestClassifier
# import joblib
def create_model(self, training_articles):
    """Train a one-vs-rest SVM on features extracted from the labelled
    training articles, then persist the fitted scaler and model."""
    model = OneVsRestClassifier(svm.SVC(probability=True))
    features = []
    labels = []
    for i, article in enumerate(training_articles):
        print("Generating features for article " + str(i) + "...")
        google_cloud_response = self.analyze_text_google_cloud(article["article"])
        relevant_entities = self.get_relevant_entities(google_cloud_response["entities"], article["market"]["entities"], article["market"]["wikipedia_urls"])
        # Only count this article if a relevant entity is present
        if relevant_entities:
            article_features = self.article_features(relevant_entities, article["market"], google_cloud_response, article["article"])
            features.append(article_features)
            labels.append(article["label"])
        else:
            print("Skipping article " + str(i) + "...")
    print("Performing feature scaling...")
    scaler = preprocessing.StandardScaler().fit(features)
    features_scaled = scaler.transform(features)
    print("Fitting model...")
    model.fit(features_scaled, labels)
    print("Saving model...")
    joblib.dump(scaler, "data_analysis/scaler.pkl")
    joblib.dump(model, "data_analysis/model.pkl")
    print("Done!")
# For use in prod
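# (A minimal sketch, not from the original: at prediction time the saved scaler
# and model would presumably be loaded back with joblib and applied to features
# built the same way as in create_model. The method name predict_article and the
# exact steps shown here are assumptions.)
def predict_article(self, article):
    scaler = joblib.load("data_analysis/scaler.pkl")
    model = joblib.load("data_analysis/model.pkl")
    google_cloud_response = self.analyze_text_google_cloud(article["article"])
    relevant_entities = self.get_relevant_entities(google_cloud_response["entities"], article["market"]["entities"], article["market"]["wikipedia_urls"])
    # Mirror training: skip articles with no relevant entity
    if not relevant_entities:
        return None
    article_features = self.article_features(relevant_entities, article["market"], google_cloud_response, article["article"])
    features_scaled = scaler.transform([article_features])
    # predict_proba is available because the SVC was created with probability=True
    return model.predict_proba(features_scaled)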