def predict_job(job_list):
    """Predict a class label for every job title in ``job_list``.

    A bag-of-words multinomial naive Bayes classifier is fitted on the
    training data ``X`` (documents) and ``y`` (labels), which are defined
    outside this function, and is then applied to the given titles. The
    model is re-fitted on every call.

    Args:
        job_list: Iterable of iterables of job-title strings, for example
            ``[('Founder',), ('Senior Engineer',)]``. It is flattened by
            exactly one level, so a bare string element would be split
            into single characters.

    Returns:
        Array of predicted labels, one per flattened title, in input
        order. When there are no titles at all, an empty object-dtype
        array is returned without fitting the model.
    """
    # Flatten one level: each element of job_list is itself a group of titles.
    titles = [title for group in job_list for title in group]
    if not titles:
        # The classifier refuses to predict on a zero-row matrix, so answer
        # the empty case directly instead of letting it raise.
        import numpy as np

        return np.array([], dtype=object)

    # Normalise each title with the project tokenizer/stemmer and re-join the
    # tokens so the vectorizer receives one whitespace-separated string each.
    # NOTE(review): presumably X was preprocessed the same way - confirm.
    documents = pd.Series(
        [' '.join(regex.tokenize_and_stem(title)) for title in titles]
    )

    # The vocabulary is learned from the training documents only; words in the
    # titles that were never seen in training are ignored by transform().
    vect = CountVectorizer()
    X_train_dtm = vect.fit_transform(pd.Series(X))
    y_train = pd.Series(y)

    nb = MultinomialNB()
    nb.fit(X_train_dtm, y_train)
    return nb.predict(vect.transform(documents))
# print(predict_job([('Founder',), ('Founder',), ('Architect & Full-stack developer',), ('Senior Engineer',), ('Technical Consultant',)]))
# (Web page residue, not part of the program: "Comment list" / "Table of contents")