def get_train_test_sets(X, y, *, test_size=None, random_state=None):
    """Split X and y into a train set and a test set.

    Thin wrapper around ``train_test_split`` (assumed to be scikit-learn's
    ``sklearn.model_selection.train_test_split``; the import is not visible
    here). The optional keyword arguments are forwarded unchanged, and their
    ``None`` defaults match that function's own defaults, so calling with
    only ``X`` and ``y`` is equivalent to ``train_test_split(X, y)``.

    Args:
        X: the TF-IDF matrix where each row represents a document and each
            column represents a word, typically obtained by running
            transform_text() from the TP2.
        y: a binary vector where the i-th value indicates whether the i-th
            document is a spam or a ham.
        test_size: forwarded to ``train_test_split``; ``None`` keeps its
            default train/test proportion.
        random_state: forwarded to ``train_test_split``; pass an int to get
            the same split on every call (useful for reproducible results).

    Returns:
        X_train: train subset of X
        X_test: test subset of X
        y_train: train subset of y
        y_test: test subset of y
    """
    # Return the splitter's result as-is so the output order and container
    # type seen by existing callers do not change.
    return train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
# Ex4.2, 4.3, 4.4
# [web-page residue, not code] Comment list
# [web-page residue, not code] Article table of contents