def prepare_sentence(words,
                     vectorizer=None,
                     lemmatizer=None,
                     max_words=78,
                     return_output=True):
    """Turn a tokenized sentence into fixed-size word-vector matrices.

    A ``(max_words, 300)`` array is pre-filled with ``ZERO_EPSILON`` and its
    first rows are overwritten with ``vectorizer(word)`` for each of the first
    ``max_words`` words; the remaining rows keep the fill value. When
    ``return_output`` is true, a second array of the same shape is built the
    same way from the lemma of each word.

    Args:
        words: Sequence of tokens (must support ``len()`` and integer
            indexing; it is also passed as-is to ``pos_tag``).
        vectorizer: Callable taking a string and returning a value that can
            be assigned to one row of length 300. Required whenever
            ``words`` is non-empty.
        lemmatizer: Callable invoked as ``lemmatizer(word, pos=tag)``.
            Only used (and only required) when ``return_output`` is true.
            NOTE(review): presumably ``WordNetLemmatizer().lemmatize`` --
            confirm against callers.
        max_words: Number of rows in the returned arrays; words beyond this
            count are ignored.
        return_output: If true, also build and return the lemma matrix.

    Returns:
        ``(X, y)`` when ``return_output`` is true, otherwise just ``X``.
    """
    vector_size = 300
    num_words = min(len(words), max_words)

    X = np.ones((max_words, vector_size)) * ZERO_EPSILON

    y = None
    lemmas = []
    if return_output:
        y = np.ones((max_words, vector_size)) * ZERO_EPSILON
        # Tag the complete sentence (not just the kept prefix) so the tags of
        # the kept words are the same ones the tagger yields with full context.
        raw_pos = [tagged[1] for tagged in pos_tag(words)]
        pos = [str(treebank_to_simple(tag, default=wordnet.NOUN))
               for tag in raw_pos]
        # Only the first num_words lemmas are ever read, so skip the rest.
        lemmas = [str(lemmatizer(words[i], pos=pos[i]))
                  for i in range(num_words)]

    for word_i in range(num_words):
        X[word_i, :] = vectorizer(words[word_i])
        if return_output:
            y[word_i, :] = vectorizer(lemmas[word_i])

    if return_output:
        return X, y
    return X
评论列表
文章目录