def sentence2vec(sentence, model=WORD2VEC, stopwords=STOPWORDS, metadata=None, section=None, wordvecs_only=True):
    """
    Turn a sentence into a vector by averaging the word vectors of its usable words.

    A word contributes to the average only if it is not in ``stopwords`` and the model has a vector for it.
    If no word contributes, the all-zero vector is returned.

    :param sentence: the sentence to turn into a vector, as a list of words
    :param model: the word2vec model used to convert words to vectors. It must expose ``syn0`` (the weight
                  matrix), support ``model[word]`` lookups, and provide either ``vocab`` or ``index2word``.
    :param stopwords: words to leave out of the average.
    :param metadata: dictionaries of metadata for the paper. Accepted for interface compatibility; not used
                     by this function.
    :param section: the section of the paper the sentence occurs in. Accepted for interface compatibility;
                    not used by this function.
    :param wordvecs_only: accepted for interface compatibility; this function always uses only the word
                          vectors from the model, with no extra features.
    :return: the sentence in vector representation, a float32 array with one entry per model feature
    """
    # The second dimension of the weight matrix is the number of features per word vector.
    num_features = model.syn0.shape[1]

    # Prefer the model's own word -> entry mapping for O(1) membership tests. Rebuilding a set from
    # index2word on every call costs O(vocabulary size) per sentence, so it is only the fallback.
    # NOTE(review): assumes model.vocab, when present, has the same keys as model.index2word -- confirm
    # for the model class in use.
    vocab = getattr(model, "vocab", None)
    if vocab is None:
        vocab = set(model.index2word)

    # Running sum of the word vectors; divided by the word count at the end to get the mean.
    average = np.zeros(num_features, dtype="float32")
    total_word_count = 0
    for word in sentence:
        if word in stopwords or word not in vocab:
            continue
        average += model[word]
        total_word_count += 1

    # Guard against division by zero when every word was skipped; the result is then the zero vector.
    return np.divide(average, max(total_word_count, 1))
useful_functions.py 文件源码
python
阅读 29
收藏 0
点赞 0
评论 0
评论列表
文章目录