def genderclassify(sentence):
    """
    Tag every token of ``sentence`` with a gender label.

    A Word2Vec model is trained on the sentences returned by
    ``sent.drawlist()`` and ``sents.drawlist()`` plus the input sentence,
    and each token of the input is looked up in that model. A multilayer
    perceptron is then fitted on the (vector, tag) pairs returned by
    ``gndr.drawlist()``, and its per-class probabilities for each token
    vector are converted to a tag with ``genderdecode``.

    Both models are rebuilt on every call.

    :param sentence: string to be tokenized and tagged
    :type sentence: string
    :return: Returns a List of tuples of the form
        [(token1, genderTag), (token2, genderTag)...]; an empty list when
        the sentence yields no tokens.
    :rtype: List of Tuples.
    """
    tokens = tok.wordtokenize(sentence)
    if not tokens:
        # Nothing to tag. Without this guard the classifier would be asked
        # to predict on an empty sample set and raise.
        return []

    # Build the training corpus as a fresh list instead of appending to the
    # list handed back by drawlist(), so data owned by another module is
    # never modified by this call.
    corpus = [tok.wordtokenize(text)
              for text in sent.drawlist() + sents.drawlist()]
    # The input sentence is part of the corpus so that, together with
    # min_count=1 below, every one of its tokens has a vector in the model.
    corpus.append(tokens)

    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    # NOTE(review): ``size`` is the gensim < 4.0 keyword (renamed
    # ``vector_size`` in 4.0) -- confirm against the pinned gensim version.
    model = gensim.models.Word2Vec(corpus, size=10, min_count=1)
    pred = [model.wv[word].tolist() for word in tokens]

    # Each entry of gndr.drawlist() is read as (feature vector, tag).
    # NOTE(review): presumably those vectors have the same 10 dimensions as
    # the model above -- TODO confirm.
    genders = gndr.drawlist()
    X = [entry[0] for entry in genders]
    y = [entry[1] for entry in genders]
    # Diagnostic output kept from the original implementation.
    print(y)

    clf = MLPClassifier(solver='sgd', alpha=1e-5,
                        hidden_layer_sizes=(5, 2), random_state=1)
    clf.fit(X, y)

    predictions = [genderdecode(probabilities)
                   for probabilities in clf.predict_proba(pred).tolist()]
    # Diagnostic output kept from the original implementation.
    print(predictions)

    # One prediction was produced per token, so pair them up positionally.
    return list(zip(tokens, predictions))
# Comment list
# Table of contents