def partial_match(x, y):
    """
    Decide whether two mentions match, textually or through shared WordNet lemmas.

    The check proceeds in three stages:
    1. If fuzz.partial_ratio scores the space-padded mentions at exactly 100,
       they match (the padding presumably restricts this to whole-word
       containment -- confirm against the fuzz documentation).
    2. If each mention has exactly one non-stop-word and those two words share
       at least one non-stop-word WordNet lemma name, they match.
    3. Otherwise the non-stop-words of both mentions are aligned one-to-one
       with Munkres, and the mentions match when the average number of shared
       lemma names over the aligned pairs is at least 0.75.
    :param x: the first mention
    :param y: the second mention
    :return: True if the mentions are considered aligned, False otherwise
    """
    def content_words(mention):
        # Whitespace tokens of the mention, minus stop words.
        return [token for token in mention.split() if not is_stop(token)]

    def lemma_sets(words):
        # One set per word: every lemma name of every synset of that word,
        # lower-cased and with underscores turned into spaces.
        return [
            {name.lower().replace('_', ' ')
             for sense in wn.synsets(word)
             for name in sense.lemma_names()}
            for word in words
        ]

    def overlap(first, second):
        # Number of lemma names common to both sets that are not stop words.
        return sum(1 for name in first.intersection(second) if not is_stop(name))

    # Stage 1: textual partial match on the space-padded mentions.
    padded_x = ' ' + x + ' '
    padded_y = ' ' + y + ' '
    if fuzz.partial_ratio(padded_x, padded_y) == 100:
        return True

    x_tokens = content_words(x)
    y_tokens = content_words(y)
    # A mention made only of stop words (or empty) has nothing to align.
    if not x_tokens or not y_tokens:
        return False

    x_lemmas = lemma_sets(x_tokens)
    y_lemmas = lemma_sets(y_tokens)

    # Stage 2: single word on each side - any shared lemma is enough.
    one_word_each = len(x_lemmas) == 1 and len(y_lemmas) == 1
    if one_word_each and overlap(x_lemmas[0], y_lemmas[0]) > 0:
        return True

    # Stage 3: align words of x with words of y. Rows correspond to words of
    # y and columns to words of x; overlaps are negated because the solver is
    # handed them as costs.
    # NOTE(review): each entry is a *count* of shared lemmas, not a 0/1
    # indicator, so one strongly overlapping pair can lift the average above
    # the threshold on its own - confirm this is intended.
    cost = -np.array([[overlap(x_set, y_set) for x_set in x_lemmas]
                      for y_set in y_lemmas])
    solver = Munkres()
    # pad_to_square is defined elsewhere; presumably it pads the shorter
    # dimension so that the matrix becomes square - TODO confirm fill value.
    cost = pad_to_square(cost)
    assignment = solver.compute(cost)

    # Average (un-negated) overlap over every assigned cell, padding included.
    mean_overlap = np.mean([-cost[row, col] for row, col in assignment])
    return bool(mean_overlap >= 0.75)
# (Web-page navigation residue, not part of the code: "Comment list" / "Article table of contents".)