# get_corefering_predicates.py -- source file, Python code snippet

def is_aligned_arg(x, y):
    """
    Decide whether two argument strings are aligned.

    The arguments are considered aligned when either of the following holds:

    * ``fuzz.partial_ratio`` gives a score of exactly 100 for the two strings,
      each padded with one space on both sides; or
    * after dropping stop words (according to ``nlp.is_stop``), the number of
      (word of x, word of y) pairs that share at least one non-stop-word
      WordNet lemma is at least 75% of the larger of the two word counts.

    If either argument has no non-stop words left and the partial match did
    not succeed, the arguments are not aligned.

    :param x: the first argument
    :param y: the second argument
    :return: True if the arguments are aligned, False otherwise
    """
    # Partial matching on the space-padded strings (the padding presumably
    # keeps a match from starting or ending in the middle of a word).
    padded_x = ' ' + x + ' '
    padded_y = ' ' + y + ' '
    if fuzz.partial_ratio(padded_x, padded_y) == 100:
        return True

    def content_words(text):
        # Whitespace-separated tokens that are not stop words.
        return [token for token in text.split() if not nlp.is_stop(token)]

    def lemma_set(word):
        # Lower-cased lemma names of every synset of `word`, with underscores
        # turned back into spaces. Empty when WordNet has no synset for it.
        return {name.lower().replace('_', ' ')
                for synset in wn.synsets(word)
                for name in synset.lemma_names()}

    x_content = content_words(x)
    y_content = content_words(y)
    if not x_content or not y_content:
        return False

    x_lemmas = [lemma_set(word) for word in x_content]
    y_lemmas = [lemma_set(word) for word in y_content]

    # Count the word pairs whose lemma sets overlap on a non-stop-word lemma.
    # With a single word on each side this reduces to requiring that the two
    # words share such a lemma.
    # NOTE(review): pairs are counted over the full cross product, so one word
    # that overlaps with several words of the other argument is counted once
    # per overlap -- confirm this is the intended alignment measure.
    matching_pairs = sum(
        1
        for x_set in x_lemmas
        for y_set in y_lemmas
        if any(not nlp.is_stop(lemma) for lemma in x_set & y_set)
    )

    return matching_pairs >= 0.75 * max(len(x_lemmas), len(y_lemmas))