chunkers.py 文件源码-python代码片段

chunkers.py 文件源码

python

阅读 29 收藏 0 点赞 0 评论 0

项目：Natural-Language-Processing-Python-and-NLTK 作者: PacktPublishing 项目源码文件源码

def iob_locations(self, tagged_sent):
    """Yield ``(word, tag, iob)`` triples marking known locations.

    Walks ``tagged_sent`` left to right. At each position the shortest
    phrase that starts there and is contained in ``self.locations`` is
    taken as a location; phrases are built by joining consecutive words
    with a single space. Locations that directly follow one another are
    merged into one chunk: only the first word of the run is tagged
    ``'B-LOCATION'``, every other word ``'I-LOCATION'``. Words that are
    not part of any location are tagged ``'O'``.

    Args:
        tagged_sent: Indexable sequence of ``(word, tag)`` pairs.

    Yields:
        ``(word, tag, iob_tag)`` tuples, one per input token, in order.

    Note:
        ``self.lookahead`` is treated as the number of *additional* words
        that may follow the current word inside one location, so phrases
        of up to ``lookahead + 1`` words are tested. The previous
        implementation stopped one word short: it collected ``lookahead``
        following words but never tested the longest phrase, and with
        ``lookahead == 0`` it matched nothing at all.
        NOTE(review): this presumes ``lookahead`` is derived from the
        maximum number of spaces in a location name -- confirm against
        the code that sets it.
    """
    total = len(tagged_sent)
    locations = self.locations
    lookahead = self.lookahead
    inside = False
    i = 0

    while i < total:
        # Exclusive end index of the longest candidate phrase, clipped to
        # the end of the sentence.
        limit = min(total, i + 1 + lookahead)
        phrase = []
        match_end = None
        # Grow the phrase one word at a time and stop at the first hit,
        # i.e. the shortest matching phrase wins.
        for end in range(i + 1, limit + 1):
            phrase.append(tagged_sent[end - 1][0])
            if ' '.join(phrase) in locations:
                match_end = end
                break

        if match_end is None:
            # No location starts here, so any running chunk is closed.
            word, tag = tagged_sent[i]
            inside = False
            yield word, tag, 'O'
            i += 1
            continue

        for pos in range(i, match_end):
            word, tag = tagged_sent[pos]
            # Only the very first word of a run of adjacent locations
            # opens the chunk; everything else continues it.
            if pos == i and not inside:
                yield word, tag, 'B-LOCATION'
            else:
                yield word, tag, 'I-LOCATION'
        inside = True
        # Resume after the words that were just chunked.
        i = match_end