def iob_locations(self, tagged_sent):
    """Yield ``(word, tag, iob_tag)`` triples marking location chunks.

    Walks ``tagged_sent`` left to right. At each position the current word
    is joined with the words that follow it (single spaces) and the phrase
    is looked up in ``self.locations``, starting with the current word
    alone and growing one word at a time. The first (shortest) phrase found
    is taken as the location.

    The longest phrase tried is the current word plus ``self.lookahead``
    following words, clipped at the end of the sentence.

    Args:
        tagged_sent: indexable sequence of ``(word, tag)`` pairs.

    Yields:
        ``(word, tag, 'B-LOCATION')`` for the first word of a chunk,
        ``(word, tag, 'I-LOCATION')`` for every further word of the chunk,
        and ``(word, tag, 'O')`` for words outside any location. Locations
        that directly follow one another are merged into a single chunk, so
        the later one starts with ``'I-LOCATION'``, not ``'B-LOCATION'``.
    """
    total = len(tagged_sent)
    inside = False
    start = 0

    while start < total:
        word, tag = tagged_sent[start]

        # Candidate phrases span 1 .. lookahead + 1 words. The "+ 1" counts
        # the current word itself, so a lookahead of 0 still tests the
        # single word and a lookahead of N reaches N words past it.
        longest = min(self.lookahead + 1, total - start)

        span = []
        matched = False
        for end in range(start, start + longest):
            span.append(tagged_sent[end])
            if ' '.join(w for w, _ in span) in self.locations:
                matched = True
                break

        if not matched:
            # No location starts here, so any running chunk is closed.
            inside = False
            start += 1
            yield word, tag, 'O'
            continue

        for offset, (chunk_word, chunk_tag) in enumerate(span):
            # Only the very first word of a fresh chunk gets the B- prefix;
            # a location right after another one continues the same chunk.
            if offset == 0 and not inside:
                yield chunk_word, chunk_tag, 'B-LOCATION'
            else:
                yield chunk_word, chunk_tag, 'I-LOCATION'

        inside = True
        # Skip past every word that was just emitted as part of the chunk.
        start += len(span)
chunkers.py 文件源码
python
阅读 24
收藏 0
点赞 0
评论 0
评论列表
文章目录