def consume(self, doc, _):
    """
    Scan the document text with the compiled pattern and record each hit.

    Every match is registered on ``doc.entities`` as
    ``(start offset, group name, matched text)``, where the group name is
    the match's ``lastgroup``. Matches captured by the ``ip_addr`` group are
    kept only when ``socket.inet_aton`` accepts the matched text.

    Nothing is done (and ``doc.entities`` is not touched) when the document
    has no text. The second positional argument is unused.

    :param doc: Document object.
    :type doc: ``gransk.core.document.Document``
    """
    if not doc.text:
        return

    collected = doc.entities

    for match in self.pattern.finditer(doc.text):
        group_name = match.lastgroup
        value = match.group(group_name)

        if group_name == 'ip_addr':
            # The regex alone may accept strings that are not real
            # addresses; let the socket layer make the final call.
            try:
                socket.inet_aton(value)
            except socket.error:
                continue

        collected.add(match.start(group_name), group_name, value)
评论列表
文章目录