def extract(text, paper=None, logger=logger):
    """Find the first sentence that mentions an analysis alongside a context keyword.

    The text is split into sentences with ``nltk.sent_tokenize``. A sentence
    qualifies when ``re_util.search_any`` reports a hit for the
    ``analys(is|es)`` pattern AND a hit for at least one of the context
    patterns (algorithm, summary, outline, statistic, table/graph, following).

    Args:
        text: The text to scan. When it is falsy and ``paper`` is given, the
            text is fetched from ``paper.get_text()`` instead.
        paper: Optional object exposing ``get_text()``, which must return a
            pair whose first element is the text.
        logger: Forwarded to ``re_util.search_any`` as its ``logger`` keyword.

    Returns:
        A tuple ``(value_text, value_result, source_type, source_detail)``:
        ``(sentence, "Yes", "extracted", "nltk search v1")`` for the first
        qualifying sentence, or ``("Not Found", "No", "extracted",
        "nltk search v1")`` when no sentence qualifies or there is no text
        to scan. Returns ``None`` when ``paper.get_text()`` raises
        ``pdfutil.pdfutil.MalformedPDF``.
    """
    # NOTE(review): search_any is assumed to return a truthy value when any
    # of the given patterns matches the sentence -- confirm against re_util.
    search_any = functools.partial(re_util.search_any, logger=logger)
    source_type = "extracted"
    source_detail = "nltk search v1"

    if not text and paper:
        try:
            text, _ = paper.get_text()
        except pdfutil.pdfutil.MalformedPDF:
            # Unreadable PDF: callers receive None rather than a result tuple.
            return None

    filters = [r'analys(is|es)']
    # Built once here instead of on every loop iteration.
    context_patterns = [
        r'algorithm',
        r'summary',
        r'outline',
        r'statistic',
        r'table|graph',
        r'following',
    ]

    # With no text at all (e.g. text=None and no paper) there is nothing to
    # tokenize; skip the tokenizer so the call ends in the regular
    # "Not Found" result instead of failing inside nltk.
    sentences = nltk.sent_tokenize(text) if text else []
    for sentence in sentences:
        if search_any(filters, sentence) and search_any(context_patterns, sentence):
            return (sentence, "Yes", source_type, source_detail)

    # No sentence matched both pattern groups.
    return ("Not Found", "No", source_type, source_detail)
# Web-page residue, not part of the code: "评论列表" (comment list), "文章目录" (table of contents)