def parse_pdf_doc(self):
    """
    Open a PDF document and extract its text for matching comparison.

    Reads the file at ``self.file`` in binary mode, concatenates the
    extracted text of every page in document order, and strips leading
    and trailing whitespace from the combined result.

    Returns:
        The stripped text of the whole document, or ``None`` when the
        document yields no text (empty or whitespace only).
    """
    # The reader must be consumed while the file is still open, so the
    # extraction happens inside the ``with`` block; the context manager
    # guarantees the handle is closed even if parsing raises.
    with open(self.file, 'rb') as f:
        pdf = PyPDF2.PdfFileReader(f)
        doc_text = ''.join(page.extractText() for page in pdf.pages)
    return doc_text.strip() or None
评论列表
文章目录