def extract_metadata(self, file_path):
with open(file_path, 'rb') as fh:
pdf = PdfFileReader(fh, strict=False)
meta = pdf.getDocumentInfo()
if meta is not None:
self.update('title', meta.title)
self.update('author', meta.author)
self.update('generator', meta.creator)
self.update('generator', meta.producer)
if meta.subject:
self.result.keywords.append(meta.subject)
xmp = pdf.getXmpMetadata()
if xmp is not None:
self.update('id', xmp.xmpmm_documentId)
for lang, title in xmp.dc_title.items():
self.update('title', title)
self.result.languages.append(lang)
self.update('generator', xmp.pdf_producer)
self.update('created_at', xmp.xmp_createDate)
self.update('modified_at', xmp.xmp_modifyDate)
self.result.languages.extend(xmp.dc_language)
# from pprint import pprint
# pprint(self.result.to_dict())
评论列表
文章目录