def pdfMetaData(file_path, save=True):
    '''Print, and optionally save, the metadata of a PDF document.

    Builds a text report made of a timestamped header, one line per entry
    of the PDF document-info dictionary, and file-system details taken from
    os.stat() (ctime, mtime, atime and owner uid). The report is printed to
    the console and, when requested, handed to saveResult().

    Args:
        file_path: Path of the PDF document to inspect.
        save: When true (the default), the report is also passed to
            saveResult() under the name "<file name>.txt".

    Raises:
        SystemExit: If the document is encrypted and cannot be opened with
            an empty password, if decryption itself fails (e.g. unsupported
            algorithm), or if the document info cannot be iterated.
    '''
    # Keep the file open while PyPDF2 reads from it, and make sure the
    # handle is released afterwards (including on sys.exit()).
    with open(file_path, "rb") as pdf_file:
        pdf_doc = PdfFileReader(pdf_file)
        if pdf_doc.isEncrypted:
            # Guard only the decrypt() call. The "encrypted" exit below must
            # stay outside the try block: sys.exit() raises SystemExit, and a
            # catch-all handler would replace its message with the
            # "unsupported algorithm" one.
            try:
                decrypt_status = pdf_doc.decrypt("")
            except Exception:
                sys.exit("target pdf document is encrypted with an unsupported algorithm... exiting...")
            # NOTE(review): any status other than 1 is treated as a failure;
            # confirm whether an owner-password match (2) should be accepted.
            if decrypt_status != 1:
                sys.exit("target pdf document is encrypted... exiting...")

        doc_info = pdf_doc.getDocumentInfo()
        stats = os.stat(file_path)
        now = dt.now()
        file_name = getFileName(file_path)
        # file_name[:-4] drops the last four characters of the name
        # (presumably the ".pdf" extension -- confirm against getFileName).
        metadata = "Time: %d/%d/%d %d : %d : %d. Found the following metadata for file %s:\n\n" % (
            now.year, now.month, now.day, now.hour, now.minute, now.second,
            file_name[:-4])

        try:
            entries = []
            for md in doc_info:
                # md[1:] drops the first character of the key (presumably the
                # leading "/" of PDF names such as "/Author").
                label = str(md[1:])
                entries.append(label + " : " + pretifyPyPDF2Time(label, str(doc_info[md])) + "\n")
            metadata += "".join(entries)
        except TypeError:
            # Raised, for instance, when doc_info is None and cannot be iterated.
            sys.exit("Couldn't read document info! Make sure target is a valid pdf document...")

    metadata += "Last metadata mod Date: %s\nLast Mod Date: %s\nLast Access Date: %s\nOwner User ID: %s" % (
        dt.fromtimestamp(stats.st_ctime),
        dt.fromtimestamp(stats.st_mtime),
        dt.fromtimestamp(stats.st_atime),
        stats.st_uid)

    try:
        print(metadata)
    except UnicodeEncodeError:
        print("Console encoding can't decode the result. Enter chcp 65001 in the console and rerun the script.")

    if save:
        # Reuse the name computed for the header instead of asking again.
        tgt = file_name + ".txt"
        saveResult(tgt, metadata)
评论列表
文章目录