def __loadSATPDF(self, filename):
    """
    Load the SAT PDF file and collect the table entries of every page.

    The text of each page is passed to ``self.__getTableContent`` together
    with the table header and footer markers, and the returned entries are
    concatenated. When the "Number", "Mean" and "S.D." entries are all
    present, the list is cut two entries before "S.D." so that this
    trailing part is not returned.

    Parameters
    ----------
    filename
        Path of the SAT score PDF; it is opened in binary mode.

    Return
    ------
    All numbers from the SAT table in a string array
    """
    print("loading SAT score pdf")

    # Markers handed to __getTableContent for every page. They are runtime
    # data (presumably matched against the extracted page text), so keep
    # them byte-for-byte, including the odd "˜" character in the footer.
    tableHeader = "Total \nMale Female \nScore \nNumber Percentile Number Percentile Number Percentile "
    tableFooter = "De˜nitions of statistical terms are provided online at research."
    summaryLabels = ("Number", "Mean", "S.D.")

    tableContents = []
    # The reader pulls data from the file while pages are processed, so the
    # file has to stay open for the whole loop; ``with`` closes it afterwards.
    with open(filename, "rb") as pdfFile:
        pdf = pypdf.PdfFileReader(pdfFile)
        for page in pdf.pages:
            content = page.extractText()
            tableContents += self.__getTableContent(content, tableHeader, tableFooter)
            # Test each label for membership. The previous expression
            # `"Number" and "Mean" and "S.D." in tableContents` only ever
            # checked "S.D.", since non-empty strings are always truthy.
            if all(label in tableContents for label in summaryLabels):
                # Cut two entries before "S.D." (presumably the "Number" and
                # "Mean" labels - verify against the PDF layout). Clamp at 0
                # so a negative bound cannot slice from the wrong end.
                cutIndex = max(tableContents.index("S.D.") - 2, 0)
                tableContents = tableContents[:cutIndex]
    return tableContents
评论列表
文章目录