From 49ad58d9423c4155bce7f3585a28df0e80e59ace Mon Sep 17 00:00:00 2001
From: Andrew Ferrier
Date: Sun, 6 Sep 2020 09:52:56 +0100
Subject: [PATCH] Handle empty PDF files.

---
 tests/BaseTestClasses.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/BaseTestClasses.py b/tests/BaseTestClasses.py
index 38f8290..5659f24 100644
--- a/tests/BaseTestClasses.py
+++ b/tests/BaseTestClasses.py
@@ -367,9 +367,16 @@ def getMetadataField(self, pdf_filename, field_name):
         return None
 
     def getPDFText(self, filename):
-        text = pdfminer.high_level.extract_text(filename)
-        text = text.replace("\t", " ")
-        return text
+        if os.path.exists(filename):
+            try:
+                text = pdfminer.high_level.extract_text(filename)
+            except pdfminer.pdfparser.PDFSyntaxError:
+                return None
+
+            text = text.replace("\t", " ")
+            return text
+        else:
+            return None
 
     def touch(self, fname):
         open(fname, 'w').close()