From a083214da2ed32ecaa0e8a41acd5544d6a2dd530 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Wed, 8 Jan 2020 11:34:35 +0000 Subject: [PATCH] Handle missing MediaBox irrespective of parsing type. Since PDFBox defaults to US Letter if the MediaBox is missing rather than throwing, we remove the behaviour of throwing when UseLenientParsing is false; now we log an error instead. Throwing didn't provide any benefit to consumers. --- src/UglyToad.PdfPig/Parser/PageFactory.cs | 30 ++++++++++------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/UglyToad.PdfPig/Parser/PageFactory.cs b/src/UglyToad.PdfPig/Parser/PageFactory.cs index 8345ba56..0637c1be 100644 --- a/src/UglyToad.PdfPig/Parser/PageFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PageFactory.cs @@ -54,8 +54,8 @@ rotation = new PageRotationDegrees(rotateToken.Int); } - MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, isLenientParsing); - CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox, isLenientParsing); + MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers); + CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox); var stackDepth = 0; @@ -160,15 +160,15 @@ return spaceUnits; } - private CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox, bool isLenientParsing) + private CropBox GetCropBox(DictionaryToken dictionary, PageTreeMembers pageTreeMembers, MediaBox mediaBox) { CropBox cropBox; if (dictionary.TryGet(NameToken.CropBox, out var cropBoxObject) && DirectObjectFinder.TryGet(cropBoxObject, pdfScanner, out ArrayToken cropBoxArray)) { - if (cropBoxArray.Length != 4 && isLenientParsing) + if (cropBoxArray.Length != 4) { - log.Error($"The CropBox was the wrong length in the dictionary: {dictionary}. Array was: {cropBoxArray}."); + log.Error($"The CropBox was the wrong length in the dictionary: {dictionary}. Array was: {cropBoxArray}. 
Using MediaBox."); cropBox = new CropBox(mediaBox.Bounds); @@ -185,17 +185,17 @@ return cropBox; } - private MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, bool isLenientParsing) + private MediaBox GetMediaBox(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers) { MediaBox mediaBox; if (dictionary.TryGet(NameToken.MediaBox, out var mediaboxObject) && DirectObjectFinder.TryGet(mediaboxObject, pdfScanner, out ArrayToken mediaboxArray)) { - if (mediaboxArray.Length != 4 && isLenientParsing) + if (mediaboxArray.Length != 4) { - log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaboxArray}."); + log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaboxArray}. Defaulting to US Letter."); - mediaBox = MediaBox.A4; + mediaBox = MediaBox.Letter; return mediaBox; } @@ -208,14 +208,10 @@ if (mediaBox == null) { - if (isLenientParsing) - { - mediaBox = MediaBox.A4; - } - else - { - throw new InvalidOperationException("No mediabox was present for page: " + number); - } + log.Error($"The MediaBox was the wrong missing for page {number}. Using US Letter."); + + // PDFBox defaults to US Letter. + mediaBox = MediaBox.Letter; } }