diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 9821cb34..5a3be587 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -16,6 +16,7 @@ { private readonly IResourceStore resourceStore; private readonly UserSpaceUnit userSpaceUnit; + private readonly bool isLenientParsing; private Stack graphicsStack = new Stack(); @@ -25,10 +26,11 @@ public List Letters = new List(); - public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit) + public ContentStreamProcessor(PdfRectangle cropBox, IResourceStore resourceStore, UserSpaceUnit userSpaceUnit, bool isLenientParsing) { this.resourceStore = resourceStore; this.userSpaceUnit = userSpaceUnit; + this.isLenientParsing = isLenientParsing; graphicsStack.Push(new CurrentGraphicsState()); } @@ -104,7 +106,12 @@ { var code = font.ReadCharacterCode(bytes, out int codeLength); - font.TryGetUnicode(code, out var unicode); + var foundUnicode = font.TryGetUnicode(code, out var unicode); + + if (!foundUnicode && !isLenientParsing) + { + throw new InvalidOperationException($"We could not find the corresponding character with code {code} in font {font.Name}."); + } var wordSpacing = 0m; if (code == ' ' && codeLength == 1) diff --git a/src/UglyToad.PdfPig/Parser/PageFactory.cs b/src/UglyToad.PdfPig/Parser/PageFactory.cs index 79d28b42..ad669f26 100644 --- a/src/UglyToad.PdfPig/Parser/PageFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PageFactory.cs @@ -67,7 +67,7 @@ var bytes = contentStream.Decode(filterProvider); - content = GetContent(bytes, cropBox, userSpaceUnit); + content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing); } else if (contents is COSArray arr) { @@ -91,7 +91,7 @@ bytes.AddRange(contentStream.Decode(filterProvider)); } - content = GetContent(bytes, cropBox, userSpaceUnit); + content = GetContent(bytes, cropBox, userSpaceUnit, isLenientParsing); } var page = new Page(number, mediaBox, cropBox, content); @@ -99,7 +99,7 @@ return page; } - private PageContent GetContent(IReadOnlyList contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit) + private PageContent GetContent(IReadOnlyList contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit, bool isLenientParsing) { if (Debugger.IsAttached) { @@ -108,7 +108,7 @@ var operations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes)); - var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit); + var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit, isLenientParsing); return context.Process(operations); }