diff --git a/src/UglyToad.Pdf.Tests/Integration/Documents/Font Size Text - from google chrome print pdf.pdf b/src/UglyToad.Pdf.Tests/Integration/Documents/Font Size Test - from google chrome print pdf.pdf similarity index 100% rename from src/UglyToad.Pdf.Tests/Integration/Documents/Font Size Text - from google chrome print pdf.pdf rename to src/UglyToad.Pdf.Tests/Integration/Documents/Font Size Test - from google chrome print pdf.pdf diff --git a/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromGoogleChromeTests.cs b/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromGoogleChromeTests.cs new file mode 100644 index 00000000..e82984ec --- /dev/null +++ b/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromGoogleChromeTests.cs @@ -0,0 +1,39 @@ +namespace UglyToad.Pdf.Tests.Integration +{ + using System; + using System.IO; + using Xunit; + + public class FontSizeTestFromGoogleChromeTests + { + private static string GetFilename() + { + var documentFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")); + + return Path.Combine(documentFolder, "Font Size Test - from google chrome print pdf.pdf"); + } + + [Fact] + public void GetsCorrectNumberOfPages() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var pageCount = document.NumberOfPages; + + Assert.Equal(1, pageCount); + } + } + + [Fact] + public void GetsCorrectPageWidthAndHeight() + { + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + Assert.Equal(595, page.Width); + Assert.Equal(842, page.Height); + } + } + } +} diff --git a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj index 44d0b0ce..49106bee 100644 --- a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj +++ b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj @@ -10,8 +10,8 @@ + - @@ -25,7 +25,7 @@ PreserveNewest - + PreserveNewest diff --git a/src/UglyToad.Pdf/Parser/IPdfObjectParser.cs b/src/UglyToad.Pdf/Parser/IPdfObjectParser.cs index 996f08bf..7308a75f 100644 --- a/src/UglyToad.Pdf/Parser/IPdfObjectParser.cs +++ b/src/UglyToad.Pdf/Parser/IPdfObjectParser.cs @@ -71,7 +71,12 @@ if (offsetOrStreamNumber == null) { - return CosNull.Null; + if (isLenientParsing) + { + return CosNull.Null; + } + + throw new InvalidOperationException($"Could not locate the object {key.Number} which was not found in the cross reference table."); } var isCompressedStreamObject = offsetOrStreamNumber <= 0; diff --git a/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs b/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs index d65f359f..645e44b5 100644 --- a/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs +++ b/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs @@ -10,6 +10,9 @@ internal class CrossReferenceTableParser { + private const string InUseEntry = "n"; + private const string FreeEntry = "f"; + private readonly ILog log; private readonly CosDictionaryParser dictionaryParser; private readonly CosBaseParser baseParser; @@ -25,7 +28,8 @@ { builder = null; - long xrefTableStartOffset = source.GetPosition(); + var tableStartOffset = source.GetPosition(); + if (source.Peek() != 'x') { return false; @@ -40,6 +44,7 @@ // check for trailer after xref var str = ReadHelper.ReadString(source); byte[] b = OtherEncodings.StringAsLatin1Bytes(str); + source.Rewind(b.Length); if (str.StartsWith("trailer")) @@ -54,23 +59,25 @@ XRefType = CrossReferenceType.Table }; - // Xref tables can have multiple sections. Each starts with a starting object id and a count. + // Tables can have multiple sections. Each starts with a starting object id and a count. while (true) { - var currentLine = ReadHelper.ReadLine(source); - String[] splitString = currentLine.Split(new[] { "\\s" }, StringSplitOptions.RemoveEmptyEntries); - if (splitString.Length != 2) + if (!TableSubsectionDefinition.TryRead(log, source, out var subsectionDefinition)) { - log.Warn("Unexpected XRefTable Entry: " + currentLine); - break; + if (isLenientParsing) + { + log.Warn($"Unexpected subsection definition in the cross-reference table at offset {offset}"); + } + else + { + throw new InvalidOperationException($"Unexpected subsection definition in the cross-reference table at offset {offset}"); + } } - // first obj id - long currObjID = long.Parse(splitString[0]); - // the number of objects in the xref table - int count = int.Parse(splitString[1]); + + var currentObjectId = subsectionDefinition.FirstNumber; ReadHelper.SkipSpaces(source); - for (int i = 0; i < count; i++) + for (var i = 0; i < subsectionDefinition.Count; i++) { if (source.IsEof() || ReadHelper.IsEndOfName((char)source.Peek())) { @@ -81,42 +88,47 @@ break; } //Ignore table contents - currentLine = ReadHelper.ReadLine(source); - splitString = currentLine.Split(new[] { "\\s" }, StringSplitOptions.RemoveEmptyEntries); + var currentLine = ReadHelper.ReadLine(source); + var splitString = currentLine.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries); if (splitString.Length < 3) { log.Warn("invalid xref line: " + currentLine); break; } - /* This supports the corrupt table as reported in - * PDFBOX-474 (XXXX XXX XX n) */ - if (splitString[splitString.Length - 1].Equals("n")) + // This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n) + if (splitString[splitString.Length - 1].Equals(InUseEntry)) { try { - long currOffset = long.Parse(splitString[0]); - if (currOffset >= xrefTableStartOffset && currOffset <= source.GetPosition()) + var objectOffset = long.Parse(splitString[0]); + + if (objectOffset >= tableStartOffset && objectOffset <= source.GetPosition()) { // PDFBOX-3923: offset points inside this table - that can't be good - throw new InvalidOperationException("XRefTable offset " + currOffset + - " is within xref table for " + currObjID); + throw new InvalidOperationException( + $"Object offset {objectOffset} is within its own cross-reference table for object {currentObjectId}"); } - int currGenID = int.Parse(splitString[1]); - builder.Add(currObjID, currGenID, currOffset); + + var generation = int.Parse(splitString[1]); + builder.Add(currentObjectId, generation, objectOffset); } catch (FormatException e) { throw new InvalidOperationException("Bad", e); } } - else if (!splitString[2].Equals("f")) + else if (!splitString[2].Equals(FreeEntry)) { - throw new InvalidOperationException("Corrupt XRefTable Entry - ObjID:" + currObjID); + throw new InvalidOperationException( + $"Corrupt cross-reference table entry for object {currentObjectId}. The indicator was not 'n' or 'f' but {splitString[2]}."); } - currObjID++; + + currentObjectId++; + ReadHelper.SkipSpaces(source); } + ReadHelper.SkipSpaces(source); if (!ReadHelper.IsDigit(source)) { diff --git a/src/UglyToad.Pdf/Parser/Parts/CrossReference/FileCrossReferenceTableParser.cs b/src/UglyToad.Pdf/Parser/Parts/CrossReference/FileCrossReferenceTableParser.cs index d272ba20..5ea7b1e9 100644 --- a/src/UglyToad.Pdf/Parser/Parts/CrossReference/FileCrossReferenceTableParser.cs +++ b/src/UglyToad.Pdf/Parser/Parts/CrossReference/FileCrossReferenceTableParser.cs @@ -45,15 +45,14 @@ var table = new CrossReferenceTableBuilder(); - long prev = xrefLocation; + long previousCrossReferenceLocation = xrefLocation; // ---- parse whole chain of xref tables/object streams using PREV reference HashSet prevSet = new HashSet(); - while (prev > 0) + while (previousCrossReferenceLocation > 0) { // seek to xref table - reader.Seek(prev); - - // skip white spaces + reader.Seek(previousCrossReferenceLocation); + ReadHelper.SkipSpaces(reader); var isTable = reader.Peek() == X; @@ -63,7 +62,7 @@ { // xref table and trailer // use existing parser to parse xref table - if (!crossReferenceTableParser.TryParse(reader, prev, isLenientParsing, pool, out var tableBuilder)) + if (!crossReferenceTableParser.TryParse(reader, previousCrossReferenceLocation, isLenientParsing, pool, out var tableBuilder)) { throw new InvalidOperationException($"Expected trailer object at position: {reader.GetPosition()}"); } @@ -89,7 +88,7 @@ ReadHelper.SkipSpaces(reader); try { - streamPart = ParseCrossReferenceStream(reader, prev, pool, isLenientParsing); + streamPart = ParseCrossReferenceStream(reader, previousCrossReferenceLocation, pool, isLenientParsing); } catch (InvalidOperationException ex) { @@ -115,46 +114,50 @@ } } } - prev = trailer.GetLongOrDefault(CosName.PREV); - if (prev > 0) + previousCrossReferenceLocation = trailer.GetLongOrDefault(CosName.PREV); + if (previousCrossReferenceLocation > 0) { // check the xref table reference - fixedOffset = xrefOffsetValidator.CheckXRefOffset(prev, isLenientParsing); - if (fixedOffset > -1 && fixedOffset != prev) + fixedOffset = xrefOffsetValidator.CheckXRefOffset(previousCrossReferenceLocation, isLenientParsing); + if (fixedOffset > -1 && fixedOffset != previousCrossReferenceLocation) { - prev = fixedOffset; - trailer.SetLong(CosName.PREV, prev); + previousCrossReferenceLocation = fixedOffset; + trailer.SetLong(CosName.PREV, previousCrossReferenceLocation); } } tableBuilder.Previous = tableBuilder.Dictionary.GetLongOrDefault(CosName.PREV); table.Add(tableBuilder.AsCrossReferenceTablePart()); - table.Add(streamPart); + + if (streamPart != null) + { + table.Add(streamPart); + } } else { // parse xref stream - var tablePart = ParseCrossReferenceStream(reader, prev, pool, isLenientParsing); + var tablePart = ParseCrossReferenceStream(reader, previousCrossReferenceLocation, pool, isLenientParsing); table.Add(tablePart); - prev = tablePart.Previous; - if (prev > 0) + previousCrossReferenceLocation = tablePart.Previous; + if (previousCrossReferenceLocation > 0) { // check the xref table reference - fixedOffset = xrefOffsetValidator.CheckXRefOffset(prev, isLenientParsing); - if (fixedOffset > -1 && fixedOffset != prev) + fixedOffset = xrefOffsetValidator.CheckXRefOffset(previousCrossReferenceLocation, isLenientParsing); + if (fixedOffset > -1 && fixedOffset != previousCrossReferenceLocation) { - prev = fixedOffset; - tablePart.FixOffset(prev); + previousCrossReferenceLocation = fixedOffset; + tablePart.FixOffset(previousCrossReferenceLocation); } } } - if (prevSet.Contains(prev)) + if (prevSet.Contains(previousCrossReferenceLocation)) { - throw new InvalidOperationException("/Prev loop at offset " + prev); + throw new InvalidOperationException("/Prev loop at offset " + previousCrossReferenceLocation); } - prevSet.Add(prev); + prevSet.Add(previousCrossReferenceLocation); } var resolved = table.Build(xrefLocation, log); diff --git a/src/UglyToad.Pdf/Parser/Parts/CrossReference/TableSubsectionDefinition.cs b/src/UglyToad.Pdf/Parser/Parts/CrossReference/TableSubsectionDefinition.cs new file mode 100644 index 00000000..96a43266 --- /dev/null +++ b/src/UglyToad.Pdf/Parser/Parts/CrossReference/TableSubsectionDefinition.cs @@ -0,0 +1,82 @@ +namespace UglyToad.Pdf.Parser.Parts.CrossReference +{ + using System; + using IO; + using Logging; + + /// + /// Each subsection of the cross-reference table starts with a line defining the starting object number + /// and the count of objects in the subsection. + /// + /// + /// xref + /// 12 16 + /// ... + /// + /// Defines a table subsection that starts with object 12 and has 16 entries (12-27). + /// + internal struct TableSubsectionDefinition + { + private static readonly char[] Splitters = { ' ' }; + + /// + /// The first object number in the table. + /// + public long FirstNumber { get; } + + /// + /// The number of consecutive objects declared in the table. + /// + public int Count { get; } + + /// + /// Create a new to define a range of consecutive objects in the cross-reference table. + /// + public TableSubsectionDefinition(long firstNumber, int count) + { + FirstNumber = firstNumber; + Count = count; + } + + /// + /// Attempts to read the from the current line of the source. + /// + public static bool TryRead(ILog log, IRandomAccessRead source, out TableSubsectionDefinition definition) + { + definition = default(TableSubsectionDefinition); + + var line = ReadHelper.ReadLine(source); + + var parts = line.Split(Splitters, StringSplitOptions.RemoveEmptyEntries); + + if (parts.Length != 2) + { + return false; + } + + try + { + + var firstObjectId = long.Parse(parts[0]); + var objectCount = int.Parse(parts[1]); + + definition = new TableSubsectionDefinition(firstObjectId, objectCount); + + return true; + + } + catch (Exception ex) + { + log.Error( + $"The format for the subsection definition was invalid, expected [long] [int], instead got '{line}'", ex); + + return false; + } + } + + public override string ToString() + { + return $"{FirstNumber} {Count}"; + } + } +} \ No newline at end of file