diff --git a/src/UglyToad.Pdf.Tests/Integration/Documents/Font Size Text - from google chrome print pdf.pdf b/src/UglyToad.Pdf.Tests/Integration/Documents/Font Size Test - from google chrome print pdf.pdf
similarity index 100%
rename from src/UglyToad.Pdf.Tests/Integration/Documents/Font Size Text - from google chrome print pdf.pdf
rename to src/UglyToad.Pdf.Tests/Integration/Documents/Font Size Test - from google chrome print pdf.pdf
diff --git a/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromGoogleChromeTests.cs b/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromGoogleChromeTests.cs
new file mode 100644
index 00000000..e82984ec
--- /dev/null
+++ b/src/UglyToad.Pdf.Tests/Integration/FontSizeTestFromGoogleChromeTests.cs
@@ -0,0 +1,39 @@
+namespace UglyToad.Pdf.Tests.Integration
+{
+    using System;
+    using System.IO;
+    using Xunit;
+
+    /// <summary>Integration tests which open the "Font Size Test - from google chrome print pdf" sample document.</summary>
+    public class FontSizeTestFromGoogleChromeTests
+    {
+        /// <summary>Resolves the sample path: three levels above the application base directory, then Integration/Documents.</summary>
+        private static string GetFilename()
+        {
+            var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
+            var documents = Path.GetFullPath(Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents"));
+
+            return Path.Combine(documents, "Font Size Test - from google chrome print pdf.pdf");
+        }
+
+        [Fact]
+        public void GetsCorrectNumberOfPages()
+        {
+            using (var document = PdfDocument.Open(GetFilename()))
+            {
+                Assert.Equal(1, document.NumberOfPages);
+            }
+        }
+
+        [Fact]
+        public void GetsCorrectPageWidthAndHeight()
+        {
+            using (var document = PdfDocument.Open(GetFilename()))
+            {
+                var firstPage = document.GetPage(1);
+                Assert.Equal(595, firstPage.Width);
+                Assert.Equal(842, firstPage.Height);
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
index 44d0b0ce..49106bee 100644
--- a/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
+++ b/src/UglyToad.Pdf.Tests/UglyToad.Pdf.Tests.csproj
@@ -10,8 +10,8 @@
+
-
@@ -25,7 +25,7 @@
PreserveNewest
-
+
PreserveNewest
diff --git a/src/UglyToad.Pdf/Parser/IPdfObjectParser.cs b/src/UglyToad.Pdf/Parser/IPdfObjectParser.cs
index 996f08bf..7308a75f 100644
--- a/src/UglyToad.Pdf/Parser/IPdfObjectParser.cs
+++ b/src/UglyToad.Pdf/Parser/IPdfObjectParser.cs
@@ -71,7 +71,12 @@
if (offsetOrStreamNumber == null)
{
- return CosNull.Null;
+ if (isLenientParsing)
+ {
+ return CosNull.Null;
+ }
+
+ throw new InvalidOperationException($"Could not locate the object {key.Number} which was not found in the cross reference table.");
}
var isCompressedStreamObject = offsetOrStreamNumber <= 0;
diff --git a/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs b/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs
index d65f359f..645e44b5 100644
--- a/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs
+++ b/src/UglyToad.Pdf/Parser/Parts/CrossReference/CrossReferenceTableParser.cs
@@ -10,6 +10,9 @@
internal class CrossReferenceTableParser
{
+ private const string InUseEntry = "n";
+ private const string FreeEntry = "f";
+
private readonly ILog log;
private readonly CosDictionaryParser dictionaryParser;
private readonly CosBaseParser baseParser;
@@ -25,7 +28,8 @@
{
builder = null;
- long xrefTableStartOffset = source.GetPosition();
+ var tableStartOffset = source.GetPosition();
+
if (source.Peek() != 'x')
{
return false;
@@ -40,6 +44,7 @@
// check for trailer after xref
var str = ReadHelper.ReadString(source);
byte[] b = OtherEncodings.StringAsLatin1Bytes(str);
+
source.Rewind(b.Length);
if (str.StartsWith("trailer"))
@@ -54,23 +59,25 @@
XRefType = CrossReferenceType.Table
};
- // Xref tables can have multiple sections. Each starts with a starting object id and a count.
+ // Tables can have multiple sections. Each starts with a starting object id and a count.
while (true)
{
- var currentLine = ReadHelper.ReadLine(source);
- String[] splitString = currentLine.Split(new[] { "\\s" }, StringSplitOptions.RemoveEmptyEntries);
- if (splitString.Length != 2)
+                if (!TableSubsectionDefinition.TryRead(log, source, out var subsectionDefinition))
                 {
-                    log.Warn("Unexpected XRefTable Entry: " + currentLine);
-                    break;
+                    if (!isLenientParsing)
+                    {
+                        throw new InvalidOperationException($"Unexpected subsection definition in the cross-reference table at offset {offset}");
+                    }
+
+                    // Lenient mode: warn and stop reading subsections (as the replaced code did) rather than continue with a default (0, 0) definition.
+                    log.Warn($"Unexpected subsection definition in the cross-reference table at offset {offset}");
+                    break;
                 }
- // first obj id
- long currObjID = long.Parse(splitString[0]);
- // the number of objects in the xref table
- int count = int.Parse(splitString[1]);
+
+ var currentObjectId = subsectionDefinition.FirstNumber;
ReadHelper.SkipSpaces(source);
- for (int i = 0; i < count; i++)
+ for (var i = 0; i < subsectionDefinition.Count; i++)
{
if (source.IsEof() || ReadHelper.IsEndOfName((char)source.Peek()))
{
@@ -81,42 +88,47 @@
break;
}
//Ignore table contents
- currentLine = ReadHelper.ReadLine(source);
- splitString = currentLine.Split(new[] { "\\s" }, StringSplitOptions.RemoveEmptyEntries);
+ var currentLine = ReadHelper.ReadLine(source);
+ var splitString = currentLine.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries);
if (splitString.Length < 3)
{
log.Warn("invalid xref line: " + currentLine);
break;
}
- /* This supports the corrupt table as reported in
- * PDFBOX-474 (XXXX XXX XX n) */
- if (splitString[splitString.Length - 1].Equals("n"))
+ // This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n)
+ if (splitString[splitString.Length - 1].Equals(InUseEntry))
{
try
{
- long currOffset = long.Parse(splitString[0]);
- if (currOffset >= xrefTableStartOffset && currOffset <= source.GetPosition())
+ var objectOffset = long.Parse(splitString[0]);
+
+ if (objectOffset >= tableStartOffset && objectOffset <= source.GetPosition())
{
// PDFBOX-3923: offset points inside this table - that can't be good
- throw new InvalidOperationException("XRefTable offset " + currOffset +
- " is within xref table for " + currObjID);
+ throw new InvalidOperationException(
+ $"Object offset {objectOffset} is within its own cross-reference table for object {currentObjectId}");
}
- int currGenID = int.Parse(splitString[1]);
- builder.Add(currObjID, currGenID, currOffset);
+
+ var generation = int.Parse(splitString[1]);
+ builder.Add(currentObjectId, generation, objectOffset);
}
catch (FormatException e)
{
throw new InvalidOperationException("Bad", e);
}
}
- else if (!splitString[2].Equals("f"))
+ else if (!splitString[2].Equals(FreeEntry))
{
- throw new InvalidOperationException("Corrupt XRefTable Entry - ObjID:" + currObjID);
+ throw new InvalidOperationException(
+ $"Corrupt cross-reference table entry for object {currentObjectId}. The indicator was not 'n' or 'f' but {splitString[2]}.");
}
- currObjID++;
+
+ currentObjectId++;
+
ReadHelper.SkipSpaces(source);
}
+
ReadHelper.SkipSpaces(source);
if (!ReadHelper.IsDigit(source))
{
diff --git a/src/UglyToad.Pdf/Parser/Parts/CrossReference/FileCrossReferenceTableParser.cs b/src/UglyToad.Pdf/Parser/Parts/CrossReference/FileCrossReferenceTableParser.cs
index d272ba20..5ea7b1e9 100644
--- a/src/UglyToad.Pdf/Parser/Parts/CrossReference/FileCrossReferenceTableParser.cs
+++ b/src/UglyToad.Pdf/Parser/Parts/CrossReference/FileCrossReferenceTableParser.cs
@@ -45,15 +45,14 @@
var table = new CrossReferenceTableBuilder();
- long prev = xrefLocation;
+ long previousCrossReferenceLocation = xrefLocation;
// ---- parse whole chain of xref tables/object streams using PREV reference
HashSet prevSet = new HashSet();
- while (prev > 0)
+ while (previousCrossReferenceLocation > 0)
{
// seek to xref table
- reader.Seek(prev);
-
- // skip white spaces
+ reader.Seek(previousCrossReferenceLocation);
+
ReadHelper.SkipSpaces(reader);
var isTable = reader.Peek() == X;
@@ -63,7 +62,7 @@
{
// xref table and trailer
// use existing parser to parse xref table
- if (!crossReferenceTableParser.TryParse(reader, prev, isLenientParsing, pool, out var tableBuilder))
+ if (!crossReferenceTableParser.TryParse(reader, previousCrossReferenceLocation, isLenientParsing, pool, out var tableBuilder))
{
throw new InvalidOperationException($"Expected trailer object at position: {reader.GetPosition()}");
}
@@ -89,7 +88,7 @@
ReadHelper.SkipSpaces(reader);
try
{
- streamPart = ParseCrossReferenceStream(reader, prev, pool, isLenientParsing);
+ streamPart = ParseCrossReferenceStream(reader, previousCrossReferenceLocation, pool, isLenientParsing);
}
catch (InvalidOperationException ex)
{
@@ -115,46 +114,50 @@
}
}
}
- prev = trailer.GetLongOrDefault(CosName.PREV);
- if (prev > 0)
+ previousCrossReferenceLocation = trailer.GetLongOrDefault(CosName.PREV);
+ if (previousCrossReferenceLocation > 0)
{
// check the xref table reference
- fixedOffset = xrefOffsetValidator.CheckXRefOffset(prev, isLenientParsing);
- if (fixedOffset > -1 && fixedOffset != prev)
+ fixedOffset = xrefOffsetValidator.CheckXRefOffset(previousCrossReferenceLocation, isLenientParsing);
+ if (fixedOffset > -1 && fixedOffset != previousCrossReferenceLocation)
{
- prev = fixedOffset;
- trailer.SetLong(CosName.PREV, prev);
+ previousCrossReferenceLocation = fixedOffset;
+ trailer.SetLong(CosName.PREV, previousCrossReferenceLocation);
}
}
tableBuilder.Previous = tableBuilder.Dictionary.GetLongOrDefault(CosName.PREV);
table.Add(tableBuilder.AsCrossReferenceTablePart());
- table.Add(streamPart);
+
+ if (streamPart != null)
+ {
+ table.Add(streamPart);
+ }
}
else
{
// parse xref stream
- var tablePart = ParseCrossReferenceStream(reader, prev, pool, isLenientParsing);
+ var tablePart = ParseCrossReferenceStream(reader, previousCrossReferenceLocation, pool, isLenientParsing);
table.Add(tablePart);
- prev = tablePart.Previous;
- if (prev > 0)
+ previousCrossReferenceLocation = tablePart.Previous;
+ if (previousCrossReferenceLocation > 0)
{
// check the xref table reference
- fixedOffset = xrefOffsetValidator.CheckXRefOffset(prev, isLenientParsing);
- if (fixedOffset > -1 && fixedOffset != prev)
+ fixedOffset = xrefOffsetValidator.CheckXRefOffset(previousCrossReferenceLocation, isLenientParsing);
+ if (fixedOffset > -1 && fixedOffset != previousCrossReferenceLocation)
{
- prev = fixedOffset;
- tablePart.FixOffset(prev);
+ previousCrossReferenceLocation = fixedOffset;
+ tablePart.FixOffset(previousCrossReferenceLocation);
}
}
}
- if (prevSet.Contains(prev))
+ if (prevSet.Contains(previousCrossReferenceLocation))
{
- throw new InvalidOperationException("/Prev loop at offset " + prev);
+ throw new InvalidOperationException("/Prev loop at offset " + previousCrossReferenceLocation);
}
- prevSet.Add(prev);
+ prevSet.Add(previousCrossReferenceLocation);
}
var resolved = table.Build(xrefLocation, log);
diff --git a/src/UglyToad.Pdf/Parser/Parts/CrossReference/TableSubsectionDefinition.cs b/src/UglyToad.Pdf/Parser/Parts/CrossReference/TableSubsectionDefinition.cs
new file mode 100644
index 00000000..96a43266
--- /dev/null
+++ b/src/UglyToad.Pdf/Parser/Parts/CrossReference/TableSubsectionDefinition.cs
@@ -0,0 +1,82 @@
+namespace UglyToad.Pdf.Parser.Parts.CrossReference
+{
+    using System;
+    using System.Globalization;
+    using IO;
+    using Logging;
+
+    /// <summary>
+    /// Each subsection of the cross-reference table starts with a line defining the starting object number
+    /// and the count of objects in the subsection.
+    /// </summary>
+    /// <example>
+    /// xref
+    /// 12 16
+    /// ...
+    ///
+    /// Defines a table subsection that starts with object 12 and has 16 entries (12-27).
+    /// </example>
+    internal struct TableSubsectionDefinition
+    {
+        private static readonly char[] Splitters = { ' ' };
+
+        /// <summary>
+        /// The first object number in the table.
+        /// </summary>
+        public long FirstNumber { get; }
+
+        /// <summary>
+        /// The number of consecutive objects declared in the table.
+        /// </summary>
+        public int Count { get; }
+
+        /// <summary>
+        /// Create a new <see cref="TableSubsectionDefinition"/> to define a range of consecutive objects in the cross-reference table.
+        /// </summary>
+        public TableSubsectionDefinition(long firstNumber, int count)
+        {
+            FirstNumber = firstNumber;
+            Count = count;
+        }
+
+        /// <summary>
+        /// Attempts to read the <see cref="TableSubsectionDefinition"/> from the current line of the source.
+        /// </summary>
+        /// <param name="log">Receives an error entry when two fields are present but are not valid numbers.</param>
+        /// <param name="source">The input from which the next line is read.</param>
+        /// <param name="definition">The parsed definition on success, otherwise the default value.</param>
+        /// <returns><see langword="true"/> if the line held exactly two space-separated integers.</returns>
+        public static bool TryRead(ILog log, IRandomAccessRead source, out TableSubsectionDefinition definition)
+        {
+            definition = default(TableSubsectionDefinition);
+
+            var line = ReadHelper.ReadLine(source);
+            var parts = line.Split(Splitters, StringSplitOptions.RemoveEmptyEntries);
+
+            if (parts.Length != 2)
+            {
+                return false;
+            }
+
+            try
+            {
+                // File contents are machine-readable, so parse with the invariant culture rather than the current one.
+                var firstObjectId = long.Parse(parts[0], CultureInfo.InvariantCulture);
+                var objectCount = int.Parse(parts[1], CultureInfo.InvariantCulture);
+
+                definition = new TableSubsectionDefinition(firstObjectId, objectCount);
+
+                return true;
+            }
+            catch (Exception ex) when (ex is FormatException || ex is OverflowException)
+            {
+                log.Error($"The format for the subsection definition was invalid, expected [long] [int], instead got '{line}'", ex);
+
+                return false;
+            }
+        }
+
+        /// <inheritdoc />
+        public override string ToString() => $"{FirstNumber} {Count}";
+    }
+}
\ No newline at end of file