mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
handle case where contents is an array of objects
This commit is contained in:
Binary file not shown.
57
src/UglyToad.Pdf.Tests/Integration/JudgementDocumentTests.cs
Normal file
57
src/UglyToad.Pdf.Tests/Integration/JudgementDocumentTests.cs
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
namespace UglyToad.Pdf.Tests.Integration
{
    using System;
    using System.IO;
    using System.Linq;
    using Content;
    using Xunit;

    /// <summary>
    /// Integration tests which open the "Judgement Document.pdf" sample and check
    /// its page count, some of the text on its first two pages and the size of its pages.
    /// </summary>
    public class JudgementDocumentTests
    {
        /// <summary>
        /// Builds the full path to the sample document, resolved relative to the
        /// base directory of the current application domain.
        /// </summary>
        /// <returns>The path of "Judgement Document.pdf" inside the Integration/Documents folder.</returns>
        private static string GetFilename()
        {
            var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

            // Walk three levels up from the base directory before descending into the documents folder.
            var relativeFolder = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");
            var absoluteFolder = Path.GetFullPath(relativeFolder);

            return Path.Combine(absoluteFolder, "Judgement Document.pdf");
        }

        /// <summary>
        /// Opens the document from its raw bytes and checks that it reports 13 pages.
        /// </summary>
        [Fact]
        public void HasCorrectNumberOfPages()
        {
            var pdfBytes = File.ReadAllBytes(GetFilename());

            using (var pdf = PdfDocument.Open(pdfBytes))
            {
                var pageCount = pdf.NumberOfPages;

                Assert.Equal(13, pageCount);
            }
        }

        /// <summary>
        /// Opens the document by file name and checks that pages 1 and 2 contain the expected text.
        /// </summary>
        [Fact]
        public void HasCorrectPageContents()
        {
            using (var pdf = PdfDocument.Open(GetFilename()))
            {
                var firstPage = pdf.GetPage(1);
                Assert.Contains("Royal Courts of Justice, Rolls Building Fetter Lane, London, EC4A 1NL", firstPage.Text);

                var secondPage = pdf.GetPage(2);
                Assert.Contains("The reference to BAR is to another trade organisation of which CMUK was", secondPage.Text);
            }
        }

        /// <summary>
        /// Loads pages 1 to 13 and checks that every one of them has the A4 page size.
        /// </summary>
        [Fact]
        public void HasCorrectPageSize()
        {
            using (var pdf = PdfDocument.Open(GetFilename()))
            {
                // All pages are loaded up front so that every page is checked by Assert.All.
                var everyPage = (from pageNumber in Enumerable.Range(1, 13)
                                 select pdf.GetPage(pageNumber)).ToList();

                Assert.All(everyPage, current => Assert.Equal(PageSize.A4, current.Size));
            }
        }
    }
}
|
@@ -2,8 +2,6 @@
|
|||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
|
||||||
using Content;
|
|
||||||
using Xunit;
|
using Xunit;
|
||||||
|
|
||||||
public class SinglePageType1FontTests
|
public class SinglePageType1FontTests
|
@@ -12,6 +12,7 @@
|
|||||||
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
|
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
|
||||||
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
|
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
|
||||||
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
|
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
|
||||||
|
<None Remove="Integration\Documents\Judgement Document.pdf" />
|
||||||
<None Remove="Integration\Documents\Multiple Page - from Mortality Statistics.pdf" />
|
<None Remove="Integration\Documents\Multiple Page - from Mortality Statistics.pdf" />
|
||||||
<None Remove="Integration\Documents\Single Page Form Content - from itext 1_1.pdf" />
|
<None Remove="Integration\Documents\Single Page Form Content - from itext 1_1.pdf" />
|
||||||
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
|
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
|
||||||
@@ -34,6 +35,9 @@
|
|||||||
<Content Include="Integration\Documents\Font Size Test - from google chrome print pdf.pdf">
|
<Content Include="Integration\Documents\Font Size Test - from google chrome print pdf.pdf">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</Content>
|
</Content>
|
||||||
|
<Content Include="Integration\Documents\Judgement Document.pdf">
|
||||||
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
|
</Content>
|
||||||
<Content Include="Integration\Documents\Multiple Page - from Mortality Statistics.pdf">
|
<Content Include="Integration\Documents\Multiple Page - from Mortality Statistics.pdf">
|
||||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
</Content>
|
</Content>
|
||||||
|
@@ -7,6 +7,7 @@
|
|||||||
using Fonts;
|
using Fonts;
|
||||||
using IO;
|
using IO;
|
||||||
using Parser;
|
using Parser;
|
||||||
|
using Parser.Parts;
|
||||||
|
|
||||||
internal class ResourceContainer : IResourceStore
|
internal class ResourceContainer : IResourceStore
|
||||||
{
|
{
|
||||||
@@ -71,7 +72,7 @@
|
|||||||
throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}.");
|
throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}.");
|
||||||
}
|
}
|
||||||
|
|
||||||
var fontObject = pdfObjectParser.Parse(objectKey.ToIndirectReference(), reader, false) as PdfDictionary;
|
var fontObject = DirectObjectFinder.Find<PdfDictionary>(objectKey, pdfObjectParser, reader, false);
|
||||||
|
|
||||||
if (fontObject == null)
|
if (fontObject == null)
|
||||||
{
|
{
|
||||||
|
@@ -343,6 +343,74 @@
|
|||||||
break;
|
break;
|
||||||
case FamilyName:
|
case FamilyName:
|
||||||
builder.FamilyName = ReadLine(bytes);
|
builder.FamilyName = ReadLine(bytes);
|
||||||
|
break;
|
||||||
|
case Weight:
|
||||||
|
builder.Weight = ReadLine(bytes);
|
||||||
|
break;
|
||||||
|
case ItalicAngle:
|
||||||
|
builder.ItalicAngle = ReadDecimal(bytes);
|
||||||
|
break;
|
||||||
|
case IsFixedPitch:
|
||||||
|
builder.IsFixedPitch = ReadBool(bytes);
|
||||||
|
break;
|
||||||
|
case FontBbox:
|
||||||
|
builder.SetBoundingBox(ReadDecimal(bytes), ReadDecimal(bytes),
|
||||||
|
ReadDecimal(bytes), ReadDecimal(bytes));
|
||||||
|
break;
|
||||||
|
case UnderlinePosition:
|
||||||
|
builder.UnderlinePosition = ReadDecimal(bytes);
|
||||||
|
break;
|
||||||
|
case UnderlineThickness:
|
||||||
|
builder.UnderlineThickness = ReadDecimal(bytes);
|
||||||
|
break;
|
||||||
|
case Version:
|
||||||
|
builder.Version = ReadLine(bytes);
|
||||||
|
break;
|
||||||
|
case Notice:
|
||||||
|
builder.Notice = ReadLine(bytes);
|
||||||
|
break;
|
||||||
|
case EncodingScheme:
|
||||||
|
builder.EncodingScheme = ReadLine(bytes);
|
||||||
|
break;
|
||||||
|
case MappingScheme:
|
||||||
|
builder.MappingScheme = (int) ReadDecimal(bytes);
|
||||||
|
break;
|
||||||
|
case CharacterSet:
|
||||||
|
builder.CharacterSet = ReadLine(bytes);
|
||||||
|
break;
|
||||||
|
case IsBaseFont:
|
||||||
|
builder.IsBaseFont = ReadBool(bytes);
|
||||||
|
break;
|
||||||
|
case CapHeight:
|
||||||
|
builder.CapHeight = ReadDecimal(bytes);
|
||||||
|
break;
|
||||||
|
case XHeight:
|
||||||
|
builder.XHeight = ReadDecimal(bytes);
|
||||||
|
break;
|
||||||
|
case Ascender:
|
||||||
|
builder.Ascender = ReadDecimal(bytes);
|
||||||
|
break;
|
||||||
|
case Descender:
|
||||||
|
builder.Descender = ReadDecimal(bytes);
|
||||||
|
break;
|
||||||
|
case StdHw:
|
||||||
|
builder.StdHw = ReadDecimal(bytes);
|
||||||
|
break;
|
||||||
|
case StdVw:
|
||||||
|
builder.StdVw = ReadDecimal(bytes);
|
||||||
|
break;
|
||||||
|
case CharWidth:
|
||||||
|
builder.SetCharacterWidth(ReadDecimal(bytes), ReadDecimal(bytes));
|
||||||
|
break;
|
||||||
|
case StartCharMetrics:
|
||||||
|
var count = (int)ReadDecimal(bytes);
|
||||||
|
for (int i = 0; i < count; i++)
|
||||||
|
{
|
||||||
|
var metric = ReadCharacterMetric(bytes);
|
||||||
|
builder.CharacterMetrics.Add(metric);
|
||||||
|
}
|
||||||
|
var end = ReadString(bytes);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -414,6 +482,13 @@
|
|||||||
|
|
||||||
return Builder.ToString();
|
return Builder.ToString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads one line from the input through <c>ReadLine</c> and returns a newly
/// constructed <see cref="IndividualCharacterMetric"/>.
/// </summary>
/// <param name="bytes">The input to read the character metric line from.</param>
/// <returns>A metric created with the parameterless constructor.</returns>
/// <remarks>
/// NOTE(review): the text of the line is not used to populate the result, so this
/// looks like a placeholder implementation - confirm before relying on the values.
/// </remarks>
private static IndividualCharacterMetric ReadCharacterMetric(IInputBytes bytes)
{
    // The line is still read even though its content is currently unused.
    var metricLine = ReadLine(bytes);

    var metric = new IndividualCharacterMetric();

    return metric;
}
|
||||||
}
|
}
|
||||||
|
|
||||||
internal interface IAdobeFontMetricsParser
|
internal interface IAdobeFontMetricsParser
|
||||||
|
@@ -7,6 +7,7 @@
|
|||||||
using IO;
|
using IO;
|
||||||
using Parts;
|
using Parts;
|
||||||
using Pdf.Parser;
|
using Pdf.Parser;
|
||||||
|
using Pdf.Parser.Parts;
|
||||||
|
|
||||||
internal static class FontDictionaryAccessHelper
|
internal static class FontDictionaryAccessHelper
|
||||||
{
|
{
|
||||||
@@ -32,11 +33,16 @@
|
|||||||
return lastChar.AsInt();
|
return lastChar.AsInt();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static decimal[] GetWidths(PdfDictionary dictionary)
|
public static decimal[] GetWidths(IPdfObjectParser pdfObjectParser, PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||||
{
|
{
|
||||||
if (!dictionary.TryGetItemOfType(CosName.WIDTHS, out COSArray widthArray))
|
if (!dictionary.TryGetItemOfType(CosName.WIDTHS, out COSArray widthArray))
|
||||||
{
|
{
|
||||||
throw new InvalidFontFormatException($"No widths array was found in the font dictionary for this TrueType font: {dictionary}.");
|
if (!dictionary.TryGetItemOfType(CosName.WIDTHS, out CosObject arr))
|
||||||
|
{
|
||||||
|
throw new InvalidFontFormatException($"No widths array was found in the font dictionary for this TrueType font: {dictionary}.");
|
||||||
|
}
|
||||||
|
|
||||||
|
widthArray = DirectObjectFinder.Find<COSArray>(arr, pdfObjectParser, reader, isLenientParsing);
|
||||||
}
|
}
|
||||||
|
|
||||||
return widthArray.Select(x => ((ICosNumber)x).AsDecimal()).ToArray();
|
return widthArray.Select(x => ((ICosNumber)x).AsDecimal()).ToArray();
|
||||||
|
@@ -41,7 +41,7 @@
|
|||||||
|
|
||||||
var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
|
var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
|
||||||
|
|
||||||
var widths = FontDictionaryAccessHelper.GetWidths(dictionary);
|
var widths = FontDictionaryAccessHelper.GetWidths(pdfObjectParser, dictionary, reader, isLenientParsing);
|
||||||
|
|
||||||
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfObjectParser, fontDescriptorFactory, dictionary, reader, isLenientParsing);
|
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfObjectParser, fontDescriptorFactory, dictionary, reader, isLenientParsing);
|
||||||
|
|
||||||
|
@@ -33,7 +33,7 @@
|
|||||||
|
|
||||||
var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
|
var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
|
||||||
|
|
||||||
var widths = FontDictionaryAccessHelper.GetWidths(dictionary);
|
var widths = FontDictionaryAccessHelper.GetWidths(pdfObjectParser, dictionary, reader, isLenientParsing);
|
||||||
|
|
||||||
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfObjectParser, fontDescriptorFactory, dictionary, reader, isLenientParsing);
|
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfObjectParser, fontDescriptorFactory, dictionary, reader, isLenientParsing);
|
||||||
|
|
||||||
|
@@ -6,6 +6,7 @@
|
|||||||
using Cos;
|
using Cos;
|
||||||
using Exceptions;
|
using Exceptions;
|
||||||
using IO;
|
using IO;
|
||||||
|
using Parts;
|
||||||
|
|
||||||
internal class CatalogFactory
|
internal class CatalogFactory
|
||||||
{
|
{
|
||||||
@@ -33,7 +34,7 @@
|
|||||||
throw new PdfDocumentFormatException($"No pages entry was found in the catalog dictionary: {dictionary}.");
|
throw new PdfDocumentFormatException($"No pages entry was found in the catalog dictionary: {dictionary}.");
|
||||||
}
|
}
|
||||||
|
|
||||||
var pages = pdfObjectParser.Parse(value.ToIndirectReference(), reader, isLenientParsing);
|
var pages = DirectObjectFinder.Find<PdfDictionary>(value, pdfObjectParser, reader, isLenientParsing);
|
||||||
|
|
||||||
if (!(pages is PdfDictionary pagesDictionary))
|
if (!(pages is PdfDictionary pagesDictionary))
|
||||||
{
|
{
|
||||||
|
@@ -1,9 +1,13 @@
|
|||||||
namespace UglyToad.Pdf.Parser
|
namespace UglyToad.Pdf.Parser
|
||||||
{
|
{
|
||||||
using System;
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Diagnostics;
|
||||||
|
using System.Linq;
|
||||||
using Content;
|
using Content;
|
||||||
using ContentStream;
|
using ContentStream;
|
||||||
using Cos;
|
using Cos;
|
||||||
|
using Exceptions;
|
||||||
using Filters;
|
using Filters;
|
||||||
using Geometry;
|
using Geometry;
|
||||||
using Graphics;
|
using Graphics;
|
||||||
@@ -51,8 +55,8 @@
|
|||||||
|
|
||||||
PageContent content = default(PageContent);
|
PageContent content = default(PageContent);
|
||||||
|
|
||||||
var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;
|
var contents = dictionary.GetItemOrDefault(CosName.CONTENTS);
|
||||||
if (contentObject != null)
|
if (contents is CosObject contentObject)
|
||||||
{
|
{
|
||||||
var contentStream = DirectObjectFinder.Find<PdfRawStream>(contentObject, pdfObjectParser, reader, false);
|
var contentStream = DirectObjectFinder.Find<PdfRawStream>(contentObject, pdfObjectParser, reader, false);
|
||||||
|
|
||||||
@@ -61,15 +65,33 @@
|
|||||||
throw new InvalidOperationException("Failed to parse the content for the page: " + number);
|
throw new InvalidOperationException("Failed to parse the content for the page: " + number);
|
||||||
}
|
}
|
||||||
|
|
||||||
var contents = contentStream.Decode(filterProvider);
|
var bytes = contentStream.Decode(filterProvider);
|
||||||
|
|
||||||
var txt = OtherEncodings.BytesAsLatin1String(contents);
|
content = GetContent(bytes, cropBox, userSpaceUnit);
|
||||||
|
}
|
||||||
|
else if (contents is COSArray arr)
|
||||||
|
{
|
||||||
|
var bytes = new List<byte>();
|
||||||
|
|
||||||
|
foreach (var item in arr)
|
||||||
|
{
|
||||||
|
var obj = item as CosObject;
|
||||||
|
if (obj == null)
|
||||||
|
{
|
||||||
|
throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
|
||||||
|
}
|
||||||
|
|
||||||
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));
|
var contentStream = DirectObjectFinder.Find<PdfRawStream>(obj, pdfObjectParser, reader, isLenientParsing);
|
||||||
|
|
||||||
|
if (contentStream == null)
|
||||||
|
{
|
||||||
|
throw new InvalidOperationException($"Could not find the contents for object {obj}.");
|
||||||
|
}
|
||||||
|
|
||||||
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);
|
bytes.AddRange(contentStream.Decode(filterProvider));
|
||||||
|
}
|
||||||
|
|
||||||
content = context.Process(operations);
|
content = GetContent(bytes, cropBox, userSpaceUnit);
|
||||||
}
|
}
|
||||||
|
|
||||||
var page = new Page(number, mediaBox, cropBox, content);
|
var page = new Page(number, mediaBox, cropBox, content);
|
||||||
@@ -77,6 +99,20 @@
|
|||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Parses the supplied content bytes into operations and runs them through a
/// <see cref="ContentStreamProcessor"/> to produce the page content.
/// </summary>
/// <param name="contentBytes">The bytes of the page content to parse.</param>
/// <param name="cropBox">The crop box whose bounds are passed to the processor.</param>
/// <param name="userSpaceUnit">The user space unit passed to the processor.</param>
/// <returns>The result of processing the parsed operations.</returns>
private PageContent GetContent(IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit)
{
    if (Debugger.IsAttached)
    {
        // Debugging aid only: gives a Latin-1 view of the bytes to inspect; the value is not used by the code.
        var latin1Preview = OtherEncodings.BytesAsLatin1String(contentBytes.ToArray());
    }

    var input = new ByteArrayInputBytes(contentBytes);
    var parsedOperations = pageContentParser.Parse(input);

    var processor = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);

    return processor.Process(parsedOperations);
}
|
||||||
|
|
||||||
private static UserSpaceUnit GetUserSpaceUnits(PdfDictionary dictionary)
|
private static UserSpaceUnit GetUserSpaceUnits(PdfDictionary dictionary)
|
||||||
{
|
{
|
||||||
var spaceUnits = UserSpaceUnit.Default;
|
var spaceUnits = UserSpaceUnit.Default;
|
||||||
|
Reference in New Issue
Block a user