mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
handle case where contents is an array of objects
This commit is contained in:
Binary file not shown.
57
src/UglyToad.Pdf.Tests/Integration/JudgementDocumentTests.cs
Normal file
57
src/UglyToad.Pdf.Tests/Integration/JudgementDocumentTests.cs
Normal file
@@ -0,0 +1,57 @@
|
||||
namespace UglyToad.Pdf.Tests.Integration
{
    using System;
    using System.IO;
    using System.Linq;
    using Content;
    using Xunit;

    /// <summary>
    /// Integration tests which open the "Judgement Document.pdf" fixture and check its
    /// page count, a text fragment from each of the first two pages and the size of every page.
    /// </summary>
    public class JudgementDocumentTests
    {
        /// <summary>
        /// The number of pages the fixture document is expected to contain.
        /// </summary>
        private const int ExpectedPageCount = 13;

        /// <summary>
        /// Builds the absolute path to the fixture by walking three directories up from the
        /// application base directory and then into "Integration/Documents".
        /// </summary>
        private static string GetFilename()
        {
            var relativeFolder = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents");
            var absoluteFolder = Path.GetFullPath(relativeFolder);

            return Path.Combine(absoluteFolder, "Judgement Document.pdf");
        }

        [Fact]
        public void HasCorrectNumberOfPages()
        {
            // This test deliberately goes through the byte[] overload of Open,
            // the other tests in this class open the document by path.
            var fileBytes = File.ReadAllBytes(GetFilename());

            using (var document = PdfDocument.Open(fileBytes))
            {
                Assert.Equal(ExpectedPageCount, document.NumberOfPages);
            }
        }

        [Fact]
        public void HasCorrectPageContents()
        {
            using (var document = PdfDocument.Open(GetFilename()))
            {
                var firstPage = document.GetPage(1);
                Assert.Contains("Royal Courts of Justice, Rolls Building Fetter Lane, London, EC4A 1NL", firstPage.Text);

                var secondPage = document.GetPage(2);
                Assert.Contains("The reference to BAR is to another trade organisation of which CMUK was", secondPage.Text);
            }
        }

        [Fact]
        public void HasCorrectPageSize()
        {
            using (var document = PdfDocument.Open(GetFilename()))
            {
                // Load every page up front (page numbers start at 1) before asserting on any of them.
                var allPages = (from pageNumber in Enumerable.Range(1, ExpectedPageCount)
                                select document.GetPage(pageNumber)).ToList();

                Assert.All(allPages, page => Assert.Equal(PageSize.A4, page.Size));
            }
        }
    }
}
|
@@ -2,8 +2,6 @@
|
||||
{
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Content;
|
||||
using Xunit;
|
||||
|
||||
public class SinglePageType1FontTests
|
@@ -12,6 +12,7 @@
|
||||
<None Remove="Fonts\TrueType\Roboto-Regular.ttf" />
|
||||
<None Remove="Integration\Documents\Font Size Test - from google chrome print pdf.pdf" />
|
||||
<None Remove="Integration\Documents\Font Size Test - from libre office.pdf" />
|
||||
<None Remove="Integration\Documents\Judgement Document.pdf" />
|
||||
<None Remove="Integration\Documents\Multiple Page - from Mortality Statistics.pdf" />
|
||||
<None Remove="Integration\Documents\Single Page Form Content - from itext 1_1.pdf" />
|
||||
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
|
||||
@@ -34,6 +35,9 @@
|
||||
<Content Include="Integration\Documents\Font Size Test - from google chrome print pdf.pdf">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
<Content Include="Integration\Documents\Judgement Document.pdf">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
<Content Include="Integration\Documents\Multiple Page - from Mortality Statistics.pdf">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
|
@@ -7,6 +7,7 @@
|
||||
using Fonts;
|
||||
using IO;
|
||||
using Parser;
|
||||
using Parser.Parts;
|
||||
|
||||
internal class ResourceContainer : IResourceStore
|
||||
{
|
||||
@@ -71,7 +72,7 @@
|
||||
throw new InvalidOperationException($"The font with name {pair.Key} did not link to an object key. Value was: {pair.Value}.");
|
||||
}
|
||||
|
||||
var fontObject = pdfObjectParser.Parse(objectKey.ToIndirectReference(), reader, false) as PdfDictionary;
|
||||
var fontObject = DirectObjectFinder.Find<PdfDictionary>(objectKey, pdfObjectParser, reader, false);
|
||||
|
||||
if (fontObject == null)
|
||||
{
|
||||
|
@@ -343,6 +343,74 @@
|
||||
break;
|
||||
case FamilyName:
|
||||
builder.FamilyName = ReadLine(bytes);
|
||||
break;
|
||||
case Weight:
|
||||
builder.Weight = ReadLine(bytes);
|
||||
break;
|
||||
case ItalicAngle:
|
||||
builder.ItalicAngle = ReadDecimal(bytes);
|
||||
break;
|
||||
case IsFixedPitch:
|
||||
builder.IsFixedPitch = ReadBool(bytes);
|
||||
break;
|
||||
case FontBbox:
|
||||
builder.SetBoundingBox(ReadDecimal(bytes), ReadDecimal(bytes),
|
||||
ReadDecimal(bytes), ReadDecimal(bytes));
|
||||
break;
|
||||
case UnderlinePosition:
|
||||
builder.UnderlinePosition = ReadDecimal(bytes);
|
||||
break;
|
||||
case UnderlineThickness:
|
||||
builder.UnderlineThickness = ReadDecimal(bytes);
|
||||
break;
|
||||
case Version:
|
||||
builder.Version = ReadLine(bytes);
|
||||
break;
|
||||
case Notice:
|
||||
builder.Notice = ReadLine(bytes);
|
||||
break;
|
||||
case EncodingScheme:
|
||||
builder.EncodingScheme = ReadLine(bytes);
|
||||
break;
|
||||
case MappingScheme:
|
||||
builder.MappingScheme = (int) ReadDecimal(bytes);
|
||||
break;
|
||||
case CharacterSet:
|
||||
builder.CharacterSet = ReadLine(bytes);
|
||||
break;
|
||||
case IsBaseFont:
|
||||
builder.IsBaseFont = ReadBool(bytes);
|
||||
break;
|
||||
case CapHeight:
|
||||
builder.CapHeight = ReadDecimal(bytes);
|
||||
break;
|
||||
case XHeight:
|
||||
builder.XHeight = ReadDecimal(bytes);
|
||||
break;
|
||||
case Ascender:
|
||||
builder.Ascender = ReadDecimal(bytes);
|
||||
break;
|
||||
case Descender:
|
||||
builder.Descender = ReadDecimal(bytes);
|
||||
break;
|
||||
case StdHw:
|
||||
builder.StdHw = ReadDecimal(bytes);
|
||||
break;
|
||||
case StdVw:
|
||||
builder.StdVw = ReadDecimal(bytes);
|
||||
break;
|
||||
case CharWidth:
|
||||
builder.SetCharacterWidth(ReadDecimal(bytes), ReadDecimal(bytes));
|
||||
break;
|
||||
case StartCharMetrics:
|
||||
var count = (int)ReadDecimal(bytes);
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
var metric = ReadCharacterMetric(bytes);
|
||||
builder.CharacterMetrics.Add(metric);
|
||||
}
|
||||
var end = ReadString(bytes);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -414,6 +482,13 @@
|
||||
|
||||
return Builder.ToString();
|
||||
}
|
||||
|
||||
/// <summary>
/// Placeholder reader for one character metric entry: reads a single line from
/// <paramref name="bytes"/> and returns an empty <see cref="IndividualCharacterMetric"/>.
/// The content of the line is not interpreted yet.
/// </summary>
/// <param name="bytes">The input to read the metric line from.</param>
/// <returns>A newly constructed, unpopulated metric.</returns>
private static IndividualCharacterMetric ReadCharacterMetric(IInputBytes bytes)
{
    // The result is intentionally ignored; the read is kept because it presumably
    // advances the input past this entry (NOTE(review): confirm against ReadLine).
    ReadLine(bytes);

    return new IndividualCharacterMetric();
}
|
||||
}
|
||||
|
||||
internal interface IAdobeFontMetricsParser
|
||||
|
@@ -7,6 +7,7 @@
|
||||
using IO;
|
||||
using Parts;
|
||||
using Pdf.Parser;
|
||||
using Pdf.Parser.Parts;
|
||||
|
||||
internal static class FontDictionaryAccessHelper
|
||||
{
|
||||
@@ -32,11 +33,16 @@
|
||||
return lastChar.AsInt();
|
||||
}
|
||||
|
||||
public static decimal[] GetWidths(PdfDictionary dictionary)
|
||||
public static decimal[] GetWidths(IPdfObjectParser pdfObjectParser, PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing)
|
||||
{
|
||||
if (!dictionary.TryGetItemOfType(CosName.WIDTHS, out COSArray widthArray))
|
||||
{
|
||||
throw new InvalidFontFormatException($"No widths array was found in the font dictionary for this TrueType font: {dictionary}.");
|
||||
if (!dictionary.TryGetItemOfType(CosName.WIDTHS, out CosObject arr))
|
||||
{
|
||||
throw new InvalidFontFormatException($"No widths array was found in the font dictionary for this TrueType font: {dictionary}.");
|
||||
}
|
||||
|
||||
widthArray = DirectObjectFinder.Find<COSArray>(arr, pdfObjectParser, reader, isLenientParsing);
|
||||
}
|
||||
|
||||
return widthArray.Select(x => ((ICosNumber)x).AsDecimal()).ToArray();
|
||||
|
@@ -41,7 +41,7 @@
|
||||
|
||||
var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
|
||||
|
||||
var widths = FontDictionaryAccessHelper.GetWidths(dictionary);
|
||||
var widths = FontDictionaryAccessHelper.GetWidths(pdfObjectParser, dictionary, reader, isLenientParsing);
|
||||
|
||||
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfObjectParser, fontDescriptorFactory, dictionary, reader, isLenientParsing);
|
||||
|
||||
|
@@ -33,7 +33,7 @@
|
||||
|
||||
var lastCharacter = FontDictionaryAccessHelper.GetLastCharacter(dictionary);
|
||||
|
||||
var widths = FontDictionaryAccessHelper.GetWidths(dictionary);
|
||||
var widths = FontDictionaryAccessHelper.GetWidths(pdfObjectParser, dictionary, reader, isLenientParsing);
|
||||
|
||||
var descriptor = FontDictionaryAccessHelper.GetFontDescriptor(pdfObjectParser, fontDescriptorFactory, dictionary, reader, isLenientParsing);
|
||||
|
||||
|
@@ -6,6 +6,7 @@
|
||||
using Cos;
|
||||
using Exceptions;
|
||||
using IO;
|
||||
using Parts;
|
||||
|
||||
internal class CatalogFactory
|
||||
{
|
||||
@@ -33,7 +34,7 @@
|
||||
throw new PdfDocumentFormatException($"No pages entry was found in the catalog dictionary: {dictionary}.");
|
||||
}
|
||||
|
||||
var pages = pdfObjectParser.Parse(value.ToIndirectReference(), reader, isLenientParsing);
|
||||
var pages = DirectObjectFinder.Find<PdfDictionary>(value, pdfObjectParser, reader, isLenientParsing);
|
||||
|
||||
if (!(pages is PdfDictionary pagesDictionary))
|
||||
{
|
||||
|
@@ -1,9 +1,13 @@
|
||||
namespace UglyToad.Pdf.Parser
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using Content;
|
||||
using ContentStream;
|
||||
using Cos;
|
||||
using Exceptions;
|
||||
using Filters;
|
||||
using Geometry;
|
||||
using Graphics;
|
||||
@@ -51,8 +55,8 @@
|
||||
|
||||
PageContent content = default(PageContent);
|
||||
|
||||
var contentObject = dictionary.GetItemOrDefault(CosName.CONTENTS) as CosObject;
|
||||
if (contentObject != null)
|
||||
var contents = dictionary.GetItemOrDefault(CosName.CONTENTS);
|
||||
if (contents is CosObject contentObject)
|
||||
{
|
||||
var contentStream = DirectObjectFinder.Find<PdfRawStream>(contentObject, pdfObjectParser, reader, false);
|
||||
|
||||
@@ -61,15 +65,33 @@
|
||||
throw new InvalidOperationException("Failed to parse the content for the page: " + number);
|
||||
}
|
||||
|
||||
var contents = contentStream.Decode(filterProvider);
|
||||
var bytes = contentStream.Decode(filterProvider);
|
||||
|
||||
var txt = OtherEncodings.BytesAsLatin1String(contents);
|
||||
content = GetContent(bytes, cropBox, userSpaceUnit);
|
||||
}
|
||||
else if (contents is COSArray arr)
|
||||
{
|
||||
var bytes = new List<byte>();
|
||||
|
||||
foreach (var item in arr)
|
||||
{
|
||||
var obj = item as CosObject;
|
||||
if (obj == null)
|
||||
{
|
||||
throw new PdfDocumentFormatException($"The contents contained something which was not an indirect reference: {item}.");
|
||||
}
|
||||
|
||||
var operations = pageContentParser.Parse(new ByteArrayInputBytes(contents));
|
||||
var contentStream = DirectObjectFinder.Find<PdfRawStream>(obj, pdfObjectParser, reader, isLenientParsing);
|
||||
|
||||
if (contentStream == null)
|
||||
{
|
||||
throw new InvalidOperationException($"Could not find the contents for object {obj}.");
|
||||
}
|
||||
|
||||
var context = new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit);
|
||||
bytes.AddRange(contentStream.Decode(filterProvider));
|
||||
}
|
||||
|
||||
content = context.Process(operations);
|
||||
content = GetContent(bytes, cropBox, userSpaceUnit);
|
||||
}
|
||||
|
||||
var page = new Page(number, mediaBox, cropBox, content);
|
||||
@@ -77,6 +99,20 @@
|
||||
return page;
|
||||
}
|
||||
|
||||
/// <summary>
/// Parses the decoded content stream bytes of a page into operations and runs them
/// through a new <see cref="ContentStreamProcessor"/> to produce the page content.
/// </summary>
/// <param name="contentBytes">The decoded bytes of the page's content stream(s).</param>
/// <param name="cropBox">The page crop box; its bounds are passed to the processor.</param>
/// <param name="userSpaceUnit">The user space unit passed to the processor.</param>
/// <returns>The result of processing the parsed operations.</returns>
private PageContent GetContent(IReadOnlyList<byte> contentBytes, CropBox cropBox, UserSpaceUnit userSpaceUnit)
{
    if (Debugger.IsAttached)
    {
        // Debugging aid only: gives a readable view of the raw content stream when
        // stepping through with a debugger attached. The value is not used by the code.
        var contentAsText = OtherEncodings.BytesAsLatin1String(contentBytes.ToArray());
    }

    var parsedOperations = pageContentParser.Parse(new ByteArrayInputBytes(contentBytes));

    return new ContentStreamProcessor(cropBox.Bounds, resourceStore, userSpaceUnit).Process(parsedOperations);
}
|
||||
|
||||
private static UserSpaceUnit GetUserSpaceUnits(PdfDictionary dictionary)
|
||||
{
|
||||
var spaceUnits = UserSpaceUnit.Default;
|
||||
|
Reference in New Issue
Block a user