fix bug where cross-reference stream subsections were skipped

a single cross-reference stream may contain multiple disjoint runs of object numbers. previously we only took the first run; now we load the objects from every run.

adds indexer to array token for ease-of-use.

adds page number and bounds information to all form fields.
This commit is contained in:
Eliot Jones
2019-10-10 16:05:21 +01:00
parent 2ef45f71d5
commit dec4c31a33
15 changed files with 204 additions and 66 deletions

View File

@@ -39,19 +39,20 @@
using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff))
{
var form = document.GetForm();
Assert.Equal(16, form.Fields.Count);
Assert.Equal(18, form.Fields.Count);
}
}
// Opens the test document with lenient parsing disabled, reads its form and the
// annotations of page 1, then checks how many fields the form exposes.
[Fact]
public void GetsEmptyFormFields()
{
using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff))
{
var form = document.GetForm();
// NOTE(review): 'annots' is never asserted on in this test; presumably the call is kept
// to exercise annotation loading for page 1 - confirm, or remove the local.
var annots = document.GetPage(1).ExperimentalAccess.GetAnnotations().ToList();
// The form read from this document is expected to expose exactly 16 fields.
Assert.Equal(16, form.Fields.Count);
}
}
//[Fact]
//public void GetFormFieldsByPage()
//{
// using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff))
// {
// var form = document.GetForm();
// var fields = form.GetFieldsForPage(1).ToList();
// var page = document.GetPage(1).ExperimentalAccess.GetAnnotations().ToList();
// Assert.Equal(16, fields.Count);
// }
//}
}
}

View File

@@ -48,6 +48,25 @@
Fields = fields ?? throw new ArgumentNullException(nameof(fields));
}
/// <summary>
/// Get the set of fields which appear on the given page number.
/// </summary>
/// <param name="pageNumber">The number of the page to get the fields for, page numbers start at 1.</param>
/// <returns>
/// Every field in <see cref="Fields"/> whose page number equals <paramref name="pageNumber"/>.
/// Fields without a known page number are never returned. The sequence is evaluated lazily.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown as soon as the method is called if <paramref name="pageNumber"/> is less than 1.
/// </exception>
public IEnumerable<AcroFieldBase> GetFieldsForPage(int pageNumber)
{
    // Validate here rather than inside the iterator: code in a method containing 'yield'
    // does not run until the first MoveNext, which would delay (or entirely hide) this error.
    if (pageNumber <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(pageNumber), $"Page number starts at 1, instead got {pageNumber}.");
    }

    return GetFieldsForPageIterator(pageNumber);
}

/// <summary>
/// Lazily yields the fields whose page number matches an already validated <paramref name="pageNumber"/>.
/// </summary>
private IEnumerable<AcroFieldBase> GetFieldsForPageIterator(int pageNumber)
{
    foreach (var field in Fields)
    {
        // PageNumber is nullable; a field with no page number never compares equal.
        if (field.Value.PageNumber == pageNumber)
        {
            yield return field.Value;
        }
    }
}
/// <inheritdoc />
public override string ToString()
{

View File

@@ -7,6 +7,7 @@
using Exceptions;
using Fields;
using Filters;
using Geometry;
using Parser.Parts;
using Tokenization.Scanner;
using Tokens;
@@ -91,7 +92,7 @@
var fieldDictionary = DirectObjectFinder.Get<DictionaryToken>(fieldToken, tokenScanner);
var field = GetAcroField(fieldDictionary);
var field = GetAcroField(fieldDictionary, catalog);
fields[fieldReferenceToken.Data] = field;
}
@@ -99,7 +100,7 @@
return new AcroForm(acroDictionary, signatureFlags, needAppearances, fields);
}
private AcroFieldBase GetAcroField(DictionaryToken fieldDictionary)
private AcroFieldBase GetAcroField(DictionaryToken fieldDictionary, Catalog catalog)
{
fieldDictionary.TryGet(NameToken.Ft, out NameToken fieldType);
fieldDictionary.TryGet(NameToken.Ff, out NumericToken fieldFlagsToken);
@@ -134,6 +135,17 @@
fieldDictionary.TryGet(NameToken.Parent, out IndirectReferenceToken parentReferenceToken);
var information = new AcroFieldCommonInformation(parentReferenceToken?.Data, partialFieldName, alternateFieldName, mappingName);
int? pageNumber = null;
if (fieldDictionary.TryGet(NameToken.P, tokenScanner, out IndirectReferenceToken pageReference))
{
pageNumber = catalog.GetPageByReference(pageReference.Data)?.PageNumber;
}
PdfRectangle? bounds = null;
if (fieldDictionary.TryGet(NameToken.Rect, tokenScanner, out ArrayToken rectArray) && rectArray.Length == 4)
{
bounds = rectArray.ToRectangle();
}
var fieldFlags = (uint) (fieldFlagsToken?.Long ?? 0);
@@ -143,7 +155,7 @@
var children = new List<AcroFieldBase>();
foreach (var kid in kids)
{
var kidField = GetAcroField(kid);
var kidField = GetAcroField(kid, catalog);
children.Add(kidField);
}
@@ -155,12 +167,16 @@
if (buttonFlags.HasFlag(AcroButtonFieldFlags.Radio))
{
var field = new AcroRadioButtonsField(fieldDictionary, fieldType, buttonFlags, information);
var field = new AcroRadioButtonsField(fieldDictionary, fieldType, buttonFlags, information,
pageNumber,
bounds);
result = field;
}
else if (buttonFlags.HasFlag(AcroButtonFieldFlags.PushButton))
{
var field = new AcroPushButtonField(fieldDictionary, fieldType, buttonFlags, information);
var field = new AcroPushButtonField(fieldDictionary, fieldType, buttonFlags, information,
pageNumber,
bounds);
result = field;
}
else
@@ -175,25 +191,30 @@
isChecked = !string.Equals(valueToken.Data, NameToken.Off, StringComparison.OrdinalIgnoreCase);
}
var field = new AcroCheckboxField(fieldDictionary, fieldType, buttonFlags,
information,
var field = new AcroCheckboxField(fieldDictionary, fieldType, buttonFlags, information,
valueToken,
isChecked);
isChecked,
pageNumber,
bounds);
result = field;
}
}
else if (fieldType == NameToken.Tx)
{
result = GetTextField(fieldDictionary, fieldType, fieldFlags, information);
result = GetTextField(fieldDictionary, fieldType, fieldFlags, information, pageNumber, bounds);
}
else if (fieldType == NameToken.Ch)
{
result = GetChoiceField(fieldDictionary, fieldType, fieldFlags, information);
result = GetChoiceField(fieldDictionary, fieldType, fieldFlags, information,
pageNumber,
bounds);
}
else if (fieldType == NameToken.Sig)
{
var field = new AcroSignatureField(fieldDictionary, fieldType, fieldFlags, information);
var field = new AcroSignatureField(fieldDictionary, fieldType, fieldFlags, information,
pageNumber,
bounds);
result = field;
}
else
@@ -204,7 +225,10 @@
return result;
}
private AcroFieldBase GetTextField(DictionaryToken fieldDictionary, NameToken fieldType, uint fieldFlags, AcroFieldCommonInformation information)
private AcroFieldBase GetTextField(DictionaryToken fieldDictionary, NameToken fieldType, uint fieldFlags,
AcroFieldCommonInformation information,
int? pageNumber,
PdfRectangle? bounds)
{
var textFlags = (AcroTextFieldFlags)fieldFlags;
@@ -231,12 +255,20 @@
maxLength = maxLenToken.Int;
}
var field = new AcroTextField(fieldDictionary, fieldType, textFlags, information, textValue, maxLength);
var field = new AcroTextField(fieldDictionary, fieldType, textFlags, information,
textValue,
maxLength,
pageNumber,
bounds);
return field;
}
private AcroFieldBase GetChoiceField(DictionaryToken fieldDictionary, NameToken fieldType, uint fieldFlags, AcroFieldCommonInformation information)
private AcroFieldBase GetChoiceField(DictionaryToken fieldDictionary, NameToken fieldType,
uint fieldFlags,
AcroFieldCommonInformation information,
int? pageNumber,
PdfRectangle? bounds)
{
var selectedOptions = EmptyArray<string>.Instance;
if (fieldDictionary.TryGet(NameToken.V, out var valueToken))
@@ -348,7 +380,12 @@
if (choiceFlags.HasFlag(AcroChoiceFieldFlags.Combo))
{
var field = new AcroComboBoxField(fieldDictionary, fieldType, choiceFlags, information, options, selectedOptions, selectedIndices);
var field = new AcroComboBoxField(fieldDictionary, fieldType, choiceFlags, information,
options,
selectedOptions,
selectedIndices,
pageNumber,
bounds);
return field;
}
@@ -358,7 +395,13 @@
topIndex = topIndexToken.Int;
}
return new AcroListBoxField(fieldDictionary, fieldType, choiceFlags, information, options, selectedOptions, selectedIndices, topIndex);
return new AcroListBoxField(fieldDictionary, fieldType, choiceFlags, information,
options,
selectedOptions,
selectedIndices,
topIndex,
pageNumber,
bounds);
}
private static bool IsChoiceSelected(IReadOnlyList<string> selectedOptionNames, IReadOnlyList<int> selectedOptionIndices, int index, string name)

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.AcroForms.Fields
{
using Geometry;
using Tokens;
/// <inheritdoc />
@@ -29,8 +30,11 @@
/// </summary>
public AcroCheckboxField(DictionaryToken dictionary, string fieldType, AcroButtonFieldFlags fieldFlags,
AcroFieldCommonInformation information, NameToken currentValue,
bool isChecked) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.Checkbox, information)
bool isChecked,
int? pageNumber,
PdfRectangle? bounds) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.Checkbox, information,
pageNumber, bounds)
{
Flags = fieldFlags;
CurrentValue = currentValue;

View File

@@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
using Geometry;
using Tokens;
using Util.JetBrains.Annotations;
@@ -46,11 +47,17 @@
/// <param name="options">The options in this field.</param>
/// <param name="selectedOptionIndices">The indices of the selected options where there are multiple with the same name.</param>
/// <param name="selectedOptions">The names of the selected options.</param>
/// <param name="pageNumber">The number of the page this field appears on.</param>
/// <param name="bounds">The location of this field on the page.</param>
public AcroComboBoxField(DictionaryToken dictionary, string fieldType, AcroChoiceFieldFlags fieldFlags,
AcroFieldCommonInformation information, IReadOnlyList<AcroChoiceOption> options,
IReadOnlyList<string> selectedOptions,
IReadOnlyList<int> selectedOptionIndices) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.ComboBox, information)
IReadOnlyList<int> selectedOptionIndices,
int? pageNumber,
PdfRectangle? bounds) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.ComboBox, information,
pageNumber,
bounds)
{
Flags = fieldFlags;
Options = options ?? throw new ArgumentNullException(nameof(options));

View File

@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.AcroForms.Fields
{
using System;
using Geometry;
using Tokens;
using Util.JetBrains.Annotations;
@@ -37,6 +38,16 @@
[NotNull]
public AcroFieldCommonInformation Information { get; }
/// <summary>
/// The page number of the page containing this form field if known.
/// </summary>
public int? PageNumber { get; }
/// <summary>
/// The placement rectangle of this form field on the page given by <see cref="PageNumber"/> if known.
/// </summary>
public PdfRectangle? Bounds { get; }
/// <summary>
/// Create a new <see cref="AcroFieldBase"/>.
/// </summary>
@@ -45,16 +56,22 @@
/// <param name="fieldFlags">The flags specifying behaviour for this field.</param>
/// <param name="fieldType">The type of this field.</param>
/// <param name="information">Additional information for this field.</param>
/// <param name="pageNumber">The number of the page this field appears on.</param>
/// <param name="bounds">The location of this field on the page.</param>
protected AcroFieldBase(DictionaryToken dictionary, string rawFieldType,
uint fieldFlags,
AcroFieldType fieldType,
AcroFieldCommonInformation information)
AcroFieldCommonInformation information,
int? pageNumber,
PdfRectangle? bounds)
{
Dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary));
RawFieldType = rawFieldType ?? throw new ArgumentNullException(nameof(rawFieldType));
FieldFlags = fieldFlags;
FieldType = fieldType;
Information = information ?? new AcroFieldCommonInformation(null, null, null, null);
PageNumber = pageNumber;
Bounds = bounds;
}
/// <inheritdoc />

View File

@@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
using Geometry;
using Tokens;
using Util.JetBrains.Annotations;
@@ -56,12 +57,16 @@
/// <param name="selectedOptionIndices">The indices of the selected options where there are multiple with the same name.</param>
/// <param name="topIndex">The first visible option index.</param>
/// <param name="selectedOptions">The names of the selected options.</param>
/// <param name="pageNumber">The number of the page this field appears on.</param>
/// <param name="bounds">The location of this field on the page.</param>
public AcroListBoxField(DictionaryToken dictionary, string fieldType, AcroChoiceFieldFlags fieldFlags,
AcroFieldCommonInformation information, IReadOnlyList<AcroChoiceOption> options,
IReadOnlyList<string> selectedOptions,
IReadOnlyList<int> selectedOptionIndices,
int? topIndex) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.ListBox, information)
int? topIndex,
int? pageNumber,
PdfRectangle? bounds) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.ListBox, information, pageNumber, bounds)
{
Flags = fieldFlags;
Options = options ?? throw new ArgumentNullException(nameof(options));

View File

@@ -21,7 +21,8 @@
/// </summary>
internal AcroNonTerminalField(DictionaryToken dictionary, string fieldType, uint fieldFlags, AcroFieldCommonInformation information,
IReadOnlyList<AcroFieldBase> children) :
base(dictionary, fieldType, fieldFlags, AcroFieldType.NonTerminal, information)
base(dictionary, fieldType, fieldFlags, AcroFieldType.NonTerminal, information,
null, null)
{
Children = children ?? throw new ArgumentNullException(nameof(children));
}

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.AcroForms.Fields
{
using Geometry;
using Tokens;
/// <inheritdoc />
@@ -17,9 +18,12 @@
/// <summary>
/// Create a new <see cref="AcroPushButtonField"/>.
/// </summary>
public AcroPushButtonField(DictionaryToken dictionary, string fieldType, AcroButtonFieldFlags fieldFlags,
AcroFieldCommonInformation information) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.PushButton, information)
public AcroPushButtonField(DictionaryToken dictionary, string fieldType,
AcroButtonFieldFlags fieldFlags,
AcroFieldCommonInformation information,
int? pageNumber,
PdfRectangle? bounds) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.PushButton, information, pageNumber, bounds)
{
Flags = fieldFlags;
}

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.AcroForms.Fields
{
using Geometry;
using Tokens;
/// <inheritdoc />
@@ -18,8 +19,10 @@
/// Create a new <see cref="AcroRadioButtonsField"/>.
/// </summary>
public AcroRadioButtonsField(DictionaryToken dictionary, string fieldType, AcroButtonFieldFlags fieldFlags,
AcroFieldCommonInformation information) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.RadioButton, information)
AcroFieldCommonInformation information,
int? pageNumber,
PdfRectangle? bounds) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.RadioButton, information, pageNumber, bounds)
{
Flags = fieldFlags;
}

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.AcroForms.Fields
{
using Geometry;
using Tokens;
/// <inheritdoc />
@@ -12,8 +13,11 @@
/// <summary>
/// Create a new <see cref="T:UglyToad.PdfPig.AcroForms.Fields.AcroSignatureField" />.
/// </summary>
public AcroSignatureField(DictionaryToken dictionary, string fieldType, uint fieldFlags, AcroFieldCommonInformation information) :
base(dictionary, fieldType, fieldFlags, AcroFieldType.Signature, information)
public AcroSignatureField(DictionaryToken dictionary, string fieldType, uint fieldFlags,
AcroFieldCommonInformation information,
int? pageNumber,
PdfRectangle? bounds) :
base(dictionary, fieldType, fieldFlags, AcroFieldType.Signature, information, pageNumber, bounds)
{
}
}

View File

@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.AcroForms.Fields
{
using Geometry;
using Tokens;
/// <inheritdoc />
@@ -45,9 +46,15 @@
/// <param name="information">Additional information for this field.</param>
/// <param name="value">The text value.</param>
/// <param name="maxLength">The maximum length.</param>
/// <param name="pageNumber">The number of the page this field appears on.</param>
/// <param name="bounds">The location of this field on the page.</param>
public AcroTextField(DictionaryToken dictionary, string fieldType, AcroTextFieldFlags fieldFlags,
AcroFieldCommonInformation information, string value, int? maxLength) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.Text, information)
AcroFieldCommonInformation information,
string value,
int? maxLength,
int? pageNumber,
PdfRectangle? bounds) :
base(dictionary, fieldType, (uint)fieldFlags, AcroFieldType.Text, information, pageNumber, bounds)
{
Flags = fieldFlags;
Value = value;

View File

@@ -78,5 +78,18 @@
return node;
}
/// <summary>
/// Find the page node whose reference equals the given <paramref name="reference"/>.
/// </summary>
/// <param name="reference">The indirect reference to look for.</param>
/// <returns>The first matching page node, or <see langword="null"/> if no page has this reference.</returns>
internal PageTreeNode GetPageByReference(IndirectReference reference)
{
    foreach (var entry in pagesByNumber)
    {
        var candidate = entry.Value;

        // Skip pages registered under a different reference.
        if (!candidate.Reference.Equals(reference))
        {
            continue;
        }

        return candidate;
    }

    // No page in the lookup carries this reference.
    return null;
}
}
}

View File

@@ -92,13 +92,13 @@
break;
case 1:
// Non object stream entries.
int offset = 0;
for (int i = 0; i < fieldSizes.Field2Size; i++)
var offset = 0;
for (var i = 0; i < fieldSizes.Field2Size; i++)
{
offset += (lineBuffer[i + fieldSizes.Field1Size] & 0x00ff) << ((fieldSizes.Field2Size - i - 1) * 8);
}
int genNum = 0;
for (int i = 0; i < fieldSizes.Field3Size; i++)
var genNum = 0;
for (var i = 0; i < fieldSizes.Field3Size; i++)
{
genNum += (lineBuffer[i + fieldSizes.Field1Size + fieldSizes.Field2Size] & 0x00ff) << ((fieldSizes.Field3Size - i - 1) * 8);
}
@@ -122,8 +122,8 @@
* table but add object stream number with minus sign in order to
* distinguish from file offsets
*/
int objstmObjNr = 0;
for (int i = 0; i < fieldSizes.Field2Size; i++)
var objstmObjNr = 0;
for (var i = 0; i < fieldSizes.Field2Size; i++)
{
objstmObjNr += (lineBuffer[i + fieldSizes.Field1Size] & 0x00ff) << ((fieldSizes.Field2Size - i - 1) * 8);
}
@@ -134,32 +134,37 @@
}
}
private static List<long> GetObjectNumbers(DictionaryToken dictionary)
private static IEnumerable<long> GetObjectNumbers(DictionaryToken dictionary)
{
// The number one greater than the highest object number used in this section or in any section for which this is an update.
if (!dictionary.TryGet(NameToken.Size, out var sizeToken) || !(sizeToken is NumericToken sizeNumeric))
{
throw new PdfDocumentFormatException($"The stream dictionary must contain a numeric size value: {dictionary}.");
}
var indexArray = new[] { 0, sizeNumeric.Int };
var objNums = new List<long>();
if (dictionary.TryGet(NameToken.Index, out var indexToken) && indexToken is ArrayToken indexArrayToken)
{
indexArray = new[]
// An array containing a pair of integers for each subsection in this section.
// Pair[0] is the first object number in the subsection; Pair[1] is the number of entries in the subsection.
for (var i = 0; i < indexArrayToken.Length; i += 2)
{
indexArrayToken.GetNumeric(0).Int,
indexArrayToken.GetNumeric(1).Int
};
var firstObjectNumber = indexArrayToken.GetNumeric(i).Int;
var size = indexArrayToken.GetNumeric(i + 1).Int;
for (var j = 0; j < size; j++)
{
objNums.Add(firstObjectNumber + j);
}
List<long> objNums = new List<long>();
var firstObjectNumber = indexArray[0];
var size = indexArray[1];
for (var i = 0; i < size; i++)
}
}
else
{
objNums.Add(firstObjectNumber + i);
for (var i = 0; i < sizeNumeric.Int; i++)
{
objNums.Add(i);
}
}
return objNums;

View File

@@ -23,6 +23,11 @@
/// </summary>
public int Length { get; }
/// <summary>
/// Gets the token at position <paramref name="i"/> in <see cref="Data"/>, a shorthand for indexing <see cref="Data"/> directly.
/// </summary>
/// <param name="i">The index of the token within <see cref="Data"/>.</param>
public IToken this[int i]
{
    get { return Data[i]; }
}
/// <summary>
/// Create a new <see cref="ArrayToken"/>.
/// </summary>