mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-15 19:54:52 +08:00
small performance tweaks and remove package license expression
The package license URL is deprecated in favour of the package license expression, but NuGet doesn't seem to support expressions properly for published packages yet, so we'll keep the deprecated URL for the time being. Having both the URL and the expression causes the build to fail. Also includes small, obvious performance improvements for file header parsing and for getting the encoding information using the existing reverse name-to-code map.
This commit is contained in:
@@ -2,17 +2,17 @@
|
||||
{
|
||||
using System;
|
||||
using Exceptions;
|
||||
using Logging;
|
||||
using PdfPig.Parser.FileStructure;
|
||||
using Xunit;
|
||||
|
||||
public class FileHeaderParserTests
|
||||
{
|
||||
private readonly FileHeaderParser parser = new FileHeaderParser(new TestingLog());
|
||||
|
||||
private readonly ILog log = new NoOpLog();
|
||||
[Fact]
|
||||
public void NullScannerThrows()
|
||||
{
|
||||
Action action = () => parser.Parse(null, false);
|
||||
Action action = () => FileHeaderParser.Parse(null, false, log);
|
||||
|
||||
Assert.Throws<ArgumentNullException>(action);
|
||||
}
|
||||
@@ -30,7 +30,7 @@
|
||||
|
||||
var scanner = StringBytesTestConverter.Scanner(input);
|
||||
|
||||
var result = parser.Parse(scanner, false);
|
||||
var result = FileHeaderParser.Parse(scanner, false, log);
|
||||
|
||||
Assert.Equal(format, result.VersionString);
|
||||
}
|
||||
@@ -44,7 +44,7 @@
|
||||
|
||||
var scanner = StringBytesTestConverter.Scanner(input);
|
||||
|
||||
var result = parser.Parse(scanner, false);
|
||||
var result = FileHeaderParser.Parse(scanner, false, log);
|
||||
|
||||
Assert.Equal(1.2m, result.Version);
|
||||
}
|
||||
@@ -54,7 +54,7 @@
|
||||
{
|
||||
var scanner = StringBytesTestConverter.Scanner(string.Empty);
|
||||
|
||||
Action action = () => parser.Parse(scanner, false);
|
||||
Action action = () => FileHeaderParser.Parse(scanner, false, log);
|
||||
|
||||
Assert.Throws<PdfDocumentFormatException>(action);
|
||||
}
|
||||
@@ -65,7 +65,7 @@
|
||||
var scanner = StringBytesTestConverter.Scanner(@"one
|
||||
%PDF-1.2");
|
||||
|
||||
Action action = () => parser.Parse(scanner, false);
|
||||
Action action = () => FileHeaderParser.Parse(scanner, false, log);
|
||||
|
||||
Assert.Throws<PdfDocumentFormatException>(action);
|
||||
}
|
||||
@@ -76,7 +76,7 @@
|
||||
var scanner = StringBytesTestConverter.Scanner(@"one
|
||||
%PDF-1.7");
|
||||
|
||||
var result = parser.Parse(scanner, true);
|
||||
var result = FileHeaderParser.Parse(scanner, true, log);
|
||||
|
||||
Assert.Equal(1.7m, result.Version);
|
||||
}
|
||||
@@ -87,7 +87,7 @@
|
||||
var scanner = StringBytesTestConverter.Scanner(@"one two
|
||||
three %PDF-1.6");
|
||||
|
||||
Action action = () => parser.Parse(scanner, true);
|
||||
Action action = () => FileHeaderParser.Parse(scanner, true, log);
|
||||
|
||||
Assert.Throws<PdfDocumentFormatException>(action);
|
||||
}
|
||||
@@ -97,7 +97,7 @@ three %PDF-1.6");
|
||||
{
|
||||
var scanner = StringBytesTestConverter.Scanner(@"one two");
|
||||
|
||||
Action action = () => parser.Parse(scanner, true);
|
||||
Action action = () => FileHeaderParser.Parse(scanner, true, log);
|
||||
|
||||
Assert.Throws<PdfDocumentFormatException>(action);
|
||||
}
|
||||
@@ -107,7 +107,7 @@ three %PDF-1.6");
|
||||
{
|
||||
var scanner = StringBytesTestConverter.Scanner("%Pdeef-1.69");
|
||||
|
||||
Action action = () => parser.Parse(scanner, false);
|
||||
Action action = () => FileHeaderParser.Parse(scanner, false, log);
|
||||
|
||||
Assert.Throws<PdfDocumentFormatException>(action);
|
||||
}
|
||||
@@ -117,7 +117,7 @@ three %PDF-1.6");
|
||||
{
|
||||
var scanner = StringBytesTestConverter.Scanner("%Pdeef-1.69");
|
||||
|
||||
var result = parser.Parse(scanner, true);
|
||||
var result = FileHeaderParser.Parse(scanner, true, log);
|
||||
|
||||
Assert.Equal(1.4m, result.Version);
|
||||
}
|
||||
@@ -127,7 +127,7 @@ three %PDF-1.6");
|
||||
{
|
||||
var scanner = StringBytesTestConverter.Scanner(@"%FDF-1.6");
|
||||
|
||||
parser.Parse(scanner, false);
|
||||
FileHeaderParser.Parse(scanner, false, log);
|
||||
|
||||
Assert.Equal(0, scanner.CurrentPosition);
|
||||
}
|
||||
|
@@ -13,7 +13,7 @@
|
||||
|
||||
public IReadOnlyDictionary<int, string> CodeToNameMap => CodeToName;
|
||||
|
||||
protected readonly Dictionary<string, int> NameToCode = new Dictionary<string, int>(250);
|
||||
protected readonly Dictionary<string, int> NameToCode = new Dictionary<string, int>(250, StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
public IReadOnlyDictionary<string, int> NameToCodeMap => NameToCode;
|
||||
|
||||
@@ -21,15 +21,7 @@
|
||||
|
||||
public bool ContainsName(string name)
|
||||
{
|
||||
foreach (var keyValuePair in CodeToNameMap)
|
||||
{
|
||||
if (string.Equals(keyValuePair.Value, name, StringComparison.InvariantCultureIgnoreCase))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return NameToCode.ContainsKey(name);
|
||||
}
|
||||
|
||||
public bool ContainsCode(int code)
|
||||
|
@@ -1,8 +1,6 @@
|
||||
namespace UglyToad.PdfPig.Parser.FileStructure
|
||||
{
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.Text.RegularExpressions;
|
||||
using Content;
|
||||
using Exceptions;
|
||||
using Logging;
|
||||
@@ -27,19 +25,10 @@
|
||||
/// This parser allows versions up to 1.9.
|
||||
/// For versions equal or greater to PDF 1.4, the optional Version entry in the document’s catalog dictionary should be used instead of the header version.
|
||||
/// </remarks>
|
||||
internal class FileHeaderParser
|
||||
internal static class FileHeaderParser
|
||||
{
|
||||
private static readonly Regex VersionRegex = new Regex(@"[FP]DF-(?<version>1.\d)", RegexOptions.IgnoreCase);
|
||||
|
||||
private readonly ILog log;
|
||||
|
||||
public FileHeaderParser(ILog log)
|
||||
{
|
||||
this.log = log;
|
||||
}
|
||||
|
||||
[NotNull]
|
||||
public HeaderVersion Parse([NotNull]ISeekableTokenScanner scanner, bool isLenientParsing)
|
||||
public static HeaderVersion Parse([NotNull]ISeekableTokenScanner scanner, bool isLenientParsing, ILog log)
|
||||
{
|
||||
if (scanner == null)
|
||||
{
|
||||
@@ -72,19 +61,17 @@
|
||||
|
||||
attempts++;
|
||||
}
|
||||
|
||||
var match = VersionRegex.Match(comment.Data);
|
||||
|
||||
if (!match.Success || !decimal.TryParse(match.Groups["version"].Value, NumberStyles.Any, CultureInfo.InvariantCulture, out var version))
|
||||
if (comment.Data.IndexOf("PDF-1.", StringComparison.OrdinalIgnoreCase) != 0 && comment.Data.IndexOf("FDF-1.", StringComparison.OrdinalIgnoreCase) != 0)
|
||||
{
|
||||
if (isLenientParsing)
|
||||
{
|
||||
log.Warn($"Did not find a version header of the correct format, defaulting to 1.4 since lenient. Header was: {comment.Data}.");
|
||||
return HandleMissingVersion(comment, isLenientParsing, log);
|
||||
}
|
||||
|
||||
return new HeaderVersion(1.4m, "PDF-1.4");
|
||||
}
|
||||
const int toDecimalStartLength = 4;
|
||||
|
||||
throw new PdfDocumentFormatException($"The comment which should have provided the version was in the wrong format: {comment.Data}.");
|
||||
if (!decimal.TryParse(comment.Data.Substring(toDecimalStartLength), out var version))
|
||||
{
|
||||
return HandleMissingVersion(comment, isLenientParsing, log);
|
||||
}
|
||||
|
||||
scanner.Seek(0);
|
||||
@@ -93,5 +80,17 @@
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static HeaderVersion HandleMissingVersion(CommentToken comment, bool isLenientParsing, ILog log)
|
||||
{
|
||||
if (isLenientParsing)
|
||||
{
|
||||
log.Warn($"Did not find a version header of the correct format, defaulting to 1.4 since lenient. Header was: {comment.Data}.");
|
||||
|
||||
return new HeaderVersion(1.4m, "PDF-1.4");
|
||||
}
|
||||
|
||||
throw new PdfDocumentFormatException($"The comment which should have provided the version was in the wrong format: {comment.Data}.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -88,7 +88,7 @@
|
||||
var crossReferenceStreamParser = new CrossReferenceStreamParser(filterProvider);
|
||||
var crossReferenceParser = new CrossReferenceParser(log, xrefValidator, objectChecker, crossReferenceStreamParser, new CrossReferenceTableParser());
|
||||
|
||||
var version = container.Get<FileHeaderParser>().Parse(scanner, isLenientParsing);
|
||||
var version = FileHeaderParser.Parse(scanner, isLenientParsing, log);
|
||||
|
||||
var crossReferenceOffset = container.Get<FileTrailerParser>().GetFirstCrossReferenceOffset(inputBytes, scanner, isLenientParsing);
|
||||
|
||||
|
@@ -7,7 +7,6 @@
|
||||
<Authors>UglyToad</Authors>
|
||||
<Title>PdfPig</Title>
|
||||
<Description>Reads text content from PDF documents and supports document creation. Apache 2.0 licensed.</Description>
|
||||
<PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
|
||||
<PackageLicenseUrl>https://raw.githubusercontent.com/UglyToad/PdfPig/master/LICENSE</PackageLicenseUrl>
|
||||
<PackageProjectUrl>https://github.com/UglyToad/PdfPig</PackageProjectUrl>
|
||||
<PackageTags>PDF;Reader;Document;Adobe;PDFBox;PdfPig;pdf-extract</PackageTags>
|
||||
|
@@ -26,7 +26,6 @@
|
||||
logger = new NoOpLog();
|
||||
}
|
||||
|
||||
var headerParser = new FileHeaderParser(logger);
|
||||
var trailerParser = new FileTrailerParser();
|
||||
var filterProvider = new MemoryFilterProvider(new DecodeParameterResolver(logger), new PngPredictor(), logger);
|
||||
|
||||
@@ -34,7 +33,6 @@
|
||||
var afmParser = new AdobeFontMetricsParser();
|
||||
|
||||
var container = new Container();
|
||||
container.Register(headerParser);
|
||||
container.Register(trailerParser);
|
||||
container.Register(filterProvider);
|
||||
container.Register(cmapParser);
|
||||
|
Reference in New Issue
Block a user