mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-08-20 04:48:53 +08:00
allow missing catalog type definition for catalog dictionary
Some checks failed
Build and test / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / Check latest commit (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
Some checks failed
Build and test / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / Check latest commit (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
As long as there is a pages entry, we accept this in lenient parsing mode. This fixes document 006705.pdf in the corpus, which had '/calalog' as the dictionary entry. Also adds a test for some weird content stream content in 0006324.pdf, where numbers seem to get split in the content stream at a decimal place; the test only checks that our parser doesn't hard crash.
This commit is contained in:
parent
febfa4d4b3
commit
83d6fc6cc2
@ -261,6 +261,47 @@ endobj";
|
||||
Assert.Equal(3, tokens.OfType<DictionaryToken>().Count());
|
||||
}
|
||||
|
||||
/// <summary>
/// Smoke test for malformed content stream data modelled on corpus document 0006324.pdf,
/// where numeric operands appear split around the decimal point (e.g. "-0. .61", "572. .836").
/// The scanner is expected to get through the whole input without throwing.
/// </summary>
[Fact]
public void Document006324Test()
{
    const string content =
        """
        q
        1 0 0 1 248.6304 572.546 cm
        0 0 m
        0.021 -0.007 l
        3 -0.003 -0.01 0 0 0 c
        f
        Q
        q
        1 0 0 1 2489394 57249855 cm
        0 0 m
        -0.046 -0.001 -0.609 0.029 -0.286 -0.014 c
        -02.61 -0.067 -0.286 -0. .61 -0 0 c
        f
        Q
        q
        1 0 0 1 24862464 572. .836 cm
        0 0 m
        0.936 -0.029 l
        0.038 -0.021 0.55 -0.014 0 0 c
        f
        Q
        """;

    var tokens = new List<IToken>();

    var scanner = new CoreTokenScanner(
        StringBytesTestConverter.Convert(content, false).Bytes,
        true,
        isStream: true);

    // Drain the scanner completely; any exception thrown while tokenizing fails the test.
    while (scanner.MoveNext())
    {
        tokens.Add(scanner.CurrentToken);
    }

    // Guard against a vacuous pass: the scanner must actually have produced tokens,
    // not merely returned false from the first MoveNext call.
    Assert.NotEmpty(tokens);
}
|
||||
|
||||
private static void AssertCorrectToken<T, TData>(IToken token, TData expected) where T : IDataToken<TData>
|
||||
{
|
||||
var cast = Assert.IsType<T>(token);
|
||||
|
@ -19,7 +19,8 @@
|
||||
throw new ArgumentNullException(nameof(dictionary));
|
||||
}
|
||||
|
||||
if (dictionary.TryGet(NameToken.Type, out var type) && !ReferenceEquals(type, NameToken.Catalog))
|
||||
if (dictionary.TryGet(NameToken.Type, out var type) && !ReferenceEquals(type, NameToken.Catalog)
|
||||
&& !isLenientParsing)
|
||||
{
|
||||
throw new PdfDocumentFormatException($"The type of the catalog dictionary was not Catalog: {dictionary}.");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user