mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-08-20 09:00:07 +08:00
allow missing catalog type definition for catalog dictionary
Some checks failed
Build and test / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / Check latest commit (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
Some checks failed
Build and test / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / Check latest commit (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
As long as there is a pages entry, we accept this in lenient parsing mode. This fixes document 006705.pdf in the corpus, which had '/calalog' as the dictionary entry. This commit also adds a test for some unusual content stream data in 0006324.pdf, where numbers appear to have been split at the decimal point. The test only checks that our parser doesn't hard crash.
This commit is contained in:
parent
febfa4d4b3
commit
83d6fc6cc2
@ -261,6 +261,47 @@ endobj";
|
|||||||
Assert.Equal(3, tokens.OfType<DictionaryToken>().Count());
|
Assert.Equal(3, tokens.OfType<DictionaryToken>().Count());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void Document006324Test()
|
||||||
|
{
|
||||||
|
const string content =
|
||||||
|
"""
|
||||||
|
q
|
||||||
|
1 0 0 1 248.6304 572.546 cm
|
||||||
|
0 0 m
|
||||||
|
0.021 -0.007 l
|
||||||
|
3 -0.003 -0.01 0 0 0 c
|
||||||
|
f
|
||||||
|
Q
|
||||||
|
q
|
||||||
|
1 0 0 1 2489394 57249855 cm
|
||||||
|
0 0 m
|
||||||
|
-0.046 -0.001 -0.609 0.029 -0.286 -0.014 c
|
||||||
|
-02.61 -0.067 -0.286 -0. .61 -0 0 c
|
||||||
|
f
|
||||||
|
Q
|
||||||
|
q
|
||||||
|
1 0 0 1 24862464 572. .836 cm
|
||||||
|
0 0 m
|
||||||
|
0.936 -0.029 l
|
||||||
|
0.038 -0.021 0.55 -0.014 0 0 c
|
||||||
|
f
|
||||||
|
Q
|
||||||
|
""";
|
||||||
|
|
||||||
|
var tokens = new List<IToken>();
|
||||||
|
|
||||||
|
var scanner = new CoreTokenScanner(
|
||||||
|
StringBytesTestConverter.Convert(content, false).Bytes,
|
||||||
|
true,
|
||||||
|
isStream: true);
|
||||||
|
|
||||||
|
while (scanner.MoveNext())
|
||||||
|
{
|
||||||
|
tokens.Add(scanner.CurrentToken);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static void AssertCorrectToken<T, TData>(IToken token, TData expected) where T : IDataToken<TData>
|
private static void AssertCorrectToken<T, TData>(IToken token, TData expected) where T : IDataToken<TData>
|
||||||
{
|
{
|
||||||
var cast = Assert.IsType<T>(token);
|
var cast = Assert.IsType<T>(token);
|
||||||
|
|||||||
@ -19,7 +19,8 @@
|
|||||||
throw new ArgumentNullException(nameof(dictionary));
|
throw new ArgumentNullException(nameof(dictionary));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dictionary.TryGet(NameToken.Type, out var type) && !ReferenceEquals(type, NameToken.Catalog))
|
if (dictionary.TryGet(NameToken.Type, out var type) && !ReferenceEquals(type, NameToken.Catalog)
|
||||||
|
&& !isLenientParsing)
|
||||||
{
|
{
|
||||||
throw new PdfDocumentFormatException($"The type of the catalog dictionary was not Catalog: {dictionary}.");
|
throw new PdfDocumentFormatException($"The type of the catalog dictionary was not Catalog: {dictionary}.");
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user