mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-11-24 08:47:01 +08:00
Add CMap caching at document level and add MurmurHash3 hashing function
Some checks failed
Build, test and publish draft / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (0000-0001) (push) Has been cancelled
Run Common Crawl Tests / build (0002-0003) (push) Has been cancelled
Run Common Crawl Tests / build (0004-0005) (push) Has been cancelled
Run Common Crawl Tests / build (0006-0007) (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / Check if this commit has already been published (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
Some checks failed
Build, test and publish draft / build (push) Has been cancelled
Build and test [MacOS] / build (push) Has been cancelled
Run Common Crawl Tests / build (0000-0001) (push) Has been cancelled
Run Common Crawl Tests / build (0002-0003) (push) Has been cancelled
Run Common Crawl Tests / build (0004-0005) (push) Has been cancelled
Run Common Crawl Tests / build (0006-0007) (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled
Nightly Release / Check if this commit has already been published (push) Has been cancelled
Nightly Release / tests (push) Has been cancelled
Nightly Release / build_and_publish_nightly (push) Has been cancelled
This commit is contained in:
@@ -0,0 +1 @@
|
||||
Random Big Title Lorem Ipsum text with lists Lorem ipsum dolor sit amet, consectetur adipiscing elit. In sodales gravida felis, in rhoncus velit rutrum at. Curabitur hendrerit dapibus nulla, ut hendrerit diam imperdiet quis. Pellentesque id neque ali-quam, pulvinar neque in, vulputate elit. Pel-lentesque ut erat sit amet massa suscipit ullamcor-per. Sed porttitor viverra convallis. Duis vitae sem-per metus. Pellentesque eros purus, egestas eget velit eget, elementum aliquet velit. Suspendisse potenti. Nulla vitae massa rutrum, blandit erat vi-tae, aliquet arcu. Aenean feugiat leo sed enim sodales vehicula. Sus-pendisse tempus hendrerit magna sagittis dictum. Duis ultrices dapibus egestas. Cras eu felis eu lectus suscipit pharetra at at lacus. Nulla facilisi. Proin in-terdum faucibus elit nec rhoncus. Proin sodaless metus sed tincidunt hendrerit. • Duis leo enim, convallis sit amet orci eget, condimentum mattis mi ; • Etiam dolor erat, maximus nec mi sed, con-vallis convallis orci ; • Morbi viverra diam in diam cursus, vitae aliquet velit tempus ; • Donec at nisi fermentum, ultricies odio eget, egestas massa at nisi fermentum, ul-tricies odio eget, egestas massa. Donec ultricies cursus odio sed rutrum. Nam ven-enatis metus vitae elementum scelerisque. Ali-quam tempor sapien at turpis posuere eleifend. Sed placerat posuere nunc vel efficitur. Quisque auctor felis vel lectus dictum fringilla. Quisque vo-lutpat pulvinar© elit. Aliquam ultrices feugiat ali-quam. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Sus-pendisse imperdiet ex lorem, porta bibendum pu-rus ultricies id. Integer vel lacus sapien. Nam sodales ante eu risus facilisis placerat. Aliquam suscipit pulvinar ultricies. Aenean pulvinar, ex ac fermentum egestas, erat nisi feugiat velit, vitae suscipit tellus odio vitae quam. Morbi elementum sem in elit posuere, non rhoncus magna fringilla. Phasellus cursus in dolor laoreet rutrum. 
Curabitur tincidunt risus ullamcor-per, vehicula velit at, pulvinar metus. Donec quis ante leo. Vivamus pharetra, nisl ac vehi-cula tempor, tellus lacus aliquam sapien, eu congue nibh quam sit amet odio. Quisque metus arcu, sem-per nec consequat eu, pellentesque vel sem. Sed purus risus, tincidunt¹ sit amet dictum vitae, euis-mod id nibh. Praesent ultrices libero quis enim porta, sit amet pellentesque augue pretium. Viva-mus nec molestie nunc. Donec finibus enim nec tel-lus laoreet elementum. Curabitur efficitur placerat dolor et semper. Morbi laoreet dui eu tortor luctus, nec ultrices do-lor ullamcorper. Ut gravida sed nisl a efficitur. In tincidunt orci a condimentum semper. Suspendisse scelerisque fermentum lacinia. Vestibulum sit amet ornare tellus, aliquet euismod mauris. Cras suscipit venenatis ultrices. Sed diam erat, aliquet a tellus ut, viverra 12º ongue magna. Cras id justo tortor. Mauris in tortor vulputate, pellentesque nisl ac, facilisis ligula. Class aptent taciti² sociosqu ad li-tora torquent per conubia nostra³, per inceptos himenaeos. Aliquam eget dolor turpis. Mauris id molestie tellus. Sed elementum molestie nisi, at ali-quet sem vehicula nec. Morbi tempus nulla enim, a vulputate magna €51 luctus £66 eu. Fusce sodales, libero quis suscipit ultrices, metus erat auctor urna, sit amet dictum arcu tortor eu metus. 1. Ut volutpat, velit at interdum consectetur, nisl lorem consequat mauris, feugiat dignissim tellus massa ut nisl. 2. Praesent at est nisi. Pellentesque rutrum lorem sed dui accumsan gravida. 3. Pellentesque dictum nisl vitae urna luctus, congue pulvinar mi congue. Morbi vestibulum varius ipsum nec molestie. Proin auctor efficitur diam ut luctus. Phasellus cursus maximus ultricies. Mauris eu neque ut sem semper tempus. Curabitur non lorem eu nunc lobortis vi-verra at in diam. Pellentesque euismod purus a leo lobortis tempor. Maecenas mollis ligula at sem sus-cipit fringilla. Mauris sollicitudin tincidunt lectus id tempor. Etiam ut nisi est.
|
||||
@@ -0,0 +1,55 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
|
||||
{
|
||||
using System;
|
||||
using System.Text;
|
||||
|
||||
/// <summary>
/// Integration tests that extract the text of every page of a sample document and compare it
/// with the expected text stored in a <c>.txt</c> file next to the PDF.
/// </summary>
public class CMapLocalCachingTests
{
    // Sample documents live either in the "Integration" or in the "Dla" documents folder.
    private static readonly Lazy<string> DocumentFolder = new Lazy<string>(() => GetDocumentsFolder("Integration"));

    private static readonly Lazy<string> DlaFolder = new Lazy<string>(() => GetDocumentsFolder("Dla"));

    /// <summary>
    /// File names of the documents to check; each one needs a sibling <c>.txt</c> file holding the expected text.
    /// </summary>
    public static object[][] DocumentsData = new object[][]
    {
        ["68-1990-01_A.pdf"],
        ["Type0 Font.pdf"],
        ["11194059_2017-11_de_s.pdf"],
        ["2108.11480.pdf"],
        ["reference-2-numeric-error.pdf"],
        ["MOZILLA-3136-0.pdf"],
        ["FICTIF_TABLE_INDEX.pdf"],
        ["Approved_Document_B__fire_safety__volume_2_-_Buildings_other_than_dwellings__2019_edition_incorporating_2020_and_2022_amendments.pdf"],
        ["dotnet-ai.pdf"],
        ["Old Gutnish Internet Explorer.pdf"],
        ["Random 2 Columns Lists Hyph - Justified.pdf"]
    };

    /// <summary>
    /// Checks that the concatenated text of all pages matches the stored expected text.
    /// </summary>
    /// <param name="documentName">File name of the PDF document to open.</param>
    [Theory]
    [MemberData(nameof(DocumentsData))]
    public void CheckText(string documentName)
    {
        string fullPath = LocateDocument(documentName);

        Assert.True(File.Exists(fullPath));

        string actual = ExtractText(fullPath);

        // To regenerate an expected file, temporarily write 'actual' to
        // Path.ChangeExtension(fullPath, "txt") and review the result before committing it.
        string expected = File.ReadAllText(Path.ChangeExtension(fullPath, "txt"));

        Assert.Equal(expected, actual);
    }

    /// <summary>
    /// Builds the absolute path of the "Documents" folder of the given test area, relative to the test binaries.
    /// </summary>
    private static string GetDocumentsFolder(string area)
    {
        return Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", area, "Documents"));
    }

    /// <summary>
    /// Returns the path in the integration folder when the file exists there, otherwise the path in the Dla folder.
    /// </summary>
    private static string LocateDocument(string documentName)
    {
        string candidate = Path.Combine(DocumentFolder.Value, documentName);

        return File.Exists(candidate)
            ? candidate
            : Path.Combine(DlaFolder.Value, documentName);
    }

    /// <summary>
    /// Opens the document with lenient parsing and concatenates the text of each page in page order.
    /// </summary>
    private static string ExtractText(string path)
    {
        var text = new StringBuilder();

        using (var document = PdfDocument.Open(path, new ParsingOptions { UseLenientParsing = true }))
        {
            // Page numbers passed to GetPage start at 1.
            for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
            {
                text.Append(document.GetPage(pageNumber).Text);
            }
        }

        return text.ToString();
    }
}
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
BIN
src/UglyToad.PdfPig.Tests/Integration/Documents/2108.11480.txt
Normal file
BIN
src/UglyToad.PdfPig.Tests/Integration/Documents/2108.11480.txt
Normal file
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
|
||||
TypeWeekly newspaperEditorJoe StuverFoundedOctober, 1918 (asthe BroadusIndependent), 1935(as the PowderRiver CountyExaminer), and1965 (as thePowder RiverExaminer)Headquarters119 1/2 N. ParkAve.Broadus, MT 59317United StatesPowder River ExaminerPowder River ExaminerThe Powder River Examiner, originally established in October, 1918 asthe Broadus Independent, is the only newspaper printed in Powder RiverCounty, Montana, and is located in the county seat of Broadus.The Broadus Independent was first published in Broadus, Montana inOctober, 1918, and continued until February, 1919.From March 6, 1919 until April 17, 1919, the paper was published inOlive, Montana as the Olive Branch.The Broadus Independent was published weekly from April 24, 1919until 1935.The Powder River County Examiner replaced the BroadusIndependent in 1935, beginning publication and continuing weekly until1965.In 1965 the newspaper's name was shortened to Powder RiverExaminer, and remains that today.Broadus Independent, Broadus, Montana, October, 1918-February, 1919.Olive Branch, Olive, Montana, March 6, 1919 – April 17, 1919.Broadus Independent, Broadus, Montana, April 24, 1919 – 1935.Powder River County Examiner, Broadus, Montana, 1935-1965.Powder River Examiner, Broadus, Montana, 1965-current.Joe Stuver, (current editor)Retrieved from "https://en.wikipedia.org/w/index.php?title=Powder_River_Examiner&oldid=747264669"This page was last edited on 1 November 2016, at 11:53.Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. By usingthis site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the WikimediaFoundation, Inc., a non-profit organization.HistoryPreceding TitlesNotable contributorsPowder River Examiner - Wikipediahttps://en.wikipedia.org/wiki/Powder_River_Examiner1 of 130/03/2018, 03:50
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
|
||||
(Jl σ3 法海阔』咱国司被NA咿清峭阉且啊辛茹海耀表3-19车辆齿轮用钢系列牌号化学成分(质量分数)(%)国内牌号国外牌号c Si Mn p s Ni Cr Mo Al Cu Ti B 16CrMnTiH 0.13~0.18 0. \Kl -1. 20 20CrMnTiHI 20CrMnTiH2 。.80 -1. 10 1.00-1.30 20CrMnTiH3 0. 17 -0. 37军三0.0350. 04 -0. 10 0.18句0.2320CrMnTiH4 20CrMnTiH5 0. \Kl -1. 25 I.I。”1.4520CrMnTiH6 16MnCrH 16MnCr5 0. 14 -0. 20 I. 00~1.40 O.\KJ-1.20 20MnCrH 20MnCr5 0.17~0.23 1.10-1.50 1.00-1.30 0.02~ 罢王0.20运0.120.02~0.仍525MnCrH 25MnCr5 0.23~0.28 0.055 o. ro -o. so0. 80-1.10 28MnCrH 28MnCr5 0. 25 -0. 30 运0.15运0.1016CrMnBH ZF6 0. 13 -0. 18。.80-1.100.001 -18CrMnBH ZF7 0.15 -0. 40 1.00-1.30 0. 15 -0. 20军军aα丑。0. 015 -0. 035 1.00-1.30 0.α)317CrMnBH ZF7B 17Cr2Ni2H ZFI 0.15 -0.19 0.15 -0. 40 0.40~o.ro1.40~I. 70 1.40-1.70 16CrNiH 16CrNi4 0.13“0.18 0.02-0.04 0.15~0.35 0.70句1.100. 80-1. 200. 80~1.20 :;;;0.10 0.02-0. 05 19CrNiH 19CrNi5 0.16~0. 21 0. 02 -0. 035 17Cr2Ni2MoH ZFlA 0.15”0.19 0.15~0.40 o. 40 -o. ro0.015~0.035 I. 4。”I.70 1.50-1.80 0. 25 -0. 35 20CrNiMoHI 8620Hl 0.02-0.17”0.23 0.15町0.35o. ro -o. 95 0.017~0.032 0.35~0. 75 0.35”0.65 0. 15 -0. 25 20CrNiMoH2 8620田0.045 15CrMoH 0.13”0.18 0.25~0.45 0.17~0. 37 0.4。”0.70 髦。但50.8。”1.1020CrMo 0. 18 -0. 230.15 -0. 25 20CrMoH SCM420 0.17 -0. 23 0.17町0.350.55~0. \Kl 0.85句I.25 0.15”0.35 0.02-0.0击:;;;0.15 35CrMo 0. 32 -0. 40 0.40~0. 70 0.80斗100. 15 -0. 25 运0.03520CrH 0.17~0.37 0.70-1.00 运0.200.50~0.80 40Cr 0. 18 -0. 23 0.37~0.440. 80 -1.10
|
||||
31
src/UglyToad.PdfPig.Tests/Util/MurmurHash3Tests.cs
Normal file
31
src/UglyToad.PdfPig.Tests/Util/MurmurHash3Tests.cs
Normal file
@@ -0,0 +1,31 @@
|
||||
namespace UglyToad.PdfPig.Tests.Util
|
||||
{
|
||||
using PdfPig.Util;
|
||||
using System.Text;
|
||||
|
||||
/// <summary>
/// Checks the 128-bit x86 and x64 MurmurHash3 variants against known reference digests.
/// </summary>
public class MurmurHash3Tests
{
    /// <summary>
    /// Rows of: input sentence, expected x86 128-bit digest, expected x64 128-bit digest (lowercase hex).
    /// </summary>
    public static object[][] MurmurHashData = new object[][]
    {
        // https://murmurhash.shorelabs.com/
        ["The quick brown fox jumps over the lazy dog", "2f1583c3ecee2c675d7bf66ce5e91d2c", "e34bbc7bbc071b6c7a433ca9c49a9347"],
        ["MurmurHash3 was written by Austin Appleby, and is placed in the public", "6d3583489d9d1e5a898493af67e2ad10", "a91793d43f82cbabda2fb0c28c24799a"],
        ["0", "0ab2409ea5eb34f8a5eb34f8a5eb34f8", "2ac9debed546a3803a8de9e53c875e09"],
    };

    /// <summary>
    /// Hashes the UTF-8 bytes of the sentence with seed 0 using both variants and compares the hex digests.
    /// </summary>
    [Theory]
    [MemberData(nameof(MurmurHashData))]
    public void x86x64Check(string sentence, string expectedX86, string expectedX64)
    {
        byte[] data = Encoding.UTF8.GetBytes(sentence);

        string actualX86 = ToHex(MurmurHash3.Compute_x86_128(data, data.Length, 0));
        Assert.Equal(expectedX86, actualX86);

        string actualX64 = ToHex(MurmurHash3.Compute_x64_128(data, data.Length, 0));
        Assert.Equal(expectedX64, actualX64);
    }

    /// <summary>
    /// Formats each byte as two lowercase hexadecimal digits, in array order.
    /// </summary>
    private static string ToHex(byte[] bytes)
    {
        var hex = new StringBuilder(bytes.Length * 2);

        foreach (byte b in bytes)
        {
            hex.Append(b.ToString("x2"));
        }

        return hex.ToString();
    }
}
|
||||
}
|
||||
@@ -22,6 +22,7 @@
|
||||
using PdfFonts.Parser.Parts;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
using UglyToad.PdfPig.PdfFonts.Cmap;
|
||||
|
||||
internal static class PdfDocumentFactory
|
||||
{
|
||||
@@ -166,22 +167,27 @@
|
||||
|
||||
var encodingReader = new EncodingReader(pdfScanner);
|
||||
|
||||
var cmapCache = new CMapLocalCache(filterProvider, pdfScanner);
|
||||
|
||||
var type0Handler = new Type0FontHandler(
|
||||
cidFontFactory,
|
||||
filterProvider,
|
||||
pdfScanner,
|
||||
cmapCache,
|
||||
parsingOptions);
|
||||
|
||||
var type1Handler = new Type1FontHandler(
|
||||
pdfScanner,
|
||||
filterProvider,
|
||||
encodingReader,
|
||||
cmapCache,
|
||||
parsingOptions.UseLenientParsing);
|
||||
|
||||
var trueTypeHandler = new TrueTypeFontHandler(parsingOptions.Logger,
|
||||
var trueTypeHandler = new TrueTypeFontHandler(
|
||||
parsingOptions.Logger,
|
||||
pdfScanner,
|
||||
filterProvider,
|
||||
encodingReader,
|
||||
cmapCache,
|
||||
SystemFontFinder.Instance,
|
||||
type1Handler);
|
||||
|
||||
@@ -190,7 +196,7 @@
|
||||
type0Handler,
|
||||
trueTypeHandler,
|
||||
type1Handler,
|
||||
new Type3FontHandler(pdfScanner, filterProvider, encodingReader));
|
||||
new Type3FontHandler(pdfScanner, encodingReader, cmapCache));
|
||||
|
||||
var resourceContainer = new ResourceStore(pdfScanner, fontFactory, filterProvider, parsingOptions);
|
||||
|
||||
|
||||
@@ -26,9 +26,7 @@
|
||||
|
||||
if (CMapParser.TryParseExternal(name, out result))
|
||||
{
|
||||
|
||||
Cache[name] = result;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -43,9 +41,7 @@
|
||||
throw new ArgumentNullException(nameof(bytes));
|
||||
}
|
||||
|
||||
var result = CMapParser.Parse(bytes);
|
||||
|
||||
return result;
|
||||
return CMapParser.Parse(bytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
115
src/UglyToad.PdfPig/PdfFonts/Cmap/CMapLocalCache.cs
Normal file
115
src/UglyToad.PdfPig/PdfFonts/Cmap/CMapLocalCache.cs
Normal file
@@ -0,0 +1,115 @@
|
||||
namespace UglyToad.PdfPig.PdfFonts.Cmap
|
||||
{
|
||||
using Core;
|
||||
using Filters;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Text;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
using UglyToad.PdfPig.Util;
|
||||
|
||||
/// <summary>
/// Provides a local (per document) cache for CMap objects, allowing efficient retrieval and storage of CMap instances based on
/// their names and unique identifiers.
/// </summary>
/// <remarks>This class is designed to cache CMap objects to improve performance by avoiding redundant
/// parsing of CMap data. Entries are keyed first by the CMap name found in the decoded stream and then by a
/// GUID built from a 128-bit MurmurHash3 of the decoded bytes, so two different CMaps sharing a name do not clash.</remarks>
internal sealed class CMapLocalCache
{
    private static ReadOnlySpan<byte> CMapNameTag => "/CMapName "u8;

    private static ReadOnlySpan<byte> DefOperator => "def"u8;

    // Guards every read and write of _cache.
    private readonly object _cacheLock = new object();

    // CMap name -> (content hash -> parsed CMap).
    private readonly Dictionary<string, Dictionary<Guid, CMap>> _cache = new();
    private readonly ILookupFilterProvider _filterProvider;
    private readonly IPdfTokenScanner _scanner;

    /// <summary>
    /// Creates an empty cache.
    /// </summary>
    /// <param name="filterProvider">Filter provider passed to <c>StreamToken.Decode</c> when decoding CMap streams.</param>
    /// <param name="scanner">Token scanner passed to <c>StreamToken.Decode</c> when decoding CMap streams.</param>
    public CMapLocalCache(ILookupFilterProvider filterProvider, IPdfTokenScanner scanner)
    {
        _filterProvider = filterProvider;
        _scanner = scanner;
    }

    /// <summary>
    /// Looks up a CMap by name. This overload only forwards to <see cref="CMapCache.TryGet"/>;
    /// nothing is stored in the per-document dictionary.
    /// </summary>
    /// <param name="name">The CMap name.</param>
    /// <param name="result">The CMap when found, otherwise <see langword="null"/>.</param>
    /// <returns><see langword="true"/> when <see cref="CMapCache.TryGet"/> found the CMap.</returns>
    public bool TryGet(string name, [NotNullWhen(true)] out CMap? result)
    {
        return CMapCache.TryGet(name, out result);
    }

    /// <summary>
    /// Builds the identifier of a CMap from its decoded bytes.
    /// </summary>
    private static Guid GetGuid(ReadOnlySpan<byte> bytes)
    {
        // Assumes MurmurHash3 is good enough for hashing CMap data to create unique identifiers,
        // i.e. collisions should be extremely rare.
        return new Guid(MurmurHash3.Compute_x64_128(bytes));
    }

    /// <summary>
    /// Gets the CMap stored in a stream, parsing it only if an identical stream (same name, same content hash)
    /// has not already been parsed through this cache.
    /// </summary>
    /// <param name="token">The stream holding the CMap. May be <see langword="null"/>.</param>
    /// <param name="result">The parsed or cached CMap, or <see langword="null"/> when the method returns <see langword="false"/>.</param>
    /// <returns>
    /// <see langword="false"/> when <paramref name="token"/> is <see langword="null"/> or has no data;
    /// otherwise <see langword="true"/>. Exceptions thrown while decoding or parsing are not caught here.
    /// </returns>
    public bool TryGet(StreamToken? token, [NotNullWhen(true)] out CMap? result)
    {
        // Callers pass the outcome of an object lookup straight in; treat a missing stream like an empty one
        // instead of failing with a NullReferenceException.
        if (token is null || token.Data.IsEmpty)
        {
            result = null;
            return false;
        }

        var decodedUnicodeCMap = token.Decode(_filterProvider, _scanner);

        if (!TryGetNameFast(decodedUnicodeCMap.Span, out string? cmapName))
        {
            // No usable name: parse without caching.
            result = CMapCache.Parse(new MemoryInputBytes(decodedUnicodeCMap));
            return true;
        }

        var guid = GetGuid(decodedUnicodeCMap.Span);

        lock (_cacheLock)
        {
            if (!_cache.TryGetValue(cmapName, out var cMaps))
            {
                cMaps = new Dictionary<Guid, CMap>();
                _cache[cmapName] = cMaps;
            }

            if (cMaps.TryGetValue(guid, out result))
            {
                return true;
            }

            // Parsing happens inside the lock so the same CMap is not parsed twice concurrently.
            result = CMapCache.Parse(new MemoryInputBytes(decodedUnicodeCMap));
            cMaps[guid] = result;
        }

        return true;
    }

    /// <summary>
    /// Extracts the text between the first <c>/CMapName </c> tag and the next occurrence of <c>def</c>
    /// without tokenizing the data.
    /// </summary>
    /// <param name="bytes">The decoded CMap bytes.</param>
    /// <param name="name">The extracted name when the method returns <see langword="true"/>.</param>
    /// <returns>
    /// <see langword="false"/> when the tag is missing, when no <c>def</c> follows it, or when <c>def</c>
    /// follows the tag immediately (there is no name to extract).
    /// </returns>
    private static bool TryGetNameFast(ReadOnlySpan<byte> bytes, [NotNullWhen(true)] out string? name)
    {
        name = null;

        int nameIndex = bytes.IndexOf(CMapNameTag);

        if (nameIndex < 0)
        {
            return false;
        }

        nameIndex += CMapNameTag.Length;

        int nameEndIndex = bytes.Slice(nameIndex).IndexOf(DefOperator);

        // -1: no "def" found. 0: "def" directly after the tag, which would give a slice length of -1 below.
        if (nameEndIndex <= 0)
        {
            return false;
        }

        // Drop the single character preceding "def" (presumably the separating whitespace).
        // The value is only used as a dictionary key, so a name that itself contains "def" merely
        // yields a shorter key; the content hash still tells the entries apart.
        name = Encoding.UTF8.GetString(bytes.Slice(nameIndex, nameEndIndex - 1));
        return true;
    }
}
|
||||
}
|
||||
@@ -2,7 +2,6 @@
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Globalization;
|
||||
using System.Linq;
|
||||
using Cmap;
|
||||
@@ -30,12 +29,14 @@
|
||||
private readonly IEncodingReader encodingReader;
|
||||
private readonly ISystemFontFinder systemFontFinder;
|
||||
private readonly IFontHandler type1FontHandler;
|
||||
private readonly CMapLocalCache cmapLocalCache;
|
||||
|
||||
public TrueTypeFontHandler(
|
||||
ILog log,
|
||||
IPdfTokenScanner pdfScanner,
|
||||
ILookupFilterProvider filterProvider,
|
||||
IEncodingReader encodingReader,
|
||||
CMapLocalCache cmapLocalCache,
|
||||
ISystemFontFinder systemFontFinder,
|
||||
IFontHandler type1FontHandler)
|
||||
{
|
||||
@@ -45,6 +46,7 @@
|
||||
this.systemFontFinder = systemFontFinder;
|
||||
this.type1FontHandler = type1FontHandler;
|
||||
this.pdfScanner = pdfScanner;
|
||||
this.cmapLocalCache = cmapLocalCache;
|
||||
}
|
||||
|
||||
public IFont Generate(DictionaryToken dictionary)
|
||||
@@ -135,10 +137,9 @@
|
||||
try
|
||||
{
|
||||
var toUnicode = DirectObjectFinder.Get<StreamToken>(toUnicodeObj, pdfScanner);
|
||||
|
||||
if (toUnicode?.Decode(filterProvider, pdfScanner) is { } decodedUnicodeCMap)
|
||||
if (!cmapLocalCache.TryGet(toUnicode, out toUnicodeCMap))
|
||||
{
|
||||
toUnicodeCMap = CMapCache.Parse(new MemoryInputBytes(decodedUnicodeCMap));
|
||||
log.Error("Failed to decode ToUnicode CMap for a TrueType font in file.");
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
@@ -180,7 +181,7 @@
|
||||
return new TrueTypeSimpleFont(name, descriptor, toUnicodeCMap, encoding, font, firstCharacter, widths);
|
||||
}
|
||||
|
||||
private TrueTypeFont? ParseTrueTypeFont(FontDescriptor descriptor, [NotNullWhen(true)] out IFontHandler? actualHandler)
|
||||
private TrueTypeFont? ParseTrueTypeFont(FontDescriptor descriptor, out IFontHandler? actualHandler)
|
||||
{
|
||||
actualHandler = null;
|
||||
|
||||
@@ -203,8 +204,6 @@
|
||||
{
|
||||
var fontFileStream = DirectObjectFinder.Get<StreamToken>(descriptor.FontFile.ObjectKey, pdfScanner);
|
||||
|
||||
var fontFile = fontFileStream.Decode(filterProvider, pdfScanner);
|
||||
|
||||
if (descriptor.FontFile.FileType == DescriptorFontFile.FontFileType.FromSubtype)
|
||||
{
|
||||
var shouldThrow = true;
|
||||
@@ -229,7 +228,8 @@
|
||||
$"Expected a TrueType font in the TrueType font descriptor, instead it was {descriptor.FontFile.FileType}.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
var fontFile = fontFileStream.Decode(filterProvider, pdfScanner);
|
||||
var font = TrueTypeFontParser.Parse(new TrueTypeDataBytes(new MemoryInputBytes(fontFile)));
|
||||
|
||||
return font;
|
||||
@@ -237,7 +237,6 @@
|
||||
catch (Exception ex)
|
||||
{
|
||||
log.Error("Could not parse the TrueType font.", ex);
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,8 +5,6 @@
|
||||
using CidFonts;
|
||||
using Cmap;
|
||||
using Composite;
|
||||
using Core;
|
||||
using Filters;
|
||||
using Fonts;
|
||||
using Logging;
|
||||
using Parts;
|
||||
@@ -18,20 +16,20 @@
|
||||
internal sealed class Type0FontHandler : IFontHandler
|
||||
{
|
||||
private readonly CidFontFactory cidFontFactory;
|
||||
private readonly ILookupFilterProvider filterProvider;
|
||||
private readonly IPdfTokenScanner scanner;
|
||||
private readonly ILog logger;
|
||||
private readonly CMapLocalCache cmapLocalCache;
|
||||
private readonly ParsingOptions parsingOptions;
|
||||
|
||||
public Type0FontHandler(
|
||||
CidFontFactory cidFontFactory,
|
||||
ILookupFilterProvider filterProvider,
|
||||
IPdfTokenScanner scanner,
|
||||
CMapLocalCache cmapLocalCache,
|
||||
ParsingOptions parsingOptions)
|
||||
{
|
||||
this.cidFontFactory = cidFontFactory;
|
||||
this.filterProvider = filterProvider;
|
||||
this.scanner = scanner;
|
||||
this.cmapLocalCache = cmapLocalCache;
|
||||
logger = parsingOptions.Logger;
|
||||
this.parsingOptions = parsingOptions;
|
||||
}
|
||||
@@ -73,17 +71,12 @@
|
||||
{
|
||||
var toUnicodeValue = dictionary.Data[NameToken.ToUnicode];
|
||||
|
||||
if (DirectObjectFinder.TryGet<StreamToken>(toUnicodeValue, scanner, out var toUnicodeStream))
|
||||
{
|
||||
if (toUnicodeStream?.Decode(filterProvider, scanner) is { } decodedUnicodeCMap)
|
||||
{
|
||||
toUnicodeCMap = CMapCache.Parse(new MemoryInputBytes(decodedUnicodeCMap));
|
||||
}
|
||||
}
|
||||
if (DirectObjectFinder.TryGet<StreamToken>(toUnicodeValue, scanner, out var toUnicodeStream)
|
||||
&& cmapLocalCache.TryGet(toUnicodeStream,out toUnicodeCMap))
|
||||
{ }
|
||||
else if (DirectObjectFinder.TryGet<NameToken>(toUnicodeValue, scanner, out var toUnicodeName)
|
||||
&& CMapCache.TryGet(toUnicodeName.Data, out toUnicodeCMap))
|
||||
{
|
||||
}
|
||||
&& cmapLocalCache.TryGet(toUnicodeName.Data, out toUnicodeCMap))
|
||||
{ }
|
||||
else
|
||||
{
|
||||
// Rather than throwing here, let's try returning the font anyway since
|
||||
@@ -152,7 +145,7 @@
|
||||
|
||||
if (dictionary.TryGet(NameToken.Encoding, scanner, out NameToken? encodingName))
|
||||
{
|
||||
if (!CMapCache.TryGet(encodingName.Data, out var cmap))
|
||||
if (!cmapLocalCache.TryGet(encodingName.Data, out var cmap))
|
||||
{
|
||||
throw new InvalidOperationException($"Missing CMap named {encodingName.Data}.");
|
||||
}
|
||||
@@ -163,11 +156,12 @@
|
||||
}
|
||||
else if (dictionary.TryGet(NameToken.Encoding, scanner, out StreamToken? stream))
|
||||
{
|
||||
var decoded = stream.Decode(filterProvider, scanner);
|
||||
if (!cmapLocalCache.TryGet(stream, out var cmap))
|
||||
{
|
||||
throw new InvalidOperationException($"Could not read CMap from stream in the dictionary: {dictionary}");
|
||||
}
|
||||
|
||||
var cmap = CMapCache.Parse(new MemoryInputBytes(decoded));
|
||||
|
||||
result = cmap ?? throw new InvalidOperationException($"Could not read CMap from stream in the dictionary: {dictionary}");
|
||||
result = cmap;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -199,7 +193,7 @@
|
||||
|
||||
var isChineseJapaneseOrKorean = false;
|
||||
|
||||
if (cidFont != null && string.Equals(cidFont.SystemInfo.Registry, "Adobe", StringComparison.OrdinalIgnoreCase))
|
||||
if (cidFont is not null && string.Equals(cidFont.SystemInfo.Registry, "Adobe", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
isChineseJapaneseOrKorean = string.Equals(cidFont.SystemInfo.Ordering, "GB1", StringComparison.OrdinalIgnoreCase)
|
||||
|| string.Equals(cidFont.SystemInfo.Ordering, "CNS1", StringComparison.OrdinalIgnoreCase)
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
using Fonts.Type1.Parser;
|
||||
using PdfPig.Parser.Parts;
|
||||
using Simple;
|
||||
using System;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
|
||||
@@ -20,17 +19,20 @@
|
||||
private readonly IPdfTokenScanner pdfScanner;
|
||||
private readonly ILookupFilterProvider filterProvider;
|
||||
private readonly IEncodingReader encodingReader;
|
||||
private readonly CMapLocalCache cmapLocalCache;
|
||||
private readonly bool isLenientParsing;
|
||||
|
||||
public Type1FontHandler(
|
||||
IPdfTokenScanner pdfScanner,
|
||||
ILookupFilterProvider filterProvider,
|
||||
IEncodingReader encodingReader,
|
||||
CMapLocalCache cmapLocalCache,
|
||||
bool isLenientParsing)
|
||||
{
|
||||
this.pdfScanner = pdfScanner;
|
||||
this.filterProvider = filterProvider;
|
||||
this.encodingReader = encodingReader;
|
||||
this.cmapLocalCache = cmapLocalCache;
|
||||
this.isLenientParsing = isLenientParsing;
|
||||
}
|
||||
|
||||
@@ -80,7 +82,7 @@
|
||||
{
|
||||
var metrics = Standard14.GetAdobeFontMetrics(baseFontToken.Data);
|
||||
|
||||
if (metrics == null)
|
||||
if (metrics is null)
|
||||
{
|
||||
if (isLenientParsing)
|
||||
{
|
||||
@@ -112,19 +114,15 @@
|
||||
{
|
||||
var toUnicode = DirectObjectFinder.Get<StreamToken>(toUnicodeObj, pdfScanner);
|
||||
|
||||
if (toUnicode?.Decode(filterProvider, pdfScanner) is { } decodedUnicodeCMap)
|
||||
{
|
||||
toUnicodeCMap = CMapCache.Parse(new MemoryInputBytes(decodedUnicodeCMap));
|
||||
}
|
||||
cmapLocalCache.TryGet(toUnicode, out toUnicodeCMap);
|
||||
}
|
||||
|
||||
|
||||
var fromFont = default(Encoding);
|
||||
if (font != null)
|
||||
{
|
||||
if (font.TryGetFirst(out var t1Font))
|
||||
{
|
||||
fromFont = t1Font.Encoding != null ? new BuiltInEncoding(t1Font.Encoding) : default(Encoding);
|
||||
fromFont = t1Font.Encoding is not null ? new BuiltInEncoding(t1Font.Encoding) : default(Encoding);
|
||||
}
|
||||
else if (font.TryGetSecond(out var cffFont))
|
||||
{
|
||||
|
||||
@@ -2,28 +2,27 @@
|
||||
{
|
||||
using Cmap;
|
||||
using Core;
|
||||
using Filters;
|
||||
using Fonts;
|
||||
using Fonts.Encodings;
|
||||
using PdfPig.Parser.Parts;
|
||||
using Simple;
|
||||
using System;
|
||||
using Tokenization.Scanner;
|
||||
using Tokens;
|
||||
using Util;
|
||||
|
||||
internal class Type3FontHandler : IFontHandler
|
||||
{
|
||||
private readonly ILookupFilterProvider filterProvider;
|
||||
private readonly IEncodingReader encodingReader;
|
||||
private readonly IPdfTokenScanner scanner;
|
||||
private readonly CMapLocalCache cmapLocalCache;
|
||||
|
||||
public Type3FontHandler(IPdfTokenScanner scanner, ILookupFilterProvider filterProvider,
|
||||
IEncodingReader encodingReader)
|
||||
public Type3FontHandler(IPdfTokenScanner scanner,
|
||||
IEncodingReader encodingReader,
|
||||
CMapLocalCache cMapLocalCache)
|
||||
{
|
||||
this.filterProvider = filterProvider;
|
||||
this.encodingReader = encodingReader;
|
||||
this.scanner = scanner;
|
||||
this.cmapLocalCache = cMapLocalCache;
|
||||
}
|
||||
|
||||
public IFont Generate(DictionaryToken dictionary)
|
||||
@@ -48,11 +47,7 @@
|
||||
if (dictionary.TryGet(NameToken.ToUnicode, out var toUnicodeObj))
|
||||
{
|
||||
var toUnicode = DirectObjectFinder.Get<StreamToken>(toUnicodeObj, scanner);
|
||||
|
||||
if (toUnicode?.Decode(filterProvider, scanner) is { } decodedUnicodeCMap)
|
||||
{
|
||||
toUnicodeCMap = CMapCache.Parse(new MemoryInputBytes(decodedUnicodeCMap));
|
||||
}
|
||||
cmapLocalCache.TryGet(toUnicode, out toUnicodeCMap);
|
||||
}
|
||||
|
||||
var name = GetFontName(dictionary);
|
||||
|
||||
436
src/UglyToad.PdfPig/Util/MurmurHash3.cs
Normal file
436
src/UglyToad.PdfPig/Util/MurmurHash3.cs
Normal file
@@ -0,0 +1,436 @@
|
||||
namespace UglyToad.PdfPig.Util
|
||||
{
|
||||
// Ported from c++ implementation at https://github.com/aappleby/smhasher/blob/0ff96f7835817a27d0487325b6c16033e2992eb5/src/MurmurHash3.cpp#L1
|
||||
// The code was ported with LLM assistance.
|
||||
//
|
||||
// The original license is included below.
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||
// domain. The author hereby disclaims copyright to this source code.
|
||||
|
||||
// Note - The x86 and x64 versions do _not_ produce the same results, as the
|
||||
// algorithms are optimized for their respective platforms. You can still
|
||||
// compile and run any of them on any platform, but your performance with the
|
||||
// non-native version will be less than optimal.
|
||||
|
||||
using System;
|
||||
using System.Buffers.Binary;
|
||||
using System.Security.Cryptography;
|
||||
|
||||
/// <summary>
|
||||
/// MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup.
|
||||
/// <para>
|
||||
/// Note - The x86 and x64 versions do _not_ produce the same results, as the
|
||||
/// algorithms are optimized for their respective platforms. You can still
|
||||
/// compile and run any of them on any platform, but your performance with the
|
||||
/// non-native version will be less than optimal.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
internal static class MurmurHash3
|
||||
{
|
||||
// From Wikipedia:
|
||||
// MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. It was created
|
||||
// by Austin Appleby in 2008 and, as of 8 January 2016, is hosted on GitHub along with its test suite named
|
||||
// SMHasher. It also exists in a number of variants, all of which have been released into the public domain.
|
||||
// The name comes from two basic operations, multiply (MU) and rotate (R), used in its inner loop.
|
||||
//
|
||||
// Unlike cryptographic hash functions, it is not specifically designed to be difficult to reverse by
|
||||
// an adversary, making it unsuitable for cryptographic purposes.
|
||||
|
||||
/// <summary>
/// MurmurHash3 128-bit x86 variant over the whole of <paramref name="data"/> with a seed of 0,
/// returned as a 16-byte array.
/// <para>
/// Note - The x86 and x64 versions do _not_ produce the same results, as the
/// algorithms are optimized for their respective platforms. You can still
/// compile and run any of them on any platform, but your performance with the
/// non-native version will be less than optimal.
/// </para>
/// </summary>
/// <param name="data">The bytes to hash.</param>
/// <returns>The hash as a byte array.</returns>
public static byte[] Compute_x86_128(ReadOnlySpan<byte> data)
    => Compute_x86_128(data, data.Length, 0);
|
||||
|
||||
/// <summary>
|
||||
/// MurmurHash3 128-bit x86 variant, returns hash as byte array (16 bytes).
|
||||
/// <para>
|
||||
/// Note - The x86 and x64 versions do _not_ produce the same results, as the
|
||||
/// algorithms are optimized for their respective platforms. You can still
|
||||
/// compile and run any of them on any platform, but your performance with the
|
||||
/// non-native version will be less than optimal.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public static byte[] Compute_x86_128(ReadOnlySpan<byte> data, int len, uint seed)
|
||||
{
|
||||
Span<uint> hash = stackalloc uint[4];
|
||||
Compute_x86_128(data, len, seed, hash);
|
||||
|
||||
#if NET
|
||||
byte[] result = GC.AllocateUninitializedArray<byte>(16);
|
||||
#else
|
||||
byte[] result = new byte[16];
|
||||
#endif
|
||||
|
||||
var span = result.AsSpan();
|
||||
|
||||
Span<byte> buffer = stackalloc byte[4];
|
||||
GetBytes(buffer, hash[0]);
|
||||
buffer.CopyTo(span.Slice(0, 4));
|
||||
|
||||
GetBytes(buffer, hash[1]);
|
||||
buffer.CopyTo(span.Slice(4, 4));
|
||||
|
||||
GetBytes(buffer, hash[2]);
|
||||
buffer.CopyTo(span.Slice(8, 4));
|
||||
|
||||
GetBytes(buffer, hash[3]);
|
||||
buffer.CopyTo(span.Slice(12, 4));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// MurmurHash3 128-bit x64 variant, returns hash as byte array (16 bytes).
|
||||
/// <para>
|
||||
/// Note - The x86 and x64 versions do _not_ produce the same results, as the
|
||||
/// algorithms are optimized for their respective platforms. You can still
|
||||
/// compile and run any of them on any platform, but your performance with the
|
||||
/// non-native version will be less than optimal.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public static byte[] Compute_x64_128(ReadOnlySpan<byte> data)
|
||||
{
|
||||
return Compute_x64_128(data, data.Length, 0);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// MurmurHash3 128-bit x64 variant, returns hash as byte array (16 bytes).
|
||||
/// <para>
|
||||
/// Note - The x86 and x64 versions do _not_ produce the same results, as the
|
||||
/// algorithms are optimized for their respective platforms. You can still
|
||||
/// compile and run any of them on any platform, but your performance with the
|
||||
/// non-native version will be less than optimal.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public static byte[] Compute_x64_128(ReadOnlySpan<byte> data, int len, uint seed)
|
||||
{
|
||||
Span<ulong> hash = stackalloc ulong[2];
|
||||
Compute_x64_128(data, len, seed, hash);
|
||||
|
||||
#if NET
|
||||
byte[] result = GC.AllocateUninitializedArray<byte>(16);
|
||||
#else
|
||||
byte[] result = new byte[16];
|
||||
#endif
|
||||
|
||||
var span = result.AsSpan();
|
||||
|
||||
Span<byte> buffer = stackalloc byte[8];
|
||||
GetBytes(buffer, hash[0]);
|
||||
buffer.CopyTo(span.Slice(0, 8));
|
||||
|
||||
GetBytes(buffer, hash[1]);
|
||||
buffer.CopyTo(span.Slice(8, 8));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void Compute_x86_128(ReadOnlySpan<byte> data, int len, uint seed, Span<uint> outHash)
|
||||
{
|
||||
const uint c1 = 0x239b961b, c2 = 0xab0e9789, c3 = 0x38b34ae5, c4 = 0xa1e38b93;
|
||||
|
||||
uint h1 = seed, h2 = seed, h3 = seed, h4 = seed;
|
||||
int nblocks = len / 16;
|
||||
|
||||
// Body
|
||||
for (int i = 0; i < nblocks; ++i)
|
||||
{
|
||||
int offset = i * 16;
|
||||
|
||||
uint k1 = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(offset));
|
||||
uint k2 = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(offset + 4));
|
||||
uint k3 = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(offset + 8));
|
||||
uint k4 = BinaryPrimitives.ReadUInt32LittleEndian(data.Slice(offset + 12));
|
||||
|
||||
k1 *= c1;
|
||||
k1 = Rotl32(k1, 15);
|
||||
k1 *= c2;
|
||||
h1 ^= k1;
|
||||
h1 = Rotl32(h1, 19);
|
||||
h1 += h2;
|
||||
h1 = h1 * 5 + 0x561ccd1b;
|
||||
|
||||
k2 *= c2;
|
||||
k2 = Rotl32(k2, 16);
|
||||
k2 *= c3;
|
||||
h2 ^= k2;
|
||||
h2 = Rotl32(h2, 17);
|
||||
h2 += h3;
|
||||
h2 = h2 * 5 + 0x0bcaa747;
|
||||
|
||||
k3 *= c3;
|
||||
k3 = Rotl32(k3, 17);
|
||||
k3 *= c4;
|
||||
h3 ^= k3;
|
||||
h3 = Rotl32(h3, 15);
|
||||
h3 += h4;
|
||||
h3 = h3 * 5 + 0x96cd1c35;
|
||||
|
||||
k4 *= c4;
|
||||
k4 = Rotl32(k4, 18);
|
||||
k4 *= c1;
|
||||
h4 ^= k4;
|
||||
h4 = Rotl32(h4, 13);
|
||||
h4 += h1;
|
||||
h4 = h4 * 5 + 0x32ac3b17;
|
||||
}
|
||||
|
||||
// Tail
|
||||
int tailStart = nblocks * 16;
|
||||
uint tk1 = 0, tk2 = 0, tk3 = 0, tk4 = 0;
|
||||
switch (len & 15)
|
||||
{
|
||||
case 15:
|
||||
tk4 ^= (uint)data[tailStart + 14] << 16;
|
||||
goto case 14;
|
||||
case 14:
|
||||
tk4 ^= (uint)data[tailStart + 13] << 8;
|
||||
goto case 13;
|
||||
case 13:
|
||||
tk4 ^= (uint)data[tailStart + 12];
|
||||
tk4 *= c4;
|
||||
tk4 = Rotl32(tk4, 18);
|
||||
tk4 *= c1;
|
||||
h4 ^= tk4;
|
||||
goto case 12;
|
||||
case 12:
|
||||
tk3 ^= (uint)data[tailStart + 11] << 24;
|
||||
goto case 11;
|
||||
case 11:
|
||||
tk3 ^= (uint)data[tailStart + 10] << 16;
|
||||
goto case 10;
|
||||
case 10:
|
||||
tk3 ^= (uint)data[tailStart + 9] << 8;
|
||||
goto case 9;
|
||||
case 9:
|
||||
tk3 ^= (uint)data[tailStart + 8];
|
||||
tk3 *= c3;
|
||||
tk3 = Rotl32(tk3, 17);
|
||||
tk3 *= c4;
|
||||
h3 ^= tk3;
|
||||
goto case 8;
|
||||
case 8:
|
||||
tk2 ^= (uint)data[tailStart + 7] << 24;
|
||||
goto case 7;
|
||||
case 7:
|
||||
tk2 ^= (uint)data[tailStart + 6] << 16;
|
||||
goto case 6;
|
||||
case 6:
|
||||
tk2 ^= (uint)data[tailStart + 5] << 8;
|
||||
goto case 5;
|
||||
case 5:
|
||||
tk2 ^= (uint)data[tailStart + 4];
|
||||
tk2 *= c2;
|
||||
tk2 = Rotl32(tk2, 16);
|
||||
tk2 *= c3;
|
||||
h2 ^= tk2;
|
||||
goto case 4;
|
||||
case 4:
|
||||
tk1 ^= (uint)data[tailStart + 3] << 24;
|
||||
goto case 3;
|
||||
case 3:
|
||||
tk1 ^= (uint)data[tailStart + 2] << 16;
|
||||
goto case 2;
|
||||
case 2:
|
||||
tk1 ^= (uint)data[tailStart + 1] << 8;
|
||||
goto case 1;
|
||||
case 1:
|
||||
tk1 ^= (uint)data[tailStart];
|
||||
tk1 *= c1;
|
||||
tk1 = Rotl32(tk1, 15);
|
||||
tk1 *= c2;
|
||||
h1 ^= tk1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Finalization
|
||||
h1 ^= (uint)len;
|
||||
h2 ^= (uint)len;
|
||||
h3 ^= (uint)len;
|
||||
h4 ^= (uint)len;
|
||||
h1 += h2;
|
||||
h1 += h3;
|
||||
h1 += h4;
|
||||
h2 += h1;
|
||||
h3 += h1;
|
||||
h4 += h1;
|
||||
h1 = Fmix32(h1);
|
||||
h2 = Fmix32(h2);
|
||||
h3 = Fmix32(h3);
|
||||
h4 = Fmix32(h4);
|
||||
h1 += h2;
|
||||
h1 += h3;
|
||||
h1 += h4;
|
||||
h2 += h1;
|
||||
h3 += h1;
|
||||
h4 += h1;
|
||||
|
||||
outHash[0] = h1;
|
||||
outHash[1] = h2;
|
||||
outHash[2] = h3;
|
||||
outHash[3] = h4;
|
||||
}
|
||||
|
||||
private static void Compute_x64_128(ReadOnlySpan<byte> data, int len, uint seed, Span<ulong> outHash)
|
||||
{
|
||||
const ulong c1 = 0x87c37b91114253d5UL;
|
||||
const ulong c2 = 0x4cf5ad432745937fUL;
|
||||
|
||||
ulong h1 = seed, h2 = seed;
|
||||
int nblocks = len / 16;
|
||||
|
||||
// Body
|
||||
for (int i = 0; i < nblocks; ++i)
|
||||
{
|
||||
int offset = i * 16;
|
||||
ulong k1 = BinaryPrimitives.ReadUInt64LittleEndian(data.Slice(offset));
|
||||
ulong k2 = BinaryPrimitives.ReadUInt64LittleEndian(data.Slice(offset + 8));
|
||||
|
||||
k1 *= c1;
|
||||
k1 = Rotl64(k1, 31);
|
||||
k1 *= c2;
|
||||
h1 ^= k1;
|
||||
h1 = Rotl64(h1, 27);
|
||||
h1 += h2;
|
||||
h1 = h1 * 5 + 0x52dce729;
|
||||
|
||||
k2 *= c2;
|
||||
k2 = Rotl64(k2, 33);
|
||||
k2 *= c1;
|
||||
h2 ^= k2;
|
||||
h2 = Rotl64(h2, 31);
|
||||
h2 += h1;
|
||||
h2 = h2 * 5 + 0x38495ab5;
|
||||
}
|
||||
|
||||
// Tail
|
||||
int tailStart = nblocks * 16;
|
||||
ulong tk1 = 0, tk2 = 0;
|
||||
switch (len & 15)
|
||||
{
|
||||
case 15:
|
||||
tk2 ^= ((ulong)data[tailStart + 14]) << 48;
|
||||
goto case 14;
|
||||
case 14:
|
||||
tk2 ^= ((ulong)data[tailStart + 13]) << 40;
|
||||
goto case 13;
|
||||
case 13:
|
||||
tk2 ^= ((ulong)data[tailStart + 12]) << 32;
|
||||
goto case 12;
|
||||
case 12:
|
||||
tk2 ^= ((ulong)data[tailStart + 11]) << 24;
|
||||
goto case 11;
|
||||
case 11:
|
||||
tk2 ^= ((ulong)data[tailStart + 10]) << 16;
|
||||
goto case 10;
|
||||
case 10:
|
||||
tk2 ^= ((ulong)data[tailStart + 9]) << 8;
|
||||
goto case 9;
|
||||
case 9:
|
||||
tk2 ^= ((ulong)data[tailStart + 8]);
|
||||
tk2 *= c2;
|
||||
tk2 = Rotl64(tk2, 33);
|
||||
tk2 *= c1;
|
||||
h2 ^= tk2;
|
||||
goto case 8;
|
||||
case 8:
|
||||
tk1 ^= ((ulong)data[tailStart + 7]) << 56;
|
||||
goto case 7;
|
||||
case 7:
|
||||
tk1 ^= ((ulong)data[tailStart + 6]) << 48;
|
||||
goto case 6;
|
||||
case 6:
|
||||
tk1 ^= ((ulong)data[tailStart + 5]) << 40;
|
||||
goto case 5;
|
||||
case 5:
|
||||
tk1 ^= ((ulong)data[tailStart + 4]) << 32;
|
||||
goto case 4;
|
||||
case 4:
|
||||
tk1 ^= ((ulong)data[tailStart + 3]) << 24;
|
||||
goto case 3;
|
||||
case 3:
|
||||
tk1 ^= ((ulong)data[tailStart + 2]) << 16;
|
||||
goto case 2;
|
||||
case 2:
|
||||
tk1 ^= ((ulong)data[tailStart + 1]) << 8;
|
||||
goto case 1;
|
||||
case 1:
|
||||
tk1 ^= ((ulong)data[tailStart + 0]);
|
||||
tk1 *= c1;
|
||||
tk1 = Rotl64(tk1, 31);
|
||||
tk1 *= c2;
|
||||
h1 ^= tk1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Finalization
|
||||
h1 ^= (ulong)len;
|
||||
h2 ^= (ulong)len;
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
h1 = Fmix64(h1);
|
||||
h2 = Fmix64(h2);
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
outHash[0] = h1;
|
||||
outHash[1] = h2;
|
||||
}
|
||||
|
||||
// ---- Utility functions and mixing ----
|
||||
|
||||
private static uint Rotl32(uint x, int r) => (x << r) | (x >> (32 - r));
|
||||
|
||||
private static ulong Rotl64(ulong x, int r) => (x << r) | (x >> (64 - r));
|
||||
|
||||
private static uint Fmix32(uint h)
|
||||
{
|
||||
h ^= h >> 16;
|
||||
h *= 0x85ebca6b;
|
||||
h ^= h >> 13;
|
||||
h *= 0xc2b2ae35;
|
||||
h ^= h >> 16;
|
||||
return h;
|
||||
}
|
||||
|
||||
private static ulong Fmix64(ulong k)
|
||||
{
|
||||
k ^= k >> 33;
|
||||
k *= 0xff51afd7ed558ccdUL;
|
||||
k ^= k >> 33;
|
||||
k *= 0xc4ceb9fe1a85ec53UL;
|
||||
k ^= k >> 33;
|
||||
return k;
|
||||
}
|
||||
|
||||
private static void GetBytes(Span<byte> buffer, ulong v)
|
||||
{
|
||||
if (BitConverter.IsLittleEndian)
|
||||
{
|
||||
v = BinaryPrimitives.ReverseEndianness(v);
|
||||
}
|
||||
|
||||
BinaryPrimitives.WriteUInt64LittleEndian(buffer, v);
|
||||
}
|
||||
|
||||
private static void GetBytes(Span<byte> buffer, uint v)
|
||||
{
|
||||
if (BitConverter.IsLittleEndian)
|
||||
{
|
||||
v = BinaryPrimitives.ReverseEndianness(v);
|
||||
}
|
||||
|
||||
BinaryPrimitives.WriteUInt32LittleEndian(buffer, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user