mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-06-28 15:30:17 +08:00
Enabled ClipPaths option to filter letters. (#905)
* Enabled ClipPaths option to filter letters. * parsingOptions is not passed as an argument now.
This commit is contained in:
parent
cd2a85e642
commit
09bddba778
Binary file not shown.
Binary file not shown.
46
src/UglyToad.PdfPig.Tests/Integration/LetterFilterTests.cs
Normal file
46
src/UglyToad.PdfPig.Tests/Integration/LetterFilterTests.cs
Normal file
@ -0,0 +1,46 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
{
    using System.Collections.Generic;
    using System.Linq;

    /// <summary>
    /// Integration tests for the <c>ClipPaths</c> parsing option: each document is opened
    /// once with the option enabled and once with it disabled, and the extracted
    /// letters/words are compared.
    /// </summary>
    public class LetterFilterTests
    {
        /// <summary>
        /// Opening the document with <c>ClipPaths = true</c> must yield fewer letters on
        /// page 5 than opening it with <c>ClipPaths = false</c>.
        /// </summary>
        [Fact]
        public void CanFilterClippedLetters()
        {
            var one = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test1.pdf");

            using (var doc1 = PdfDocument.Open(one, new ParsingOptions { ClipPaths = true }))
            using (var doc2 = PdfDocument.Open(one, new ParsingOptions { ClipPaths = false }))
            {
                var allLetters = doc2.GetPage(5).Letters.Count;
                var filteredLetters = doc1.GetPage(5).Letters.Count;

                Assert.True(filteredLetters < allLetters,
                    "Expected filtered letter count to be lower than non-filtered"); // Filtered: 3158 letters, Non-filtered: 3184 letters
            }
        }

        /// <summary>
        /// Checks the word that immediately follows a known anchor word, with and without
        /// clip-path filtering, to verify that letters from hidden columns no longer bleed
        /// into the first visible word of the table.
        /// </summary>
        [Fact]
        public void CanFilterClippedLetters_CheckBleedInSpecificWord()
        {
            var one = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test2.pdf");

            using (var doc1 = PdfDocument.Open(one, new ParsingOptions { ClipPaths = true }))
            using (var doc2 = PdfDocument.Open(one, new ParsingOptions { ClipPaths = false }))
            {
                var allWords = doc2.GetPage(1).GetWords().Select(w => w.Text).ToList();
                var filteredWords = doc1.GetPage(1).GetWords().Select(w => w.Text).ToList();

                // The table has hidden columns at the left end. Letters from these columns get merged in words
                // which is incorrect. Filtering letters based on clip path should fix that...
                const string wordToSearchAfterWhichTheActualTableStarts = "ARISER";

                // Without filtering, the hidden-column letters are interleaved into the word.
                Assert.Equal("MLIA0U01CP00O0I3N6G2", GetWordAfter(allWords, wordToSearchAfterWhichTheActualTableStarts));

                // With filtering, only the visible word remains.
                Assert.Equal("ACOGUT", GetWordAfter(filteredWords, wordToSearchAfterWhichTheActualTableStarts));
            }
        }

        /// <summary>
        /// Returns the word that directly follows the first occurrence of <paramref name="anchor"/>.
        /// Fails the test with an explicit message when the anchor is missing or is the last word,
        /// instead of silently inspecting an unrelated word (a "not found" index of -1 plus one
        /// would otherwise point at the first word of the page).
        /// </summary>
        /// <param name="words">Texts of the words of the page, in extraction order.</param>
        /// <param name="anchor">The word to locate (ordinal, case-sensitive match).</param>
        /// <returns>The text of the word immediately after the anchor.</returns>
        private static string GetWordAfter(List<string> words, string anchor)
        {
            var anchorIndex = words.IndexOf(anchor);

            Assert.True(anchorIndex >= 0, $"Anchor word '{anchor}' was not found on the page.");
            Assert.True(anchorIndex + 1 < words.Count, $"No word follows the anchor word '{anchor}'.");

            return words[anchorIndex + 1];
        }
    }
}
|
@ -140,9 +140,7 @@
|
||||
}
|
||||
|
||||
public override void RenderGlyph(IFont font,
|
||||
IColor strokingColor,
|
||||
IColor nonStrokingColor,
|
||||
TextRenderingMode textRenderingMode,
|
||||
CurrentGraphicsState currentState,
|
||||
double fontSize,
|
||||
double pointSize,
|
||||
int code,
|
||||
|
@ -61,9 +61,7 @@
|
||||
}
|
||||
|
||||
public override void RenderGlyph(IFont font,
|
||||
IColor strokingColor,
|
||||
IColor nonStrokingColor,
|
||||
TextRenderingMode textRenderingMode,
|
||||
CurrentGraphicsState currentState,
|
||||
double fontSize,
|
||||
double pointSize,
|
||||
int code,
|
||||
@ -74,6 +72,7 @@
|
||||
in TransformationMatrix transformationMatrix,
|
||||
CharacterBoundingBox characterBoundingBox)
|
||||
{
|
||||
var textRenderingMode = currentState.FontState.TextRenderingMode;
|
||||
if (textRenderingMode == TextRenderingMode.Neither)
|
||||
{
|
||||
return;
|
||||
@ -85,7 +84,7 @@
|
||||
if (font.TryGetNormalisedPath(code, out var path))
|
||||
{
|
||||
var skPath = path.ToSKPath();
|
||||
ShowVectorFontGlyph(skPath, strokingColor, nonStrokingColor, textRenderingMode, renderingMatrix,
|
||||
ShowVectorFontGlyph(skPath, currentState.CurrentStrokingColor!, currentState.CurrentNonStrokingColor!, textRenderingMode, renderingMatrix,
|
||||
textMatrix, transformationMatrix);
|
||||
}
|
||||
else
|
||||
|
@ -517,6 +517,26 @@
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Determines whether at least one point of the rectangle's polygon (its corners) lies inside the path.
/// </summary>
/// <param name="path">The path that should contain the rectangle corner.</param>
/// <param name="rectangle">The rectangle whose corners are tested against the path.</param>
/// <param name="includeBorder">If set to false, will return false if the rectangle is on the path's border.</param>
/// <returns><c>true</c> as soon as one corner is found inside the path; otherwise <c>false</c>.</returns>
public static bool IntersectsWith(this PdfPath path, PdfRectangle rectangle, bool includeBorder = false)
{
    // NB, For later dev: Might not work for concave outer path, as it can contain all the points of the inner rectangle, but have overlapping edges.

    // Convert every subpath into its own materialised Clipper polygon.
    var polygons = path.Select(subpath => subpath.ToClipperPolygon().ToList()).ToList();

    // Translate the path's filling rule into the matching Clipper fill type.
    ClipperPolyFillType fillRule;
    if (path.FillingRule == FillingRule.NonZeroWinding)
    {
        fillRule = ClipperPolyFillType.NonZero;
    }
    else
    {
        fillRule = ClipperPolyFillType.EvenOdd;
    }

    // Stops at the first corner that is reported as lying within the polygons.
    return rectangle.ToClipperPolygon()
                    .Any(corner => PointInPaths(corner, polygons, fillRule, includeBorder));
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the <see cref="PdfRectangle"/> that is the intersection of two rectangles.
|
||||
/// <para>Only works for axis-aligned rectangles.</para>
|
||||
|
@ -282,9 +282,7 @@
|
||||
var boundingBox = font.GetBoundingBox(code);
|
||||
|
||||
RenderGlyph(font,
|
||||
currentState.CurrentStrokingColor!,
|
||||
currentState.CurrentNonStrokingColor!,
|
||||
currentState.FontState.TextRenderingMode,
|
||||
currentState,
|
||||
fontSize,
|
||||
pointSize,
|
||||
code,
|
||||
@ -317,9 +315,7 @@
|
||||
/// Render glyph implement.
|
||||
/// </summary>
|
||||
public abstract void RenderGlyph(IFont font,
|
||||
IColor strokingColor,
|
||||
IColor nonStrokingColor,
|
||||
TextRenderingMode textRenderingMode,
|
||||
CurrentGraphicsState currentState,
|
||||
double fontSize,
|
||||
double pointSize,
|
||||
int code,
|
||||
|
@ -87,9 +87,7 @@ namespace UglyToad.PdfPig.Graphics
|
||||
}
|
||||
|
||||
public override void RenderGlyph(IFont font,
|
||||
IColor strokingColor,
|
||||
IColor nonStrokingColor,
|
||||
TextRenderingMode textRenderingMode,
|
||||
CurrentGraphicsState currentState,
|
||||
double fontSize,
|
||||
double pointSize,
|
||||
int code,
|
||||
@ -109,6 +107,13 @@ namespace UglyToad.PdfPig.Graphics
|
||||
transformationMatrix,
|
||||
new PdfRectangle(0, 0, characterBoundingBox.Width, 0));
|
||||
|
||||
if (ParsingOptions.ClipPaths)
|
||||
{
|
||||
var currentClipping = currentState.CurrentClippingPath;
|
||||
if (currentClipping?.IntersectsWith(transformedGlyphBounds) == false)
|
||||
return;
|
||||
}
|
||||
|
||||
Letter letter = null;
|
||||
if (Diacritics.IsInCombiningDiacriticRange(unicode) && currentOffset > 0 && letters.Count > 0)
|
||||
{
|
||||
@ -147,9 +152,9 @@ namespace UglyToad.PdfPig.Graphics
|
||||
transformedPdfBounds.Width,
|
||||
fontSize,
|
||||
font.Details,
|
||||
textRenderingMode,
|
||||
strokingColor,
|
||||
nonStrokingColor,
|
||||
currentState.FontState.TextRenderingMode,
|
||||
currentState.CurrentStrokingColor!,
|
||||
currentState.CurrentNonStrokingColor!,
|
||||
pointSize,
|
||||
TextSequence);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user