Enabled ClipPaths option to filter letters. (#905)

* Enabled ClipPaths option to filter letters.

* parsingOptions is no longer passed as an argument.
This commit is contained in:
yjagota 2024-09-08 18:06:26 +05:30 committed by GitHub
parent cd2a85e642
commit 09bddba778
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 83 additions and 19 deletions

View File

@ -0,0 +1,46 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using System.Linq;

    /// <summary>
    /// Integration tests for the <c>ParsingOptions.ClipPaths</c> option, which drops
    /// letters that fall outside the current clipping path while a page is parsed.
    /// </summary>
    public class LetterFilterTests
    {
        /// <summary>
        /// Opening the same document with clipping enabled must yield fewer letters on page 5
        /// than opening it with clipping disabled.
        /// </summary>
        [Fact]
        public void CanFilterClippedLetters()
        {
            var documentPath = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test1.pdf");

            using (var clippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = true }))
            using (var unclippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = false }))
            {
                var allLetters = unclippedDocument.GetPage(5).Letters.Count;
                var filteredLetters = clippedDocument.GetPage(5).Letters.Count;

                // Counts observed when the test was written - Filtered: 3158 letters, Non-filtered: 3184 letters
                Assert.True(filteredLetters < allLetters,
                    "Expected filtered letter count to be lower than non-filtered");
            }
        }

        /// <summary>
        /// Checks one specific word to show that letters from hidden (clipped) table columns
        /// no longer bleed into visible words once clipping is enabled.
        /// </summary>
        [Fact]
        public void CanFilterClippedLetters_CheckBleedInSpecificWord()
        {
            var documentPath = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test2.pdf");

            using (var clippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = true }))
            using (var unclippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = false }))
            {
                var allWords = unclippedDocument.GetPage(1).GetWords().Select(w => w.Text).ToArray();
                var filteredWords = clippedDocument.GetPage(1).GetWords().Select(w => w.Text).ToArray();

                // The table has hidden columns at the left end. Letters from these columns get merged in words
                // which is incorrect. Filtering letters based on clip path should fix that...
                const string wordToSearchAfterWhichTheActualTableStarts = "ARISER";

                // Without clipping, the hidden letters are interleaved with the visible ones.
                Assert.Equal("MLIA0U01CP00O0I3N6G2", GetWordFollowing(allWords, wordToSearchAfterWhichTheActualTableStarts));

                // With clipping, only the visible word remains.
                Assert.Equal("ACOGUT", GetWordFollowing(filteredWords, wordToSearchAfterWhichTheActualTableStarts));
            }
        }

        /// <summary>
        /// Returns the word that immediately follows the first occurrence of <paramref name="anchor"/>.
        /// Fails the test explicitly when the anchor is missing or is the last word, rather than
        /// silently falling back to the first word on the page.
        /// </summary>
        /// <param name="words">Word texts in reading order.</param>
        /// <param name="anchor">The word to look for (exact match).</param>
        /// <returns>The text of the word right after the anchor.</returns>
        private static string GetWordFollowing(string[] words, string anchor)
        {
            var anchorIndex = System.Array.IndexOf(words, anchor);

            Assert.True(anchorIndex >= 0, $"Anchor word '{anchor}' was not found on the page.");
            Assert.True(anchorIndex + 1 < words.Length, $"No word follows the anchor word '{anchor}'.");

            return words[anchorIndex + 1];
        }
    }
}

View File

@ -140,9 +140,7 @@
}
public override void RenderGlyph(IFont font,
IColor strokingColor,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,

View File

@ -61,9 +61,7 @@
}
public override void RenderGlyph(IFont font,
IColor strokingColor,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,
@ -74,6 +72,7 @@
in TransformationMatrix transformationMatrix,
CharacterBoundingBox characterBoundingBox)
{
var textRenderingMode = currentState.FontState.TextRenderingMode;
if (textRenderingMode == TextRenderingMode.Neither)
{
return;
@ -85,7 +84,7 @@
if (font.TryGetNormalisedPath(code, out var path))
{
var skPath = path.ToSKPath();
ShowVectorFontGlyph(skPath, strokingColor, nonStrokingColor, textRenderingMode, renderingMatrix,
ShowVectorFontGlyph(skPath, currentState.CurrentStrokingColor!, currentState.CurrentNonStrokingColor!, textRenderingMode, renderingMatrix,
textMatrix, transformationMatrix);
}
else

View File

@ -517,6 +517,26 @@
}
}
/// <summary>
/// Whether at least one corner of the rectangle is located inside the path.
/// </summary>
/// <param name="path">The path that should contain a corner of the rectangle.</param>
/// <param name="rectangle">The rectangle whose corners are tested against the path.</param>
/// <param name="includeBorder">If set to false, will return false if the rectangle is on the path's border.</param>
/// <returns><c>true</c> as soon as one corner of <paramref name="rectangle"/> is found inside <paramref name="path"/>; otherwise <c>false</c>.</returns>
public static bool IntersectsWith(this PdfPath path, PdfRectangle rectangle, bool includeBorder = false)
{
    // NB, For later dev: Might not work for concave outer path, as it can contain all the points of the inner rectangle, but have overlapping edges.
    // Only the rectangle's corner points are tested; the edges between them are not.
    var subpathPolygons = path
        .Select(subpath => subpath.ToClipperPolygon().ToList())
        .ToList();

    // Honour the path's own filling rule when deciding what counts as "inside".
    var fillRule = path.FillingRule == FillingRule.NonZeroWinding
        ? ClipperPolyFillType.NonZero
        : ClipperPolyFillType.EvenOdd;

    // Any() stops at the first corner that lies inside, just like an early return from a loop.
    return rectangle
        .ToClipperPolygon()
        .Any(corner => PointInPaths(corner, subpathPolygons, fillRule, includeBorder));
}
/// <summary>
/// Gets the <see cref="PdfRectangle"/> that is the intersection of two rectangles.
/// <para>Only works for axis-aligned rectangles.</para>

View File

@ -282,9 +282,7 @@
var boundingBox = font.GetBoundingBox(code);
RenderGlyph(font,
currentState.CurrentStrokingColor!,
currentState.CurrentNonStrokingColor!,
currentState.FontState.TextRenderingMode,
currentState,
fontSize,
pointSize,
code,
@ -317,9 +315,7 @@
/// Renders a single glyph. Implemented by derived classes.
/// </summary>
public abstract void RenderGlyph(IFont font,
IColor strokingColor,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,

View File

@ -87,9 +87,7 @@ namespace UglyToad.PdfPig.Graphics
}
public override void RenderGlyph(IFont font,
IColor strokingColor,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,
@ -109,6 +107,13 @@ namespace UglyToad.PdfPig.Graphics
transformationMatrix,
new PdfRectangle(0, 0, characterBoundingBox.Width, 0));
if (ParsingOptions.ClipPaths)
{
var currentClipping = currentState.CurrentClippingPath;
if (currentClipping?.IntersectsWith(transformedGlyphBounds) == false)
return;
}
Letter letter = null;
if (Diacritics.IsInCombiningDiacriticRange(unicode) && currentOffset > 0 && letters.Count > 0)
{
@ -147,9 +152,9 @@ namespace UglyToad.PdfPig.Graphics
transformedPdfBounds.Width,
fontSize,
font.Details,
textRenderingMode,
strokingColor,
nonStrokingColor,
currentState.FontState.TextRenderingMode,
currentState.CurrentStrokingColor!,
currentState.CurrentNonStrokingColor!,
pointSize,
TextSequence);
}