Enabled ClipPaths option to filter letters. (#905)

* Enabled ClipPaths option to filter letters.

* parsingOptions is no longer passed as an argument.
This commit is contained in:
yjagota 2024-09-08 18:06:26 +05:30 committed by GitHub
parent cd2a85e642
commit 09bddba778
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 83 additions and 19 deletions

View File

@ -0,0 +1,46 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using System.Linq;
/// <summary>
/// Integration tests for the <c>ClipPaths</c> parsing option, which filters out letters
/// that fall outside the current clipping path.
/// </summary>
public class LetterFilterTests
{
    /// <summary>
    /// Opening the same page with clip-path filtering enabled must yield fewer letters
    /// than opening it with filtering disabled.
    /// </summary>
    [Fact]
    public void CanFilterClippedLetters()
    {
        var documentPath = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test1.pdf");

        using (var clippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = true }))
        using (var unclippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = false }))
        {
            var allLetters = unclippedDocument.GetPage(5).Letters.Count;
            var filteredLetters = clippedDocument.GetPage(5).Letters.Count;

            // Reference counts at the time of writing: filtered 3158 letters, non-filtered 3184 letters.
            Assert.True(filteredLetters < allLetters,
                $"Expected filtered letter count ({filteredLetters}) to be lower than non-filtered ({allLetters})");
        }
    }

    /// <summary>
    /// The table in the test document has hidden columns at its left end. Without clip-path
    /// filtering their letters get merged into the first visible word; with filtering the
    /// word following the marker must be clean.
    /// </summary>
    [Fact]
    public void CanFilterClippedLetters_CheckBleedInSpecificWord()
    {
        var documentPath = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test2.pdf");

        using (var clippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = true }))
        using (var unclippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = false }))
        {
            var allWords = unclippedDocument.GetPage(1).GetWords().ToList();
            var filteredWords = clippedDocument.GetPage(1).GetWords().ToList();

            // The word immediately after this marker is the first word of the actual table.
            const string wordToSearchAfterWhichTheActualTableStarts = "ARISER";

            // FindIndex returns -1 when nothing matches; assert on it explicitly so a missing
            // marker fails the test clearly instead of silently checking the word at index 0.
            var markerIndexInAllWords = allWords.FindIndex(x => x.Text.Equals(wordToSearchAfterWhichTheActualTableStarts));
            Assert.True(markerIndexInAllWords >= 0, "Marker word not found in non-filtered words");
            Assert.True(markerIndexInAllWords + 1 < allWords.Count, "No word follows the marker in non-filtered words");
            Assert.Equal("MLIA0U01CP00O0I3N6G2", allWords[markerIndexInAllWords + 1].Text);

            var markerIndexInFilteredWords = filteredWords.FindIndex(x => x.Text.Equals(wordToSearchAfterWhichTheActualTableStarts));
            Assert.True(markerIndexInFilteredWords >= 0, "Marker word not found in filtered words");
            Assert.True(markerIndexInFilteredWords + 1 < filteredWords.Count, "No word follows the marker in filtered words");
            Assert.Equal("ACOGUT", filteredWords[markerIndexInFilteredWords + 1].Text);
        }
    }
}
}

View File

@ -140,9 +140,7 @@
} }
public override void RenderGlyph(IFont font, public override void RenderGlyph(IFont font,
IColor strokingColor, CurrentGraphicsState currentState,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
double fontSize, double fontSize,
double pointSize, double pointSize,
int code, int code,

View File

@ -61,9 +61,7 @@
} }
public override void RenderGlyph(IFont font, public override void RenderGlyph(IFont font,
IColor strokingColor, CurrentGraphicsState currentState,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
double fontSize, double fontSize,
double pointSize, double pointSize,
int code, int code,
@ -74,6 +72,7 @@
in TransformationMatrix transformationMatrix, in TransformationMatrix transformationMatrix,
CharacterBoundingBox characterBoundingBox) CharacterBoundingBox characterBoundingBox)
{ {
var textRenderingMode = currentState.FontState.TextRenderingMode;
if (textRenderingMode == TextRenderingMode.Neither) if (textRenderingMode == TextRenderingMode.Neither)
{ {
return; return;
@ -85,7 +84,7 @@
if (font.TryGetNormalisedPath(code, out var path)) if (font.TryGetNormalisedPath(code, out var path))
{ {
var skPath = path.ToSKPath(); var skPath = path.ToSKPath();
ShowVectorFontGlyph(skPath, strokingColor, nonStrokingColor, textRenderingMode, renderingMatrix, ShowVectorFontGlyph(skPath, currentState.CurrentStrokingColor!, currentState.CurrentNonStrokingColor!, textRenderingMode, renderingMatrix,
textMatrix, transformationMatrix); textMatrix, transformationMatrix);
} }
else else

View File

@ -517,6 +517,26 @@
} }
} }
/// <summary>
/// Checks whether at least one corner of the rectangle lies inside the path.
/// </summary>
/// <param name="path">The path that the rectangle's corners are tested against.</param>
/// <param name="rectangle">The rectangle whose corners are tested.</param>
/// <param name="includeBorder">Forwarded to the point-in-path test; if set to false, a corner lying on the path's border is not expected to count as inside.</param>
/// <returns><c>true</c> as soon as one corner is found inside the path, otherwise <c>false</c>.</returns>
public static bool IntersectsWith(this PdfPath path, PdfRectangle rectangle, bool includeBorder = false)
{
    // NB, for later dev: only the rectangle's corners are tested, so this might not work for a
    // concave outer path, which can contain every corner of the inner rectangle while its edges
    // still overlap the rectangle's edges.
    var subpathPolygons = path
        .Select(subpath => subpath.ToClipperPolygon().ToList())
        .ToList();

    ClipperPolyFillType fillRule;
    if (path.FillingRule == FillingRule.NonZeroWinding)
    {
        fillRule = ClipperPolyFillType.NonZero;
    }
    else
    {
        fillRule = ClipperPolyFillType.EvenOdd;
    }

    return rectangle
        .ToClipperPolygon()
        .Any(corner => PointInPaths(corner, subpathPolygons, fillRule, includeBorder));
}
/// <summary> /// <summary>
/// Gets the <see cref="PdfRectangle"/> that is the intersection of two rectangles. /// Gets the <see cref="PdfRectangle"/> that is the intersection of two rectangles.
/// <para>Only works for axis-aligned rectangles.</para> /// <para>Only works for axis-aligned rectangles.</para>

View File

@ -282,9 +282,7 @@
var boundingBox = font.GetBoundingBox(code); var boundingBox = font.GetBoundingBox(code);
RenderGlyph(font, RenderGlyph(font,
currentState.CurrentStrokingColor!, currentState,
currentState.CurrentNonStrokingColor!,
currentState.FontState.TextRenderingMode,
fontSize, fontSize,
pointSize, pointSize,
code, code,
@ -317,9 +315,7 @@
/// Render glyph implement. /// Render glyph implement.
/// </summary> /// </summary>
public abstract void RenderGlyph(IFont font, public abstract void RenderGlyph(IFont font,
IColor strokingColor, CurrentGraphicsState currentState,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
double fontSize, double fontSize,
double pointSize, double pointSize,
int code, int code,

View File

@ -87,9 +87,7 @@ namespace UglyToad.PdfPig.Graphics
} }
public override void RenderGlyph(IFont font, public override void RenderGlyph(IFont font,
IColor strokingColor, CurrentGraphicsState currentState,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
double fontSize, double fontSize,
double pointSize, double pointSize,
int code, int code,
@ -109,6 +107,13 @@ namespace UglyToad.PdfPig.Graphics
transformationMatrix, transformationMatrix,
new PdfRectangle(0, 0, characterBoundingBox.Width, 0)); new PdfRectangle(0, 0, characterBoundingBox.Width, 0));
if (ParsingOptions.ClipPaths)
{
var currentClipping = currentState.CurrentClippingPath;
if (currentClipping?.IntersectsWith(transformedGlyphBounds) == false)
return;
}
Letter letter = null; Letter letter = null;
if (Diacritics.IsInCombiningDiacriticRange(unicode) && currentOffset > 0 && letters.Count > 0) if (Diacritics.IsInCombiningDiacriticRange(unicode) && currentOffset > 0 && letters.Count > 0)
{ {
@ -147,9 +152,9 @@ namespace UglyToad.PdfPig.Graphics
transformedPdfBounds.Width, transformedPdfBounds.Width,
fontSize, fontSize,
font.Details, font.Details,
textRenderingMode, currentState.FontState.TextRenderingMode,
strokingColor, currentState.CurrentStrokingColor!,
nonStrokingColor, currentState.CurrentNonStrokingColor!,
pointSize, pointSize,
TextSequence); TextSequence);
} }