mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-06-28 15:30:17 +08:00
Enabled ClipPaths option to filter letters. (#905)
* Enabled ClipPaths option to filter letters. * parsingOptions is not passed as an argument now.
This commit is contained in:
parent
cd2a85e642
commit
09bddba778
Binary file not shown.
Binary file not shown.
46
src/UglyToad.PdfPig.Tests/Integration/LetterFilterTests.cs
Normal file
46
src/UglyToad.PdfPig.Tests/Integration/LetterFilterTests.cs
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
namespace UglyToad.PdfPig.Tests.Integration
{
    using System.Linq;

    /// <summary>
    /// Integration tests that open the same document twice, once with
    /// <c>ParsingOptions.ClipPaths</c> enabled and once disabled, and compare
    /// the letters/words extracted from a page.
    /// </summary>
    public class LetterFilterTests
    {
        /// <summary>
        /// With clip paths enabled, fewer letters should be returned for the page
        /// than with clip paths disabled.
        /// </summary>
        [Fact]
        public void CanFilterClippedLetters()
        {
            var one = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test1.pdf");

            using (var doc1 = PdfDocument.Open(one, new ParsingOptions { ClipPaths = true }))
            using (var doc2 = PdfDocument.Open(one, new ParsingOptions { ClipPaths = false }))
            {
                var allLetters = doc2.GetPage(5).Letters.Count;
                var filteredLetters = doc1.GetPage(5).Letters.Count;

                Assert.True(filteredLetters < allLetters,
                    "Expected filtered letter count to be lower than non-filtered"); // Filtered: 3158 letters, Non-filtered: 3184 letters
            }
        }

        /// <summary>
        /// Checks the word that follows a known marker word, with and without
        /// clip-path filtering, to show that letters from hidden columns no longer
        /// bleed into the extracted word when filtering is enabled.
        /// </summary>
        [Fact]
        public void CanFilterClippedLetters_CheckBleedInSpecificWord()
        {
            var one = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test2.pdf");

            using (var doc1 = PdfDocument.Open(one, new ParsingOptions { ClipPaths = true }))
            using (var doc2 = PdfDocument.Open(one, new ParsingOptions { ClipPaths = false }))
            {
                var allWords = doc2.GetPage(1).GetWords().ToList();
                var filteredWords = doc1.GetPage(1).GetWords().ToList();

                // The table has hidden columns at the left end. Letters from these columns get merged
                // into words, which is incorrect. Filtering letters based on the clip path should fix that.
                const string wordToSearchAfterWhichTheActualTableStarts = "ARISER";

                // FindIndex returns -1 when the marker is missing; assert on that explicitly so that
                // "-1 + 1" does not silently turn into a check of the first word on the page.
                var markerIndexInAllWords = allWords.FindIndex(x => x.Text.Equals(wordToSearchAfterWhichTheActualTableStarts));
                Assert.True(markerIndexInAllWords >= 0, "Marker word not found in non-filtered words");

                var indexOfCheckedWordInAllWords = markerIndexInAllWords + 1;
                Assert.True(indexOfCheckedWordInAllWords < allWords.Count, "No word follows the marker in non-filtered words");
                Assert.Equal("MLIA0U01CP00O0I3N6G2", allWords[indexOfCheckedWordInAllWords].Text);

                var markerIndexInFilteredWords = filteredWords.FindIndex(x => x.Text.Equals(wordToSearchAfterWhichTheActualTableStarts));
                Assert.True(markerIndexInFilteredWords >= 0, "Marker word not found in filtered words");

                var indexOfCheckedWordInFilteredWords = markerIndexInFilteredWords + 1;
                Assert.True(indexOfCheckedWordInFilteredWords < filteredWords.Count, "No word follows the marker in filtered words");
                Assert.Equal("ACOGUT", filteredWords[indexOfCheckedWordInFilteredWords].Text);
            }
        }
    }
}
|
@ -140,9 +140,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
public override void RenderGlyph(IFont font,
|
public override void RenderGlyph(IFont font,
|
||||||
IColor strokingColor,
|
CurrentGraphicsState currentState,
|
||||||
IColor nonStrokingColor,
|
|
||||||
TextRenderingMode textRenderingMode,
|
|
||||||
double fontSize,
|
double fontSize,
|
||||||
double pointSize,
|
double pointSize,
|
||||||
int code,
|
int code,
|
||||||
|
@ -61,9 +61,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
public override void RenderGlyph(IFont font,
|
public override void RenderGlyph(IFont font,
|
||||||
IColor strokingColor,
|
CurrentGraphicsState currentState,
|
||||||
IColor nonStrokingColor,
|
|
||||||
TextRenderingMode textRenderingMode,
|
|
||||||
double fontSize,
|
double fontSize,
|
||||||
double pointSize,
|
double pointSize,
|
||||||
int code,
|
int code,
|
||||||
@ -74,6 +72,7 @@
|
|||||||
in TransformationMatrix transformationMatrix,
|
in TransformationMatrix transformationMatrix,
|
||||||
CharacterBoundingBox characterBoundingBox)
|
CharacterBoundingBox characterBoundingBox)
|
||||||
{
|
{
|
||||||
|
var textRenderingMode = currentState.FontState.TextRenderingMode;
|
||||||
if (textRenderingMode == TextRenderingMode.Neither)
|
if (textRenderingMode == TextRenderingMode.Neither)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
@ -85,7 +84,7 @@
|
|||||||
if (font.TryGetNormalisedPath(code, out var path))
|
if (font.TryGetNormalisedPath(code, out var path))
|
||||||
{
|
{
|
||||||
var skPath = path.ToSKPath();
|
var skPath = path.ToSKPath();
|
||||||
ShowVectorFontGlyph(skPath, strokingColor, nonStrokingColor, textRenderingMode, renderingMatrix,
|
ShowVectorFontGlyph(skPath, currentState.CurrentStrokingColor!, currentState.CurrentNonStrokingColor!, textRenderingMode, renderingMatrix,
|
||||||
textMatrix, transformationMatrix);
|
textMatrix, transformationMatrix);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -517,6 +517,26 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether at least one of the rectangle's corners is located inside the path.
/// </summary>
/// <param name="path">The path that should contain the rectangle corner.</param>
/// <param name="rectangle">The rectangle whose corner points are tested against the path.</param>
/// <param name="includeBorder">If set to false, will return false if the rectangle is on the path's border.</param>
/// <returns><c>true</c> as soon as one tested point is reported inside the path; otherwise <c>false</c>.</returns>
public static bool IntersectsWith(this PdfPath path, PdfRectangle rectangle, bool includeBorder = false)
{
    // NB, for later dev: might not work for a concave outer path, as it can contain all the points
    // of the inner rectangle but still have overlapping edges.

    // Convert every subpath into a materialised Clipper polygon once, up front.
    var subpathPolygons = path.Select(subpath => subpath.ToClipperPolygon().ToList()).ToList();

    // Map the path's filling rule onto the matching Clipper fill type.
    ClipperPolyFillType fillRule;
    if (path.FillingRule == FillingRule.NonZeroWinding)
    {
        fillRule = ClipperPolyFillType.NonZero;
    }
    else
    {
        fillRule = ClipperPolyFillType.EvenOdd;
    }

    var anyPointInside = false;
    foreach (var corner in rectangle.ToClipperPolygon())
    {
        if (PointInPaths(corner, subpathPolygons, fillRule, includeBorder))
        {
            // One hit is enough; stop testing the remaining points.
            anyPointInside = true;
            break;
        }
    }

    return anyPointInside;
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets the <see cref="PdfRectangle"/> that is the intersection of two rectangles.
|
/// Gets the <see cref="PdfRectangle"/> that is the intersection of two rectangles.
|
||||||
/// <para>Only works for axis-aligned rectangles.</para>
|
/// <para>Only works for axis-aligned rectangles.</para>
|
||||||
|
@ -282,9 +282,7 @@
|
|||||||
var boundingBox = font.GetBoundingBox(code);
|
var boundingBox = font.GetBoundingBox(code);
|
||||||
|
|
||||||
RenderGlyph(font,
|
RenderGlyph(font,
|
||||||
currentState.CurrentStrokingColor!,
|
currentState,
|
||||||
currentState.CurrentNonStrokingColor!,
|
|
||||||
currentState.FontState.TextRenderingMode,
|
|
||||||
fontSize,
|
fontSize,
|
||||||
pointSize,
|
pointSize,
|
||||||
code,
|
code,
|
||||||
@ -317,9 +315,7 @@
|
|||||||
/// Render glyph implement.
|
/// Render glyph implement.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public abstract void RenderGlyph(IFont font,
|
public abstract void RenderGlyph(IFont font,
|
||||||
IColor strokingColor,
|
CurrentGraphicsState currentState,
|
||||||
IColor nonStrokingColor,
|
|
||||||
TextRenderingMode textRenderingMode,
|
|
||||||
double fontSize,
|
double fontSize,
|
||||||
double pointSize,
|
double pointSize,
|
||||||
int code,
|
int code,
|
||||||
|
@ -87,9 +87,7 @@ namespace UglyToad.PdfPig.Graphics
|
|||||||
}
|
}
|
||||||
|
|
||||||
public override void RenderGlyph(IFont font,
|
public override void RenderGlyph(IFont font,
|
||||||
IColor strokingColor,
|
CurrentGraphicsState currentState,
|
||||||
IColor nonStrokingColor,
|
|
||||||
TextRenderingMode textRenderingMode,
|
|
||||||
double fontSize,
|
double fontSize,
|
||||||
double pointSize,
|
double pointSize,
|
||||||
int code,
|
int code,
|
||||||
@ -109,6 +107,13 @@ namespace UglyToad.PdfPig.Graphics
|
|||||||
transformationMatrix,
|
transformationMatrix,
|
||||||
new PdfRectangle(0, 0, characterBoundingBox.Width, 0));
|
new PdfRectangle(0, 0, characterBoundingBox.Width, 0));
|
||||||
|
|
||||||
|
if (ParsingOptions.ClipPaths)
|
||||||
|
{
|
||||||
|
var currentClipping = currentState.CurrentClippingPath;
|
||||||
|
if (currentClipping?.IntersectsWith(transformedGlyphBounds) == false)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
Letter letter = null;
|
Letter letter = null;
|
||||||
if (Diacritics.IsInCombiningDiacriticRange(unicode) && currentOffset > 0 && letters.Count > 0)
|
if (Diacritics.IsInCombiningDiacriticRange(unicode) && currentOffset > 0 && letters.Count > 0)
|
||||||
{
|
{
|
||||||
@ -147,9 +152,9 @@ namespace UglyToad.PdfPig.Graphics
|
|||||||
transformedPdfBounds.Width,
|
transformedPdfBounds.Width,
|
||||||
fontSize,
|
fontSize,
|
||||||
font.Details,
|
font.Details,
|
||||||
textRenderingMode,
|
currentState.FontState.TextRenderingMode,
|
||||||
strokingColor,
|
currentState.CurrentStrokingColor!,
|
||||||
nonStrokingColor,
|
currentState.CurrentNonStrokingColor!,
|
||||||
pointSize,
|
pointSize,
|
||||||
TextSequence);
|
TextSequence);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user