diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs
new file mode 100644
index 00000000..b3e3e2b1
--- /dev/null
+++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs
@@ -0,0 +1,94 @@
+namespace UglyToad.PdfPig.DocumentLayoutAnalysis
+{
+ using System.Collections.Generic;
+ using System.Linq;
+ using UglyToad.PdfPig.Content;
+ using UglyToad.PdfPig.PdfFonts;
+
+    /// <summary>
+    /// Checks if each letter is a duplicate and overlaps any other letter and remove the duplicate, and flag the remaining as bold.
+    /// <para>Logic inspired from PdfBox's PDFTextStripper class.</para>
+    /// </summary>
+    public static class DuplicateOverlappingTextProcessor
+    {
+        /// <summary>
+        /// Removes every letter that duplicates and overlaps a letter seen earlier in the sequence,
+        /// and flags the letter that is kept as bold.
+        /// <para>Logic inspired from PdfBox's PDFTextStripper class.</para>
+        /// </summary>
+        /// <param name="letters">Letters to be processed. Enumerated exactly once.</param>
+        /// <returns>Letters with no duplicate overlapping, or <c>null</c> when <paramref name="letters"/> is <c>null</c>.</returns>
+        public static IReadOnlyList<Letter> Get(IEnumerable<Letter> letters)
+        {
+            if (letters == null)
+            {
+                return null;
+            }
+
+            var cleanLetters = new List<Letter>();
+
+            foreach (var letter in letters)
+            {
+                int duplicateIndex = FindOverlappingDuplicate(cleanLetters, letter);
+
+                if (duplicateIndex == -1)
+                {
+                    cleanLetters.Add(letter);
+                    continue;
+                }
+
+                // A duplicate overlapping letter was found: the letter already in the list is kept
+                // (so its geometry and text sequence are preserved) and is only re-created to be flagged bold.
+                // TODO: need to update the bounding box
+                // TODO: need to update bottom left/right
+                // TODO: need to update width
+                // update textSequence?
+                // update markedContentStack?
+                var existing = cleanLetters[duplicateIndex];
+                var fontDetails = new FontDetails(existing.Font.Name, true, existing.Font.Weight, existing.Font.IsItalic);
+
+                cleanLetters[duplicateIndex] = new Letter(existing.Value,
+                    existing.GlyphRectangle,
+                    existing.StartBaseLine,
+                    existing.EndBaseLine,
+                    existing.Width,
+                    existing.FontSize,
+                    fontDetails,
+                    existing.Color,
+                    existing.PointSize,
+                    existing.TextSequence);
+            }
+
+            return cleanLetters;
+        }
+
+        /// <summary>
+        /// Finds the first kept letter with the same value and font name whose glyph bottom left corner
+        /// lies within the tolerance of the candidate's glyph bottom left corner.
+        /// </summary>
+        /// <param name="cleanLetters">Letters kept so far.</param>
+        /// <param name="letter">Candidate letter to look up.</param>
+        /// <returns>Index in <paramref name="cleanLetters"/>, or -1 if there is none.</returns>
+        private static int FindOverlappingDuplicate(List<Letter> cleanLetters, Letter letter)
+        {
+            // Tolerance is a third of the candidate's glyph width divided by its number of characters.
+            int characters = string.IsNullOrEmpty(letter.Value) ? 1 : letter.Value.Length;
+            double tolerance = letter.GlyphRectangle.Width / characters / 3.0;
+
+            var corner = letter.GlyphRectangle.BottomLeft;
+            double minX = corner.X - tolerance;
+            double maxX = corner.X + tolerance;
+            double minY = corner.Y - tolerance;
+            double maxY = corner.Y + tolerance;
+
+            // string.Equals(a, b) is an ordinal comparison that tolerates null values and font names.
+            return cleanLetters.FindIndex(l =>
+            {
+                var other = l.GlyphRectangle.BottomLeft;
+                return string.Equals(l.Value, letter.Value) && string.Equals(l.FontName, letter.FontName) // do other checks?
+                       && minX <= other.X && other.X <= maxX
+                       && minY <= other.Y && other.Y <= maxY;
+            });
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs
index 146b7d03..87d66c03 100644
--- a/src/UglyToad.PdfPig/Content/Letter.cs
+++ b/src/UglyToad.PdfPig/Content/Letter.cs
@@ -80,12 +80,12 @@
 /// <summary>
 /// Create a new letter to represent some text drawn by the Tj operator.
 /// </summary>
- internal Letter(string value, PdfRectangle glyphRectangle,
- PdfPoint startBaseLine,
+ public Letter(string value, PdfRectangle glyphRectangle,
+ PdfPoint startBaseLine,
PdfPoint endBaseLine,
double width,
double fontSize,
- FontDetails font,
+ FontDetails font,
IColor color,
double pointSize,
int textSequence)