diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs index b3e3e2b1..e6900dfe 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/DuplicateOverlappingTextProcessor.cs @@ -77,7 +77,9 @@ letter.Width, letter.FontSize, fontDetails, - letter.Color, + letter.RenderingMode, + letter.StrokeColor, + letter.FillColor, letter.PointSize, letter.TextSequence); diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs index 8766a694..80f5f58a 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs @@ -65,7 +65,9 @@ letter.Width, letter.FontSize, letter.Font, - letter.Color, + letter.RenderingMode, + letter.StrokeColor, + letter.FillColor, letter.PointSize, letter.TextSequence); } diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/Jpg/218995467-ccb746b0-dc28-4616-bcb1-4ad685f81876.jpg b/src/UglyToad.PdfPig.Tests/Images/Files/Jpg/218995467-ccb746b0-dc28-4616-bcb1-4ad685f81876.jpg new file mode 100644 index 00000000..d9875e94 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/Jpg/218995467-ccb746b0-dc28-4616-bcb1-4ad685f81876.jpg differ diff --git a/src/UglyToad.PdfPig.Tests/Images/JpegHandlerTests.cs b/src/UglyToad.PdfPig.Tests/Images/JpegHandlerTests.cs new file mode 100644 index 00000000..45d6d96b --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Images/JpegHandlerTests.cs @@ -0,0 +1,38 @@ +namespace UglyToad.PdfPig.Tests.Images +{ + using System; + using System.IO; + using Xunit; + using JpegHandler = UglyToad.PdfPig.Images.JpegHandler; + + public class JpegHandlerTests + { + + + [Fact] + public 
void CanGetJpegInformation() + { + + var dataJpg = LoadJpg("218995467-ccb746b0-dc28-4616-bcb1-4ad685f81876.jpg"); + + using (var ms = new MemoryStream(dataJpg)) + { + var jpegInfo = JpegHandler.GetInformation(ms); + + Assert.Equal(8, jpegInfo.BitsPerComponent); + Assert.Equal(2290, jpegInfo.Height); + Assert.Equal(1648, jpegInfo.Width); + } + } + + + + private static byte[] LoadJpg(string name) + { + var baseFolder = Path.GetFullPath(AppDomain.CurrentDomain.BaseDirectory); + var JpgFilesFolder = Path.Combine(baseFolder, "..", "..", "..", "Images", "Files", "Jpg"); ; + var JpgFilePath = Path.Combine(JpgFilesFolder, name); + return File.ReadAllBytes(JpgFilePath); + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/Various Content Types.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/Various Content Types.pdf new file mode 100644 index 00000000..19c83395 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/Various Content Types.pdf differ diff --git a/src/UglyToad.PdfPig.Tests/Integration/PageContentTests.cs b/src/UglyToad.PdfPig.Tests/Integration/PageContentTests.cs new file mode 100644 index 00000000..222c74a0 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Integration/PageContentTests.cs @@ -0,0 +1,24 @@ +namespace UglyToad.PdfPig.Tests.Integration +{ + using PdfPig.Core; + using Xunit; + + public class PageContentTests + { + [Fact] + public void DetectPageContents() + { + var file = IntegrationHelpers.GetDocumentPath("Various Content Types"); + + using (var document = PdfDocument.Open(file, ParsingOptions.LenientParsingOff)) + { + var page = document.GetPage(1); + var letters = page.Letters; + Assert.Contains(letters, l => l.RenderingMode == TextRenderingMode.Stroke); // "REGULAR TEXT" + Assert.Contains(letters, l => l.RenderingMode == TextRenderingMode.Neither); // "INVISIBLE TEXT" + Assert.NotEmpty(page.Content.GetImages()); + Assert.NotEmpty(page.Content.Paths); + } + } + } +} diff --git 
a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj index 878d9824..6879cdc1 100644 --- a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj +++ b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj @@ -133,9 +133,4 @@ PreserveNewest - - - - - diff --git a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs index b15ea4f3..08abfcd3 100644 --- a/src/UglyToad.PdfPig/Content/Letter.cs +++ b/src/UglyToad.PdfPig/Content/Letter.cs @@ -61,12 +61,33 @@ /// Details about the font for this letter. /// public FontDetails Font { get; } + + /// + /// Text rendering mode that indicates whether we should draw this letter's strokes, + /// fill, both, neither (in case of hidden text), etc. + /// If it calls for stroking the <see cref="StrokeColor"/> is used. + /// If it calls for filling, the <see cref="FillColor"/> is used. + /// In modes that perform both filling and stroking, the effect is as if each glyph outline were filled and then stroked in separate operations. + /// + public TextRenderingMode RenderingMode { get; } /// - /// The color of the letter. + /// The primary color of the letter, which is either the <see cref="StrokeColor"/> in case + /// <see cref="RenderingMode"/> is <see cref="TextRenderingMode.Stroke"/>, or otherwise + /// it is the <see cref="FillColor"/>. /// public IColor Color { get; } + /// + /// Stroking color + /// + public IColor StrokeColor { get; } + + /// + /// Non-stroking (fill) color + /// + public IColor FillColor { get; } + /// /// The size of the font in points. /// @@ -86,7 +107,9 @@ double width, double fontSize, FontDetails font, - IColor color, + TextRenderingMode renderingMode, + IColor strokeColor, + IColor fillColor, double pointSize, int textSequence) { @@ -97,7 +120,17 @@ Width = width; FontSize = fontSize; Font = font; - Color = color ?? GrayColor.Black; + RenderingMode = renderingMode; + if (renderingMode == TextRenderingMode.Stroke) + { + Color = StrokeColor = strokeColor ?? GrayColor.Black; + FillColor = fillColor; + } + else + { + Color = FillColor = fillColor ?? 
GrayColor.Black; + StrokeColor = strokeColor; + } PointSize = pointSize; TextSequence = textSequence; TextOrientation = GetTextOrientation(); diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 1e5efffd..7759c9cc 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -292,14 +292,7 @@ var transformedPdfBounds = PerformantRectangleTransformer .Transform(renderingMatrix, textMatrix, transformationMatrix, new PdfRectangle(0, 0, boundingBox.Width, 0)); - // If the text rendering mode calls for filling, the current nonstroking color in the graphics state is used; - // if it calls for stroking, the current stroking color is used. - // In modes that perform both filling and stroking, the effect is as if each glyph outline were filled and then stroked in separate operations. - // TODO: expose color as something more advanced - var color = currentState.FontState.TextRenderingMode != TextRenderingMode.Stroke - ? currentState.CurrentNonStrokingColor - : currentState.CurrentStrokingColor; - + Letter letter = null; if (Diacritics.IsInCombiningDiacriticRange(unicode) && bytes.CurrentOffset > 0 && letters.Count > 0) { @@ -319,26 +312,16 @@ attachTo.Width, attachTo.FontSize, attachTo.Font, - attachTo.Color, + attachTo.RenderingMode, + attachTo.StrokeColor, + attachTo.FillColor, attachTo.PointSize, attachTo.TextSequence); } - else - { - letter = new Letter( - unicode, - transformedGlyphBounds, - transformedPdfBounds.BottomLeft, - transformedPdfBounds.BottomRight, - transformedPdfBounds.Width, - fontSize, - font.Details, - color, - pointSize, - textSequence); - } } - else + + // If we did not create a letter for a combined diacritic, create one here. 
+ if (letter == null) { letter = new Letter( unicode, @@ -348,7 +331,9 @@ transformedPdfBounds.Width, fontSize, font.Details, - color, + currentState.FontState.TextRenderingMode, + currentState.CurrentStrokingColor, + currentState.CurrentNonStrokingColor, pointSize, textSequence); } diff --git a/src/UglyToad.PdfPig/Images/JpegHandler.cs b/src/UglyToad.PdfPig/Images/JpegHandler.cs index 46c19726..eb4c70af 100644 --- a/src/UglyToad.PdfPig/Images/JpegHandler.cs +++ b/src/UglyToad.PdfPig/Images/JpegHandler.cs @@ -1,6 +1,6 @@ namespace UglyToad.PdfPig.Images { - using System; + using System; using System.IO; internal static class JpegHandler @@ -28,6 +28,19 @@ { switch (marker) { + case JpegMarker.StartOfImage: + case JpegMarker.EndOfImage: + case JpegMarker.Restart0: + case JpegMarker.Restart1: + case JpegMarker.Restart2: + case JpegMarker.Restart3: + case JpegMarker.Restart4: + case JpegMarker.Restart5: + case JpegMarker.Restart6: + case JpegMarker.Restart7: + + // No length markers + break; case JpegMarker.StartOfBaselineDctFrame: case JpegMarker.StartOfProgressiveDctFrame: { @@ -55,6 +68,7 @@ case JpegMarker.ApplicationSpecific13: case JpegMarker.ApplicationSpecific14: case JpegMarker.ApplicationSpecific15: + default: { var length = ReadShort(stream, shortBuffer); stream.Seek(length - 2, SeekOrigin.Current); @@ -105,7 +119,7 @@ } if (previous.HasValue && previous.Value == MarkerStart && b != MarkerStart) - { + { return b; } diff --git a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs index c814f75b..7552a9ac 100644 --- a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs @@ -895,7 +895,16 @@ var documentSpace = textMatrix.Transform(renderingMatrix.Transform(fontMatrix.Transform(rect))); - var letter = new Letter(c.ToString(), documentSpace, advanceRect.BottomLeft, advanceRect.BottomRight, width, (double)fontSize, FontDetails.GetDefault(name), + var letter = new Letter( + 
c.ToString(), + documentSpace, + advanceRect.BottomLeft, + advanceRect.BottomRight, + width, + (double)fontSize, + FontDetails.GetDefault(name), + TextRenderingMode.Fill, + GrayColor.Black, GrayColor.Black, (double)fontSize, textSequence);