diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/SvgTextExporter.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/SvgTextExporter.cs index c359a520..3b40633b 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/SvgTextExporter.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/SvgTextExporter.cs @@ -1,4 +1,6 @@ -namespace UglyToad.PdfPig.DocumentLayoutAnalysis.Export +using System.Xml; + +namespace UglyToad.PdfPig.DocumentLayoutAnalysis.Export { using System; using System.Collections.Generic; @@ -44,16 +46,17 @@ } } + var doc = new XmlDocument(); foreach (var letter in page.Letters) { - builder.Append(LetterToSvg(letter, page.Height)); + builder.Append(LetterToSvg(letter, page.Height, doc)); } builder.Append(""); return builder.ToString(); } - private static string LetterToSvg(Letter l, double height) + private static string LetterToSvg(Letter l, double height, XmlDocument doc) { string fontFamily = GetFontFamily(l.FontName, out string style, out string weight); string rotation = ""; @@ -64,7 +67,12 @@ string fontSize = l.FontSize != 1 ? $"font-size='{l.FontSize:0}'" : $"style='font-size:{Math.Round(l.GlyphRectangle.Height, 2)}px'"; - return $"{l.Value}"; + var safeValue = XmlEscape(l, doc); + var x = Math.Round(l.StartBaseLine.X, Rounding); + var y = Math.Round(height - l.StartBaseLine.Y, Rounding); + + return $"{safeValue}" + + Environment.NewLine; } private static string GetFontFamily(string fontName, out string style, out string weight) @@ -121,6 +129,13 @@ return fontName; } + private static string XmlEscape(Letter letter, XmlDocument doc) + { + XmlNode node = doc.CreateElement("root"); + node.InnerText = letter.Value; + return node.InnerXml; + } + private static string ColorToSvg(IColor color) { if (color == null) return ""; diff --git a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs index 3c351c65..95ddf844 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs @@ -1,4 +1,6 @@ -namespace UglyToad.PdfPig.Tests.Integration +using UglyToad.PdfPig.DocumentLayoutAnalysis.Export; + +namespace UglyToad.PdfPig.Tests.Integration { using System; using System.Collections.Generic; @@ -145,6 +147,19 @@ used per estimate, we introduce a “complement class” Naive Bayes is often us } } + [Fact] + public void CanExportSvg() + { + using (var document = PdfDocument.Open(GetFilename(), new ParsingOptions{ ClipPaths = true })) + { + var page = document.GetPage(1); + + var svg = new SvgTextExporter().Get(page); + + Assert.NotNull(svg); + } + } + private static IReadOnlyList GetPdfBoxPositionData() { var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Integration", "Documents", "ICML03-081.Page1.Positions.txt");