mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-06-28 15:30:17 +08:00
Merge pull request #527 from fnatzke/Issue350-PdfPageBuilder.CopyFrom()_creates_invalid_PDF
Issue350 pdf page builder.copy from() creates invalid pdf
This commit is contained in:
commit
68c00c9b51
Binary file not shown.
48
src/UglyToad.PdfPig.Tests/Integration/ShowTextEscapeText.cs
Normal file
48
src/UglyToad.PdfPig.Tests/Integration/ShowTextEscapeText.cs
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
namespace UglyToad.PdfPig.Tests.Integration
|
||||||
|
{
|
||||||
|
using System.Linq;
|
||||||
|
using UglyToad.PdfPig;
|
||||||
|
using UglyToad.PdfPig.Writer;
|
||||||
|
using Xunit;
|
||||||
|
|
||||||
|
/// <summary>
/// Integration test for Issue 350: a page copied with <c>CopyFrom</c> must write ShowText
/// operands with '(', ')' and '\' escaped so the resulting PDF can be read back.
/// </summary>
public class ShowTextEscapeText
{
    /// <summary>
    /// Resolves the path of the sample document used by this test.
    /// </summary>
    private static string GetFilename()
    {
        // The single page of the source PDF has 3 ShowText operations with unbalanced round brackets in the text.
        // Unbalanced meaning there is an open bracket without a close bracket, or a close bracket without an open one.
        // 1. line 387 (\() Tj
        // 2. line 397 (\)) Tj
        // 3. line 384 ( \(I\222ll try to stay on ) Tj
        // Note: in text 3 the 0222 (octal) or 0x92 (hex) is similar to an apostrophe ' so the text is similar to
        // " (I'll try to stay on" (with an open bracket).
        return IntegrationHelpers.GetDocumentPath("ShowTextOpWithUnbalancedRoundBrackets.pdf");
    }

    [Fact]
    public void PdfCopyShowTextOpUsesEscapedText()
    {
        var filePath = GetFilename();

        using (var sourceDocument = PdfDocument.Open(filePath))
        {
            var pdfBuilder = new PdfDocumentBuilder();

            // Only the first page is copied; it is the page the assertion below inspects.
            const int pageNumber = 1;
            var sourcePage = sourceDocument.GetPage(pageNumber);
            pdfBuilder.AddPage(sourcePage.Width, sourcePage.Height).CopyFrom(sourcePage);

            var pdfBytes = pdfBuilder.Build();

            // Re-read (in memory) the copied PDF and check that the example text ("wander") exists in the
            // word extract after the ShowText operations with unbalanced brackets.
            using (var document = PdfDocument.Open(pdfBytes))
            {
                var page = document.GetPage(1);
                var words = page.GetWords();

                var isExpectedTextInCopiedPdf = words.Any(w => w.Text.Contains("wander"));

                Assert.True(isExpectedTextInCopiedPdf);
            }
        }
    }
}
|
||||||
|
}
|
@ -67,16 +67,40 @@
|
|||||||
operationContext.ShowText(input);
|
operationContext.ShowText(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Escapes the characters that have special meaning inside a parenthesised PDF literal string.
/// </summary>
/// <param name="text">The raw text to place between '(' and ')'. May be null.</param>
/// <returns>
/// The text with every '\', '(' and ')' preceded by a backslash, or null when <paramref name="text"/> is null.
/// </returns>
private static string EscapeText(string text)
{
    if (text is null)
    {
        return null;
    }

    // Fix Issue 350 from PDF Spec 1.7 (page 408) on handling 'special characters' of '(', ')' and '\'.
    //
    // The strings must conform to the syntax for string objects.
    // When a string is written by enclosing the data in parentheses,
    // bytes whose values are the same as those
    // of the ASCII characters left parenthesis (40), right parenthesis (41), and backslash (92)
    // must be preceded by a backslash character.
    // All other byte values between 0 and 255 may be used in a string object.
    // These rules apply to each individual byte in a string object, whether the string is interpreted
    // by the text-showing operators as single-byte or multiple-byte character codes.

    // Note: order of replacing is important. The backslash must be escaped first, otherwise the
    // backslashes introduced by the bracket escapes below would themselves be doubled.
    text = text.Replace(@"\", @"\\"); // Escape any backslash '\' -> '\\'
    text = text.Replace("(", @"\(");  // Escape any open bracket '(' -> '\('
    text = text.Replace(")", @"\)");  // Escape any close bracket ')' -> '\)'

    return text;
}
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
public void Write(Stream stream)
|
public void Write(Stream stream)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (Bytes != null)
|
if (Bytes != null)
|
||||||
{
|
{
|
||||||
stream.WriteHex(Bytes);
|
stream.WriteHex(Bytes);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
stream.WriteText($"({Text})");
|
var EscapedText = EscapeText(Text); // escape '(', ')' or '\'
|
||||||
|
stream.WriteText($"({EscapedText})");
|
||||||
}
|
}
|
||||||
|
|
||||||
stream.WriteWhiteSpace();
|
stream.WriteWhiteSpace();
|
||||||
|
Loading…
Reference in New Issue
Block a user