mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-09-20 20:07:57 +08:00
Fix tests for renaming and validate the generated ALTO XML
This commit is contained in:
@@ -5,6 +5,7 @@
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Xml;
|
||||
using System.Xml.Linq;
|
||||
using DocumentLayoutAnalysis;
|
||||
using Export.Alto;
|
||||
@@ -136,17 +137,35 @@
|
||||
/// <summary>
/// Exports page 4 of the test document with <see cref="AltoXmlTextExporter"/> and checks that
/// the returned text can be loaded as an XML document.
/// </summary>
[Fact]
public void CanExportAltoXmlFormat()
{
    using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff))
    {
        var exporter = new AltoXmlTextExporter(new NearestNeighbourWordExtractor(), new DocstrumBoundingBoxes());
        var xml = exporter.Get(document.GetPage(4), true);
        Assert.NotNull(xml);

        // Encode the exported text as UTF-8 bytes and parse it; XDocument.Load throws if the
        // content is not well-formed XML, which fails the test.
        // XmlReader.Create is used instead of constructing the legacy XmlTextReader directly.
        using (var xmlStream = new MemoryStream(Encoding.UTF8.GetBytes(xml)))
        using (var xmlReader = XmlReader.Create(xmlStream))
        {
            var xDocument = XDocument.Load(xmlReader);
            Assert.NotNull(xDocument);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Exports page 16 of the test document with <see cref="AltoXmlTextExporter"/> and checks that
/// the returned text can be loaded as an XML document.
/// </summary>
[Fact]
public void CanExportAltoXmlFormatPage16()
{
    // Page 16 contains an unprintable string and a single line of text which causes problems for Docstrum.
    using (var document = PdfDocument.Open(GetFilename(), ParsingOptions.LenientParsingOff))
    {
        var exporter = new AltoXmlTextExporter(new NearestNeighbourWordExtractor(), new DocstrumBoundingBoxes());
        var xml = exporter.Get(document.GetPage(16), true);
        Assert.NotNull(xml);

        // Encode the exported text as UTF-8 bytes and parse it; XDocument.Load throws if the
        // content is not well-formed XML, which fails the test.
        // XmlReader.Create is used instead of constructing the legacy XmlTextReader directly.
        using (var xmlStream = new MemoryStream(Encoding.UTF8.GetBytes(xml)))
        using (var xmlReader = XmlReader.Create(xmlStream))
        {
            var xDocument = XDocument.Load(xmlReader);
            Assert.NotNull(xDocument);
        }
    }
}
|
||||
|
||||
@@ -181,16 +200,6 @@
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>
/// Writes <paramref name="s"/> into a new <see cref="MemoryStream"/> using
/// <paramref name="encoding"/> and returns the stream positioned at its start.
/// </summary>
/// <param name="s">The text to write.</param>
/// <param name="encoding">
/// The encoding passed to the <see cref="StreamWriter"/>. The writer emits the encoding's
/// preamble (if it has one) before the text, so the stream may start with a byte order mark.
/// </param>
/// <returns>An open, readable stream rewound to position 0. The caller owns and disposes it.</returns>
private static Stream GenerateStreamFromString(string s, Encoding encoding)
{
    var stream = new MemoryStream();

    // leaveOpen: true lets the writer be disposed (which flushes it) without closing
    // the underlying stream that is handed back to the caller.
    using (var writer = new StreamWriter(stream, encoding, 1024, true))
    {
        writer.Write(s);
    }

    stream.Position = 0;
    return stream;
}
|
||||
|
||||
private const string WordsPage4 = @"Disclaimer
|
||||
The designations employed end the presentation of the material in this information
|
||||
product do not imply the expression of any opinion whatsoever on the part of the
|
||||
|
Reference in New Issue
Block a user