From 93313118e9481b655de03973ee9e8b0e72ecaf73 Mon Sep 17 00:00:00 2001 From: BobLd Date: Mon, 7 Oct 2019 15:19:30 +0100 Subject: [PATCH] Support for hOCR, AltoXml and PageXml output formats Tested with: - 'hocrjs' for hOCR (see https://unpkg.com/hocrjs) - 'PAGE Viewer' for hOCR, AltoXml and PageXml (see http://www.primaresearch.org/tools/PAGEViewer) --- .../PublicApiScannerTests.cs | 1 + src/UglyToad.PdfPig/Content/Page.cs | 43 + .../DefaultPageSegmenter.cs | 29 + .../Export/AltoXmlTextExporter.cs | 5308 +++++++++ .../Export/HOcrTextExporter.cs | 345 + src/UglyToad.PdfPig/Export/ITextExporter.cs | 17 + .../Export/PageXmlTextExporter.cs | 9803 +++++++++++++++++ src/UglyToad.PdfPig/Geometry/PdfRectangle.cs | 2 +- 8 files changed, 15547 insertions(+), 1 deletion(-) create mode 100644 src/UglyToad.PdfPig/DocumentLayoutAnalysis/DefaultPageSegmenter.cs create mode 100644 src/UglyToad.PdfPig/Export/AltoXmlTextExporter.cs create mode 100644 src/UglyToad.PdfPig/Export/HOcrTextExporter.cs create mode 100644 src/UglyToad.PdfPig/Export/ITextExporter.cs create mode 100644 src/UglyToad.PdfPig/Export/PageXmlTextExporter.cs diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index 1e8488e6..621b3792 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -76,6 +76,7 @@ "UglyToad.PdfPig.DocumentLayoutAnalysis.EdgeType", "UglyToad.PdfPig.Exceptions.PdfDocumentEncryptedException", "UglyToad.PdfPig.Exceptions.PdfDocumentFormatException", + "UglyToad.PdfPig.Export.ITextExporter", "UglyToad.PdfPig.Fonts.DescriptorFontFile", "UglyToad.PdfPig.Fonts.Exceptions.InvalidFontFormatException", "UglyToad.PdfPig.Fonts.FontDescriptor", diff --git a/src/UglyToad.PdfPig/Content/Page.cs b/src/UglyToad.PdfPig/Content/Page.cs index e18c0bf9..761f955c 100644 --- a/src/UglyToad.PdfPig/Content/Page.cs +++ b/src/UglyToad.PdfPig/Content/Page.cs @@ -177,6 +177,49 @@ { return 
letter.PointSize;
             }
+
+            /// <summary>
+            /// Get the hOCR (html) string of the page layout.
+            /// This is considered experimental because it needs more testing.
+            /// </summary>
+            /// <param name="wordExtractor">The word extractor to use to generate words.</param>
+            /// <param name="pageSegmenter">The page segmenter to use.</param>
+            /// <param name="indent">Indent character to use.</param>
+            /// <param name="drawPaths">Draw the paths present in the page.</param>
+            /// <param name="useHocrjs">Will add a reference to the 'hocrjs' script just before the closing 'body' tag, adding the interface to a plain hOCR file. See https://github.com/kba/hocrjs for more information.</param>
+            public string GetHOCR(IWordExtractor wordExtractor, DocumentLayoutAnalysis.IPageSegmenter pageSegmenter, string indent = "\t", bool drawPaths = false, bool useHocrjs = false)
+            {
+                var hocr = new Export.HOcrTextExporter(wordExtractor, pageSegmenter, 2, indent);
+                return hocr.Get(page, drawPaths, useHocrjs: useHocrjs);
+            }
+
+            /// <summary>
+            /// Get the Alto (xml) string of the page layout.
+            /// This is considered experimental because it needs more testing.
+            /// </summary>
+            /// <param name="wordExtractor">The word extractor to use to generate words.</param>
+            /// <param name="pageSegmenter">The page segmenter to use.</param>
+            /// <param name="indent">Indent character to use.</param>
+            /// <param name="drawPaths">Draw the paths present in the page.</param>
+            public string GetAltoXml(IWordExtractor wordExtractor, DocumentLayoutAnalysis.IPageSegmenter pageSegmenter, string indent = "\t", bool drawPaths = false)
+            {
+                var alto = new Export.AltoXmlTextExporter(wordExtractor, pageSegmenter, 2, indent);
+                return alto.Get(page, drawPaths);
+            }
+
+            /// <summary>
+            /// Get the PageXml (xml) string of the page layout.
+            /// This is considered experimental because it needs more testing.
+            /// </summary>
+            /// <param name="wordExtractor">The word extractor to use to generate words.</param>
+            /// <param name="pageSegmenter">The page segmenter to use.</param>
+            /// <param name="indent">Indent character to use.</param>
+            /// <param name="drawPaths">Draw the paths present in the page.</param>
+            public string GetPageXml(IWordExtractor wordExtractor, DocumentLayoutAnalysis.IPageSegmenter pageSegmenter, string indent = "\t", bool drawPaths = false)
+            {
+                var pageXml = new Export.PageXmlTextExporter(wordExtractor, pageSegmenter, 2, indent);
+                return pageXml.Get(page, drawPaths);
+            }
         }
     }
 }
diff --git a/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DefaultPageSegmenter.cs b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DefaultPageSegmenter.cs
new file mode 100644
index 00000000..ab533027
--- /dev/null
+++ b/src/UglyToad.PdfPig/DocumentLayoutAnalysis/DefaultPageSegmenter.cs
@@ -0,0 +1,29 @@
+using System.Collections.Generic;
+using System.Linq;
+using UglyToad.PdfPig.Content;
+using UglyToad.PdfPig.Util;
+
+namespace UglyToad.PdfPig.DocumentLayoutAnalysis
+{
+    /// <summary>
+    /// Default Page Segmenter. All words are included in one block.
+    /// </summary>
+    public class DefaultPageSegmenter : IPageSegmenter
+    {
+        /// <summary>
+        /// Create an instance of default page segmenter, <see cref="DefaultPageSegmenter"/>.
+        /// </summary>
+        public static DefaultPageSegmenter Instance { get; } = new DefaultPageSegmenter();
+
+        /// <summary>
+        /// Get the blocks.
+        /// </summary>
+        /// <param name="pageWords">The words in the page.</param>
+        public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> pageWords)
+        {
+            var words = pageWords.ToList(); // enumerate the (possibly lazy) sequence exactly once; still throws ArgumentNullException on null
+            if (words.Count == 0) return EmptyArray<TextBlock>.Instance;
+            return new List<TextBlock>() { new TextBlock(new XYLeaf(words).GetLines()) };
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Export/AltoXmlTextExporter.cs b/src/UglyToad.PdfPig/Export/AltoXmlTextExporter.cs
new file mode 100644
index 00000000..b377841f
--- /dev/null
+++ b/src/UglyToad.PdfPig/Export/AltoXmlTextExporter.cs
@@ -0,0 +1,5308 @@
+using System;
+using System.CodeDom.Compiler;
+using System.Collections.Generic;
+using System.ComponentModel;
+using System.Diagnostics;
+using System.Linq;
+using System.Xml;
+using System.Xml.Schema;
+using System.Xml.Serialization;
+using UglyToad.PdfPig.Content;
+using UglyToad.PdfPig.DocumentLayoutAnalysis;
+using UglyToad.PdfPig.Geometry;
+using UglyToad.PdfPig.Util;
+
+namespace UglyToad.PdfPig.Export
+{
+    /// <summary>
+    /// Alto 4.1 (XML) text exporter.
+    /// See https://github.com/altoxml/schema
+    /// </summary>
+    internal class AltoXmlTextExporter : ITextExporter
+    {
+        private readonly IPageSegmenter pageSegmenter;
+        private readonly IWordExtractor wordExtractor;
+
+        private readonly decimal scale;
+        private readonly string indentChar;
+
+        private int pageCount = 0;
+        private int pageSpaceCount = 0;
+        private int graphicalElementCount = 0;
+        private int textBlockCount = 0;
+        private int textLineCount = 0;
+        private int stringCount = 0;
+        private int glyphCount = 0;
+
+        /// <summary>
+        /// Alto 4.1 (XML)
+        /// See https://github.com/altoxml/schema
+        /// </summary>
+        /// <param name="wordExtractor">Extractor used to group the page letters into words.</param>
+        /// <param name="pageSegmenter">Segmenter used to group the words into text blocks.</param>
+        /// <param name="scale">Factor applied to every exported position and size.</param>
+        /// <param name="indent">Indent character.</param>
+        public AltoXmlTextExporter(IWordExtractor wordExtractor, IPageSegmenter pageSegmenter, double scale = 1.0, string indent = "\t")
+        {
+            this.wordExtractor = wordExtractor;
+            this.pageSegmenter = pageSegmenter;
+            this.scale = (decimal)scale;
+            this.indentChar = indent;
+        }
+
+        /// <summary>
+        /// Get the Alto (XML) string of the pages layout.
+        /// </summary>
+        /// <param name="document">The document whose pages are exported.</param>
+        /// <param name="includePaths">Draw the paths present in the page.</param>
+        /// <returns>The Alto (XML) document as a string, with one page element per page of the document.</returns>
+        public string Get(PdfDocument document, bool includePaths = false)
+        {
+            AltoDocument alto = CreateAltoDocument("unknown");
+            var altoPages = new List<AltoDocument.AltoPage>();
+
+            // Pages are requested by their 1-based number.
+            for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
+            {
+                altoPages.Add(ToAltoPage(document.GetPage(pageNumber), includePaths));
+            }
+
+            alto.Layout.Pages = altoPages.ToArray();
+            return Serialize(alto);
+        }
+
+        /// <summary>
+        /// Get the Alto (XML) string of the page layout. Excludes the paths of the page.
+        /// </summary>
+        /// <param name="page">The page to export.</param>
+        /// <returns>The Alto (XML) document as a string.</returns>
+        public string Get(Page page)
+        {
+            return Get(page, false);
+        }
+
+        /// <summary>
+        /// Get the Alto (XML) string of the page layout.
+        /// </summary>
+        /// <param name="page">The page to export.</param>
+        /// <param name="includePaths">Draw the paths present in the page.</param>
+        /// <returns>The Alto (XML) document as a string, containing this single page.</returns>
+        public string Get(Page page, bool includePaths)
+        {
+            AltoDocument alto = CreateAltoDocument("unknown");
+            alto.Layout.Pages = new AltoDocument.AltoPage[] { ToAltoPage(page, includePaths) };
+
+            return Serialize(alto);
+        }
+
+        /// <summary>
+        /// Create an <see cref="AltoDocument"/> with its description filled in and an empty layout.
+        /// </summary>
+        /// <param name="fileName">Value written to the source image information file name.</param>
+        /// <returns>The new document; the caller assigns the layout pages.</returns>
+        private AltoDocument CreateAltoDocument(string fileName)
+        {
+            return new AltoDocument()
+            {
+                Layout = new AltoDocument.AltoLayout()
+                {
+                    StyleRefs = null
+                },
+                Description = GetAltoDescription(fileName),
+                SchemaVersion = "4",
+                //Styles = new AltoStyles() { },
+                //Tags = new AltoTags() { }
+            };
+        }
+
+        /// <summary>
+        /// Convert a page into its Alto representation: text blocks from the page segmenter and,
+        /// optionally, one graphical element per path that has a bounding rectangle.
+        /// </summary>
+        /// <param name="page">The page to convert.</param>
+        /// <param name="includePaths">Draw the paths present in the page.</param>
+        /// <returns>The Alto page.</returns>
+        private AltoDocument.AltoPage ToAltoPage(Page page, bool includePaths)
+        {
+            pageCount = page.Number;
+            pageSpaceCount++;
+
+            // Materialise the words: the sequence is tested for emptiness and then handed to the segmenter.
+            var words = page.GetWords(wordExtractor).ToList();
+            var altoPage = new AltoDocument.AltoPage()
+            {
+                Height = (float)Math.Round(page.Height * scale),
+                Width = (float)Math.Round(page.Width * scale),
+                Accuracy = float.NaN,
+                Quality = AltoDocument.AltoQuality.OK,
+                QualityDetail = null,
+                BottomMargin = null,
+                LeftMargin = null,
+                RightMargin = null,
+                TopMargin = null,
+                Pc = float.NaN,
+                PhysicalImgNr = page.Number,
+                PrintedImgNr = null,
+                PageClass = null,
+                Position = AltoDocument.AltoPosition.Cover,
+                Processing = null,
+                ProcessingRefs = null,
+                StyleRefs = null,
+                PrintSpace = new AltoDocument.AltoPageSpace()
+                {
+                    Height = (float)Math.Round(page.Height * scale),    // TBD
+                    Width = (float)Math.Round(page.Width * scale),      // TBD
+                    VPos = 0f,                                          // TBD
+                    HPos = 0f,                                          // TBD
+                    ComposedBlocks = null,                              // TBD
+                    GraphicalElements = null,                           // TBD
+                    Illustrations = null,                               // TBD
+                    ProcessingRefs = null,                              // TBD
+                    StyleRefs = null,                                   // TBD
+                    Id = "P" + pageCount + "_PS" + pageSpaceCount.ToString("#00000") //P1_PS00001
+                },
+                Id = "P" + pageCount
+            };
+
+            if (words.Count > 0)
+            {
+                var blocks = pageSegmenter.GetBlocks(words);
+                altoPage.PrintSpace.TextBlock = blocks.Select(b => ToAltoTextBlock(b, page.Height)).ToArray();
+            }
+
+            if (includePaths)
+            {
+                // The projection has a side effect (it consumes element ids), so it must run exactly once
+                // per path: materialise it before counting. Paths without a bounding rectangle yield null
+                // and are dropped.
+                var graphicalElements = page.ExperimentalAccess.Paths
+                    .Select(p => ToAltoGraphicalElement(p, page.Height))
+                    .Where(g => g != null)
+                    .ToArray();
+
+                if (graphicalElements.Length > 0)
+                {
+                    altoPage.PrintSpace.GraphicalElements = graphicalElements;
+                }
+            }
+            return altoPage;
+        }
+
+        /// <summary>
+        /// Convert a path into an Alto graphical element covering the path's bounding rectangle.
+        /// </summary>
+        /// <param name="pdfPath">The path to convert.</param>
+        /// <param name="height">The page height, used to flip the vertical axis.</param>
+        /// <returns>The graphical element, or null when the path has no bounding rectangle.</returns>
+        private AltoDocument.AltoGraphicalElement ToAltoGraphicalElement(PdfPath pdfPath, decimal height)
+        {
+            var rectangle = pdfPath.GetBoundingRectangle();
+            if (!rectangle.HasValue)
+            {
+                return null;
+            }
+
+            // Only consume an id when an element is actually produced, so ids stay contiguous.
+            graphicalElementCount++;
+
+            return new AltoDocument.AltoGraphicalElement()
+            {
+                VPos = (float)Math.Round((height - rectangle.Value.Top) * scale),
+                HPos = (float)Math.Round(rectangle.Value.Left * scale),
+                Height = (float)Math.Round(rectangle.Value.Height * scale),
+                Width = (float)Math.Round(rectangle.Value.Width * scale),
+                Rotation = 0,
+                //Cs = false,
+                StyleRefs = null,
+                TagRefs = null,
+                title = null,
+                type = null,
+                //IdNext = "NA", // for reading order
+                Id = "P" + pageCount + "_GE" + graphicalElementCount.ToString("#00000")
+            };
+        }
+
+        /// <summary>
+        /// Convert a text block, and every text line it contains, into an Alto text block.
+        /// </summary>
+        /// <param name="textBlock">The text block to convert.</param>
+        /// <param name="height">The page height, used to flip the vertical axis.</param>
+        /// <returns>The Alto text block.</returns>
+        private AltoDocument.AltoTextBlock ToAltoTextBlock(TextBlock textBlock, decimal height)
+        {
+            textBlockCount++;
+
+            return new AltoDocument.AltoTextBlock()
+            {
+                VPos = (float)Math.Round((height - textBlock.BoundingBox.Top) * scale),
+                HPos = (float)Math.Round(textBlock.BoundingBox.Left * scale),
+                Height = (float)Math.Round(textBlock.BoundingBox.Height * scale),
+                Width = (float)Math.Round(textBlock.BoundingBox.Width * scale),
+                Rotation = 0, // check textBlock.TextDirection
+                TextLines = textBlock.TextLines.Select(l => ToAltoTextLine(l, height)).ToArray(),
+                //Cs = false,
+                StyleRefs = null,
+                TagRefs = null,
+                title = null,
+                type = null,
+                //IdNext = "NA", // for reading order
+                Id = "P" + pageCount + "_TB" + textBlockCount.ToString("#00000")
+            };
+        }
+
+        /// <summary>
+        /// Convert a text line, and every word it contains, into an Alto text line.
+        /// </summary>
+        /// <param name="textLine">The text line to convert.</param>
+        /// <param name="height">The page height, used to flip the vertical axis.</param>
+        /// <returns>The Alto text line.</returns>
+        private AltoDocument.AltoTextBlockTextLine ToAltoTextLine(TextLine textLine, decimal height)
+        {
+            textLineCount++;
+            var strings = textLine.Words.Select(w => ToAltoString(w, height)).ToArray();
+
+            return new AltoDocument.AltoTextBlockTextLine()
+            {
+                VPos = (float)Math.Round((height - textLine.BoundingBox.Top) * scale),
+                HPos = (float)Math.Round(textLine.BoundingBox.Left * scale),
+                Height = (float)Math.Round(textLine.BoundingBox.Height * scale),
+                Width = (float)Math.Round(textLine.BoundingBox.Width * scale),
+                BaseLine = float.NaN,     // TBD
+                //Hyp = new AltoTextBlockTextLineHyp() { },   // TBD
+                Strings = strings,
+                Lang = null,
+                //Sp = new AltoSP[0],   // TBD
+                StyleRefs = null,
+                TagRefs = null,
+                Id = "P" + pageCount + "_TL" + textLineCount.ToString("#00000")
+            };
+        }
+
+        /// <summary>
+        /// Convert a word, and every letter it contains, into an Alto string.
+        /// </summary>
+        /// <param name="word">The word to convert.</param>
+        /// <param name="height">The page height, used to flip the vertical axis.</param>
+        /// <returns>The Alto string.</returns>
+        private AltoDocument.AltoString ToAltoString(Word word, decimal height)
+        {
+            stringCount++;
+            var glyphs = word.Letters.Select(l => ToAltoGlyph(l, height)).ToArray();
+            return new AltoDocument.AltoString()
+            {
+                VPos = (float)Math.Round((height - word.BoundingBox.Top) * scale),
+                HPos = (float)Math.Round(word.BoundingBox.Left * scale),
+                Height = (float)Math.Round(word.BoundingBox.Height * scale),
+                Width = (float)Math.Round(word.BoundingBox.Width * scale),
+                Glyph = glyphs,
+                // One character per glyph, mapping the confidence from 0->1 to 9->0. Each value is rounded
+                // and formatted with the invariant culture so the result does not depend on the current
+                // culture and fractional values cannot run into each other.
+                Cc = string.Join("", glyphs.Select(g => Math.Round(9f * (1f - g.Gc)).ToString(System.Globalization.CultureInfo.InvariantCulture))),
+                Content = word.Text,
+                //Cs = false,
+                Lang = null,
+                //Style = AltoFontStyles.Bold,
+                StyleRefs = null,
+                SubsContent = null,
+                //SubsType = AltoSubsType.Abbreviation,
+                TagRefs = null,
+                Wc = float.NaN,
+                Id = "P" + pageCount + "_ST" + stringCount.ToString("#00000")
+            };
+        }
+
+        /// <summary>
+        /// Convert a letter into an Alto glyph. The confidence is always written as 1.
+        /// </summary>
+        /// <param name="letter">The letter to convert.</param>
+        /// <param name="height">The page height, used to flip the vertical axis.</param>
+        /// <returns>The Alto glyph.</returns>
+        private AltoDocument.AltoGlyph ToAltoGlyph(Letter letter, decimal height)
+        {
+            glyphCount++;
+            return new AltoDocument.AltoGlyph()
+            {
+                VPos = (float)Math.Round((height - letter.GlyphRectangle.Top) * scale),
+                HPos = (float)Math.Round(letter.GlyphRectangle.Left * scale),
+                Height = (float)Math.Round(letter.GlyphRectangle.Height * scale),
+                Width = (float)Math.Round(letter.GlyphRectangle.Width * scale),
+                Gc = 1.0f,
+                Content = letter.Value,
+                Id = "P" + pageCount + "_ST" + stringCount.ToString("#00000") + "_G" + glyphCount.ToString("#00")
+            };
+        }
+
+        /// <summary>
+        /// Build the Alto description: measurement unit, processing step and source file information.
+        /// </summary>
+        /// <param name="fileName">Value written to the source image information file name.</param>
+        /// <returns>The Alto description.</returns>
+        private AltoDocument.AltoDescription GetAltoDescription(string fileName)
+        {
+            var processing = new AltoDocument.AltoDescriptionProcessing()
+            {
+                ProcessingAgency = null,
+                ProcessingCategory = AltoDocument.AltoProcessingCategory.Other, // TBD
+                // Culture-independent UTC timestamp in XML Schema dateTime form; the parameterless
+                // ToString() depends on the current culture.
+                ProcessingDateTime = DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss'Z'", System.Globalization.CultureInfo.InvariantCulture),
+                ProcessingSoftware = new AltoDocument.AltoProcessingSoftware()
+                {
+                    SoftwareName = "PdfPig",
+                    SoftwareCreator = @"https://github.com/UglyToad/PdfPig",
+                    ApplicationDescription = "Read and extract text and other content from PDFs in C# (port of PdfBox)",
+                    SoftwareVersion = "x.x.xx"
+                },
+                ProcessingStepDescription = null,
+                ProcessingStepSettings = pageSegmenter.GetType().Name + "|" + wordExtractor.GetType().Name,
+                Id = "P" + pageCount + "_D1"
+            };
+
+            var documentIdentifier = new AltoDocument.AltoDocumentIdentifier()
+            {
+                DocumentIdentifierLocation = null,
+                Value = null
+            };
+
+            var fileIdentifier = new AltoDocument.AltoFileIdentifier()
+            {
+                FileIdentifierLocation = null,
+                Value = null
+            };
+
+            return new AltoDocument.AltoDescription()
+            {
+                MeasurementUnit = AltoDocument.AltoMeasurementUnit.Pixel, // need to check that
+                Processings = new[] { processing },
+                SourceImageInformation = new AltoDocument.AltoSourceImageInformation()
+                {
+                    DocumentIdentifiers = new AltoDocument.AltoDocumentIdentifier[] { documentIdentifier },
+                    FileIdentifiers = new AltoDocument.AltoFileIdentifier[] { fileIdentifier },
+                    FileName = fileName
+                }
+            };
+        }
+
+        /// <summary>
+        /// Serialize the document to an indented XML string.
+        /// </summary>
+        /// <param name="altoDocument">The document to serialize.</param>
+        /// <returns>The XML text, starting with a UTF-8 XML declaration.</returns>
+        private string Serialize(AltoDocument altoDocument)
+        {
+            XmlSerializer serializer = new XmlSerializer(typeof(AltoDocument));
+            var settings = new XmlWriterSettings()
+            {
+                //Encoding = new System.Text.UTF8Encoding(true),
+                Indent = true,
+                IndentChars = indentChar,
+                OmitXmlDeclaration = true // hack to manually handle utf-8
+            };
+
+            using (var stringWriter = new System.IO.StringWriter())
+            using (var xmlWriter = XmlWriter.Create(stringWriter, settings))
+            {
+                // The writer's own declaration is suppressed above, so the declaration is written by hand.
+                stringWriter.WriteLine("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); // hack to manually handle utf-8
+                serializer.Serialize(xmlWriter, altoDocument);
+                xmlWriter.Flush();
+                return stringWriter.ToString();
+            }
+        }
+
+        /// <summary>
+        /// Read an <see cref="AltoDocument"/> back from an XML file.
+        /// </summary>
+        /// <param name="xmlPath">Path or URI passed to the XML reader.</param>
+        /// <returns>The deserialized document.</returns>
+        private static AltoDocument Deserialize(string xmlPath)
+        {
+            XmlSerializer serializer = new XmlSerializer(typeof(AltoDocument));
+
+            using (var reader = XmlReader.Create(xmlPath))
+            {
+                return (AltoDocument)serializer.Deserialize(reader);
+            }
+        }
+    }
+
+    #region ALTO Schema
/******************************************************************************** + * Alto version 4.1 https://github.com/altoxml/schema/blob/master/v4/alto-4-1.xsd + * Auto-generated by xsd and improved by BobLD + ********************************************************************************/ + + /// + /// [Alto] Alto Schema root + /// Version 4.1 + /// See https://github.com/altoxml/schema + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")] + [XmlRootAttribute("alto", Namespace = "http://www.loc.gov/standards/alto/ns-v4#", IsNullable = false)] + public class AltoDocument + { + private AltoDescription descriptionField; + + private AltoStyles stylesField; + + private AltoTags tagsField; + + private AltoLayout layoutField; + + private string sCHEMAVERSIONField; + + /// + /// Describes general settings of the alto file like measurement units and metadata + /// + public AltoDescription Description + { + get + { + return this.descriptionField; + } + set + { + this.descriptionField = value; + } + } + + /// + /// Styles define properties of layout elements. A style defined in a parent element + /// is used as default style for all related children elements. + /// + public AltoStyles Styles + { + get + { + return this.stylesField; + } + set + { + this.stylesField = value; + } + } + + /// + /// Tag define properties of additional characteristic. The tags are referenced from + /// related content element on Block or String element by attribute TAGREF via the tag ID. 
+ /// + /// This container element contains the individual elements for LayoutTags, StructureTags, + /// RoleTags, NamedEntityTags and OtherTags + /// + public AltoTags Tags + { + get + { + return this.tagsField; + } + set + { + this.tagsField = value; + } + } + + /// + /// The root layout element. + /// + public AltoLayout Layout + { + get + { + return this.layoutField; + } + set + { + this.layoutField = value; + } + } + + /// + /// Schema version of the ALTO file. + /// + [XmlAttributeAttribute("SCHEMAVERSION")] + public string SchemaVersion + { + get + { + return this.sCHEMAVERSIONField; + } + set + { + this.sCHEMAVERSIONField = value; + } + } + + /// + /// [Alto] Description + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")] + public class AltoDescription + { + + private AltoMeasurementUnit measurementUnitField; + + private AltoSourceImageInformation sourceImageInformationField; + + private AltoDescriptionOcrProcessing[] oCRProcessingField; + + private AltoDescriptionProcessing[] processingField; + + /// + public AltoMeasurementUnit MeasurementUnit + { + get + { + return this.measurementUnitField; + } + set + { + this.measurementUnitField = value; + } + } + + /// + [XmlElementAttribute("sourceImageInformation")] + public AltoSourceImageInformation SourceImageInformation + { + get + { + return this.sourceImageInformationField; + } + set + { + this.sourceImageInformationField = value; + } + } + + /// + [XmlElementAttribute("OCRProcessing")] + [Obsolete("Element deprecated. 
'Processing' should be used instead.")] + public AltoDescriptionOcrProcessing[] OCRProcessing + { + get + { + return this.oCRProcessingField; + } + set + { + this.oCRProcessingField = value; + } + } + + /// + [XmlElementAttribute("Processing")] + public AltoDescriptionProcessing[] Processings + { + get + { + return this.processingField; + } + set + { + this.processingField = value; + } + } + } + + /// + /// [Alto] Information to identify the image file from which the OCR text was created. + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")] + public class AltoSourceImageInformation + { + + private string fileNameField; + + private AltoFileIdentifier[] fileIdentifierField; + + private AltoDocumentIdentifier[] documentIdentifierField; + + /// + [XmlElementAttribute("fileName")] + public string FileName + { + get + { + return this.fileNameField; + } + set + { + this.fileNameField = value; + } + } + + /// + [XmlElementAttribute("fileIdentifier")] + public AltoFileIdentifier[] FileIdentifiers + { + get + { + return this.fileIdentifierField; + } + set + { + this.fileIdentifierField = value; + } + } + + /// + [XmlElementAttribute("documentIdentifier")] + public AltoDocumentIdentifier[] DocumentIdentifiers + { + get + { + return this.documentIdentifierField; + } + set + { + this.documentIdentifierField = value; + } + } + } + + /// + /// [Alto] A unique identifier for the image file. This is drawn from MIX. + /// + /// This identifier must be unique within the local + /// To facilitate file sharing or interoperability with other systems, + /// fileIdentifierLocation may be added to designate the system or + /// application where the identifier is unique. 
+        ///
+        [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
+        [GeneratedCodeAttribute("xsd", "4.6.1055.0")]
+        [SerializableAttribute()]
+        [DebuggerStepThroughAttribute()]
+        [DesignerCategoryAttribute("code")]
+        [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+        public class AltoFileIdentifier
+        {
+            private string fileIdentifierLocationField;
+            private string valueField;
+
+            /// <summary>Serialized as the fileIdentifierLocation attribute.</summary>
+            [XmlAttributeAttribute("fileIdentifierLocation")]
+            public string FileIdentifierLocation
+            {
+                get { return this.fileIdentifierLocationField; }
+                set { this.fileIdentifierLocationField = value; }
+            }
+
+            /// <summary>Serialized as the text content of the element.</summary>
+            [XmlTextAttribute()]
+            public string Value
+            {
+                get { return this.valueField; }
+                set { this.valueField = value; }
+            }
+        }
+
+        /// <summary>
+        /// [Alto] A white space.
+        /// </summary>
+        [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
+        [GeneratedCodeAttribute("xsd", "4.6.1055.0")]
+        [SerializableAttribute()]
+        [DebuggerStepThroughAttribute()]
+        [DesignerCategoryAttribute("code")]
+        [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+        public class AltoSP
+        {
+            private string idField;
+            private float hEIGHTField;
+            private bool hEIGHTFieldSpecified;
+            private float wIDTHField;
+            private bool wIDTHFieldSpecified;
+            private float hPOSField;
+            private bool hPOSFieldSpecified;
+            private float vPOSField;
+            private bool vPOSFieldSpecified;
+
+            /// <summary>Serialized as the ID attribute.</summary>
+            [XmlAttributeAttribute("ID", DataType = "ID")]
+            public string Id
+            {
+                get { return this.idField; }
+                set { this.idField = value; }
+            }
+
+            /// <summary>Serialized as the HEIGHT attribute. Assigning a non-NaN value marks it as specified.</summary>
+            [XmlAttributeAttribute("HEIGHT")]
+            public float Height
+            {
+                get { return this.hEIGHTField; }
+                set
+                {
+                    this.hEIGHTField = value;
+                    if (float.IsNaN(value) == false)
+                    {
+                        this.hEIGHTFieldSpecified = true;
+                    }
+                }
+            }
+
+            /// <summary>Whether <see cref="Height"/> is written when serializing.</summary>
+            [XmlIgnoreAttribute()]
+            public bool HeightSpecified
+            {
+                get { return this.hEIGHTFieldSpecified; }
+                set { this.hEIGHTFieldSpecified = value; }
+            }
+
+            /// <summary>Serialized as the WIDTH attribute. Assigning a non-NaN value marks it as specified.</summary>
+            [XmlAttributeAttribute("WIDTH")]
+            public float Width
+            {
+                get { return this.wIDTHField; }
+                set
+                {
+                    this.wIDTHField = value;
+                    if (float.IsNaN(value) == false)
+                    {
+                        this.wIDTHFieldSpecified = true;
+                    }
+                }
+            }
+
+            /// <summary>Whether <see cref="Width"/> is written when serializing.</summary>
+            [XmlIgnoreAttribute()]
+            public bool WidthSpecified
+            {
+                get { return this.wIDTHFieldSpecified; }
+                set { this.wIDTHFieldSpecified = value; }
+            }
+
+            /// <summary>Serialized as the HPOS attribute. Assigning a non-NaN value marks it as specified.</summary>
+            [XmlAttributeAttribute("HPOS")]
+            public float HPos
+            {
+                get { return this.hPOSField; }
+                set
+                {
+                    this.hPOSField = value;
+                    if (float.IsNaN(value) == false)
+                    {
+                        this.hPOSFieldSpecified = true;
+                    }
+                }
+            }
+
+            /// <summary>Whether <see cref="HPos"/> is written when serializing.</summary>
+            [XmlIgnoreAttribute()]
+            public bool HPosSpecified
+            {
+                get { return this.hPOSFieldSpecified; }
+                set { this.hPOSFieldSpecified = value; }
+            }
+
+            /// <summary>Serialized as the VPOS attribute. Assigning a non-NaN value marks it as specified.</summary>
+            [XmlAttributeAttribute("VPOS")]
+            public float VPos
+            {
+                get { return this.vPOSField; }
+                set
+                {
+                    this.vPOSField = value;
+                    if (float.IsNaN(value) == false)
+                    {
+                        this.vPOSFieldSpecified = true;
+                    }
+                }
+            }
+
+            /// <summary>Whether <see cref="VPos"/> is written when serializing.</summary>
+            [XmlIgnoreAttribute()]
+            public bool VPosSpecified
+            {
+                get { return this.vPOSFieldSpecified; }
+                set { this.vPOSFieldSpecified = value; }
+            }
+        }
+
+        ///
+        /// [Alto] Alternative (combined) character for the glyph, outlined by OCR engine or similar recognition processes.
+        /// In case the variant are two (combining) characters, two characters are outlined in one Variant element.
+        /// E.g. a Glyph element with CONTENT="m" can have a Variant element with the content "rn".
+        /// Details for different use-cases see on the samples on GitHub.
+        ///
+        [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
+        [GeneratedCodeAttribute("xsd", "4.6.1055.0")]
+        [SerializableAttribute()]
+        [DebuggerStepThroughAttribute()]
+        [DesignerCategoryAttribute("code")]
+        [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+        public class AltoVariant
+        {
+
+            private string cONTENTField;
+
+            private float vcField;
+
+            private bool vcFieldSpecified;
+
+            /// <summary>
+            /// Each Variant represents an option for the glyph that the OCR software detected as possible alternatives.
+            /// In case the variant are two (combining) characters, two characters are outlined in one Variant element.
+            /// E.g. a Glyph element with CONTENT="m" can have a Variant element with the content "rn".
+            /// <para>Details for different use-cases see on the samples on GitHub.</para>
+            /// </summary>
+            [XmlAttributeAttribute("CONTENT")]
+            public string Content
+            {
+                get { return this.cONTENTField; }
+                set { this.cONTENTField = value; }
+            }
+
+            /// <summary>
+            /// This VC attribute records a float value between 0.0 and 1.0 that expresses the level of confidence
+            /// for the variant, where 1 is certain.
+            /// This attribute is optional. If it is not available, the default value for the variant is "0".
+            /// The VC attribute semantic is the same as the GC attribute on the Glyph element.
+            /// Assigning a non-NaN value marks the attribute as specified.
+            /// </summary>
+            [XmlAttributeAttribute("VC")]
+            public float Vc
+            {
+                get { return this.vcField; }
+                set
+                {
+                    this.vcField = value;
+                    if (!float.IsNaN(value)) this.vcFieldSpecified = true;
+                }
+            }
+
+            /// <summary>
+            /// Whether <see cref="Vc"/> is written when serializing. The XML serializer only recognises a
+            /// flag named exactly after the property it guards, so this member is the one that takes effect.
+            /// </summary>
+            [XmlIgnoreAttribute()]
+            public bool VcSpecified
+            {
+                get { return this.vcFieldSpecified; }
+                set { this.vcFieldSpecified = value; }
+            }
+
+            /// <summary>
+            /// Alias of <see cref="VcSpecified"/>, kept so existing callers keep compiling.
+            /// </summary>
+            [XmlIgnoreAttribute()]
+            public bool VCSpecified
+            {
+                get { return this.vcFieldSpecified; }
+                set { this.vcFieldSpecified = value; }
+            }
+        }
+
+        ///
+        /// [Alto] Modern OCR software stores information on glyph level. A glyph is essentially a character or ligature.
+ /// Accordingly the value for the glyph element will be defined as follows: + /// Pre-composed representation = base + combining character(s) (decomposed representation) + /// See http://www.fileformat.info/info/unicode/char/0101/index.htm + /// "U+0101" = (U+0061) + (U+0304) + /// "combining characters" ("base characters" in combination with non-spacing marks or characters which are combined to one) are represented as one "glyph", e.g. áàâ. + /// + /// Each glyph has its own coordinate information and must be separately addressable as a distinct object. + /// Correction and verification processes can be carried out for individual characters. + /// + /// Post-OCR analysis of the text as well as adaptive OCR algorithms must be able to record information on glyph level. + /// In order to reproduce the decision of the OCR software, optional characters must be recorded. These are called variants. + /// The OCR software evaluates each variant and picks the one with the highest confidence score as the glyph. + /// The confidence score expresses how confident the OCR software is that a single glyph has been recognized correctly. + /// + /// The glyph elements are in order of the word. Each glyph needs to be recorded to build up the whole word sequence. + /// + /// The glyph’s CONTENT attribute is no replacement for the string’s CONTENT attribute. + /// Due to post-processing steps such as correction the values of both attributes may be inconsistent. 
+        ///
+        [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
+        [GeneratedCodeAttribute("xsd", "4.6.1055.0")]
+        [SerializableAttribute()]
+        [DebuggerStepThroughAttribute()]
+        [DesignerCategoryAttribute("code")]
+        [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+        public class AltoGlyph
+        {
+            private AltoShape shapeField;
+
+            private AltoVariant[] variantField;
+
+            private string idField;
+
+            private string cONTENTField;
+
+            private float gcField;
+
+            private bool gcFieldSpecified;
+
+            private float hEIGHTField;
+
+            private bool hEIGHTFieldSpecified;
+
+            private float wIDTHField;
+
+            private bool wIDTHFieldSpecified;
+
+            private float hPOSField;
+
+            private bool hPOSFieldSpecified;
+
+            private float vPOSField;
+
+            private bool vPOSFieldSpecified;
+
+            /// <summary>Serialized as the Shape child element.</summary>
+            public AltoShape Shape
+            {
+                get { return this.shapeField; }
+                set { this.shapeField = value; }
+            }
+
+            /// <summary>
+            /// Alternative (combined) character for the glyph, outlined by OCR engine or similar recognition processes.
+            /// In case the variant are two (combining) characters, two characters are outlined in one Variant element.
+            /// E.g. a Glyph element with CONTENT="m" can have a Variant element with the content "rn".
+            /// Details for different use-cases see on the samples on GitHub.
+            /// </summary>
+            [XmlElementAttribute("Variant")]
+            public AltoVariant[] Variant
+            {
+                get { return this.variantField; }
+                set { this.variantField = value; }
+            }
+
+            /// <summary>Serialized as the ID attribute.</summary>
+            [XmlAttributeAttribute("ID", DataType = "ID")]
+            public string Id
+            {
+                get { return this.idField; }
+                set { this.idField = value; }
+            }
+
+            /// <summary>
+            /// CONTENT contains the precomposed representation (combining character) of the character from the parent String element.
+            /// The sequence position of the Glyph element matches the position of the character in the String.
+            /// </summary>
+            [XmlAttributeAttribute("CONTENT")]
+            public string Content
+            {
+                get { return this.cONTENTField; }
+                set { this.cONTENTField = value; }
+            }
+
+            /// <summary>
+            /// This GC attribute records a float value between 0.0 and 1.0 that expresses the level of confidence
+            /// for the glyph, where 1 is certain.
+            /// This attribute is optional. If it is not available, the default value is "0".
+            /// <para>The GC attribute semantic is the same as the WC attribute on the String element and VC on Variant element.</para>
+            /// Assigning a non-NaN value marks the attribute as specified.
+            /// </summary>
+            [XmlAttributeAttribute("GC")]
+            public float Gc
+            {
+                get { return this.gcField; }
+                set
+                {
+                    this.gcField = value;
+                    if (!float.IsNaN(value)) this.gcFieldSpecified = true;
+                }
+            }
+
+            /// <summary>
+            /// Whether <see cref="Gc"/> is written when serializing. The XML serializer only recognises a
+            /// flag named exactly after the property it guards, so this member is the one that takes effect.
+            /// </summary>
+            [XmlIgnoreAttribute()]
+            public bool GcSpecified
+            {
+                get { return this.gcFieldSpecified; }
+                set { this.gcFieldSpecified = value; }
+            }
+
+            /// <summary>
+            /// Alias of <see cref="GcSpecified"/>, kept so existing callers keep compiling.
+            /// </summary>
+            [XmlIgnoreAttribute()]
+            public bool GCSpecified
+            {
+                get { return this.gcFieldSpecified; }
+                set { this.gcFieldSpecified = value; }
+            }
+
+            /// <summary>Serialized as the HEIGHT attribute. Assigning a non-NaN value marks it as specified.</summary>
+            [XmlAttributeAttribute("HEIGHT")]
+            public float Height
+            {
+                get { return this.hEIGHTField; }
+                set
+                {
+                    this.hEIGHTField = value;
+                    if (!float.IsNaN(value)) this.hEIGHTFieldSpecified = true;
+                }
+            }
+
+            /// <summary>Whether <see cref="Height"/> is written when serializing.</summary>
+            [XmlIgnoreAttribute()]
+            public bool HeightSpecified
+            {
+                get { return this.hEIGHTFieldSpecified; }
+                set { this.hEIGHTFieldSpecified = value; }
+            }
+
+            /// <summary>Serialized as the WIDTH attribute. Assigning a non-NaN value marks it as specified.</summary>
+            [XmlAttributeAttribute("WIDTH")]
+            public float Width
+            {
+                get { return this.wIDTHField; }
+                set
+                {
+                    this.wIDTHField = value;
+                    if (!float.IsNaN(value)) this.wIDTHFieldSpecified = true;
+                }
+            }
+
+            /// <summary>Whether <see cref="Width"/> is written when serializing.</summary>
+            [XmlIgnoreAttribute()]
+            public bool WidthSpecified
+            {
+                get { return this.wIDTHFieldSpecified; }
+                set { this.wIDTHFieldSpecified = value; }
+            }
+
+            /// <summary>Serialized as the HPOS attribute. Assigning a non-NaN value marks it as specified.</summary>
+            [XmlAttributeAttribute("HPOS")]
+            public float HPos
+            {
+                get { return this.hPOSField; }
+                set
+                {
+                    this.hPOSField = value;
+                    if (!float.IsNaN(value)) this.hPOSFieldSpecified = true;
+                }
+            }
+
+            /// <summary>Whether <see cref="HPos"/> is written when serializing.</summary>
+            [XmlIgnoreAttribute()]
+            public bool HPosSpecified
+            {
+                get { return this.hPOSFieldSpecified; }
+                set { this.hPOSFieldSpecified = value; }
+            }
+
+            /// <summary>Serialized as the VPOS attribute. Assigning a non-NaN value marks it as specified.</summary>
+            [XmlAttributeAttribute("VPOS")]
+            public float VPos
+            {
+                get { return this.vPOSField; }
+                set
+                {
+                    this.vPOSField = value;
+                    if (!float.IsNaN(value)) this.vPOSFieldSpecified = true;
+                }
+            }
+
+            /// <summary>Whether <see cref="VPos"/> is written when serializing.</summary>
+            [XmlIgnoreAttribute()]
+            public bool VPosSpecified
+            {
+                get { return this.vPOSFieldSpecified; }
+                set { this.vPOSFieldSpecified = value; }
+            }
+
+            /// <summary>Returns <see cref="Content"/>.</summary>
+            public override string ToString()
+            {
+                return this.Content;
+            }
+        }
+
+        /// <summary>
+        /// [Alto] Describes the bounding shape of a block, if it is not rectangular.
+        /// </summary>
+        [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
+        [GeneratedCodeAttribute("xsd", "4.6.1055.0")]
+        [SerializableAttribute()]
+        [DebuggerStepThroughAttribute()]
+        [DesignerCategoryAttribute("code")]
+        [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+        public class AltoShape
+        {
+
+            private object itemField;
+
+            /// <summary>
+            /// The shape itself: serialized as a Circle, Ellipse or Polygon element depending on its runtime type.
+            /// </summary>
+            [XmlElementAttribute("Circle", typeof(AltoCircle))]
+            [XmlElementAttribute("Ellipse", typeof(AltoEllipse))]
+            [XmlElementAttribute("Polygon", typeof(AltoPolygon))]
+            public object Item
+            {
+                get { return this.itemField; }
+                set { this.itemField = value; }
+            }
+        }
+
+        ///
+        /// [Alto] A circle shape. HPOS and VPOS describe the center of the circle.
///
[EditorBrowsable(EditorBrowsableState.Never)]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoCircle
{
    /// <summary>Horizontal coordinate of the circle's center, serialized as the HPOS attribute.</summary>
    [XmlAttribute("HPOS")]
    public float HPos { get; set; }

    /// <summary>Vertical coordinate of the circle's center, serialized as the VPOS attribute.</summary>
    [XmlAttribute("VPOS")]
    public float VPos { get; set; }

    /// <summary>Radius of the circle, serialized as the RADIUS attribute.</summary>
    [XmlAttribute("RADIUS")]
    public float Radius { get; set; }
}

///
/// [Alto] An ellipse shape. HPOS and VPOS describe the center of the ellipse.
/// HLENGTH and VLENGTH are the width and height of the described ellipse.
/// The attribute ROTATION tells the rotation of e.g. text or
/// illustration within the block. The value is in degrees counterclockwise.
///
[EditorBrowsable(EditorBrowsableState.Never)]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoEllipse
{
    private float rotation;

    private bool rotationSpecified;

    /// <summary>Horizontal coordinate of the ellipse's center, serialized as the HPOS attribute.</summary>
    [XmlAttribute("HPOS")]
    public float HPos { get; set; }

    /// <summary>Vertical coordinate of the ellipse's center, serialized as the VPOS attribute.</summary>
    [XmlAttribute("VPOS")]
    public float VPos { get; set; }

    /// <summary>Width of the ellipse, serialized as the HLENGTH attribute.</summary>
    [XmlAttribute("HLENGTH")]
    public float HLength { get; set; }

    /// <summary>Height of the ellipse, serialized as the VLENGTH attribute.</summary>
    [XmlAttribute("VLENGTH")]
    public float VLength { get; set; }

    /// <summary>
    /// Rotation serialized as the ROTATION attribute.
    /// Assigning a non-NaN value also sets <see cref="RotationSpecified"/>.
    /// </summary>
    [XmlAttribute("ROTATION")]
    public float Rotation
    {
        get { return rotation; }
        set
        {
            rotation = value;
            if (!float.IsNaN(value))
            {
                rotationSpecified = true;
            }
        }
    }

    /// <summary>
    /// Whether <see cref="Rotation"/> carries a value. XmlSerializer pairs a member with its flag by the
    /// exact, case-sensitive name "PropertyName" + "Specified"; the original "ROTATIONSpecified" spelling
    /// did not match "Rotation", so the optional ROTATION attribute was always written.
    /// </summary>
    [XmlIgnore]
    public bool RotationSpecified
    {
        get { return rotationSpecified; }
        set { rotationSpecified = value; }
    }

    /// <summary>
    /// Original spelling of the flag, kept so existing callers keep compiling.
    /// Reads and writes the same state as <see cref="RotationSpecified"/>.
    /// </summary>
    [XmlIgnore]
    public bool ROTATIONSpecified
    {
        get { return rotationSpecified; }
        set { rotationSpecified = value; }
    }
}

///
/// [Alto] A polygon shape.
///
[EditorBrowsable(EditorBrowsableState.Never)]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoPolygon
{
    /// <summary>The polygon's points as a single string, serialized as the POINTS attribute.</summary>
    [XmlAttribute("POINTS")]
    public string Points { get; set; }
}

/// <summary>
/// [Alto] Alternative
/// </summary>
[EditorBrowsable(EditorBrowsableState.Never)]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoAlternative
{
    /// <summary>Serialized as the PURPOSE attribute.</summary>
    [XmlAttribute("PURPOSE")]
    public string Purpose { get; set; }

    /// <summary>Serialized as the text content of the element.</summary>
    [XmlText]
    public string Value { get; set; }
}

///
/// [Alto] A sequence of chars. Strings are separated by white spaces or hyphenation chars.
///
[EditorBrowsable(EditorBrowsableState.Never)]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoString
{
    private float height;

    private float width;

    private float hPos;

    private float vPos;

    private AltoFontStyles style;

    private AltoSubsType subsType;

    private float wc;

    private bool cs;

    /// <summary>Serialized as the Shape child element.</summary>
    public AltoShape Shape { get; set; }

    /// <summary>Serialized as repeated ALTERNATIVE child elements.</summary>
    [XmlElement("ALTERNATIVE")]
    public AltoAlternative[] Alternative { get; set; }

    /// <summary>Serialized as repeated Glyph child elements.</summary>
    [XmlElement("Glyph")]
    public AltoGlyph[] Glyph { get; set; }

    /// <summary>Serialized as the ID attribute (xsd type ID).</summary>
    [XmlAttribute("ID", DataType = "ID")]
    public string Id { get; set; }

    /// <summary>Serialized as the STYLEREFS attribute (xsd type IDREFS).</summary>
    [XmlAttribute("STYLEREFS", DataType = "IDREFS")]
    public string StyleRefs { get; set; }

    /// <summary>Serialized as the TAGREFS attribute (xsd type IDREFS).</summary>
    [XmlAttribute("TAGREFS", DataType = "IDREFS")]
    public string TagRefs { get; set; }

    /// <summary>Serialized as the PROCESSINGREFS attribute (xsd type IDREFS).</summary>
    [XmlAttribute("PROCESSINGREFS", DataType = "IDREFS")]
    public string ProcessingRefs { get; set; }

    /// <summary>HEIGHT attribute. Assigning a non-NaN value also sets <see cref="HeightSpecified"/>.</summary>
    [XmlAttribute("HEIGHT")]
    public float Height
    {
        get { return height; }
        set
        {
            height = value;
            if (!float.IsNaN(value))
            {
                HeightSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="Height"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool HeightSpecified { get; set; }

    /// <summary>WIDTH attribute. Assigning a non-NaN value also sets <see cref="WidthSpecified"/>.</summary>
    [XmlAttribute("WIDTH")]
    public float Width
    {
        get { return width; }
        set
        {
            width = value;
            if (!float.IsNaN(value))
            {
                WidthSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="Width"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool WidthSpecified { get; set; }

    /// <summary>HPOS attribute. Assigning a non-NaN value also sets <see cref="HPosSpecified"/>.</summary>
    [XmlAttribute("HPOS")]
    public float HPos
    {
        get { return hPos; }
        set
        {
            hPos = value;
            if (!float.IsNaN(value))
            {
                HPosSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="HPos"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool HPosSpecified { get; set; }

    /// <summary>VPOS attribute. Assigning a non-NaN value also sets <see cref="VPosSpecified"/>.</summary>
    [XmlAttribute("VPOS")]
    public float VPos
    {
        get { return vPos; }
        set
        {
            vPos = value;
            if (!float.IsNaN(value))
            {
                VPosSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="VPos"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool VPosSpecified { get; set; }

    /// <summary>Text of the string, serialized as the CONTENT attribute.</summary>
    [XmlAttribute("CONTENT")]
    public string Content { get; set; }

    /// <summary>STYLE attribute. Any assignment sets <see cref="StyleSpecified"/>.</summary>
    [XmlAttribute("STYLE")]
    public AltoFontStyles Style
    {
        get { return style; }
        set
        {
            style = value;
            StyleSpecified = true;
        }
    }

    /// <summary>Whether <see cref="Style"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool StyleSpecified { get; set; }

    /// <summary>SUBS_TYPE attribute. Any assignment sets <see cref="SubsTypeSpecified"/>.</summary>
    [XmlAttribute("SUBS_TYPE")]
    public AltoSubsType SubsType
    {
        get { return subsType; }
        set
        {
            subsType = value;
            SubsTypeSpecified = true;
        }
    }

    /// <summary>Whether <see cref="SubsType"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool SubsTypeSpecified { get; set; }

    /// <summary>
    /// Content of the substitution.
    /// </summary>
    [XmlAttribute("SUBS_CONTENT")]
    public string SubsContent { get; set; }

    /// <summary>WC attribute. Assigning a non-NaN value also sets <see cref="WcSpecified"/>.</summary>
    [XmlAttribute("WC")]
    public float Wc
    {
        get { return wc; }
        set
        {
            wc = value;
            if (!float.IsNaN(value))
            {
                WcSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="Wc"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool WcSpecified { get; set; }

    /// <summary>
    /// Confidence level of each character in that string. A list of numbers,
    /// one number between 0 (sure) and 9 (unsure) for each character.
    /// </summary>
    [XmlAttribute("CC")]
    public string Cc { get; set; }

    /// <summary>
    /// Correction Status. Indicates whether manual correction has been done or not.
    /// The correction status should be recorded at the highest level possible (Block, TextLine, String).
    /// Any assignment sets <see cref="CsSpecified"/>.
    /// </summary>
    [XmlAttribute("CS")]
    public bool Cs
    {
        get { return cs; }
        set
        {
            cs = value;
            CsSpecified = true;
        }
    }

    /// <summary>Whether <see cref="Cs"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool CsSpecified { get; set; }

    /// <summary>
    /// Attribute to record language of the string. The language should be recorded at the highest level possible.
    /// </summary>
    [XmlAttribute("LANG", DataType = "language")]
    public string Lang { get; set; }

    /// <summary>Returns <see cref="Content"/> unchanged (may be null when no content was set).</summary>
    public override string ToString()
    {
        return Content;
    }
}

/// <summary>
/// [Alto] Base type for any kind of block on the page.
/// </summary>
[EditorBrowsable(EditorBrowsableState.Never)]
[XmlInclude(typeof(AltoTextBlock))]
[XmlInclude(typeof(AltoGraphicalElement))]
[XmlInclude(typeof(AltoIllustration))]
[XmlInclude(typeof(AltoComposedBlock))]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoBlock
{
    private float height;

    private float width;

    private float hPos;

    private float vPos;

    private float rotation;

    private bool cs;

    private AltoBlockTypeShow showValue;

    private AltoBlockTypeActuate actuateValue;

    /// <summary>Creates a block whose xlink <see cref="type"/> is preset to "simple".</summary>
    public AltoBlock()
    {
        type = "simple";
    }

    /// <summary>Serialized as the Shape child element.</summary>
    public AltoShape Shape { get; set; }

    /// <summary>Serialized as the ID attribute (xsd type ID).</summary>
    [XmlAttribute("ID", DataType = "ID")]
    public string Id { get; set; }

    /// <summary>Serialized as the STYLEREFS attribute (xsd type IDREFS).</summary>
    [XmlAttribute("STYLEREFS", DataType = "IDREFS")]
    public string StyleRefs { get; set; }

    /// <summary>Serialized as the TAGREFS attribute (xsd type IDREFS).</summary>
    [XmlAttribute("TAGREFS", DataType = "IDREFS")]
    public string TagRefs { get; set; }

    /// <summary>Serialized as the PROCESSINGREFS attribute (xsd type IDREFS).</summary>
    [XmlAttribute("PROCESSINGREFS", DataType = "IDREFS")]
    public string ProcessingRefs { get; set; }

    /// <summary>HEIGHT attribute. Assigning a non-NaN value also sets <see cref="HeightSpecified"/>.</summary>
    [XmlAttribute("HEIGHT")]
    public float Height
    {
        get { return height; }
        set
        {
            height = value;
            if (!float.IsNaN(value))
            {
                HeightSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="Height"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool HeightSpecified { get; set; }

    /// <summary>WIDTH attribute. Assigning a non-NaN value also sets <see cref="WidthSpecified"/>.</summary>
    [XmlAttribute("WIDTH")]
    public float Width
    {
        get { return width; }
        set
        {
            width = value;
            if (!float.IsNaN(value))
            {
                WidthSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="Width"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool WidthSpecified { get; set; }

    /// <summary>HPOS attribute. Assigning a non-NaN value also sets <see cref="HPosSpecified"/>.</summary>
    [XmlAttribute("HPOS")]
    public float HPos
    {
        get { return hPos; }
        set
        {
            hPos = value;
            if (!float.IsNaN(value))
            {
                HPosSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="HPos"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool HPosSpecified { get; set; }

    /// <summary>VPOS attribute. Assigning a non-NaN value also sets <see cref="VPosSpecified"/>.</summary>
    [XmlAttribute("VPOS")]
    public float VPos
    {
        get { return vPos; }
        set
        {
            vPos = value;
            if (!float.IsNaN(value))
            {
                VPosSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="VPos"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool VPosSpecified { get; set; }

    /// <summary>
    /// Tells the rotation of e.g. text or illustration within the block. The value is in degree counterclockwise.
    /// Assigning a non-NaN value also sets <see cref="RotationSpecified"/>.
    /// </summary>
    [XmlAttribute("ROTATION")]
    public float Rotation
    {
        get { return rotation; }
        set
        {
            rotation = value;
            if (!float.IsNaN(value))
            {
                RotationSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="Rotation"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool RotationSpecified { get; set; }

    /// <summary>
    /// The next block in reading sequence on the page.
    /// </summary>
    [XmlAttribute("IDNEXT", DataType = "IDREF")]
    public string IdNext { get; set; }

    /// <summary>
    /// Correction Status. Indicates whether manual correction has been done or not.
    /// The correction status should be recorded at the highest level possible (Block, TextLine, String).
    /// Any assignment sets <see cref="CsSpecified"/>.
    /// </summary>
    [XmlAttribute("CS")]
    public bool Cs
    {
        get { return cs; }
        set
        {
            cs = value;
            CsSpecified = true;
        }
    }

    /// <summary>Whether <see cref="Cs"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool CsSpecified { get; set; }

    /// <summary>xlink:type attribute; the constructor presets it to "simple".</summary>
    [XmlAttribute("type", Form = XmlSchemaForm.Qualified, Namespace = "http://www.w3.org/1999/xlink")]
    public string type { get; set; }

    /// <summary>xlink:href attribute (xsd type anyURI).</summary>
    [XmlAttribute("href", Form = XmlSchemaForm.Qualified, Namespace = "http://www.w3.org/1999/xlink", DataType = "anyURI")]
    public string href { get; set; }

    /// <summary>xlink:role attribute.</summary>
    [XmlAttribute("role", Form = XmlSchemaForm.Qualified, Namespace = "http://www.w3.org/1999/xlink")]
    public string role { get; set; }

    /// <summary>xlink:arcrole attribute.</summary>
    [XmlAttribute("arcrole", Form = XmlSchemaForm.Qualified, Namespace = "http://www.w3.org/1999/xlink")]
    public string arcrole { get; set; }

    /// <summary>xlink:title attribute.</summary>
    [XmlAttribute("title", Form = XmlSchemaForm.Qualified, Namespace = "http://www.w3.org/1999/xlink")]
    public string title { get; set; }

    /// <summary>xlink:show attribute. Any assignment sets <see cref="showSpecified"/>.</summary>
    [XmlAttribute("show", Form = XmlSchemaForm.Qualified, Namespace = "http://www.w3.org/1999/xlink")]
    public AltoBlockTypeShow show
    {
        get { return showValue; }
        set
        {
            showValue = value;
            showSpecified = true;
        }
    }

    /// <summary>Whether <see cref="show"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool showSpecified { get; set; }

    /// <summary>xlink:actuate attribute. Any assignment sets <see cref="actuateSpecified"/>.</summary>
    [XmlAttribute("actuate", Form = XmlSchemaForm.Qualified, Namespace = "http://www.w3.org/1999/xlink")]
    public AltoBlockTypeActuate actuate
    {
        get { return actuateValue; }
        set
        {
            actuateValue = value;
            actuateSpecified = true;
        }
    }

    /// <summary>Whether <see cref="actuate"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool actuateSpecified { get; set; }
}

/// <summary>
/// [Alto] A block of text.
/// </summary>
[EditorBrowsable(EditorBrowsableState.Never)]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoTextBlock : AltoBlock
{
    /// <summary>The lines of the block, serialized as repeated TextLine child elements.</summary>
    [XmlElement("TextLine")]
    public AltoTextBlockTextLine[] TextLines { get; set; }

    /// <summary>
    /// Attribute deprecated. LANG should be used instead.
    /// </summary>
    [XmlAttribute("language", DataType = "language")]
    [Obsolete("Attribute deprecated. LANG should be used instead.")]
    public string Language { get; set; }

    /// <summary>
    /// Attribute to record language of the textblock.
    /// </summary>
    [XmlAttribute("LANG", DataType = "language")]
    public string Lang { get; set; }

    /// <summary>
    /// Joins the string form of every text line with a single space.
    /// Returns an empty string when <see cref="TextLines"/> has not been assigned, because
    /// string.Join throws ArgumentNullException for a null array and ToString should not throw.
    /// </summary>
    public override string ToString()
    {
        var lines = TextLines;
        return lines == null ? string.Empty : string.Join(" ", lines);
    }
}

///
/// [Alto] A single line of text.
///
[EditorBrowsable(EditorBrowsableState.Never)]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(AnonymousType = true, Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoTextBlockTextLine
{
    private float height;

    private float width;

    private float hPos;

    private float vPos;

    private float baseLine;

    private bool cs;

    /// <summary>Serialized as the Shape child element.</summary>
    public AltoShape Shape { get; set; }

    /// <summary>The words of the line, serialized as repeated String child elements.</summary>
    [XmlElement("String")]
    public AltoString[] Strings { get; set; }

    /// <summary>Serialized as repeated SP child elements.</summary>
    [XmlElement("SP")]
    public AltoSP[] Sp { get; set; }

    /// <summary>
    /// A hyphenation char. Can appear only at the end of a line.
    /// </summary>
    [XmlElement("HYP")]
    public AltoTextBlockTextLineHyp Hyp { get; set; }

    /// <summary>Serialized as the ID attribute (xsd type ID).</summary>
    [XmlAttribute("ID", DataType = "ID")]
    public string Id { get; set; }

    /// <summary>Serialized as the STYLEREFS attribute (xsd type IDREFS).</summary>
    [XmlAttribute("STYLEREFS", DataType = "IDREFS")]
    public string StyleRefs { get; set; }

    /// <summary>Serialized as the TAGREFS attribute (xsd type IDREFS).</summary>
    [XmlAttribute("TAGREFS", DataType = "IDREFS")]
    public string TagRefs { get; set; }

    /// <summary>Serialized as the PROCESSINGREFS attribute (xsd type IDREFS).</summary>
    [XmlAttribute("PROCESSINGREFS", DataType = "IDREFS")]
    public string ProcessingRefs { get; set; }

    /// <summary>HEIGHT attribute. Assigning a non-NaN value also sets <see cref="HeightSpecified"/>.</summary>
    [XmlAttribute("HEIGHT")]
    public float Height
    {
        get { return height; }
        set
        {
            height = value;
            if (!float.IsNaN(value))
            {
                HeightSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="Height"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool HeightSpecified { get; set; }

    /// <summary>WIDTH attribute. Assigning a non-NaN value also sets <see cref="WidthSpecified"/>.</summary>
    [XmlAttribute("WIDTH")]
    public float Width
    {
        get { return width; }
        set
        {
            width = value;
            if (!float.IsNaN(value))
            {
                WidthSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="Width"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool WidthSpecified { get; set; }

    /// <summary>HPOS attribute. Assigning a non-NaN value also sets <see cref="HPosSpecified"/>.</summary>
    [XmlAttribute("HPOS")]
    public float HPos
    {
        get { return hPos; }
        set
        {
            hPos = value;
            if (!float.IsNaN(value))
            {
                HPosSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="HPos"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool HPosSpecified { get; set; }

    /// <summary>VPOS attribute. Assigning a non-NaN value also sets <see cref="VPosSpecified"/>.</summary>
    [XmlAttribute("VPOS")]
    public float VPos
    {
        get { return vPos; }
        set
        {
            vPos = value;
            if (!float.IsNaN(value))
            {
                VPosSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="VPos"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool VPosSpecified { get; set; }

    /// <summary>BASELINE attribute. Assigning a non-NaN value also sets <see cref="BaseLineSpecified"/>.</summary>
    [XmlAttribute("BASELINE")]
    public float BaseLine
    {
        get { return baseLine; }
        set
        {
            baseLine = value;
            if (!float.IsNaN(value))
            {
                BaseLineSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="BaseLine"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool BaseLineSpecified { get; set; }

    /// <summary>Serialized as the LANG attribute (xsd type language).</summary>
    [XmlAttribute("LANG", DataType = "language")]
    public string Lang { get; set; }

    /// <summary>
    /// Correction Status. Indicates whether manual correction has been done or not.
    /// The correction status should be recorded at the highest level possible (Block, TextLine, String).
    /// Any assignment sets <see cref="CsSpecified"/>.
    /// </summary>
    [XmlAttribute("CS")]
    public bool Cs
    {
        get { return cs; }
        set
        {
            cs = value;
            CsSpecified = true;
        }
    }

    /// <summary>Whether <see cref="Cs"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool CsSpecified { get; set; }

    /// <summary>
    /// Joins the string form of every entry of <see cref="Strings"/> with a single space, in array order.
    /// Returns an empty string when <see cref="Strings"/> has not been assigned, because
    /// string.Join throws ArgumentNullException for a null array and ToString should not throw.
    /// </summary>
    public override string ToString()
    {
        var words = Strings;
        if (words == null)
        {
            return string.Empty;
        }

        return string.Join(" ", words); // take in account order?
    }
}

///
/// [Alto] A hyphenation char. Can appear only at the end of a line.
///
[EditorBrowsable(EditorBrowsableState.Never)]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(AnonymousType = true, Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoTextBlockTextLineHyp
{
    private float height;

    private float width;

    private float hPos;

    private float vPos;

    /// <summary>HEIGHT attribute. Assigning a non-NaN value also sets <see cref="HeightSpecified"/>.</summary>
    [XmlAttribute("HEIGHT")]
    public float Height
    {
        get { return height; }
        set
        {
            height = value;
            if (!float.IsNaN(value))
            {
                HeightSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="Height"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool HeightSpecified { get; set; }

    /// <summary>WIDTH attribute. Assigning a non-NaN value also sets <see cref="WidthSpecified"/>.</summary>
    [XmlAttribute("WIDTH")]
    public float Width
    {
        get { return width; }
        set
        {
            width = value;
            if (!float.IsNaN(value))
            {
                WidthSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="Width"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool WidthSpecified { get; set; }

    /// <summary>HPOS attribute. Assigning a non-NaN value also sets <see cref="HPosSpecified"/>.</summary>
    [XmlAttribute("HPOS")]
    public float HPos
    {
        get { return hPos; }
        set
        {
            hPos = value;
            if (!float.IsNaN(value))
            {
                HPosSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="HPos"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool HPosSpecified { get; set; }

    /// <summary>VPOS attribute. Assigning a non-NaN value also sets <see cref="VPosSpecified"/>.</summary>
    [XmlAttribute("VPOS")]
    public float VPos
    {
        get { return vPos; }
        set
        {
            vPos = value;
            if (!float.IsNaN(value))
            {
                VPosSpecified = true;
            }
        }
    }

    /// <summary>Whether <see cref="VPos"/> carries a value; not serialized itself.</summary>
    [XmlIgnore]
    public bool VPosSpecified { get; set; }

    /// <summary>The hyphenation text, serialized as the CONTENT attribute.</summary>
    [XmlAttribute("CONTENT")]
    public string Content { get; set; }
}

/// <summary>
/// [Alto] A graphic used to separate blocks. Usually a line or rectangle.
/// </summary>
[EditorBrowsable(EditorBrowsableState.Never)]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoGraphicalElement : AltoBlock
{
}

/// <summary>
/// [Alto] A picture or image.
/// </summary>
[EditorBrowsable(EditorBrowsableState.Never)]
[GeneratedCode("xsd", "4.6.1055.0")]
[Serializable]
[DebuggerStepThrough]
[DesignerCategory("code")]
[XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
public class AltoIllustration : AltoBlock
{
    /// <summary>
    /// A user defined string to identify the type of illustration like photo, map, drawing, chart, ...
    /// </summary>
    [XmlAttribute("TYPE")]
    public string Type { get; set; }

    /// <summary>
    /// A link to an image which contains only the illustration.
    /// </summary>
    [XmlAttribute("FILEID")]
    public string FileId { get; set; }
}

///
/// [Alto] A block that consists of other blocks.
/// WARNING: The CIRCULAR GROUP REFERENCES was removed from the xsd.
/// NEED TO ADD IT BACK!!!
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")] + public class AltoComposedBlock : AltoBlock + { + + /***************************************************************** + * /!\ WARNING /!\ + * The CIRCULAR GROUP REFERENCES below was removed from the xsd + * NEED TO ADD IT BACK!!! + * + * + * + *****************************************************************/ + + private string tYPEField; + + private string fILEIDField; + + /// + /// A user defined string to identify the type of composed block (e.g. table, advertisement, ...) + /// + [XmlAttributeAttribute("TYPE")] + public string Type + { + get + { + return this.tYPEField; + } + set + { + this.tYPEField = value; + } + } + + /// + /// An ID to link to an image which contains only the composed block. + /// The ID and the file link is defined in the related METS file. 
+ /// + [XmlAttributeAttribute("FILEID")] + public string FileId + { + get + { + return this.fILEIDField; + } + set + { + this.fILEIDField = value; + } + } + } + + /// + /// [Alto] A region on a page + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")] + public class AltoPageSpace + { + + private AltoShape shapeField; + + private AltoTextBlock[] textBlockField; + + private AltoIllustration[] illustrationField; + + private AltoGraphicalElement[] graphicalElementField; + + private AltoComposedBlock[] composedBlockField; + + private string idField; + + private string sTYLEREFSField; + + private string pROCESSINGREFSField; + + private float hEIGHTField; + + private bool hEIGHTFieldSpecified; + + private float wIDTHField; + + private bool wIDTHFieldSpecified; + + private float hPOSField; + + private bool hPOSFieldSpecified; + + private float vPOSField; + + private bool vPOSFieldSpecified; + + /// + /// + /// + public AltoShape Shape + { + get + { + return this.shapeField; + } + set + { + this.shapeField = value; + } + } + + /// + [XmlElementAttribute("TextBlock")] + public AltoTextBlock[] TextBlock + { + get + { + return this.textBlockField; + } + set + { + this.textBlockField = value; + } + } + + /// + [XmlElementAttribute("Illustration")] + public AltoIllustration[] Illustrations + { + get + { + return this.illustrationField; + } + set + { + this.illustrationField = value; + } + } + + /// + [XmlElementAttribute("GraphicalElement")] + public AltoGraphicalElement[] GraphicalElements + { + get + { + return this.graphicalElementField; + } + set + { + this.graphicalElementField = value; + } + } + + /// + [XmlElementAttribute("ComposedBlock")] + public AltoComposedBlock[] ComposedBlocks + { + get + { + return 
this.composedBlockField; + } + set + { + this.composedBlockField = value; + } + } + + /// + [XmlAttributeAttribute("ID", DataType = "ID")] + public string Id + { + get + { + return this.idField; + } + set + { + this.idField = value; + } + } + + /// + [XmlAttributeAttribute("STYLEREFS", DataType = "IDREFS")] + public string StyleRefs + { + get + { + return this.sTYLEREFSField; + } + set + { + this.sTYLEREFSField = value; + } + } + + /// + [XmlAttributeAttribute("PROCESSINGREFS", DataType = "IDREFS")] + public string ProcessingRefs + { + get + { + return this.pROCESSINGREFSField; + } + set + { + this.pROCESSINGREFSField = value; + } + } + + /// + [XmlAttributeAttribute("HEIGHT")] + public float Height + { + get + { + return this.hEIGHTField; + } + set + { + this.hEIGHTField = value; + if (!float.IsNaN(value)) this.hEIGHTFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool HeightSpecified + { + get + { + return this.hEIGHTFieldSpecified; + } + set + { + this.hEIGHTFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("WIDTH")] + public float Width + { + get + { + return this.wIDTHField; + } + set + { + this.wIDTHField = value; + if (!float.IsNaN(value)) this.wIDTHFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool WidthSpecified + { + get + { + return this.wIDTHFieldSpecified; + } + set + { + this.wIDTHFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("HPOS")] + public float HPos + { + get + { + return this.hPOSField; + } + set + { + this.hPOSField = value; + if (!float.IsNaN(value)) this.hPOSFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool HPosSpecified + { + get + { + return this.hPOSFieldSpecified; + } + set + { + this.hPOSFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("VPOS")] + public float VPos + { + get + { + return this.vPOSField; + } + set + { + this.vPOSField = value; + if (!float.IsNaN(value)) this.vPOSFieldSpecified = true; + } + } + 
+        ///
+        [XmlIgnore]
+        public bool VPosSpecified
+        {
+            get { return vPOSFieldSpecified; }
+            set { vPOSFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] One page of a book or journal.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoPage
+    {
+        // Child elements.
+        private AltoPageSpace topMarginField;
+        private AltoPageSpace leftMarginField;
+        private AltoPageSpace rightMarginField;
+        private AltoPageSpace bottomMarginField;
+        private AltoPageSpace printSpaceField;
+
+        // Attributes; each "...Specified" flag tracks whether its optional value was set.
+        private string idField;
+        private string pAGECLASSField;
+        private string sTYLEREFSField;
+        private string pROCESSINGREFSField;
+        private float hEIGHTField;
+        private bool hEIGHTFieldSpecified;
+        private float wIDTHField;
+        private bool wIDTHFieldSpecified;
+        private float pHYSICAL_IMG_NRField;
+        private string pRINTED_IMG_NRField;
+        private AltoQuality qUALITYField;
+        private bool qUALITYFieldSpecified;
+        private string qUALITY_DETAILField;
+        private AltoPosition pOSITIONField;
+        private bool pOSITIONFieldSpecified;
+        private string pROCESSINGField;
+        private float aCCURACYField;
+        private bool aCCURACYFieldSpecified;
+        private float pcField;
+        private bool pcFieldSpecified;
+
+        /// <summary>
+        /// The area between the top line of print and the upper edge of the leaf.
+        /// It may contain a page number or a running title.
+        /// </summary>
+        public AltoPageSpace TopMargin
+        {
+            get { return topMarginField; }
+            set { topMarginField = value; }
+        }
+
+        /// <summary>
+        /// The area between the print space and the left border of a page. May contain margin notes.
+        /// </summary>
+        public AltoPageSpace LeftMargin
+        {
+            get { return leftMarginField; }
+            set { leftMarginField = value; }
+        }
+
+        /// <summary>
+        /// The area between the print space and the right border of a page. May contain margin notes.
+        /// </summary>
+        public AltoPageSpace RightMargin
+        {
+            get { return rightMarginField; }
+            set { rightMarginField = value; }
+        }
+
+        /// <summary>
+        /// The area between the bottom line of letterpress or writing and the bottom edge of the leaf.
+        /// It may contain a page number, a signature number or a catch word.
+        /// </summary>
+        public AltoPageSpace BottomMargin
+        {
+            get { return bottomMarginField; }
+            set { bottomMarginField = value; }
+        }
+
+        /// <summary>
+        /// Rectangle covering the printed area of a page. Page number and running title
+        /// are not part of the print space.
+        /// </summary>
+        public AltoPageSpace PrintSpace
+        {
+            get { return printSpaceField; }
+            set { printSpaceField = value; }
+        }
+
+        /// <summary>
+        /// Identifier of the page; written as the <c>ID</c> attribute (xsd:ID).
+        /// </summary>
+        [XmlAttribute("ID", DataType = "ID")]
+        public string Id
+        {
+            get { return idField; }
+            set { idField = value; }
+        }
+
+        /// <summary>
+        /// Any user-defined class, such as "title page". Written as <c>PAGECLASS</c>.
+        /// </summary>
+        [XmlAttribute("PAGECLASS")]
+        public string PageClass
+        {
+            get { return pAGECLASSField; }
+            set { pAGECLASSField = value; }
+        }
+
+        /// <summary>
+        /// Style references; written as the <c>STYLEREFS</c> attribute (xsd:IDREFS).
+        /// </summary>
+        [XmlAttribute("STYLEREFS", DataType = "IDREFS")]
+        public string StyleRefs
+        {
+            get { return sTYLEREFSField; }
+            set { sTYLEREFSField = value; }
+        }
+
+        /// <summary>
+        /// Processing references; written as the <c>PROCESSINGREFS</c> attribute (xsd:IDREFS).
+        /// </summary>
+        [XmlAttribute("PROCESSINGREFS", DataType = "IDREFS")]
+        public string ProcessingRefs
+        {
+            get { return pROCESSINGREFSField; }
+            set { pROCESSINGREFSField = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>HEIGHT</c> attribute. Assigning anything other than NaN also
+        /// sets <see cref="HeightSpecified"/>.
+        /// </summary>
+        [XmlAttribute("HEIGHT")]
+        public float Height
+        {
+            get { return hEIGHTField; }
+            set
+            {
+                hEIGHTField = value;
+                if (!float.IsNaN(value))
+                {
+                    hEIGHTFieldSpecified = true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="Height"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool HeightSpecified
+        {
+            get { return hEIGHTFieldSpecified; }
+            set { hEIGHTFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>WIDTH</c> attribute. Assigning anything other than NaN also
+        /// sets <see cref="WidthSpecified"/>.
+        /// </summary>
+        [XmlAttribute("WIDTH")]
+        public float Width
+        {
+            get { return wIDTHField; }
+            set
+            {
+                wIDTHField = value;
+                if (!float.IsNaN(value))
+                {
+                    wIDTHFieldSpecified = true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="Width"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool WidthSpecified
+        {
+            get { return wIDTHFieldSpecified; }
+            set { wIDTHFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The number of the page within the document. Written as <c>PHYSICAL_IMG_NR</c>;
+        /// this class declares no "Specified" companion for it.
+        /// </summary>
+        [XmlAttribute("PHYSICAL_IMG_NR")]
+        public float PhysicalImgNr
+        {
+            get { return pHYSICAL_IMG_NRField; }
+            set { pHYSICAL_IMG_NRField = value; }
+        }
+
+        /// <summary>
+        /// The page number that is printed on the page. Written as <c>PRINTED_IMG_NR</c>.
+        /// </summary>
+        [XmlAttribute("PRINTED_IMG_NR")]
+        public string PrintedImgNr
+        {
+            get { return pRINTED_IMG_NRField; }
+            set { pRINTED_IMG_NRField = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>QUALITY</c> attribute. Every assignment sets <see cref="QualitySpecified"/>.
+        /// </summary>
+        [XmlAttribute("QUALITY")]
+        public AltoQuality Quality
+        {
+            get { return qUALITYField; }
+            set
+            {
+                qUALITYField = value;
+                qUALITYFieldSpecified = true;
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="Quality"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool QualitySpecified
+        {
+            get { return qUALITYFieldSpecified; }
+            set { qUALITYFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>QUALITY_DETAIL</c> attribute.
+        /// </summary>
+        [XmlAttribute("QUALITY_DETAIL")]
+        public string QualityDetail
+        {
+            get { return qUALITY_DETAILField; }
+            set { qUALITY_DETAILField = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>POSITION</c> attribute. Every assignment sets <see cref="PositionSpecified"/>.
+        /// </summary>
+        [XmlAttribute("POSITION")]
+        public AltoPosition Position
+        {
+            get { return pOSITIONField; }
+            set
+            {
+                pOSITIONField = value;
+                pOSITIONFieldSpecified = true;
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="Position"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool PositionSpecified
+        {
+            get { return pOSITIONFieldSpecified; }
+            set { pOSITIONFieldSpecified = value; }
+        }
+
+        ///
+        /// A link to the processing description that has been used for this page.
+        ///
+        [XmlAttribute("PROCESSING", DataType = "IDREF")]
+        public string Processing
+        {
+            get { return pROCESSINGField; }
+            set { pROCESSINGField = value; }
+        }
+
+        /// <summary>
+        /// Estimated percentage of OCR accuracy, in the range 0 to 100. Written as <c>ACCURACY</c>.
+        /// Assigning anything other than NaN also sets <see cref="AccuracySpecified"/>.
+        /// </summary>
+        [XmlAttribute("ACCURACY")]
+        public float Accuracy
+        {
+            get { return aCCURACYField; }
+            set
+            {
+                aCCURACYField = value;
+                if (!float.IsNaN(value))
+                {
+                    aCCURACYFieldSpecified = true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="Accuracy"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool AccuracySpecified
+        {
+            get { return aCCURACYFieldSpecified; }
+            set { aCCURACYFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>PC</c> attribute. Assigning anything other than NaN also sets
+        /// <see cref="PcSpecified"/>.
+        /// </summary>
+        // NOTE(review): presumably the ALTO "page confidence" value - confirm against the schema.
+        [XmlAttribute("PC")]
+        public float Pc
+        {
+            get { return pcField; }
+            set
+            {
+                pcField = value;
+                if (!float.IsNaN(value))
+                {
+                    pcFieldSpecified = true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="Pc"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool PcSpecified
+        {
+            get { return pcFieldSpecified; }
+            set { pcFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] Layout: the container for the <c>Page</c> elements of a document.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoLayout
+    {
+        private AltoPage[] pageField;
+        private string sTYLEREFSField;
+
+        /// <summary>
+        /// The pages; each array item is written as a <c>Page</c> element.
+        /// </summary>
+        [XmlElement("Page")]
+        public AltoPage[] Pages
+        {
+            get { return pageField; }
+            set { pageField = value; }
+        }
+
+        /// <summary>
+        /// Style references; written as the <c>STYLEREFS</c> attribute (xsd:IDREFS).
+        /// </summary>
+        [XmlAttribute("STYLEREFS", DataType = "IDREFS")]
+        public string StyleRefs
+        {
+            get { return sTYLEREFSField; }
+            set { sTYLEREFSField = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] Tag
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoTag
+    {
+        private AltoTagXmlData xmlDataField;
+        private string idField;
+        private string tYPEField;
+        private string lABELField;
+        private string dESCRIPTIONField;
+        private string uRIField;
+
+        /// <summary>
+        /// The xml data wrapper element XmlData is used to contain XML encoded metadata.
+        /// The content of an XmlData element can be in any namespace or in no namespace.
+        /// As permitted by the XML Schema Standard, the processContents attribute value for the
+        /// metadata in an XmlData is set to “lax”. Therefore, if the source schema and its location are
+        /// identified by means of an XML schemaLocation attribute, then an XML processor will validate
+        /// the elements for which it can find declarations. If a source schema is not identified, or cannot be
+        /// found at the specified schemaLocation, then an XML validator will check for well-formedness,
+        /// but otherwise skip over the elements appearing in the XmlData element.
+        /// </summary>
+        public AltoTagXmlData XmlData
+        {
+            get { return xmlDataField; }
+            set { xmlDataField = value; }
+        }
+
+        /// <summary>
+        /// Identifier of the tag; written as the <c>ID</c> attribute (xsd:ID).
+        /// </summary>
+        [XmlAttribute("ID", DataType = "ID")]
+        public string Id
+        {
+            get { return idField; }
+            set { idField = value; }
+        }
+
+        /// <summary>
+        /// Type can be used to classify and group the information within each tag element type.
+        /// </summary>
+        [XmlAttribute("TYPE")]
+        public string Type
+        {
+            get { return tYPEField; }
+            set { tYPEField = value; }
+        }
+
+        /// <summary>
+        /// Content / information value of the tag.
+        /// </summary>
+        [XmlAttribute("LABEL")]
+        public string Label
+        {
+            get { return lABELField; }
+            set { lABELField = value; }
+        }
+
+        /// <summary>
+        /// Description text for tag information, for clarification.
+        /// </summary>
+        [XmlAttribute("DESCRIPTION")]
+        public string Description
+        {
+            get { return dESCRIPTIONField; }
+            set { dESCRIPTIONField = value; }
+        }
+
+        /// <summary>
+        /// Any URI for authority or description relevant information (xsd:anyURI).
+        /// </summary>
+        [XmlAttribute("URI", DataType = "anyURI")]
+        public string Uri
+        {
+            get { return uRIField; }
+            set { uRIField = value; }
+        }
+    }
+
+    ///
+    /// [Alto] The xml data wrapper element XmlData is used to contain XML encoded metadata.
+    /// The content of an XmlData element can be in any namespace or in no namespace.
+    /// As permitted by the XML Schema Standard, the processContents attribute value for the
+    /// metadata in an XmlData is set to “lax”. Therefore, if the source schema and its location are
+    /// identified by means of an XML schemaLocation attribute, then an XML processor will validate
+    /// the elements for which it can find declarations. If a source schema is not identified, or cannot be
+    /// found at the specified schemaLocation, then an XML validator will check for well-formedness,
+    /// but otherwise skip over the elements appearing in the XmlData element.
+    ///
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(AnonymousType = true, Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoTagXmlData
+    {
+        private XmlElement[] anyField;
+
+        /// <summary>
+        /// Arbitrary child elements, mapped with <c>XmlAnyElement</c>.
+        /// </summary>
+        [XmlAnyElement]
+        public XmlElement[] Any
+        {
+            get { return anyField; }
+            set { anyField = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] The following tag types are available:
+    /// LayoutTag – criteria about arrangement or graphical appearance;
+    /// StructureTag – criteria about grouping or formation;
+    /// RoleTag – criteria about function or mission;
+    /// NamedEntityTag – criteria about assignment of terms to their relationship / meaning (NER);
+    /// OtherTag – criteria about any other characteristic not listed above; the TYPE attribute
+    /// is intended to be used for classification within those.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoTags
+    {
+        private AltoTag[] itemsField;
+        private AltoItemsChoice[] itemsElementNameField;
+
+        /// <summary>
+        /// The tags. The element name used for each entry is selected through
+        /// <see cref="ItemsElementName"/>, the declared choice identifier.
+        /// </summary>
+        [XmlElement("LayoutTag", typeof(AltoTag))]
+        [XmlElement("NamedEntityTag", typeof(AltoTag))]
+        [XmlElement("OtherTag", typeof(AltoTag))]
+        [XmlElement("RoleTag", typeof(AltoTag))]
+        [XmlElement("StructureTag", typeof(AltoTag))]
+        [XmlChoiceIdentifier("ItemsElementName")]
+        public AltoTag[] Items
+        {
+            get { return itemsField; }
+            set { itemsField = value; }
+        }
+
+        /// <summary>
+        /// Choice identifiers for <see cref="Items"/>; excluded from the XML itself.
+        /// </summary>
+        [XmlElement("ItemsElementName")]
+        [XmlIgnore]
+        public AltoItemsChoice[] ItemsElementName
+        {
+            get { return itemsElementNameField; }
+            set { itemsElementNameField = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] A paragraph style defines formatting properties of text blocks.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoParagraphStyle
+    {
+        // Each "...Specified" flag tracks whether its optional value was set.
+        private string idField;
+        private AltoParagraphStyleAlign aLIGNField;
+        private bool aLIGNFieldSpecified;
+        private float lEFTField;
+        private bool lEFTFieldSpecified;
+        private float rIGHTField;
+        private bool rIGHTFieldSpecified;
+        private float lINESPACEField;
+        private bool lINESPACEFieldSpecified;
+        private float fIRSTLINEField;
+        private bool fIRSTLINEFieldSpecified;
+
+        /// <summary>
+        /// Identifier of the style; written as the <c>ID</c> attribute (xsd:ID).
+        /// </summary>
+        [XmlAttribute("ID", DataType = "ID")]
+        public string Id
+        {
+            get { return idField; }
+            set { idField = value; }
+        }
+
+        /// <summary>
+        /// Indicates the alignment of the paragraph. Could be left, right, center or justify.
+        /// Every assignment sets <see cref="AlignSpecified"/>.
+        /// </summary>
+        [XmlAttribute("ALIGN")]
+        public AltoParagraphStyleAlign Align
+        {
+            get { return aLIGNField; }
+            set
+            {
+                aLIGNField = value;
+                aLIGNFieldSpecified = true;
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="Align"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool AlignSpecified
+        {
+            get { return aLIGNFieldSpecified; }
+            set { aLIGNFieldSpecified = value; }
+        }
+
+        ///
+        /// Left indent of the paragraph in relation to the column.
+        ///
+        [XmlAttribute("LEFT")]
+        public float Left
+        {
+            get { return lEFTField; }
+            set
+            {
+                lEFTField = value;
+                if (!float.IsNaN(value))
+                {
+                    lEFTFieldSpecified = true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="Left"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool LeftSpecified
+        {
+            get { return lEFTFieldSpecified; }
+            set { lEFTFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Right indent of the paragraph in relation to the column. Written as <c>RIGHT</c>.
+        /// Assigning anything other than NaN also sets <see cref="RightSpecified"/>.
+        /// </summary>
+        [XmlAttribute("RIGHT")]
+        public float Right
+        {
+            get { return rIGHTField; }
+            set
+            {
+                rIGHTField = value;
+                if (!float.IsNaN(value))
+                {
+                    rIGHTFieldSpecified = true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="Right"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool RightSpecified
+        {
+            get { return rIGHTFieldSpecified; }
+            set { rIGHTFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Line spacing between two lines of the paragraph, measured from baseline to baseline.
+        /// Written as <c>LINESPACE</c>. Assigning anything other than NaN also sets
+        /// <see cref="LineSpaceSpecified"/>.
+        /// </summary>
+        [XmlAttribute("LINESPACE")]
+        public float LineSpace
+        {
+            get { return lINESPACEField; }
+            set
+            {
+                lINESPACEField = value;
+                if (!float.IsNaN(value))
+                {
+                    lINESPACEFieldSpecified = true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="LineSpace"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool LineSpaceSpecified
+        {
+            get { return lINESPACEFieldSpecified; }
+            set { lINESPACEFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Indent of the first line of the paragraph if this is different from the other lines. A negative
+        /// value indicates an indent to the left, a positive value indicates an indent to the right.
+        /// Written as <c>FIRSTLINE</c>. Assigning anything other than NaN also sets
+        /// <see cref="FirstLineSpecified"/>.
+        /// </summary>
+        [XmlAttribute("FIRSTLINE")]
+        public float FirstLine
+        {
+            get { return fIRSTLINEField; }
+            set
+            {
+                fIRSTLINEField = value;
+                if (!float.IsNaN(value))
+                {
+                    fIRSTLINEFieldSpecified = true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="FirstLine"/> should be written; not part of the XML itself.
+        /// XmlSerializer only recognises a companion named exactly "FirstLine" + "Specified".
+        /// The previous spelling, <see cref="FIRSTLINESpecified"/>, was never picked up, so
+        /// <c>FIRSTLINE</c> was written even when no value had been set.
+        /// </summary>
+        [XmlIgnore]
+        public bool FirstLineSpecified
+        {
+            get { return fIRSTLINEFieldSpecified; }
+            set { fIRSTLINEFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Legacy spelling kept so existing callers still compile; reads and writes the same
+        /// flag as <see cref="FirstLineSpecified"/>.
+        /// </summary>
+        [XmlIgnore]
+        public bool FIRSTLINESpecified
+        {
+            get { return fIRSTLINEFieldSpecified; }
+            set { fIRSTLINEFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] A text style defines font properties of text.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoTextStyle
+    {
+        // Each "...Specified" flag tracks whether its optional value was set.
+        private string idField;
+        private string fONTFAMILYField;
+        private AltoFontType fONTTYPEField;
+        private bool fONTTYPEFieldSpecified;
+        private AltoFontWidth fONTWIDTHField;
+        private bool fONTWIDTHFieldSpecified;
+        private float fONTSIZEField;
+        private byte[] fONTCOLORField;
+        private AltoFontStyles fONTSTYLEField;
+        private bool fONTSTYLEFieldSpecified;
+
+        /// <summary>
+        /// Identifier of the style; written as the <c>ID</c> attribute (xsd:ID).
+        /// </summary>
+        [XmlAttribute("ID", DataType = "ID")]
+        public string Id
+        {
+            get { return idField; }
+            set { idField = value; }
+        }
+
+        ///
+        /// The font name.
+        ///
+        [XmlAttribute("FONTFAMILY")]
+        public string FontFamily
+        {
+            get { return fONTFAMILYField; }
+            set { fONTFAMILYField = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>FONTTYPE</c> attribute. Every assignment sets <see cref="FontTypeSpecified"/>.
+        /// </summary>
+        [XmlAttribute("FONTTYPE")]
+        public AltoFontType FontType
+        {
+            get { return fONTTYPEField; }
+            set
+            {
+                fONTTYPEField = value;
+                fONTTYPEFieldSpecified = true;
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="FontType"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool FontTypeSpecified
+        {
+            get { return fONTTYPEFieldSpecified; }
+            set { fONTTYPEFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>FONTWIDTH</c> attribute. Every assignment sets <see cref="FontWidthSpecified"/>.
+        /// </summary>
+        [XmlAttribute("FONTWIDTH")]
+        public AltoFontWidth FontWidth
+        {
+            get { return fONTWIDTHField; }
+            set
+            {
+                fONTWIDTHField = value;
+                fONTWIDTHFieldSpecified = true;
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="FontWidth"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool FontWidthSpecified
+        {
+            get { return fONTWIDTHFieldSpecified; }
+            set { fONTWIDTHFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The font size, in points (1/72 of an inch). Written as <c>FONTSIZE</c>;
+        /// this class declares no "Specified" companion for it.
+        /// </summary>
+        [XmlAttribute("FONTSIZE")]
+        public float FontSize
+        {
+            get { return fONTSIZEField; }
+            set { fONTSIZEField = value; }
+        }
+
+        /// <summary>
+        /// Font color as RGB value; written as <c>FONTCOLOR</c> (xsd:hexBinary).
+        /// </summary>
+        [XmlAttribute("FONTCOLOR", DataType = "hexBinary")]
+        public byte[] FontColor
+        {
+            get { return fONTCOLORField; }
+            set { fONTCOLORField = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>FONTSTYLE</c> attribute. Every assignment sets <see cref="FontStyleSpecified"/>.
+        /// </summary>
+        [XmlAttribute("FONTSTYLE")]
+        public AltoFontStyles FontStyle
+        {
+            get { return fONTSTYLEField; }
+            set
+            {
+                fONTSTYLEField = value;
+                fONTSTYLEFieldSpecified = true;
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="FontStyle"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool FontStyleSpecified
+        {
+            get { return fONTSTYLEFieldSpecified; }
+            set { fONTSTYLEFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] Styles: the text and paragraph style definitions.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoStyles
+    {
+        private AltoTextStyle[] textStyleField;
+        private AltoParagraphStyle[] paragraphStyleField;
+
+        /// <summary>
+        /// Text styles; each array item is written as a <c>TextStyle</c> element.
+        /// </summary>
+        [XmlElement("TextStyle")]
+        public AltoTextStyle[] TextStyle
+        {
+            get { return textStyleField; }
+            set { textStyleField = value; }
+        }
+
+        /// <summary>
+        /// Paragraph styles; each array item is written as a <c>ParagraphStyle</c> element.
+        /// </summary>
+        [XmlElement("ParagraphStyle")]
+        public AltoParagraphStyle[] ParagraphStyle
+        {
+            get { return paragraphStyleField; }
+            set { paragraphStyleField = value; }
+        }
+    }
+
+    ///
+    /// [Alto] Information about a software application. Where applicable, the preferred method
+    /// for determining this information is by selecting Help -- About.
+    ///
+    // NOTE(review): every member below is mapped as an XML attribute. The ALTO v4 schema appears to
+    // declare softwareCreator/softwareName/softwareVersion/applicationDescription as child elements -
+    // confirm against the XSD before relying on schema validation of the output.
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoProcessingSoftware
+    {
+        private string softwareCreatorField;
+        private string softwareNameField;
+        private string softwareVersionField;
+        private string applicationDescriptionField;
+
+        /// <summary>
+        /// The name of the organization or company that created the application.
+        /// </summary>
+        [XmlAttribute("softwareCreator")]
+        public string SoftwareCreator
+        {
+            get { return softwareCreatorField; }
+            set { softwareCreatorField = value; }
+        }
+
+        /// <summary>
+        /// The name of the application.
+        /// </summary>
+        [XmlAttribute("softwareName")]
+        public string SoftwareName
+        {
+            get { return softwareNameField; }
+            set { softwareNameField = value; }
+        }
+
+        /// <summary>
+        /// The version of the application.
+        /// </summary>
+        [XmlAttribute("softwareVersion")]
+        public string SoftwareVersion
+        {
+            get { return softwareVersionField; }
+            set { softwareVersionField = value; }
+        }
+
+        /// <summary>
+        /// A description of any important characteristics of the application, especially for
+        /// non-commercial applications. For example, if a non-commercial application is built
+        /// using commercial components (e.g. an OCR engine SDK), those components should be mentioned here.
+        /// </summary>
+        [XmlAttribute("applicationDescription")]
+        public string ApplicationDescription
+        {
+            get { return applicationDescriptionField; }
+            set { applicationDescriptionField = value; }
+        }
+    }
+
+    ///
+    /// [Alto] Description of the processing step.
+    ///
+    // NOTE(review): processingCategory, processingDateTime, processingAgency and processingStepSettings
+    // are mapped as XML attributes here, while the description and software members are elements.
+    // The ALTO v4 schema appears to declare all of them as child elements - confirm against the XSD.
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoProcessingStep
+    {
+        private AltoProcessingCategory processingCategoryField;
+        private bool processingCategoryFieldSpecified;
+        private string processingDateTimeField;
+        private string processingAgencyField;
+        private string[] processingStepDescriptionField;
+        private string processingStepSettingsField;
+        private AltoProcessingSoftware processingSoftwareField;
+
+        /// <summary>
+        /// Classification of the category of operation, how the file was created, including
+        /// generation, modification, preprocessing, postprocessing or any other steps.
+        /// Every assignment sets <see cref="ProcessingCategorySpecified"/>.
+        /// </summary>
+        [XmlAttribute("processingCategory")]
+        public AltoProcessingCategory ProcessingCategory
+        {
+            get { return processingCategoryField; }
+            set
+            {
+                processingCategoryField = value;
+                processingCategoryFieldSpecified = true;
+            }
+        }
+
+        /// <summary>
+        /// Whether <see cref="ProcessingCategory"/> should be written; not part of the XML itself.
+        /// </summary>
+        [XmlIgnore]
+        public bool ProcessingCategorySpecified
+        {
+            get { return processingCategoryFieldSpecified; }
+            set { processingCategoryFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Date or DateTime the image was processed.
+        /// </summary>
+        [XmlAttribute("processingDateTime")]
+        public string ProcessingDateTime
+        {
+            get { return processingDateTimeField; }
+            set { processingDateTimeField = value; }
+        }
+
+        /// <summary>
+        /// Identifies the organization level producer(s) of the processed image.
+        /// </summary>
+        [XmlAttribute("processingAgency")]
+        public string ProcessingAgency
+        {
+            get { return processingAgencyField; }
+            set { processingAgencyField = value; }
+        }
+
+        /// <summary>
+        /// An ordinal listing of the image processing steps performed. For example, "image despeckling."
+        /// Each array item is written as a <c>processingStepDescription</c> element.
+        /// </summary>
+        [XmlElement("processingStepDescription")]
+        public string[] ProcessingStepDescription
+        {
+            get { return processingStepDescriptionField; }
+            set { processingStepDescriptionField = value; }
+        }
+
+        /// <summary>
+        /// A description of any setting of the processing application. For example, for a multi-engine
+        /// OCR application this might include the engines which were used. Ideally, this description
+        /// should be adequate so that someone else using the same application can produce identical results.
+        /// </summary>
+        [XmlAttribute("processingStepSettings")]
+        public string ProcessingStepSettings
+        {
+            get { return processingStepSettingsField; }
+            set { processingStepSettingsField = value; }
+        }
+
+        /// <summary>
+        /// The software used for this step; written as a <c>processingSoftware</c> element.
+        /// </summary>
+        [XmlElement("processingSoftware")]
+        public AltoProcessingSoftware ProcessingSoftware
+        {
+            get { return processingSoftwareField; }
+            set { processingSoftwareField = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] Ocr Processing
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    [Obsolete("Element deprecated. 'AltoProcessing' should be used instead.")]
+    public class AltoOcrProcessing
+    {
+        private AltoProcessingStep[] preProcessingStepField;
+        private AltoProcessingStep ocrProcessingStepField;
+        private AltoProcessingStep[] postProcessingStepField;
+
+        /// <summary>
+        /// Pre-processing steps; each array item is written as a <c>preProcessingStep</c> element.
+        /// </summary>
+        [XmlElement("preProcessingStep")]
+        public AltoProcessingStep[] preProcessingStep
+        {
+            get { return preProcessingStepField; }
+            set { preProcessingStepField = value; }
+        }
+
+        /// <summary>
+        /// The OCR processing step. Carries no mapping attribute, so the serializer's
+        /// default mapping for a public property applies.
+        /// </summary>
+        public AltoProcessingStep ocrProcessingStep
+        {
+            get { return ocrProcessingStepField; }
+            set { ocrProcessingStepField = value; }
+        }
+
+        /// <summary>
+        /// Post-processing steps; each array item is written as a <c>postProcessingStep</c> element.
+        /// </summary>
+        [XmlElement("postProcessingStep")]
+        public AltoProcessingStep[] postProcessingStep
+        {
+            get { return postProcessingStepField; }
+            set { postProcessingStepField = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] A unique identifier for the document.
+    /// This identifier must be unique within the local system.
+    /// To facilitate file sharing or interoperability with other systems,
+    /// documentIdentifierLocation may be added to designate the system or
+    /// application where the identifier is unique.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoDocumentIdentifier
+    {
+        private string documentIdentifierLocationField;
+        private string valueField;
+
+        ///
+        /// A location qualifier, i.e., a namespace.
+        ///
+        [XmlAttribute("documentIdentifierLocation")]
+        public string DocumentIdentifierLocation
+        {
+            get { return documentIdentifierLocationField; }
+            set { documentIdentifierLocationField = value; }
+        }
+
+        /// <summary>
+        /// The identifier itself; written as the text content of the element.
+        /// </summary>
+        [XmlText]
+        public string Value
+        {
+            get { return valueField; }
+            set { valueField = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] Description Ocr Processing: an <see cref="AltoOcrProcessing"/> that also carries an ID.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(AnonymousType = true, Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    [Obsolete("Element deprecated. 'AltoProcessing' should be used instead.")]
+    public class AltoDescriptionOcrProcessing : AltoOcrProcessing
+    {
+        private string idField;
+
+        /// <summary>
+        /// Identifier (xsd:ID). No attribute name is given, so the property name <c>ID</c> is used.
+        /// </summary>
+        [XmlAttribute(DataType = "ID")]
+        public string ID
+        {
+            get { return idField; }
+            set { idField = value; }
+        }
+    }
+
+    /// <summary>
+    /// [Alto] Description Processing: an <see cref="AltoProcessingStep"/> that also carries an ID.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(AnonymousType = true, Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public class AltoDescriptionProcessing : AltoProcessingStep
+    {
+        private string idField;
+
+        /// <summary>
+        /// Identifier; written as the <c>ID</c> attribute (xsd:ID).
+        /// </summary>
+        [XmlAttribute("ID", DataType = "ID")]
+        public string Id
+        {
+            get { return idField; }
+            set { idField = value; }
+        }
+    }
+
+    ///
+    /// [Alto] All measurement values inside the alto file are related to this unit, except the font size.
+    ///
+    /// Coordinates as being used in HPOS and VPOS are absolute coordinates referring to the upper-left corner of a page.
+    /// The upper left corner of the page is defined as coordinate (0/0).
+    ///
+    /// values meaning:
+    /// mm10: 1/10th of millimeter;
+    /// inch1200: 1/1200th of inch;
+    /// pixel: 1 pixel
+    ///
+    /// The values for pixel will be related to the resolution of the image based
+    /// on which the layout is described. In case the original image is not known
+    /// the scaling factor can be calculated based on total width and height of
+    /// the image and the according information of the PAGE element.
+    ///
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public enum AltoMeasurementUnit
+    {
+        /// <summary>
+        /// 1 pixel
+        /// </summary>
+        [XmlEnum("pixel")]
+        Pixel = 0,
+
+        /// <summary>
+        /// 1/10th of millimeter
+        /// </summary>
+        [XmlEnum("mm10")]
+        Mm10 = 1,
+
+        /// <summary>
+        /// 1/1200th of inch
+        /// </summary>
+        [XmlEnum("inch1200")]
+        Inch1200 = 2,
+    }
+
+    /// <summary>
+    /// [Alto] List of any combination of font styles
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [Flags]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [XmlType(AnonymousType = true, Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public enum AltoFontStyles
+    {
+        /// <summary>Written as <c>bold</c>.</summary>
+        [XmlEnum("bold")]
+        Bold = 1 << 0,
+
+        /// <summary>Written as <c>italics</c>.</summary>
+        [XmlEnum("italics")]
+        Italics = 1 << 1,
+
+        /// <summary>Written as <c>subscript</c>.</summary>
+        [XmlEnum("subscript")]
+        Subscript = 1 << 2,
+
+        /// <summary>Written as <c>superscript</c>.</summary>
+        [XmlEnum("superscript")]
+        Superscript = 1 << 3,
+
+        /// <summary>Written as <c>smallcaps</c>.</summary>
+        [XmlEnum("smallcaps")]
+        SmallCaps = 1 << 4,
+
+        /// <summary>Written as <c>underline</c>.</summary>
+        [XmlEnum("underline")]
+        Underline = 1 << 5,
+    }
+
+    /// <summary>
+    /// [Alto] Type of the substitution (if any)
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public enum AltoSubsType
+    {
+        /// <summary>Written as <c>HypPart1</c>.</summary>
+        HypPart1 = 0,
+
+        /// <summary>Written as <c>HypPart2</c>.</summary>
+        HypPart2 = 1,
+
+        /// <summary>Written as <c>Abbreviation</c>.</summary>
+        Abbreviation = 2,
+    }
+
+    /// <summary>
+    /// [Alto/xlink] Block Type Show
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [XmlType(AnonymousType = true, Namespace = "http://www.w3.org/1999/xlink")]
+    public enum AltoBlockTypeShow
+    {
+        /// <summary>Written as <c>new</c>.</summary>
+        [XmlEnum("new")]
+        New = 0,
+
+        /// <summary>Written as <c>replace</c>.</summary>
+        [XmlEnum("replace")]
+        Replace = 1,
+
+        /// <summary>Written as <c>embed</c>.</summary>
+        [XmlEnum("embed")]
+        Embed = 2,
+
+        /// <summary>Written as <c>other</c>.</summary>
+        [XmlEnum("other")]
+        Other = 3,
+
+        /// <summary>Written as <c>none</c>.</summary>
+        [XmlEnum("none")]
+        None = 4,
+    }
+
+    /// <summary>
+    /// [Alto/xlink] Block Type Actuate
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [XmlType(AnonymousType = true, Namespace = "http://www.w3.org/1999/xlink")]
+    public enum AltoBlockTypeActuate
+    {
+        /// <summary>Written as <c>onLoad</c>.</summary>
+        [XmlEnum("onLoad")]
+        OnLoad = 0,
+
+        /// <summary>Written as <c>onRequest</c>.</summary>
+        [XmlEnum("onRequest")]
+        OnRequest = 1,
+
+        /// <summary>Written as <c>other</c>.</summary>
+        [XmlEnum("other")]
+        Other = 2,
+
+        /// <summary>Written as <c>none</c>.</summary>
+        [XmlEnum("none")]
+        None = 3,
+    }
+
+    /// <summary>
+    /// [Alto] Gives brief information about original page quality
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [XmlType(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")]
+    public enum AltoQuality
+    {
+        /// <summary>Written as <c>OK</c>.</summary>
+        OK = 0,
+
+        /// <summary>Written as <c>Missing</c>.</summary>
+        Missing = 1,
+
+        /// <summary>Written as <c>Missing in original</c>.</summary>
+        [XmlEnum("Missing in original")]
+        MissingInOriginal = 2,
+
+        /// <summary>Written as <c>Damaged</c>.</summary>
+        Damaged = 3,
+
+        /// <summary>Written as <c>Retained</c>.</summary>
+        Retained = 4,
+
+        /// <summary>Written as <c>Target</c>.</summary>
+        Target = 5,
+
+        /// <summary>Written as <c>As in original</c>.</summary>
+        [XmlEnum("As in original")]
+        AsInOriginal = 6,
+    }
+
+    ///
+    /// [Alto] Position of the page. Could be lefthanded, righthanded, cover, foldout or single if it has no special position.
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")] + public enum AltoPosition + { + + /// + Left, + + /// + Right, + + /// + Foldout, + + /// + Single, + + /// + Cover, + } + + /// + /// [Alto] There are following variation of tag types available: + /// LayoutTag – criteria about arrangement or graphical appearance; + /// StructureTag – criteria about grouping or formation; + /// RoleTag – criteria about function or mission; + /// NamedEntityTag – criteria about assignment of terms to their relationship / meaning (NER); + /// OtherTag – criteria about any other characteristic not listed above, the TYPE attribute is intended to be used for classification within those.; + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#", IncludeInSchema = false)] + public enum AltoItemsChoice + { + + /// + /// Criteria about arrangement or graphical appearance + /// + LayoutTag, + + /// + /// Criteria about assignment of terms to their relationship / meaning (NER) + /// + NamedEntityTag, + + /// + /// Criteria about any other characteristic not listed above, the TYPE attribute is intended to be used for classification within those. + /// + OtherTag, + + /// + /// Criteria about function or mission + /// + RoleTag, + + /// + /// Criteria about grouping or formation + /// + StructureTag, + } + + /// + /// [Alto] Indicates the alignement of the paragraph. Could be left, right, center or justify. 
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(AnonymousType = true, Namespace = "http://www.loc.gov/standards/alto/ns-v4#")] + public enum AltoParagraphStyleAlign + { + + /// + Left, + + /// + Right, + + /// + Center, + + /// + Block, + } + + /// + /// [Alto] Serif or Sans-Serif + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")] + public enum AltoFontType + { + + /// + /// + /// + [XmlEnumAttribute("serif")] + Serif, + + /// + /// + /// + [XmlEnumAttribute("sans-serif")] + SansSerif, + } + + /// + /// [Alto] Fixed or proportional + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")] + public enum AltoFontWidth + { + /// + [XmlEnumAttribute("proportional")] + Proportional, + + /// + [XmlEnumAttribute("fixed")] + Fixed, + } + + /// + /// [Alto] Classification of the category of operation, how the file was created, including generation, modification, + /// preprocessing, postprocessing or any other steps. 
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [FlagsAttribute()] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://www.loc.gov/standards/alto/ns-v4#")] + public enum AltoProcessingCategory + { + /// + [XmlEnumAttribute("contentGeneration")] + ContentGeneration = 1, + + /// + [XmlEnumAttribute("contentModification")] + ContentModification = 2, + + /// + [XmlEnumAttribute("preOperation")] + PreOperation = 4, + + /// + [XmlEnumAttribute("postOperation")] + PostOperation = 8, + + /// + [XmlEnumAttribute("other")] + Other = 16, + } + } + #endregion +} diff --git a/src/UglyToad.PdfPig/Export/HOcrTextExporter.cs b/src/UglyToad.PdfPig/Export/HOcrTextExporter.cs new file mode 100644 index 00000000..41d9f0eb --- /dev/null +++ b/src/UglyToad.PdfPig/Export/HOcrTextExporter.cs @@ -0,0 +1,345 @@ +using System; +using System.Linq; +using UglyToad.PdfPig.Content; +using UglyToad.PdfPig.DocumentLayoutAnalysis; +using UglyToad.PdfPig.Geometry; +using UglyToad.PdfPig.Util; + +namespace UglyToad.PdfPig.Export +{ + /// + /// hOCR v1.2 (HTML) text exporter. + /// See http://kba.cloud/hocr-spec/1.2/ + /// + internal class HOcrTextExporter : ITextExporter + { + private const string xmlHeader = "\n\n"; + private const string hocrjs = "\n"; + + private IPageSegmenter pageSegmenter; + private IWordExtractor wordExtractor; + + private decimal scale; + private string indentChar; + + private int pageCount = 0; + private int areaCount = 0; + private int lineCount = 0; + private int wordCount = 0; + private int pathCount = 0; + private int paraCount = 0; + + /// + /// hOCR v1.2 (HTML) + /// See http://kba.cloud/hocr-spec/1.2/ + /// + /// + /// + /// + /// Indent character. 
+        public HOcrTextExporter(IWordExtractor wordExtractor, IPageSegmenter pageSegmenter, double scale = 1.0, string indent = "\t")
+        {
+            this.wordExtractor = wordExtractor;
+            this.pageSegmenter = pageSegmenter;
+            this.scale = (decimal)scale;
+            this.indentChar = indent;
+        }
+
+        /// <summary>
+        /// Get the hOCR (HTML) string of the layout of every page of the document, in page order.
+        /// </summary>
+        /// <param name="document">The document.</param>
+        /// <param name="includePaths">Draw <see cref="PdfPath"/>s present in the page.</param>
+        /// <param name="useHocrjs">Will add a reference to the 'hocrjs' script just before the closing 'body' tag, adding the
+        /// interface to a plain hOCR file. See https://github.com/kba/hocrjs for more information.</param>
+        public string Get(PdfDocument document, bool includePaths = false, bool useHocrjs = false)
+        {
+            string hocr = GetHead() + indentChar + "\n";
+
+            for (var i = 0; i < document.NumberOfPages; i++)
+            {
+                var page = document.GetPage(i + 1);
+                hocr += GetCode(page, includePaths) + "\n";
+            }
+
+            if (useHocrjs) hocr += indentChar + indentChar + hocrjs;
+            hocr += indentChar + "";
+            hocr = xmlHeader + AddHtmlHeader(hocr);
+            return hocr;
+        }
+
+        /// <summary>
+        /// Get the hOCR (HTML) string of the page layout. Excludes <see cref="PdfPath"/>s.
+        /// </summary>
+        /// <param name="page">The page.</param>
+        /// <returns>The hOCR (HTML) string for the page.</returns>
+        public string Get(Page page)
+        {
+            return Get(page, false);
+        }
+
+        /// <summary>
+        /// Get the hOCR (HTML) string of the page layout.
+        /// </summary>
+        /// <param name="page">The page.</param>
+        /// <param name="includePaths">Draw <see cref="PdfPath"/>s present in the page.</param>
+        /// <param name="imageName">The image name, if any.</param>
+        /// <param name="useHocrjs">Will add a reference to the 'hocrjs' script just before the closing 'body' tag, adding the interface to a plain hOCR file. See https://github.com/kba/hocrjs for more information.</param>
+        public string Get(Page page, bool includePaths = false, string imageName = "unknown", bool useHocrjs = false)
+        {
+            string hocr = GetHead() + indentChar + "\n";
+
+            hocr += GetCode(page, includePaths, imageName) + "\n";
+
+            if (useHocrjs) hocr += indentChar + indentChar + hocrjs;
+            hocr += indentChar + "";
+            hocr = xmlHeader + AddHtmlHeader(hocr);
+            return hocr;
+        }
+
+        private string GetHead()
+        {
+            return indentChar + "" + "\n" + indentChar + indentChar + "" + "\n" + indentChar + indentChar + "" + "\n" + indentChar + indentChar + "" + "\n" + indentChar + indentChar + "" + "\n" + indentChar + "\n";
+        }
+
+        private string AddHtmlHeader(string content)
+        {
+            return "\n" + content + "\n";
+        }
+
+        /// <summary>
+        /// Get indent string from level: the indent string repeated <paramref name="level"/> times.
+        /// </summary>
+        /// <param name="level">The indent level.</param>
+        /// <returns>The indentation prefix; empty when <paramref name="level"/> is zero or negative.</returns>
+        private string GetIndent(int level)
+        {
+            string indent = "";
+            for (int i = 0; i < level; i++)
+            {
+                indent += indentChar;
+            }
+            return indent;
+        }
+
+        /// <summary>
+        /// Get the hOCR string for the page.
+        /// http://kba.cloud/hocr-spec/1.2/#elementdef-ocr_page
+        /// </summary>
+        /// <param name="page">The page.</param>
+        /// <param name="imageName">The image name, if any.</param>
+        /// <param name="includePaths">Draw <see cref="PdfPath"/>s present in the page.</param>
+        private string GetCode(Page page, bool includePaths, string imageName = "unknown")
+        {
+            pageCount++;
+            int level = 2;
+
+            string hocr = GetIndent(level) + @"
"; + + if (includePaths) + { + foreach (var path in page.ExperimentalAccess.Paths) + { + hocr += "\n" + GetCode(path, page.Height, true, level + 1); + } + } + + var words = page.GetWords(wordExtractor); + + if (words.Count() > 0) + { + var blocks = pageSegmenter.GetBlocks(words); + foreach (var block in blocks) + { + hocr += "\n" + GetCodeArea(block, page.Height, level + 1); + } + } + + hocr += "\n" + GetIndent(level) + @"
"; + return hocr; + } + + /// + /// Get the hORC string for the path. + /// http://kba.cloud/hocr-spec/1.2/#elementdef-ocr_linedrawing + /// + /// + /// + /// + /// The indent level. + /// + private string GetCode(PdfPath path, decimal pageHeight, bool subPaths, int level) + { + if (path == null) return string.Empty; + + string hocr = string.Empty; + + if (subPaths) + { + var bbox = path.GetBoundingRectangle(); + if (bbox.HasValue) + { + areaCount++; + hocr += GetIndent(level) + @"
\n"; + foreach (var subPath in path.Commands) + { + var subBbox = subPath.GetBoundingRectangle(); + if (subBbox.HasValue) + { + pathCount++; + hocr += GetIndent(level + 1) + @"\n"; + } + } + hocr += GetIndent(level) + @"
"; + } + } + else + { + var bbox = path.GetBoundingRectangle(); + if (bbox.HasValue) + { + pathCount++; + hocr += GetIndent(level) + @""; + } + + } + + return hocr; + } + + /// + /// Get the hORC string for the area. + /// http://kba.cloud/hocr-spec/1.2/#elementdef-ocr_carea + /// + /// The text area. + /// + /// The indent level. + private string GetCodeArea(TextBlock block, decimal pageHeight, int level) + { + areaCount++; + + string bbox = GetCode(block.BoundingBox, pageHeight); + string hocr = GetIndent(level) + @"
"; + + hocr += GetCodeParagraph(block, pageHeight, level + 1); // we concider 1 area = 1 block. should change in the future + hocr += "\n" + GetIndent(level) + @"
"; + return hocr; + } + + /// + /// Get the hORC string for the paragraph. + /// See http://kba.cloud/hocr-spec/1.2/#elementdef-ocr_par + /// + /// The paragraph. + /// + /// The indent level. + /// + private string GetCodeParagraph(TextBlock block, decimal pageHeight, int level) + { + paraCount++; + string hocr = "\n" + GetIndent(level) + @"

"; // lang='eng' + + foreach (var line in block.TextLines) + { + hocr += "\n" + GetCode(line, pageHeight, level + 1); + } + hocr += "\n" + GetIndent(level) + @"

"; + + return hocr; + } + + /// + /// Get the hORC string for the text line. + /// See http://kba.cloud/hocr-spec/1.2/#elementdef-ocr_line + /// + /// + /// + /// The indent level. + private string GetCode(TextLine line, decimal pageHeight, int level) + { + lineCount++; + double angle = 0; + + // http://kba.cloud/hocr-spec/1.2/#propdef-baseline + // below will be 0 as long as the word's bounding box bottom is the BaseLine and not 'Bottom' + double baseLine = (double)line.Words[0].Letters[0].StartBaseLine.Y; + baseLine = (double)line.BoundingBox.Bottom - baseLine; + + string hocr = GetIndent(level) + @""; //"; x_size 42; x_descenders 5; x_ascenders 12' >"; + + foreach (var word in line.Words) + { + hocr += "\n" + GetCode(word, pageHeight, level + 1); + } + hocr += "\n" + GetIndent(level) + @""; + return hocr; + } + + /// + /// Get the hORC string for the word. + /// See http://kba.cloud/hocr-spec/1.2/#elementdef-ocrx_word + /// + /// + /// + /// The indent level. + private string GetCode(Word word, decimal pageHeight, int level) + { + wordCount++; + string hocr = GetIndent(level) + + @" "; + return hocr; + } + + private int GetConfidence(Word word) + { + return 100; + } + + /// + /// Get the hORC string for the bounding box. + /// See http://kba.cloud/hocr-spec/1.2/#propdef-bbox + /// + /// + /// + private string GetCode(PdfRectangle rectangle, decimal pageHeight) + { + // the values are with reference to the the top-left + // corner of the document image and measured in pixels + + var left = (int)Math.Round(rectangle.Left * scale); + var top = (int)Math.Round((pageHeight - rectangle.Top) * scale); + var right = (int)Math.Round(rectangle.Right * scale); + var bottom = (int)Math.Round((pageHeight - rectangle.Bottom) * scale); + + return @"bbox " + (left > 0 ? left : 0) + " " + + (top > 0 ? top : 0) + " " + + (right > 0 ? right : 0) + " " + + (bottom > 0 ? 
bottom : 0); + } + } +} diff --git a/src/UglyToad.PdfPig/Export/ITextExporter.cs b/src/UglyToad.PdfPig/Export/ITextExporter.cs new file mode 100644 index 00000000..96a2877f --- /dev/null +++ b/src/UglyToad.PdfPig/Export/ITextExporter.cs @@ -0,0 +1,17 @@ +using UglyToad.PdfPig.Content; + +namespace UglyToad.PdfPig.Export +{ + /// + /// Exports the page's text into the desired format. + /// + public interface ITextExporter + { + /// + /// Get the text representation. + /// + /// + /// + string Get(Page page); + } +} diff --git a/src/UglyToad.PdfPig/Export/PageXmlTextExporter.cs b/src/UglyToad.PdfPig/Export/PageXmlTextExporter.cs new file mode 100644 index 00000000..67b4cc2a --- /dev/null +++ b/src/UglyToad.PdfPig/Export/PageXmlTextExporter.cs @@ -0,0 +1,9803 @@ +using System; +using System.CodeDom.Compiler; +using System.Collections.Generic; +using System.ComponentModel; +using System.Diagnostics; +using System.Linq; +using System.Xml.Serialization; +using UglyToad.PdfPig.Content; +using UglyToad.PdfPig.DocumentLayoutAnalysis; +using UglyToad.PdfPig.Geometry; +using UglyToad.PdfPig.Graphics.Colors; +using UglyToad.PdfPig.Util; + +namespace UglyToad.PdfPig.Export +{ + /// + /// PAGE-XML 2019-07-15 (XML) text exporter. + /// See https://github.com/PRImA-Research-Lab/PAGE-XML + /// + internal class PageXmlTextExporter : ITextExporter + { + private IPageSegmenter pageSegmenter; + private IWordExtractor wordExtractor; + + private decimal scale; + private string indentChar; + + int lineCount = 0; + int wordCount = 0; + int glyphCount = 0; + int regionCount = 0; + + /// + /// PAGE-XML 2019-07-15 (XML) text exporter. + /// See https://github.com/PRImA-Research-Lab/PAGE-XML + /// + /// + /// + /// + /// Indent character. 
+        public PageXmlTextExporter(IWordExtractor wordExtractor, IPageSegmenter pageSegmenter, double scale = 1.0, string indent = "\t")
+        {
+            this.wordExtractor = wordExtractor;
+            this.pageSegmenter = pageSegmenter;
+            this.scale = (decimal)scale;
+            this.indentChar = indent;
+        }
+
+        /// <summary>
+        /// Get the PAGE-XML (XML) string of the pages layout. Not implemented: always throws <see cref="NotImplementedException"/>.
+        /// </summary>
+        /// <param name="document">The document.</param>
+        /// <param name="includePaths">Draw <see cref="PdfPath"/>s present in the page.</param>
+        public string Get(PdfDocument document, bool includePaths = false)
+        {
+            throw new NotImplementedException();
+        }
+
+        /// <summary>
+        /// Get the PAGE-XML (XML) string of the pages layout. Excludes <see cref="PdfPath"/>s.
+        /// </summary>
+        /// <param name="page">The page.</param>
+        /// <returns>The serialized PAGE-XML document.</returns>
+        public string Get(Page page)
+        {
+            return Get(page, false);
+        }
+
+        /// <summary>
+        /// Get the PAGE-XML (XML) string of the pages layout.
+        /// </summary>
+        /// <param name="page">The page.</param>
+        /// <param name="includePaths">Draw <see cref="PdfPath"/>s present in the page.</param>
+        public string Get(Page page, bool includePaths)
+        {
+            PageXmlDocument pageXmlDocument = new PageXmlDocument()
+            {
+                Metadata = new PageXmlDocument.PageXmlMetadata()
+                {
+                    Created = DateTime.UtcNow,
+                    LastChange = DateTime.UtcNow,
+                    Creator = "PdfPig",
+                    Comments = pageSegmenter.GetType().Name + "|" + wordExtractor.GetType().Name,
+                },
+                PcGtsId = "pc-" + page.GetHashCode()
+            };
+
+            pageXmlDocument.Page = ToPageXmlPage(page, includePaths);
+
+            return Serialize(pageXmlDocument);
+        }
+
+        /// <summary>
+        /// Formats a point as "x,y": scales both coordinates, flips y against the height, clamps negatives to 0, no decimals.
+        /// </summary>
+        /// <param name="point">The point, in the page's coordinates.</param>
+        /// <param name="height">The height the y coordinate is subtracted from (callers pass the page height).</param>
+        /// <returns>The "x,y" string for the point.</returns>
+        private string PointToString(PdfPoint point, decimal height)
+        {
+            decimal x = point.X * scale;
+            decimal y = (height - point.Y) * scale;
+            return (x > 0 ? x : 0).ToString("0") + "," + (y > 0 ? y : 0).ToString("0");
+        }
+
+        /// <summary>
+        /// Converts each point with <see cref="PointToString"/> and joins the results with single spaces.
+        /// </summary>
+        /// <param name="points">The points, in the order they should appear.</param>
+        /// <param name="height">The height used to flip the y axis.</param>
+        /// <returns>The space-separated list of "x,y" pairs.</returns>
+        private string ToPoints(IEnumerable points, decimal height)
+        {
+            return string.Join(" ", points.Select(p => PointToString(p, height)));
+        }
+
+        /// <summary>
+        /// Gets the points string of a rectangle; corners are emitted bottom-left, top-left, top-right, bottom-right.
+        /// </summary>
+        /// <param name="pdfRectangle">The rectangle.</param>
+        /// <param name="height">The height used to flip the y axis.</param>
+        /// <returns>The space-separated list of the four corners.</returns>
+        private string ToPoints(PdfRectangle pdfRectangle, decimal height)
+        {
+            return ToPoints(new[] { pdfRectangle.BottomLeft, pdfRectangle.TopLeft, pdfRectangle.TopRight, pdfRectangle.BottomRight }, height);
+        }
+
+        /// <summary>
+        /// Wraps the points string of a rectangle in a coords element; the confidence is left unset.
+        /// </summary>
+        /// <param name="pdfRectangle">The rectangle.</param>
+        /// <param name="height">The height used to flip the y axis.</param>
+        /// <returns>The coords element.</returns>
+        private PageXmlDocument.PageXmlCoords ToCoords(PdfRectangle pdfRectangle, decimal height)
+        {
+            return new PageXmlDocument.PageXmlCoords()
+            {
+                //Conf = 1,
+                Points = ToPoints(pdfRectangle, height)
+            };
+        }
+
+        /// <summary>
+        /// PageXml Text colour in RGB encoded format
+        /// (red value) + (256 x green value) + (65536 x blue value).
+        /// </summary>
+        private string ToRgbEncoded(IColor color)
+        {
+            var rgb = color.ToRGBValues();
+            int red = (int)Math.Round(255f * (float)rgb.r);
+            int green = 256 * (int)Math.Round(255f * (float)rgb.g);
+            int blue = 65536 * (int)Math.Round(255f * (float)rgb.b);
+            int sum = red + green + blue;
+
+            // as per below, red and blue order might be inverted...
+            //var colorWin = System.Drawing.Color.FromArgb(sum);
+
+            return sum.ToString();
+        }
+
+        /// <summary>
+        /// Builds the page element: one text region per segmented block, then optionally one line drawing region per path.
+        /// </summary>
+        /// <param name="page">The page.</param>
+        /// <param name="includePaths">Draw <see cref="PdfPath"/>s present in the page.</param>
+        private PageXmlDocument.PageXmlPage ToPageXmlPage(Page page, bool includePaths)
+        {
+            var pageXmlPage = new PageXmlDocument.PageXmlPage()
+            {
+                //Border = new PageXmlBorder()
+                //{
+                //    Coords = new PageXmlCoords()
+                //    {
+                //        Points = page.
+                //    }
+                //},
+                ImageFilename = "unknown",
+                ImageHeight = (int)page.Height,
+                ImageWidth = (int)page.Width,
+                //PrintSpace = new PageXmlPrintSpace()
+                //{
+                //    Coords = new PageXmlCoords()
+                //    {
+
+                //    }
+                //}
+            };
+
+            var words = page.GetWords(wordExtractor);
+            var regions = new List();
+
+            if (words.Any())
+            {
+                var blocks = pageSegmenter.GetBlocks(words);
+                regions.AddRange(blocks.Select(b => ToPageXmlTextRegion(b, page.Height)));
+            }
+
+            if (includePaths)
+            {
+                // Enumerate the lazy projection exactly once: ToPageXmlLineDrawingRegion increments
+                // regionCount, so testing with Count() and then adding again skipped region ids.
+                var graphicalElements = page.ExperimentalAccess.Paths
+                    .Select(p => ToPageXmlLineDrawingRegion(p, page.Height)).Where(g => g != null);
+                regions.AddRange(graphicalElements);
+            }
+
+            pageXmlPage.Items = regions.ToArray();
+            return pageXmlPage;
+        }
+
+        private PageXmlDocument.PageXmlLineDrawingRegion ToPageXmlLineDrawingRegion(PdfPath pdfPath, decimal height)
+        {
+            var bbox = pdfPath.GetBoundingRectangle();
+            if (bbox.HasValue)
+            {
+                regionCount++;
+                return new PageXmlDocument.PageXmlLineDrawingRegion()
+                {
+                    Coords = ToCoords(bbox.Value, height),
+                    Id = "r" + regionCount
+                };
+            }
+            return null;
+        }
+
+        /// <summary>
+        /// Converts a text block to a text region with id "r" + running region count, including its lines and text.
+        /// </summary>
+        /// <param name="textBlock">The text block.</param>
+        /// <param name="height">The height used to flip the y axis.</param>
+        /// <returns>The text region.</returns>
+        private PageXmlDocument.PageXmlTextRegion ToPageXmlTextRegion(TextBlock textBlock, decimal height)
+        {
+            regionCount++;
+            return new PageXmlDocument.PageXmlTextRegion()
+            {
+                Coords = ToCoords(textBlock.BoundingBox, height),
+                TextLines = textBlock.TextLines.Select(l => ToPageXmlTextLine(l, height)).ToArray(),
+                TextEquivs = new[] { new PageXmlDocument.PageXmlTextEquiv() { Unicode = textBlock.Text } },
+                Id = "r" + regionCount
+            };
+        }
+
+        /// <summary>
+        /// Converts a text line to a text line element with id "l" + running line count, including its words and text.
+        /// </summary>
+        /// <param name="textLine">The text line.</param>
+        /// <param name="height">The height used to flip the y axis.</param>
+        /// <returns>The text line element.</returns>
+        private PageXmlDocument.PageXmlTextLine ToPageXmlTextLine(TextLine textLine, decimal height)
+        {
+            lineCount++;
+            return new PageXmlDocument.PageXmlTextLine()
+            {
+                Coords = ToCoords(textLine.BoundingBox, height),
+                //Baseline = new PageXmlBaseline() { },
+                Production = PageXmlDocument.PageXmlProductionSimpleType.Printed,
+                //ReadingDirection = PageXmlReadingDirectionSimpleType.LeftToRight,
+                Words = textLine.Words.Select(w => ToPageXmlWord(w, height)).ToArray(),
+                TextEquivs = new[] { new PageXmlDocument.PageXmlTextEquiv() { Unicode = textLine.Text } },
+                Id = "l" + lineCount
+            };
+        }
+
+        /// <summary>
+        /// Converts a word to a word element with id "w" + running word count, including its glyphs and text.
+        /// </summary>
+        /// <param name="word">The word.</param>
+        /// <param name="height">The height used to flip the y axis.</param>
+        /// <returns>The word element.</returns>
+        private PageXmlDocument.PageXmlWord ToPageXmlWord(Word word, decimal height)
+        {
+            wordCount++;
+            return new PageXmlDocument.PageXmlWord()
+            {
+                Coords = ToCoords(word.BoundingBox, height),
+                Glyphs = word.Letters.Select(l => ToPageXmlGlyph(l, height)).ToArray(),
+                TextEquivs = new[] { new PageXmlDocument.PageXmlTextEquiv() { Unicode = word.Text } },
+                Id = "w" + wordCount
+            };
+        }
+
+        /// <summary>
+        /// Converts a letter to a glyph element with id "c" + running glyph count, carrying font size, font name and colour.
+        /// </summary>
+        /// <param name="letter">The letter.</param>
+        /// <param name="height">The height used to flip the y axis.</param>
+        /// <returns>The glyph element.</returns>
+        private PageXmlDocument.PageXmlGlyph ToPageXmlGlyph(Letter letter, decimal height)
+        {
+            glyphCount++;
+            return new PageXmlDocument.PageXmlGlyph()
+            {
+                Coords = ToCoords(letter.GlyphRectangle, height),
+                Ligature = false,
+                Production = PageXmlDocument.PageXmlProductionSimpleType.Printed,
+                TextStyle = new PageXmlDocument.PageXmlTextStyle()
+                {
+                    FontSize = (float)letter.FontSize,
+                    FontFamily = letter.FontName,
+                    TextColourRgb = ToRgbEncoded(letter.Color),
+                },
+                TextEquivs = new[] { new PageXmlDocument.PageXmlTextEquiv() { Unicode = letter.Value } },
+                Id = "c" + glyphCount
+            };
+        }
+
+        private static PageXmlDocument Deserialize(string xmlPath)
+        {
+            XmlSerializer serializer = new XmlSerializer(typeof(PageXmlDocument));
+
+            using (var reader = System.Xml.XmlReader.Create(xmlPath))
+            {
+                return (PageXmlDocument)serializer.Deserialize(reader);
+            }
+        }
+
+        private string Serialize(PageXmlDocument pageXmlDocument)
+        {
+            XmlSerializer serializer = new XmlSerializer(typeof(PageXmlDocument));
+            var settings = new System.Xml.XmlWriterSettings()
+            {
+                //Encoding = new System.Text.UTF8Encoding(true),
+                Indent = true,
+                IndentChars = indentChar,
+                OmitXmlDeclaration = true // hack to manually handle utf-8
+            };
+
+            using (var stringWriter = new 
System.IO.StringWriter()) + using (var xmlWriter = System.Xml.XmlWriter.Create(stringWriter, settings)) + { + stringWriter.WriteLine(""); // hack to manually handle utf-8 + serializer.Serialize(xmlWriter, pageXmlDocument); + return stringWriter.ToString(); + } + } + } + + #region PageXml Schema + /****************************************************************************** + * PAGE pagecontent version 2019-07-15 https://www.primaresearch.org/schema/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd + * Auto-generated by xsd and improved by BobLD + ******************************************************************************/ + + /// + /// PAGE (Page Analysis and Ground-Truth Elements) root + /// Version 2019-07-15 + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + [XmlRootAttribute("PcGts", Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15", IsNullable = false)] + public class PageXmlDocument + { + private PageXmlMetadata metadataField; + + private PageXmlPage pageField; + + private string pcGtsIdField; + + /// + public PageXmlMetadata Metadata + { + get + { + return this.metadataField; + } + set + { + this.metadataField = value; + } + } + + /// + public PageXmlPage Page + { + get + { + return this.pageField; + } + set + { + this.pageField = value; + } + } + + /// + [XmlAttributeAttribute("pcGtsId", DataType = "ID")] + public string PcGtsId + { + get + { + return this.pcGtsIdField; + } + set + { + this.pcGtsIdField = value; + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + 
[XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlMetadata + { + + private string creatorField; + + private DateTime createdField; + + private DateTime lastChangeField; + + private string commentsField; + + private PageXmlUserAttribute[] userDefinedField; + + private PageXmlMetadataItem[] metadataItemField; + + private string externalRefField; + + /// + public string Creator + { + get + { + return this.creatorField; + } + set + { + this.creatorField = value; + } + } + + /// + /// The timestamp has to be in UTC (Coordinated Universal Time) and not local time. + /// + public DateTime Created + { + get + { + return this.createdField; + } + set + { + this.createdField = value; + } + } + + /// + /// The timestamp has to be in UTC (Coordinated Universal Time) and not local time. + /// + public DateTime LastChange + { + get + { + return this.lastChangeField; + } + set + { + this.lastChangeField = value; + } + } + + /// + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + + /// + [XmlArrayItemAttribute("UserAttribute", IsNullable = false)] + public PageXmlUserAttribute[] UserDefined + { + get + { + return this.userDefinedField; + } + set + { + this.userDefinedField = value; + } + } + + /// + /// + /// + [XmlElementAttribute("MetadataItem")] + public PageXmlMetadataItem[] MetadataItems + { + get + { + return this.metadataItemField; + } + set + { + this.metadataItemField = value; + } + } + + /// + /// External reference of any kind + /// + [XmlAttributeAttribute("externalRef")] + public string ExternalRef + { + get + { + return this.externalRefField; + } + set + { + this.externalRefField = value; + } + } + } + + /// + /// Structured custom data defined by name, type and value. 
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlUserAttribute + { + + private string nameField; + + private string descriptionField; + + private PageXmlUserAttributeType typeField; + + private bool typeFieldSpecified; + + private string valueField; + + /// + [XmlAttributeAttribute("name")] + public string Name + { + get + { + return this.nameField; + } + set + { + this.nameField = value; + } + } + + /// + [XmlAttributeAttribute("description")] + public string Description + { + get + { + return this.descriptionField; + } + set + { + this.descriptionField = value; + } + } + + /// + [XmlAttributeAttribute("type")] + public PageXmlUserAttributeType Type + { + get + { + return this.typeField; + } + set + { + this.typeField = value; + this.typeFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool TypeSpecified + { + get + { + return this.typeFieldSpecified; + } + set + { + this.typeFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("value")] + public string Value + { + get + { + return this.valueField; + } + set + { + this.valueField = value; + } + } + } + + /// + /// Points with x,y coordinates. 
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlGridPoints + { + + private int indexField; + + private string pointsField; + + /// + /// The grid row index + /// + [XmlAttributeAttribute("index")] + public int Index + { + get + { + return this.indexField; + } + set + { + this.indexField = value; + } + } + + /// + [XmlAttributeAttribute("points")] + public string Points + { + get + { + return this.pointsField; + } + set + { + this.pointsField = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlTextEquiv + { + #region private + private string plainTextField; + + private string unicodeField; + + private string indexField; + + private float confField; + + private bool confFieldSpecified; + + private PageXmlTextDataSimpleType dataTypeField; + + private bool dataTypeFieldSpecified; + + private string dataTypeDetailsField; + + private string commentsField; + #endregion + + /// + /// Text in a "simple" form (ASCII or extended ASCII + /// as mostly used for typing). I.e.no use of + /// special characters for ligatures (should be + /// stored as two separate characters) etc. + /// + public string PlainText + { + get + { + return this.plainTextField; + } + set + { + this.plainTextField = value; + } + } + + /// + /// Correct encoding of the original, always using the corresponding Unicode code point. + /// I.e. ligatures have to be represented as one character etc. 
+ /// + public string Unicode + { + get + { + return this.unicodeField; + } + set + { + this.unicodeField = value; + } + } + + /// + /// Used for sort order in case multiple TextEquivs are defined. + /// The text content with the lowest index should be interpreted as the main text content. + /// + [XmlAttributeAttribute("index", DataType = "integer")] + public string Index + { + get + { + return this.indexField; + } + set + { + this.indexField = value; + } + } + + /// + [XmlAttributeAttribute("conf")] + public float Conf + { + get + { + return this.confField; + } + set + { + this.confField = value; + this.confFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ConfSpecified + { + get + { + return this.confFieldSpecified; + } + set + { + this.confFieldSpecified = value; + } + } + + /// + /// Type of text content (is it free text or a number, for instance). This is only + /// a descriptive attribute, the text type is not checked during XML validation. + /// + [XmlAttributeAttribute("dataType")] + public PageXmlTextDataSimpleType DataType + { + get + { + return this.dataTypeField; + } + set + { + this.dataTypeField = value; + this.dataTypeFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool DataTypeSpecified + { + get + { + return this.dataTypeFieldSpecified; + } + set + { + this.dataTypeFieldSpecified = value; + } + } + + /// + /// Refinement for dataType attribute. Can be a regular expression, for instance. 
+ /// + [XmlAttributeAttribute("dataTypeDetails")] + public string DataTypeDetails + { + get + { + return this.dataTypeDetailsField; + } + set + { + this.dataTypeDetailsField = value; + } + } + + /// + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + + /// + public override string ToString() + { + return this.Unicode; + } + } + + /// + /// Base type for graphemes, grapheme groups and non-printing characters. + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [XmlIncludeAttribute(typeof(PageXmlGraphemeGroup))] + [XmlIncludeAttribute(typeof(PageXmlNonPrintingChar))] + [XmlIncludeAttribute(typeof(PageXmlGrapheme))] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public abstract class PageXmlGraphemeBase + { + + private PageXmlTextEquiv[] textEquivField; + + private string idField; + + private int indexField; + + private bool ligatureField; + + private bool ligatureFieldSpecified; + + private PageXmlGraphemeBaseCharType charTypeField; + + private bool charTypeFieldSpecified; + + private string customField; + + private string commentsField; + + /// + [XmlElementAttribute("TextEquiv")] + public PageXmlTextEquiv[] TextEquivs + { + get + { + return this.textEquivField; + } + set + { + this.textEquivField = value; + } + } + + /// + [XmlAttributeAttribute("id", DataType = "ID")] + public string Id + { + get + { + return this.idField; + } + set + { + this.idField = value; + } + } + + /// + /// Order index of grapheme, group, or non-printing character + /// within the parent container (graphemes or glyph or grapheme group). 
+ /// + [XmlAttributeAttribute("index")] + public int Index + { + get + { + return this.indexField; + } + set + { + this.indexField = value; + } + } + + /// + [XmlAttributeAttribute("ligature")] + public bool Ligature + { + get + { + return this.ligatureField; + } + set + { + this.ligatureField = value; + this.ligatureFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool LigatureSpecified + { + get + { + return this.ligatureFieldSpecified; + } + set + { + this.ligatureFieldSpecified = value; + } + } + + /// + /// Type of character represented by the grapheme, group, or non-printing character element. + /// + [XmlAttributeAttribute("charType")] + public PageXmlGraphemeBaseCharType CharType + { + get + { + return this.charTypeField; + } + set + { + this.charTypeField = value; + this.charTypeFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool CharTypeSpecified + { + get + { + return this.charTypeFieldSpecified; + } + set + { + this.charTypeFieldSpecified = value; + } + } + + /// + /// For generic use + /// + [XmlAttributeAttribute("custom")] + public string Custom + { + get + { + return this.customField; + } + set + { + this.customField = value; + } + } + + /// + /// For generic use + /// + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlGraphemeGroup : PageXmlGraphemeBase + { + + private PageXmlGraphemeBase[] itemsField; + + /// + [XmlElementAttribute("Grapheme", typeof(PageXmlGrapheme))] + [XmlElementAttribute("NonPrintingChar", typeof(PageXmlNonPrintingChar))] + 
public PageXmlGraphemeBase[] Items + { + get + { + return this.itemsField; + } + set + { + this.itemsField = value; + } + } + } + + /// + /// Represents a sub-element of a glyph. Smallest graphical unit that can be assigned a Unicode code point. + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlGrapheme : PageXmlGraphemeBase + { + + private PageXmlCoords coordsField; + + /// + public PageXmlCoords Coords + { + get + { + return this.coordsField; + } + set + { + this.coordsField = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlCoords + { + + private string pointsField; + + private float confField; + + private bool confFieldSpecified; + + /// + /// Polygon outline of the element as a path of points. + /// No points may lie outside the outline of its parent, + /// which in the case of Border is the bounding rectangle + /// of the root image. Paths are closed by convention, + /// i.e. the last point logically connects with the first + /// (and at least 3 points are required to span an area). + /// Paths must be planar (i.e. must not self-intersect). 
+ /// + [XmlAttributeAttribute("points")] + public string Points + { + get + { + return this.pointsField; + } + set + { + this.pointsField = value; + } + } + + /// + /// Confidence value (between 0 and 1) + /// + [XmlAttributeAttribute("conf")] + public float Conf + { + get + { + return this.confField; + } + set + { + this.confField = value; + this.confFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ConfSpecified + { + get + { + return this.confFieldSpecified; + } + set + { + this.confFieldSpecified = value; + } + } + } + + /// + /// A glyph component without visual representation + /// but with Unicode code point. + /// Non-visual / non-printing / control character. + /// Part of grapheme container (of glyph) or grapheme sub group. + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlNonPrintingChar : PageXmlGraphemeBase + { + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlGlyph + { + #region private + private PageXmlAlternativeImage[] alternativeImageField; + + private PageXmlCoords coordsField; + + private PageXmlGraphemeBase[] graphemesField; + + private PageXmlTextEquiv[] textEquivField; + + private PageXmlTextStyle textStyleField; + + private PageXmlUserAttribute[] userDefinedField; + + private PageXmlLabels[] labelsField; + + private string idField; + + private bool ligatureField; + + private bool ligatureFieldSpecified; + + private bool 
symbolField; + + private bool symbolFieldSpecified; + + private PageXmlScriptSimpleType scriptField; + + private bool scriptFieldSpecified; + + private PageXmlProductionSimpleType productionField; + + private bool productionFieldSpecified; + + private string customField; + + private string commentsField; + #endregion + + /// + /// Alternative glyph images (e.g. black-and-white) + /// + [XmlElementAttribute("AlternativeImage")] + public PageXmlAlternativeImage[] AlternativeImages + { + get + { + return this.alternativeImageField; + } + set + { + this.alternativeImageField = value; + } + } + + /// + public PageXmlCoords Coords + { + get + { + return this.coordsField; + } + set + { + this.coordsField = value; + } + } + + /// + /// Container for graphemes, grapheme groups and non-printing characters + /// + [XmlArrayItemAttribute("Grapheme", typeof(PageXmlGrapheme), IsNullable = false)] + [XmlArrayItemAttribute("GraphemeGroup", typeof(PageXmlGraphemeGroup), IsNullable = false)] + [XmlArrayItemAttribute("NonPrintingChar", typeof(PageXmlNonPrintingChar), IsNullable = false)] + public PageXmlGraphemeBase[] Graphemes + { + get + { + return this.graphemesField; + } + set + { + this.graphemesField = value; + } + } + + /// + [XmlElementAttribute("TextEquiv")] + public PageXmlTextEquiv[] TextEquivs + { + get + { + return this.textEquivField; + } + set + { + this.textEquivField = value; + } + } + + /// + public PageXmlTextStyle TextStyle + { + get + { + return this.textStyleField; + } + set + { + this.textStyleField = value; + } + } + + /// + [XmlArrayItemAttribute("UserAttribute", IsNullable = false)] + public PageXmlUserAttribute[] UserDefined + { + get + { + return this.userDefinedField; + } + set + { + this.userDefinedField = value; + } + } + + /// + /// Semantic labels / tags + /// + [XmlElementAttribute("Labels")] + public PageXmlLabels[] Labels + { + get + { + return this.labelsField; + } + set + { + this.labelsField = value; + } + } + + /// + 
[XmlAttributeAttribute("id", DataType = "ID")] + public string Id + { + get + { + return this.idField; + } + set + { + this.idField = value; + } + } + + /// + [XmlAttributeAttribute("ligature")] + public bool Ligature + { + get + { + return this.ligatureField; + } + set + { + this.ligatureField = value; + this.ligatureFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool LigatureSpecified + { + get + { + return this.ligatureFieldSpecified; + } + set + { + this.ligatureFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("symbol")] + public bool Symbol + { + get + { + return this.symbolField; + } + set + { + this.symbolField = value; + this.symbolFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool SymbolSpecified + { + get + { + return this.symbolFieldSpecified; + } + set + { + this.symbolFieldSpecified = value; + } + } + + /// + /// The script used for the glyph + /// + [XmlAttributeAttribute("script")] + public PageXmlScriptSimpleType Script + { + get + { + return this.scriptField; + } + set + { + this.scriptField = value; + this.scriptFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ScriptSpecified + { + get + { + return this.scriptFieldSpecified; + } + set + { + this.scriptFieldSpecified = value; + } + } + + /// + /// Overrides the production attribute of the parent word / text line / text region. 
+ /// + [XmlAttributeAttribute("production")] + public PageXmlProductionSimpleType Production + { + get + { + return this.productionField; + } + set + { + this.productionField = value; + this.productionFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ProductionSpecified + { + get + { + return this.productionFieldSpecified; + } + set + { + this.productionFieldSpecified = value; + } + } + + /// + /// For generic use + /// + [XmlAttributeAttribute("custom")] + public string Custom + { + get + { + return this.customField; + } + set + { + this.customField = value; + } + } + + /// + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlAlternativeImage + { + + private string filenameField; + + private string commentsField; + + private float confField; + + private bool confFieldSpecified; + + /// + [XmlAttributeAttribute("filename")] + public string FileName + { + get + { + return this.filenameField; + } + set + { + this.filenameField = value; + } + } + + /// + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + + /// + /// Confidence value (between 0 and 1) + /// + [XmlAttributeAttribute("conf")] + public float Conf + { + get + { + return this.confField; + } + set + { + this.confField = value; + this.confFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ConfSpecified + { + get + { + return this.confFieldSpecified; + } + set + { + this.confFieldSpecified = value; + 
} + } + } + + /// + /// Monospace (fixed-pitch, non-proportional) or + /// proportional font. + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlTextStyle + { + #region private + private string fontFamilyField; + + private bool serifField; + + private bool serifFieldSpecified; + + private bool monospaceField; + + private bool monospaceFieldSpecified; + + private float fontSizeField; + + private bool fontSizeFieldSpecified; + + private string xHeightField; + + private int kerningField; + + private bool kerningFieldSpecified; + + private PageXmlColourSimpleType textColourField; + + private bool textColourFieldSpecified; + + private string textColourRgbField; + + private PageXmlColourSimpleType bgColourField; + + private bool bgColourFieldSpecified; + + private string bgColourRgbField; + + private bool reverseVideoField; + + private bool reverseVideoFieldSpecified; + + private bool boldField; + + private bool boldFieldSpecified; + + private bool italicField; + + private bool italicFieldSpecified; + + private bool underlinedField; + + private bool underlinedFieldSpecified; + + private PageXmlUnderlineStyleSimpleType underlineStyleField; + + private bool underlineStyleFieldSpecified; + + private bool subscriptField; + + private bool subscriptFieldSpecified; + + private bool superscriptField; + + private bool superscriptFieldSpecified; + + private bool strikethroughField; + + private bool strikethroughFieldSpecified; + + private bool smallCapsField; + + private bool smallCapsFieldSpecified; + + private bool letterSpacedField; + + private bool letterSpacedFieldSpecified; + #endregion + + /// + /// For instance: Arial, Times New Roman. 
+ /// Add more information if necessary + /// (e.g. blackletter, antiqua). + /// + [XmlAttributeAttribute("fontFamily")] + public string FontFamily + { + get + { + return this.fontFamilyField; + } + set + { + this.fontFamilyField = value; + } + } + + /// + /// Serif or sans-serif typeface. + /// + [XmlAttributeAttribute("serif")] + public bool Serif + { + get + { + return this.serifField; + } + set + { + this.serifField = value; + this.serifFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool SerifSpecified + { + get + { + return this.serifFieldSpecified; + } + set + { + this.serifFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("monospace")] + public bool Monospace + { + get + { + return this.monospaceField; + } + set + { + this.monospaceField = value; + this.monospaceFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool MonospaceSpecified + { + get + { + return this.monospaceFieldSpecified; + } + set + { + this.monospaceFieldSpecified = value; + } + } + + /// + /// The size of the characters in points. + /// + [XmlAttributeAttribute("fontSize")] + public float FontSize + { + get + { + return this.fontSizeField; + } + set + { + this.fontSizeField = value; + this.fontSizeFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool FontSizeSpecified + { + get + { + return this.fontSizeFieldSpecified; + } + set + { + this.fontSizeFieldSpecified = value; + } + } + + /// + /// The x-height or corpus size refers to the distance + /// between the baseline and the mean line of + /// lower-case letters in a typeface. + /// The unit is assumed to be pixels. + /// + [XmlAttributeAttribute("xHeight", DataType = "integer")] + public string XHeight + { + get + { + return this.xHeightField; + } + set + { + this.xHeightField = value; + } + } + + /// + /// The degree of space (in points) between + /// the characters in a string of text. 
+ /// + [XmlAttributeAttribute("kerning")] + public int Kerning + { + get + { + return this.kerningField; + } + set + { + this.kerningField = value; + this.kerningFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool KerningSpecified + { + get + { + return this.kerningFieldSpecified; + } + set + { + this.kerningFieldSpecified = value; + } + } + + /// + /// Text colour + /// + [XmlAttributeAttribute("textColour")] + public PageXmlColourSimpleType TextColour + { + get + { + return this.textColourField; + } + set + { + this.textColourField = value; + this.textColourFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool TextColourSpecified + { + get + { + return this.textColourFieldSpecified; + } + set + { + this.textColourFieldSpecified = value; + } + } + + /// + /// Text colour in RGB encoded format + /// (red value) + (256 x green value) + (65536 x blue value). + /// + [XmlAttributeAttribute("textColourRgb", DataType = "integer")] + public string TextColourRgb + { + get + { + return this.textColourRgbField; + } + set + { + this.textColourRgbField = value; + } + } + + /// + /// Background colour + /// + [XmlAttributeAttribute("bgColour")] + public PageXmlColourSimpleType BgColour + { + get + { + return this.bgColourField; + } + set + { + this.bgColourField = value; + this.bgColourFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool BgColourSpecified + { + get + { + return this.bgColourFieldSpecified; + } + set + { + this.bgColourFieldSpecified = value; + } + } + + /// + /// Background colour in RGB encoded format + /// (red value) + (256 x green value) + (65536 x blue value). + /// + [XmlAttributeAttribute("bgColourRgb", DataType = "integer")] + public string BgColourRgb + { + get + { + return this.bgColourRgbField; + } + set + { + this.bgColourRgbField = value; + } + } + + /// + /// Specifies whether the colour of the text appears + /// reversed against a background colour. 
+ /// + [XmlAttributeAttribute("reverseVideo")] + public bool ReverseVideo + { + get + { + return this.reverseVideoField; + } + set + { + this.reverseVideoField = value; + this.reverseVideoFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ReverseVideoSpecified + { + get + { + return this.reverseVideoFieldSpecified; + } + set + { + this.reverseVideoFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("bold")] + public bool Bold + { + get + { + return this.boldField; + } + set + { + this.boldField = value; + this.boldFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool BoldSpecified + { + get + { + return this.boldFieldSpecified; + } + set + { + this.boldFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("italic")] + public bool Italic + { + get + { + return this.italicField; + } + set + { + this.italicField = value; + this.italicFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ItalicSpecified + { + get + { + return this.italicFieldSpecified; + } + set + { + this.italicFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("underlined")] + public bool Underlined + { + get + { + return this.underlinedField; + } + set + { + this.underlinedField = value; + this.underlinedFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool UnderlinedSpecified + { + get + { + return this.underlinedFieldSpecified; + } + set + { + this.underlinedFieldSpecified = value; + } + } + + /// + /// Line style details if "underlined" is TRUE + /// + [XmlAttributeAttribute("underlineStyle")] + public PageXmlUnderlineStyleSimpleType UnderlineStyle + { + get + { + return this.underlineStyleField; + } + set + { + this.underlineStyleField = value; + this.underlineStyleFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool UnderlineStyleSpecified + { + get + { + return this.underlineStyleFieldSpecified; + } + set + { + 
this.underlineStyleFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("subscript")] + public bool Subscript + { + get + { + return this.subscriptField; + } + set + { + this.subscriptField = value; + this.subscriptFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool SubscriptSpecified + { + get + { + return this.subscriptFieldSpecified; + } + set + { + this.subscriptFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("superscript")] + public bool Superscript + { + get + { + return this.superscriptField; + } + set + { + this.superscriptField = value; + this.superscriptFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool SuperscriptSpecified + { + get + { + return this.superscriptFieldSpecified; + } + set + { + this.superscriptFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("strikethrough")] + public bool Strikethrough + { + get + { + return this.strikethroughField; + } + set + { + this.strikethroughField = value; + this.strikethroughFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool StrikethroughSpecified + { + get + { + return this.strikethroughFieldSpecified; + } + set + { + this.strikethroughFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("smallCaps")] + public bool SmallCaps + { + get + { + return this.smallCapsField; + } + set + { + this.smallCapsField = value; + this.smallCapsFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool SmallCapsSpecified + { + get + { + return this.smallCapsFieldSpecified; + } + set + { + this.smallCapsFieldSpecified = value; + } + } + + /// + [XmlAttributeAttribute("letterSpaced")] + public bool LetterSpaced + { + get + { + return this.letterSpacedField; + } + set + { + this.letterSpacedField = value; + this.letterSpacedFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool LetterSpacedSpecified + { + get + { + return this.letterSpacedFieldSpecified; + 
} + set + { + this.letterSpacedFieldSpecified = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlLabels + { + + private PageXmlLabel[] labelField; + + private string externalModelField; + + private string externalIdField; + + private string prefixField; + + private string commentsField; + + /// + /// A semantic label / tag + /// + [XmlElementAttribute("Label")] + public PageXmlLabel[] Labels + { + get + { + return this.labelField; + } + set + { + this.labelField = value; + } + } + + /// + /// Reference to external model / ontology / schema + /// + [XmlAttributeAttribute("externalModel")] + public string ExternalModel + { + get + { + return this.externalModelField; + } + set + { + this.externalModelField = value; + } + } + + /// + /// E.g. an RDF resource identifier (to be used as subject or object of an RDF triple) + /// + [XmlAttributeAttribute("externalId")] + public string ExternalId + { + get + { + return this.externalIdField; + } + set + { + this.externalIdField = value; + } + } + + /// + /// Prefix for all labels (e.g. 
first part of an URI) + /// + [XmlAttributeAttribute("prefix")] + public string Prefix + { + get + { + return this.prefixField; + } + set + { + this.prefixField = value; + } + } + + /// + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + } + + /// + /// Semantic label + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlLabel + { + + private string valueField; + + private string typeField; + + private string commentsField; + + /// + /// The label / tag (e.g. 'person'). Can be an RDF resource identifier (e.g. object of an RDF triple). + /// + [XmlAttributeAttribute("value")] + public string Value + { + get + { + return this.valueField; + } + set + { + this.valueField = value; + } + } + + /// + /// Additional information on the label (e.g. 'YYYY-mm-dd' for a date label). Can be used as predicate of an RDF triple. 
+ /// + [XmlAttributeAttribute("type")] + public string Type + { + get + { + return this.typeField; + } + set + { + this.typeField = value; + } + } + + /// + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlWord + { + #region private + private PageXmlAlternativeImage[] alternativeImageField; + + private PageXmlCoords coordsField; + + private PageXmlGlyph[] glyphField; + + private PageXmlTextEquiv[] textEquivField; + + private PageXmlTextStyle textStyleField; + + private PageXmlUserAttribute[] userDefinedField; + + private PageXmlLabels[] labelsField; + + private string idField; + + private PageXmlLanguageSimpleType languageField; + + private bool languageFieldSpecified; + + private PageXmlScriptSimpleType primaryScriptField; + + private bool primaryScriptFieldSpecified; + + private PageXmlScriptSimpleType secondaryScriptField; + + private bool secondaryScriptFieldSpecified; + + private PageXmlReadingDirectionSimpleType readingDirectionField; + + private bool readingDirectionFieldSpecified; + + private PageXmlProductionSimpleType productionField; + + private bool productionFieldSpecified; + + private string customField; + + private string commentsField; + #endregion + + /// + /// Alternative word images (e.g. 
black-and-white) + /// + [XmlElementAttribute("AlternativeImage")] + public PageXmlAlternativeImage[] AlternativeImages + { + get + { + return this.alternativeImageField; + } + set + { + this.alternativeImageField = value; + } + } + + /// + public PageXmlCoords Coords + { + get + { + return this.coordsField; + } + set + { + this.coordsField = value; + } + } + + /// + [XmlElementAttribute("Glyph")] + public PageXmlGlyph[] Glyphs + { + get + { + return this.glyphField; + } + set + { + this.glyphField = value; + } + } + + /// + [XmlElementAttribute("TextEquiv")] + public PageXmlTextEquiv[] TextEquivs + { + get + { + return this.textEquivField; + } + set + { + this.textEquivField = value; + } + } + + /// + public PageXmlTextStyle TextStyle + { + get + { + return this.textStyleField; + } + set + { + this.textStyleField = value; + } + } + + /// + [XmlArrayItemAttribute("UserAttribute", IsNullable = false)] + public PageXmlUserAttribute[] UserDefined + { + get + { + return this.userDefinedField; + } + set + { + this.userDefinedField = value; + } + } + + /// + /// Semantic labels / tags + /// + [XmlElementAttribute("Labels")] + public PageXmlLabels[] Labels + { + get + { + return this.labelsField; + } + set + { + this.labelsField = value; + } + } + + /// + [XmlAttributeAttribute("id", DataType = "ID")] + public string Id + { + get + { + return this.idField; + } + set + { + this.idField = value; + } + } + + /// + /// Overrides primaryLanguage attribute of parent line and/or text region + /// + [XmlAttributeAttribute("language")] + public PageXmlLanguageSimpleType Language + { + get + { + return this.languageField; + } + set + { + this.languageField = value; + this.languageFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool LanguageSpecified + { + get + { + return this.languageFieldSpecified; + } + set + { + this.languageFieldSpecified = value; + } + } + + /// + /// The primary script used in the word + /// + [XmlAttributeAttribute("primaryScript")] 
+ public PageXmlScriptSimpleType PrimaryScript + { + get + { + return this.primaryScriptField; + } + set + { + this.primaryScriptField = value; + this.primaryScriptFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool PrimaryScriptSpecified + { + get + { + return this.primaryScriptFieldSpecified; + } + set + { + this.primaryScriptFieldSpecified = value; + } + } + + /// + /// The secondary script used in the word + /// + [XmlAttributeAttribute("secondaryScript")] + public PageXmlScriptSimpleType SecondaryScript + { + get + { + return this.secondaryScriptField; + } + set + { + this.secondaryScriptField = value; + this.secondaryScriptFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool SecondaryScriptSpecified + { + get + { + return this.secondaryScriptFieldSpecified; + } + set + { + this.secondaryScriptFieldSpecified = value; + } + } + + /// + /// The direction in which text within the word should be read (order of characters). + /// + [XmlAttributeAttribute("readingDirection")] + public PageXmlReadingDirectionSimpleType ReadingDirection + { + get + { + return this.readingDirectionField; + } + set + { + this.readingDirectionField = value; + this.readingDirectionFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ReadingDirectionSpecified + { + get + { + return this.readingDirectionFieldSpecified; + } + set + { + this.readingDirectionFieldSpecified = value; + } + } + + /// + /// Overrides the production attribute of the parent text line and/or text region. 
+ /// + [XmlAttributeAttribute("production")] + public PageXmlProductionSimpleType Production + { + get + { + return this.productionField; + } + set + { + this.productionField = value; + this.productionFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ProductionSpecified + { + get + { + return this.productionFieldSpecified; + } + set + { + this.productionFieldSpecified = value; + } + } + + /// + /// For generic use + /// + [XmlAttributeAttribute("custom")] + public string Custom + { + get + { + return this.customField; + } + set + { + this.customField = value; + } + } + + /// + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + + /// + /// Joins the Unicode value of each text equivalent of the word with newlines. + /// Returns an empty string when no text equivalents have been assigned. + /// + public override string ToString() + { + if (this.TextEquivs == null) + { + return string.Empty; + } + + return string.Join("\n", this.TextEquivs.Where(t => t != null).Select(t => t.Unicode)); + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlBaseline + { + + private string pointsField; + + private float confField; + + private bool confFieldSpecified; + + /// + [XmlAttributeAttribute("points")] + public string Points + { + get + { + return this.pointsField; + } + set + { + this.pointsField = value; + } + } + + /// + /// Confidence value (between 0 and 1) + /// + [XmlAttributeAttribute("conf")] + public float Conf + { + get + { + return this.confField; + } + set + { + this.confField = value; + this.confFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ConfSpecified + { + get + { + return this.confFieldSpecified; + } + set + { + this.confFieldSpecified = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + 
[GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlTextLine
+    {
+        // Backing fields. Each "...FieldSpecified" flag records whether its optional value is present.
+        private PageXmlAlternativeImage[] alternativeImageField;
+        private PageXmlCoords coordsField;
+        private PageXmlBaseline baselineField;
+        private PageXmlWord[] wordField;
+        private PageXmlTextEquiv[] textEquivField;
+        private PageXmlTextStyle textStyleField;
+        private PageXmlUserAttribute[] userDefinedField;
+        private PageXmlLabels[] labelsField;
+        private string idField;
+        private PageXmlLanguageSimpleType primaryLanguageField;
+        private bool primaryLanguageFieldSpecified;
+        private PageXmlScriptSimpleType primaryScriptField;
+        private bool primaryScriptFieldSpecified;
+        private PageXmlScriptSimpleType secondaryScriptField;
+        private bool secondaryScriptFieldSpecified;
+        private PageXmlReadingDirectionSimpleType readingDirectionField;
+        private bool readingDirectionFieldSpecified;
+        private PageXmlProductionSimpleType productionField;
+        private bool productionFieldSpecified;
+        private string customField;
+        private string commentsField;
+        private int indexField;
+        private bool indexFieldSpecified;
+
+        /// <summary>
+        /// Alternative text line images (e.g. black-and-white).
+        /// </summary>
+        [XmlElement("AlternativeImage")]
+        public PageXmlAlternativeImage[] AlternativeImages
+        {
+            get { return alternativeImageField; }
+            set { alternativeImageField = value; }
+        }
+
+        /// <summary>The <see cref="PageXmlCoords"/> of the text line.</summary>
+        public PageXmlCoords Coords
+        {
+            get { return coordsField; }
+            set { coordsField = value; }
+        }
+
+        /// <summary>
+        /// Multiple connected points that mark the baseline of the glyphs.
+        /// </summary>
+        public PageXmlBaseline Baseline
+        {
+            get { return baselineField; }
+            set { baselineField = value; }
+        }
+
+        /// <summary>Words of the line; each entry is written as a <c>Word</c> element.</summary>
+        [XmlElement("Word")]
+        public PageXmlWord[] Words
+        {
+            get { return wordField; }
+            set { wordField = value; }
+        }
+
+        /// <summary>
+        /// Text equivalents of the line; each entry is written as a <c>TextEquiv</c> element.
+        /// <see cref="ToString"/> joins their Unicode text.
+        /// </summary>
+        [XmlElement("TextEquiv")]
+        public PageXmlTextEquiv[] TextEquivs
+        {
+            get { return textEquivField; }
+            set { textEquivField = value; }
+        }
+
+        /// <summary>The <see cref="PageXmlTextStyle"/> attached to the line.</summary>
+        public PageXmlTextStyle TextStyle
+        {
+            get { return textStyleField; }
+            set { textStyleField = value; }
+        }
+
+        /// <summary>User-defined attributes; each entry is written as a <c>UserAttribute</c> item.</summary>
+        [XmlArrayItem("UserAttribute", IsNullable = false)]
+        public PageXmlUserAttribute[] UserDefined
+        {
+            get { return userDefinedField; }
+            set { userDefinedField = value; }
+        }
+
+        /// <summary>
+        /// Semantic labels / tags.
+        /// </summary>
+        [XmlElement("Labels")]
+        public PageXmlLabels[] Labels
+        {
+            get { return labelsField; }
+            set { labelsField = value; }
+        }
+
+        /// <summary>Identifier, written as the <c>id</c> attribute with XML Schema data type <c>ID</c>.</summary>
+        [XmlAttribute("id", DataType = "ID")]
+        public string Id
+        {
+            get { return idField; }
+            set { idField = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>primaryLanguage</c> attribute. Assigning it also sets <see cref="PrimaryLanguageSpecified"/>.
+        /// </summary>
+        [XmlAttribute("primaryLanguage")]
+        public PageXmlLanguageSimpleType PrimaryLanguage
+        {
+            get { return primaryLanguageField; }
+            set
+            {
+                primaryLanguageField = value;
+                primaryLanguageFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="PrimaryLanguage"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool PrimaryLanguageSpecified
+        {
+            get { return primaryLanguageFieldSpecified; }
+            set { primaryLanguageFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>primaryScript</c> attribute. Assigning it also sets <see cref="PrimaryScriptSpecified"/>.
+        /// </summary>
+        [XmlAttribute("primaryScript")]
+        public PageXmlScriptSimpleType PrimaryScript
+        {
+            get { return primaryScriptField; }
+            set
+            {
+                primaryScriptField = value;
+                primaryScriptFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="PrimaryScript"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool PrimaryScriptSpecified
+        {
+            get { return primaryScriptFieldSpecified; }
+            set { primaryScriptFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>secondaryScript</c> attribute. Assigning it also sets <see cref="SecondaryScriptSpecified"/>.
+        /// </summary>
+        [XmlAttribute("secondaryScript")]
+        public PageXmlScriptSimpleType SecondaryScript
+        {
+            get { return secondaryScriptField; }
+            set
+            {
+                secondaryScriptField = value;
+                secondaryScriptFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="SecondaryScript"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool SecondaryScriptSpecified
+        {
+            get { return secondaryScriptFieldSpecified; }
+            set { secondaryScriptFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>readingDirection</c> attribute. Assigning it also sets <see cref="ReadingDirectionSpecified"/>.
+        /// </summary>
+        [XmlAttribute("readingDirection")]
+        public PageXmlReadingDirectionSimpleType ReadingDirection
+        {
+            get { return readingDirectionField; }
+            set
+            {
+                readingDirectionField = value;
+                readingDirectionFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="ReadingDirection"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool ReadingDirectionSpecified
+        {
+            get { return readingDirectionFieldSpecified; }
+            set { readingDirectionFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Value of the <c>production</c> attribute. Assigning it also sets <see cref="ProductionSpecified"/>.
+        /// </summary>
+        [XmlAttribute("production")]
+        public PageXmlProductionSimpleType Production
+        {
+            get { return productionField; }
+            set
+            {
+                productionField = value;
+                productionFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Production"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool ProductionSpecified
+        {
+            get { return productionFieldSpecified; }
+            set { productionFieldSpecified = value; }
+        }
+
+        /// <summary>Value of the <c>custom</c> attribute.</summary>
+        [XmlAttribute("custom")]
+        public string Custom
+        {
+            get { return customField; }
+            set { customField = value; }
+        }
+
+        /// <summary>Value of the <c>comments</c> attribute.</summary>
+        [XmlAttribute("comments")]
+        public string Comments
+        {
+            get { return commentsField; }
+            set { commentsField = value; }
+        }
+
+        /// <summary>
+        /// Index of the line. Assigning it also sets <see cref="IndexSpecified"/>.
+        /// </summary>
+        // NOTE(review): this is the only attribute here without an explicit name, so it is written as
+        // "Index" rather than lower-case "index" like its siblings - confirm against the PAGE schema.
+        [XmlAttribute()]
+        public int Index
+        {
+            get { return indexField; }
+            set
+            {
+                indexField = value;
+                indexFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Index"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool IndexSpecified
+        {
+            get { return indexFieldSpecified; }
+            set { indexFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Returns the Unicode text of every <see cref="TextEquivs"/> entry, separated by "\n".
+        /// </summary>
+        /// <returns>
+        /// The joined text, or an empty string when <see cref="TextEquivs"/> is null or empty.
+        /// </returns>
+        public override string ToString()
+        {
+            PageXmlTextEquiv[] equivs = TextEquivs;
+            if (equivs == null)
+            {
+                // The array is null until assigned or deserialized; ToString must not throw for that state.
+                return string.Empty;
+            }
+
+            // Null entries are skipped so a partially filled array cannot raise a NullReferenceException.
+            return string.Join("\n", equivs.Where(t => t != null).Select(t => t.Unicode));
+        }
+    }
+
+    /// <summary>
+    /// Data for a region that takes on the role of a table cell within a parent table region.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlTableCellRole
+    {
+        private int rowIndexField;
+        private int columnIndexField;
+        private int rowSpanField;
+        private bool rowSpanFieldSpecified;
+        private int colSpanField;
+        private bool colSpanFieldSpecified;
+        private bool headerField;
+        private bool headerFieldSpecified;
+
+        /// <summary>
+        /// Cell position in table starting with row 0.
+        /// </summary>
+        [XmlAttribute("rowIndex")]
+        public int RowIndex
+        {
+            get { return rowIndexField; }
+            set { rowIndexField = value; }
+        }
+
+        /// <summary>
+        /// Cell position in table starting with column 0.
+        /// </summary>
+        [XmlAttribute("columnIndex")]
+        public int ColumnIndex
+        {
+            get { return columnIndexField; }
+            set { columnIndexField = value; }
+        }
+
+        /// <summary>
+        /// Number of rows the cell spans (optional; default is 1).
+        /// Assigning it also sets <see cref="RowSpanSpecified"/>.
+        /// </summary>
+        [XmlAttribute("rowSpan")]
+        public int RowSpan
+        {
+            get { return rowSpanField; }
+            set
+            {
+                rowSpanField = value;
+                rowSpanFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="RowSpan"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool RowSpanSpecified
+        {
+            get { return rowSpanFieldSpecified; }
+            set { rowSpanFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Number of columns the cell spans (optional; default is 1).
+        /// Assigning it also sets <see cref="ColSpanSpecified"/>.
+        /// </summary>
+        [XmlAttribute("colSpan")]
+        public int ColSpan
+        {
+            get { return colSpanField; }
+            set
+            {
+                colSpanField = value;
+                colSpanFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="ColSpan"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool ColSpanSpecified
+        {
+            get { return colSpanFieldSpecified; }
+            set { colSpanFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Is the cell a column or row header? Assigning it also sets <see cref="HeaderSpecified"/>.
+        /// </summary>
+        [XmlAttribute("header")]
+        public bool Header
+        {
+            get { return headerField; }
+            set
+            {
+                headerField = value;
+                headerFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Header"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool HeaderSpecified
+        {
+            get { return headerFieldSpecified; }
+            set { headerFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>Container for the roles a region can take; currently only <see cref="TableCellRole"/>.</summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlRoles
+    {
+        private PageXmlTableCellRole tableCellRoleField;
+
+        /// <summary>
+        /// Data for a region that takes on the role of a table cell within a parent table region.
+        /// </summary>
+        public PageXmlTableCellRole TableCellRole
+        {
+            get { return tableCellRoleField; }
+            set { tableCellRoleField = value; }
+        }
+    }
+
+    /// <summary>
+    /// Abstract base of all region types; the concrete types are registered through the
+    /// <c>XmlInclude</c> attributes below.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [XmlInclude(typeof(PageXmlMapRegion))]
+    [XmlInclude(typeof(PageXmlCustomRegion))]
+    [XmlInclude(typeof(PageXmlUnknownRegion))]
+    [XmlInclude(typeof(PageXmlNoiseRegion))]
+    [XmlInclude(typeof(PageXmlAdvertRegion))]
+    [XmlInclude(typeof(PageXmlMusicRegion))]
+    [XmlInclude(typeof(PageXmlChemRegion))]
+    [XmlInclude(typeof(PageXmlMathsRegion))]
+    [XmlInclude(typeof(PageXmlSeparatorRegion))]
+    [XmlInclude(typeof(PageXmlChartRegion))]
+    [XmlInclude(typeof(PageXmlTableRegion))]
+    [XmlInclude(typeof(PageXmlGraphicRegion))]
+    [XmlInclude(typeof(PageXmlLineDrawingRegion))]
+    [XmlInclude(typeof(PageXmlImageRegion))]
+    [XmlInclude(typeof(PageXmlTextRegion))]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public abstract class PageXmlRegion
+    {
+        private PageXmlAlternativeImage[] alternativeImageField;
+        private PageXmlCoords coordsField;
+        private PageXmlUserAttribute[] userDefinedField;
+        private PageXmlLabels[] labelsField;
+        private PageXmlRoles rolesField;
+        private PageXmlRegion[] itemsField;
+        private string idField;
+        private string customField;
+        private string commentsField;
+        private bool continuationField;
+        private bool continuationFieldSpecified;
+
+        /// <summary>
+        /// Alternative region images (e.g. black-and-white).
+        /// </summary>
+        [XmlElement("AlternativeImage")]
+        public PageXmlAlternativeImage[] AlternativeImage
+        {
+            get { return alternativeImageField; }
+            set { alternativeImageField = value; }
+        }
+
+        /// <summary>The <see cref="PageXmlCoords"/> of the region.</summary>
+        public PageXmlCoords Coords
+        {
+            get { return coordsField; }
+            set { coordsField = value; }
+        }
+
+        /// <summary>User-defined attributes; each entry is written as a <c>UserAttribute</c> item.</summary>
+        [XmlArrayItem("UserAttribute", IsNullable = false)]
+        public PageXmlUserAttribute[] UserDefined
+        {
+            get { return userDefinedField; }
+            set { userDefinedField = value; }
+        }
+
+        /// <summary>
+        /// Semantic labels / tags.
+        /// </summary>
+        [XmlElement("Labels")]
+        public PageXmlLabels[] Labels
+        {
+            get { return labelsField; }
+            set { labelsField = value; }
+        }
+
+        /// <summary>
+        /// Roles the region takes (e.g. in context of a parent region).
+        /// </summary>
+        public PageXmlRoles Roles
+        {
+            get { return rolesField; }
+            set { rolesField = value; }
+        }
+
+        /// <summary>
+        /// Nested regions. Each entry is written as an element named after its concrete type,
+        /// as mapped by the <c>XmlElement</c> attributes below.
+        /// </summary>
+        [XmlElement("AdvertRegion", typeof(PageXmlAdvertRegion))]
+        [XmlElement("ChartRegion", typeof(PageXmlChartRegion))]
+        [XmlElement("ChemRegion", typeof(PageXmlChemRegion))]
+        [XmlElement("CustomRegion", typeof(PageXmlCustomRegion))]
+        [XmlElement("GraphicRegion", typeof(PageXmlGraphicRegion))]
+        [XmlElement("ImageRegion", typeof(PageXmlImageRegion))]
+        [XmlElement("LineDrawingRegion", typeof(PageXmlLineDrawingRegion))]
+        [XmlElement("MathsRegion", typeof(PageXmlMathsRegion))]
+        [XmlElement("MusicRegion", typeof(PageXmlMusicRegion))]
+        [XmlElement("NoiseRegion", typeof(PageXmlNoiseRegion))]
+        [XmlElement("SeparatorRegion", typeof(PageXmlSeparatorRegion))]
+        [XmlElement("TableRegion", typeof(PageXmlTableRegion))]
+        [XmlElement("TextRegion", typeof(PageXmlTextRegion))]
+        [XmlElement("UnknownRegion", typeof(PageXmlUnknownRegion))]
+        public PageXmlRegion[] Items
+        {
+            get { return itemsField; }
+            set { itemsField = value; }
+        }
+
+        /// <summary>Identifier, written as the <c>id</c> attribute with XML Schema data type <c>ID</c>.</summary>
+        [XmlAttribute("id", DataType = "ID")]
+        public string Id
+        {
+            get { return idField; }
+            set { idField = value; }
+        }
+
+        /// <summary>
+        /// For generic use.
+        /// </summary>
+        [XmlAttribute("custom")]
+        public string Custom
+        {
+            get { return customField; }
+            set { customField = value; }
+        }
+
+        /// <summary>Value of the <c>comments</c> attribute.</summary>
+        [XmlAttribute("comments")]
+        public string Comments
+        {
+            get { return commentsField; }
+            set { commentsField = value; }
+        }
+
+        /// <summary>
+        /// Is this region a continuation of another region
+        /// (in previous column or page, for example)?
+        /// Assigning it also sets <see cref="ContinuationSpecified"/>.
+        /// </summary>
+        [XmlAttribute("continuation")]
+        public bool Continuation
+        {
+            get { return continuationField; }
+            set
+            {
+                continuationField = value;
+                continuationFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Continuation"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool ContinuationSpecified
+        {
+            get { return continuationFieldSpecified; }
+            set { continuationFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// Regions containing advertisements.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlAdvertRegion : PageXmlRegion
+    {
+        private float orientationField;
+        private bool orientationFieldSpecified;
+        private PageXmlColourSimpleType bgColourField;
+        private bool bgColourFieldSpecified;
+
+        /// <summary>
+        /// Angle by which the rectangle encapsulating the region has to be rotated clockwise
+        /// to correct the present skew (negative values indicate anti-clockwise rotation).
+        /// Range: -179.999,180. Assigning it also sets <see cref="OrientationSpecified"/>.
+        /// </summary>
+        [XmlAttribute("orientation")]
+        public float Orientation
+        {
+            get { return orientationField; }
+            set
+            {
+                orientationField = value;
+                orientationFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Orientation"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool OrientationSpecified
+        {
+            get { return orientationFieldSpecified; }
+            set { orientationFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The background colour of the region. Assigning it also sets <see cref="BgColourSpecified"/>.
+        /// </summary>
+        [XmlAttribute("bgColour")]
+        public PageXmlColourSimpleType BgColour
+        {
+            get { return bgColourField; }
+            set
+            {
+                bgColourField = value;
+                bgColourFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="BgColour"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool BgColourSpecified
+        {
+            get { return bgColourFieldSpecified; }
+            set { bgColourFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// Regions containing charts or graphs of any type, should be marked as chart regions.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlChartRegion : PageXmlRegion
+    {
+        private float orientationField;
+        private bool orientationFieldSpecified;
+        private PageXmlChartSimpleType typeField;
+        private bool typeFieldSpecified;
+        private int numColoursField;
+        private bool numColoursFieldSpecified;
+        private PageXmlColourSimpleType bgColourField;
+        private bool bgColourFieldSpecified;
+        private bool embTextField;
+        private bool embTextFieldSpecified;
+
+        /// <summary>
+        /// Angle by which the rectangle encapsulating the region has to be rotated clockwise
+        /// to correct the present skew (negative values indicate anti-clockwise rotation).
+        /// Range: -179.999,180. Assigning it also sets <see cref="OrientationSpecified"/>.
+        /// </summary>
+        [XmlAttribute("orientation")]
+        public float Orientation
+        {
+            get { return orientationField; }
+            set
+            {
+                orientationField = value;
+                orientationFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Orientation"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool OrientationSpecified
+        {
+            get { return orientationFieldSpecified; }
+            set { orientationFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The type of chart in the region. Assigning it also sets <see cref="TypeSpecified"/>.
+        /// </summary>
+        [XmlAttribute("type")]
+        public PageXmlChartSimpleType Type
+        {
+            get { return typeField; }
+            set
+            {
+                typeField = value;
+                typeFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Type"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool TypeSpecified
+        {
+            get { return typeFieldSpecified; }
+            set { typeFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// An approximation of the number of colours used in the region.
+        /// Assigning it also sets <see cref="NumColoursSpecified"/>.
+        /// </summary>
+        [XmlAttribute("numColours")]
+        public int NumColours
+        {
+            get { return numColoursField; }
+            set
+            {
+                numColoursField = value;
+                numColoursFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="NumColours"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool NumColoursSpecified
+        {
+            get { return numColoursFieldSpecified; }
+            set { numColoursFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The background colour of the region. Assigning it also sets <see cref="BgColourSpecified"/>.
+        /// </summary>
+        [XmlAttribute("bgColour")]
+        public PageXmlColourSimpleType BgColour
+        {
+            get { return bgColourField; }
+            set
+            {
+                bgColourField = value;
+                bgColourFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="BgColour"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool BgColourSpecified
+        {
+            get { return bgColourFieldSpecified; }
+            set { bgColourFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Specifies whether the region also contains text. Assigning it also sets <see cref="EmbTextSpecified"/>.
+        /// </summary>
+        [XmlAttribute("embText")]
+        public bool EmbText
+        {
+            get { return embTextField; }
+            set
+            {
+                embTextField = value;
+                embTextFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="EmbText"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool EmbTextSpecified
+        {
+            get { return embTextFieldSpecified; }
+            set { embTextFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// Regions containing chemical formulas.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlChemRegion : PageXmlRegion
+    {
+        private float orientationField;
+        private bool orientationFieldSpecified;
+        private PageXmlColourSimpleType bgColourField;
+        private bool bgColourFieldSpecified;
+
+        /// <summary>
+        /// Angle by which the rectangle encapsulating the region has to be rotated clockwise
+        /// to correct the present skew (negative values indicate anti-clockwise rotation).
+        /// Range: -179.999,180. Assigning it also sets <see cref="OrientationSpecified"/>.
+        /// </summary>
+        [XmlAttribute("orientation")]
+        public float Orientation
+        {
+            get { return orientationField; }
+            set
+            {
+                orientationField = value;
+                orientationFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Orientation"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool OrientationSpecified
+        {
+            get { return orientationFieldSpecified; }
+            set { orientationFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The background colour of the region. Assigning it also sets <see cref="BgColourSpecified"/>.
+        /// </summary>
+        [XmlAttribute("bgColour")]
+        public PageXmlColourSimpleType BgColour
+        {
+            get { return bgColourField; }
+            set
+            {
+                bgColourField = value;
+                bgColourFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="BgColour"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool BgColourSpecified
+        {
+            get { return bgColourFieldSpecified; }
+            set { bgColourFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// Regions containing content that is not covered
+    /// by the default types (text, graphic, image,
+    /// line drawing, chart, table, separator, maths,
+    /// map, music, chem, advert, noise, unknown).
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlCustomRegion : PageXmlRegion
+    {
+        private string typeField;
+
+        /// <summary>
+        /// Information on the type of content represented by this region.
+        /// </summary>
+        [XmlAttribute("type")]
+        public string Type
+        {
+            get { return typeField; }
+            set { typeField = value; }
+        }
+    }
+
+    ///
+    /// Regions containing simple graphics, such as a company
+    /// logo, should be marked as graphic regions.
+    ///
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlGraphicRegion : PageXmlRegion
+    {
+        // Backing fields. Each "...FieldSpecified" flag records whether its optional value is present.
+        private float orientationField;
+        private bool orientationFieldSpecified;
+        private PageXmlGraphicsSimpleType typeField;
+        private bool typeFieldSpecified;
+        private int numColoursField;
+        private bool numColoursFieldSpecified;
+        private bool embTextField;
+        private bool embTextFieldSpecified;
+
+        /// <summary>
+        /// Angle by which the rectangle encapsulating the region has to be rotated clockwise
+        /// to correct the present skew (negative values indicate anti-clockwise rotation).
+        /// Range: -179.999,180. Assigning it also sets <see cref="OrientationSpecified"/>.
+        /// </summary>
+        [XmlAttribute("orientation")]
+        public float Orientation
+        {
+            get { return orientationField; }
+            set
+            {
+                orientationField = value;
+                orientationFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Orientation"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool OrientationSpecified
+        {
+            get { return orientationFieldSpecified; }
+            set { orientationFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The type of graphic in the region. Assigning it also sets <see cref="TypeSpecified"/>.
+        /// </summary>
+        [XmlAttribute("type")]
+        public PageXmlGraphicsSimpleType Type
+        {
+            get { return typeField; }
+            set
+            {
+                typeField = value;
+                typeFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Type"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool TypeSpecified
+        {
+            get { return typeFieldSpecified; }
+            set { typeFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// An approximation of the number of colours used in the region.
+        /// Assigning it also sets <see cref="NumColoursSpecified"/>.
+        /// </summary>
+        [XmlAttribute("numColours")]
+        public int NumColours
+        {
+            get { return numColoursField; }
+            set
+            {
+                numColoursField = value;
+                numColoursFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="NumColours"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool NumColoursSpecified
+        {
+            get { return numColoursFieldSpecified; }
+            set { numColoursFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Specifies whether the region also contains text. Assigning it also sets <see cref="EmbTextSpecified"/>.
+        /// </summary>
+        [XmlAttribute("embText")]
+        public bool EmbText
+        {
+            get { return embTextField; }
+            set
+            {
+                embTextField = value;
+                embTextFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="EmbText"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool EmbTextSpecified
+        {
+            get { return embTextFieldSpecified; }
+            set { embTextFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// An image is considered to be more intricate and complex than a graphic. These can be photos or drawings.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlImageRegion : PageXmlRegion
+    {
+        // Backing fields. Each "...FieldSpecified" flag records whether its optional value is present.
+        private float orientationField;
+        private bool orientationFieldSpecified;
+        private PageXmlColourDepthSimpleType colourDepthField;
+        private bool colourDepthFieldSpecified;
+        private PageXmlColourSimpleType bgColourField;
+        private bool bgColourFieldSpecified;
+        private bool embTextField;
+        private bool embTextFieldSpecified;
+
+        /// <summary>
+        /// Angle by which the rectangle encapsulating the region has to be rotated clockwise
+        /// to correct the present skew (negative values indicate anti-clockwise rotation).
+        /// Range: -179.999,180. Assigning it also sets <see cref="OrientationSpecified"/>.
+        /// </summary>
+        [XmlAttribute("orientation")]
+        public float Orientation
+        {
+            get { return orientationField; }
+            set
+            {
+                orientationField = value;
+                orientationFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Orientation"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool OrientationSpecified
+        {
+            get { return orientationFieldSpecified; }
+            set { orientationFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The colour bit depth required for the region. Assigning it also sets <see cref="ColourDepthSpecified"/>.
+        /// </summary>
+        [XmlAttribute("colourDepth")]
+        public PageXmlColourDepthSimpleType ColourDepth
+        {
+            get { return colourDepthField; }
+            set
+            {
+                colourDepthField = value;
+                colourDepthFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="ColourDepth"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool ColourDepthSpecified
+        {
+            get { return colourDepthFieldSpecified; }
+            set { colourDepthFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The background colour of the region. Assigning it also sets <see cref="BgColourSpecified"/>.
+        /// </summary>
+        [XmlAttribute("bgColour")]
+        public PageXmlColourSimpleType BgColour
+        {
+            get { return bgColourField; }
+            set
+            {
+                bgColourField = value;
+                bgColourFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="BgColour"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool BgColourSpecified
+        {
+            get { return bgColourFieldSpecified; }
+            set { bgColourFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Specifies whether the region also contains text. Assigning it also sets <see cref="EmbTextSpecified"/>.
+        /// </summary>
+        [XmlAttribute("embText")]
+        public bool EmbText
+        {
+            get { return embTextField; }
+            set
+            {
+                embTextField = value;
+                embTextFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="EmbText"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool EmbTextSpecified
+        {
+            get { return embTextFieldSpecified; }
+            set { embTextFieldSpecified = value; }
+        }
+    }
+
+    ///
+    /// A line drawing is a single colour illustration without solid areas.
+    ///
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlLineDrawingRegion : PageXmlRegion
+    {
+        // Backing fields. Each "...FieldSpecified" flag records whether its optional value is present.
+        private float orientationField;
+        private bool orientationFieldSpecified;
+        private PageXmlColourSimpleType penColourField;
+        private bool penColourFieldSpecified;
+        private PageXmlColourSimpleType bgColourField;
+        private bool bgColourFieldSpecified;
+        private bool embTextField;
+        private bool embTextFieldSpecified;
+
+        /// <summary>
+        /// Angle by which the rectangle encapsulating the region has to be rotated clockwise
+        /// to correct the present skew (negative values indicate anti-clockwise rotation).
+        /// Range: -179.999,180. Assigning it also sets <see cref="OrientationSpecified"/>.
+        /// </summary>
+        [XmlAttribute("orientation")]
+        public float Orientation
+        {
+            get { return orientationField; }
+            set
+            {
+                orientationField = value;
+                orientationFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Orientation"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool OrientationSpecified
+        {
+            get { return orientationFieldSpecified; }
+            set { orientationFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The pen (foreground) colour of the region. Assigning it also sets <see cref="PenColourSpecified"/>.
+        /// </summary>
+        [XmlAttribute("penColour")]
+        public PageXmlColourSimpleType PenColour
+        {
+            get { return penColourField; }
+            set
+            {
+                penColourField = value;
+                penColourFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="PenColour"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool PenColourSpecified
+        {
+            get { return penColourFieldSpecified; }
+            set { penColourFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The background colour of the region. Assigning it also sets <see cref="BgColourSpecified"/>.
+        /// </summary>
+        [XmlAttribute("bgColour")]
+        public PageXmlColourSimpleType BgColour
+        {
+            get { return bgColourField; }
+            set
+            {
+                bgColourField = value;
+                bgColourFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="BgColour"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool BgColourSpecified
+        {
+            get { return bgColourFieldSpecified; }
+            set { bgColourFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// Specifies whether the region also contains text. Assigning it also sets <see cref="EmbTextSpecified"/>.
+        /// </summary>
+        [XmlAttribute("embText")]
+        public bool EmbText
+        {
+            get { return embTextField; }
+            set
+            {
+                embTextField = value;
+                embTextFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="EmbText"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool EmbTextSpecified
+        {
+            get { return embTextFieldSpecified; }
+            set { embTextFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// Regions containing equations and mathematical symbols should be marked as maths regions.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlMathsRegion : PageXmlRegion
+    {
+        private float orientationField;
+        private bool orientationFieldSpecified;
+        private PageXmlColourSimpleType bgColourField;
+        private bool bgColourFieldSpecified;
+
+        /// <summary>
+        /// Angle by which the rectangle encapsulating the region has to be rotated clockwise
+        /// to correct the present skew (negative values indicate anti-clockwise rotation).
+        /// Range: -179.999,180. Assigning it also sets <see cref="OrientationSpecified"/>.
+        /// </summary>
+        [XmlAttribute("orientation")]
+        public float Orientation
+        {
+            get { return orientationField; }
+            set
+            {
+                orientationField = value;
+                orientationFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Orientation"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool OrientationSpecified
+        {
+            get { return orientationFieldSpecified; }
+            set { orientationFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The background colour of the region. Assigning it also sets <see cref="BgColourSpecified"/>.
+        /// </summary>
+        [XmlAttribute("bgColour")]
+        public PageXmlColourSimpleType BgColour
+        {
+            get { return bgColourField; }
+            set
+            {
+                bgColourField = value;
+                bgColourFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="BgColour"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool BgColourSpecified
+        {
+            get { return bgColourFieldSpecified; }
+            set { bgColourFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// Regions containing musical notations.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlMusicRegion : PageXmlRegion
+    {
+        private float orientationField;
+        private bool orientationFieldSpecified;
+        private PageXmlColourSimpleType bgColourField;
+        private bool bgColourFieldSpecified;
+
+        /// <summary>
+        /// Angle by which the rectangle encapsulating the region has to be rotated clockwise
+        /// to correct the present skew (negative values indicate anti-clockwise rotation).
+        /// Range: -179.999,180. Assigning it also sets <see cref="OrientationSpecified"/>.
+        /// </summary>
+        [XmlAttribute("orientation")]
+        public float Orientation
+        {
+            get { return orientationField; }
+            set
+            {
+                orientationField = value;
+                orientationFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Orientation"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool OrientationSpecified
+        {
+            get { return orientationFieldSpecified; }
+            set { orientationFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The background colour of the region. Assigning it also sets <see cref="BgColourSpecified"/>.
+        /// </summary>
+        [XmlAttribute("bgColour")]
+        public PageXmlColourSimpleType BgColour
+        {
+            get { return bgColourField; }
+            set
+            {
+                bgColourField = value;
+                bgColourFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="BgColour"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool BgColourSpecified
+        {
+            get { return bgColourFieldSpecified; }
+            set { bgColourFieldSpecified = value; }
+        }
+    }
+
+    /// <summary>
+    /// Noise regions are regions where no real data lies, only
+    /// false data created by artifacts on the document or
+    /// scanner noise.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlNoiseRegion : PageXmlRegion
+    {
+        // Adds no members of its own; everything comes from PageXmlRegion.
+    }
+
+    ///
+    /// Separators are lines that lie between columns and
+    /// paragraphs and can be used to logically separate
+    /// different articles from each other.
+    ///
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    [GeneratedCode("xsd", "4.6.1055.0")]
+    [Serializable]
+    [DebuggerStepThrough]
+    [DesignerCategory("code")]
+    [XmlType(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")]
+    public class PageXmlSeparatorRegion : PageXmlRegion
+    {
+        // Backing fields. Each "...FieldSpecified" flag records whether its optional value is present.
+        private float orientationField;
+        private bool orientationFieldSpecified;
+        private PageXmlColourSimpleType colourField;
+        private bool colourFieldSpecified;
+
+        /// <summary>
+        /// Angle by which the rectangle encapsulating the region has to be rotated clockwise
+        /// to correct the present skew (negative values indicate anti-clockwise rotation).
+        /// Range: -179.999,180. Assigning it also sets <see cref="OrientationSpecified"/>.
+        /// </summary>
+        [XmlAttribute("orientation")]
+        public float Orientation
+        {
+            get { return orientationField; }
+            set
+            {
+                orientationField = value;
+                orientationFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Orientation"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool OrientationSpecified
+        {
+            get { return orientationFieldSpecified; }
+            set { orientationFieldSpecified = value; }
+        }
+
+        /// <summary>
+        /// The colour of the separator. Assigning it also sets <see cref="ColourSpecified"/>.
+        /// </summary>
+        [XmlAttribute("colour")]
+        public PageXmlColourSimpleType Colour
+        {
+            get { return colourField; }
+            set
+            {
+                colourField = value;
+                colourFieldSpecified = true;
+            }
+        }
+
+        /// <summary>Presence flag for <see cref="Colour"/> (XmlSerializer <c>*Specified</c> pattern); not serialized itself.</summary>
+        [XmlIgnore]
+        public bool ColourSpecified
+        {
+            get { return colourFieldSpecified; }
+            set { colourFieldSpecified = value; }
+        }
+    }
+
+    ///
+    /// Tabular data in any form is represented with a table
+    /// region.Rows and columns may or may not have separator
+    /// lines; these lines are not separator regions.
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlTableRegion : PageXmlRegion + { + #region private + private PageXmlGridPoints[] gridField; + + private float orientationField; + + private bool orientationFieldSpecified; + + private int rowsField; + + private bool rowsFieldSpecified; + + private int columnsField; + + private bool columnsFieldSpecified; + + private PageXmlColourSimpleType lineColourField; + + private bool lineColourFieldSpecified; + + private PageXmlColourSimpleType bgColourField; + + private bool bgColourFieldSpecified; + + private bool lineSeparatorsField; + + private bool lineSeparatorsFieldSpecified; + + private bool embTextField; + + private bool embTextFieldSpecified; + #endregion + + /// + /// Table grid (visible or virtual grid lines) + /// + [XmlArrayItemAttribute("GridPoints", IsNullable = false)] + public PageXmlGridPoints[] Grid + { + get + { + return this.gridField; + } + set + { + this.gridField = value; + } + } + + /// + /// The angle the rectangle encapsulating a region + /// has to be rotated in clockwise direction + /// in order to correct the present skew + /// (negative values indicate anti-clockwise rotation).
+ /// Range: -179.999,180 + /// + [XmlAttributeAttribute("orientation")] + public float Orientation + { + get + { + return this.orientationField; + } + set + { + this.orientationField = value; + this.orientationFieldSpecified = true; + } + } + + /// True when 'orientation' should be serialized; set by the Orientation setter. + [XmlIgnoreAttribute()] + public bool OrientationSpecified + { + get + { + return this.orientationFieldSpecified; + } + set + { + this.orientationFieldSpecified = value; + } + } + + /// + /// The number of rows present in the table + /// + [XmlAttributeAttribute("rows")] + public int Rows + { + get + { + return this.rowsField; + } + set + { + this.rowsField = value; + this.rowsFieldSpecified = true; + } + } + + /// True when 'rows' should be serialized; set by the Rows setter. + [XmlIgnoreAttribute()] + public bool RowsSpecified + { + get + { + return this.rowsFieldSpecified; + } + set + { + this.rowsFieldSpecified = value; + } + } + + /// + /// The number of columns present in the table + /// + [XmlAttributeAttribute("columns")] + public int Columns + { + get + { + return this.columnsField; + } + set + { + this.columnsField = value; + this.columnsFieldSpecified = true; + } + } + + /// True when 'columns' should be serialized; set by the Columns setter. + [XmlIgnoreAttribute()] + public bool ColumnsSpecified + { + get + { + return this.columnsFieldSpecified; + } + set + { + this.columnsFieldSpecified = value; + } + } + + /// + /// The colour of the lines used in the region + /// + [XmlAttributeAttribute("lineColour")] + public PageXmlColourSimpleType LineColour + { + get + { + return this.lineColourField; + } + set + { + this.lineColourField = value; + this.lineColourFieldSpecified = true; + } + } + + /// True when 'lineColour' should be serialized; set by the LineColour setter. + [XmlIgnoreAttribute()] + public bool LineColourSpecified + { + get + { + return this.lineColourFieldSpecified; + } + set + { + this.lineColourFieldSpecified = value; + } + } + + /// + /// The background colour of the region + /// + [XmlAttributeAttribute("bgColour")] + public PageXmlColourSimpleType BgColour + { + get + { + return this.bgColourField; + } + set + { + this.bgColourField = value; + this.bgColourFieldSpecified = true; + } + } + +
/// True when 'bgColour' should be serialized; set by the BgColour setter. + [XmlIgnoreAttribute()] + public bool BgColourSpecified + { + get + { + return this.bgColourFieldSpecified; + } + set + { + this.bgColourFieldSpecified = value; + } + } + + /// + /// Specifies the presence of line separators + /// + [XmlAttributeAttribute("lineSeparators")] + public bool LineSeparators + { + get + { + return this.lineSeparatorsField; + } + set + { + this.lineSeparatorsField = value; + this.lineSeparatorsFieldSpecified = true; + } + } + + /// True when 'lineSeparators' should be serialized; set by the LineSeparators setter. + [XmlIgnoreAttribute()] + public bool LineSeparatorsSpecified + { + get + { + return this.lineSeparatorsFieldSpecified; + } + set + { + this.lineSeparatorsFieldSpecified = value; + } + } + + /// + /// Specifies whether the region also contains text + /// + [XmlAttributeAttribute("embText")] + public bool EmbText + { + get + { + return this.embTextField; + } + set + { + this.embTextField = value; + this.embTextFieldSpecified = true; + } + } + + /// True when 'embText' should be serialized; set by the EmbText setter. + [XmlIgnoreAttribute()] + public bool EmbTextSpecified + { + get + { + return this.embTextFieldSpecified; + } + set + { + this.embTextFieldSpecified = value; + } + } + } + + /// + /// Pure text is represented as a text region. This includes drop capitals, but practically + /// ornate text may be considered as a graphic.
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlTextRegion : PageXmlRegion + { + #region private methods + private PageXmlTextLine[] textLineField; + + private PageXmlTextEquiv[] textEquivField; + + private PageXmlTextStyle textStyleField; + + private float orientationField; + + private bool orientationFieldSpecified; + + private PageXmlTextSimpleType typeField; + + private bool typeFieldSpecified; + + private int leadingField; + + private bool leadingFieldSpecified; + + private PageXmlReadingDirectionSimpleType readingDirectionField; + + private bool readingDirectionFieldSpecified; + + private PageXmlTextLineOrderSimpleType textLineOrderField; + + private bool textLineOrderFieldSpecified; + + private float readingOrientationField; + + private bool readingOrientationFieldSpecified; + + private bool indentedField; + + private bool indentedFieldSpecified; + + private PageXmlAlignSimpleType alignField; + + private bool alignFieldSpecified; + + private PageXmlLanguageSimpleType primaryLanguageField; + + private bool primaryLanguageFieldSpecified; + + private PageXmlLanguageSimpleType secondaryLanguageField; + + private bool secondaryLanguageFieldSpecified; + + private PageXmlScriptSimpleType primaryScriptField; + + private bool primaryScriptFieldSpecified; + + private PageXmlScriptSimpleType secondaryScriptField; + + private bool secondaryScriptFieldSpecified; + + private PageXmlProductionSimpleType productionField; + + private bool productionFieldSpecified; + #endregion + + /// The TextLine child elements of this region. + [XmlElementAttribute("TextLine")] + public PageXmlTextLine[] TextLines + { + get + { + return this.textLineField; + } + set + { + this.textLineField = value; + } + } + + /// The TextEquiv child elements of this region. +
[XmlElementAttribute("TextEquiv")] + public PageXmlTextEquiv[] TextEquivs + { + get + { + return this.textEquivField; + } + set + { + this.textEquivField = value; + } + } + + /// The TextStyle child element of this region. + public PageXmlTextStyle TextStyle + { + get + { + return this.textStyleField; + } + set + { + this.textStyleField = value; + } + } + + /// + /// The angle the rectangle encapsulating the region + /// has to be rotated in clockwise direction + /// in order to correct the present skew + /// (negative values indicate anti-clockwise rotation). + /// (The rotated image can be further referenced + /// via “AlternativeImage”.) + /// Range: -179.999,180 + /// + [XmlAttributeAttribute("orientation")] + public float Orientation + { + get + { + return this.orientationField; + } + set + { + this.orientationField = value; + this.orientationFieldSpecified = true; + } + } + + /// True when 'orientation' should be serialized; set by the Orientation setter. + [XmlIgnoreAttribute()] + public bool OrientationSpecified + { + get + { + return this.orientationFieldSpecified; + } + set + { + this.orientationFieldSpecified = value; + } + } + + /// + /// The nature of the text in the region + /// + [XmlAttributeAttribute("type")] + public PageXmlTextSimpleType Type + { + get + { + return this.typeField; + } + set + { + this.typeField = value; + this.typeFieldSpecified = true; + } + } + + /// True when 'type' should be serialized; set by the Type setter. + [XmlIgnoreAttribute()] + public bool TypeSpecified + { + get + { + return this.typeFieldSpecified; + } + set + { + this.typeFieldSpecified = value; + } + } + + /// + /// The degree of space in points between the lines of + /// text (line spacing) + /// + [XmlAttributeAttribute("leading")] + public int Leading + { + get + { + return this.leadingField; + } + set + { + this.leadingField = value; + this.leadingFieldSpecified = true; + } + } + + /// True when 'leading' should be serialized; set by the Leading setter. + [XmlIgnoreAttribute()] + public bool LeadingSpecified + { + get + { + return this.leadingFieldSpecified; + } + set + { + this.leadingFieldSpecified = value; + } + } + + /// + /// The direction in which text within lines + /// should be read (order of words
and characters), + /// in addition to “textLineOrder”. + /// + [XmlAttributeAttribute("readingDirection")] + public PageXmlReadingDirectionSimpleType ReadingDirection + { + get + { + return this.readingDirectionField; + } + set + { + this.readingDirectionField = value; + this.readingDirectionFieldSpecified = true; + } + } + + /// True when 'readingDirection' should be serialized; set by the ReadingDirection setter. + [XmlIgnoreAttribute()] + public bool ReadingDirectionSpecified + { + get + { + return this.readingDirectionFieldSpecified; + } + set + { + this.readingDirectionFieldSpecified = value; + } + } + + /// + /// The order of text lines within the block, + /// in addition to “readingDirection”. + /// + [XmlAttributeAttribute("textLineOrder")] + public PageXmlTextLineOrderSimpleType TextLineOrder + { + get + { + return this.textLineOrderField; + } + set + { + this.textLineOrderField = value; + this.textLineOrderFieldSpecified = true; + } + } + + /// True when 'textLineOrder' should be serialized; set by the TextLineOrder setter. + [XmlIgnoreAttribute()] + public bool TextLineOrderSpecified + { + get + { + return this.textLineOrderFieldSpecified; + } + set + { + this.textLineOrderFieldSpecified = value; + } + } + + /// + /// The angle the baseline of text within the region + /// has to be rotated (relative to the rectangle + /// encapsulating the region) in clockwise direction + /// in order to correct the present skew, + /// in addition to “orientation” + /// (negative values indicate anti-clockwise rotation).
+ /// Range: -179.999,180 + /// + [XmlAttributeAttribute("readingOrientation")] + public float ReadingOrientation + { + get + { + return this.readingOrientationField; + } + set + { + this.readingOrientationField = value; + this.readingOrientationFieldSpecified = true; + } + } + + /// True when 'readingOrientation' should be serialized; set by the ReadingOrientation setter. + [XmlIgnoreAttribute()] + public bool ReadingOrientationSpecified + { + get + { + return this.readingOrientationFieldSpecified; + } + set + { + this.readingOrientationFieldSpecified = value; + } + } + + /// + /// Defines whether a region of text is indented or not + /// + [XmlAttributeAttribute("indented")] + public bool Indented + { + get + { + return this.indentedField; + } + set + { + this.indentedField = value; + this.indentedFieldSpecified = true; + } + } + + /// True when 'indented' should be serialized; set by the Indented setter. + [XmlIgnoreAttribute()] + public bool IndentedSpecified + { + get + { + return this.indentedFieldSpecified; + } + set + { + this.indentedFieldSpecified = value; + } + } + + /// + /// Text align + /// + [XmlAttributeAttribute("align")] + public PageXmlAlignSimpleType Align + { + get + { + return this.alignField; + } + set + { + this.alignField = value; + this.alignFieldSpecified = true; + } + } + + /// True when 'align' should be serialized; set by the Align setter. + [XmlIgnoreAttribute()] + public bool AlignSpecified + { + get + { + return this.alignFieldSpecified; + } + set + { + this.alignFieldSpecified = value; + } + } + + /// + /// The primary language used in the region + /// + [XmlAttributeAttribute("primaryLanguage")] + public PageXmlLanguageSimpleType PrimaryLanguage + { + get + { + return this.primaryLanguageField; + } + set + { + this.primaryLanguageField = value; + this.primaryLanguageFieldSpecified = true; + } + } + + /// True when 'primaryLanguage' should be serialized; set by the PrimaryLanguage setter. + [XmlIgnoreAttribute()] + public bool PrimaryLanguageSpecified + { + get + { + return this.primaryLanguageFieldSpecified; + } + set + { + this.primaryLanguageFieldSpecified = value; + } + } + + /// + /// The secondary language used in the region + /// + [XmlAttributeAttribute("secondaryLanguage")] + public PageXmlLanguageSimpleType SecondaryLanguage +
{ + get + { + return this.secondaryLanguageField; + } + set + { + this.secondaryLanguageField = value; + this.secondaryLanguageFieldSpecified = true; + } + } + + /// True when 'secondaryLanguage' should be serialized; set by the SecondaryLanguage setter. + [XmlIgnoreAttribute()] + public bool SecondaryLanguageSpecified + { + get + { + return this.secondaryLanguageFieldSpecified; + } + set + { + this.secondaryLanguageFieldSpecified = value; + } + } + + /// + /// The primary script used in the region + /// + [XmlAttributeAttribute("primaryScript")] + public PageXmlScriptSimpleType PrimaryScript + { + get + { + return this.primaryScriptField; + } + set + { + this.primaryScriptField = value; + this.primaryScriptFieldSpecified = true; + } + } + + /// True when 'primaryScript' should be serialized; set by the PrimaryScript setter. + [XmlIgnoreAttribute()] + public bool PrimaryScriptSpecified + { + get + { + return this.primaryScriptFieldSpecified; + } + set + { + this.primaryScriptFieldSpecified = value; + } + } + + /// + /// The secondary script used in the region + /// + [XmlAttributeAttribute("secondaryScript")] + public PageXmlScriptSimpleType SecondaryScript + { + get + { + return this.secondaryScriptField; + } + set + { + this.secondaryScriptField = value; + this.secondaryScriptFieldSpecified = true; + } + } + + /// True when 'secondaryScript' should be serialized; set by the SecondaryScript setter. + [XmlIgnoreAttribute()] + public bool SecondaryScriptSpecified + { + get + { + return this.secondaryScriptFieldSpecified; + } + set + { + this.secondaryScriptFieldSpecified = value; + } + } + + /// The 'production' attribute of this region. + [XmlAttributeAttribute("production")] + public PageXmlProductionSimpleType Production + { + get + { + return this.productionField; + } + set + { + this.productionField = value; + this.productionFieldSpecified = true; + } + } + + /// True when 'production' should be serialized; set by the Production setter. + [XmlIgnoreAttribute()] + public bool ProductionSpecified + { + get + { + return this.productionFieldSpecified; + } + set + { + this.productionFieldSpecified = value; + } + } + } + + + /// + /// To be used if the region type cannot be ascertained.
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlUnknownRegion : PageXmlRegion + { + } + + /// + /// Regions containing maps. + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlMapRegion : PageXmlRegion + { + + private float orientationField; + + private bool orientationFieldSpecified; + + /// + /// The angle the rectangle encapsulating a + /// region has to be rotated in clockwise + /// direction in order to correct the present + /// skew (negative values indicate + /// anti-clockwise rotation). Range: + /// -179.999,180 + /// + [XmlAttributeAttribute("orientation")] + public float Orientation + { + get + { + return this.orientationField; + } + set + { + this.orientationField = value; + this.orientationFieldSpecified = true; + } + } + + /// True when 'orientation' should be serialized; set by the Orientation setter. + [XmlIgnoreAttribute()] + public bool OrientationSpecified + { + get + { + return this.orientationFieldSpecified; + } + set + { + this.orientationFieldSpecified = value; + } + } + } + + /// + /// One-to-one relation between two layout objects. Use 'link' + /// for loose relations and 'join' for strong relations + /// (where something is fragmented for instance).
+ /// + /// Examples for 'link': caption - image; floating - + /// paragraph; paragraph - paragraph (when a paragraph is + /// split across columns and the last word of the first + /// paragraph DOES NOT continue in the second paragraph); + /// drop-cap - paragraph (when the drop-cap is a whole word) + /// + /// Examples for 'join': word - word (separated word at the + /// end of a line); drop-cap - paragraph (when the drop-cap + /// is not a whole word); paragraph - paragraph (when a + /// paragraph is split across columns and the last word of + /// the first paragraph DOES continue in the second + /// paragraph) + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlRelation + { + + private PageXmlLabels[] labelsField; + + private PageXmlRegionRef sourceRegionRefField; + + private PageXmlRegionRef targetRegionRefField; + + private string idField; + + private PageXmlRelationType typeField; + + private bool typeFieldSpecified; + + private string customField; + + private string commentsField; + + /// + /// Semantic labels / tags + /// + [XmlElementAttribute("Labels")] + public PageXmlLabels[] Labels + { + get + { + return this.labelsField; + } + set + { + this.labelsField = value; + } + } + + /// The SourceRegionRef child element. + public PageXmlRegionRef SourceRegionRef + { + get + { + return this.sourceRegionRefField; + } + set + { + this.sourceRegionRefField = value; + } + } + + /// The TargetRegionRef child element. + public PageXmlRegionRef TargetRegionRef + { + get + { + return this.targetRegionRefField; + } + set + { + this.targetRegionRefField = value; + } + } + + /// The 'id' attribute (XSD type ID). + [XmlAttributeAttribute("id", DataType = "ID")] + public string Id + { + get + { + return this.idField; + } + set + { + this.idField = value; + } + } + + /// The 'type' attribute of the relation. +
[XmlAttributeAttribute("type")] + public PageXmlRelationType Type + { + get + { + return this.typeField; + } + set + { + this.typeField = value; + this.typeFieldSpecified = true; + } + } + + /// True when 'type' should be serialized; set by the Type setter. + [XmlIgnoreAttribute()] + public bool TypeSpecified + { + get + { + return this.typeFieldSpecified; + } + set + { + this.typeFieldSpecified = value; + } + } + + /// + /// For generic use + /// + [XmlAttributeAttribute("custom")] + public string Custom + { + get + { + return this.customField; + } + set + { + this.customField = value; + } + } + + /// The 'comments' attribute. + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + } + + /// Holds a single 'regionRef' attribute (XSD type IDREF). + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlRegionRef + { + + private string regionRefField; + + /// The 'regionRef' attribute (XSD type IDREF). + [XmlAttributeAttribute("regionRef", DataType = "IDREF")] + public string RegionRef + { + get + { + return this.regionRefField; + } + set + { + this.regionRefField = value; + } + } + } + + /// + /// Container for one-to-one relations between layout + /// objects (for example: DropCap - paragraph, caption - + /// image).
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlRelations + { + + private PageXmlRelation[] relationField; + + /// The Relation child elements. + [XmlElementAttribute("Relation")] + public PageXmlRelation[] Relations + { + get + { + return this.relationField; + } + set + { + this.relationField = value; + } + } + } + + /// A layer: RegionRef child elements plus 'id', 'zIndex' and 'caption' attributes. + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlLayer + { + + private PageXmlRegionRef[] regionRefField; + + private string idField; + + private int zIndexField; + + private string captionField; + + /// The RegionRef child elements. + [XmlElementAttribute("RegionRef")] + public PageXmlRegionRef[] RegionRefs + { + get + { + return this.regionRefField; + } + set + { + this.regionRefField = value; + } + } + + /// The 'id' attribute (XSD type ID). + [XmlAttributeAttribute("id", DataType = "ID")] + public string Id + { + get + { + return this.idField; + } + set + { + this.idField = value; + } + } + + /// The 'zIndex' attribute. + [XmlAttributeAttribute("zIndex")] + public int ZIndex + { + get + { + return this.zIndexField; + } + set + { + this.zIndexField = value; + } + } + + /// The 'caption' attribute. + [XmlAttributeAttribute("caption")] + public string Caption + { + get + { + return this.captionField; + } + set + { + this.captionField = value; + } + } + } + + /// + /// Can be used to express the z-index of overlapping + /// regions. An element with a greater z-index is always in + /// front of another element with lower z-index.
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlLayers + { + + private PageXmlLayer[] layerField; + + /// The Layer child elements. + [XmlElementAttribute("Layer")] + public PageXmlLayer[] Layers + { + get + { + return this.layerField; + } + set + { + this.layerField = value; + } + } + } + + /// + /// Numbered group (contains unordered elements) + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlUnorderedGroup + { + #region private + private PageXmlUserAttribute[] userDefinedField; + + private PageXmlLabels[] labelsField; + + private object[] itemsField; + + private string idField; + + private string regionRefField; + + private string captionField; + + private PageXmlGroupSimpleType typeField; + + private bool typeFieldSpecified; + + private bool continuationField; + + private bool continuationFieldSpecified; + + private string customField; + + private string commentsField; + #endregion + + /// The UserDefined element, holding UserAttribute items. + [XmlArrayItemAttribute("UserAttribute", IsNullable = false)] + public PageXmlUserAttribute[] UserDefined + { + get + { + return this.userDefinedField; + } + set + { + this.userDefinedField = value; + } + } + + /// + /// Semantic labels / tags + /// + [XmlElementAttribute("Labels")] + public PageXmlLabels[] Labels + { + get + { + return this.labelsField; + } + set + { + this.labelsField = value; + } + } + + /// Group members: OrderedGroup, RegionRef or UnorderedGroup elements. + [XmlElementAttribute("OrderedGroup", typeof(PageXmlOrderedGroup))] + [XmlElementAttribute("RegionRef",
typeof(PageXmlRegionRef))] + [XmlElementAttribute("UnorderedGroup", typeof(PageXmlUnorderedGroup))] + public object[] Items + { + get + { + return this.itemsField; + } + set + { + this.itemsField = value; + } + } + + /// The 'id' attribute (XSD type ID). + [XmlAttributeAttribute("id", DataType = "ID")] + public string Id + { + get + { + return this.idField; + } + set + { + this.idField = value; + } + } + + /// + /// Optional link to a parent region of nested regions. + /// The parent region doubles as reading order group. + /// Only the nested regions should be allowed as group members. + /// + [XmlAttributeAttribute("regionRef", DataType = "IDREF")] + public string RegionRef + { + get + { + return this.regionRefField; + } + set + { + this.regionRefField = value; + } + } + + /// The 'caption' attribute. + [XmlAttributeAttribute("caption")] + public string Caption + { + get + { + return this.captionField; + } + set + { + this.captionField = value; + } + } + + /// The 'type' attribute of the group. + [XmlAttributeAttribute("type")] + public PageXmlGroupSimpleType Type + { + get + { + return this.typeField; + } + set + { + this.typeField = value; + this.typeFieldSpecified = true; + } + } + + /// True when 'type' should be serialized; set by the Type setter. + [XmlIgnoreAttribute()] + public bool TypeSpecified + { + get + { + return this.typeFieldSpecified; + } + set + { + this.typeFieldSpecified = value; + } + } + + /// + /// Is this group a continuation of another group + /// (from previous column or page, for example)?
+ /// + [XmlAttributeAttribute("continuation")] + public bool Continuation + { + get + { + return this.continuationField; + } + set + { + this.continuationField = value; + this.continuationFieldSpecified = true; + } + } + + /// True when 'continuation' should be serialized; set by the Continuation setter. + [XmlIgnoreAttribute()] + public bool ContinuationSpecified + { + get + { + return this.continuationFieldSpecified; + } + set + { + this.continuationFieldSpecified = value; + } + } + + /// + /// For generic use + /// + [XmlAttributeAttribute("custom")] + public string Custom + { + get + { + return this.customField; + } + set + { + this.customField = value; + } + } + + /// The 'comments' attribute. + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + } + + /// + /// Numbered group (contains ordered elements) + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlOrderedGroup + { + #region private + private PageXmlUserAttribute[] userDefinedField; + + private PageXmlLabels[] labelsField; + + private object[] itemsField; + + private string idField; + + private string regionRefField; + + private string captionField; + + private PageXmlGroupSimpleType typeField; + + private bool typeFieldSpecified; + + private bool continuationField; + + private bool continuationFieldSpecified; + + private string customField; + + private string commentsField; + #endregion + + /// The UserDefined element, holding UserAttribute items. + [XmlArrayItemAttribute("UserAttribute", IsNullable = false)] + public PageXmlUserAttribute[] UserDefined + { + get + { + return this.userDefinedField; + } + set + { + this.userDefinedField = value; + } + } + + /// + /// Semantic labels / tags + /// + [XmlElementAttribute("Labels")] + public PageXmlLabels[] Labels + { +
get + { + return this.labelsField; + } + set + { + this.labelsField = value; + } + } + + /// Group members: OrderedGroupIndexed, RegionRefIndexed or UnorderedGroupIndexed elements. + [XmlElementAttribute("OrderedGroupIndexed", typeof(PageXmlOrderedGroupIndexed))] + [XmlElementAttribute("RegionRefIndexed", typeof(PageXmlRegionRefIndexed))] + [XmlElementAttribute("UnorderedGroupIndexed", typeof(PageXmlUnorderedGroupIndexed))] + public object[] Items + { + get + { + return this.itemsField; + } + set + { + this.itemsField = value; + } + } + + /// The 'id' attribute (XSD type ID). + [XmlAttributeAttribute("id", DataType = "ID")] + public string Id + { + get + { + return this.idField; + } + set + { + this.idField = value; + } + } + + /// + /// Optional link to a parent region of nested regions. + /// The parent region doubles as reading order group. + /// Only the nested regions should be allowed as group members. + /// + [XmlAttributeAttribute("regionRef", DataType = "IDREF")] + public string RegionRef + { + get + { + return this.regionRefField; + } + set + { + this.regionRefField = value; + } + } + + /// Obsolete camel-case alias of RegionRef over the same field, kept so existing callers still compile; excluded from serialization. + [XmlIgnoreAttribute()] + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [Obsolete("Use RegionRef instead.")] + public string regionRef + { + get + { + return this.regionRefField; + } + set + { + this.regionRefField = value; + } + } + + /// The 'caption' attribute. + [XmlAttributeAttribute("caption")] + public string Caption + { + get + { + return this.captionField; + } + set + { + this.captionField = value; + } + } + + /// The 'type' attribute of the group. + [XmlAttributeAttribute("type")] + public PageXmlGroupSimpleType Type + { + get + { + return this.typeField; + } + set + { + this.typeField = value; + this.typeFieldSpecified = true; + } + } + + /// True when 'type' should be serialized; set by the Type setter. + [XmlIgnoreAttribute()] + public bool TypeSpecified + { + get + { + return this.typeFieldSpecified; + } + set + { + this.typeFieldSpecified = value; + } + } + + /// + /// Is this group a continuation of another group + /// (from previous column or page, for example)?
+ /// + [XmlAttributeAttribute("continuation")] + public bool Continuation + { + get + { + return this.continuationField; + } + set + { + this.continuationField = value; + this.continuationFieldSpecified = true; + } + } + + /// True when 'continuation' should be serialized; set by the Continuation setter. + [XmlIgnoreAttribute()] + public bool ContinuationSpecified + { + get + { + return this.continuationFieldSpecified; + } + set + { + this.continuationFieldSpecified = value; + } + } + + /// + /// For generic use + /// + [XmlAttributeAttribute("custom")] + public string Custom + { + get + { + return this.customField; + } + set + { + this.customField = value; + } + } + + /// The 'comments' attribute. + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + } + + /// Indexed group containing ordered elements + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlOrderedGroupIndexed + { + + private PageXmlUserAttribute[] userDefinedField; + + private PageXmlLabels[] labelsField; + + private object[] itemsField; + + private string idField; + + private string regionRefField; + + private int indexField; + + private string captionField; + + private PageXmlGroupSimpleType typeField; + + private bool typeFieldSpecified; + + private bool continuationField; + + private bool continuationFieldSpecified; + + private string customField; + + private string commentsField; + + /// The UserDefined element, holding UserAttribute items. + [XmlArrayItemAttribute("UserAttribute", IsNullable = false)] + public PageXmlUserAttribute[] UserDefined + { + get + { + return this.userDefinedField; + } + set + { + this.userDefinedField = value; + } + } + + /// The Labels child elements. + [XmlElementAttribute("Labels")] + public PageXmlLabels[] Labels + { + get + { + return this.labelsField; + } + set + { + this.labelsField = value; + } +
} + + /// Group members: OrderedGroupIndexed, RegionRefIndexed or UnorderedGroupIndexed elements. + [XmlElementAttribute("OrderedGroupIndexed", typeof(PageXmlOrderedGroupIndexed))] + [XmlElementAttribute("RegionRefIndexed", typeof(PageXmlRegionRefIndexed))] + [XmlElementAttribute("UnorderedGroupIndexed", typeof(PageXmlUnorderedGroupIndexed))] + public object[] Items + { + get + { + return this.itemsField; + } + set + { + this.itemsField = value; + } + } + + /// The 'id' attribute (XSD type ID). + [XmlAttributeAttribute("id", DataType = "ID")] + public string Id + { + get + { + return this.idField; + } + set + { + this.idField = value; + } + } + + /// The 'regionRef' attribute (XSD type IDREF). + [XmlAttributeAttribute("regionRef", DataType = "IDREF")] + public string RegionRef + { + get + { + return this.regionRefField; + } + set + { + this.regionRefField = value; + } + } + + /// The 'index' attribute. + [XmlAttributeAttribute("index")] + public int Index + { + get + { + return this.indexField; + } + set + { + this.indexField = value; + } + } + + /// The 'caption' attribute. + [XmlAttributeAttribute("caption")] + public string Caption + { + get + { + return this.captionField; + } + set + { + this.captionField = value; + } + } + + /// The 'type' attribute of the group. + [XmlAttributeAttribute("type")] + public PageXmlGroupSimpleType Type + { + get + { + return this.typeField; + } + set + { + this.typeField = value; + this.typeFieldSpecified = true; + } + } + + /// True when 'type' should be serialized; set by the Type setter. + [XmlIgnoreAttribute()] + public bool TypeSpecified + { + get + { + return this.typeFieldSpecified; + } + set + { + this.typeFieldSpecified = value; + } + } + + /// The 'continuation' attribute. + [XmlAttributeAttribute("continuation")] + public bool Continuation + { + get + { + return this.continuationField; + } + set + { + this.continuationField = value; + this.continuationFieldSpecified = true; + } + } + + /// True when 'continuation' should be serialized; set by the Continuation setter. + [XmlIgnoreAttribute()] + public bool ContinuationSpecified + { + get + { + return this.continuationFieldSpecified; + } + set + { + this.continuationFieldSpecified = value; + } + } + + /// The 'custom' attribute. + [XmlAttributeAttribute("custom")] + public string Custom + { + get + { + return this.customField; + } + set + { + this.customField = value; + } + } + + /// The 'comments' attribute. +
[XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + } + + /// + /// Numbered region + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlRegionRefIndexed + { + + private int indexField; + + private string regionRefField; + + /// + /// Position (order number) of this item within the current hierarchy level. + /// + [XmlAttributeAttribute("index")] + public int Index + { + get + { + return this.indexField; + } + set + { + this.indexField = value; + } + } + + /// The 'regionRef' attribute (XSD type IDREF). + [XmlAttributeAttribute("regionRef", DataType = "IDREF")] + public string RegionRef + { + get + { + return this.regionRefField; + } + set + { + this.regionRefField = value; + } + } + } + + /// + /// Indexed group containing unordered elements + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlUnorderedGroupIndexed + { + #region private + private PageXmlUserAttribute[] userDefinedField; + + private PageXmlLabels[] labelsField; + + private object[] itemsField; + + private string idField; + + private string regionRefField; + + private int indexField; + + private string captionField; + + private PageXmlGroupSimpleType typeField; + + private bool typeFieldSpecified; + + private bool continuationField; + + private bool continuationFieldSpecified; + + private string customField; + + private string commentsField; + #endregion + + /// The UserDefined element, holding UserAttribute items. +
[XmlArrayItemAttribute("UserAttribute", IsNullable = false)] + public PageXmlUserAttribute[] UserDefined + { + get + { + return this.userDefinedField; + } + set + { + this.userDefinedField = value; + } + } + + /// + /// Semantic labels / tags + /// + [XmlElementAttribute("Labels")] + public PageXmlLabels[] Labels + { + get + { + return this.labelsField; + } + set + { + this.labelsField = value; + } + } + + /// Group members: OrderedGroup, RegionRef or UnorderedGroup elements. + [XmlElementAttribute("OrderedGroup", typeof(PageXmlOrderedGroup))] + [XmlElementAttribute("RegionRef", typeof(PageXmlRegionRef))] + [XmlElementAttribute("UnorderedGroup", typeof(PageXmlUnorderedGroup))] + public object[] Items + { + get + { + return this.itemsField; + } + set + { + this.itemsField = value; + } + } + + /// The 'id' attribute (XSD type ID). + [XmlAttributeAttribute("id", DataType = "ID")] + public string Id + { + get + { + return this.idField; + } + set + { + this.idField = value; + } + } + + /// + /// Optional link to a parent region of nested regions. + /// The parent region doubles as reading order group. + /// Only the nested regions should be allowed as group members. + /// + [XmlAttributeAttribute("regionRef", DataType = "IDREF")] + public string RegionRef + { + get + { + return this.regionRefField; + } + set + { + this.regionRefField = value; + } + } + + /// + /// Position (order number) of this item within the current hierarchy level.
+ /// + [XmlAttributeAttribute("index")] + public int Index + { + get + { + return this.indexField; + } + set + { + this.indexField = value; + } + } + + /// The 'caption' attribute. + [XmlAttributeAttribute("caption")] + public string Caption + { + get + { + return this.captionField; + } + set + { + this.captionField = value; + } + } + + /// The 'type' attribute of the group. + [XmlAttributeAttribute("type")] + public PageXmlGroupSimpleType Type + { + get + { + return this.typeField; + } + set + { + this.typeField = value; + this.typeFieldSpecified = true; + } + } + + /// True when 'type' should be serialized; set by the Type setter. + [XmlIgnoreAttribute()] + public bool TypeSpecified + { + get + { + return this.typeFieldSpecified; + } + set + { + this.typeFieldSpecified = value; + } + } + + /// + /// Is this group a continuation of another group (from + /// previous column or page, for example)? + /// + [XmlAttributeAttribute("continuation")] + public bool Continuation + { + get + { + return this.continuationField; + } + set + { + this.continuationField = value; + this.continuationFieldSpecified = true; + } + } + + /// True when 'continuation' should be serialized; set by the Continuation setter. + [XmlIgnoreAttribute()] + public bool ContinuationSpecified + { + get + { + return this.continuationFieldSpecified; + } + set + { + this.continuationFieldSpecified = value; + } + } + + /// + /// For generic use + /// + [XmlAttributeAttribute("custom")] + public string Custom + { + get + { + return this.customField; + } + set + { + this.customField = value; + } + } + + /// The 'comments' attribute. + [XmlAttributeAttribute("comments")] + public string Comments + { + get + { + return this.commentsField; + } + set + { + this.commentsField = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlReadingOrder + { + + private object itemField; + + private float confField; + + private bool
confFieldSpecified; + + /// + [XmlElementAttribute("OrderedGroup", typeof(PageXmlOrderedGroup))] + [XmlElementAttribute("UnorderedGroup", typeof(PageXmlUnorderedGroup))] + public object Item + { + get + { + return this.itemField; + } + set + { + this.itemField = value; + } + } + + /// + /// Confidence value (between 0 and 1) + /// + [XmlAttributeAttribute("conf")] + public float Conf + { + get + { + return this.confField; + } + set + { + this.confField = value; + this.confFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ConfSpecified + { + get + { + return this.confFieldSpecified; + } + set + { + this.confFieldSpecified = value; + } + } + } + + /// + /// Determines the effective area on the paper of a printed page. + /// Its size is equal for all pages of a book + /// (exceptions: titlepage, multipage pictures). + /// It contains all living elements (except marginals) + /// like body type, footnotes, headings, running titles. + /// It does not contain pagenumber (if not part of running title), + /// marginals, signature mark, preview words. + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlPrintSpace + { + + private PageXmlCoords coordsField; + + /// + public PageXmlCoords Coords + { + get + { + return this.coordsField; + } + set + { + this.coordsField = value; + } + } + } + + /// + /// Border of the actual page (if the scanned image + /// contains parts not belonging to the page). 
+ /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlBorder + { + + private PageXmlCoords coordsField; + + /// + public PageXmlCoords Coords + { + get + { + return this.coordsField; + } + set + { + this.coordsField = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlPage + { + + private PageXmlAlternativeImage[] alternativeImageField; + + private PageXmlBorder borderField; + + private PageXmlPrintSpace printSpaceField; + + private PageXmlReadingOrder readingOrderField; + + private PageXmlLayers layersField; + + private PageXmlRelations relationsField; + + private PageXmlTextStyle textStyleField; + + private PageXmlUserAttribute[] userDefinedField; + + private PageXmlLabels[] labelsField; + + private PageXmlRegion[] itemsField; + + private string imageFilenameField; + + private int imageWidthField; + + private int imageHeightField; + + private float imageXResolutionField; + + private bool imageXResolutionFieldSpecified; + + private float imageYResolutionField; + + private bool imageYResolutionFieldSpecified; + + private PageXmlPageImageResolutionUnit imageResolutionUnitField; + + private bool imageResolutionUnitFieldSpecified; + + private string customField; + + private float orientationField; + + private bool orientationFieldSpecified; + + private PageXmlPageSimpleType typeField; + + private bool typeFieldSpecified; + + private PageXmlLanguageSimpleType 
primaryLanguageField; + + private bool primaryLanguageFieldSpecified; + + private PageXmlLanguageSimpleType secondaryLanguageField; + + private bool secondaryLanguageFieldSpecified; + + private PageXmlScriptSimpleType primaryScriptField; + + private bool primaryScriptFieldSpecified; + + private PageXmlScriptSimpleType secondaryScriptField; + + private bool secondaryScriptFieldSpecified; + + private PageXmlReadingDirectionSimpleType readingDirectionField; + + private bool readingDirectionFieldSpecified; + + private PageXmlTextLineOrderSimpleType textLineOrderField; + + private bool textLineOrderFieldSpecified; + + private float confField; + + private bool confFieldSpecified; + + /// + /// Alternative document page images (e.g. black-and-white). + /// + [XmlElementAttribute("AlternativeImage")] + public PageXmlAlternativeImage[] AlternativeImage + { + get + { + return this.alternativeImageField; + } + set + { + this.alternativeImageField = value; + } + } + + /// + public PageXmlBorder Border + { + get + { + return this.borderField; + } + set + { + this.borderField = value; + } + } + + /// + public PageXmlPrintSpace PrintSpace + { + get + { + return this.printSpaceField; + } + set + { + this.printSpaceField = value; + } + } + + /// + /// Order of blocks within the page. + /// + public PageXmlReadingOrder ReadingOrder + { + get + { + return this.readingOrderField; + } + set + { + this.readingOrderField = value; + } + } + + /// + /// Unassigned regions are considered to be in the (virtual) default layer which is to be treated as below any other layers. 
+ /// + public PageXmlLayers Layers + { + get + { + return this.layersField; + } + set + { + this.layersField = value; + } + } + + /// + public PageXmlRelations Relations + { + get + { + return this.relationsField; + } + set + { + this.relationsField = value; + } + } + + /// + /// Default text style + /// + public PageXmlTextStyle TextStyle + { + get + { + return this.textStyleField; + } + set + { + this.textStyleField = value; + } + } + + /// + [XmlArrayItemAttribute("UserAttribute", IsNullable = false)] + public PageXmlUserAttribute[] UserDefined + { + get + { + return this.userDefinedField; + } + set + { + this.userDefinedField = value; + } + } + + /// + /// Semantic labels / tags + /// + [XmlElementAttribute("Labels")] + public PageXmlLabels[] Labels + { + get + { + return this.labelsField; + } + set + { + this.labelsField = value; + } + } + + /// + [XmlElementAttribute("AdvertRegion", typeof(PageXmlAdvertRegion))] + [XmlElementAttribute("ChartRegion", typeof(PageXmlChartRegion))] + [XmlElementAttribute("ChemRegion", typeof(PageXmlChemRegion))] + [XmlElementAttribute("CustomRegion", typeof(PageXmlCustomRegion))] + [XmlElementAttribute("GraphicRegion", typeof(PageXmlGraphicRegion))] + [XmlElementAttribute("ImageRegion", typeof(PageXmlImageRegion))] + [XmlElementAttribute("LineDrawingRegion", typeof(PageXmlLineDrawingRegion))] + [XmlElementAttribute("MapRegion", typeof(PageXmlMapRegion))] + [XmlElementAttribute("MathsRegion", typeof(PageXmlMathsRegion))] + [XmlElementAttribute("MusicRegion", typeof(PageXmlMusicRegion))] + [XmlElementAttribute("NoiseRegion", typeof(PageXmlNoiseRegion))] + [XmlElementAttribute("SeparatorRegion", typeof(PageXmlSeparatorRegion))] + [XmlElementAttribute("TableRegion", typeof(PageXmlTableRegion))] + [XmlElementAttribute("TextRegion", typeof(PageXmlTextRegion))] + [XmlElementAttribute("UnknownRegion", typeof(PageXmlUnknownRegion))] + public PageXmlRegion[] Items + { + get + { + return this.itemsField; + } + set + { + this.itemsField = 
value; + } + } + + /// + /// Contains the image file name including the file extension. + /// + [XmlAttributeAttribute("imageFilename")] + public string ImageFilename + { + get + { + return this.imageFilenameField; + } + set + { + this.imageFilenameField = value; + } + } + + /// + /// Specifies the width of the image. + /// + [XmlAttributeAttribute("imageWidth")] + public int ImageWidth + { + get + { + return this.imageWidthField; + } + set + { + this.imageWidthField = value; + } + } + + /// + /// Specifies the height of the image. + /// + [XmlAttributeAttribute("imageHeight")] + public int ImageHeight + { + get + { + return this.imageHeightField; + } + set + { + this.imageHeightField = value; + } + } + + /// + /// Specifies the image resolution in width. + /// + [XmlAttributeAttribute("imageXResolution")] + public float ImageXResolution + { + get + { + return this.imageXResolutionField; + } + set + { + this.imageXResolutionField = value; + this.imageXResolutionFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ImageXResolutionSpecified + { + get + { + return this.imageXResolutionFieldSpecified; + } + set + { + this.imageXResolutionFieldSpecified = value; + } + } + + /// + /// Specifies the image resolution in height. + /// + [XmlAttributeAttribute("imageYResolution")] + public float ImageYResolution + { + get + { + return this.imageYResolutionField; + } + set + { + this.imageYResolutionField = value; + this.imageYResolutionFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ImageYResolutionSpecified + { + get + { + return this.imageYResolutionFieldSpecified; + } + set + { + this.imageYResolutionFieldSpecified = value; + } + } + + /// + /// Specifies the unit of the resolution information referring to a standardised unit of measurement + /// (pixels per inch, pixels per centimeter or other). 
+ /// + [XmlAttributeAttribute("imageResolutionUnit")] + public PageXmlPageImageResolutionUnit ImageResolutionUnit + { + get + { + return this.imageResolutionUnitField; + } + set + { + this.imageResolutionUnitField = value; + this.imageResolutionUnitFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ImageResolutionUnitSpecified + { + get + { + return this.imageResolutionUnitFieldSpecified; + } + set + { + this.imageResolutionUnitFieldSpecified = value; + } + } + + /// + /// For generic use + /// + [XmlAttributeAttribute("custom")] + public string Custom + { + get + { + return this.customField; + } + set + { + this.customField = value; + } + } + + /// + /// The angle the rectangle encapsulating the page (or its Border) has to be rotated in clockwise direction + /// in order to correct the present skew (negative values indicate anti-clockwise rotation). + /// (The rotated image can be further referenced via “AlternativeImage”.) + /// Range: -179.999, 180 + /// + [XmlAttributeAttribute("orientation")] + public float Orientation + { + get + { + return this.orientationField; + } + set + { + this.orientationField = value; + this.orientationFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool OrientationSpecified + { + get + { + return this.orientationFieldSpecified; + } + set + { + this.orientationFieldSpecified = value; + } + } + + /// + /// The type of the page within the document + /// (e.g. cover page). + /// + [XmlAttributeAttribute("type")] + public PageXmlPageSimpleType Type + { + get + { + return this.typeField; + } + set + { + this.typeField = value; + this.typeFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool TypeSpecified + { + get + { + return this.typeFieldSpecified; + } + set + { + this.typeFieldSpecified = value; + } + } + + /// + /// The primary language used in the page (lower-level definitions override the page-level definition). 
+ /// + [XmlAttributeAttribute("primaryLanguage")] + public PageXmlLanguageSimpleType PrimaryLanguage + { + get + { + return this.primaryLanguageField; + } + set + { + this.primaryLanguageField = value; + this.primaryLanguageFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool PrimaryLanguageSpecified + { + get + { + return this.primaryLanguageFieldSpecified; + } + set + { + this.primaryLanguageFieldSpecified = value; + } + } + + /// + /// The secondary language used in the page (lower-level definitions override the page-level definition). + /// + [XmlAttributeAttribute("secondaryLanguage")] + public PageXmlLanguageSimpleType SecondaryLanguage + { + get + { + return this.secondaryLanguageField; + } + set + { + this.secondaryLanguageField = value; + this.secondaryLanguageFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool SecondaryLanguageSpecified + { + get + { + return this.secondaryLanguageFieldSpecified; + } + set + { + this.secondaryLanguageFieldSpecified = value; + } + } + + /// + /// The primary script used in the page (lower-level definitions override the page-level definition). + /// + [XmlAttributeAttribute("primaryScript")] + public PageXmlScriptSimpleType PrimaryScript + { + get + { + return this.primaryScriptField; + } + set + { + this.primaryScriptField = value; + this.primaryScriptFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool PrimaryScriptSpecified + { + get + { + return this.primaryScriptFieldSpecified; + } + set + { + this.primaryScriptFieldSpecified = value; + } + } + + /// + /// The secondary script used in the page (lower-level definitions override the page-level definition). 
+ /// + [XmlAttributeAttribute("secondaryScript")] + public PageXmlScriptSimpleType SecondaryScript + { + get + { + return this.secondaryScriptField; + } + set + { + this.secondaryScriptField = value; + this.secondaryScriptFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool SecondaryScriptSpecified + { + get + { + return this.secondaryScriptFieldSpecified; + } + set + { + this.secondaryScriptFieldSpecified = value; + } + } + + /// + /// The direction in which text within lines should be read (order of words and characters), + /// in addition to “textLineOrder” (lower-level definitions override the page-level definition). + /// + [XmlAttributeAttribute("readingDirection")] + public PageXmlReadingDirectionSimpleType ReadingDirection + { + get + { + return this.readingDirectionField; + } + set + { + this.readingDirectionField = value; + this.readingDirectionFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ReadingDirectionSpecified + { + get + { + return this.readingDirectionFieldSpecified; + } + set + { + this.readingDirectionFieldSpecified = value; + } + } + + /// + /// The order of text lines within a block, in addition to “readingDirection” + /// (lower-level definitions override the page-level definition). 
+ /// + [XmlAttributeAttribute("textLineOrder")] + public PageXmlTextLineOrderSimpleType TextLineOrder + { + get + { + return this.textLineOrderField; + } + set + { + this.textLineOrderField = value; + this.textLineOrderFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool TextLineOrderSpecified + { + get + { + return this.textLineOrderFieldSpecified; + } + set + { + this.textLineOrderFieldSpecified = value; + } + } + + /// + /// Confidence value for whole page (between 0 and 1) + /// + [XmlAttributeAttribute("conf")] + public float Conf + { + get + { + return this.confField; + } + set + { + this.confField = value; + this.confFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool ConfSpecified + { + get + { + return this.confFieldSpecified; + } + set + { + this.confFieldSpecified = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [DebuggerStepThroughAttribute()] + [DesignerCategoryAttribute("code")] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public class PageXmlMetadataItem + { + + private PageXmlLabels[] labelsField; + + private PageXmlMetadataItemType typeField; + + private bool typeFieldSpecified; + + private string nameField; + + private string valueField; + + private DateTime dateField; + + private bool dateFieldSpecified; + + /// + /// Semantic labels / tags + /// + [XmlElementAttribute("Labels")] + public PageXmlLabels[] Labels + { + get + { + return this.labelsField; + } + set + { + this.labelsField = value; + } + } + + /// + /// Type of metadata (e.g. 
author) + /// + [XmlAttributeAttribute("type")] + public PageXmlMetadataItemType Type + { + get + { + return this.typeField; + } + set + { + this.typeField = value; + this.typeFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool TypeSpecified + { + get + { + return this.typeFieldSpecified; + } + set + { + this.typeFieldSpecified = value; + } + } + + /// + /// E.g. imagePhotometricInterpretation + /// + [XmlAttributeAttribute("name")] + public string Name + { + get + { + return this.nameField; + } + set + { + this.nameField = value; + } + } + + /// + /// E.g. RGB + /// + [XmlAttributeAttribute("value")] + public string Value + { + get + { + return this.valueField; + } + set + { + this.valueField = value; + } + } + + /// + [XmlAttributeAttribute("date")] + public DateTime Date + { + get + { + return this.dateField; + } + set + { + this.dateField = value; + this.dateFieldSpecified = true; + } + } + + /// + [XmlIgnoreAttribute()] + public bool DateSpecified + { + get + { + return this.dateFieldSpecified; + } + set + { + this.dateFieldSpecified = value; + } + } + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(AnonymousType = true, Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlMetadataItemType + { + + /// + [XmlEnumAttribute("author")] + Author, + + /// + [XmlEnumAttribute("imageProperties")] + ImageProperties, + + /// + [XmlEnumAttribute("processingStep")] + ProcessingStep, + + /// + [XmlEnumAttribute("other")] + Other, + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(AnonymousType = true, Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlUserAttributeType + { + + /// + 
[XmlEnumAttribute("xsd:string")] + XsdString, + + /// + [XmlEnumAttribute("xsd:integer")] + XsdInteger, + + /// + [XmlEnumAttribute("xsd:boolean")] + XsdBoolean, + + /// + [XmlEnumAttribute("xsd:float")] + XsdFloat, + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlTextDataSimpleType + { + + /// + /// Examples: "123.456", "+1234.456", "-1234.456", "-.456", "-456" + /// + [XmlEnumAttribute("xsd:decimal")] + XsdDecimal, + + /// + /// Examples: "123.456", "+1234.456", "-1.2344e56", "-.45E-6", "INF", "-INF", "NaN" + /// + [XmlEnumAttribute("xsd:float")] + XsdFloat, + + /// + /// Examples: "123456", "+00000012", "-1", "-456" + /// + [XmlEnumAttribute("xsd:integer")] + XsdInteger, + + /// + /// Examples: "true", "false", "1", "0" + /// + [XmlEnumAttribute("xsd:boolean")] + XsdBoolean, + + /// + /// Examples: "2001-10-26", "2001-10-26+02:00", "2001-10-26Z", "2001-10-26+00:00", "-2001-10-26", "-20000-04-01" + /// + [XmlEnumAttribute("xsd:date")] + XsdDate, + + /// + /// Examples: "21:32:52", "21:32:52+02:00", "19:32:52Z", "19:32:52+00:00", "21:32:52.12679" + /// + [XmlEnumAttribute("xsd:time")] + XsdTime, + + /// + /// Examples: "2001-10-26T21:32:52", "2001-10-26T21:32:52+02:00", "2001-10-26T19:32:52Z", "2001-10-26T19:32:52+00:00","-2001-10-26T21:32:52", "2001-10-26T21:32:52.12679" + /// + [XmlEnumAttribute("xsd:dateTime")] + XsdDateTime, + + /// + /// Generic text string + /// + [XmlEnumAttribute("xsd:string")] + XsdString, + + /// + /// An XSD type that is not listed or a custom type (use dataTypeDetails attribute). 
+ /// + [XmlEnumAttribute("other")] + Other, + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(AnonymousType = true, Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlGraphemeBaseCharType + { + + /// + [XmlEnumAttribute("base")] + Base, + + /// + [XmlEnumAttribute("combining")] + Combining, + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlColourSimpleType + { + + /// + [XmlEnumAttribute("black")] + Black, + + /// + [XmlEnumAttribute("blue")] + Blue, + + /// + [XmlEnumAttribute("brown")] + Brown, + + /// + [XmlEnumAttribute("cyan")] + Cyan, + + /// + [XmlEnumAttribute("green")] + Green, + + /// + [XmlEnumAttribute("grey")] + Grey, + + /// + [XmlEnumAttribute("indigo")] + Indigo, + + /// + [XmlEnumAttribute("magenta")] + Magenta, + + /// + [XmlEnumAttribute("orange")] + Orange, + + /// + [XmlEnumAttribute("pink")] + Pink, + + /// + [XmlEnumAttribute("red")] + Red, + + /// + [XmlEnumAttribute("turquoise")] + Turquoise, + + /// + [XmlEnumAttribute("violet")] + Violet, + + /// + [XmlEnumAttribute("white")] + White, + + /// + [XmlEnumAttribute("yellow")] + Yellow, + + /// + [XmlEnumAttribute("other")] + Other, + } + + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlUnderlineStyleSimpleType + { + + /// + [XmlEnumAttribute("singleLine")] + SingleLine, + + /// + [XmlEnumAttribute("doubleLine")] + DoubleLine, + + /// + [XmlEnumAttribute("other")] + Other, 
+ } + + /// + /// iso15924 2016-07-14 + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlScriptSimpleType + { + + /// + [XmlEnumAttribute("Adlm - Adlam")] + AdlmAdlam, + + /// + [XmlEnumAttribute("Afak - Afaka")] + AfakAfaka, + + /// + [XmlEnumAttribute("Aghb - Caucasian Albanian")] + AghbCaucasianAlbanian, + + /// + [XmlEnumAttribute("Ahom - Ahom, Tai Ahom")] + AhomAhomTaiAhom, + + /// + [XmlEnumAttribute("Arab - Arabic")] + ArabArabic, + + /// + [XmlEnumAttribute("Aran - Arabic (Nastaliq variant)")] + AranArabicNastaliqVariant, + + /// + [XmlEnumAttribute("Armi - Imperial Aramaic")] + ArmiImperialAramaic, + + /// + [XmlEnumAttribute("Armn - Armenian")] + ArmnArmenian, + + /// + [XmlEnumAttribute("Avst - Avestan")] + AvstAvestan, + + /// + [XmlEnumAttribute("Bali - Balinese")] + BaliBalinese, + + /// + [XmlEnumAttribute("Bamu - Bamum")] + BamuBamum, + + /// + [XmlEnumAttribute("Bass - Bassa Vah")] + BassBassaVah, + + /// + [XmlEnumAttribute("Batk - Batak")] + BatkBatak, + + /// + [XmlEnumAttribute("Beng - Bengali")] + BengBengali, + + /// + [XmlEnumAttribute("Bhks - Bhaiksuki")] + BhksBhaiksuki, + + /// + [XmlEnumAttribute("Blis - Blissymbols")] + BlisBlissymbols, + + /// + [XmlEnumAttribute("Bopo - Bopomofo")] + BopoBopomofo, + + /// + [XmlEnumAttribute("Brah - Brahmi")] + BrahBrahmi, + + /// + [XmlEnumAttribute("Brai - Braille")] + BraiBraille, + + /// + [XmlEnumAttribute("Bugi - Buginese")] + BugiBuginese, + + /// + [XmlEnumAttribute("Buhd - Buhid")] + BuhdBuhid, + + /// + [XmlEnumAttribute("Cakm - Chakma")] + CakmChakma, + + /// + [XmlEnumAttribute("Cans - Unified Canadian Aboriginal Syllabics")] + CansUnifiedCanadianAboriginalSyllabics, + + /// + [XmlEnumAttribute("Cari - Carian")] + CariCarian, + + /// + [XmlEnumAttribute("Cham - Cham")] + 
ChamCham, + + /// + [XmlEnumAttribute("Cher - Cherokee")] + CherCherokee, + + /// + [XmlEnumAttribute("Cirt - Cirth")] + CirtCirth, + + /// + [XmlEnumAttribute("Copt - Coptic")] + CoptCoptic, + + /// + [XmlEnumAttribute("Cprt - Cypriot")] + CprtCypriot, + + /// + [XmlEnumAttribute("Cyrl - Cyrillic")] + CyrlCyrillic, + + /// + [XmlEnumAttribute("Cyrs - Cyrillic (Old Church Slavonic variant)")] + CyrsCyrillicOldChurchSlavonicVariant, + + /// + [XmlEnumAttribute("Deva - Devanagari (Nagari)")] + DevaDevanagariNagari, + + /// + [XmlEnumAttribute("Dsrt - Deseret (Mormon)")] + DsrtDeseretMormon, + + /// + [XmlEnumAttribute("Dupl - Duployan shorthand, Duployan stenography")] + DuplDuployanShorthandDuployanStenography, + + /// + [XmlEnumAttribute("Egyd - Egyptian demotic")] + EgydEgyptianDemotic, + + /// + [XmlEnumAttribute("Egyh - Egyptian hieratic")] + EgyhEgyptianHieratic, + + /// + [XmlEnumAttribute("Egyp - Egyptian hieroglyphs")] + EgypEgyptianHieroglyphs, + + /// + [XmlEnumAttribute("Elba - Elbasan")] + ElbaElbasan, + + /// + [XmlEnumAttribute("Ethi - Ethiopic")] + EthiEthiopic, + + /// + [XmlEnumAttribute("Geok - Khutsuri (Asomtavruli and Nuskhuri)")] + GeokKhutsuriAsomtavruliAndNuskhuri, + + /// + [XmlEnumAttribute("Geor - Georgian (Mkhedruli)")] + GeorGeorgianMkhedruli, + + /// + [XmlEnumAttribute("Glag - Glagolitic")] + GlagGlagolitic, + + /// + [XmlEnumAttribute("Goth - Gothic")] + GothGothic, + + /// + [XmlEnumAttribute("Gran - Grantha")] + GranGrantha, + + /// + [XmlEnumAttribute("Grek - Greek")] + GrekGreek, + + /// + [XmlEnumAttribute("Gujr - Gujarati")] + GujrGujarati, + + /// + [XmlEnumAttribute("Guru - Gurmukhi")] + GuruGurmukhi, + + /// + [XmlEnumAttribute("Hanb - Han with Bopomofo")] + HanbHanwithBopomofo, + + /// + [XmlEnumAttribute("Hang - Hangul")] + HangHangul, + + /// + [XmlEnumAttribute("Hani - Han (Hanzi, Kanji, Hanja)")] + HaniHanHanziKanjiHanja, + + /// + [XmlEnumAttribute("Hano - Hanunoo (Hanunóo)")] + HanoHanunooHanunóo, + + /// + 
[XmlEnumAttribute("Hans - Han (Simplified variant)")] + HansHanSimplifiedVariant, + + /// + [XmlEnumAttribute("Hant - Han (Traditional variant)")] + HantHanTraditionalVariant, + + /// + [XmlEnumAttribute("Hatr - Hatran")] + HatrHatran, + + /// + [XmlEnumAttribute("Hebr - Hebrew")] + HebrHebrew, + + /// + [XmlEnumAttribute("Hira - Hiragana")] + HiraHiragana, + + /// + [XmlEnumAttribute("Hluw - Anatolian Hieroglyphs")] + HluwAnatolianHieroglyphs, + + /// + [XmlEnumAttribute("Hmng - Pahawh Hmong")] + HmngPahawhHmong, + + /// + [XmlEnumAttribute("Hrkt - Japanese syllabaries")] + HrktJapaneseSyllabaries, + + /// + [XmlEnumAttribute("Hung - Old Hungarian (Hungarian Runic)")] + HungOldHungarianHungarianRunic, + + /// + [XmlEnumAttribute("Inds - Indus (Harappan)")] + IndsIndusHarappan, + + /// + [XmlEnumAttribute("Ital - Old Italic (Etruscan, Oscan etc.)")] + ItalOldItalicEtruscanOscanEtc, + + /// + [XmlEnumAttribute("Jamo - Jamo")] + JamoJamo, + + /// + [XmlEnumAttribute("Java - Javanese")] + JavaJavanese, + + /// + [XmlEnumAttribute("Jpan - Japanese")] + JpanJapanese, + + /// + [XmlEnumAttribute("Jurc - Jurchen")] + JurcJurchen, + + /// + [XmlEnumAttribute("Kali - Kayah Li")] + KaliKayahLi, + + /// + [XmlEnumAttribute("Kana - Katakana")] + KanaKatakana, + + /// + [XmlEnumAttribute("Khar - Kharoshthi")] + KharKharoshthi, + + /// + [XmlEnumAttribute("Khmr - Khmer")] + KhmrKhmer, + + /// + [XmlEnumAttribute("Khoj - Khojki")] + KhojKhojki, + + /// + [XmlEnumAttribute("Kitl - Khitan large script")] + KitlKhitanlargescript, + + /// + [XmlEnumAttribute("Kits - Khitan small script")] + KitsKhitansmallscript, + + /// + [XmlEnumAttribute("Knda - Kannada")] + KndaKannada, + + /// + [XmlEnumAttribute("Kore - Korean (alias for Hangul + Han)")] + KoreKoreanaliasforHangulHan, + + /// + [XmlEnumAttribute("Kpel - Kpelle")] + KpelKpelle, + + /// + [XmlEnumAttribute("Kthi - Kaithi")] + KthiKaithi, + + /// + [XmlEnumAttribute("Lana - Tai Tham (Lanna)")] + LanaTaiThamLanna, + + /// + 
[XmlEnumAttribute("Laoo - Lao")] + LaooLao, + + /// + [XmlEnumAttribute("Latf - Latin (Fraktur variant)")] + LatfLatinFrakturvariant, + + /// + [XmlEnumAttribute("Latg - Latin (Gaelic variant)")] + LatgLatinGaelicvariant, + + /// + [XmlEnumAttribute("Latn - Latin")] + LatnLatin, + + /// + [XmlEnumAttribute("Leke - Leke")] + LekeLeke, + + /// + [XmlEnumAttribute("Lepc - Lepcha (Róng)")] + LepcLepchaRóng, + + /// + [XmlEnumAttribute("Limb - Limbu")] + LimbLimbu, + + /// + [XmlEnumAttribute("Lina - Linear A")] + LinaLinearA, + + /// + [XmlEnumAttribute("Linb - Linear B")] + LinbLinearB, + + /// + [XmlEnumAttribute("Lisu - Lisu (Fraser)")] + LisuLisuFraser, + + /// + [XmlEnumAttribute("Loma - Loma")] + LomaLoma, + + /// + [XmlEnumAttribute("Lyci - Lycian")] + LyciLycian, + + /// + [XmlEnumAttribute("Lydi - Lydian")] + LydiLydian, + + /// + [XmlEnumAttribute("Mahj - Mahajani")] + MahjMahajani, + + /// + [XmlEnumAttribute("Mand - Mandaic, Mandaean")] + MandMandaicMandaean, + + /// + [XmlEnumAttribute("Mani - Manichaean")] + ManiManichaean, + + /// + [XmlEnumAttribute("Marc - Marchen")] + MarcMarchen, + + /// + [XmlEnumAttribute("Maya - Mayan hieroglyphs")] + MayaMayanhieroglyphs, + + /// + [XmlEnumAttribute("Mend - Mende Kikakui")] + MendMendeKikakui, + + /// + [XmlEnumAttribute("Merc - Meroitic Cursive")] + MercMeroiticCursive, + + /// + [XmlEnumAttribute("Mero - Meroitic Hieroglyphs")] + MeroMeroiticHieroglyphs, + + /// + [XmlEnumAttribute("Mlym - Malayalam")] + MlymMalayalam, + + /// + [XmlEnumAttribute("Modi - Modi, Moḍī")] + ModiModiMoḍī, + + /// + [XmlEnumAttribute("Mong - Mongolian")] + MongMongolian, + + /// + [XmlEnumAttribute("Moon - Moon (Moon code, Moon script, Moon type)")] + MoonMoonMooncodeMoonscriptMoontype, + + /// + [XmlEnumAttribute("Mroo - Mro, Mru")] + MrooMroMru, + + /// + [XmlEnumAttribute("Mtei - Meitei Mayek (Meithei, Meetei)")] + MteiMeiteiMayekMeitheiMeetei, + + /// + [XmlEnumAttribute("Mult - Multani")] + MultMultani, + + /// + 
[XmlEnumAttribute("Mymr - Myanmar (Burmese)")] + MymrMyanmarBurmese, + + /// + [XmlEnumAttribute("Narb - Old North Arabian (Ancient North Arabian)")] + NarbOldNorthArabianAncientNorthArabian, + + /// + [XmlEnumAttribute("Nbat - Nabataean")] + NbatNabataean, + + /// + [XmlEnumAttribute("Newa - Newa, Newar, Newari")] + NewaNewaNewarNewari, + + /// + [XmlEnumAttribute("Nkgb - Nakhi Geba")] + NkgbNakhiGeba, + + /// + [XmlEnumAttribute("Nkoo - N’Ko")] + NkooNKo, + + /// + [XmlEnumAttribute("Nshu - Nüshu")] + NshuNüshu, + + /// + [XmlEnumAttribute("Ogam - Ogham")] + OgamOgham, + + /// + [XmlEnumAttribute("Olck - Ol Chiki (Ol Cemet’, Ol, Santali)")] + OlckOlChikiOlCemetOlSantali, + + /// + [XmlEnumAttribute("Orkh - Old Turkic, Orkhon Runic")] + OrkhOldTurkicOrkhonRunic, + + /// + [XmlEnumAttribute("Orya - Oriya")] + OryaOriya, + + /// + [XmlEnumAttribute("Osge - Osage")] + OsgeOsage, + + /// + [XmlEnumAttribute("Osma - Osmanya")] + OsmaOsmanya, + + /// + [XmlEnumAttribute("Palm - Palmyrene")] + PalmPalmyrene, + + /// + [XmlEnumAttribute("Pauc - Pau Cin Hau")] + PaucPauCinHau, + + /// + [XmlEnumAttribute("Perm - Old Permic")] + PermOldPermic, + + /// + [XmlEnumAttribute("Phag - Phags-pa")] + PhagPhagspa, + + /// + [XmlEnumAttribute("Phli - Inscriptional Pahlavi")] + PhliInscriptionalPahlavi, + + /// + [XmlEnumAttribute("Phlp - Psalter Pahlavi")] + PhlpPsalterPahlavi, + + /// + [XmlEnumAttribute("Phlv - Book Pahlavi")] + PhlvBookPahlavi, + + /// + [XmlEnumAttribute("Phnx - Phoenician")] + PhnxPhoenician, + + /// + [XmlEnumAttribute("Piqd - Klingon (KLI pIqaD)")] + PiqdKlingonKLIpIqaD, + + /// + [XmlEnumAttribute("Plrd - Miao (Pollard)")] + PlrdMiaoPollard, + + /// + [XmlEnumAttribute("Prti - Inscriptional Parthian")] + PrtiInscriptionalParthian, + + /// + [XmlEnumAttribute("Rjng - Rejang (Redjang, Kaganga)")] + RjngRejangRedjangKaganga, + + /// + [XmlEnumAttribute("Roro - Rongorongo")] + RoroRongorongo, + + /// + [XmlEnumAttribute("Runr - Runic")] + RunrRunic, + + /// + 
[XmlEnumAttribute("Samr - Samaritan")] + SamrSamaritan, + + /// + [XmlEnumAttribute("Sara - Sarati")] + SaraSarati, + + /// + [XmlEnumAttribute("Sarb - Old South Arabian")] + SarbOldSouthArabian, + + /// + [XmlEnumAttribute("Saur - Saurashtra")] + SaurSaurashtra, + + /// + [XmlEnumAttribute("Sgnw - SignWriting")] + SgnwSignWriting, + + /// + [XmlEnumAttribute("Shaw - Shavian (Shaw)")] + ShawShavianShaw, + + /// + [XmlEnumAttribute("Shrd - Sharada, Śāradā")] + ShrdSharadaŚāradā, + + /// + [XmlEnumAttribute("Sidd - Siddham")] + SiddSiddham, + + /// + [XmlEnumAttribute("Sind - Khudawadi, Sindhi")] + SindKhudawadiSindhi, + + /// + [XmlEnumAttribute("Sinh - Sinhala")] + SinhSinhala, + + /// + [XmlEnumAttribute("Sora - Sora Sompeng")] + SoraSoraSompeng, + + /// + [XmlEnumAttribute("Sund - Sundanese")] + SundSundanese, + + /// + [XmlEnumAttribute("Sylo - Syloti Nagri")] + SyloSylotiNagri, + + /// + [XmlEnumAttribute("Syrc - Syriac")] + SyrcSyriac, + + /// + [XmlEnumAttribute("Syre - Syriac (Estrangelo variant)")] + SyreSyriacEstrangeloVariant, + + /// + [XmlEnumAttribute("Syrj - Syriac (Western variant)")] + SyrjSyriacWesternVariant, + + /// + [XmlEnumAttribute("Syrn - Syriac (Eastern variant)")] + SyrnSyriacEasternVariant, + + /// + [XmlEnumAttribute("Tagb - Tagbanwa")] + TagbTagbanwa, + + /// + [XmlEnumAttribute("Takr - Takri")] + TakrTakri, + + /// + [XmlEnumAttribute("Tale - Tai Le")] + TaleTaiLe, + + /// + [XmlEnumAttribute("Talu - New Tai Lue")] + TaluNewTaiLue, + + /// + [XmlEnumAttribute("Taml - Tamil")] + TamlTamil, + + /// + [XmlEnumAttribute("Tang - Tangut")] + TangTangut, + + /// + [XmlEnumAttribute("Tavt - Tai Viet")] + TavtTaiViet, + + /// + [XmlEnumAttribute("Telu - Telugu")] + TeluTelugu, + + /// + [XmlEnumAttribute("Teng - Tengwar")] + TengTengwar, + + /// + [XmlEnumAttribute("Tfng - Tifinagh (Berber)")] + TfngTifinaghBerber, + + /// + [XmlEnumAttribute("Tglg - Tagalog (Baybayin, Alibata)")] + TglgTagalogBaybayinAlibata, + + /// + [XmlEnumAttribute("Thaa 
- Thaana")] + ThaaThaana, + + /// + [XmlEnumAttribute("Thai - Thai")] + ThaiThai, + + /// + [XmlEnumAttribute("Tibt - Tibetan")] + TibtTibetan, + + /// + [XmlEnumAttribute("Tirh - Tirhuta")] + TirhTirhuta, + + /// + [XmlEnumAttribute("Ugar - Ugaritic")] + UgarUgaritic, + + /// + [XmlEnumAttribute("Vaii - Vai")] + VaiiVai, + + /// + [XmlEnumAttribute("Visp - Visible Speech")] + VispVisibleSpeech, + + /// + [XmlEnumAttribute("Wara - Warang Citi (Varang Kshiti)")] + WaraWarangCitiVarangKshiti, + + /// + [XmlEnumAttribute("Wole - Woleai")] + WoleWoleai, + + /// + [XmlEnumAttribute("Xpeo - Old Persian")] + XpeoOldPersian, + + /// + [XmlEnumAttribute("Xsux - Cuneiform, Sumero-Akkadian")] + XsuxCuneiformSumeroAkkadian, + + /// + [XmlEnumAttribute("Yiii - Yi")] + YiiiYi, + + /// + [XmlEnumAttribute("Zinh - Code for inherited script")] + ZinhCodeForInheritedScript, + + /// + [XmlEnumAttribute("Zmth - Mathematical notation")] + ZmthMathematicalNotation, + + /// + [XmlEnumAttribute("Zsye - Symbols (Emoji variant)")] + ZsyeSymbolsEmojiVariant, + + /// + [XmlEnumAttribute("Zsym - Symbols")] + ZsymSymbols, + + /// + [XmlEnumAttribute("Zxxx - Code for unwritten documents")] + ZxxxCodeForUnwrittenDocuments, + + /// + [XmlEnumAttribute("Zyyy - Code for undetermined script")] + ZyyyCodeForUndeterminedScript, + + /// + [XmlEnumAttribute("Zzzz - Code for uncoded script")] + ZzzzCodeForUncodedScript, + + /// + [XmlEnumAttribute("other")] + Other, + } + + /// + /// Text production type; each member is written to XML under the name given in its XmlEnumAttribute (for example handwritten-cursive) + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlProductionSimpleType + { + + /// + [XmlEnumAttribute("printed")] + Printed, + + /// + [XmlEnumAttribute("typewritten")] + Typewritten, + + /// + [XmlEnumAttribute("handwritten-cursive")] + HandwrittenCursive, + + /// + 
[XmlEnumAttribute("handwritten-printscript")] + HandwrittenPrintscript, + + /// + [XmlEnumAttribute("medieval-manuscript")] + MedievalManuscript, + + /// + [XmlEnumAttribute("other")] + Other, + } + + /// + /// ISO 639.x 2016-07-14 language names; multi-word names carry an XmlEnumAttribute holding the spaced or hyphenated form (for example Hiri Motu) + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlLanguageSimpleType + { + + /// + Abkhaz, + + /// + Afar, + + /// + Afrikaans, + + /// + Akan, + + /// + Albanian, + + /// + Amharic, + + /// + Arabic, + + /// + Aragonese, + + /// + Armenian, + + /// + Assamese, + + /// + Avaric, + + /// + Avestan, + + /// + Aymara, + + /// + Azerbaijani, + + /// + Bambara, + + /// + Bashkir, + + /// + Basque, + + /// + Belarusian, + + /// + Bengali, + + /// + Bihari, + + /// + Bislama, + + /// + Bosnian, + + /// + Breton, + + /// + Bulgarian, + + /// + Burmese, + + /// + Cambodian, + + /// + Cantonese, + + /// + Catalan, + + /// + Chamorro, + + /// + Chechen, + + /// + Chichewa, + + /// + Chinese, + + /// + Chuvash, + + /// + Cornish, + + /// + Corsican, + + /// + Cree, + + /// + Croatian, + + /// + Czech, + + /// + Danish, + + /// + Divehi, + + /// + Dutch, + + /// + Dzongkha, + + /// + English, + + /// + Esperanto, + + /// + Estonian, + + /// + Ewe, + + /// + Faroese, + + /// + Fijian, + + /// + Finnish, + + /// + French, + + /// + Fula, + + /// + Gaelic, + + /// + Galician, + + /// + Ganda, + + /// + Georgian, + + /// + German, + + /// + Greek, + + /// + Guaraní, + + /// + Gujarati, + + /// + Haitian, + + /// + Hausa, + + /// + Hebrew, + + /// + Herero, + + /// + Hindi, + + /// + [XmlEnumAttribute("Hiri Motu")] + HiriMotu, + + /// + Hungarian, + + /// + Icelandic, + + /// + Ido, + + /// + Igbo, + + /// + Indonesian, + + /// + Interlingua, + + /// + Interlingue, + + /// + Inuktitut, + + /// + Inupiaq, + + /// + Irish, + + /// + 
Italian, + + /// + Japanese, + + /// + Javanese, + + /// + Kalaallisut, + + /// + Kannada, + + /// + Kanuri, + + /// + Kashmiri, + + /// + Kazakh, + + /// + Khmer, + + /// + Kikuyu, + + /// + Kinyarwanda, + + /// + Kirundi, + + /// + Komi, + + /// + Kongo, + + /// + Korean, + + /// + Kurdish, + + /// + Kwanyama, + + /// + Kyrgyz, + + /// + Lao, + + /// + Latin, + + /// + Latvian, + + /// + Limburgish, + + /// + Lingala, + + /// + Lithuanian, + + /// + [XmlEnumAttribute("Luba-Katanga")] + LubaKatanga, + + /// + Luxembourgish, + + /// + Macedonian, + + /// + Malagasy, + + /// + Malay, + + /// + Malayalam, + + /// + Maltese, + + /// + Manx, + + /// + Māori, + + /// + Marathi, + + /// + Marshallese, + + /// + Mongolian, + + /// + Nauru, + + /// + Navajo, + + /// + Ndonga, + + /// + Nepali, + + /// + [XmlEnumAttribute("North Ndebele")] + NorthNdebele, + + /// + [XmlEnumAttribute("Northern Sami")] + NorthernSami, + + /// + Norwegian, + + /// + [XmlEnumAttribute("Norwegian Bokmål")] + NorwegianBokmål, + + /// + [XmlEnumAttribute("Norwegian Nynorsk")] + NorwegianNynorsk, + + /// + Nuosu, + + /// + Occitan, + + /// + Ojibwe, + + /// + [XmlEnumAttribute("Old Church Slavonic")] + OldChurchSlavonic, + + /// + Oriya, + + /// + Oromo, + + /// + Ossetian, + + /// + Pāli, + + /// + Panjabi, + + /// + Pashto, + + /// + Persian, + + /// + Polish, + + /// + Portuguese, + + /// + Punjabi, + + /// + Quechua, + + /// + Romanian, + + /// + Romansh, + + /// + Russian, + + /// + Samoan, + + /// + Sango, + + /// + Sanskrit, + + /// + Sardinian, + + /// + Serbian, + + /// + Shona, + + /// + Sindhi, + + /// + Sinhala, + + /// + Slovak, + + /// + Slovene, + + /// + Somali, + + /// + [XmlEnumAttribute("South Ndebele")] + SouthNdebele, + + /// + [XmlEnumAttribute("Southern Sotho")] + SouthernSotho, + + /// + Spanish, + + /// + Sundanese, + + /// + Swahili, + + /// + Swati, + + /// + Swedish, + + /// + Tagalog, + + /// + Tahitian, + + /// + Tajik, + + /// + Tamil, + + /// + Tatar, + + /// + 
Telugu, + + /// + Thai, + + /// + Tibetan, + + /// + Tigrinya, + + /// + Tonga, + + /// + Tsonga, + + /// + Tswana, + + /// + Turkish, + + /// + Turkmen, + + /// + Twi, + + /// + Uighur, + + /// + Ukrainian, + + /// + Urdu, + + /// + Uzbek, + + /// + Venda, + + /// + Vietnamese, + + /// + Volapük, + + /// + Walloon, + + /// + Welsh, + + /// + [XmlEnumAttribute("Western Frisian")] + WesternFrisian, + + /// + Wolof, + + /// + Xhosa, + + /// + Yiddish, + + /// + Yoruba, + + /// + Zhuang, + + /// + Zulu, + + /// + [XmlEnumAttribute("other")] + Other, + } + + /// Reading directions: left-to-right, right-to-left, top-to-bottom or bottom-to-top (XML names given by the XmlEnumAttribute on each member) + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlReadingDirectionSimpleType + { + + /// + [XmlEnumAttribute("left-to-right")] + LeftToRight, + + /// + [XmlEnumAttribute("right-to-left")] + RightToLeft, + + /// + [XmlEnumAttribute("top-to-bottom")] + TopToBottom, + + /// + [XmlEnumAttribute("bottom-to-top")] + BottomToTop, + } + + /// Chart types: bar, line, pie, scatter, surface or other (XML names given by the XmlEnumAttribute on each member) + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlChartSimpleType + { + + /// + [XmlEnumAttribute("bar")] + Bar, + + /// + [XmlEnumAttribute("line")] + Line, + + /// + [XmlEnumAttribute("pie")] + Pie, + + /// + [XmlEnumAttribute("scatter")] + Scatter, + + /// + [XmlEnumAttribute("surface")] + Surface, + + /// + [XmlEnumAttribute("other")] + Other, + } + + /// Graphics types such as logo, letterhead, decoration, frame, stamp, signature or barcode, with other as the fallback member + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlGraphicsSimpleType + { + + /// + 
[XmlEnumAttribute("logo")] + Logo, + + /// + [XmlEnumAttribute("letterhead")] + Letterhead, + + /// + [XmlEnumAttribute("decoration")] + Decoration, + + /// + [XmlEnumAttribute("frame")] + Frame, + + /// + [XmlEnumAttribute("handwritten-annotation")] + HandwrittenAnnotation, + + /// + [XmlEnumAttribute("stamp")] + Stamp, + + /// + [XmlEnumAttribute("signature")] + Signature, + + /// + [XmlEnumAttribute("barcode")] + Barcode, + + /// + [XmlEnumAttribute("paper-grow")] + PaperGrow, + + /// + [XmlEnumAttribute("punch-hole")] + PunchHole, + + /// + [XmlEnumAttribute("other")] + Other, + } + + /// Colour depths: bilevel, greyscale, colour or other (XML names given by the XmlEnumAttribute on each member) + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlColourDepthSimpleType + { + + /// + [XmlEnumAttribute("bilevel")] + BiLevel, + + /// + [XmlEnumAttribute("greyscale")] + GreyScale, + + /// + [XmlEnumAttribute("colour")] + Colour, + + /// + [XmlEnumAttribute("other")] + Other, + } + + /// Text types (paragraph, heading, caption, header, footer, page-number and so on); each member is written to XML under the name given in its XmlEnumAttribute + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlTextSimpleType + { + + /// + [XmlEnumAttribute("paragraph")] + Paragraph, + + /// + [XmlEnumAttribute("heading")] + Heading, + + /// + [XmlEnumAttribute("caption")] + Caption, + + /// + [XmlEnumAttribute("header")] + Header, + + /// + [XmlEnumAttribute("footer")] + Footer, + + /// + [XmlEnumAttribute("page-number")] + PageNumber, + + /// + [XmlEnumAttribute("drop-capital")] + DropCapital, + + /// + [XmlEnumAttribute("credit")] + Credit, + + /// + [XmlEnumAttribute("floating")] + Floating, + + /// + [XmlEnumAttribute("signature-mark")] + SignatureMark, + + /// + [XmlEnumAttribute("catch-word")] + CatchWord, + + /// + 
[XmlEnumAttribute("marginalia")] + Marginalia, + + /// + [XmlEnumAttribute("footnote")] + FootNote, + + /// + [XmlEnumAttribute("footnote-continued")] + FootNoteContinued, + + /// + [XmlEnumAttribute("endnote")] + EndNote, + + /// + [XmlEnumAttribute("TOC-entry")] + TocEntry, + + /// NOTE(review): LisLabel looks like a typo for ListLabel (the XML name is list-label); renaming would change the public member name - confirm before changing + [XmlEnumAttribute("list-label")] + LisLabel, + + /// + [XmlEnumAttribute("other")] + Other, + } + + /// Text line orders: top-to-bottom, bottom-to-top, left-to-right or right-to-left (XML names given by the XmlEnumAttribute on each member) + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlTextLineOrderSimpleType + { + + /// + [XmlEnumAttribute("top-to-bottom")] + TopToBottom, + + /// + [XmlEnumAttribute("bottom-to-top")] + BottomToTop, + + /// + [XmlEnumAttribute("left-to-right")] + LeftToRight, + + /// + [XmlEnumAttribute("right-to-left")] + RightToLeft, + } + + /// Alignments: left, centre, right or justify (XML names given by the XmlEnumAttribute on each member) + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlAlignSimpleType + { + + /// + [XmlEnumAttribute("left")] + Left, + + /// + [XmlEnumAttribute("centre")] + Centre, + + /// + [XmlEnumAttribute("right")] + Right, + + /// + [XmlEnumAttribute("justify")] + Justify, + } + + /// Relation types: link or join; declared as an anonymous XML type (AnonymousType = true) + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(AnonymousType = true, Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlRelationType + { + + /// + [XmlEnumAttribute("link")] + Link, + + /// + [XmlEnumAttribute("join")] + Join, + } + + /// Group types: paragraph, list, list-item, figure, article, div or other (XML names given by the XmlEnumAttribute on each member) + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + 
[XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlGroupSimpleType + { + + /// + [XmlEnumAttribute("paragraph")] + Paragraph, + + /// + [XmlEnumAttribute("list")] + List, + + /// + [XmlEnumAttribute("list-item")] + ListItem, + + /// + [XmlEnumAttribute("figure")] + Figure, + + /// + [XmlEnumAttribute("article")] + Article, + + /// + [XmlEnumAttribute("div")] + Div, + + /// + [XmlEnumAttribute("other")] + Other, + } + + /// + /// Specifies the unit of the resolution information referring to a standardised unit of measurement (pixels per inch, pixels per centimeter or other). + /// + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(AnonymousType = true, Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlPageImageResolutionUnit + { + + /// Pixels per inch + PPI, + + /// Pixels per centimeter + PPCM, + + /// NOTE(review): lower-case member name, unlike the Other members of the other enums in this file; the attribute repeats the same XML name - renaming would change the public member name + [XmlEnumAttribute("other")] + other, + } + + /// Page types: front-cover, back-cover, title, table-of-contents, index, content, blank or other (XML names given by the XmlEnumAttribute on each member) + [EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + [GeneratedCodeAttribute("xsd", "4.6.1055.0")] + [SerializableAttribute()] + [XmlTypeAttribute(Namespace = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")] + public enum PageXmlPageSimpleType + { + + /// + [XmlEnumAttribute("front-cover")] + FrontCover, + + /// + [XmlEnumAttribute("back-cover")] + BackCover, + + /// + [XmlEnumAttribute("title")] + Title, + + /// + [XmlEnumAttribute("table-of-contents")] + TableOfContents, + + /// + [XmlEnumAttribute("index")] + Index, + + /// + [XmlEnumAttribute("content")] + Content, + + /// + [XmlEnumAttribute("blank")] + Blank, + + /// + [XmlEnumAttribute("other")] + Other, + } + } + #endregion +} diff --git a/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs b/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs index b869802b..9a1971d9 100644 --- a/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs +++ 
b/src/UglyToad.PdfPig/Geometry/PdfRectangle.cs @@ -33,7 +33,7 @@ /// /// Centroid point of the rectangle. /// - public PdfPoint Centroid => new PdfPoint(TopLeft.X + (TopLeft.X - TopLeft.X) / 2, BottomLeft.Y + (TopLeft.Y - BottomLeft.Y) / 2); + public PdfPoint Centroid => new PdfPoint(Left + (Right - Left) / 2, Bottom + (Top - Bottom) / 2); /// /// Width of the rectangle.