diff --git a/src/UglyToad.Pdf.Tests/Parser/Parts/ThrowingReader.cs b/src/UglyToad.Pdf.Tests/Parser/Parts/ThrowingReader.cs index 3c3a4b78..1482ea85 100644 --- a/src/UglyToad.Pdf.Tests/Parser/Parts/ThrowingReader.cs +++ b/src/UglyToad.Pdf.Tests/Parser/Parts/ThrowingReader.cs @@ -3,7 +3,7 @@ using System; using IO; - public class ThrowingReader : IRandomAccessRead + internal class ThrowingReader : IRandomAccessRead { private readonly IRandomAccessRead reader; diff --git a/src/UglyToad.Pdf.Tests/StringBytesTestConverter.cs b/src/UglyToad.Pdf.Tests/StringBytesTestConverter.cs index 38be5f6d..08fe7545 100644 --- a/src/UglyToad.Pdf.Tests/StringBytesTestConverter.cs +++ b/src/UglyToad.Pdf.Tests/StringBytesTestConverter.cs @@ -5,7 +5,7 @@ using Pdf.Tokenization.Scanner; using Pdf.Util; - public static class StringBytesTestConverter + internal static class StringBytesTestConverter { public static Result Convert(string s, bool readFirst = true) { diff --git a/src/UglyToad.Pdf/Content/CropBox.cs b/src/UglyToad.Pdf/Content/CropBox.cs index f9635fd0..37014879 100644 --- a/src/UglyToad.Pdf/Content/CropBox.cs +++ b/src/UglyToad.Pdf/Content/CropBox.cs @@ -7,7 +7,7 @@ /// /// Defines the visible region, contents expanding beyond the crop box should be clipped. /// - public class CropBox + internal class CropBox { [NotNull] public PdfRectangle Bounds { get; } diff --git a/src/UglyToad.Pdf/Content/DocumentInformation.cs b/src/UglyToad.Pdf/Content/DocumentInformation.cs index 45d0e20b..51d737d3 100644 --- a/src/UglyToad.Pdf/Content/DocumentInformation.cs +++ b/src/UglyToad.Pdf/Content/DocumentInformation.cs @@ -70,6 +70,9 @@ representation = builder.ToString() ?? string.Empty; } + /// + /// Gets a string representing this document information. entries are not shown. 
+ /// public override string ToString() { return representation; @@ -82,7 +85,7 @@ return; } - builder.Append(name).Append(": ").Append(value).Append(";"); + builder.Append(name).Append(": ").Append(value).Append("; "); } } } diff --git a/src/UglyToad.Pdf/Content/MediaBox.cs b/src/UglyToad.Pdf/Content/MediaBox.cs index 6be5212b..29677a0b 100644 --- a/src/UglyToad.Pdf/Content/MediaBox.cs +++ b/src/UglyToad.Pdf/Content/MediaBox.cs @@ -9,7 +9,7 @@ /// /// See table 3.27 from the PDF specification version 1.7. /// - public class MediaBox + internal class MediaBox { /// /// User space units per inch. diff --git a/src/UglyToad.Pdf/Content/Page.cs b/src/UglyToad.Pdf/Content/Page.cs index e694e54d..d9248b9a 100644 --- a/src/UglyToad.Pdf/Content/Page.cs +++ b/src/UglyToad.Pdf/Content/Page.cs @@ -4,6 +4,9 @@ using System.Collections.Generic; using System.Linq; + /// + /// Contains the content and provides access to methods of a single page in the . + /// public class Page { /// diff --git a/src/UglyToad.Pdf/Content/PageSize.cs b/src/UglyToad.Pdf/Content/PageSize.cs index 61eb02a2..fc31dd5f 100644 --- a/src/UglyToad.Pdf/Content/PageSize.cs +++ b/src/UglyToad.Pdf/Content/PageSize.cs @@ -3,24 +3,78 @@ using System.Collections.Generic; using Geometry; + /// + /// The corresponding named size of the . + /// public enum PageSize { + /// + /// Unknown page size, did not match a defined page size. + /// Custom = 0, + /// + /// The ISO 216 A0 page size. + /// A0 = 3, + /// + /// The ISO 216 A1 page size. + /// A1 = 4, + /// + /// The ISO 216 A2 page size. + /// A2 = 5, + /// + /// The ISO 216 A3 page size. + /// A3 = 6, + /// + /// The ISO 216 A4 page size. + /// A4 = 7, + /// + /// The ISO 216 A5 page size. + /// A5 = 8, + /// + /// The ISO 216 A6 page size. + /// A6 = 9, + /// + /// The ISO 216 A7 page size. + /// A7 = 10, + /// + /// The ISO 216 A8 page size. + /// A8 = 11, + /// + /// The ISO 216 A9 page size. + /// A9 = 12, + /// + /// The ISO 216 A10 page size. 
+ /// A10 = 13, + /// + /// The North American Letter page size. + /// Letter = 14, + /// + /// The North American Legal page size. + /// Legal = 15, + /// + /// The North American Ledger page size. + /// Ledger = 16, + /// + /// The North American Tabloid page size. + /// Tabloid = 17, + /// + /// The North American Executive page size. + /// Executive = 18 } diff --git a/src/UglyToad.Pdf/Content/PageTreeMembers.cs b/src/UglyToad.Pdf/Content/PageTreeMembers.cs index c53ace47..33f84f2f 100644 --- a/src/UglyToad.Pdf/Content/PageTreeMembers.cs +++ b/src/UglyToad.Pdf/Content/PageTreeMembers.cs @@ -5,7 +5,7 @@ /// /// Contains the values inherited from the Page Tree for this page. /// - public class PageTreeMembers + internal class PageTreeMembers { public MediaBox GetMediaBox() { diff --git a/src/UglyToad.Pdf/ContentStream/IndirectReference.cs b/src/UglyToad.Pdf/ContentStream/IndirectReference.cs index f4c501c2..0aecc3d6 100644 --- a/src/UglyToad.Pdf/ContentStream/IndirectReference.cs +++ b/src/UglyToad.Pdf/ContentStream/IndirectReference.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.ContentStream { - public struct IndirectReference + internal struct IndirectReference { public long ObjectNumber { get; } diff --git a/src/UglyToad.Pdf/Core/ICosStreamWriter.cs b/src/UglyToad.Pdf/Core/ICosStreamWriter.cs index 259e3b58..be64a6cc 100644 --- a/src/UglyToad.Pdf/Core/ICosStreamWriter.cs +++ b/src/UglyToad.Pdf/Core/ICosStreamWriter.cs @@ -2,7 +2,7 @@ { using System.IO; - public interface ICosStreamWriter + internal interface ICosStreamWriter { void WriteToPdfStream(BinaryWriter output); } diff --git a/src/UglyToad.Pdf/Core/IDeepCloneable.cs b/src/UglyToad.Pdf/Core/IDeepCloneable.cs index 55161221..f4a44be2 100644 --- a/src/UglyToad.Pdf/Core/IDeepCloneable.cs +++ b/src/UglyToad.Pdf/Core/IDeepCloneable.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.Core { - public interface IDeepCloneable + internal interface IDeepCloneable { T DeepClone(); } diff --git 
a/src/UglyToad.Pdf/Core/TransformationMatrix.cs b/src/UglyToad.Pdf/Core/TransformationMatrix.cs index 0dacbf7d..1d81c82a 100644 --- a/src/UglyToad.Pdf/Core/TransformationMatrix.cs +++ b/src/UglyToad.Pdf/Core/TransformationMatrix.cs @@ -148,7 +148,7 @@ { var xScale = A; - /** + /* * BM: if the trm is rotated, the calculation is a little more complicated * * The rotation matrix multiplied with the scaling matrix is: @@ -161,7 +161,7 @@ * * sqrt(M(0,0)^2+M(0,1)^2) = * sqrt(x2*cos2+x2*sin2) = - * sqrt(x2*(cos2+sin2)) = <- here is the trick cos2+sin2 is one + * sqrt(x2*(cos2+sin2)) = (here is the trick cos2+sin2 = 1) * sqrt(x2) = * abs(x) */ diff --git a/src/UglyToad.Pdf/Cos/CosArray.cs b/src/UglyToad.Pdf/Cos/CosArray.cs index a4a16b34..506bfdd2 100644 --- a/src/UglyToad.Pdf/Cos/CosArray.cs +++ b/src/UglyToad.Pdf/Cos/CosArray.cs @@ -15,11 +15,12 @@ * limitations under the License. */ -/** +/* * An array of PDFBase objects as part of the PDF document. * * @author Ben Litchfield */ + namespace UglyToad.Pdf.Cos { using System; diff --git a/src/UglyToad.Pdf/Cos/CosDictionary.cs b/src/UglyToad.Pdf/Cos/CosDictionary.cs index acc9d53e..f69d9984 100644 --- a/src/UglyToad.Pdf/Cos/CosDictionary.cs +++ b/src/UglyToad.Pdf/Cos/CosDictionary.cs @@ -748,136 +748,7 @@ } return retval; } - - /** - * This is a convenience method that will get the dictionary object that is expected to be a name and convert it to - * a string. Null is returned if the entry does not exist in the dictionary or if the date was invalid. - * - * @param key The key to the item in the dictionary. - * @return The name converted to a date. - */ - public DateTime? getDate(String key) - { - return getDate(CosName.Create(key)); - } - - /** - * This is a convenience method that will get the dictionary object that is expected to be a name and convert it to - * a string. Null is returned if the entry does not exist in the dictionary or if the date was invalid. - * - * @param key The key to the item in the dictionary. 
- * @return The name converted to a date. - */ - public DateTime? getDate(CosName key) - { - CosBase baseObj = getDictionaryObject(key); - if (baseObj is CosString) - { - return DateConverter.toCalendar((CosString)baseObj); - } - - return null; - } - - /** - * This is a convenience method that will get the dictionary object that is expected to be a date. Null is returned - * if the entry does not exist in the dictionary or if the date was invalid. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The default value to return. - * @return The name converted to a date. - */ - public DateTime? getDate(String key, DateTime defaultValue) - { - return getDate(CosName.Create(key), defaultValue); - } - - /** - * This is a convenience method that will get the dictionary object that is expected to be a date. Null is returned - * if the entry does not exist in the dictionary or if the date was invalid. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The default value to return. - * @return The name converted to a date. - */ - public DateTime? getDate(CosName key, DateTime? defaultValue) - { - var retval = getDate(key); - if (retval == null) - { - retval = defaultValue; - } - - return retval; - } - - /** - * This is a convenience method that will get the dictionary object that is expected to be a name and convert it to - * a string. Null is returned if the entry does not exist in the dictionary. - * - * @param embedded The embedded dictionary to get. - * @param key The key to the item in the dictionary. - * @return The name converted to a string. - * @ If there is an error converting to a date. - */ - public DateTime? getEmbeddedDate(String embedded, String key) - { - return getEmbeddedDate(embedded, CosName.Create(key), null); - } - - /** - * This is a convenience method that will get the dictionary object that is expected to be a name and convert it to - * a string. 
Null is returned if the entry does not exist in the dictionary. - * - * @param embedded The embedded dictionary to get. - * @param key The key to the item in the dictionary. - * @return The name converted to a string. - * - * @ If there is an error converting to a date. - */ - public DateTime? getEmbeddedDate(String embedded, CosName key) - { - return getEmbeddedDate(embedded, key, null); - } - - /** - * This is a convenience method that will get the dictionary object that is expected to be a date. Null is returned - * if the entry does not exist in the dictionary. - * - * @param embedded The embedded dictionary to get. - * @param key The key to the item in the dictionary. - * @param defaultValue The default value to return. - * @return The name converted to a string. - * @ If there is an error converting to a date. - */ - public DateTime? getEmbeddedDate(String embedded, String key, DateTime? defaultValue) - - { - return getEmbeddedDate(embedded, CosName.Create(key), defaultValue); - } - - /** - * This is a convenience method that will get the dictionary object that is expected to be a date. Null is returned - * if the entry does not exist in the dictionary. - * - * @param embedded The embedded dictionary to get. - * @param key The key to the item in the dictionary. - * @param defaultValue The default value to return. - * @return The name converted to a string. - * @ If there is an error converting to a date. - */ - public DateTime? getEmbeddedDate(String embedded, CosName key, DateTime? defaultValue) - - { - var retval = defaultValue; - CosDictionary eDic = (CosDictionary)getDictionaryObject(embedded); - if (eDic != null) - { - retval = eDic.getDate(key, defaultValue); - } - return retval; - } - + /** * This is a convenience method that will get the dictionary object that is expected to be a cos bool and convert * it to a primitive bool. 
diff --git a/src/UglyToad.Pdf/Cos/CosNumberFactory.cs b/src/UglyToad.Pdf/Cos/CosNumberFactory.cs index 96b912c4..e4fa0a73 100644 --- a/src/UglyToad.Pdf/Cos/CosNumberFactory.cs +++ b/src/UglyToad.Pdf/Cos/CosNumberFactory.cs @@ -2,7 +2,7 @@ { using System; - public static class CosNumberFactory + internal static class CosNumberFactory { /** * This factory method will get the appropriate number object. @@ -15,7 +15,7 @@ */ public static ICosNumber get(string value) { - if (value.Length == 1) + if (value.Length == 1) { char digit = value[0]; if ('0' <= digit && digit <= '9') @@ -31,25 +31,26 @@ { throw new ArgumentException($"Not a number: {value}"); } - } - else if (value.IndexOf('.') == -1 && (value.ToLower().IndexOf('e') == -1)) + } + else { - try + if (value.IndexOf('.') == -1 && (value.ToLower().IndexOf('e') == -1)) { - if (value[0] == '+') + try { - return CosInt.Get(long.Parse(value.Substring(1))); + if (value[0] == '+') + { + return CosInt.Get(long.Parse(value.Substring(1))); + } + return CosInt.Get(long.Parse(value)); + } + catch (FormatException) + { + // might be a huge number, see PDFBOX-3116 + return new CosFloat(value); } - return CosInt.Get(long.Parse(value)); } - catch (FormatException e) - { -// might be a huge number, see PDFBOX-3116 - return new CosFloat(value); - } - } - else - { + return new CosFloat(value); } } diff --git a/src/UglyToad.Pdf/Cos/CrossReferenceType.cs b/src/UglyToad.Pdf/Cos/CrossReferenceType.cs index c148ac23..15c7f019 100644 --- a/src/UglyToad.Pdf/Cos/CrossReferenceType.cs +++ b/src/UglyToad.Pdf/Cos/CrossReferenceType.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.Cos { - public enum CrossReferenceType + internal enum CrossReferenceType { Table, Stream diff --git a/src/UglyToad.Pdf/Cos/ICosNumber.cs b/src/UglyToad.Pdf/Cos/ICosNumber.cs index 3e22e924..69ecf985 100644 --- a/src/UglyToad.Pdf/Cos/ICosNumber.cs +++ b/src/UglyToad.Pdf/Cos/ICosNumber.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.Cos { - public interface ICosNumber + internal 
interface ICosNumber { float AsFloat(); diff --git a/src/UglyToad.Pdf/Filters/PngPredictor.cs b/src/UglyToad.Pdf/Filters/PngPredictor.cs index 33409f7d..c54bfb3a 100644 --- a/src/UglyToad.Pdf/Filters/PngPredictor.cs +++ b/src/UglyToad.Pdf/Filters/PngPredictor.cs @@ -5,7 +5,7 @@ using System.IO; using IO; - public class PngPredictor : IPngPredictor + internal class PngPredictor : IPngPredictor { public byte[] Decode(byte[] inputBytes, int predictor, int colors, int bitsPerComponent, int columns) { diff --git a/src/UglyToad.Pdf/Fonts/CharacterIdentifierSystemInfo.cs b/src/UglyToad.Pdf/Fonts/CharacterIdentifierSystemInfo.cs index fc6a3114..75d4b323 100644 --- a/src/UglyToad.Pdf/Fonts/CharacterIdentifierSystemInfo.cs +++ b/src/UglyToad.Pdf/Fonts/CharacterIdentifierSystemInfo.cs @@ -5,7 +5,7 @@ /// /// Specifies the character collection associated with the (CIDFont). /// - public struct CharacterIdentifierSystemInfo + internal struct CharacterIdentifierSystemInfo { /// /// Identifies the issuer of the character collection. diff --git a/src/UglyToad.Pdf/Fonts/CidFonts/CharacterIdentifierToGlyphIdentifierMap.cs b/src/UglyToad.Pdf/Fonts/CidFonts/CharacterIdentifierToGlyphIdentifierMap.cs deleted file mode 100644 index 21d402fe..00000000 --- a/src/UglyToad.Pdf/Fonts/CidFonts/CharacterIdentifierToGlyphIdentifierMap.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace UglyToad.Pdf.Fonts.CidFonts -{ - public class CharacterIdentifierToGlyphIdentifierMap - { - - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Fonts/CidFonts/CidFontType.cs b/src/UglyToad.Pdf/Fonts/CidFonts/CidFontType.cs index d12a3a3b..119007f0 100644 --- a/src/UglyToad.Pdf/Fonts/CidFonts/CidFontType.cs +++ b/src/UglyToad.Pdf/Fonts/CidFonts/CidFontType.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.Fonts.CidFonts { - public enum CidFontType + internal enum CidFontType { /// /// Glyph descriptions based on Adobe Type 1 format. 
diff --git a/src/UglyToad.Pdf/Fonts/CidFonts/VerticalVectorComponents.cs b/src/UglyToad.Pdf/Fonts/CidFonts/VerticalVectorComponents.cs index 09706dbb..ae33370c 100644 --- a/src/UglyToad.Pdf/Fonts/CidFonts/VerticalVectorComponents.cs +++ b/src/UglyToad.Pdf/Fonts/CidFonts/VerticalVectorComponents.cs @@ -3,7 +3,7 @@ /// /// Equivalent to the DW2 array in the font dictionary for vertical fonts. /// - public struct VerticalVectorComponents + internal struct VerticalVectorComponents { public decimal Position { get; } diff --git a/src/UglyToad.Pdf/Fonts/Cmap/CidCharacterMapping.cs b/src/UglyToad.Pdf/Fonts/Cmap/CidCharacterMapping.cs index 62e49e17..814e5dea 100644 --- a/src/UglyToad.Pdf/Fonts/Cmap/CidCharacterMapping.cs +++ b/src/UglyToad.Pdf/Fonts/Cmap/CidCharacterMapping.cs @@ -3,7 +3,7 @@ /// /// Maps from a single character code to its CID. /// - public struct CidCharacterMapping + internal struct CidCharacterMapping { /// /// The character code. diff --git a/src/UglyToad.Pdf/Fonts/Cmap/CidRange.cs b/src/UglyToad.Pdf/Fonts/Cmap/CidRange.cs index 8b2a90a0..c942f255 100644 --- a/src/UglyToad.Pdf/Fonts/Cmap/CidRange.cs +++ b/src/UglyToad.Pdf/Fonts/Cmap/CidRange.cs @@ -5,7 +5,7 @@ /// /// Associates the beginning and end of a range of character codes with the starting CID for the range. /// - public struct CidRange + internal struct CidRange { /// /// The beginning of the range of character codes. diff --git a/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs b/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs index d8df59cd..94cf7a45 100644 --- a/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs +++ b/src/UglyToad.Pdf/Fonts/Cmap/CodespaceRange.cs @@ -6,7 +6,7 @@ /// /// A codespace range is specified by a pair of codes of some particular length giving the lower and upper bounds of that range. /// - public class CodespaceRange + internal class CodespaceRange { /// /// The lower-bound of this range. 
diff --git a/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs index 28dd1f63..fe828a77 100644 --- a/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.Pdf/Fonts/Simple/TrueTypeSimpleFont.cs @@ -71,26 +71,6 @@ value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName); return true; - - if (!ToUnicode.CanMapToUnicode) - { - try - { - if (encoding != null) - { - } - - value = ((char) characterCode).ToString(); - - return true; - } - catch (Exception) - { - return false; - } - } - - return ToUnicode.TryGet(characterCode, out value); } public PdfVector GetDisplacement(int characterCode) diff --git a/src/UglyToad.Pdf/Fonts/TextObjectComponentType.cs b/src/UglyToad.Pdf/Fonts/TextObjectComponentType.cs deleted file mode 100644 index 1055e99e..00000000 --- a/src/UglyToad.Pdf/Fonts/TextObjectComponentType.cs +++ /dev/null @@ -1,31 +0,0 @@ -namespace UglyToad.Pdf.Fonts -{ - public enum TextObjectComponentType - { - BeginText, - EndText, - TextFont, - SetTextMatrix, - MoveTextPosition, - MoveTextPositionAndSetLeading, - ShowText, - ShowTextWithIndividualGlyphPositioning, - SetTextLeading, - SetTextRenderingMode, - SetTextRise, - SetWordSpacing, - SetHorizontalTextScaling, - MoveToNextLineStart, - SetCharacterSpacing, - Numeric, - String, - Font, - Array, - SetGrayNonStroking, - SetGrayStroking, - SetLineWidth, - SetClippingPathNonZeroWinding, - SetClippingPathEvenOdd, - MoveNextLineAndShowText - } -} \ No newline at end of file diff --git a/src/UglyToad.Pdf/Geometry/PdfPoint.cs b/src/UglyToad.Pdf/Geometry/PdfPoint.cs index b12a508f..963ff583 100644 --- a/src/UglyToad.Pdf/Geometry/PdfPoint.cs +++ b/src/UglyToad.Pdf/Geometry/PdfPoint.cs @@ -25,24 +25,36 @@ /// public decimal Y { get; } + /// + /// Create a new at this position. + /// public PdfPoint(decimal x, decimal y) { X = x; Y = y; } + /// + /// Create a new at this position. 
+ /// public PdfPoint(int x, int y) { X = x; Y = y; } + /// + /// Create a new at this position. + /// public PdfPoint(double x, double y) { X = (decimal)x; Y = (decimal)y; } + /// + /// Get a string representation of this point. + /// public override string ToString() { return $"(x:{X}, y:{Y})"; diff --git a/src/UglyToad.Pdf/Geometry/PdfRectangle.cs b/src/UglyToad.Pdf/Geometry/PdfRectangle.cs index 850f8eb3..58a622a4 100644 --- a/src/UglyToad.Pdf/Geometry/PdfRectangle.cs +++ b/src/UglyToad.Pdf/Geometry/PdfRectangle.cs @@ -2,7 +2,7 @@ { using System; - public class PdfRectangle + internal class PdfRectangle { public PdfPoint TopLeft { get; } diff --git a/src/UglyToad.Pdf/Geometry/UserSpaceUnit.cs b/src/UglyToad.Pdf/Geometry/UserSpaceUnit.cs index de93793f..092d3534 100644 --- a/src/UglyToad.Pdf/Geometry/UserSpaceUnit.cs +++ b/src/UglyToad.Pdf/Geometry/UserSpaceUnit.cs @@ -6,7 +6,7 @@ /// By default user space units correspond to 1/72nd of an inch (a typographic point). /// The UserUnit entry in a page dictionary can define the space units as a different multiple of 1/72 (1 point). 
/// - public struct UserSpaceUnit + internal struct UserSpaceUnit { public static readonly UserSpaceUnit Default = new UserSpaceUnit(1); diff --git a/src/UglyToad.Pdf/Graphics/Core/LineDashPattern.cs b/src/UglyToad.Pdf/Graphics/Core/LineDashPattern.cs index 2d6aa2b0..3cfc5155 100644 --- a/src/UglyToad.Pdf/Graphics/Core/LineDashPattern.cs +++ b/src/UglyToad.Pdf/Graphics/Core/LineDashPattern.cs @@ -2,7 +2,7 @@ { using System; - public struct LineDashPattern + internal struct LineDashPattern { public int Phase { get; } diff --git a/src/UglyToad.Pdf/ICosUpdateInfo.cs b/src/UglyToad.Pdf/ICosUpdateInfo.cs index ee0df90a..d505b575 100644 --- a/src/UglyToad.Pdf/ICosUpdateInfo.cs +++ b/src/UglyToad.Pdf/ICosUpdateInfo.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf { - public interface ICosUpdateInfo + internal interface ICosUpdateInfo { bool NeedsToBeUpdated { get; set; } } diff --git a/src/UglyToad.Pdf/IO/ByteArrayInputBytes.cs b/src/UglyToad.Pdf/IO/ByteArrayInputBytes.cs index 744c8579..9b5cbdb6 100644 --- a/src/UglyToad.Pdf/IO/ByteArrayInputBytes.cs +++ b/src/UglyToad.Pdf/IO/ByteArrayInputBytes.cs @@ -2,7 +2,7 @@ { using System.Collections.Generic; - public class ByteArrayInputBytes : IInputBytes + internal class ByteArrayInputBytes : IInputBytes { private readonly IReadOnlyList bytes; diff --git a/src/UglyToad.Pdf/IO/IInputBytes.cs b/src/UglyToad.Pdf/IO/IInputBytes.cs index f248d37a..87b2af44 100644 --- a/src/UglyToad.Pdf/IO/IInputBytes.cs +++ b/src/UglyToad.Pdf/IO/IInputBytes.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.IO { - public interface IInputBytes + internal interface IInputBytes { int CurrentOffset { get; } diff --git a/src/UglyToad.Pdf/IO/ISequentialSource.cs b/src/UglyToad.Pdf/IO/ISequentialSource.cs deleted file mode 100644 index 1cc5413a..00000000 --- a/src/UglyToad.Pdf/IO/ISequentialSource.cs +++ /dev/null @@ -1,165 +0,0 @@ -using System; - -namespace UglyToad.Pdf.IO -{ - public interface SequentialSource : IDisposable - { - /** - * Read a single byte of data. 
- * - * @return The byte of data that is being read. - * @throws IOException If there is an error while reading the data. - */ - int read(); - - /** - * Read a buffer of data. - * - * @param b The buffer to write the data to. - * @return The number of bytes that were actually read. - * @throws IOException If there was an error while reading the data. - */ - int read(byte[] b); - - /** - * Read a buffer of data. - * - * @param b The buffer to write the data to. - * @param offset Offset into the buffer to start writing. - * @param length The amount of data to attempt to read. - * @return The number of bytes that were actually read. - * @throws IOException If there was an error while reading the data. - */ - int read(byte[] b, int offset, int length); - - /** - * Returns offset of next byte to be returned by a read method. - * - * @return offset of next byte which will be returned with next {@link #read()} (if no more - * bytes are left it returns a value >= length of source). - * @throws IOException If there was an error while reading the data. - */ - long getPosition(); - - /** - * This will peek at the next byte. - * - * @return The next byte on the stream, leaving it as available to read. - * @throws IOException If there is an error reading the next byte. - */ - int peek(); - - /** - * Unreads a single byte. - * - * @param b byte array to push back - * @throws IOException if there is an error while unreading - */ - void unread(int b); - - /** - * Unreads an array of bytes. - * - * @param bytes byte array to be unread - * @throws IOException if there is an error while unreading - */ - void unread(byte[] bytes); - - /** - * Unreads a portion of an array of bytes. - * - * @param bytes byte array to be unread - * @param start start index - * @param len number of bytes to be unread - * @throws IOException if there is an error while unreading - */ - void unread(byte[] bytes, int start, int len); - - /** - * Reads a given number of bytes in its entirety. 
- * - * @param length the number of bytes to be read - * @return a byte array containing the bytes just read - * @throws IOException if an I/O error occurs while reading data - */ - byte[] readFully(int length); - - /** - * Returns true if the end of the data source has been reached. - * - * @return true if we are at the end of the data. - * @throws IOException If there is an error reading the next byte. - */ - bool isEOF(); - } - - public class BufferSequentialSource : SequentialSource - { - private readonly IRandomAccessRead reader; - - /** - * Constructor. - * - * @param reader The random access reader to wrap. - */ - public BufferSequentialSource(IRandomAccessRead reader) - { - this.reader = reader; - } - - public int read() - { - return reader.Read(); - } - - public int read(byte[] b) - { - return reader.Read(b); - } - - public int read(byte[] b, int offset, int length) - { - return reader.Read(b, offset, length); - } - - public long getPosition() - { - return reader.GetPosition(); - } - - public int peek() - { - return reader.Peek(); - } - - public void unread(int b) - { - reader.Rewind(1); - } - - public void unread(byte[] bytes) - { - reader.Rewind(bytes.Length); - } - - public void unread(byte[] bytes, int start, int len) - { - reader.Rewind(len - start); - } - - public byte[] readFully(int length) - { - return reader.ReadFully(length); - } - - public bool isEOF() - { - return reader.IsEof(); - } - - public void Dispose() - { - reader.Dispose(); - } - } -} diff --git a/src/UglyToad.Pdf/IO/RandomAccess.cs b/src/UglyToad.Pdf/IO/RandomAccess.cs index a92311c6..e8781f92 100644 --- a/src/UglyToad.Pdf/IO/RandomAccess.cs +++ b/src/UglyToad.Pdf/IO/RandomAccess.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.IO { - public interface RandomAccess : IRandomAccessRead, RandomAccessWrite + internal interface RandomAccess : IRandomAccessRead, RandomAccessWrite { // super interface for both read and write } diff --git a/src/UglyToad.Pdf/IO/RandomAccessBuffer.cs 
b/src/UglyToad.Pdf/IO/RandomAccessBuffer.cs index 9759846f..d564cbf0 100644 --- a/src/UglyToad.Pdf/IO/RandomAccessBuffer.cs +++ b/src/UglyToad.Pdf/IO/RandomAccessBuffer.cs @@ -5,7 +5,7 @@ namespace UglyToad.Pdf.IO { using System.IO; - public class RandomAccessBuffer : RandomAccess + internal class RandomAccessBuffer : RandomAccess { // default chunk size is 1kb private static readonly int DefaultChunkSize = 1024; diff --git a/src/UglyToad.Pdf/IO/RandomAccessRead.cs b/src/UglyToad.Pdf/IO/RandomAccessRead.cs index d6f444d3..8529edbc 100644 --- a/src/UglyToad.Pdf/IO/RandomAccessRead.cs +++ b/src/UglyToad.Pdf/IO/RandomAccessRead.cs @@ -2,7 +2,7 @@ { using System; - public interface IRandomAccessRead : IDisposable + internal interface IRandomAccessRead : IDisposable { /** * Read a single byte of data. diff --git a/src/UglyToad.Pdf/IO/RandomAccessWrite.cs b/src/UglyToad.Pdf/IO/RandomAccessWrite.cs index 6c80b629..ee6895b9 100644 --- a/src/UglyToad.Pdf/IO/RandomAccessWrite.cs +++ b/src/UglyToad.Pdf/IO/RandomAccessWrite.cs @@ -2,7 +2,7 @@ { using System; - public interface RandomAccessWrite : IDisposable + internal interface RandomAccessWrite : IDisposable { /** * Write a byte to the stream. diff --git a/src/UglyToad.Pdf/Logging/ILog.cs b/src/UglyToad.Pdf/Logging/ILog.cs index 95c2e112..856f9eef 100644 --- a/src/UglyToad.Pdf/Logging/ILog.cs +++ b/src/UglyToad.Pdf/Logging/ILog.cs @@ -2,12 +2,31 @@ namespace UglyToad.Pdf.Logging { + /// + /// Logs internal messages from the PDF parsing process. Consumers can provide their own implementation + /// in the to intercept log messages. + /// public interface ILog { + /// + /// Record an informational debug message. + /// void Debug(string message); + /// + /// Record an informational debug message with exception. + /// void Debug(string message, Exception ex); + /// + /// Record a warning message due to a non-error issue encountered in parsing. 
+ /// void Warn(string message); + /// + /// Record an error message due to an issue encountered in parsing. + /// void Error(string message); + /// + /// Record an error message due to an issue encountered in parsing with exception. + /// void Error(string message, Exception ex); } diff --git a/src/UglyToad.Pdf/Parser/FileStructure/XrefCosOffsetChecker.cs b/src/UglyToad.Pdf/Parser/FileStructure/XrefCosOffsetChecker.cs index 5b69f7c7..ee98f562 100644 --- a/src/UglyToad.Pdf/Parser/FileStructure/XrefCosOffsetChecker.cs +++ b/src/UglyToad.Pdf/Parser/FileStructure/XrefCosOffsetChecker.cs @@ -26,7 +26,7 @@ long objectOffset = objectEntry.Value; // a negative offset number represents a object number itself // see type 2 entry in xref stream - if (objectOffset != null && objectOffset >= 0 + if (objectOffset >= 0 && !checkObjectKeys(reader, objectKey, objectOffset)) { //LOG.debug("Stop checking xref offsets as at least one (" + objectKey @@ -58,7 +58,7 @@ return true; } } - catch (InvalidOperationException exception) + catch (InvalidOperationException) { // Swallow the exception, obviously there isn't any valid object number } @@ -180,7 +180,7 @@ foreach (var entry in xrefOffset) { long offset = entry.Value; - if (offset != null && offset < 0) + if (offset < 0) { CosObjectKey objStream = new CosObjectKey(-offset, 0); if (!objStreams.Contains(objStream)) @@ -253,7 +253,7 @@ ObjectHelper.ReadObjectNumber(source); ObjectHelper.ReadGenerationNumber(source); } - catch (InvalidOperationException exception) + catch (InvalidOperationException) { // save the EOF marker as the following data is most likely some garbage lastEOFMarker = tempMarker; diff --git a/src/UglyToad.Pdf/ParsingOptions.cs b/src/UglyToad.Pdf/ParsingOptions.cs index 1b2b84eb..82580bbf 100644 --- a/src/UglyToad.Pdf/ParsingOptions.cs +++ b/src/UglyToad.Pdf/ParsingOptions.cs @@ -2,10 +2,25 @@ { using Logging; + /// + /// Configures options used by the parser when reading PDF documents. 
+ /// public class ParsingOptions { + + /// + /// Should the parser ignore issues where the document does not conform to the PDF specification? + /// public bool UseLenientParsing { get; set; } = true; - public ILog Logger { get; set; } = new NoOpLog(); + private ILog logger = new NoOpLog(); + /// + /// The used to record messages raised by the parsing process. + /// + public ILog Logger + { + get => logger ?? new NoOpLog(); + set => logger = value; + } } } \ No newline at end of file diff --git a/src/UglyToad.Pdf/PdfDocument.cs b/src/UglyToad.Pdf/PdfDocument.cs index 6a55be9e..7757e692 100644 --- a/src/UglyToad.Pdf/PdfDocument.cs +++ b/src/UglyToad.Pdf/PdfDocument.cs @@ -6,10 +6,12 @@ using IO; using Logging; using Parser; - using Parser.FileStructure; - using Parser.Parts; using Util.JetBrains.Annotations; + /// + /// + /// Provides access to document level information for this PDF document as well as access to the s contained in the document. + /// public class PdfDocument : IDisposable { [NotNull] diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/ArrayToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/ArrayToken.cs index 2cc31297..eee2d2e3 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/ArrayToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/ArrayToken.cs @@ -3,7 +3,7 @@ using System.Collections.Generic; using System.Text; - public class ArrayToken : IDataToken> + internal class ArrayToken : IDataToken> { public IReadOnlyList Data { get; } diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/BooleanToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/BooleanToken.cs index fdee3864..6aad1951 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/BooleanToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/BooleanToken.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.Tokenization.Tokens { - public class BooleanToken : IDataToken + internal class BooleanToken : IDataToken { public static BooleanToken True { get; } = new BooleanToken(true); diff --git 
a/src/UglyToad.Pdf/Tokenization/Tokens/CommentToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/CommentToken.cs index 24178900..a03cbce7 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/CommentToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/CommentToken.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.Tokenization.Tokens { - public class CommentToken : IDataToken + internal class CommentToken : IDataToken { public string Data { get; } diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs index a8664586..8645c7f2 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/HexToken.cs @@ -3,7 +3,7 @@ namespace UglyToad.Pdf.Tokenization.Tokens using System.Collections.Generic; using System.Text; - public class HexToken : IDataToken + internal class HexToken : IDataToken { private static readonly Dictionary HexMap = new Dictionary { diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/IDataToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/IDataToken.cs index 54de792a..609ab1b1 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/IDataToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/IDataToken.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.Tokenization.Tokens { - public interface IDataToken : IToken + internal interface IDataToken : IToken { T Data { get; } } diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/IToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/IToken.cs index 827e15f4..640e610d 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/IToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/IToken.cs @@ -1,6 +1,9 @@ namespace UglyToad.Pdf.Tokenization.Tokens { - public interface IToken + /// + /// A marker interface for tokens from the content. 
+ /// + internal interface IToken { } } \ No newline at end of file diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/IndirectReferenceToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/IndirectReferenceToken.cs index 90e14257..f7f4a165 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/IndirectReferenceToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/IndirectReferenceToken.cs @@ -2,7 +2,7 @@ { using ContentStream; - public class IndirectReferenceToken : IDataToken + internal class IndirectReferenceToken : IDataToken { public IndirectReference Data { get; } diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/NullToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/NullToken.cs index edcac885..bf3d37fe 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/NullToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/NullToken.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.Tokenization.Tokens { - public class NullToken : IDataToken + internal class NullToken : IDataToken { public static NullToken Instance { get; } = new NullToken(); diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/NumericToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/NumericToken.cs index 3663eaca..9f187219 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/NumericToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/NumericToken.cs @@ -3,7 +3,7 @@ using System; using System.Globalization; - public class NumericToken : IDataToken + internal class NumericToken : IDataToken { public decimal Data { get; } diff --git a/src/UglyToad.Pdf/Tokenization/Tokens/OperatorToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/OperatorToken.cs index e7dda4ce..b0f5f4a4 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/OperatorToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/OperatorToken.cs @@ -2,7 +2,7 @@ { using System.Collections.Generic; - public class OperatorToken : IDataToken + internal class OperatorToken : IDataToken { private static readonly Dictionary PooledNames = new Dictionary(); diff --git 
a/src/UglyToad.Pdf/Tokenization/Tokens/StringToken.cs b/src/UglyToad.Pdf/Tokenization/Tokens/StringToken.cs index 8a1cbf48..048745a3 100644 --- a/src/UglyToad.Pdf/Tokenization/Tokens/StringToken.cs +++ b/src/UglyToad.Pdf/Tokenization/Tokens/StringToken.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.Tokenization.Tokens { - public class StringToken : IDataToken + internal class StringToken : IDataToken { public string Data { get; } diff --git a/src/UglyToad.Pdf/Util/DateConverter.cs b/src/UglyToad.Pdf/Util/DateConverter.cs deleted file mode 100644 index aea683d9..00000000 --- a/src/UglyToad.Pdf/Util/DateConverter.cs +++ /dev/null @@ -1,695 +0,0 @@ -namespace UglyToad.Pdf.Util -{ - using System; - using System.Globalization; - using Cos; - - internal class DateConverter - { - private DateConverter() - { - } - - // milliseconds/1000 = seconds; seconds / 60 = minutes; minutes/60 = hours - private static readonly int MINUTES_PER_HOUR = 60; - private static readonly int SECONDS_PER_MINUTE = 60; - private static readonly int MILLIS_PER_MINUTE = SECONDS_PER_MINUTE * 1000; - private static readonly int MILLIS_PER_HOUR = MINUTES_PER_HOUR * MILLIS_PER_MINUTE; - private static readonly int HALF_DAY = 12 * MINUTES_PER_HOUR * MILLIS_PER_MINUTE, DAY = 2 * HALF_DAY; - - /* - * The Date format is supposed to be the PDF_DATE_FORMAT, but other - * forms appear. These lists offer alternatives to be tried - * if parseBigEndianDate fails. - * - * The time zone offset generally trails the date string, so it is processed - * separately with parseTZoffset. (This does not preclude having time - * zones in the elements below; one does.) - * - * Alas, SimpleDateFormat is badly non-reentrant -- it modifies its - * calendar field (PDFBox-402), so these lists are strings to create - * SimpleDate format as needed. - * - * Some past entries have been elided because they duplicate existing - * entries. 
See the API for SimpleDateFormat, which says - * "For parsing, the number of pattern letters is ignored - * unless it's needed to separate two adjacent fields." - * - * toCalendar(String, String[]) tests to see that the entire input text - * has been consumed. Therefore the ordering of formats is important. - * If one format begins with the entirety of another, the longer - * must precede the other in the list. - * - * HH is for 0-23 hours and hh for 1-12 hours; an "a" field must follow "hh" - * Where year is yy, four digit years are accepted - * and two digit years are converted to four digits in the range - * [thisyear-79...thisyear+20] - */ - private static readonly String[] ALPHA_START_FORMATS = - { - "EEEE, dd MMM yy hh:mm:ss a", - "EEEE, MMM dd, yy hh:mm:ss a", - "EEEE, MMM dd, yy 'at' hh:mma", // Acrobat Net Distiller 1.0 for Windows - "EEEE, MMM dd, yy", // Acrobat Distiller 1.0.2 for Macintosh && PDFBOX-465 - "EEEE MMM dd, yy HH:mm:ss", // ECMP5 - "EEEE MMM dd HH:mm:ss z yy", // GNU Ghostscript 7.0.7 - "EEEE MMM dd HH:mm:ss yy", // GNU Ghostscript 7.0.7 variant - }; - - private static readonly String[] DIGIT_START_FORMATS = - { - "dd MMM yy HH:mm:ss", // for 26 May 2000 11:25:00 - "dd MMM yy HH:mm", // for 26 May 2000 11:25 - "yyyy MMM d", // ambiguity resolved only by omitting time - "yyyymmddhh:mm:ss", // test case "200712172:2:3" - "H:m M/d/yy", // test case "9:47 5/12/2008" - "M/d/yy HH:mm:ss", - "M/d/yy HH:mm", - "M/d/yy", - - // proposed rule that is unreachable due to "dd MMM yy HH:mm:ss" - // "yyyy MMM d HH:mm:ss", - - // rules made unreachable by "M/d/yy HH:mm:ss" "M/d/yy HH:mm" "M/d/yy", - // (incoming digit strings do not mark themselves as y, m, or d!) 
- // "d/MM/yyyy HH:mm:ss", // PDFBOX-164 and PDFBOX-170 - // "M/dd/yyyy hh:mm:ss", - // "MM/d/yyyy hh:mm:ss", - // "M/d/yyyy HH:mm:ss", - // "M/dd/yyyy", - // "MM/d/yyyy", - // "M/d/yyyy", - // "M/d/yyyy HH:mm:ss", - // "M/d/yy HH:mm:ss", - // subsumed by big-endian parse - // "yyyy-MM-dd'T'HH:mm:ss", - // "yyyy-MM-dd'T'HH:mm:ss", - // "yyyymmdd hh:mm:ss", - // "yyyymmdd", - // "yyyymmddX''00''", // covers 24 cases - // (orignally the above ended with '+00''00'''; - // the first apostrophe quoted the plus, - // '' mapped to a single ', and the ''' was invalid) -}; - - /** - * Converts a Calendar to a string formatted as: - * D:yyyyMMddHHmmss#hh'mm' where # is Z, +, or -. - * - * @param cal The date to convert to a string. May be null. - * The DST_OFFSET is included when computing the output time zone. - * - * @return The date as a String to be used in a PDF document, - * or null if the cal value is null - */ - public static String toString(DateTime? cal) - { - if (cal == null) - { - return null; - } - //String offset = formatTZoffset(cal.Value.ti.get(Calendar.ZONE_OFFSET) + - //cal.get(Calendar.DST_OFFSET), "'"); - //return String.format(Locale.US, "D:" - //+ "%1$4tY%1$2tm%1$2td" // yyyyMMdd - //+ "%1$2tH%1$2tM%1$2tS" // HHmmss - //+ "%2$s" // time zone - //+ "'", // trailing apostrophe - //cal, offset); - - throw new NotImplementedException(); - } - - /** - * Converts the date to ISO 8601 string format: - * yyyy-mm-ddThh:MM:ss#hh:mm (where '#" is '+' or '-'). - * - * @param cal The date to convert. Must not be null. - * The DST_OFFSET is included in the output value. - * - * @return The date represented as an ISO 8601 string. 
- */ - public static string toISO8601(DateTime cal) - { - //String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET) + - //cal.get(Calendar.DST_OFFSET), ":"); - //return String.format(Locale.US, - //"%1$4tY" // yyyy - //+ "-%1$2tm" // -mm (%tm adds one to cal month value) - //+ "-%1$2td" // -dd (%tm adds one to cal month value) - //+ "T" // T - //+ "%1$2tH:%1$2tM:%1$2tS" // HHmmss - //+ "%2$s", // time zone - //cal, offset); - - throw new NotImplementedException(); - } - - /* - * Constrain a timezone offset to the range [-14:00 thru +14:00]. - * by adding or subtracting multiples of a full day. - */ - private static int restrainTZoffset(long proposedOffset) - { - if (proposedOffset <= 14 * MILLIS_PER_HOUR && proposedOffset >= -14 * MILLIS_PER_HOUR) - { - // https://www.w3.org/TR/xmlschema-2/#dateTime-timezones - // Timezones between 14:00 and -14:00 are valid - return (int)proposedOffset; - } - // Constrain a timezone offset to the range [-11:59 thru +12:00]. - proposedOffset = ((proposedOffset + HALF_DAY) % DAY + DAY) % DAY; - if (proposedOffset == 0) - { - return HALF_DAY; - } - // 0 <= proposedOffset < DAY - proposedOffset = (proposedOffset - HALF_DAY) % HALF_DAY; - // -HALF_DAY < proposedOffset < HALF_DAY - return (int)proposedOffset; - } - - /* - * Formats a time zone offset as #hh^mm - * where # is + or -, hh is hours, ^ is a separator, and mm is minutes. - * Any separator may be specified by the second argument; - * the usual values are ":" (ISO 8601), "" (RFC 822), and "'" (PDF). - * The returned value is constrained to the range -11:59 ... 11:59. - * For offset of 0 millis, the String returned is "+00^00", never "Z". - * To get a "general" offset in form GMT#hh:mm, write - * "GMT"+DateConverter.formatTZoffset(offset, ":"); - * - * Take thought in choosing the source for the millis value. - * It can come from calendarValue.getTimeZone() or from - * calendarValue.get(Calendar.ZONE_OFFSET). 
If a TimeZone was created - * from a valid time zone ID, then it may have a daylight savings rule. - * (As of July 4, 2013, the data base at http://www.iana.org/time-zones - * recognized 629 time zone regions. But a TimeZone created as - * new SimpleTimeZone(millisOffset, "ID"), - * will not have a daylight savings rule. (Not even if there is a - * known time zone with the given ID. To get the TimeZone named "xDT" - * with its DST rule, use an ID of EST5EDT, CST6CDT, MST7MDT, or PST8PDT. - * - * When parsing PDF dates, the incoming values DOES NOT have a TIMEZONE value. - * At most it has an OFFSET value like -04'00'. It is generally impossible to - * determine what TIMEZONE corresponds to a given OFFSET. If the date is - * in the summer when daylight savings is in effect, an offset of -0400 - * might correspond to any one of the 38 regions (of 53) with standard time - * offset -0400 and no daylight saving. Or it might correspond to - * any one of the 31 regions (out of 43) that observe daylight savings - * and have standard time offset of -0500. - * - * If a Calendar has not been assigned a TimeZone with setTimeZone(), - * it will have by default the local TIMEZONE, not just the OFFSET. In the - * USA, this TimeZone will have a daylight savings rule. - * - * The offset assigned with calVal.set(Calendar.ZONE_OFFSET) differs - * from the offset in the TimeZone set by Calendar.setTimeZone(). Example: - * Suppose my local TimeZone is America/New_York. It has an offset of -05'00'. 
- * And suppose I set a GregorianCalendar's ZONE_OFFSET to -07'00' - * calVal = new GregorianCalendar(); // TimeZone is the local default - * calVal.set(Calendar.ZONE_OFFSET, -7* MILLIS_PER_HOUR); - * Four different offsets can be computed from calVal: - * calVal.get(Calendar.ZONE_OFFSET) => -07:00 - * calVal.get(Calendar.ZONE_OFFSET) + calVal.get(Calendar.DST_OFFSET) => -06:00 - * calVal.getTimeZone().getRawOffset() => -05:00 - * calVal.getTimeZone().getOffset(calVal.getTimeInMillis()) => -04:00 - * - * Which is correct??? I dunno, though setTimeZone() does seem to affect - * ZONE_OFFSET, and not vice versa. One cannot even test whether TimeZone - * or ZONE_OFFSET has been set; both have been set by initialization code. - * TimeZone is initialized to the local default time zone - * and ZONE_OFFSET is set from it. - * - * My choice in this DateConverter class has been to set the - * initial TimeZone of a GregorianCalendar to GMT. Thereafter - * the TimeZone is modified with {@link #adjustTimeZoneNicely}. - * - * package-private for testing - */ - private static string formatTZoffset(long millis, string sep) - { - //{ - // SimpleDateFormat sdf = new SimpleDateFormat("Z"); // #hhmm - // sdf.setTimeZone(new SimpleTimeZone(restrainTZoffset(millis), "unknown")); - // String tz = sdf.format(new Date()); - // return tz.substring(0, 3) + sep + tz.substring(3); - throw new NotImplementedException(); - } - - /* - * Parses an integer from a string, starting at and advancing a ParsePosition. - * Returns The integer that was at the given parse position, or the remedy value - * if no digits were found. - * - * The ParsePosition will be incremented by the number of digits found, but no - * more than maxlen. That is, the ParsePosition will advance across at most - * maxlen initial digits in text. The error index is ignored and unchanged. - * - * maxlen is the maximum length of the integer to parse, usually 2, but 4 for - * year fields. 
If the field of length maxlen begins with a digit, but contains - * a non-digit, no error is signaled and the integer value is returned. - */ - private static int parseTimeField(string text, ParsePosition where, int maxlen, int remedy) - { - if (text == null) - { - return remedy; - } - // it would seem that DecimalFormat.parse() would be simpler; - // but that class blithely ignores setMaximumIntegerDigits - int retval = 0; - int index = where.Index; - int limit = index + Math.Min(maxlen, text.Length - index); - for (; index < limit; index++) - { - // convert digit to integer - int cval = text[index] - '0'; - // test to see if we got a digit - if (cval < 0 || cval > 9) - { - // no digit at index - break; - } - // append the digit to the return value - retval = retval * 10 + cval; - } - if (index == where.Index) - { - return remedy; - } - - where.Index = index; - - return retval; - } - - /* - * Advances the ParsePosition past any and all the characters that match - * those in the optionals list. In particular, a space will skip all spaces. - * - * The start value is incremented by the number of optionals found. The error - * index is ignored and unchanged. - * - * Returns the last non-space character passed over (even if space is not in - * the optionals list.) - */ - private static char skipOptionals(string text, ParsePosition where, string optionals) - { - char retval = ' ', currch; - while (text != null && where.Index < text.Length && optionals.IndexOf((currch = text[where.Index])) >= 0) - { - retval = (currch != ' ') ? currch : retval; - where.Index++; - } - - return retval; - } - - /* - * If the victim string is at the given position in the text, this method - * advances the position past that string. - * - * `where` is the initial position to look at. After return, this will have - * been incremented by the length of the victim if it was found. The error - * index is ignored and unchanged. 
- */ - private static bool skipString(string text, string victim, ParsePosition where) - { - if (text.StartsWithOffset(victim, where.Index)) - { - where.Index = where.Index + victim.Length; - return true; - } - return false; - } - - /* - * Construct a new GregorianCalendar and set defaults. - * Locale is ENGLISH. - * TimeZone is "UTC" (zero offset and no DST). - * Parsing is NOT lenient. Milliseconds are zero. - * - * package-private for testing - */ - private static GregorianCalendar newGreg() - { - throw new NotImplementedException(); - //GregorianCalendar retCal = new GregorianCalendar(GregorianCalendarTypes.USEnglish); - //retCal.setTimeZone(new SimpleTimeZone(0, "UTC")); - //retCal.setLenient(false); - //retCal.set(Calendar.MILLISECOND, 0); - //return retCal; - } - - /* - * Install a TimeZone on a GregorianCalendar without changing the - * hours value. A plain GregorianCalendat.setTimeZone() - * adjusts the Calendar.HOUR value to compensate. This is *BAD* - * (not to say *EVIL*) when we have already set the time. - */ - private static void adjustTimeZoneNicely(GregorianCalendar cal, TimeZone tz) - { - throw new NotImplementedException(); - //cal.setTimeZone(tz); - //int offset = (cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET)) / - //MILLIS_PER_MINUTE; - //cal.add(Calendar.MINUTE, -offset); - } - - /* - * Parses the end of a date string for a time zone and, if one is found, - * sets the time zone of the GregorianCalendar. Otherwise the calendar - * time zone is unchanged. - * - * The text is parsed as - * (Z|GMT|UTC)? [+- ]* h [': ]? m '? - * where the leading String is optional, h is two digits by default, - * but may be a single digit if followed by one of space, apostrophe, - * colon, or the end of string. Similarly, m is one or two digits. - * This scheme accepts the format of PDF, RFC 822, and ISO8601. - * If none of these applies (as for a time zone name), we try - * TimeZone.getTimeZone(). - * - * Scanning begins at where.index. 
After success, the returned index - * is that of the next character after the recognized string. - * - * package-private for testing - */ - private static bool parseTZoffset(string text, GregorianCalendar cal, ParsePosition initialWhere) - { - ParsePosition where = new ParsePosition(initialWhere.Index); - //TimeZone tz = new SimpleTimeZone(0, "GMT"); - //int tzHours, tzMin; - //char sign = skipOptionals(text, where, "Z+- "); - //boolean hadGMT = (sign == 'Z' || skipString(text, "GMT", where) || - //skipString(text, "UTC", where)); - //sign = (!hadGMT) ? sign : skipOptionals(text, where, "+- "); - - //tzHours = parseTimeField(text, where, 2, -999); - //skipOptionals(text, where, "\': "); - //tzMin = parseTimeField(text, where, 2, 0); - //skipOptionals(text, where, "\' "); - - //if (tzHours != -999) - //{ - // // we parsed a time zone in default format - // int hrSign = (sign == '-' ? -1 : 1); - // tz.setRawOffset(restrainTZoffset(hrSign * (tzHours * MILLIS_PER_HOUR + tzMin * - // (long)MILLIS_PER_MINUTE))); - // updateZoneId(tz); - //} - //else if (!hadGMT) - //{ - // // try to process as a name; "GMT" or "UTC" has already been processed - // String tzText = text.substring(initialWhere.getIndex()).trim(); - // tz = TimeZone.getTimeZone(tzText); - // // getTimeZone returns "GMT" for unknown ids - // if ("GMT".equals(tz.getID())) - // { - // // no timezone in text, cal amd initialWhere are unchanged - // return false; - // } - // else - // { - // // we got a tz by name; use it - // where.setIndex(text.length()); - // } - //} - //adjustTimeZoneNicely(cal, tz); - //initialWhere.setIndex(where.getIndex()); - throw new NotImplementedException(); - return true; - } - - /** - * Update the zone ID based on the raw offset. This is either GMT, GMT+hh:mm or GMT-hh:mm, where - * n is between 1 and 14. The highest negative hour is -14, the highest positive hour is 12. - * Zones that don't fit in this schema are set to zone ID "unknown". - * - * @param tz the time zone to update. 
- */ - private static void updateZoneId(TimeZone tz) - { - //int offset = tz.getRawOffset(); - //char pm = '+'; - //if (offset < 0) - //{ - // pm = '-'; - // offset = -offset; - //} - //int hh = offset / 3600000; - //int mm = offset % 3600000 / 60000; - //if (offset == 0) - //{ - // tz.setID("GMT"); - //} - //else if (pm == '+' && hh <= 12) - //{ - // tz.setID(String.format(Locale.US, "GMT+%02d:%02d", hh, mm)); - //} - //else if (pm == '-' && hh <= 14) - //{ - // tz.setID(String.format(Locale.US, "GMT-%02d:%02d", hh, mm)); - //} - //else - //{ - // tz.setID("unknown"); - //} - throw new NotImplementedException(); - } - - /* - * Parses a big-endian date: year month day hour min sec. - * The year must be four digits. Other fields may be adjacent - * and delimited by length or they may follow appropriate delimiters. - * year [ -/]* month [ -/]* dayofmonth [ T]* hour [:] min [:] sec [.secFraction] - * If any numeric field is omitted, all following fields must also be omitted. - * No time zone is processed. - * - * Ambiguous dates can produce unexpected results. For example: - * 1970 12 23:08 will parse as 1970 December 23 00:08:00 - * - * The parse begins at `where, on return the index - * is advanced to just beyond the last character processed. - * The error index is ignored and unchanged. 
- */ - private static GregorianCalendar parseBigEndianDate(String text, - ParsePosition initialWhere) - { - //ParsePosition where = new ParsePosition(initialWhere.getIndex()); - //int year = parseTimeField(text, where, 4, 0); - //if (where.getIndex() != 4 + initialWhere.getIndex()) - //{ - // return null; - //} - //skipOptionals(text, where, "/- "); - //int month = parseTimeField(text, where, 2, 1) - 1; // Calendar months are 0...11 - //skipOptionals(text, where, "/- "); - //int day = parseTimeField(text, where, 2, 1); - //skipOptionals(text, where, " T"); - //int hour = parseTimeField(text, where, 2, 0); - //skipOptionals(text, where, ": "); - //int minute = parseTimeField(text, where, 2, 0); - //skipOptionals(text, where, ": "); - //int second = parseTimeField(text, where, 2, 0); - //char nextC = skipOptionals(text, where, "."); - //if (nextC == '.') - //{ - // // fractions of a second: skip upto 19 digits - // parseTimeField(text, where, 19, 0); - //} - - //GregorianCalendar dest = newGreg(); - //try - //{ - // dest.set(year, month, day, hour, minute, second); - // // trigger limit tests - // dest.getTimeInMillis(); - //} - //catch (IllegalArgumentException ill) - //{ - // return null; - //} - //initialWhere.setIndex(where.getIndex()); - //skipOptionals(text, initialWhere, " "); - //// dest has at least a year value - //return dest; - throw new NotImplementedException(); - } - - /* - * See if text can be parsed as a date according to any of a list of - * formats. The time zone may be included as part of the format, or - * omitted in favor of later testing for a trailing time zone. - * - * The parse starts at `where`, upon return it will have been - * incremented to refer to the next non-space character after the date. - * If no date was found, the value is unchanged. - * The error index is ignored and unchanged. - * - * If there is a failure to find a date, or the GregorianCalendar - * for the date that was found. 
Unless a time zone was - * part of the format, the time zone will be GMT+0 - */ - private static GregorianCalendar parseSimpleDate(string text, string[] fmts, - ParsePosition initialWhere) - { - foreach (var fmt in fmts) - { - ParsePosition where = new ParsePosition(initialWhere.Index); - //SimpleDateFormat sdf = new SimpleDateFormat(fmt, Locale.ENGLISH); - throw new NotImplementedException(); - GregorianCalendar retCal = newGreg(); - //sdf.setCalendar(retCal); - //if (sdf.parse(text, where) != null) - //{ - // initialWhere.Index = where.Index; - // skipOptionals(text, initialWhere, " "); - // return retCal; - //} - } - - - return null; - } - - /* - * Parses a String to see if it begins with a date, and if so, - * returns that date. The date must be strictly correct--no - * field may exceed the appropriate limit. - * (That is, the Calendar has setLenient(false).) - * Skips initial spaces, but does NOT check for "D:" - * - * The scan first tries parseBigEndianDate and parseTZoffset - * and then tries parseSimpleDate with appropriate formats, - * again followed by parseTZoffset. If at any stage the entire - * text is consumed, that date value is returned immediately. - * Otherwise the date that consumes the longest initial part - * of the text is returned. - * - * - PDF format dates are among those recognized by parseBigEndianDate. - * - The formats tried are alphaStartFormats or digitStartFormat and - * any listed in the value of moreFmts. - */ - private static DateTime? 
parseDate(string text, ParsePosition initialWhere) - { - if (string.IsNullOrEmpty(text)) - { - return null; - } - - // remember longestr date string - int longestLen = -999999; - // theorem: the above value will never be used - // proof: longestLen is only used if longestDate is not null - - GregorianCalendar longestDate = null; // null says no date found yet - int whereLen; // tempcopy of where.getIndex() - - ParsePosition where = new ParsePosition(initialWhere.Index); - // check for null (throws exception) and trim off surrounding spaces - skipOptionals(text, where, " "); - int startPosition = where.Index; - - // try big-endian parse - GregorianCalendar retCal = parseBigEndianDate(text, where); - // check for success and a timezone - if (retCal != null && (where.Index == text.Length || parseTZoffset(text, retCal, where))) - { - // if text is fully consumed, return the date else remember it and its length - whereLen = where.Index; - if (whereLen == text.Length) - { - initialWhere.Index = whereLen; - //return retCal; - } - longestLen = whereLen; - longestDate = retCal; - } - - // try one of the sets of standard formats - where.Index = startPosition; - String[] formats - = char.IsDigit(text[startPosition]) - ? 
DIGIT_START_FORMATS - : ALPHA_START_FORMATS; - retCal = parseSimpleDate(text, formats, where); - // check for success and a timezone - if (retCal != null && - (where.Index == text.Length || parseTZoffset(text, retCal, where))) - { - // if text is fully consumed, return the date else remember it and its length - whereLen = where.Index; - if (whereLen == text.Length) - { - initialWhere.Index = whereLen; - // return retCal; - } - if (whereLen > longestLen) - { - longestLen = whereLen; - longestDate = retCal; - } - } - - if (longestDate != null) - { - initialWhere.Index = longestLen; - - //return longestDate; - } - - throw new NotImplementedException(); - //return retCal; - } - - /** - * Returns the Calendar for a given COS string containing a date, - * or {@code null} if it cannot be parsed. - * - * The returned value will have 0 for DST_OFFSET. - * - * @param text A COS string containing a date. - * @return The Calendar that the text string represents, or {@code null} if it cannot be parsed. - */ - public static DateTime? toCalendar(CosString text) - { - if (text == null) - { - return null; - } - return toCalendar(text.GetString()); - } - - /** - * Returns the Calendar for a given string containing a date, - * or {@code null} if it cannot be parsed. - * - * The returned value will have 0 for DST_OFFSET. - * - * @param text A COS string containing a date. - * @return The Calendar that the text string represents, or {@code null} if it cannot be parsed. - */ - public static DateTime? 
toCalendar(string text) - { - if (text == null || text.Trim() == string.Empty) - { - return null; - } - - ParsePosition where = new ParsePosition(0); - skipOptionals(text, where, " "); - skipString(text, "D:", where); - var calendar = parseDate(text, where); - - if (calendar == null || where.Index != text.Length) - { - // the date string is invalid - return null; - } - - return calendar; - } - } -} diff --git a/src/UglyToad.Pdf/Util/InternalStringExtensions.cs b/src/UglyToad.Pdf/Util/InternalStringExtensions.cs index ad269f56..024f7958 100644 --- a/src/UglyToad.Pdf/Util/InternalStringExtensions.cs +++ b/src/UglyToad.Pdf/Util/InternalStringExtensions.cs @@ -7,8 +7,6 @@ public static string ReplaceLimited(this string value, string old, string newValue, int count) { throw new NotImplementedException(); - - return value; } public static bool StartsWithOffset(this string value, string start, int offset) diff --git a/src/UglyToad.Pdf/Util/OtherEncodings.cs b/src/UglyToad.Pdf/Util/OtherEncodings.cs index 302e6d68..0fb7fd6a 100644 --- a/src/UglyToad.Pdf/Util/OtherEncodings.cs +++ b/src/UglyToad.Pdf/Util/OtherEncodings.cs @@ -2,7 +2,7 @@ namespace UglyToad.Pdf.Util { - public static class OtherEncodings + internal static class OtherEncodings { /// /// Latin 1 Encoding: ISO 8859-1 is a single-byte encoding that can represent the first 256 Unicode characters. diff --git a/src/UglyToad.Pdf/Util/ParsePosition.cs b/src/UglyToad.Pdf/Util/ParsePosition.cs index 26c957a5..f7ad8a60 100644 --- a/src/UglyToad.Pdf/Util/ParsePosition.cs +++ b/src/UglyToad.Pdf/Util/ParsePosition.cs @@ -1,6 +1,6 @@ namespace UglyToad.Pdf.Util { - public class ParsePosition + internal class ParsePosition { public int Index { get; set; }