mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-10-14 10:55:04 +08:00
encapsulation for internal classes, remove old code, document public api
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
using System;
|
||||
using IO;
|
||||
|
||||
public class ThrowingReader : IRandomAccessRead
|
||||
internal class ThrowingReader : IRandomAccessRead
|
||||
{
|
||||
private readonly IRandomAccessRead reader;
|
||||
|
||||
|
@@ -5,7 +5,7 @@
|
||||
using Pdf.Tokenization.Scanner;
|
||||
using Pdf.Util;
|
||||
|
||||
public static class StringBytesTestConverter
|
||||
internal static class StringBytesTestConverter
|
||||
{
|
||||
public static Result Convert(string s, bool readFirst = true)
|
||||
{
|
||||
|
@@ -7,7 +7,7 @@
|
||||
/// <summary>
|
||||
/// Defines the visible region; contents extending beyond the crop box should be clipped.
|
||||
/// </summary>
|
||||
public class CropBox
|
||||
internal class CropBox
|
||||
{
|
||||
[NotNull]
|
||||
public PdfRectangle Bounds { get; }
|
||||
|
@@ -70,6 +70,9 @@
|
||||
representation = builder.ToString() ?? string.Empty;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets a string representing this document information. <see langword="null"/> entries are not shown.
|
||||
/// </summary>
|
||||
public override string ToString()
|
||||
{
|
||||
return representation;
|
||||
@@ -82,7 +85,7 @@
|
||||
return;
|
||||
}
|
||||
|
||||
builder.Append(name).Append(": ").Append(value).Append(";");
|
||||
builder.Append(name).Append(": ").Append(value).Append("; ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -9,7 +9,7 @@
|
||||
/// <remarks>
|
||||
/// See table 3.27 from the PDF specification version 1.7.
|
||||
/// </remarks>
|
||||
public class MediaBox
|
||||
internal class MediaBox
|
||||
{
|
||||
///<summary>
|
||||
/// User space units per inch.
|
||||
|
@@ -4,6 +4,9 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
/// <summary>
|
||||
/// Contains the content and provides access to methods of a single page in the <see cref="PdfDocument"/>.
|
||||
/// </summary>
|
||||
public class Page
|
||||
{
|
||||
/// <summary>
|
||||
|
@@ -3,24 +3,78 @@
|
||||
using System.Collections.Generic;
|
||||
using Geometry;
|
||||
|
||||
/// <summary>
|
||||
/// The corresponding named size of the <see cref="Page"/>.
|
||||
/// </summary>
|
||||
public enum PageSize
|
||||
{
|
||||
/// <summary>
|
||||
/// Unknown page size, did not match a defined page size.
|
||||
/// </summary>
|
||||
Custom = 0,
|
||||
/// <summary>
|
||||
/// The ISO 216 A0 page size.
|
||||
/// </summary>
|
||||
A0 = 3,
|
||||
/// <summary>
|
||||
/// The ISO 216 A1 page size.
|
||||
/// </summary>
|
||||
A1 = 4,
|
||||
/// <summary>
|
||||
/// The ISO 216 A2 page size.
|
||||
/// </summary>
|
||||
A2 = 5,
|
||||
/// <summary>
|
||||
/// The ISO 216 A3 page size.
|
||||
/// </summary>
|
||||
A3 = 6,
|
||||
/// <summary>
|
||||
/// The ISO 216 A4 page size.
|
||||
/// </summary>
|
||||
A4 = 7,
|
||||
/// <summary>
|
||||
/// The ISO 216 A5 page size.
|
||||
/// </summary>
|
||||
A5 = 8,
|
||||
/// <summary>
|
||||
/// The ISO 216 A6 page size.
|
||||
/// </summary>
|
||||
A6 = 9,
|
||||
/// <summary>
|
||||
/// The ISO 216 A7 page size.
|
||||
/// </summary>
|
||||
A7 = 10,
|
||||
/// <summary>
|
||||
/// The ISO 216 A8 page size.
|
||||
/// </summary>
|
||||
A8 = 11,
|
||||
/// <summary>
|
||||
/// The ISO 216 A9 page size.
|
||||
/// </summary>
|
||||
A9 = 12,
|
||||
/// <summary>
|
||||
/// The ISO 216 A10 page size.
|
||||
/// </summary>
|
||||
A10 = 13,
|
||||
/// <summary>
|
||||
/// The North American Letter page size.
|
||||
/// </summary>
|
||||
Letter = 14,
|
||||
/// <summary>
|
||||
/// The North American Legal page size.
|
||||
/// </summary>
|
||||
Legal = 15,
|
||||
/// <summary>
|
||||
/// The North American Ledger page size.
|
||||
/// </summary>
|
||||
Ledger = 16,
|
||||
/// <summary>
|
||||
/// The North American Tabloid page size.
|
||||
/// </summary>
|
||||
Tabloid = 17,
|
||||
/// <summary>
|
||||
/// The North American Executive page size.
|
||||
/// </summary>
|
||||
Executive = 18
|
||||
}
|
||||
|
||||
|
@@ -5,7 +5,7 @@
|
||||
/// <summary>
|
||||
/// Contains the values inherited from the Page Tree for this page.
|
||||
/// </summary>
|
||||
public class PageTreeMembers
|
||||
internal class PageTreeMembers
|
||||
{
|
||||
public MediaBox GetMediaBox()
|
||||
{
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.ContentStream
|
||||
{
|
||||
public struct IndirectReference
|
||||
internal struct IndirectReference
|
||||
{
|
||||
public long ObjectNumber { get; }
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
{
|
||||
using System.IO;
|
||||
|
||||
public interface ICosStreamWriter
|
||||
internal interface ICosStreamWriter
|
||||
{
|
||||
void WriteToPdfStream(BinaryWriter output);
|
||||
}
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.Core
|
||||
{
|
||||
public interface IDeepCloneable<out T>
|
||||
internal interface IDeepCloneable<out T>
|
||||
{
|
||||
T DeepClone();
|
||||
}
|
||||
|
@@ -148,7 +148,7 @@
|
||||
{
|
||||
var xScale = A;
|
||||
|
||||
/**
|
||||
/*
|
||||
* BM: if the trm is rotated, the calculation is a little more complicated
|
||||
*
|
||||
* The rotation matrix multiplied with the scaling matrix is:
|
||||
@@ -161,7 +161,7 @@
|
||||
*
|
||||
* sqrt(M(0,0)^2+M(0,1)^2) =
|
||||
* sqrt(x2*cos2+x2*sin2) =
|
||||
* sqrt(x2*(cos2+sin2)) = <- here is the trick cos2+sin2 is one
|
||||
* sqrt(x2*(cos2+sin2)) = (here is the trick cos2+sin2 = 1)
|
||||
* sqrt(x2) =
|
||||
* abs(x)
|
||||
*/
|
||||
|
@@ -15,11 +15,12 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
/*
|
||||
* An array of PDFBase objects as part of the PDF document.
|
||||
*
|
||||
* @author Ben Litchfield
|
||||
*/
|
||||
|
||||
namespace UglyToad.Pdf.Cos
|
||||
{
|
||||
using System;
|
||||
|
@@ -748,136 +748,7 @@
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a convenience method that will get the dictionary object that is expected to be a name and convert it to
|
||||
* a string. Null is returned if the entry does not exist in the dictionary or if the date was invalid.
|
||||
*
|
||||
* @param key The key to the item in the dictionary.
|
||||
* @return The name converted to a date.
|
||||
*/
|
||||
public DateTime? getDate(String key)
|
||||
{
|
||||
return getDate(CosName.Create(key));
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a convenience method that will get the dictionary object that is expected to be a name and convert it to
|
||||
* a string. Null is returned if the entry does not exist in the dictionary or if the date was invalid.
|
||||
*
|
||||
* @param key The key to the item in the dictionary.
|
||||
* @return The name converted to a date.
|
||||
*/
|
||||
public DateTime? getDate(CosName key)
|
||||
{
|
||||
CosBase baseObj = getDictionaryObject(key);
|
||||
if (baseObj is CosString)
|
||||
{
|
||||
return DateConverter.toCalendar((CosString)baseObj);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a convenience method that will get the dictionary object that is expected to be a date. Null is returned
|
||||
* if the entry does not exist in the dictionary or if the date was invalid.
|
||||
*
|
||||
* @param key The key to the item in the dictionary.
|
||||
* @param defaultValue The default value to return.
|
||||
* @return The name converted to a date.
|
||||
*/
|
||||
public DateTime? getDate(String key, DateTime defaultValue)
|
||||
{
|
||||
return getDate(CosName.Create(key), defaultValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a convenience method that will get the dictionary object that is expected to be a date. Null is returned
|
||||
* if the entry does not exist in the dictionary or if the date was invalid.
|
||||
*
|
||||
* @param key The key to the item in the dictionary.
|
||||
* @param defaultValue The default value to return.
|
||||
* @return The name converted to a date.
|
||||
*/
|
||||
public DateTime? getDate(CosName key, DateTime? defaultValue)
|
||||
{
|
||||
var retval = getDate(key);
|
||||
if (retval == null)
|
||||
{
|
||||
retval = defaultValue;
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a convenience method that will get the dictionary object that is expected to be a name and convert it to
|
||||
* a string. Null is returned if the entry does not exist in the dictionary.
|
||||
*
|
||||
* @param embedded The embedded dictionary to get.
|
||||
* @param key The key to the item in the dictionary.
|
||||
* @return The name converted to a string.
|
||||
* @ If there is an error converting to a date.
|
||||
*/
|
||||
public DateTime? getEmbeddedDate(String embedded, String key)
|
||||
{
|
||||
return getEmbeddedDate(embedded, CosName.Create(key), null);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a convenience method that will get the dictionary object that is expected to be a name and convert it to
|
||||
* a string. Null is returned if the entry does not exist in the dictionary.
|
||||
*
|
||||
* @param embedded The embedded dictionary to get.
|
||||
* @param key The key to the item in the dictionary.
|
||||
* @return The name converted to a string.
|
||||
*
|
||||
* @ If there is an error converting to a date.
|
||||
*/
|
||||
public DateTime? getEmbeddedDate(String embedded, CosName key)
|
||||
{
|
||||
return getEmbeddedDate(embedded, key, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a convenience method that will get the dictionary object that is expected to be a date. Null is returned
|
||||
* if the entry does not exist in the dictionary.
|
||||
*
|
||||
* @param embedded The embedded dictionary to get.
|
||||
* @param key The key to the item in the dictionary.
|
||||
* @param defaultValue The default value to return.
|
||||
* @return The name converted to a string.
|
||||
* @ If there is an error converting to a date.
|
||||
*/
|
||||
public DateTime? getEmbeddedDate(String embedded, String key, DateTime? defaultValue)
|
||||
|
||||
{
|
||||
return getEmbeddedDate(embedded, CosName.Create(key), defaultValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a convenience method that will get the dictionary object that is expected to be a date. Null is returned
|
||||
* if the entry does not exist in the dictionary.
|
||||
*
|
||||
* @param embedded The embedded dictionary to get.
|
||||
* @param key The key to the item in the dictionary.
|
||||
* @param defaultValue The default value to return.
|
||||
* @return The name converted to a string.
|
||||
* @ If there is an error converting to a date.
|
||||
*/
|
||||
public DateTime? getEmbeddedDate(String embedded, CosName key, DateTime? defaultValue)
|
||||
|
||||
{
|
||||
var retval = defaultValue;
|
||||
CosDictionary eDic = (CosDictionary)getDictionaryObject(embedded);
|
||||
if (eDic != null)
|
||||
{
|
||||
retval = eDic.getDate(key, defaultValue);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This is a convenience method that will get the dictionary object that is expected to be a cos bool and convert
|
||||
* it to a primitive bool.
|
||||
|
@@ -2,7 +2,7 @@
|
||||
{
|
||||
using System;
|
||||
|
||||
public static class CosNumberFactory
|
||||
internal static class CosNumberFactory
|
||||
{
|
||||
/**
|
||||
* This factory method will get the appropriate number object.
|
||||
@@ -15,7 +15,7 @@
|
||||
*/
|
||||
public static ICosNumber get(string value)
|
||||
{
|
||||
if (value.Length == 1)
|
||||
if (value.Length == 1)
|
||||
{
|
||||
char digit = value[0];
|
||||
if ('0' <= digit && digit <= '9')
|
||||
@@ -31,25 +31,26 @@
|
||||
{
|
||||
throw new ArgumentException($"Not a number: {value}");
|
||||
}
|
||||
}
|
||||
else if (value.IndexOf('.') == -1 && (value.ToLower().IndexOf('e') == -1))
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
if (value.IndexOf('.') == -1 && (value.ToLower().IndexOf('e') == -1))
|
||||
{
|
||||
if (value[0] == '+')
|
||||
try
|
||||
{
|
||||
return CosInt.Get(long.Parse(value.Substring(1)));
|
||||
if (value[0] == '+')
|
||||
{
|
||||
return CosInt.Get(long.Parse(value.Substring(1)));
|
||||
}
|
||||
return CosInt.Get(long.Parse(value));
|
||||
}
|
||||
catch (FormatException)
|
||||
{
|
||||
// might be a huge number, see PDFBOX-3116
|
||||
return new CosFloat(value);
|
||||
}
|
||||
return CosInt.Get(long.Parse(value));
|
||||
}
|
||||
catch (FormatException e)
|
||||
{
|
||||
// might be a huge number, see PDFBOX-3116
|
||||
return new CosFloat(value);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
return new CosFloat(value);
|
||||
}
|
||||
}
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.Cos
|
||||
{
|
||||
public enum CrossReferenceType
|
||||
internal enum CrossReferenceType
|
||||
{
|
||||
Table,
|
||||
Stream
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.Cos
|
||||
{
|
||||
public interface ICosNumber
|
||||
internal interface ICosNumber
|
||||
{
|
||||
float AsFloat();
|
||||
|
||||
|
@@ -5,7 +5,7 @@
|
||||
using System.IO;
|
||||
using IO;
|
||||
|
||||
public class PngPredictor : IPngPredictor
|
||||
internal class PngPredictor : IPngPredictor
|
||||
{
|
||||
public byte[] Decode(byte[] inputBytes, int predictor, int colors, int bitsPerComponent, int columns)
|
||||
{
|
||||
|
@@ -5,7 +5,7 @@
|
||||
/// <summary>
|
||||
/// Specifies the character collection associated with the <see cref="ICidFont"/> (CIDFont).
|
||||
/// </summary>
|
||||
public struct CharacterIdentifierSystemInfo
|
||||
internal struct CharacterIdentifierSystemInfo
|
||||
{
|
||||
/// <summary>
|
||||
/// Identifies the issuer of the character collection.
|
||||
|
@@ -1,7 +0,0 @@
|
||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||
{
|
||||
public class CharacterIdentifierToGlyphIdentifierMap
|
||||
{
|
||||
|
||||
}
|
||||
}
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.Fonts.CidFonts
|
||||
{
|
||||
public enum CidFontType
|
||||
internal enum CidFontType
|
||||
{
|
||||
/// <summary>
|
||||
/// Glyph descriptions based on Adobe Type 1 format.
|
||||
|
@@ -3,7 +3,7 @@
|
||||
/// <summary>
|
||||
/// Equivalent to the DW2 array in the font dictionary for vertical fonts.
|
||||
/// </summary>
|
||||
public struct VerticalVectorComponents
|
||||
internal struct VerticalVectorComponents
|
||||
{
|
||||
public decimal Position { get; }
|
||||
|
||||
|
@@ -3,7 +3,7 @@
|
||||
/// <summary>
|
||||
/// Maps from a single character code to its CID.
|
||||
/// </summary>
|
||||
public struct CidCharacterMapping
|
||||
internal struct CidCharacterMapping
|
||||
{
|
||||
/// <summary>
|
||||
/// The character code.
|
||||
|
@@ -5,7 +5,7 @@
|
||||
/// <summary>
|
||||
/// Associates the beginning and end of a range of character codes with the starting CID for the range.
|
||||
/// </summary>
|
||||
public struct CidRange
|
||||
internal struct CidRange
|
||||
{
|
||||
/// <summary>
|
||||
/// The beginning of the range of character codes.
|
||||
|
@@ -6,7 +6,7 @@
|
||||
/// <summary>
|
||||
/// A codespace range is specified by a pair of codes of some particular length giving the lower and upper bounds of that range.
|
||||
/// </summary>
|
||||
public class CodespaceRange
|
||||
internal class CodespaceRange
|
||||
{
|
||||
/// <summary>
|
||||
/// The lower-bound of this range.
|
||||
|
@@ -71,26 +71,6 @@
|
||||
value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName);
|
||||
|
||||
return true;
|
||||
|
||||
if (!ToUnicode.CanMapToUnicode)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (encoding != null)
|
||||
{
|
||||
}
|
||||
|
||||
value = ((char) characterCode).ToString();
|
||||
|
||||
return true;
|
||||
}
|
||||
catch (Exception)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return ToUnicode.TryGet(characterCode, out value);
|
||||
}
|
||||
|
||||
public PdfVector GetDisplacement(int characterCode)
|
||||
|
@@ -1,31 +0,0 @@
|
||||
namespace UglyToad.Pdf.Fonts
|
||||
{
|
||||
public enum TextObjectComponentType
|
||||
{
|
||||
BeginText,
|
||||
EndText,
|
||||
TextFont,
|
||||
SetTextMatrix,
|
||||
MoveTextPosition,
|
||||
MoveTextPositionAndSetLeading,
|
||||
ShowText,
|
||||
ShowTextWithIndividualGlyphPositioning,
|
||||
SetTextLeading,
|
||||
SetTextRenderingMode,
|
||||
SetTextRise,
|
||||
SetWordSpacing,
|
||||
SetHorizontalTextScaling,
|
||||
MoveToNextLineStart,
|
||||
SetCharacterSpacing,
|
||||
Numeric,
|
||||
String,
|
||||
Font,
|
||||
Array,
|
||||
SetGrayNonStroking,
|
||||
SetGrayStroking,
|
||||
SetLineWidth,
|
||||
SetClippingPathNonZeroWinding,
|
||||
SetClippingPathEvenOdd,
|
||||
MoveNextLineAndShowText
|
||||
}
|
||||
}
|
@@ -25,24 +25,36 @@
|
||||
/// </summary>
|
||||
public decimal Y { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Create a new <see cref="PdfPoint"/> at this position.
|
||||
/// </summary>
|
||||
public PdfPoint(decimal x, decimal y)
|
||||
{
|
||||
X = x;
|
||||
Y = y;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Create a new <see cref="PdfPoint"/> at this position.
|
||||
/// </summary>
|
||||
public PdfPoint(int x, int y)
|
||||
{
|
||||
X = x;
|
||||
Y = y;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Create a new <see cref="PdfPoint"/> at this position.
|
||||
/// </summary>
|
||||
public PdfPoint(double x, double y)
|
||||
{
|
||||
X = (decimal)x;
|
||||
Y = (decimal)y;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get a string representation of this point.
|
||||
/// </summary>
|
||||
public override string ToString()
|
||||
{
|
||||
return $"(x:{X}, y:{Y})";
|
||||
|
@@ -2,7 +2,7 @@
|
||||
{
|
||||
using System;
|
||||
|
||||
public class PdfRectangle
|
||||
internal class PdfRectangle
|
||||
{
|
||||
public PdfPoint TopLeft { get; }
|
||||
|
||||
|
@@ -6,7 +6,7 @@
|
||||
/// By default user space units correspond to 1/72nd of an inch (a typographic point).
|
||||
/// The UserUnit entry in a page dictionary can define the space units as a different multiple of 1/72 (1 point).
|
||||
/// </summary>
|
||||
public struct UserSpaceUnit
|
||||
internal struct UserSpaceUnit
|
||||
{
|
||||
public static readonly UserSpaceUnit Default = new UserSpaceUnit(1);
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
{
|
||||
using System;
|
||||
|
||||
public struct LineDashPattern
|
||||
internal struct LineDashPattern
|
||||
{
|
||||
public int Phase { get; }
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf
|
||||
{
|
||||
public interface ICosUpdateInfo
|
||||
internal interface ICosUpdateInfo
|
||||
{
|
||||
bool NeedsToBeUpdated { get; set; }
|
||||
}
|
||||
|
@@ -2,7 +2,7 @@
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
|
||||
public class ByteArrayInputBytes : IInputBytes
|
||||
internal class ByteArrayInputBytes : IInputBytes
|
||||
{
|
||||
private readonly IReadOnlyList<byte> bytes;
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.IO
|
||||
{
|
||||
public interface IInputBytes
|
||||
internal interface IInputBytes
|
||||
{
|
||||
int CurrentOffset { get; }
|
||||
|
||||
|
@@ -1,165 +0,0 @@
|
||||
using System;
|
||||
|
||||
namespace UglyToad.Pdf.IO
|
||||
{
|
||||
public interface SequentialSource : IDisposable
|
||||
{
|
||||
/**
|
||||
* Read a single byte of data.
|
||||
*
|
||||
* @return The byte of data that is being read.
|
||||
* @throws IOException If there is an error while reading the data.
|
||||
*/
|
||||
int read();
|
||||
|
||||
/**
|
||||
* Read a buffer of data.
|
||||
*
|
||||
* @param b The buffer to write the data to.
|
||||
* @return The number of bytes that were actually read.
|
||||
* @throws IOException If there was an error while reading the data.
|
||||
*/
|
||||
int read(byte[] b);
|
||||
|
||||
/**
|
||||
* Read a buffer of data.
|
||||
*
|
||||
* @param b The buffer to write the data to.
|
||||
* @param offset Offset into the buffer to start writing.
|
||||
* @param length The amount of data to attempt to read.
|
||||
* @return The number of bytes that were actually read.
|
||||
* @throws IOException If there was an error while reading the data.
|
||||
*/
|
||||
int read(byte[] b, int offset, int length);
|
||||
|
||||
/**
|
||||
* Returns offset of next byte to be returned by a read method.
|
||||
*
|
||||
* @return offset of next byte which will be returned with next {@link #read()} (if no more
|
||||
* bytes are left it returns a value >= length of source).
|
||||
* @throws IOException If there was an error while reading the data.
|
||||
*/
|
||||
long getPosition();
|
||||
|
||||
/**
|
||||
* This will peek at the next byte.
|
||||
*
|
||||
* @return The next byte on the stream, leaving it as available to read.
|
||||
* @throws IOException If there is an error reading the next byte.
|
||||
*/
|
||||
int peek();
|
||||
|
||||
/**
|
||||
* Unreads a single byte.
|
||||
*
|
||||
* @param b byte array to push back
|
||||
* @throws IOException if there is an error while unreading
|
||||
*/
|
||||
void unread(int b);
|
||||
|
||||
/**
|
||||
* Unreads an array of bytes.
|
||||
*
|
||||
* @param bytes byte array to be unread
|
||||
* @throws IOException if there is an error while unreading
|
||||
*/
|
||||
void unread(byte[] bytes);
|
||||
|
||||
/**
|
||||
* Unreads a portion of an array of bytes.
|
||||
*
|
||||
* @param bytes byte array to be unread
|
||||
* @param start start index
|
||||
* @param len number of bytes to be unread
|
||||
* @throws IOException if there is an error while unreading
|
||||
*/
|
||||
void unread(byte[] bytes, int start, int len);
|
||||
|
||||
/**
|
||||
* Reads a given number of bytes in its entirety.
|
||||
*
|
||||
* @param length the number of bytes to be read
|
||||
* @return a byte array containing the bytes just read
|
||||
* @throws IOException if an I/O error occurs while reading data
|
||||
*/
|
||||
byte[] readFully(int length);
|
||||
|
||||
/**
|
||||
* Returns true if the end of the data source has been reached.
|
||||
*
|
||||
* @return true if we are at the end of the data.
|
||||
* @throws IOException If there is an error reading the next byte.
|
||||
*/
|
||||
bool isEOF();
|
||||
}
|
||||
|
||||
public class BufferSequentialSource : SequentialSource
|
||||
{
|
||||
private readonly IRandomAccessRead reader;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param reader The random access reader to wrap.
|
||||
*/
|
||||
public BufferSequentialSource(IRandomAccessRead reader)
|
||||
{
|
||||
this.reader = reader;
|
||||
}
|
||||
|
||||
public int read()
|
||||
{
|
||||
return reader.Read();
|
||||
}
|
||||
|
||||
public int read(byte[] b)
|
||||
{
|
||||
return reader.Read(b);
|
||||
}
|
||||
|
||||
public int read(byte[] b, int offset, int length)
|
||||
{
|
||||
return reader.Read(b, offset, length);
|
||||
}
|
||||
|
||||
public long getPosition()
|
||||
{
|
||||
return reader.GetPosition();
|
||||
}
|
||||
|
||||
public int peek()
|
||||
{
|
||||
return reader.Peek();
|
||||
}
|
||||
|
||||
public void unread(int b)
|
||||
{
|
||||
reader.Rewind(1);
|
||||
}
|
||||
|
||||
public void unread(byte[] bytes)
|
||||
{
|
||||
reader.Rewind(bytes.Length);
|
||||
}
|
||||
|
||||
public void unread(byte[] bytes, int start, int len)
|
||||
{
|
||||
reader.Rewind(len - start);
|
||||
}
|
||||
|
||||
public byte[] readFully(int length)
|
||||
{
|
||||
return reader.ReadFully(length);
|
||||
}
|
||||
|
||||
public bool isEOF()
|
||||
{
|
||||
return reader.IsEof();
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
reader.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.IO
|
||||
{
|
||||
public interface RandomAccess : IRandomAccessRead, RandomAccessWrite
|
||||
internal interface RandomAccess : IRandomAccessRead, RandomAccessWrite
|
||||
{
|
||||
// super interface for both read and write
|
||||
}
|
||||
|
@@ -5,7 +5,7 @@ namespace UglyToad.Pdf.IO
|
||||
{
|
||||
using System.IO;
|
||||
|
||||
public class RandomAccessBuffer : RandomAccess
|
||||
internal class RandomAccessBuffer : RandomAccess
|
||||
{
|
||||
// default chunk size is 1kb
|
||||
private static readonly int DefaultChunkSize = 1024;
|
||||
|
@@ -2,7 +2,7 @@
|
||||
{
|
||||
using System;
|
||||
|
||||
public interface IRandomAccessRead : IDisposable
|
||||
internal interface IRandomAccessRead : IDisposable
|
||||
{
|
||||
/**
|
||||
* Read a single byte of data.
|
||||
|
@@ -2,7 +2,7 @@
|
||||
{
|
||||
using System;
|
||||
|
||||
public interface RandomAccessWrite : IDisposable
|
||||
internal interface RandomAccessWrite : IDisposable
|
||||
{
|
||||
/**
|
||||
* Write a byte to the stream.
|
||||
|
@@ -2,12 +2,31 @@
|
||||
|
||||
namespace UglyToad.Pdf.Logging
|
||||
{
|
||||
/// <summary>
|
||||
/// Logs internal messages from the PDF parsing process. Consumers can provide their own implementation
|
||||
/// in the <see cref="ParsingOptions"/> to intercept log messages.
|
||||
/// </summary>
|
||||
public interface ILog
|
||||
{
|
||||
/// <summary>
|
||||
/// Record an informational debug message.
|
||||
/// </summary>
|
||||
void Debug(string message);
|
||||
/// <summary>
|
||||
/// Record an informational debug message with exception.
|
||||
/// </summary>
|
||||
void Debug(string message, Exception ex);
|
||||
/// <summary>
|
||||
/// Record a warning message due to a non-error issue encountered in parsing.
|
||||
/// </summary>
|
||||
void Warn(string message);
|
||||
/// <summary>
|
||||
/// Record an error message due to an issue encountered in parsing.
|
||||
/// </summary>
|
||||
void Error(string message);
|
||||
/// <summary>
|
||||
/// Record an error message due to an issue encountered in parsing with exception.
|
||||
/// </summary>
|
||||
void Error(string message, Exception ex);
|
||||
}
|
||||
|
||||
|
@@ -26,7 +26,7 @@
|
||||
long objectOffset = objectEntry.Value;
|
||||
// a negative offset number represents an object number itself
|
||||
// see type 2 entry in xref stream
|
||||
if (objectOffset != null && objectOffset >= 0
|
||||
if (objectOffset >= 0
|
||||
&& !checkObjectKeys(reader, objectKey, objectOffset))
|
||||
{
|
||||
//LOG.debug("Stop checking xref offsets as at least one (" + objectKey
|
||||
@@ -58,7 +58,7 @@
|
||||
return true;
|
||||
}
|
||||
}
|
||||
catch (InvalidOperationException exception)
|
||||
catch (InvalidOperationException)
|
||||
{
|
||||
// Swallow the exception, obviously there isn't any valid object number
|
||||
}
|
||||
@@ -180,7 +180,7 @@
|
||||
foreach (var entry in xrefOffset)
|
||||
{
|
||||
long offset = entry.Value;
|
||||
if (offset != null && offset < 0)
|
||||
if (offset < 0)
|
||||
{
|
||||
CosObjectKey objStream = new CosObjectKey(-offset, 0);
|
||||
if (!objStreams.Contains(objStream))
|
||||
@@ -253,7 +253,7 @@
|
||||
ObjectHelper.ReadObjectNumber(source);
|
||||
ObjectHelper.ReadGenerationNumber(source);
|
||||
}
|
||||
catch (InvalidOperationException exception)
|
||||
catch (InvalidOperationException)
|
||||
{
|
||||
// save the EOF marker as the following data is most likely some garbage
|
||||
lastEOFMarker = tempMarker;
|
||||
|
@@ -2,10 +2,25 @@
|
||||
{
|
||||
using Logging;
|
||||
|
||||
/// <summary>
|
||||
/// Configures options used by the parser when reading PDF documents.
|
||||
/// </summary>
|
||||
public class ParsingOptions
|
||||
{
|
||||
|
||||
/// <summary>
|
||||
/// Should the parser ignore issues where the document does not conform to the PDF specification?
|
||||
/// </summary>
|
||||
public bool UseLenientParsing { get; set; } = true;
|
||||
|
||||
public ILog Logger { get; set; } = new NoOpLog();
|
||||
private ILog logger = new NoOpLog();
|
||||
/// <summary>
|
||||
/// The <see cref="ILog"/> used to record messages raised by the parsing process.
|
||||
/// </summary>
|
||||
public ILog Logger
|
||||
{
|
||||
get => logger ?? new NoOpLog();
|
||||
set => logger = value;
|
||||
}
|
||||
}
|
||||
}
|
@@ -6,10 +6,12 @@
|
||||
using IO;
|
||||
using Logging;
|
||||
using Parser;
|
||||
using Parser.FileStructure;
|
||||
using Parser.Parts;
|
||||
using Util.JetBrains.Annotations;
|
||||
|
||||
/// <inheritdoc />
|
||||
/// <summary>
|
||||
/// Provides access to document level information for this PDF document as well as access to the <see cref="T:UglyToad.Pdf.Content.Page" />s contained in the document.
|
||||
/// </summary>
|
||||
public class PdfDocument : IDisposable
|
||||
{
|
||||
[NotNull]
|
||||
|
@@ -3,7 +3,7 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
public class ArrayToken : IDataToken<IReadOnlyList<IToken>>
|
||||
internal class ArrayToken : IDataToken<IReadOnlyList<IToken>>
|
||||
{
|
||||
public IReadOnlyList<IToken> Data { get; }
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.Tokenization.Tokens
|
||||
{
|
||||
public class BooleanToken : IDataToken<bool>
|
||||
internal class BooleanToken : IDataToken<bool>
|
||||
{
|
||||
public static BooleanToken True { get; } = new BooleanToken(true);
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.Tokenization.Tokens
|
||||
{
|
||||
public class CommentToken : IDataToken<string>
|
||||
internal class CommentToken : IDataToken<string>
|
||||
{
|
||||
public string Data { get; }
|
||||
|
||||
|
@@ -3,7 +3,7 @@ namespace UglyToad.Pdf.Tokenization.Tokens
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
public class HexToken : IDataToken<string>
|
||||
internal class HexToken : IDataToken<string>
|
||||
{
|
||||
private static readonly Dictionary<char, byte> HexMap = new Dictionary<char, byte>
|
||||
{
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.Tokenization.Tokens
|
||||
{
|
||||
public interface IDataToken<out T> : IToken
|
||||
internal interface IDataToken<out T> : IToken
|
||||
{
|
||||
T Data { get; }
|
||||
}
|
||||
|
@@ -1,6 +1,9 @@
|
||||
namespace UglyToad.Pdf.Tokenization.Tokens
|
||||
{
|
||||
public interface IToken
|
||||
/// <summary>
|
||||
/// A marker interface for tokens from the content.
|
||||
/// </summary>
|
||||
internal interface IToken
|
||||
{
|
||||
}
|
||||
}
|
@@ -2,7 +2,7 @@
|
||||
{
|
||||
using ContentStream;
|
||||
|
||||
public class IndirectReferenceToken : IDataToken<IndirectReference>
|
||||
internal class IndirectReferenceToken : IDataToken<IndirectReference>
|
||||
{
|
||||
public IndirectReference Data { get; }
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.Tokenization.Tokens
|
||||
{
|
||||
public class NullToken : IDataToken<object>
|
||||
internal class NullToken : IDataToken<object>
|
||||
{
|
||||
public static NullToken Instance { get; } = new NullToken();
|
||||
|
||||
|
@@ -3,7 +3,7 @@
|
||||
using System;
|
||||
using System.Globalization;
|
||||
|
||||
public class NumericToken : IDataToken<decimal>
|
||||
internal class NumericToken : IDataToken<decimal>
|
||||
{
|
||||
public decimal Data { get; }
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
{
|
||||
using System.Collections.Generic;
|
||||
|
||||
public class OperatorToken : IDataToken<string>
|
||||
internal class OperatorToken : IDataToken<string>
|
||||
{
|
||||
private static readonly Dictionary<string, string> PooledNames = new Dictionary<string, string>();
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.Tokenization.Tokens
|
||||
{
|
||||
public class StringToken : IDataToken<string>
|
||||
internal class StringToken : IDataToken<string>
|
||||
{
|
||||
public string Data { get; }
|
||||
|
||||
|
@@ -1,695 +0,0 @@
|
||||
namespace UglyToad.Pdf.Util
|
||||
{
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using Cos;
|
||||
|
||||
internal class DateConverter
|
||||
{
|
||||
private DateConverter()
|
||||
{
|
||||
}
|
||||
|
||||
// Unit conversion factors used for time zone offsets, which are held in milliseconds:
// milliseconds / 1000 = seconds; seconds / 60 = minutes; minutes / 60 = hours.
private static readonly int MINUTES_PER_HOUR = 60;

private static readonly int SECONDS_PER_MINUTE = 60;

private static readonly int MILLIS_PER_MINUTE = SECONDS_PER_MINUTE * 1000;

private static readonly int MILLIS_PER_HOUR = MINUTES_PER_HOUR * MILLIS_PER_MINUTE;

// Twelve hours in milliseconds.
private static readonly int HALF_DAY = 12 * MINUTES_PER_HOUR * MILLIS_PER_MINUTE;

// Twenty-four hours in milliseconds.
private static readonly int DAY = 2 * HALF_DAY;
|
||||
|
||||
/*
|
||||
* The Date format is supposed to be the PDF_DATE_FORMAT, but other
|
||||
* forms appear. These lists offer alternatives to be tried
|
||||
* if parseBigEndianDate fails.
|
||||
*
|
||||
* The time zone offset generally trails the date string, so it is processed
|
||||
* separately with parseTZoffset. (This does not preclude having time
|
||||
* zones in the elements below; one does.)
|
||||
*
|
||||
* Alas, SimpleDateFormat is badly non-reentrant -- it modifies its
|
||||
* calendar field (PDFBox-402), so these lists are strings to create
|
||||
* SimpleDateFormat instances as needed.
|
||||
*
|
||||
* Some past entries have been elided because they duplicate existing
|
||||
* entries. See the API for SimpleDateFormat, which says
|
||||
* "For parsing, the number of pattern letters is ignored
|
||||
* unless it's needed to separate two adjacent fields."
|
||||
*
|
||||
* toCalendar(String, String[]) tests to see that the entire input text
|
||||
* has been consumed. Therefore the ordering of formats is important.
|
||||
* If one format begins with the entirety of another, the longer
|
||||
* must precede the other in the list.
|
||||
*
|
||||
* HH is for 0-23 hours and hh for 1-12 hours; an "a" field must follow "hh"
|
||||
* Where year is yy, four digit years are accepted
|
||||
* and two digit years are converted to four digits in the range
|
||||
* [thisyear-79...thisyear+20]
|
||||
*/
|
||||
// Fallback formats tried by parseDate when the text does not start with a digit.
// NOTE(review): the patterns are still written with the pattern letters of the Java
// original (e.g. "EEEE", "a", "z"); they presumably need translating before they can be
// handed to a .NET date parser - confirm when parseSimpleDate is ported.
private static readonly string[] ALPHA_START_FORMATS =
{
    "EEEE, dd MMM yy hh:mm:ss a",
    "EEEE, MMM dd, yy hh:mm:ss a",
    "EEEE, MMM dd, yy 'at' hh:mma",   // Acrobat Net Distiller 1.0 for Windows
    "EEEE, MMM dd, yy",               // Acrobat Distiller 1.0.2 for Macintosh && PDFBOX-465
    "EEEE MMM dd, yy HH:mm:ss",       // ECMP5
    "EEEE MMM dd HH:mm:ss z yy",      // GNU Ghostscript 7.0.7
    "EEEE MMM dd HH:mm:ss yy"         // GNU Ghostscript 7.0.7 variant
};
|
||||
|
||||
// Fallback formats tried by parseDate when the text starts with a digit.
// Order matters: a format that begins with the entirety of another must come first.
private static readonly string[] DIGIT_START_FORMATS =
{
    "dd MMM yy HH:mm:ss",   // for 26 May 2000 11:25:00
    "dd MMM yy HH:mm",      // for 26 May 2000 11:25
    "yyyy MMM d",           // ambiguity resolved only by omitting time
    "yyyymmddhh:mm:ss",     // test case "200712172:2:3"
    "H:m M/d/yy",           // test case "9:47 5/12/2008"
    "M/d/yy HH:mm:ss",
    "M/d/yy HH:mm",
    "M/d/yy"

    // Proposed rule that is unreachable due to "dd MMM yy HH:mm:ss":
    //   "yyyy MMM d HH:mm:ss"
    //
    // Rules made unreachable by "M/d/yy HH:mm:ss", "M/d/yy HH:mm" and "M/d/yy"
    // (incoming digit strings do not mark themselves as y, m, or d!):
    //   "d/MM/yyyy HH:mm:ss"   (PDFBOX-164 and PDFBOX-170)
    //   "M/dd/yyyy hh:mm:ss"
    //   "MM/d/yyyy hh:mm:ss"
    //   "M/d/yyyy HH:mm:ss"
    //   "M/dd/yyyy"
    //   "MM/d/yyyy"
    //   "M/d/yyyy"
    //   "M/d/yyyy HH:mm:ss"
    //   "M/d/yy HH:mm:ss"
    //
    // Rules subsumed by the big-endian parse:
    //   "yyyy-MM-dd'T'HH:mm:ss"
    //   "yyyy-MM-dd'T'HH:mm:ss"
    //   "yyyymmdd hh:mm:ss"
    //   "yyyymmdd"
    //   "yyyymmddX''00''"      (covers 24 cases)
    //     (originally the above ended with '+00''00''';
    //      the first apostrophe quoted the plus,
    //      '' mapped to a single ', and the ''' was invalid)
};
|
||||
|
||||
/**
 * Converts a date to a string formatted as:
 *     D:yyyyMMddHHmmss#hh'mm'  where # is + or -.
 *
 * The offset written after the time is zero when cal.Kind is DateTimeKind.Utc;
 * for Local and Unspecified kinds it is the UTC offset of the machine's local
 * time zone at that date (daylight saving included).
 *
 * @param cal The date to convert to a string. May be null.
 *
 * @return The date as a String to be used in a PDF document,
 * or null if the cal value is null
 */
public static String toString(DateTime? cal)
{
    if (!cal.HasValue)
    {
        return null;
    }

    DateTime value = cal.Value;

    // TimeZoneInfo.Local.GetUtcOffset would convert a UTC value to local time first,
    // so UTC values are special-cased to keep their zero offset.
    TimeSpan offset = value.Kind == DateTimeKind.Utc
        ? TimeSpan.Zero
        : TimeZoneInfo.Local.GetUtcOffset(value);
    TimeSpan magnitude = offset.Duration();

    // Invariant culture keeps the digits and the Gregorian calendar independent of the
    // current thread culture. The apostrophes are literal: the PDF offset is hh'mm'.
    return string.Format(
        CultureInfo.InvariantCulture,
        "D:{0}{1}{2:00}'{3:00}'",
        value.ToString("yyyyMMddHHmmss", CultureInfo.InvariantCulture),
        offset < TimeSpan.Zero ? "-" : "+",
        magnitude.Hours,
        magnitude.Minutes);
}
|
||||
|
||||
/**
 * Converts the date to ISO 8601 string format:
 *     yyyy-MM-ddTHH:mm:ss#hh:mm  (where '#' is '+' or '-').
 *
 * The offset is zero when cal.Kind is DateTimeKind.Utc; for Local and
 * Unspecified kinds it is the UTC offset of the machine's local time zone at
 * that date (daylight saving included).
 *
 * @param cal The date to convert.
 *
 * @return The date represented as an ISO 8601 string.
 */
public static string toISO8601(DateTime cal)
{
    // TimeZoneInfo.Local.GetUtcOffset would convert a UTC value to local time first,
    // so UTC values are special-cased to keep their zero offset.
    TimeSpan offset = cal.Kind == DateTimeKind.Utc
        ? TimeSpan.Zero
        : TimeZoneInfo.Local.GetUtcOffset(cal);
    TimeSpan magnitude = offset.Duration();

    // Invariant culture guarantees ':' as the time separator and ASCII digits.
    return string.Format(
        CultureInfo.InvariantCulture,
        "{0}{1}{2:00}:{3:00}",
        cal.ToString("yyyy-MM-dd'T'HH:mm:ss", CultureInfo.InvariantCulture),
        offset < TimeSpan.Zero ? "-" : "+",
        magnitude.Hours,
        magnitude.Minutes);
}
|
||||
|
||||
/*
 * Constrains a time zone offset given in milliseconds.
 *
 * Offsets already within [-14:00, +14:00] are returned unchanged
 * (see https://www.w3.org/TR/xmlschema-2/#dateTime-timezones).
 * Anything else is wrapped by whole days into the range (-12:00, +12:00].
 */
private static int restrainTZoffset(long proposedOffset)
{
    long limit = 14 * MILLIS_PER_HOUR;
    bool alreadyValid = proposedOffset >= -limit && proposedOffset <= limit;
    if (alreadyValid)
    {
        return (int)proposedOffset;
    }

    // Shift by half a day and take a non-negative modulus: 0 <= shifted < DAY.
    long shifted = ((proposedOffset + HALF_DAY) % DAY + DAY) % DAY;
    if (shifted == 0)
    {
        // The offset is congruent to -12:00; report it as +12:00.
        return HALF_DAY;
    }

    // Undo the shift: -HALF_DAY < result < HALF_DAY.
    long wrapped = (shifted - HALF_DAY) % HALF_DAY;
    return (int)wrapped;
}
|
||||
|
||||
/*
|
||||
* Formats a time zone offset as #hh^mm
|
||||
* where # is + or -, hh is hours, ^ is a separator, and mm is minutes.
|
||||
* Any separator may be specified by the second argument;
|
||||
* the usual values are ":" (ISO 8601), "" (RFC 822), and "'" (PDF).
|
||||
* The returned value is constrained to the range -11:59 ... 11:59.
|
||||
* For offset of 0 millis, the String returned is "+00^00", never "Z".
|
||||
* To get a "general" offset in form GMT#hh:mm, write
|
||||
* "GMT"+DateConverter.formatTZoffset(offset, ":");
|
||||
*
|
||||
* Take thought in choosing the source for the millis value.
|
||||
* It can come from calendarValue.getTimeZone() or from
|
||||
* calendarValue.get(Calendar.ZONE_OFFSET). If a TimeZone was created
|
||||
* from a valid time zone ID, then it may have a daylight savings rule.
|
||||
* (As of July 4, 2013, the data base at http://www.iana.org/time-zones
|
||||
* recognized 629 time zone regions.) But a TimeZone created as
|
||||
* new SimpleTimeZone(millisOffset, "ID"),
|
||||
* will not have a daylight savings rule. (Not even if there is a
|
||||
* known time zone with the given ID.) To get the TimeZone named "xDT"
|
||||
* with its DST rule, use an ID of EST5EDT, CST6CDT, MST7MDT, or PST8PDT.
|
||||
*
|
||||
* When parsing PDF dates, the incoming value DOES NOT have a TIMEZONE value.
|
||||
* At most it has an OFFSET value like -04'00'. It is generally impossible to
|
||||
* determine what TIMEZONE corresponds to a given OFFSET. If the date is
|
||||
* in the summer when daylight savings is in effect, an offset of -0400
|
||||
* might correspond to any one of the 38 regions (of 53) with standard time
|
||||
* offset -0400 and no daylight saving. Or it might correspond to
|
||||
* any one of the 31 regions (out of 43) that observe daylight savings
|
||||
* and have standard time offset of -0500.
|
||||
*
|
||||
* If a Calendar has not been assigned a TimeZone with setTimeZone(),
|
||||
* it will have by default the local TIMEZONE, not just the OFFSET. In the
|
||||
* USA, this TimeZone will have a daylight savings rule.
|
||||
*
|
||||
* The offset assigned with calVal.set(Calendar.ZONE_OFFSET) differs
|
||||
* from the offset in the TimeZone set by Calendar.setTimeZone(). Example:
|
||||
* Suppose my local TimeZone is America/New_York. It has an offset of -05'00'.
|
||||
* And suppose I set a GregorianCalendar's ZONE_OFFSET to -07'00'
|
||||
* calVal = new GregorianCalendar(); // TimeZone is the local default
|
||||
* calVal.set(Calendar.ZONE_OFFSET, -7* MILLIS_PER_HOUR);
|
||||
* Four different offsets can be computed from calVal:
|
||||
* calVal.get(Calendar.ZONE_OFFSET) => -07:00
|
||||
* calVal.get(Calendar.ZONE_OFFSET) + calVal.get(Calendar.DST_OFFSET) => -06:00
|
||||
* calVal.getTimeZone().getRawOffset() => -05:00
|
||||
* calVal.getTimeZone().getOffset(calVal.getTimeInMillis()) => -04:00
|
||||
*
|
||||
* Which is correct??? I dunno, though setTimeZone() does seem to affect
|
||||
* ZONE_OFFSET, and not vice versa. One cannot even test whether TimeZone
|
||||
* or ZONE_OFFSET has been set; both have been set by initialization code.
|
||||
* TimeZone is initialized to the local default time zone
|
||||
* and ZONE_OFFSET is set from it.
|
||||
*
|
||||
* My choice in this DateConverter class has been to set the
|
||||
* initial TimeZone of a GregorianCalendar to GMT. Thereafter
|
||||
* the TimeZone is modified with {@link #adjustTimeZoneNicely}.
|
||||
*
|
||||
* package-private for testing
|
||||
*/
|
||||
private static string formatTZoffset(long millis, string sep)
{
    // Formats an offset in milliseconds as #hh<sep>mm, where # is '+' or '-'.
    // The offset is first normalised by restrainTZoffset; a zero offset is written
    // as "+00<sep>00". Any part of the offset smaller than a minute is dropped.
    int offset = restrainTZoffset(millis);
    string sign = offset < 0 ? "-" : "+";
    int totalMinutes = Math.Abs(offset) / MILLIS_PER_MINUTE;
    int hours = totalMinutes / MINUTES_PER_HOUR;
    int minutes = totalMinutes % MINUTES_PER_HOUR;

    // Invariant culture keeps the digits ASCII regardless of the thread culture.
    return sign
        + hours.ToString("00", CultureInfo.InvariantCulture)
        + sep
        + minutes.ToString("00", CultureInfo.InvariantCulture);
}
|
||||
|
||||
/*
 * Reads an unsigned integer of at most maxlen digits from text, starting at
 * where.Index.
 *
 * Returns the parsed value and advances where.Index past the digits read.
 * If text is null or there is no digit at the starting position, remedy is
 * returned and where is left unchanged.
 *
 * maxlen is usually 2, but 4 for year fields. A non-digit inside the field
 * simply ends the number; no error is signalled.
 */
private static int parseTimeField(string text, ParsePosition where, int maxlen, int remedy)
{
    if (text == null)
    {
        return remedy;
    }

    int start = where.Index;
    int end = start + Math.Min(maxlen, text.Length - start);
    int value = 0;
    int cursor = start;

    while (cursor < end)
    {
        int digit = text[cursor] - '0';
        if (digit < 0 || digit > 9)
        {
            // Not a digit: the field ends here.
            break;
        }

        value = value * 10 + digit;
        cursor++;
    }

    if (cursor == start)
    {
        // Nothing was consumed, so fall back to the caller's default.
        return remedy;
    }

    where.Index = cursor;
    return value;
}
|
||||
|
||||
/*
 * Advances where.Index past every leading character of text (from the current
 * index) that occurs in optionals. In particular, a space in optionals skips
 * all spaces.
 *
 * Returns the last non-space character that was skipped, or a space if no
 * non-space character was skipped. A null text skips nothing.
 */
private static char skipOptionals(string text, ParsePosition where, string optionals)
{
    char lastNonSpace = ' ';
    if (text == null)
    {
        return lastNonSpace;
    }

    while (where.Index < text.Length)
    {
        char current = text[where.Index];
        if (optionals.IndexOf(current) < 0)
        {
            break;
        }

        if (current != ' ')
        {
            lastNonSpace = current;
        }

        where.Index++;
    }

    return lastNonSpace;
}
|
||||
|
||||
/*
 * If victim occurs in text at where.Index, advances where.Index past it and
 * returns true; otherwise leaves where unchanged and returns false.
 */
private static bool skipString(string text, string victim, ParsePosition where)
{
    bool found = text.StartsWithOffset(victim, where.Index);
    if (found)
    {
        where.Index += victim.Length;
    }

    return found;
}
|
||||
|
||||
/*
 * Intended to construct a new GregorianCalendar with defaults:
 * English locale, time zone "UTC" (zero offset and no DST), non-lenient
 * parsing and zero milliseconds.
 *
 * Not ported yet: always throws NotImplementedException.
 */
private static GregorianCalendar newGreg()
{
    // Outline of the Java original still to be ported:
    //   1. create a GregorianCalendar (US English),
    //   2. set its time zone to a simple zone with offset 0 named "UTC",
    //   3. switch lenient parsing off,
    //   4. set the millisecond field to 0 and return the calendar.
    throw new NotImplementedException();
}
|
||||
|
||||
/*
 * Intended to install a TimeZone on a GregorianCalendar without changing the
 * hours value. In the Java original a plain GregorianCalendar.setTimeZone()
 * adjusts the hour to compensate, which is wrong once the time has been set.
 *
 * Not ported yet: always throws NotImplementedException.
 */
private static void adjustTimeZoneNicely(GregorianCalendar cal, TimeZone tz)
{
    // Outline of the Java original still to be ported:
    //   1. set tz as the calendar's time zone,
    //   2. offset = (zone offset + DST offset) / MILLIS_PER_MINUTE,
    //   3. subtract offset minutes from the calendar.
    throw new NotImplementedException();
}
|
||||
|
||||
/*
 * Intended to parse the end of a date string for a time zone and, if one is
 * found, set the time zone of the GregorianCalendar; otherwise the calendar
 * time zone is unchanged.
 *
 * The text is to be parsed as
 *     (Z|GMT|UTC)? [+- ]* h [': ]? m '?
 * where the leading string is optional, h is two digits by default but may be
 * a single digit if followed by a space, apostrophe, colon, or the end of the
 * string. Similarly, m is one or two digits. This scheme accepts the formats
 * of PDF, RFC 822 and ISO 8601. If none of these applies (as for a time zone
 * name), a lookup by name is attempted.
 *
 * Scanning begins at initialWhere.Index. After success, that index is the
 * position of the next character after the recognized string.
 *
 * Not ported yet: always throws NotImplementedException.
 */
private static bool parseTZoffset(string text, GregorianCalendar cal, ParsePosition initialWhere)
{
    // Outline of the Java original still to be ported:
    //   1. Work on a copy of initialWhere; start from a GMT zone with offset 0.
    //   2. sign = skipOptionals(text, where, "Z+- ");
    //      hadGMT = sign == 'Z' || skipString "GMT" || skipString "UTC";
    //      if hadGMT, sign = skipOptionals(text, where, "+- ").
    //   3. tzHours = parseTimeField(text, where, 2, -999); skipOptionals "': ";
    //      tzMin = parseTimeField(text, where, 2, 0); skipOptionals "' ".
    //   4. If tzHours != -999: raw offset = restrainTZoffset(+/-(tzHours * MILLIS_PER_HOUR
    //      + tzMin * MILLIS_PER_MINUTE)), negative only when sign is '-'; then updateZoneId.
    //   5. Otherwise, if !hadGMT: look the trimmed remainder of text (from initialWhere) up
    //      as a zone name; an unknown name (reported as "GMT") returns false with cal and
    //      initialWhere unchanged; a known name consumes the text to its end.
    //   6. adjustTimeZoneNicely(cal, tz); copy where.Index to initialWhere; return true.
    throw new NotImplementedException();
}
|
||||
|
||||
/**
 * Intended to update the zone ID based on the raw offset. The ID is either
 * GMT, GMT+hh:mm or GMT-hh:mm. The highest negative hour is -14, the highest
 * positive hour is 12. Zones that don't fit this scheme get the ID "unknown".
 *
 * Not ported yet: always throws NotImplementedException.
 *
 * @param tz the time zone to update.
 */
private static void updateZoneId(TimeZone tz)
{
    // Outline of the Java original still to be ported:
    //   1. offset = raw offset of tz in milliseconds; remember its sign, continue with |offset|.
    //   2. hh = offset / 3600000; mm = offset % 3600000 / 60000.
    //   3. offset == 0            -> ID "GMT"
    //      positive and hh <= 12  -> ID "GMT+hh:mm" (two digits each)
    //      negative and hh <= 14  -> ID "GMT-hh:mm" (two digits each)
    //      otherwise              -> ID "unknown"
    throw new NotImplementedException();
}
|
||||
|
||||
/*
 * Intended to parse a big-endian date: year month day hour min sec.
 * The year must be four digits. Other fields may be adjacent and delimited by
 * length, or they may follow appropriate delimiters:
 *     year [ -/]* month [ -/]* dayofmonth [ T]* hour [:] min [:] sec [.secFraction]
 * If any numeric field is omitted, all following fields must also be omitted.
 * No time zone is processed.
 *
 * Ambiguous dates can produce unexpected results. For example:
 *     1970 12 23:08 will parse as 1970 December 23 00:08:00
 *
 * The parse begins at `initialWhere`; on return the index is to be advanced
 * to just beyond the last character processed.
 *
 * Not ported yet: always throws NotImplementedException.
 */
private static GregorianCalendar parseBigEndianDate(String text, ParsePosition initialWhere)
{
    // Outline of the Java original still to be ported:
    //   1. Work on a copy of initialWhere. year = parseTimeField(text, where, 4, 0);
    //      return null unless exactly four characters were consumed.
    //   2. skipOptionals "/- "; month = parseTimeField(.., 2, 1) (Java then subtracts 1
    //      because its Calendar months are 0..11).
    //   3. skipOptionals "/- "; day = parseTimeField(.., 2, 1).
    //   4. skipOptionals " T"; hour = parseTimeField(.., 2, 0).
    //   5. skipOptionals ": "; minute = parseTimeField(.., 2, 0).
    //   6. skipOptionals ": "; second = parseTimeField(.., 2, 0).
    //   7. If a '.' follows, skip up to 19 fraction digits.
    //   8. Build a non-lenient calendar from the fields; return null if a field is out of range.
    //   9. Copy where.Index to initialWhere, skip trailing spaces, return the calendar.
    throw new NotImplementedException();
}
|
||||
|
||||
/*
 * Intended to see whether text can be parsed as a date according to any of a
 * list of formats. The time zone may be included as part of the format, or
 * omitted in favor of later testing for a trailing time zone.
 *
 * Intended contract: the parse starts at `initialWhere`; upon success the
 * index refers to the next non-space character after the date and the
 * calendar for that date is returned (time zone GMT+0 unless the format
 * contained one). If no date is found, null is returned and the index is
 * unchanged.
 *
 * Current state: the format-driven parse is not ported. The method returns
 * null when fmts is empty and throws NotImplementedException otherwise.
 */
private static GregorianCalendar parseSimpleDate(string text, string[] fmts,
    ParsePosition initialWhere)
{
    foreach (var fmt in fmts)
    {
        // Outline of the Java original still to be ported, per format:
        //   1. parse text with fmt (English locale) from a copy of initialWhere
        //      into a fresh calendar from newGreg();
        //   2. on success copy the reached index to initialWhere, skip trailing
        //      spaces and return the calendar; otherwise try the next format.
        throw new NotImplementedException();
    }

    return null;
}
|
||||
|
||||
/*
 * Parses a string to see if it begins with a date. The date must be strictly
 * correct: no field may exceed the appropriate limit.
 * Skips initial spaces, but does NOT check for "D:".
 *
 * The scan first tries parseBigEndianDate and parseTZoffset and then tries
 * parseSimpleDate with DIGIT_START_FORMATS or ALPHA_START_FORMATS (chosen by
 * the first non-space character), again followed by parseTZoffset. The
 * candidate that consumes the longest initial part of the text wins, and
 * initialWhere.Index is moved to the end of that candidate.
 *
 * Returns null when text is null, empty, or holds only spaces from
 * initialWhere onwards.
 *
 * Current state: the return statements of the Java original are disabled, so
 * once the bookkeeping is done the method throws NotImplementedException.
 */
private static DateTime? parseDate(string text, ParsePosition initialWhere)
{
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    // Work on a copy so that initialWhere only moves when a date is recognised.
    ParsePosition where = new ParsePosition(initialWhere.Index);
    skipOptionals(text, where, " ");
    int startPosition = where.Index;

    if (startPosition >= text.Length)
    {
        // Only spaces remain. Without this guard text[startPosition] below would be
        // read past the end of the string.
        return null;
    }

    // Longest date found so far; longestLen is only meaningful when longestDate != null.
    GregorianCalendar longestDate = null;
    int longestLen = int.MinValue;

    // First attempt: big-endian parse, optionally followed by a time zone.
    GregorianCalendar retCal = parseBigEndianDate(text, where);
    if (retCal != null && (where.Index == text.Length || parseTZoffset(text, retCal, where)))
    {
        int consumed = where.Index;
        if (consumed == text.Length)
        {
            // Text fully consumed (the Java original returned retCal here).
            initialWhere.Index = consumed;
        }

        longestLen = consumed;
        longestDate = retCal;
    }

    // Second attempt: one of the sets of standard formats, from the same start.
    where.Index = startPosition;
    string[] formats = char.IsDigit(text[startPosition])
        ? DIGIT_START_FORMATS
        : ALPHA_START_FORMATS;
    retCal = parseSimpleDate(text, formats, where);
    if (retCal != null && (where.Index == text.Length || parseTZoffset(text, retCal, where)))
    {
        int consumed = where.Index;
        if (consumed == text.Length)
        {
            // Text fully consumed (the Java original returned retCal here).
            initialWhere.Index = consumed;
        }

        if (consumed > longestLen)
        {
            longestLen = consumed;
            longestDate = retCal;
        }
    }

    if (longestDate != null)
    {
        // The Java original returned longestDate here.
        initialWhere.Index = longestLen;
    }

    throw new NotImplementedException();
}
|
||||
|
||||
/**
 * Returns the date for a given COS string containing a date,
 * or null if it cannot be parsed.
 *
 * Delegates to toCalendar(string) with the value of text.GetString().
 *
 * @param text A COS string containing a date. May be null.
 * @return The date that the text represents, or null if text is null or cannot be parsed.
 */
public static DateTime? toCalendar(CosString text)
{
    return text == null ? null : toCalendar(text.GetString());
}
|
||||
|
||||
/**
 * Returns the date for a given string containing a date,
 * or null if it cannot be parsed.
 *
 * Leading spaces and an optional "D:" prefix are skipped before parseDate is
 * called. The whole of the text must be consumed by the parse; otherwise the
 * string is treated as invalid.
 *
 * @param text A string containing a date. May be null.
 * @return The date that the text represents, or null if text is null, blank
 * or cannot be parsed.
 */
public static DateTime? toCalendar(string text)
{
    if (text == null || text.Trim() == string.Empty)
    {
        return null;
    }

    ParsePosition position = new ParsePosition(0);
    skipOptionals(text, position, " ");
    skipString(text, "D:", position);

    DateTime? parsed = parseDate(text, position);

    // A date followed by unparsed characters makes the whole string invalid.
    bool valid = parsed != null && position.Index == text.Length;
    return valid ? parsed : null;
}
|
||||
}
|
||||
}
|
@@ -7,8 +7,6 @@
|
||||
/// <summary>
/// Replaces at most <paramref name="count"/> occurrences of <paramref name="old"/> in
/// <paramref name="value"/> with <paramref name="newValue"/>, scanning from left to right
/// with an ordinal comparison.
/// </summary>
/// <param name="value">The string to search.</param>
/// <param name="old">The text to replace; must not be null or empty.</param>
/// <param name="newValue">The replacement text; null is treated as the empty string.</param>
/// <param name="count">The maximum number of replacements; zero or less replaces nothing.</param>
/// <returns>The resulting string, or <paramref name="value"/> itself when nothing was replaced.</returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> or <paramref name="old"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="old"/> is empty.</exception>
public static string ReplaceLimited(this string value, string old, string newValue, int count)
{
    if (value == null)
    {
        throw new ArgumentNullException(nameof(value));
    }

    if (old == null)
    {
        throw new ArgumentNullException(nameof(old));
    }

    if (old.Length == 0)
    {
        throw new ArgumentException("The string to replace cannot be empty.", nameof(old));
    }

    if (count <= 0)
    {
        return value;
    }

    var result = new System.Text.StringBuilder(value.Length);
    int position = 0;
    int replaced = 0;

    while (replaced < count)
    {
        int match = value.IndexOf(old, position, StringComparison.Ordinal);
        if (match < 0)
        {
            break;
        }

        // Copy the text before the match, then the replacement, and resume after the match
        // so that replacements are never rescanned.
        result.Append(value, position, match - position).Append(newValue);
        position = match + old.Length;
        replaced++;
    }

    if (replaced == 0)
    {
        return value;
    }

    result.Append(value, position, value.Length - position);
    return result.ToString();
}
|
||||
|
||||
public static bool StartsWithOffset(this string value, string start, int offset)
|
||||
|
@@ -2,7 +2,7 @@
|
||||
|
||||
namespace UglyToad.Pdf.Util
|
||||
{
|
||||
public static class OtherEncodings
|
||||
internal static class OtherEncodings
|
||||
{
|
||||
/// <summary>
|
||||
/// Latin 1 Encoding: ISO 8859-1 is a single-byte encoding that can represent the first 256 Unicode characters.
|
||||
|
@@ -1,6 +1,6 @@
|
||||
namespace UglyToad.Pdf.Util
|
||||
{
|
||||
public class ParsePosition
|
||||
internal class ParsePosition
|
||||
{
|
||||
public int Index { get; set; }
|
||||
|
||||
|
Reference in New Issue
Block a user