From e02e130947e1f5e928d779e098313539c57d32ae Mon Sep 17 00:00:00 2001
From: Eliot Jones
Date: Thu, 3 Oct 2019 12:56:48 +0100
Subject: [PATCH] #57 add creation and modified date to document information

this enables users to check if xmp metadata is outdated
---
 .../Util/DateFormatHelperTests.cs             |  63 +++++
 .../Content/DocumentInformation.cs            |  40 +++-
 .../Parser/DocumentInformationFactory.cs      |   4 +-
 src/UglyToad.PdfPig/Util/DateFormatHelper.cs  | 209 +++++++++++++++++
 .../Util/InternalStringExtensions.cs          |   5 -
 5 files changed, 313 insertions(+), 8 deletions(-)
 create mode 100644 src/UglyToad.PdfPig.Tests/Util/DateFormatHelperTests.cs
 create mode 100644 src/UglyToad.PdfPig/Util/DateFormatHelper.cs

diff --git a/src/UglyToad.PdfPig.Tests/Util/DateFormatHelperTests.cs b/src/UglyToad.PdfPig.Tests/Util/DateFormatHelperTests.cs
new file mode 100644
index 00000000..294b2904
--- /dev/null
+++ b/src/UglyToad.PdfPig.Tests/Util/DateFormatHelperTests.cs
@@ -0,0 +1,63 @@
+namespace UglyToad.PdfPig.Tests.Util
+{
+    using System;
+    using System.Collections.Generic;
+    using PdfPig.Util;
+    using Xunit;
+
+    public class DateFormatHelperTests
+    {
+        public static IEnumerable<object[]> PositiveDateData = new[]
+        {
+            new object[] {"D:20190710205447+01'00'", new DateTimeOffset(2019, 7, 10, 20, 54, 47, TimeSpan.FromHours(1))},
+            new object[] {"D:2017", new DateTimeOffset(2017, 1, 1, 0, 0, 0, TimeSpan.Zero)},
+            new object[] {"2017", new DateTimeOffset(2017, 1, 1, 0, 0, 0, TimeSpan.Zero)},
+            new object[] {"196712", new DateTimeOffset(1967, 12, 1, 0, 0, 0, TimeSpan.Zero)},
+            new object[] {"D:196712", new DateTimeOffset(1967, 12, 1, 0, 0, 0, TimeSpan.Zero)},
+            new object[] {"D:20100520", new DateTimeOffset(2010, 5, 20, 0, 0, 0, TimeSpan.Zero)},
+            new object[] {"20121106", new DateTimeOffset(2012, 11, 6, 0, 0, 0, TimeSpan.Zero)},
+            new object[] {"D:2012110623", new DateTimeOffset(2012, 11, 6, 23, 0, 0, TimeSpan.Zero)},
+            new object[] {"D:201211061655", new DateTimeOffset(2012, 11, 6, 16, 55, 0, TimeSpan.Zero)},
+            new object[] {"D:20121106005512", new DateTimeOffset(2012, 11, 6, 0, 55, 12, TimeSpan.Zero)},
+            new object[] {"D:20121106165512Z", new DateTimeOffset(2012, 11, 6, 16, 55, 12, TimeSpan.Zero)},
+            new object[] {"20121106165512Z", new DateTimeOffset(2012, 11, 6, 16, 55, 12, TimeSpan.Zero)},
+            new object[] {"D:19970915110347-07'30'", new DateTimeOffset(1997, 9, 15, 11, 3, 47, new TimeSpan(-7, -30, 0))},
+            new object[] {"D:19990209153925+11'", new DateTimeOffset(1999, 2, 9, 15, 39, 25, TimeSpan.FromHours(11))},
+            new object[] {"D:19990209153925-03'", new DateTimeOffset(1999, 2, 9, 15, 39, 25, TimeSpan.FromHours(-3))},
+            new object[] {"D:19981223195200-08'00", new DateTimeOffset(1998, 12, 23, 19, 52, 0, TimeSpan.FromHours(-8))},
+        };
+
+        [Theory]
+        [InlineData(default(string))]
+        [InlineData("")]
+        [InlineData("D:")]
+        [InlineData("D:FEHTR$54")]
+        [InlineData("D:49454")]
+        [InlineData("9454AE")]
+        [InlineData("20190107121634!")]
+        [InlineData("D:19990209153925+11A")]
+        [InlineData("D:19990209153925+11")]
+        [InlineData("D:19990209153925E11")]
+        [InlineData("D:19993209")]
+        [InlineData("D:19990750")]
+        [InlineData("D:20100231")]
+        [InlineData("2017 1")]
+        [InlineData("D:+2010520")]
+        public void TryParseDateTimeOffset_InvalidInput_False(string input)
+        {
+            var result = DateFormatHelper.TryParseDateTimeOffset(input, out _);
+
+            Assert.False(result);
+        }
+
+        [Theory]
+        [MemberData(nameof(PositiveDateData))]
+        public void TryParseDateTimeOffset_ValidDate_True(string input, DateTimeOffset expected)
+        {
+            var success = DateFormatHelper.TryParseDateTimeOffset(input, out var result);
+
+            Assert.True(success);
+            Assert.Equal(expected, result);
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Content/DocumentInformation.cs b/src/UglyToad.PdfPig/Content/DocumentInformation.cs
index 61f606ef..377be91f 100644
--- a/src/UglyToad.PdfPig/Content/DocumentInformation.cs
+++ b/src/UglyToad.PdfPig/Content/DocumentInformation.cs
@@ -1,8 +1,10 @@
 namespace UglyToad.PdfPig.Content
 {
+    using System;
     using System.Collections.Generic;
     using System.Text;
     using Tokens;
+    using Util;
     using Util.JetBrains.Annotations;
 
     /// <summary>
@@ -11,7 +13,7 @@
     public class DocumentInformation
     {
         internal static DocumentInformation Default { get; }
-            = new DocumentInformation(null, null, null, null, null, null, null);
+            = new DocumentInformation(null, null, null, null, null, null, null, null, null);
 
         private readonly string representation;
 
@@ -56,7 +58,21 @@
         [CanBeNull]
         public string Producer { get; }
 
-        internal DocumentInformation(DictionaryToken documentInformationDictionary, string title, string author, string subject, string keywords, string creator, string producer)
+        /// <summary>
+        /// The date and time the document was created.
+        /// </summary>
+        [CanBeNull]
+        public string CreationDate { get; }
+
+        /// <summary>
+        /// The date and time the document was most recently modified.
+        /// </summary>
+        [CanBeNull]
+        public string ModifiedDate { get; }
+
+        internal DocumentInformation(DictionaryToken documentInformationDictionary, string title, string author, string subject, string keywords, string creator, string producer,
+            string creationDate,
+            string modifiedDate)
         {
             DocumentInformationDictionary = documentInformationDictionary ?? new DictionaryToken(new Dictionary<NameToken, IToken>());
             Title = title;
@@ -65,6 +81,8 @@
             Keywords = keywords;
             Creator = creator;
             Producer = producer;
+            CreationDate = creationDate;
+            ModifiedDate = modifiedDate;
 
             var builder = new StringBuilder();
 
@@ -74,10 +92,28 @@
             AppendPart("Keywords", keywords, builder);
             AppendPart("Creator", creator, builder);
             AppendPart("Producer", producer, builder);
+            AppendPart("CreationDate", creationDate, builder);
+            AppendPart("ModifiedDate", modifiedDate, builder);
 
             representation = builder.ToString();
         }
 
+        /// <summary>
+        /// Gets the <see cref="CreationDate"/> as a <see cref="DateTimeOffset"/> if it's possible to convert it, or <see langword="null"/>.
+        /// </summary>
+        public DateTimeOffset? GetCreatedDateTimeOffset()
+        {
+            return DateFormatHelper.TryParseDateTimeOffset(CreationDate, out var result) ? result : default(DateTimeOffset?);
+        }
+
+        /// <summary>
+        /// Gets the <see cref="ModifiedDate"/> as a <see cref="DateTimeOffset"/> if it's possible to convert it, or <see langword="null"/>.
+        /// </summary>
+        public DateTimeOffset? GetModifiedDateTimeOffset()
+        {
+            return DateFormatHelper.TryParseDateTimeOffset(ModifiedDate, out var result) ? result : default(DateTimeOffset?);
+        }
+
         /// <summary>
         /// Gets a string representing this document information. <see langword="null"/> entries are not shown.
         /// </summary>
diff --git a/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs b/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs
index 13aff674..c2ace0fd 100644
--- a/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs
+++ b/src/UglyToad.PdfPig/Parser/DocumentInformationFactory.cs
@@ -23,9 +23,11 @@
             var keywords = GetEntryOrDefault(infoParsed, NameToken.Keywords);
             var creator = GetEntryOrDefault(infoParsed, NameToken.Creator);
             var producer = GetEntryOrDefault(infoParsed, NameToken.Producer);
+            var creationDate = GetEntryOrDefault(infoParsed, NameToken.CreationDate);
+            var modifiedDate = GetEntryOrDefault(infoParsed, NameToken.ModDate);
 
             return new DocumentInformation(infoParsed, title, author, subject,
-                keywords, creator, producer);
+                keywords, creator, producer, creationDate, modifiedDate);
         }
 
         private static string GetEntryOrDefault(DictionaryToken infoDictionary, NameToken key)
diff --git a/src/UglyToad.PdfPig/Util/DateFormatHelper.cs b/src/UglyToad.PdfPig/Util/DateFormatHelper.cs
new file mode 100644
index 00000000..c0c089ec
--- /dev/null
+++ b/src/UglyToad.PdfPig/Util/DateFormatHelper.cs
@@ -0,0 +1,209 @@
+namespace UglyToad.PdfPig.Util
+{
+    using System;
+
+    /// <summary>
+    /// Parses PDF date strings of the form <c>D:YYYYMMDDHHmmSSOHH'mm'</c>.
+    /// </summary>
+    internal static class DateFormatHelper
+    {
+        /// <summary>
+        /// The largest UTC offset magnitude a <see cref="DateTimeOffset"/> can hold.
+        /// </summary>
+        private static readonly TimeSpan MaximumUtcOffset = TimeSpan.FromHours(14);
+
+        /// <summary>
+        /// Tries to convert a PDF date string to a <see cref="DateTimeOffset"/>.
+        /// </summary>
+        /// <remarks>
+        /// The <c>D:</c> prefix is optional. Every field after the 4 digit year may be omitted provided all fields
+        /// before it are present: an omitted month or day is 1, an omitted time field is 0 and an omitted time zone
+        /// is UTC. The apostrophe after the offset minutes is optional because PDF Reference 1.7 writes it
+        /// but ISO 32000 does not.
+        /// </remarks>
+        /// <param name="s">The date string, may be <see langword="null"/>.</param>
+        /// <param name="offset">The parsed date, or <see cref="DateTimeOffset.MinValue"/> if parsing failed.</param>
+        /// <returns><see langword="true"/> if <paramref name="s"/> is a valid date.</returns>
+        public static bool TryParseDateTimeOffset(string s, out DateTimeOffset offset)
+        {
+            offset = DateTimeOffset.MinValue;
+
+            if (s == null || s.Length < 4)
+            {
+                return false;
+            }
+
+            var location = s[0] == 'D' && s[1] == ':' ? 2 : 0;
+
+            if (!TryReadField(s, ref location, 4, 1, 9999, out var year))
+            {
+                return false;
+            }
+
+            var month = 1;
+            var day = 1;
+            var hour = 0;
+            var minute = 0;
+            var second = 0;
+
+            // Once the end of the string is reached all remaining fields are skipped and keep their defaults.
+            if (location < s.Length && !TryReadField(s, ref location, 2, 1, 12, out month))
+            {
+                return false;
+            }
+
+            if (location < s.Length && !TryReadField(s, ref location, 2, 1, 31, out day))
+            {
+                return false;
+            }
+
+            if (location < s.Length && !TryReadField(s, ref location, 2, 0, 23, out hour))
+            {
+                return false;
+            }
+
+            if (location < s.Length && !TryReadField(s, ref location, 2, 0, 59, out minute))
+            {
+                return false;
+            }
+
+            if (location < s.Length && !TryReadField(s, ref location, 2, 0, 59, out second))
+            {
+                return false;
+            }
+
+            // Reject dates such as the 31st of February without relying on the constructor throwing.
+            if (day > DateTime.DaysInMonth(year, month))
+            {
+                return false;
+            }
+
+            var utcOffset = TimeSpan.Zero;
+
+            if (location < s.Length && !TryReadUtcOffset(s, location, out utcOffset))
+            {
+                return false;
+            }
+
+            try
+            {
+                offset = new DateTimeOffset(year, month, day, hour, minute, second, utcOffset);
+
+                return true;
+            }
+            catch (ArgumentException)
+            {
+                // Every field is valid but the equivalent UTC time is outside the supported range,
+                // for example the first day of year 1 with a positive offset.
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Reads the time zone starting at <paramref name="location"/>, which must run to the end of the string.
+        /// </summary>
+        private static bool TryReadUtcOffset(string s, int location, out TimeSpan utcOffset)
+        {
+            utcOffset = TimeSpan.Zero;
+
+            int sign;
+            switch (s[location++])
+            {
+                case '+':
+                    sign = 1;
+                    break;
+                case '-':
+                    sign = -1;
+                    break;
+                case 'Z':
+                    sign = 0;
+                    break;
+                default:
+                    return false;
+            }
+
+            if (location == s.Length)
+            {
+                // The relationship character on its own means no offset from UTC.
+                return true;
+            }
+
+            // The hours must be followed by an apostrophe.
+            if (!TryReadField(s, ref location, 2, 0, 23, out var hours)
+                || location == s.Length
+                || s[location++] != '\'')
+            {
+                return false;
+            }
+
+            var minutes = 0;
+
+            if (location < s.Length)
+            {
+                if (!TryReadField(s, ref location, 2, 0, 59, out minutes))
+                {
+                    return false;
+                }
+
+                // Accept both HH'mm' (PDF Reference 1.7) and HH'mm (ISO 32000).
+                if (location < s.Length && s[location] == '\'')
+                {
+                    location++;
+                }
+
+                if (location != s.Length)
+                {
+                    return false;
+                }
+            }
+
+            // With 'Z' the sign is 0 so any offset digits which follow it are ignored.
+            var result = new TimeSpan(hours * sign, minutes * sign, 0);
+
+            if (result.Duration() > MaximumUtcOffset)
+            {
+                return false;
+            }
+
+            utcOffset = result;
+
+            return true;
+        }
+
+        /// <summary>
+        /// Reads <paramref name="length"/> decimal digits at <paramref name="location"/> and advances past them
+        /// if they form a value between <paramref name="min"/> and <paramref name="max"/> inclusive.
+        /// </summary>
+        private static bool TryReadField(string s, ref int location, int length, int min, int max, out int value)
+        {
+            value = 0;
+
+            if (location + length > s.Length)
+            {
+                return false;
+            }
+
+            for (var i = 0; i < length; i++)
+            {
+                var c = s[location + i];
+
+                // Only 0 to 9 are allowed, int.TryParse would also accept a sign or surrounding whitespace.
+                if (c < '0' || c > '9')
+                {
+                    return false;
+                }
+
+                value = (value * 10) + (c - '0');
+            }
+
+            if (value < min || value > max)
+            {
+                return false;
+            }
+
+            location += length;
+
+            return true;
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Util/InternalStringExtensions.cs b/src/UglyToad.PdfPig/Util/InternalStringExtensions.cs
index fb9b0dd3..6ba0dae7 100644
--- a/src/UglyToad.PdfPig/Util/InternalStringExtensions.cs
+++ b/src/UglyToad.PdfPig/Util/InternalStringExtensions.cs
@@ -4,11 +4,6 @@
 
     internal static class InternalStringExtensions
     {
-        public static string ReplaceLimited(this string value, string old, string newValue, int count)
-        {
-            throw new NotImplementedException();
-        }
-
         public static bool StartsWithOffset(this string value, string start, int offset)
         {
             if (offset < 0)