some changes

This commit is contained in:
BobLd 2023-07-29 15:51:52 +01:00
parent 76c6e9436d
commit 8501ae123e
7 changed files with 192 additions and 93 deletions

View File

@ -0,0 +1,35 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using UglyToad.PdfPig.Tokens;
using Xunit;
/// <summary>
/// Integration checks for images whose stream uses the DCTDecode filter.
/// </summary>
public class DctDecodeFilterTests
{
    /// <summary>
    /// Opens "Pig Production Handbook.pdf" with lenient parsing off and, for every image whose
    /// Filter entry is the DCTDecode name, converts it to PNG and writes the bytes to
    /// "Pig Production Handbook_{page}_{index}.png" (a relative path, so it resolves against
    /// the current working directory).
    /// </summary>
    [Fact]
    public void LettersHaveCorrectColors()
    {
        var path = IntegrationHelpers.GetDocumentPath("Pig Production Handbook.pdf");

        using (var document = PdfDocument.Open(path, ParsingOptions.LenientParsingOff))
        {
            for (int p = 1; p <= document.NumberOfPages; p++)
            {
                var page = document.GetPage(p);

                // Counts every image on the page, not only the DCT ones, so the index in the
                // file name is the image's position within the page.
                int i = 0;
                foreach (var image in page.GetImages())
                {
                    if (image.ImageDictionary.TryGet<NameToken>(NameToken.Filter, out var filter) && filter.Data.Equals(NameToken.DctDecode))
                    {
                        // Fail with a clear message when conversion is refused, instead of
                        // handing a null buffer to File.WriteAllBytes.
                        Assert.True(image.TryGetPng(out var png), $"Could not convert image {i} on page {p} to PNG.");
                        Assert.NotNull(png);

                        File.WriteAllBytes($"Pig Production Handbook_{p}_{i}.png", png);
                    }

                    i++;
                }
            }
        }
    }
}
}

View File

@ -1,19 +1,46 @@
namespace UglyToad.PdfPig.Filters
{
using System;
using BigGustave.Jpgs;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Tokens;
using static System.Net.Mime.MediaTypeNames;
/// <summary>
/// Filter for DCTDecode streams: the input is parsed as a JPEG and returned as raw pixel bytes.
/// </summary>
internal class DctDecodeFilter : IFilter
{
    /// <inheritdoc />
    public bool IsSupported { get; } = true;

    /// <summary>
    /// Decodes JPEG data into a flat buffer of 3 bytes per pixel (R, G, B), written row by row
    /// from the top-left pixel.
    /// </summary>
    /// <param name="input">The encoded JPEG bytes.</param>
    /// <param name="streamDictionary">The dictionary of the stream being decoded. Not read by this filter.</param>
    /// <param name="filterIndex">The position of this filter in the stream's filter list. Not read by this filter.</param>
    /// <returns>A buffer of length 3 * width * height holding the decoded pixels.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="input"/> is null.</exception>
    /// <exception cref="OverflowException">Thrown when the output size does not fit in an <see cref="int"/>.</exception>
    public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        using (var ms = new MemoryStream(input.ToArray()))
        {
            var jpg = JpgOpener.Open(ms, true);

            // checked: an oversized image must fail loudly rather than wrap around and
            // allocate a buffer that is too small.
            var output = new byte[checked(3 * jpg.Width * jpg.Height)];

            // NOTE(review): the output is always 3 bytes per pixel; confirm that GetPixel
            // yields sensible R/G/B values for grayscale and CMYK JPEGs.
            int i = 0;
            for (int y = 0; y < jpg.Height; y++)
            {
                for (int x = 0; x < jpg.Width; x++)
                {
                    var pixel = jpg.GetPixel(x, y);
                    output[i++] = pixel.R;
                    output[i++] = pixel.G;
                    output[i++] = pixel.B;
                }
            }

            return output;
        }
    }
}
}

View File

@ -1,16 +1,24 @@
namespace BigGustave.Jpgs
{
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
internal class BitStream
{
private bool isLittleEndian = BitConverter.IsLittleEndian;
private readonly BitArray bitArray;
private int bitOffset;
private readonly IReadOnlyList<byte> data;
/// <summary>
/// Create a bit stream over the given bytes.
/// </summary>
/// <param name="data">The bytes to read bits from.</param>
public BitStream(IReadOnlyList<byte> data)
{
    // BitArray needs a concrete array, so the bytes are copied once here.
    var bytes = data.ToArray();
    bitArray = new BitArray(bytes);

    this.data = data;
}
public int Read()
@ -22,14 +30,14 @@
return -1;
}
var byteVal = data[byteIndex];
var withinByteIndex = bitOffset - (byteIndex * 8);
bitOffset++;
// TODO: LSB?
return ((1 << (7 - withinByteIndex)) & byteVal) > 0 ? 1 : 0;
return bitArray[bitOffset + 7 - withinByteIndex] ? 1 : 0;
//var byteVal = data[byteIndex];
//return ((1 << (7 - withinByteIndex)) & byteVal) > 0 ? 1 : 0;
}
public int ReadNBits(int length)

View File

@ -1,17 +1,18 @@
namespace BigGustave
{
using Jpgs;
using System;
using System.Collections.Generic;
using System.IO;
using Jpgs;
using UglyToad.PdfPig.Core;
using UglyToad.PdfPig.Images.Png;
/// <summary>
/// A JPEG image.
/// </summary>
public class Jpg
internal class Jpg
{
private readonly byte[] rawData;
internal readonly byte[] rawData;
/// <summary>
/// The width of the image in pixels.
@ -46,7 +47,6 @@
this.rawData = rawData;
}
/*
/// <summary>
/// Get the pixel at the given column and row (x, y).
/// </summary>
@ -77,7 +77,6 @@
return new Pixel(r, g, b);
}
*/
/// <summary>
/// Open and parse a JPG file from the stream.

View File

@ -4,10 +4,10 @@
{
public static readonly byte[] ZigZagPattern = new byte[]
{
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28,
27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,

View File

@ -2,6 +2,7 @@
{
using Content;
using Graphics.Colors;
using System.Linq;
using UglyToad.PdfPig.Core;
internal static class PngFromPdfImageFactory
@ -20,86 +21,102 @@
}
try
{
bytesPure = ColorSpaceDetailsByteConverter.Convert(image.ColorSpaceDetails, bytesPure,
image.BitsPerComponent, image.WidthInSamples, image.HeightInSamples);
{
var numberOfComponents = image.ColorSpaceDetails.BaseNumberOfColorComponents;
var is3Byte = numberOfComponents == 3;
var builder = PngBuilder.Create(image.WidthInSamples, image.HeightInSamples, false);
var requiredSize = (image.WidthInSamples * image.HeightInSamples * numberOfComponents);
var actualSize = bytesPure.Count;
var isCorrectlySized = bytesPure.Count == requiredSize ||
// Spec, p. 37: "...error if the stream contains too much data, with the exception that
// there may be an extra end-of-line marker..."
(actualSize == requiredSize + 1 && bytesPure[actualSize - 1] == ReadHelper.AsciiLineFeed) ||
(actualSize == requiredSize + 1 && bytesPure[actualSize - 1] == ReadHelper.AsciiCarriageReturn) ||
// The combination of a CARRIAGE RETURN followed immediately by a LINE FEED is treated as one EOL marker.
(actualSize == requiredSize + 2 &&
bytesPure[actualSize - 2] == ReadHelper.AsciiCarriageReturn &&
bytesPure[actualSize - 1] == ReadHelper.AsciiLineFeed);
if (!isCorrectlySized)
{
return false;
}
if (image.ColorSpaceDetails.BaseType == ColorSpace.DeviceCMYK || numberOfComponents == 4)
var builder = PngBuilder.Create(image.WidthInSamples, image.HeightInSamples, false);
int i = 0;
for (int col = 0; col < image.HeightInSamples; col++)
{
int i = 0;
for (int col = 0; col < image.HeightInSamples; col++)
for (int row = 0; row < image.WidthInSamples; row++)
{
for (int row = 0; row < image.WidthInSamples; row++)
{
/*
* Where CMYK in 0..1
* R = 255 × (1-C) × (1-K)
* G = 255 × (1-M) × (1-K)
* B = 255 × (1-Y) × (1-K)
*/
double c = (bytesPure[i++] / 255d);
double m = (bytesPure[i++] / 255d);
double y = (bytesPure[i++] / 255d);
double k = (bytesPure[i++] / 255d);
var r = (byte)(255 * (1 - c) * (1 - k));
var g = (byte)(255 * (1 - m) * (1 - k));
var b = (byte)(255 * (1 - y) * (1 - k));
builder.SetPixel(r, g, b, row, col);
}
builder.SetPixel(bytesPure[i++], bytesPure[i++], bytesPure[i++], row, col);
}
}
else if (is3Byte)
{
int i = 0;
for (int col = 0; col < image.HeightInSamples; col++)
{
for (int row = 0; row < image.WidthInSamples; row++)
{
builder.SetPixel(bytesPure[i++], bytesPure[i++], bytesPure[i++], row, col);
}
}
}
else
{
int i = 0;
for (int col = 0; col < image.HeightInSamples; col++)
{
for (int row = 0; row < image.WidthInSamples; row++)
{
byte pixel = bytesPure[i++];
builder.SetPixel(pixel, pixel, pixel, row, col);
}
}
}
bytes = builder.Save();
return true;
return true;
//bytesPure = ColorSpaceDetailsByteConverter.Convert(image.ColorSpaceDetails, bytesPure,
// image.BitsPerComponent, image.WidthInSamples, image.HeightInSamples);
//var numberOfComponents = image.ColorSpaceDetails.BaseNumberOfColorComponents;
//var is3Byte = numberOfComponents == 3;
//var builder = PngBuilder.Create(image.WidthInSamples, image.HeightInSamples, false);
//var requiredSize = (image.WidthInSamples * image.HeightInSamples * numberOfComponents);
//var actualSize = bytesPure.Count;
//var isCorrectlySized = bytesPure.Count == requiredSize ||
// // Spec, p. 37: "...error if the stream contains too much data, with the exception that
// // there may be an extra end-of-line marker..."
// (actualSize == requiredSize + 1 && bytesPure[actualSize - 1] == ReadHelper.AsciiLineFeed) ||
// (actualSize == requiredSize + 1 && bytesPure[actualSize - 1] == ReadHelper.AsciiCarriageReturn) ||
// // The combination of a CARRIAGE RETURN followed immediately by a LINE FEED is treated as one EOL marker.
// (actualSize == requiredSize + 2 &&
// bytesPure[actualSize - 2] == ReadHelper.AsciiCarriageReturn &&
// bytesPure[actualSize - 1] == ReadHelper.AsciiLineFeed);
//if (!isCorrectlySized)
//{
// return false;
//}
//if (image.ColorSpaceDetails.BaseType == ColorSpace.DeviceCMYK || numberOfComponents == 4)
//{
// int i = 0;
// for (int col = 0; col < image.HeightInSamples; col++)
// {
// for (int row = 0; row < image.WidthInSamples; row++)
// {
// /*
// * Where CMYK in 0..1
// * R = 255 × (1-C) × (1-K)
// * G = 255 × (1-M) × (1-K)
// * B = 255 × (1-Y) × (1-K)
// */
// double c = (bytesPure[i++] / 255d);
// double m = (bytesPure[i++] / 255d);
// double y = (bytesPure[i++] / 255d);
// double k = (bytesPure[i++] / 255d);
// var r = (byte)(255 * (1 - c) * (1 - k));
// var g = (byte)(255 * (1 - m) * (1 - k));
// var b = (byte)(255 * (1 - y) * (1 - k));
// builder.SetPixel(r, g, b, row, col);
// }
// }
//}
//else if (is3Byte)
//{
// int i = 0;
// for (int col = 0; col < image.HeightInSamples; col++)
// {
// for (int row = 0; row < image.WidthInSamples; row++)
// {
// builder.SetPixel(bytesPure[i++], bytesPure[i++], bytesPure[i++], row, col);
// }
// }
//}
//else
//{
// int i = 0;
// for (int col = 0; col < image.HeightInSamples; col++)
// {
// for (int row = 0; row < image.WidthInSamples; row++)
// {
// byte pixel = bytesPure[i++];
// builder.SetPixel(pixel, pixel, pixel, row, col);
// }
// }
//}
//bytes = builder.Save();
//return true;
}
catch
{

View File

@ -1,12 +1,14 @@
namespace UglyToad.PdfPig
{
using System.Collections.Generic;
using System.Collections.Generic;
using System.Linq;
using Core;
using Filters;
using Parser.Parts;
using Tokenization.Scanner;
using Tokens;
using Tokens;
using UglyToad.PdfPig.Content;
/// <summary>
/// Extensions for PDF types.
/// </summary>
@ -66,11 +68,22 @@
internal static IReadOnlyList<byte> Decode(this StreamToken stream, ILookupFilterProvider filterProvider, IPdfTokenScanner scanner)
{
var filters = filterProvider.GetFilters(stream.StreamDictionary, scanner);
var dico = stream.StreamDictionary;
if (dico.TryGet<StreamToken>(NameToken.Metadata, scanner, out var metadata2))
{
XmpMetadata xmp = new XmpMetadata(metadata2, filterProvider, scanner);
//dico = dico.With(NameToken.Metadata, metadata);
string xml = OtherEncodings.BytesAsLatin1String(xmp.GetXmlBytes().ToArray());
}
var transform = stream.Data;
for (var i = 0; i < filters.Count; i++)
{
transform = filters[i].Decode(transform, stream.StreamDictionary, i);
transform = filters[i].Decode(transform, dico, i);
}
return transform;