Apply FlateDecode filter to streams

This commit is contained in:
2016-06-18 13:31:39 +02:00
parent 0f575df7ce
commit af5644758c
3 changed files with 79 additions and 1 deletions

View File

@@ -72,7 +72,7 @@ namespace VAR.PdfTools.Workbench
{ {
string.Format("Number of Objects: {0}", nObjects), string.Format("Number of Objects: {0}", nObjects),
string.Format("Number of Streams: {0}", nStreams), string.Format("Number of Streams: {0}", nStreams),
string.Format("Stream Filters: {0}", string.Join(", ", streamFilters.Distinct().ToArray())), string.Format("Unsuported Stream Filters: {0}", string.Join(", ", streamFilters.Distinct().ToArray())),
}; };
} }

View File

@@ -1,5 +1,6 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using System.IO.Compression;
namespace VAR.PdfTools namespace VAR.PdfTools
{ {
@@ -23,6 +24,41 @@ namespace VAR.PdfTools
#endregion #endregion
#region Private methods
/// <summary>
/// Decompresses the data of a stream encoded with the FlateDecode filter.
/// </summary>
/// <param name="streamData">Raw (still compressed) stream bytes.</param>
/// <returns>
/// The decompressed bytes. An empty array is returned when the input holds no
/// data beyond the two skipped leading bytes.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="streamData"/> is null.</exception>
private static byte[] DecodeFlateStreamData(byte[] streamData)
{
    if (streamData == null) { throw new System.ArgumentNullException(nameof(streamData)); }

    // DeflateStream only understands raw deflate data, so the first two bytes
    // are skipped unconditionally before decompressing.
    // NOTE(review): presumably these two bytes are the zlib (RFC 1950) header; they are not validated.
    const int headerLength = 2;
    if (streamData.Length <= headerLength) { return new byte[0]; }

    using (MemoryStream msInput = new MemoryStream(streamData, headerLength, streamData.Length - headerLength, false))
    using (MemoryStream msOutput = new MemoryStream())
    {
        using (DeflateStream zip = new DeflateStream(msInput, CompressionMode.Decompress, true))
        {
            zip.CopyTo(msOutput);
        }

        // ToArray returns exactly the bytes written, with no spare capacity.
        return msOutput.ToArray();
    }
}
#endregion
#region Public methods #region Public methods
public static PdfDocument Load(string filename) public static PdfDocument Load(string filename)
@@ -43,6 +79,25 @@ namespace VAR.PdfTools
doc.Objects.Add(obj); doc.Objects.Add(obj);
} }
} while (parser.IsEndOfStream() == false); } while (parser.IsEndOfStream() == false);
// Apply filters to streams: every stream object whose "Filter" parameter reads
// "FlateDecode" has its data replaced by the decoded bytes.
foreach(PdfObject obj in doc.Objects)
{
// Only stream objects carry data that may need decoding.
if(obj.Data.Type != PdfElementTypes.Stream) { continue; }
PdfStream stream = obj.Data as PdfStream;
// For an array-valued Filter, GetParamAsString only reports the first entry.
string filter = stream.GetParamAsString("Filter");
if (filter == "FlateDecode")
{
// Keep the encoded bytes and the original Filter element on the stream.
stream.OriginalData = stream.Data;
stream.OriginalFilter = stream.Dictionary.Values["Filter"];
byte[] decodedStreamData = DecodeFlateStreamData(stream.Data);
stream.Data = decodedStreamData;
// Length is rewritten to describe the decoded data.
stream.Dictionary.Values["Length"] = new PdfInteger { Value = decodedStreamData.Length };
// NOTE(review): this removes the whole Filter entry; if Filter was an array with
// further filters after FlateDecode, those are dropped from the dictionary too
// (still reachable through OriginalFilter) - confirm this is intended.
stream.Dictionary.Values.Remove("Filter");
}
}
return doc; return doc;
} }

View File

@@ -84,6 +84,29 @@ namespace VAR.PdfTools
public PdfElementTypes Type { get; private set; } = PdfElementTypes.Stream; public PdfElementTypes Type { get; private set; } = PdfElementTypes.Stream;
public PdfDictionary Dictionary { get; set; } public PdfDictionary Dictionary { get; set; }
public byte[] Data { get; set; } public byte[] Data { get; set; }
public byte[] OriginalData { get; set; }
public IPdfElement OriginalFilter { get; set; }
/// <summary>
/// Reads an entry of the stream dictionary as a string.
/// </summary>
/// <param name="name">Key of the dictionary entry to read.</param>
/// <returns>
/// The value of the entry when it is a name or a string. When the entry is an
/// array, its first element is used instead. Returns null when the entry is
/// missing, the array is empty, or the value is of any other type.
/// </returns>
public string GetParamAsString(string name)
{
    // A stream without a dictionary has no parameters at all.
    if (Dictionary == null) { return null; }

    IPdfElement value;
    if (Dictionary.Values.TryGetValue(name, out value) == false) { return null; }

    if (value is PdfArray)
    {
        // Only the first element is considered; an empty array yields null
        // instead of an out-of-range exception.
        value = System.Linq.Enumerable.FirstOrDefault(((PdfArray)value).Values);
    }

    if (value is PdfName)
    {
        return ((PdfName)value).Value;
    }
    if (value is PdfString)
    {
        return ((PdfString)value).Value;
    }
    return null;
}
} }
public class PdfObject : IPdfElement public class PdfObject : IPdfElement