Extract Pages information

This commit is contained in:
2016-06-19 15:22:42 +02:00
parent 5a68559cc2
commit 8ffa5db9f1
4 changed files with 164 additions and 1 deletions

View File

@@ -1,4 +1,5 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
@@ -11,12 +12,20 @@ namespace VAR.PdfTools
// Objects belonging to this document.
private List<PdfObject> _objects = new List<PdfObject>();
// Dictionary whose "Type" is "Catalog"; null until one is found among the objects.
private PdfDictionary _catalog = null;
// Pages collected by ExtractPages, in the order they are reached while walking the page tree.
private List<PdfDocumentPage> _pages = new List<PdfDocumentPage>();
#endregion
#region Properties
/// <summary>Objects belonging to this document.</summary>
public List<PdfObject> Objects { get { return _objects; } }
/// <summary>The document catalog dictionary.</summary>
public PdfDictionary Catalog { get { return _catalog; } }
/// <summary>Pages extracted from the page tree.</summary>
public List<PdfDocumentPage> Pages { get { return _pages; } }
#endregion
#region Life cycle
@@ -134,6 +143,40 @@ namespace VAR.PdfTools
return elem;
}
/// <summary>
/// Recursively walks the page tree starting at <paramref name="page"/> and appends a
/// <see cref="PdfDocumentPage"/> to the document for every node whose "Type" is "Page".
/// Nodes whose "Type" is "Pages" are expanded through their "Kids" array.
/// </summary>
/// <param name="page">Page tree node to process ("Page" or "Pages" dictionary).</param>
/// <param name="doc">Document whose page list receives the extracted pages.</param>
/// <exception cref="Exception">
/// Thrown when a "Pages" node has no "Kids" array, or when the node type is neither
/// "Page" nor "Pages".
/// </exception>
private static void ExtractPages(PdfDictionary page, PdfDocument doc)
{
    string type = page.GetParamAsString("Type");
    if (type == "Page")
    {
        // The most recently extracted page (null for the first one) is handed to the
        // new page, which uses it as a fallback source of resources.
        PdfDocumentPage prevDocPage = null;
        if (doc._pages.Count > 0)
        {
            prevDocPage = doc._pages[doc._pages.Count - 1];
        }
        doc._pages.Add(new PdfDocumentPage(page, prevDocPage));
        return;
    }

    if (type == "Pages")
    {
        PdfArray kids = null;
        if (page.Values.ContainsKey("Kids"))
        {
            kids = page.Values["Kids"] as PdfArray;
        }
        if (kids == null)
        {
            throw new Exception("PdfDocument: Pages \"Kids\" not found");
        }

        foreach (IPdfElement elem in kids.Values)
        {
            PdfDictionary childPage = elem as PdfDictionary;
            // Skip entries that are not dictionaries. The check must be made on the
            // child (not on the parent node), otherwise a null child is recursed into.
            if (childPage == null) { continue; }
            ExtractPages(childPage, doc);
        }
        return;
    }

    throw new Exception(string.Format("PdfDocument: Unexpected page type, found: {0}", type));
}
#endregion
#region Public methods
@@ -208,6 +251,32 @@ namespace VAR.PdfTools
ResolveIndirectReferences(obj, dictObjects);
}
// Search Catalog
foreach(PdfObject obj in doc.Objects)
{
if ((obj.Data is PdfDictionary) == false) { continue; }
string type = ((PdfDictionary)obj.Data).GetParamAsString("Type");
if(type == "Catalog")
{
doc._catalog = (PdfDictionary)obj.Data;
break;
}
}
if(doc._catalog == null)
{
throw new Exception("PdfDocument: Catalog not found");
}
// Search pages
if(doc.Catalog.Values.ContainsKey("Pages") == false ||
(doc.Catalog.Values["Pages"] is PdfDictionary) == false)
{
throw new Exception("PdfDocument: Pages not found");
}
PdfDictionary pages = (PdfDictionary)doc.Catalog.Values["Pages"];
ExtractPages(pages, doc);
return doc;
}

View File

@@ -0,0 +1,63 @@
using System;
using System.Collections.Generic;
namespace VAR.PdfTools
{
/// <summary>
/// A single page of a PDF document: the page dictionary, its content stream bytes,
/// its resources dictionary and the fonts declared in those resources.
/// </summary>
public class PdfDocumentPage
{
    #region Declarations

    // The "Page" dictionary this instance was built from.
    private PdfDictionary _baseData = null;

    // Bytes of the "Contents" entry; null when the page has no readable content stream.
    private byte[] _content = null;

    // Resources dictionary used by the page; may be null when none could be determined.
    private PdfDictionary _resources = null;

    // Entries of the resources "Font" dictionary, keyed by font resource name.
    private Dictionary<string, PdfDictionary> _fonts = new Dictionary<string, PdfDictionary>();

    #endregion

    #region Properties

    /// <summary>The "Page" dictionary this instance was built from.</summary>
    public PdfDictionary BaseData { get { return _baseData; } }

    /// <summary>Bytes of the page content, or null when there is none.</summary>
    public byte[] Content { get { return _content; } }

    /// <summary>Fonts declared in the page resources, keyed by resource name.</summary>
    public Dictionary<string, PdfDictionary> Fonts { get { return _fonts; } }

    #endregion

    #region Life cycle

    /// <summary>
    /// Builds a page from its dictionary, reading the content stream, the resources
    /// and the font table.
    /// </summary>
    /// <param name="baseData">Dictionary whose "Type" must be "Page".</param>
    /// <param name="prevDocPage">
    /// Previously extracted page, or null for the first page. Its resources are reused
    /// when <paramref name="baseData"/> has no "Resources" entry.
    /// </param>
    /// <exception cref="Exception">Thrown when the dictionary type is not "Page".</exception>
    public PdfDocumentPage(PdfDictionary baseData, PdfDocumentPage prevDocPage)
    {
        _baseData = baseData;
        string type = baseData.GetParamAsString("Type");
        if (type != "Page")
        {
            throw new Exception(string.Format("PdfDocumentPage: Expected dictionary of type:\"Page\". Found: {0}", type));
        }

        _content = _baseData.GetParamAsStream("Contents");

        if (_baseData.Values.ContainsKey("Resources"))
        {
            _resources = _baseData.Values["Resources"] as PdfDictionary;
        }
        else if (prevDocPage != null)
        {
            // No own resources: fall back to the ones of the previous page. The first
            // page has no predecessor, so prevDocPage must be null-checked.
            _resources = prevDocPage._resources;
        }

        // Resources may be missing or not a dictionary; the page then has no fonts.
        if (_resources == null || _resources.Values.ContainsKey("Font") == false)
        {
            return;
        }

        PdfDictionary fonts = _resources.Values["Font"] as PdfDictionary;
        if (fonts == null)
        {
            return;
        }

        foreach (KeyValuePair<string, IPdfElement> pair in fonts.Values)
        {
            // Entries that are not dictionaries are stored as null.
            _fonts.Add(pair.Key, pair.Value as PdfDictionary);
        }
    }

    #endregion
}
}

View File

@@ -1,4 +1,5 @@
using System.Collections.Generic;
using System.IO;
namespace VAR.PdfTools
{
@@ -101,6 +102,35 @@ namespace VAR.PdfTools
}
return null;
}
/// <summary>
/// Returns the bytes of the stream stored under <paramref name="name"/>.
/// When the entry is an array, the data of every stream element is concatenated in
/// array order and non-stream elements are ignored.
/// </summary>
/// <param name="name">Key of the entry to read.</param>
/// <returns>
/// The stream bytes, or null when the key is absent, the entry is neither a stream
/// nor an array, or the array yields no bytes at all.
/// </returns>
public byte[] GetParamAsStream(string name)
{
    if (!Values.ContainsKey(name))
    {
        return null;
    }

    IPdfElement entry = Values[name];

    PdfArray parts = entry as PdfArray;
    if (parts != null)
    {
        using (MemoryStream buffer = new MemoryStream())
        {
            foreach (IPdfElement part in parts.Values)
            {
                PdfStream partStream = part as PdfStream;
                if (partStream != null)
                {
                    buffer.Write(partStream.Data, 0, partStream.Data.Length);
                }
            }

            // An empty concatenation is reported as "no stream".
            return buffer.Length > 0 ? buffer.ToArray() : null;
        }
    }

    PdfStream single = entry as PdfStream;
    return single != null ? single.Data : null;
}
}
public class PdfNull : IPdfElement

View File

@@ -41,6 +41,7 @@
</ItemGroup>
<ItemGroup>
<Compile Include="PdfDocument.cs" />
<Compile Include="PdfDocumentPage.cs" />
<Compile Include="PdfElements.cs" />
<Compile Include="PdfParser.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />