Extract Pages information
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using System.Linq;
|
||||
@@ -11,12 +12,20 @@ namespace VAR.PdfTools
|
||||
|
||||
private List<PdfObject> _objects = new List<PdfObject>();
|
||||
|
||||
private PdfDictionary _catalog = null;
|
||||
|
||||
private List<PdfDocumentPage> _pages = new List<PdfDocumentPage>();
|
||||
|
||||
#endregion
|
||||
|
||||
#region Properties
|
||||
|
||||
public List<PdfObject> Objects { get { return _objects; } }
|
||||
|
||||
public PdfDictionary Catalog { get { return _catalog; } }
|
||||
|
||||
public List<PdfDocumentPage> Pages { get { return _pages; } }
|
||||
|
||||
#endregion
|
||||
|
||||
#region Life cycle
|
||||
@@ -134,6 +143,40 @@ namespace VAR.PdfTools
|
||||
return elem;
|
||||
}
|
||||
|
||||
/// <summary>
/// Recursively walks a page-tree node and appends every leaf page it finds
/// to the page list of <paramref name="doc"/>, in traversal order.
/// </summary>
/// <param name="page">
/// Dictionary whose "Type" entry is either "Page" (a leaf) or "Pages"
/// (an intermediate node carrying a "Kids" array).
/// </param>
/// <param name="doc">Document whose page list receives the extracted pages.</param>
/// <exception cref="Exception">
/// Thrown when a "Pages" node has no "Kids" array, or when the node's type
/// is neither "Page" nor "Pages".
/// </exception>
private static void ExtractPages(PdfDictionary page, PdfDocument doc)
{
    string type = page.GetParamAsString("Type");

    if (type == "Page")
    {
        // The most recently extracted page (if any) is handed to the new
        // page's constructor.
        PdfDocumentPage prevDocPage = null;
        if (doc._pages.Count > 0)
        {
            prevDocPage = doc._pages[doc._pages.Count - 1];
        }
        doc._pages.Add(new PdfDocumentPage(page, prevDocPage));
        return;
    }

    if (type == "Pages")
    {
        PdfArray kids = null;
        if (page.Values.ContainsKey("Kids"))
        {
            kids = page.Values["Kids"] as PdfArray;
        }
        if (kids == null)
        {
            throw new Exception("PdfDocument: Pages \"Kids\" not found");
        }

        foreach (IPdfElement elem in kids.Values)
        {
            PdfDictionary childPage = elem as PdfDictionary;

            // Skip Kids entries that are not dictionaries. The original code
            // tested "page" here, which is never null at this point, so a
            // non-dictionary kid was passed on as null and crashed the
            // recursive call.
            if (childPage == null) { continue; }

            ExtractPages(childPage, doc);
        }
        return;
    }

    throw new Exception(string.Format("PdfDocument: Unexpected page type, found: {0}", type));
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Public methods
|
||||
@@ -208,6 +251,32 @@ namespace VAR.PdfTools
|
||||
ResolveIndirectReferences(obj, dictObjects);
|
||||
}
|
||||
|
||||
// Search Catalog
|
||||
foreach(PdfObject obj in doc.Objects)
|
||||
{
|
||||
if ((obj.Data is PdfDictionary) == false) { continue; }
|
||||
string type = ((PdfDictionary)obj.Data).GetParamAsString("Type");
|
||||
if(type == "Catalog")
|
||||
{
|
||||
doc._catalog = (PdfDictionary)obj.Data;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
if(doc._catalog == null)
|
||||
{
|
||||
throw new Exception("PdfDocument: Catalog not found");
|
||||
}
|
||||
|
||||
// Search pages
|
||||
if(doc.Catalog.Values.ContainsKey("Pages") == false ||
|
||||
(doc.Catalog.Values["Pages"] is PdfDictionary) == false)
|
||||
{
|
||||
throw new Exception("PdfDocument: Pages not found");
|
||||
}
|
||||
PdfDictionary pages = (PdfDictionary)doc.Catalog.Values["Pages"];
|
||||
ExtractPages(pages, doc);
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
||||
63
VAR.PdfTools/PdfDocumentPage.cs
Normal file
63
VAR.PdfTools/PdfDocumentPage.cs
Normal file
@@ -0,0 +1,63 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace VAR.PdfTools
|
||||
{
|
||||
/// <summary>
/// One page of a PDF document: the page dictionary, its content bytes and
/// the fonts declared in its resources.
/// </summary>
public class PdfDocumentPage
{
    #region Declarations

    // Dictionary of type "Page" this instance was built from.
    private PdfDictionary _baseData = null;

    // Bytes obtained from the page's "Contents" entry; null when unavailable.
    private byte[] _content = null;

    // Resources dictionary used by this page; may be null when neither the
    // page nor the previous page supplies one.
    private PdfDictionary _resources = null;

    // Font dictionaries keyed by the name used in the resources "Font" entry.
    private Dictionary<string, PdfDictionary> _fonts = new Dictionary<string, PdfDictionary>();

    #endregion

    #region Properties

    /// <summary>Dictionary of type "Page" this instance was built from.</summary>
    public PdfDictionary BaseData { get { return _baseData; } }

    /// <summary>Bytes of the page's "Contents" entry, or null when there are none.</summary>
    public byte[] Content { get { return _content; } }

    /// <summary>Fonts declared in the page resources, keyed by resource name.</summary>
    public Dictionary<string, PdfDictionary> Fonts { get { return _fonts; } }

    #endregion

    #region Life cycle

    /// <summary>
    /// Builds a page from its dictionary, reading its content and fonts.
    /// </summary>
    /// <param name="baseData">Dictionary whose "Type" entry must be "Page".</param>
    /// <param name="prevDocPage">
    /// Previously extracted page, or null for the first page. Its resources
    /// are reused when <paramref name="baseData"/> has no "Resources" entry.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="baseData"/> is null.</exception>
    /// <exception cref="Exception">The dictionary is not of type "Page".</exception>
    public PdfDocumentPage(PdfDictionary baseData, PdfDocumentPage prevDocPage)
    {
        if (baseData == null)
        {
            throw new ArgumentNullException("baseData");
        }

        _baseData = baseData;
        string type = baseData.GetParamAsString("Type");
        if (type != "Page")
        {
            throw new Exception(string.Format("PdfDocumentPage: Expected dictionary of type:\"Page\". Found: {0}", type));
        }

        _content = _baseData.GetParamAsStream("Contents");

        if (_baseData.Values.ContainsKey("Resources"))
        {
            _resources = _baseData.Values["Resources"] as PdfDictionary;
        }
        else if (prevDocPage != null)
        {
            // No own resources: reuse those of the previous page. The first
            // page has no predecessor, so prevDocPage must be checked here.
            // NOTE(review): the PDF page tree inherits "Resources" from the
            // parent "Pages" node, not from the preceding page; confirm
            // whether this fallback is intended.
            _resources = prevDocPage._resources;
        }

        // _resources is null when the entry is not a dictionary or nothing
        // could be reused; in that case the page simply has no fonts.
        if (_resources != null && _resources.Values.ContainsKey("Font"))
        {
            PdfDictionary fonts = _resources.Values["Font"] as PdfDictionary;
            if (fonts != null)
            {
                foreach (KeyValuePair<string, IPdfElement> pair in fonts.Values)
                {
                    _fonts.Add(pair.Key, pair.Value as PdfDictionary);
                }
            }
        }
    }

    #endregion
}
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
|
||||
namespace VAR.PdfTools
|
||||
{
|
||||
@@ -101,6 +102,35 @@ namespace VAR.PdfTools
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the bytes of the entry called <paramref name="name"/>.
/// A single stream yields its data as-is; an array yields the data of all
/// its stream elements concatenated in order (non-stream elements are ignored).
/// </summary>
/// <param name="name">Key of the entry to read.</param>
/// <returns>
/// The stream bytes, or null when the entry is missing, is neither a stream
/// nor an array, or is an array that produced no bytes.
/// </returns>
public byte[] GetParamAsStream(string name)
{
    if (!Values.ContainsKey(name))
    {
        return null;
    }

    IPdfElement entry = Values[name];

    PdfArray parts = entry as PdfArray;
    if (parts == null)
    {
        // Not an array: only a plain stream carries data.
        PdfStream single = entry as PdfStream;
        return single != null ? single.Data : null;
    }

    // Array case: append the data of every stream element.
    MemoryStream buffer = new MemoryStream();
    foreach (IPdfElement part in parts.Values)
    {
        PdfStream partStream = part as PdfStream;
        if (partStream != null)
        {
            buffer.Write(partStream.Data, 0, partStream.Data.Length);
        }
    }

    return buffer.Length > 0 ? buffer.ToArray() : null;
}
|
||||
|
||||
}
|
||||
|
||||
public class PdfNull : IPdfElement
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="PdfDocument.cs" />
|
||||
<Compile Include="PdfDocumentPage.cs" />
|
||||
<Compile Include="PdfElements.cs" />
|
||||
<Compile Include="PdfParser.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
|
||||
Reference in New Issue
Block a user