by joe.pesch
28. August 2016 07:23
Using the open-source iTextSharp PDF library, the console application below illustrates how to open one or more PDF portfolio files (selected by a source path and file mask) and extract a single, named embedded PDF file from each.
using iTextSharp.text.pdf;
using System;
using System.Collections.Generic;
using System.IO;
namespace PdfPortfolioSample
{
class Program
{
/// <summary>
/// Console entry point. Prompts for a source path, a file mask, a recursion
/// flag, a target path and the name of the embedded document to extract,
/// then runs the extraction against every file that matches.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Console.Write("Enter source path: ");
    string sourcePath = Console.ReadLine();

    Console.Write("Enter file mask (e.g. *.pdf): ");
    string fileMask = Console.ReadLine();

    Console.Write("Recursive (y/n): ");
    // ReadLine returns null when input is exhausted; treat that (and anything
    // other than y/Y) as "no" instead of dereferencing null. The comparison is
    // ordinal so it does not depend on the current culture.
    string recursiveAnswer = Console.ReadLine();
    bool recursive = recursiveAnswer != null
        && string.Equals(recursiveAnswer.Trim(), "Y", StringComparison.OrdinalIgnoreCase);

    Console.Write("Enter target path: ");
    string targetPath = Console.ReadLine();

    Console.Write("Enter document name to extract (e.g. MLPA.PDF): ");
    string docName = Console.ReadLine();

    List<string> files = GetFiles(sourcePath, fileMask, recursive);
    foreach (string file in files)
    {
        try
        {
            GetPdfFromPortfolio(file, targetPath, docName);
        }
        catch (Exception e)
        {
            // One unreadable or malformed PDF must not abort the rest of the
            // batch; report it the same way GetFiles reports its errors.
            Console.WriteLine(file + ": " + e.ToString());
        }
    }
}
/// <summary>
/// Opens the PDF at <paramref name="filePath"/>, walks the name/filespec pairs
/// in the Names array of its EmbeddedFiles dictionary, and writes every
/// embedded file whose name equals <paramref name="docName"/> (ignoring case)
/// into <paramref name="targetPath"/>.
/// </summary>
/// <param name="filePath">Path of the PDF file to inspect.</param>
/// <param name="targetPath">Directory that receives the extracted file.</param>
/// <param name="docName">Name of the embedded document to extract.</param>
/// <remarks>
/// The output file name is the first 10 characters of the source file name
/// (the whole name when it is shorter) followed by the embedded file's name.
/// A PDF without a Names / EmbeddedFiles / Names chain is skipped without
/// error. Only the Names array directly on the EmbeddedFiles dictionary is
/// read; nested Kids nodes are not traversed.
/// </remarks>
private static void GetPdfFromPortfolio(string filePath, string targetPath, string docName)
{
    PdfReader reader = new PdfReader(filePath);
    try
    {
        // Any link of this chain may be absent in a PDF that carries no
        // attachments, so each step is null-checked.
        PdfDictionary documentNames = reader.Catalog.GetAsDict(PdfName.NAMES);
        PdfDictionary embeddedFiles = documentNames == null
            ? null
            : documentNames.GetAsDict(PdfName.EMBEDDEDFILES);
        PdfArray fileSpecs = embeddedFiles == null
            ? null
            : embeddedFiles.GetAsArray(PdfName.NAMES);
        if (fileSpecs == null)
        {
            return;
        }

        // Prefix taken from the source file name; clamped so short names
        // no longer make Substring throw.
        string sourceName = Path.GetFileName(filePath);
        string prefix = sourceName.Substring(0, Math.Min(10, sourceName.Length));

        // The array alternates [name, filespec, name, filespec, ...]; only the
        // filespec at the odd index is needed. The bound guards against a
        // trailing name without a filespec.
        for (int i = 0; i + 1 < fileSpecs.Size; i += 2)
        {
            PdfDictionary fileSpec = fileSpecs.GetAsDict(i + 1);
            if (fileSpec == null)
            {
                continue;
            }

            PdfDictionary refs = fileSpec.GetAsDict(PdfName.EF);
            if (refs == null)
            {
                continue;
            }

            foreach (PdfName key in refs.Keys)
            {
                PdfString nameEntry = fileSpec.GetAsString(key);
                if (nameEntry == null)
                {
                    continue;
                }

                string attachmentName = nameEntry.ToString();
                if (!string.Equals(attachmentName, docName, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Resolve the stream only for a matching entry.
                PRStream stream = PdfReader.GetPdfObject(refs.GetAsIndirectObject(key)) as PRStream;
                if (stream == null)
                {
                    continue;
                }

                // Decode before touching the file system so a failed decode
                // does not leave an empty output file behind.
                byte[] attachment = PdfReader.GetStreamBytes(stream);
                string outputPath = Path.Combine(targetPath, prefix + attachmentName);

                // FileMode.Create truncates an existing file; OpenOrCreate would
                // leave stale bytes after the new content when the old file was longer.
                using (FileStream fs = new FileStream(outputPath, FileMode.Create))
                {
                    fs.Write(attachment, 0, attachment.Length);
                }
            }
        }
    }
    finally
    {
        // Release the underlying file handle held by the reader.
        reader.Close();
    }
}
/// <summary>
/// Collects the files in <paramref name="path"/> that match
/// <paramref name="fileMask"/>, optionally descending into subdirectories.
/// </summary>
/// <param name="path">Directory to search.</param>
/// <param name="fileMask">Search pattern handed to <see cref="Directory.GetFiles(string, string)"/>.</param>
/// <param name="recursive">When true, every subdirectory is searched as well.</param>
/// <param name="files">Accumulator list; a new list is created when null.</param>
/// <returns>The accumulator list with the matching file paths appended.</returns>
/// <remarks>
/// Any exception raised while reading a directory is written to the console
/// and the search carries on with whatever has been collected so far.
/// </remarks>
private static List<string> GetFiles(string path, string fileMask = "", bool recursive = false, List<string> files = null)
{
    List<string> found = files ?? new List<string>();

    try
    {
        found.AddRange(Directory.GetFiles(path, fileMask));

        if (recursive)
        {
            string[] subDirectories = Directory.GetDirectories(path);
            foreach (string subDirectory in subDirectories)
            {
                GetFiles(subDirectory, fileMask, recursive, found);
            }
        }
    }
    catch (System.Exception e)
    {
        Console.WriteLine(e.ToString());
    }

    return found;
}
}
}
430c8ccd-10ab-4571-862d-8bbf2267a490|1|3.0|96d5b379-7e1d-4dac-a6ba-1e50db561b04
Tags:
C# | iTextSharp | PDF