Extract Images from PDFs
How to Extract Images from PDFs
Identify photographs, raster drawings and diagrams embedded in PDFs and extract those images as separate graphics files without affecting their resolution. Adobe PDF Library lets you save the extracted images in TIFF, JPEG, GIF, BMP or PNG format for future use or archiving purposes.
Get Free Trial
C#
Java
C#
using System;
using System.Drawing;
using System.Drawing.Imaging;
using Datalogics.PDFL;
namespace ImageExtraction
{
    /// <summary>
    /// Sample program: walks the content of the first page of a PDF document and
    /// saves every image element it encounters as BMP files in the current directory.
    /// </summary>
    class ImageExtraction
    {
        // Running index appended to the output file names. It is static so that the
        // numbering continues across the recursive calls of ExtractImages.
        static int next;

        /// <summary>
        /// Visits every element of <paramref name="content"/>. Image elements are saved
        /// twice (as extracted, and after <c>ChangeResolution(500)</c>); Container, Group
        /// and Form elements are searched recursively through their own Content.
        /// </summary>
        /// <param name="content">The content whose elements are scanned for images.</param>
        static void ExtractImages(Content content)
        {
            for (int i = 0; i < content.NumElements; i++)
            {
                Element e = content.GetElement(i);

                if (e is Datalogics.PDFL.Image img)
                {
                    Console.WriteLine("Saving an image");

                    // First file: the image as it is stored in the PDF.
                    Bitmap bitmap = img.Bitmap;
                    bitmap.Save("ImageExtraction-extract-out" + (next) + ".bmp", ImageFormat.Bmp);

                    // Second file: the same image after ChangeResolution(500).
                    // NOTE(review): 500 is presumably a resolution in DPI - confirm against the PDFL docs.
                    Datalogics.PDFL.Image newimg = img.ChangeResolution(500);
                    bitmap = newimg.Bitmap;
                    bitmap.Save("ImageExtraction-extracted-out" + (next) + ".bmp", ImageFormat.Bmp);

                    // Both files of one image share the same index; advance it only afterwards.
                    next++;

                    // The bitmap may be saved in any supported ImageFormat instead,
                    // e.g. ImageFormat.Gif (".gif") or ImageFormat.Png (".png").
                }
                else if (e is Container container)
                {
                    ExtractImages(container.Content);
                }
                else if (e is Group group)
                {
                    ExtractImages(group.Content);
                }
                else if (e is Form form)
                {
                    ExtractImages(form.Content);
                }
            }
        }

        /// <summary>
        /// Entry point. Checks for libgdiplus on macOS and Linux, initializes the library,
        /// opens the input PDF and extracts the images of its first page.
        /// </summary>
        /// <param name="args">Optional: args[0] is the path of the PDF to read instead of the bundled sample.</param>
        static void Main(string[] args)
        {
            // System.Drawing needs the native libgdiplus library on non-Windows platforms;
            // bail out with a hint when it is not found at the expected locations.
            if (System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(System.Runtime.InteropServices.OSPlatform
                    .OSX) && !System.IO.File.Exists("/usr/local/lib/libgdiplus.dylib"))
            {
                Console.WriteLine("Please install libgdiplus first to access the System.Drawing namespace on macOS.");
                return;
            }

            if (System.Runtime.InteropServices.RuntimeInformation.IsOSPlatform(System.Runtime.InteropServices.OSPlatform
                    .Linux) && !System.IO.File.Exists("/usr/lib64/libgdiplus.so") &&
                !System.IO.File.Exists("/usr/lib/libgdiplus.so"))
            {
                Console.WriteLine("Please install libgdiplus first to access the System.Drawing namespace on Linux.");
                return;
            }

            Console.WriteLine("ImageExtraction Sample:");

            // ReSharper disable once UnusedVariable
            using (Library lib = new Library())
            {
                Console.WriteLine("Initialized the library.");

                String sInput = Library.ResourceDirectory + "Sample_Input/ducky.pdf";

                if (args.Length > 0)
                {
                    sInput = args[0];
                }

                Console.WriteLine("Input file: " + sInput);

                // Dispose the document and page before the enclosing Library scope ends,
                // so they are released while the library is still initialized.
                using (Document doc = new Document(sInput))
                using (Page pg = doc.GetPage(0))
                {
                    ExtractImages(pg.Content);
                }
            }
        }
    }
}
Java
package com.datalogics.PDFL.Samples;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import javax.imageio.ImageIO;
import java.io.File;
import com.datalogics.PDFL.*;
/**
 * Sample program: walks the content of the first page of a PDF document and
 * saves every image element it encounters as a BMP file in the current directory.
 */
public class ImageExtraction {
    // Running index appended to the output file names; static so the numbering
    // continues across the recursive calls of extractImages.
    static int next = 0;

    /**
     * Visits every element of the given content. Image elements are written to
     * "ImageExtraction-extract-out&lt;n&gt;.bmp"; Container, Group and Form elements
     * are searched recursively through their own content.
     *
     * @param content the content whose elements are scanned for images
     * @throws Exception if reading an element or writing an image file fails
     */
    public static void extractImages(Content content) throws Exception {
        for (int i = 0; i < content.getNumElements(); i++) {
            Element e = content.getElement(i);
            if (e instanceof Image) {
                Image img = (Image)e;
                String outName = "ImageExtraction-extract-out" + (next++) + ".bmp";
                // ImageIO.write returns false (without throwing) when no registered writer
                // can encode the image in the requested format; report that instead of
                // silently producing no output file.
                boolean written = ImageIO.write(img.getBufferedImage(), "bmp", new File(outName));
                if (!written) {
                    System.err.println("Warning: no ImageIO writer could save " + outName + " as bmp; image skipped.");
                }
                // The image may be saved in any format supported by ImageIO by passing
                // img.getBufferedImage() with another format name, e.g. "jpg" or "png".
            } else if (e instanceof Container) {
                extractImages(((Container)e).getContent());
            } else if (e instanceof Group) {
                extractImages(((Group)e).getContent());
            } else if (e instanceof Form) {
                extractImages(((Form)e).getContent());
            }
        }
    }

    /**
     * Entry point. Initializes the library, opens the input PDF and extracts the
     * images of its first page.
     *
     * @param args optional: args[0] is the path of the PDF to read instead of the bundled sample
     * @throws Exception if the document cannot be opened or an image cannot be extracted
     */
    public static void main(String [] args) throws Exception {
        System.out.println("ImageExtraction sample:");

        Library lib = new Library();

        try {
            String filename = "../../Resources/Sample_Input/ducky.pdf";
            if (args.length > 0) {
                filename = args[0];
            }
            System.out.println("Input file: " + filename);

            Document doc = new Document(filename);

            // Run AWT in headless mode so image handling does not require a display.
            System.setProperty("java.awt.headless", "true");

            Page pg = doc.getPage(0);
            Content content = pg.getContent();
            extractImages(content);
        }
        finally {
            // Always release the library, even when extraction fails.
            lib.delete();
        }
    }
}