将PDF转换为图像批
本文关键字:图像 转换 PDF | 更新日期: 2023-09-27 18:20:19
我正在开发一个可以将pdf文件转换为图像的解决方案。我使用代码项目中的以下示例:http://www.codeproject.com/Articles/317700/Convert-a-PDF-into-a-series-of-images-using-Csharp?msg=4134859#xx4134859xx
现在我尝试使用以下代码从1000多个pdf文件中生成新的图像:
using Cyotek.GhostScript;
using Cyotek.GhostScript.PdfConversion;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace RefClass_PDF2Image
{
class Program
{
static void Main(string[] args)
{
string outputPath = Properties.Settings.Default.outputPath;
string pdfPath = Properties.Settings.Default.pdfPath;
if (!Directory.Exists(outputPath))
{
Console.WriteLine("Der angegebene Pfad " + outputPath + " für den Export wurde nicht gefunden. Bitte ändern Sie den Pfad (outputPath) in der App.Config Datei.");
return;
}
else
{
Console.WriteLine("Output Pfad: " + outputPath + " gefunden.");
}
if (!Directory.Exists(pdfPath))
{
Console.WriteLine("Der angegebene Pfad " + pdfPath + " zu den PDF Zeichnungen wurde nicht gefunden. Bitte ändern Sie den Pfad (pdfPath) in der App.Config Datei.");
return;
}
else
{
Console.WriteLine("PDF Pfad: " + pdfPath + " gefunden.");
}
Pdf2ImageSettings settings = GetPDFSettings();
DateTime start = DateTime.Now;
TimeSpan span;
Console.WriteLine("");
Console.WriteLine("Extraktion der PDF Zeichnungen wird gestartet: " + start.ToShortTimeString());
Console.WriteLine("");
DirectoryInfo diretoryInfo = new DirectoryInfo(pdfPath);
DirectoryInfo[] directories = diretoryInfo.GetDirectories();
Console.WriteLine("");
Console.WriteLine("Es wurden " + directories.Length + " verschiedende Verzeichnisse gefunden.");
Console.WriteLine("");
List<string> filenamesPDF = Directory.GetFiles(pdfPath, "*.pdf*", SearchOption.AllDirectories).Select(x => Path.GetFullPath(x)).ToList();
List<string> filenamesOutput = Directory.GetFiles(outputPath, "*.*", SearchOption.AllDirectories).Select(x => Path.GetFullPath(x)).ToList();
Console.WriteLine("");
Console.WriteLine("Es wurden " + filenamesPDF.Count + " verschiedende PDF Zeichnungen gefunden.");
Console.WriteLine("");
List<string> newFileNames = new List<string>();
int cutLength = pdfPath.Length;
for (int i = 0; i < filenamesPDF.Count; i++)
{
string temp = filenamesPDF[i].Remove(0, cutLength);
temp = outputPath + temp;
temp = temp.Replace("pdf", "jpg");
newFileNames.Add(temp);
}
for (int i = 0; i < filenamesPDF.Count; i++)
{
FileInfo fi = new FileInfo(newFileNames[i]);
if (!fi.Exists)
{
if (!Directory.Exists(fi.DirectoryName))
{
Directory.CreateDirectory(fi.DirectoryName);
}
Bitmap firstPage = new Pdf2Image(filenamesPDF[i], settings).GetImage();
firstPage.Save(newFileNames[i], System.Drawing.Imaging.ImageFormat.Jpeg);
firstPage.Dispose();
}
//if (i % 20 == 0)
//{
// GC.Collect();
// GC.WaitForPendingFinalizers();
//}
}
Console.ReadLine();
}
private static Pdf2ImageSettings GetPDFSettings()
{
Pdf2ImageSettings settings;
settings = new Pdf2ImageSettings();
settings.AntiAliasMode = AntiAliasMode.Medium;
settings.Dpi = 150;
settings.GridFitMode = GridFitMode.Topological;
settings.ImageFormat = ImageFormat.Png24;
settings.TrimMode = PdfTrimMode.CropBox;
return settings;
}
}
}
不幸的是,我总是在Pdf2Image.cs中遇到内存不足的异常。这里的代码:
public Bitmap GetImage(int pageNumber)
{
Bitmap result;
string workFile;
//if (pageNumber < 1 || pageNumber > this.PageCount)
// throw new ArgumentException("Page number is out of bounds", "pageNumber");
if (pageNumber < 1)
throw new ArgumentException("Page number is out of bounds", "pageNumber");
workFile = Path.GetTempFileName();
try
{
this.ConvertPdfPageToImage(workFile, pageNumber);
using (FileStream stream = new FileStream(workFile, FileMode.Open, FileAccess.Read))
{
result = new Bitmap(stream); // --->>> here is the out of memory exception
stream.Close();
stream.Dispose();
}
}
finally
{
File.Delete(workFile);
}
return result;
}
如何修复该问题以避免出现此异常?
感谢您的帮助,tro
不知道这对你来说是否值得,但看起来你可以做你想做的事情,而不需要在中间有位图。PdfToImage中有以下代码:
public void ConvertPdfPageToImage(string outputFileName, int pageNumber)
{
if (pageNumber < 1 || pageNumber > this.PageCount)
throw new ArgumentException("Page number is out of bounds", "pageNumber");
using (GhostScriptAPI api = new GhostScriptAPI())
api.Execute(this.GetConversionArguments(this._pdfFileName, outputFileName, pageNumber, this.PdfPassword, this.Settings));
}
它在你想要的地方为你写一个文件。为什么不直接调用这个方法,而不是读回图像再写出来呢?
这可能不会直接回答您的问题,但仍然很有用:Imagemagik提供了一种在批处理模式中从PDF创建图像的简单方法
单个pdf文件到许多慢跑:
convert -geometry 1024x768 -density 200 -colorspace RGB test.pdf +adjoin test_%0d.jpg
或者如果你想处理许多pdf文件:
mogrify -format jpg -alpha off -density 150 -quality 80 -resize 768 -unsharp 1.5 *.pdf
(设置显然应该根据您的需要进行调整:)
要在C#中以编程方式执行此操作,可以使用.NET imagemagik包装器http://imagemagick.codeplex.com
使用为生成的位图添加
using (FileStream stream = new FileStream(workFile, FileMode.Open, FileAccess.Read))
using (Bitmap result = new Bitmap(stream))
{
...
}