替换PDF中的特定文档
本文关键字:文档 PDF 替换 | 更新日期: 2023-09-27 18:30:03
包括:
using Ghostscript.NET;
using Ghostscript.NET.Processor;
using Ghostscript.NET.Rasterizer;
现在,我正在使用Ghostscript.Net将几个PDF合并为一个文档:
/// <summary>
/// Merges several input files into a single PDF by running Ghostscript's <c>pdfwrite</c> device.
/// </summary>
/// <param name="fileNames">String[]. Array of Full Paths to the files to merge, in output order</param>
/// <param name="outputPath">String. Full Path to where Ghostscript will write the PDF</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="fileNames"/> or <paramref name="outputPath"/> is null.
/// </exception>
public static void GhostscriptNetJoin(String[] fileNames, String outputPath)
{
    if (fileNames == null)
    {
        throw new ArgumentNullException("fileNames");
    }
    if (outputPath == null)
    {
        throw new ArgumentNullException("outputPath");
    }

    // Paths containing a space are wrapped in double quotes, as the original code intended.
    // NOTE(review): the arguments are handed over as an array rather than a shell command
    // line - confirm whether Ghostscript.NET actually needs (or tolerates) these quotes.
    var output_file = (outputPath.IndexOf(' ') == -1) ? outputPath : String.Format("\"{0}\"", outputPath);

    var gsArgs = new List<String>();
    gsArgs.Add("-empty"); // first argument is ignored. REF: http://stackoverflow.com/q/25202577/153923
    gsArgs.Add("-dBATCH");
    gsArgs.Add("-q");
    gsArgs.Add("-dNOPAUSE");
    gsArgs.Add("-dNOPROMPT");
    gsArgs.Add("-sDEVICE=pdfwrite");
    gsArgs.Add("-dPDFSETTINGS=/prepress");
    gsArgs.Add(String.Format(@"-sOutputFile={0}", output_file));

    // Every input file is its own argument. Joining them into one space-separated string
    // would hand Ghostscript a single argument that names no existing file.
    foreach (var fileName in fileNames)
    {
        var source = (fileName.IndexOf(' ') == -1) ? fileName : String.Format("\"{0}\"", fileName);
        gsArgs.Add(source);
    }

    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        processor.Process(gsArgs.ToArray());
    }
}
之后我该如何回过头来替换(REPLACE)或更新(UPDATE)其中的第N页?
我已经按照自己的思路写出了一个例程的框架,但目前不知道该如何完成它。是否只需提供合适的参数(arg)值即可?还是应该改用其他工具?
/// <summary>
/// Replace Specific Document from source PDF file.
/// Work in progress: the step that splits <paramref name="source"/> into its parts has not
/// been written, so this method currently always throws.
/// </summary>
/// <param name="source">String. Full path to the multi-page PDF</param>
/// <param name="documentN">String. Full path to the document to insert</param>
/// <param name="indexN">int. Page Index where the new document should be inserted</param>
/// <exception cref="NotImplementedException">Always, until the split step is implemented.</exception>
public static void GhostscriptNetReplace(String source, String documentN, int indexN)
{
    var list = new List<String>();
    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        var gsArgs = new List<String>();
        // what arguments are needed?
        throw new NotImplementedException("I don't know how to code for this yet.");
        processor.Process(gsArgs.ToArray());
    }

    // Planned second half: swap the entry at indexN for the new document and re-join.
    // Nothing fills `list` yet; that is the job of the unimplemented step above.
    list.RemoveAt(indexN);
    list.Insert(indexN, documentN);

    // The result is written back over the source file. Paths containing a space are
    // wrapped in double quotes.
    var output_file = (source.IndexOf(' ') == -1) ? source : String.Format("\"{0}\"", source);
    using (var processor = new GhostscriptProcessor(version, false))
    {
        var gsArgs = new List<String>();
        gsArgs.Add("-empty"); // first argument is ignored. REF: http://stackoverflow.com/q/25202577/153923
        gsArgs.Add("-dBATCH");
        gsArgs.Add("-q");
        gsArgs.Add("-dNOPAUSE");
        gsArgs.Add("-dNOPROMPT");
        gsArgs.Add("-sDEVICE=pdfwrite");
        gsArgs.Add("-dPDFSETTINGS=/prepress");
        gsArgs.Add(String.Format(@"-sOutputFile={0}", output_file));

        // One argument per input file; a single space-joined string would be treated
        // as one file name.
        foreach (var fileName in list)
        {
            var fmtSource = (fileName.IndexOf(' ') == -1) ? fileName : String.Format("\"{0}\"", fileName);
            gsArgs.Add(fmtSource);
        }
        processor.Process(gsArgs.ToArray());
    }
}
您可能可以这样做(现在无法测试代码,但其原理基于Ghostscript.NET repo):
// Usage sketch: these statements use `source` and `indexN`, which are not declared here.
var prcPath = "PATH"; //a path to store the temporary files
// Page count of the source PDF, as reported by GetPDFPageCount.
var pageCount = GetPDFPageCount(source);
// Result of SplitPDFatIndex for the source file, the temporary path and indexN.
var list = SplitPDFatIndex(source, prcPath, indexN);
/// <summary>
/// Splits a PDF around one page and returns the pieces as a three-entry list:
/// the slice before <paramref name="index"/>, a null placeholder for the page itself,
/// and the slice after <paramref name="index"/>.
/// </summary>
/// <param name="pathToFile">String. Full path to the PDF to split</param>
/// <param name="tempPath">String. Location handed to SlicePDFatIndex for the temporary files</param>
/// <param name="index">int. Page number to split around</param>
/// <returns>List of the two slice file names with a null entry between them</returns>
private static List<String> SplitPDFatIndex(String pathToFile, String tempPath, int index)
{
    var outList = new List<String>();
    outList.Add(SlicePDFatIndex(pathToFile, tempPath, index, true));
    outList.Add(null); // Alternatively modify method below to permit pulling page N
    outList.Add(SlicePDFatIndex(pathToFile, tempPath, index, false));
    return outList;
}
/// <summary>
/// Writes one side of a PDF to a temporary file: either the pages before
/// <paramref name="index"/> ("temp_left.pdf") or the pages after it ("temp_right.pdf").
/// </summary>
/// <param name="pathToFile">String. Full path to the PDF to slice</param>
/// <param name="tempPath">String. Directory in which the temporary PDF is written</param>
/// <param name="index">int. Page number that is excluded from both slices</param>
/// <param name="lessThanIndex">bool. True for pages 1..index-1, false for pages index+1..end</param>
/// <returns>Full name of the temporary PDF that was requested from Ghostscript</returns>
private static String SlicePDFatIndex(String pathToFile, String tempPath, int index, bool lessThanIndex)
{
    var gsArgs = new List<String>();
    gsArgs.Add("-empty"); // first argument is ignored by Ghostscript.NET
    gsArgs.Add("-dBATCH");
    gsArgs.Add("-q");
    gsArgs.Add("-dNOPAUSE");
    gsArgs.Add("-dNOPROMPT");
    gsArgs.Add("-sDEVICE=pdfwrite");
    gsArgs.Add("-dPDFSETTINGS=/prepress");

    String name;
    if (lessThanIndex)
    {
        name = System.IO.Path.Combine(tempPath, "temp_left.pdf");
        gsArgs.Add("-dFirstPage=1");
        gsArgs.Add("-dLastPage=" + (index - 1).ToString());
    }
    else
    {
        name = System.IO.Path.Combine(tempPath, "temp_right.pdf");
        // No -dLastPage: Ghostscript then runs through the last page, so the total
        // page count does not have to be known here.
        gsArgs.Add("-dFirstPage=" + (index + 1).ToString());
    }

    // Switches only affect files listed after them, so the output file and page range
    // are set before the input file is named.
    gsArgs.Add(String.Format(@"-sOutputFile={0}", name));
    gsArgs.Add(String.Format(@"-f{0}", pathToFile));

    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        processor.Process(gsArgs.ToArray());
    }
    return name;
}
/// <summary>
/// Opens a PDF with a Ghostscript viewer, without rendering a page, and reports the
/// viewer's last page number.
/// </summary>
/// <param name="pathToFile">String. Full path to the PDF to inspect</param>
/// <returns>The viewer's <c>LastPageNumber</c> after opening the file</returns>
private static int GetPDFPageCount(String pathToFile)
{
    // Fully qualified because the file's using directives do not import the Viewer namespace.
    var viewer = new Ghostscript.NET.Viewer.GhostscriptViewer();
    try
    {
        viewer.ShowPageAfterOpen = false;
        viewer.ProgressiveUpdate = false;
        viewer.Open(pathToFile); // try (pathToFile, version, false) or (pathToFile, version, true) if for some reason it hangs up here
        return viewer.LastPageNumber;
    }
    finally
    {
        // Close the viewer even when Open throws.
        viewer.Close();
    }
}
我将根据我在baaron的帖子中读到的内容添加一个答案:
不使用特定的C#库将PDF转换为JPG/Images
我修改了他的代码,我认为它能满足我的需求。不过,正如KenS在上面的一条评论中指出的那样,每运行一次,文档的质量都会进一步下降。
/// <summary>
/// Replaces document at provided index with new document.
/// Every other page of the source is rasterized to a temporary JPEG at 300 dpi and the
/// pieces are re-joined over the source file with Ghostscript's <c>pdfwrite</c> device.
/// Use with Caution! If you continuously cycle using the output as the input,
/// then you run repeated risks of information or quality loss.
/// </summary>
/// <param name="source">String. Full File Path to Source</param>
/// <param name="documentN">String. Full File Path to new document</param>
/// <param name="indexN">int. Zero-based index where file needs to go; equal to the page count appends</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> or <paramref name="documentN"/> is null.
/// </exception>
public static void GhostscriptNetReplace(String source, String documentN, int indexN)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }
    if (documentN == null)
    {
        throw new ArgumentNullException("documentN");
    }

    var list = new List<String>();      // inputs for the join, in output order
    var tempFiles = new List<String>(); // only the files created by this method
    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    var fullPath = Path.GetFullPath(source);
    // Temporary images go next to the source file. fullPath names the file itself,
    // so it cannot be used as the directory part of Path.Combine.
    var tempFolder = Path.GetDirectoryName(fullPath);

    try
    {
        using (var rasterizer = new Ghostscript.NET.Rasterizer.GhostscriptRasterizer())
        {
            rasterizer.Open(source, version, false);
            var pageCount = rasterizer.PageCount;
            for (var index = 0; index < pageCount; index++)
            {
                if (index == indexN)
                {
                    list.Add(documentN);
                    continue;
                }

                var extracted = Path.Combine(tempFolder, String.Format("~1_{0}.jpg", index));
                if (File.Exists(extracted))
                {
                    File.Delete(extracted);
                }

                // The rasterizer numbers pages from 1, while index counts from 0.
                using (var img = rasterizer.GetPage(300, 300, index + 1))
                {
                    img.Save(extracted, ImageFormat.Jpeg);
                }
                tempFiles.Add(extracted);
                list.Add(extracted);
            }

            if (indexN == pageCount) // occurs if adding a page to the end
            {
                list.Add(documentN);
            }
        }

        // The result overwrites the source. Paths containing a space are wrapped in double quotes.
        var output_file = (source.IndexOf(' ') == -1) ? source : String.Format("\"{0}\"", source);
        using (var processor = new GhostscriptProcessor(version, false))
        {
            var gsArgs = new List<String>();
            gsArgs.Add("-empty"); // first argument is ignored. REF: https://stackoverflow.com/q/25202577/153923
            gsArgs.Add("-dBATCH");
            gsArgs.Add("-q");
            gsArgs.Add("-dNOPAUSE");
            gsArgs.Add("-dNOPROMPT");
            gsArgs.Add("-sDEVICE=pdfwrite");
            gsArgs.Add("-dPDFSETTINGS=/prepress");
            gsArgs.Add(String.Format(@"-sOutputFile={0}", output_file));

            // NOTE(review): the inputs include raw .jpg files - confirm that Ghostscript
            // accepts JPEG input directly; the original author marked this code as untested.
            foreach (var fileName in list)
            {
                var input = (fileName.IndexOf(' ') == -1) ? fileName : String.Format("\"{0}\"", fileName);
                gsArgs.Add(input);
            }
            processor.Process(gsArgs.ToArray());
        }
    }
    finally
    {
        // Remove only the images created above; the caller's documentN must survive.
        foreach (var tempFile in tempFiles)
        {
            File.Delete(tempFile);
        }
    }
}
工作人员决定暂时推迟,因为他们还没有准备好冒失去信息质量的风险。
因此,这段代码是未经测试的。
理论上,它应该起作用。
如果有帮助,请告诉我。如果根本没有人看,我也不想总是自己回答自己的问题。
来自我的相关帖子:
您可以使用PDF工具包PDFtk:
示例:
pdftk A=inA.pdf B=inB.pdf cat A1-12 B3 A14-end output out1.pdf
输出包含inA.pdf的前12页,随后是inB.pdf的第3页,然后是inA.pdf的第14页直至末页。
许多Linux发行版提供了一个PDFtk软件包,您可以使用它们的软件包管理器下载和安装。