替换PDF中的特定文档

本文关键字：文档 PDF 替换 | 更新日期: 2023-09-27 18:30:03

包括：

using Ghostscript.NET;
using Ghostscript.NET.Processor;
using Ghostscript.NET.Rasterizer;

现在，我正在使用Ghostscript.Net将几个PDF合并为一个文档：

/// <summary>
/// Merges the given input files into a single PDF by running Ghostscript's
/// pdfwrite device over them in the order supplied.
/// </summary>
/// <param name="fileNames">String[]. Full paths of the files to merge, in output order</param>
/// <param name="outputPath">String. Full path to where Ghostscript will write the merged PDF</param>
/// <exception cref="ArgumentNullException">Thrown when either argument is null</exception>
public static void GhostscriptNetJoin(String[] fileNames, String outputPath)
{
    if (fileNames == null)
    {
        throw new ArgumentNullException("fileNames");
    }
    if (outputPath == null)
    {
        throw new ArgumentNullException("outputPath");
    }

    // Paths containing a space are wrapped in double quotes.
    // NOTE(review): the arguments are handed over as an array, not a command line;
    // confirm whether the surrounding quotes are still required.
    var output_file = (outputPath.IndexOf(' ') == -1) ? outputPath : String.Format("\"{0}\"", outputPath);

    var gsArgs = new List<String>();
    gsArgs.Add("-empty"); // first argument is ignored. REF: http://stackoverflow.com/q/25202577/153923
    gsArgs.Add("-dBATCH");
    gsArgs.Add("-q");
    gsArgs.Add("-dNOPAUSE");
    gsArgs.Add("-dNOPROMPT");
    gsArgs.Add("-sDEVICE=pdfwrite");
    gsArgs.Add("-dPDFSETTINGS=/prepress");
    gsArgs.Add(String.Format(@"-sOutputFile={0}", output_file));

    // Each input file is its own argument; a single space-joined string would be
    // passed along as one argument instead of a list of files.
    foreach (var fileName in fileNames)
    {
        var source = (fileName.IndexOf(' ') == -1) ? fileName : String.Format("\"{0}\"", fileName);
        gsArgs.Add(source);
    }

    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        processor.Process(gsArgs.ToArray());
    }
}

之后我该如何回过头来替换（REPLACE）或更新（UPDATE）其中的第 N 页？

我已经按照自己的思路写出了一个例程的框架，但目前不知道该如何完成它。是否可以通过提供某些参数（arg）值来实现？还是应该改用其他工具？

/// <summary>
/// Replace Specific Document from source PDF file.
/// Unfinished sketch: the step that splits the source PDF into its documents is
/// not implemented, so this method currently always throws.
/// </summary>
/// <param name="source">String. Full path to the multi-page PDF; also used as the output path</param>
/// <param name="documentN">String. Full path to the document to insert</param>
/// <param name="indexN">int. Page Index where the new document should be inserted</param>
/// <exception cref="NotImplementedException">Always thrown until the split step is written</exception>
public static void GhostscriptNetReplace(String source, String documentN, int indexN)
{
    // Intended to hold the paths of the individual documents extracted from source.
    var list = new List<String>();
    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        var gsArgs = new List<String>();
        // what arguments are needed?
        throw new NotImplementedException("I don't know how to code for this yet.");
        processor.Process(gsArgs.ToArray());
    }

    // Planned remainder: swap entry indexN for the new document, then merge
    // everything back into source.
    list.RemoveAt(indexN);
    list.Insert(indexN, documentN);
    var output_file = (source.IndexOf(' ') == -1) ? source : String.Format("\"{0}\"", source);
    using (var processor = new GhostscriptProcessor(version, false))
    {
        var gsArgs = new List<String>();
        gsArgs.Add("-empty"); // first argument is ignored. REF: http://stackoverflow.com/q/25202577/153923
        gsArgs.Add("-dBATCH");
        gsArgs.Add("-q");
        gsArgs.Add("-dNOPAUSE");
        gsArgs.Add("-dNOPROMPT");
        gsArgs.Add("-sDEVICE=pdfwrite");
        gsArgs.Add("-dPDFSETTINGS=/prepress");
        gsArgs.Add(String.Format(@"-sOutputFile={0}", output_file));
        // One argument per input file rather than a single space-joined string.
        foreach (var fileName in list)
        {
            var fmtSource = (fileName.IndexOf(' ') == -1) ? fileName : String.Format("\"{0}\"", fileName);
            gsArgs.Add(fmtSource);
        }
        processor.Process(gsArgs.ToArray());
    }
}

替换PDF中的特定文档

您可能可以这样做（现在无法测试代码，但其原理基于Ghostscript.NET repo）：

// Location where the temporary split files are written.
String prcPath = "PATH";
// Number of pages reported for the source PDF.
int pageCount = GetPDFPageCount(source);
// Paths of the pieces produced by splitting the source PDF around indexN.
List<String> list = SplitPDFatIndex(source, prcPath, indexN);
/// <summary>
/// Splits a PDF around a page index into a left part and a right part.
/// </summary>
/// <param name="pathToFile">String. Full path to the PDF to split</param>
/// <param name="tempPath">String. Location where the temporary part files are written</param>
/// <param name="index">int. Page number to split around; that page is in neither part</param>
/// <returns>
/// A three-entry list: the left part's path, a null placeholder for page
/// <paramref name="index"/>, and the right part's path.
/// </returns>
private static List<String> SplitPDFatIndex(String pathToFile, String tempPath, int index)
{
    var outList = new List<String>();
    outList.Add(SlicePDFatIndex(pathToFile, tempPath, index, true));
    outList.Add(null); // Alternatively modify method below to permit pulling page N
    outList.Add(SlicePDFatIndex(pathToFile, tempPath, index, false));
    return outList;
}
/// <summary>
/// Writes either the pages before <paramref name="index"/> or the pages after it
/// to a temporary PDF, using Ghostscript's pdfwrite device.
/// </summary>
/// <param name="pathToFile">String. Full path to the PDF to slice</param>
/// <param name="tempPath">String. Directory that receives the temporary file</param>
/// <param name="index">int. Page number to slice around; that page is excluded</param>
/// <param name="lessThanIndex">
/// bool. True writes pages 1 to index - 1 to "temp_left.pdf";
/// false writes pages index + 1 to the end to "temp_right.pdf"
/// </param>
/// <returns>Full path of the temporary PDF that was requested</returns>
private static String SlicePDFatIndex(String pathToFile, String tempPath, int index, bool lessThanIndex)
{
    // NOTE(review): assumes index is a 1-based page number strictly inside the
    // document; index 1 gives the left slice an empty range (1..0) - confirm callers.
    var name = System.IO.Path.Combine(tempPath, lessThanIndex ? "temp_left.pdf" : "temp_right.pdf");

    var gsArgs = new List<String>();
    gsArgs.Add("-empty");
    gsArgs.Add("-dBATCH");
    gsArgs.Add("-q");
    gsArgs.Add("-dNOPAUSE");
    gsArgs.Add("-dNOPROMPT");
    gsArgs.Add("-sDEVICE=pdfwrite");
    gsArgs.Add("-dPDFSETTINGS=/prepress");
    gsArgs.Add(String.Format(@"-sOutputFile={0}", name));
    if (lessThanIndex)
    {
        gsArgs.Add("-dFirstPage=1");
        gsArgs.Add("-dLastPage=" + (index - 1).ToString());
    }
    else
    {
        // No -dLastPage: Ghostscript then runs through the last page, so the
        // total page count is not needed here.
        gsArgs.Add("-dFirstPage=" + (index + 1).ToString());
    }
    // The input file goes last so the switches above apply to it.
    gsArgs.Add("-f");
    gsArgs.Add(pathToFile);

    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        processor.Process(gsArgs.ToArray());
    }
    return name;
}
/// <summary>
/// Opens a PDF with a GhostscriptViewer and returns its last page number.
/// </summary>
/// <param name="pathToFile">String. Full path to the PDF to inspect</param>
/// <returns>The viewer's LastPageNumber for the opened file</returns>
private static int GetPDFPageCount(String pathToFile)
{
    var viewer = new GhostscriptViewer();
    viewer.ShowPageAfterOpen = false;
    viewer.ProgressiveUpdate = false;
    viewer.Open(pathToFile); // try (pathToFile, version, false) or (pathToFile, version, true) if for some reason it hangs up here
    try
    {
        return viewer.LastPageNumber;
    }
    finally
    {
        // Close the viewer even if reading the page number throws.
        viewer.Close();
    }
}

我将根据我在baaron的帖子中读到的内容添加一个答案：

不使用特定的C#库将PDF转换为JPG/Images

我修改了他的代码，我认为它能满足我的需求。不过，就像KenS在上面的一条评论中发布的那样，每次运行都会继续失去质量。

/// <summary>
/// Replaces document at provided index with new document.
/// Every other page of the source is rasterized to a 300 dpi JPEG, the new
/// document takes the slot at <paramref name="indexN"/>, and the pieces are
/// merged back over the source file with Ghostscript's pdfwrite device.
/// Use with Caution! If you continuously cycle using the output as the input,
/// then you run repeated risks of information or quality loss.
/// </summary>
/// <param name="source">String. Full File Path to Source; it is overwritten with the result</param>
/// <param name="documentN">String. Full File Path to new document</param>
/// <param name="indexN">
/// int. Zero-based index where file needs to go; a value equal to the page count
/// appends the document at the end
/// </param>
public static void GhostscriptNetReplace(String source, String documentN, int indexN)
{
    var list = new List<String>();      // merge inputs, in output order
    var tempFiles = new List<String>(); // only the images created here; removed at the end
    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    // Temporary images are written into the directory that holds the source file.
    var folder = Path.GetDirectoryName(Path.GetFullPath(source));
    try
    {
        int index;
        using (var rasterizer = new Ghostscript.NET.Rasterizer.GhostscriptRasterizer())
        {
            rasterizer.Open(source, version, false);
            for (index = 0; index < rasterizer.PageCount; index++)
            {
                if (index == indexN)
                {
                    list.Add(documentN);
                    continue;
                }
                var extracted = Path.Combine(folder, String.Format("~1_{0}.jpg", index));
                File.Delete(extracted); // clears a stale copy; does nothing if the file is absent
                // The loop index is zero-based; the rasterizer is called with a
                // one-based page number.
                using (var img = rasterizer.GetPage(300, 300, index + 1))
                {
                    img.Save(extracted, ImageFormat.Jpeg);
                }
                tempFiles.Add(extracted);
                list.Add(extracted);
            }
            if (index == indexN) // occurs if adding a page to the end
            {
                list.Add(documentN);
            }
        }
        var output_file = (source.IndexOf(' ') == -1) ? source : String.Format("\"{0}\"", source);
        using (var processor = new GhostscriptProcessor(version, false))
        {
            var gsArgs = new List<String>();
            gsArgs.Add("-empty"); // first argument is ignored. REF: https://stackoverflow.com/q/25202577/153923
            gsArgs.Add("-dBATCH");
            gsArgs.Add("-q");
            gsArgs.Add("-dNOPAUSE");
            gsArgs.Add("-dNOPROMPT");
            gsArgs.Add("-sDEVICE=pdfwrite");
            gsArgs.Add("-dPDFSETTINGS=/prepress");
            gsArgs.Add(String.Format(@"-sOutputFile={0}", output_file));
            // NOTE(review): the extracted pages are JPEG images; confirm that this
            // pdfwrite run accepts them directly as inputs.
            foreach (var fileName in list)
            {
                var input = (fileName.IndexOf(' ') == -1) ? fileName : String.Format("\"{0}\"", fileName);
                gsArgs.Add(input);
            }
            processor.Process(gsArgs.ToArray());
        }
    }
    finally
    {
        // Delete only the images created above, never the caller's documentN.
        foreach (var fileName in tempFiles)
        {
            File.Delete(fileName);
        }
    }
}

工作人员决定暂时推迟，因为他们还没有准备好冒失去信息质量的风险。

因此，这段代码是未经测试的。

理论上，它应该起作用。

如果有帮助，请告诉我。如果根本没人看，我就不想一直自己回答自己的问题了。

来自我的相关帖子：

您可以使用PDF工具包PDFtk:

示例：

pdftk A=inA.pdf B=inB.pdf cat A1-12 B3 A14-end output out1.pdf

输出依次包括：inA.pdf的第1至12页、inB.pdf的第3页，以及inA.pdf从第14页到末尾的所有页。

许多Linux发行版提供了一个PDFtk软件包，您可以使用它们的软件包管理器下载和安装。