如何将.doc或.docx转换为 HTML C#

本文关键字:转换 HTML docx doc | 更新日期: 2023-09-27 18:31:50

帮助我,我不知道如何组合这段代码。希望有人能帮助我。我在 asp.net 中使用 c#,并尝试将.doc和.docx转换为 html 以便在网页中查看。

这是我的代码:

/// <summary>
/// Builds the HTML link for one row of the file listing. Returns <c>false</c> early when the
/// entry name matches the hide pattern configured for folders or for files.
/// </summary>
/// <param name="drv">Row describing the entry; its "Name" column is read as the file name.</param>
/// <remarks>Only the first part of the method is shown in this snippet.</remarks>
public bool WriteViewRow(DataRowView drv)
    {
        string strFileLink = null;
        string strFileName = Convert.ToString(drv["Name"]);
        string strFilePath = WebPathCombine(WebPath(), strFileName);
        bool blnFolder = IsDirectory(drv);
        if (blnFolder)
        {
            // Skip folders whose name matches the (optional) hide pattern.
            if (!string.IsNullOrEmpty(_strHideFolderPattern) && Regex.IsMatch(strFileName, _strHideFolderPattern, RegexOptions.IgnoreCase))
            {
                return false;
            }
            // NOTE(review): PageUrl presumably emits the opening <A ...> tag - confirm against its definition.
            strFileLink = PageUrl(strFilePath) + strFileName + "</A>";
        }
        else
        {
            // Skip files whose name matches the (optional) hide pattern.
            if (!string.IsNullOrEmpty(_strHideFilePattern) && Regex.IsMatch(strFileName, _strHideFilePattern, RegexOptions.IgnoreCase))
            {
                return false;
            }
            // Link that opens the file in the frame named "iframe01".
            // The embedded double quotes must be escaped as \" for the literal to compile.
            // NOTE(review): strFileName/strFilePath are inserted without HTML encoding - confirm they are trusted.
            strFileLink = "<A href=\"" + strFilePath + "\" target = \"iframe01\">" + strFileName + "</A>";
        }

我想把下面这段代码整合到我的代码中。我不想上传文件,而是想使用上面代码中生成的链接作为这段代码的输入:

// Check the file extension to see whether the upload is a Word document.
// Path.GetFileName drops any directory part sent along with the client file name,
// so the name cannot point outside the target folders.
string strFileName = System.IO.Path.GetFileName(fUpload.FileName);
// Path.GetExtension returns "" for names without a dot, so a file called just "doc" is not accepted.
string strExt = System.IO.Path.GetExtension(strFileName).TrimStart('.').ToUpperInvariant();
// Map-path to the folder where the uploaded file is saved
strPathToUpload = Server.MapPath("Datadir");
// Map-path to the folder where the html is saved
strPathToConvert = Server.MapPath("WordToHtml");
string strUploadedFile = System.IO.Path.Combine(strPathToUpload, strFileName);
// The Word interop calls below take their arguments as "ref object".
object FileName = strUploadedFile;
object FileToSave = System.IO.Path.Combine(strPathToConvert, strFileName + ".htm");
if (strExt == "DOCX" || strExt == "DOC")
{
    fUpload.SaveAs(strUploadedFile);
    lblMessage.Text = "File uploaded successfully";
    // Open the file internally in Word. All parameters must be passed by object reference.
    objWord.Documents.Open(ref FileName, ref readOnly, ref missing, ref missing, ref missing, ref missing,
    ref missing, ref missing, ref missing, ref missing, ref isVisible, ref missing, ref missing, ref missing,
    ref missing, ref missing);
    // Do the background activity
    objWord.Visible = false;
    Microsoft.Office.Interop.Word.Document oDoc = objWord.ActiveDocument;
    oDoc.SaveAs(ref FileToSave, ref fltDocFormat, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing);
    // NOTE(review): the document and the Word instance are not closed in this snippet -
    // confirm that happens elsewhere, otherwise WINWORD processes and file locks will pile up.
    lblMessage.Text = strFileName + " converted to HTML successfully";
    docPreview.Attributes["src"] = "../WordToHtml/" + strFileName + ".htm";
}

有人有什么建议吗?实际上,我想开发一个类似 Web 文件管理器的网页,让用户能够上传、删除、查看和编辑文件。对于 .txt 文件,这些功能都已经完成,但我无法完成 Word 文档的转换。

如何将.doc或.docx转换为 HTML C#

您要使用的代码依赖 Word 自动化(Office Interop)。在服务器环境(例如您的 ASP.NET 应用)中这样做是不好的做法,因为会带来许多问题,强烈建议不要这样做。在此处阅读原因。

相反,请寻找一个能够替您完成这项工作的工具,类似于这个答案中提到的工具。在大多数情况下,这些工具无法在 HTML 中完全原样地呈现文档,具体效果取决于文档的结构和复杂程度。

希望这段代码对您有所帮助,它使用了 Microsoft.Office.Interop.Word 中的类:

    /// <summary>
    /// Opens the Word document at <paramref name="strFilePath"/>, saves it as filtered HTML and
    /// returns the result of <c>ClsCommon.HTMLBody</c> applied to the saved file's content.
    /// </summary>
    /// <param name="strFilePath">Path of the Word document to open.</param>
    /// <param name="objExtension">Requested output; only <c>Extension.WebPage</c> is handled.</param>
    /// <returns>
    /// The extracted HTML, or an empty string when <paramref name="objExtension"/> is not
    /// <c>Extension.WebPage</c> or when any step of the conversion fails.
    /// </returns>
    public static string ReadWordFile(string strFilePath, Extension objExtension)
    {
        if (objExtension != Extension.WebPage)
        {
            return string.Empty;
        }

        try
        {
            Open(strFilePath);
            string strHtmlPath = SaveAs(strFilePath, HtmExtension, WdSaveFormat.wdFormatFilteredHTML);
            return ClsCommon.HTMLBody(ClsCommon.ReadFile(strHtmlPath, true));
        }
        catch (System.Exception ex)
        {
            // Conversion stays best-effort (callers get an empty string), but the failure
            // is recorded instead of being silently discarded.
            System.Diagnostics.Trace.TraceError("ReadWordFile failed for '{0}': {1}", strFilePath, ex);
            return string.Empty;
        }
    }
      /// <summary>
      /// Saves the currently opened document under <paramref name="FilePath"/> with its extension
      /// replaced by <paramref name="strFileExtension"/>, then closes the document.
      /// </summary>
      /// <param name="FilePath">Path of the source document; only its extension is changed.</param>
      /// <param name="strFileExtension">Extension to give the saved file.</param>
      /// <param name="objSaveFormat">Word save format passed to <c>doc.SaveAs</c>.</param>
      /// <returns>
      /// The target path. It is returned even when saving failed, so callers must be prepared
      /// for the file not to exist.
      /// </returns>
      private static string SaveAs(string FilePath, string strFileExtension, WdSaveFormat objSaveFormat)
    {
        try
        {
            FilePath = System.IO.Path.ChangeExtension(FilePath, strFileExtension);
            doc.SaveAs(FilePath, objSaveFormat, objMissing, objMissing, objMissing, objMissing, objMissing, objMissing, objMissing, objMissing, objMissing, objMissing, objMissing, objMissing, objMissing, objMissing);
        }
        catch (System.Exception ex)
        {
            // Keep the original best-effort contract (no exception escapes), but leave a trace
            // so that a failed save can be diagnosed.
            System.Diagnostics.Trace.TraceError("SaveAs failed for '{0}': {1}", FilePath, ex);
        }
        finally
        {
            // Always close the document, whether or not the save succeeded.
            Close();
        }
        return FilePath;
    }
    /// <summary>
    /// Returns the markup found between the opening <c>&lt;body ...&gt;</c> tag and the first
    /// <c>&lt;/body&gt;</c> that follows it, after the input has been passed through
    /// <c>ClearHTMLContent</c>.
    /// </summary>
    /// <param name="strHTML">Full HTML document text.</param>
    /// <returns>
    /// The inner body markup; the cleaned input unchanged when no complete body element is
    /// found; an empty string for null or empty input.
    /// </returns>
    public static string HTMLBody(string strHTML)
    {
        if (string.IsNullOrEmpty(strHTML))
        {
            return string.Empty;
        }

        strHTML = ClearHTMLContent(strHTML);

        const string BodyOpen = "<body";
        const string BodyClose = "</body>";

        // Ordinal, case-insensitive search: no lower-cased copies and no culture dependence.
        // A tag at index 0 counts as found.
        int bodyStart = strHTML.IndexOf(BodyOpen, System.StringComparison.OrdinalIgnoreCase);
        if (bodyStart < 0)
        {
            return strHTML;
        }

        int contentStart = bodyStart + BodyOpen.Length;

        // Look for the closing tag only after the opening one, so a stray "</body>" earlier
        // in the text cannot produce a negative length.
        int bodyEnd = strHTML.IndexOf(BodyClose, contentStart, System.StringComparison.OrdinalIgnoreCase);
        if (bodyEnd < 0)
        {
            return strHTML;
        }

        // Skip the rest of the opening tag (its attributes) up to and including the first '>'.
        // If there is no '>' before the closing tag, everything after "<body" is kept.
        int tagEnd = strHTML.IndexOf('>', contentStart, bodyEnd - contentStart);
        if (tagEnd >= 0)
        {
            contentStart = tagEnd + 1;
        }

        return strHTML.Substring(contentStart, bodyEnd - contentStart);
    }
    /// <summary>
    /// Removes the first <c>&lt;base ...&gt;</c> tag (and any identical copies of it) from the
    /// markup and strips the stray "Â" and U+FFFD replacement characters.
    /// </summary>
    /// <param name="Str">HTML text to clean.</param>
    /// <returns>The cleaned text; an empty string for null or empty input.</returns>
    public static string ClearHTMLContent(string Str)
    {
        if (string.IsNullOrEmpty(Str))
        {
            return string.Empty;
        }

        // Ordinal, case-insensitive search; a tag at index 0 counts as found.
        int baseStart = Str.IndexOf("<base", System.StringComparison.OrdinalIgnoreCase);
        if (baseStart >= 0)
        {
            int baseEnd = Str.IndexOf('>', baseStart);
            // An unterminated "<base" is left alone: there is no complete tag to remove, and
            // replacing an empty string would throw ArgumentException.
            if (baseEnd >= 0)
            {
                string baseTag = Str.Substring(baseStart, baseEnd - baseStart + 1);
                Str = Str.Replace(baseTag, "");
            }
        }

        return Str.Replace("Â", "").Replace("�", "");
    }