从.doc文件中删除表格、形状和图像

本文关键字:图像 表格 doc 文件 删除 | 更新日期: 2023-09-27 18:14:55

我正在c#中解析一个.doc文件以提取文本。

问题是。doc文件有表格,形状和图像,以及我的文本。我使用Microsoft Interop Word库提取文本。当我提取文本时,我也得到了形状和图像上的标签,以及表的列和行内部的数据。

我不需要形状的标签,也不需要表格或图像中的数据。我如何从我的。doc文件中删除这些形状、它们的标签、图像和表格?
这是代码

public void ReadMsWord()
    {
        // variable to store file path
        string filePath = null;
        // open dialog box to select file
        OpenFileDialog file = new OpenFileDialog();
        // dilog box title name
        file.Title = "Word File";
        // set initial directory of computer system
        file.InitialDirectory = "c:''";
        // set restore directory
        file.RestoreDirectory = true;
        // execute if block when dialog result box click ok button
        if (file.ShowDialog() == DialogResult.OK)
        {
            // store selected file path
            filePath = file.FileName.ToString();
        }
        try
        {
            // create word application
            Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.ApplicationClass();
            // create object of missing value
            object miss = System.Reflection.Missing.Value;
            // create object of selected file path
            object path = filePath;
            // set file path mode
            object readOnly = false;
            // open document                
            Microsoft.Office.Interop.Word.Document docs = word.Documents.Open(ref path, ref miss, ref readOnly, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss);
            // select whole data from active window document
            docs.ActiveWindow.Selection.WholeStory();
            // handover the data to cllipboard
            docs.ActiveWindow.Selection.Copy();
            // clipboard create reference of idataobject interface which transfer the data
            IDataObject data = Clipboard.GetDataObject();
            //set data into richtextbox control in text format
            string t = "";
            string[] y = {};
            t = data.GetData(DataFormats.Text).ToString();
            y = t.Split(''n');
            string check = "";
            string check1 = "";
            string A = "";
            //int i = 0;
            for (int i = 0; i < y.Length - 1; i++)
            {
                if (!Regex.IsMatch(y[i], @"^([A|B|C|D]|  )")&&(y[i].Length>15))
                {
                    //@"^'d+"
                    //int j = i + 1;
                    //  while (!Regex.IsMatch(asdf[j], @"^[A|B|C]"))
                    // {
                    check = check + ''n'+''n' + y[i];    
                     //i++;
                }
            }
            for (int i = 0; i < y.Length - 1; i++)
            {
                if (Regex.IsMatch(y[i], @"^[A |B |C |D ]"))
                {
                    //@"^'d+"
                    //int j = i + 1;
                    //  while (!Regex.IsMatch(asdf[j], @"^[A|B|C]"))
                    // {

                    check1 = check1 + ''n' + ''n' + y[i];
                    //i++;
                }
            }

代码@theGhostofc这是我正在使用的代码,这是给出异常类型错误匹配类型


   private void button1_Click(object sender, EventArgs e)
    {    
    string filePath = null;
        // open dialog box to select file
        OpenFileDialog file = new OpenFileDialog();
        // dilog box title name
        file.Title = "Word File";
        // set initial directory of computer system
        file.InitialDirectory = "c:''";
        // set restore directory
        file.RestoreDirectory = true;
        // execute if block when dialog result box click ok button
        if (file.ShowDialog() == DialogResult.OK)
        {
            // store selected file path
            filePath = file.FileName.ToString();
        }




       try
       {
           // create word application
           Microsoft.Office.Interop.Word.Application word = new   
           Microsoft.Office.Interop.Word.Application();
              // create object of missing value
           object miss = System.Reflection.Missing.Value;
           // create object of selected file path
           object path = filePath;
           // set file path mode
           object readOnly = false;
           // open document                
           Microsoft.Office.Interop.Word.Document docs = word.Documents.Open(ref path,
            ref miss, ref readOnly,
                                                               ref miss, 
          ref miss, ref miss, ref miss,
                                                                             ref miss, 
          ref miss, ref miss, ref miss,
                                                                             ref miss, 
          ref miss, ref miss, ref miss,
                                                                             ref miss);
           foreach (Microsoft.Office.Interop.Word.Table tbl in docs.Tables)
           {
               tbl.Delete();
           }
           foreach (Microsoft.Office.Interop.Word.Shape shp in docs.Shapes)
           {
               shp.Delete();
           }
           foreach (Microsoft.Office.Interop.Word.InlineShape ilshp in 
            docs.InlineShapes)
           {
               if (ilshp.Type == 
                  Microsoft.Office.Interop.Word.WdInlineShapeType.wdInlineShapePicture)
                   ilshp.Delete();

           }
           docs.Close(ref path,ref readOnly,ref miss);
       }
       catch (Exception ex)
       {
           MessageBox.Show(ex.Message);
       }
}




@theghostofc代码

 private void button1_Click(object sender, EventArgs e)
    {
        // variable to store file path
        string filePath = null;
        // open dialog box to select file
        OpenFileDialog file = new OpenFileDialog();
        // dilog box title name
        file.Title = "Word File";
        // set initial directory of computer system
        file.InitialDirectory = "c:''";
        // set restore directory
        file.RestoreDirectory = true;
        // execute if block when dialog result box click ok button
        if (file.ShowDialog() == DialogResult.OK)
        {
            // store selected file path
            filePath = file.FileName.ToString();
        }
        Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.ApplicationClass();
        // create object of missing value
        object miss = System.Reflection.Missing.Value;
        // create object of selected file path
        object path = filePath;
        // set file path mode
        object readOnly = false;
        // open document                
        Microsoft.Office.Interop.Word.Document docs = word.Documents.Open(ref path, ref miss, ref readOnly, ref miss,
                                                                          ref miss, ref miss, ref miss, ref miss,
                                                                          ref miss, ref miss, ref miss, ref miss,
                                                                          ref miss, ref miss, ref miss, ref miss);
        try
        {
            // create word application

            foreach (Microsoft.Office.Interop.Word.Table tbl in docs.Tables)
            {
                tbl.Delete();
            }
            foreach (Microsoft.Office.Interop.Word.Shape shp in docs.Shapes)
            {
                shp.Delete();
            }
            foreach (Microsoft.Office.Interop.Word.InlineShape ilshp in docs.InlineShapes)
            {
                if (ilshp.Type == Microsoft.Office.Interop.Word.WdInlineShapeType.wdInlineShapePicture)
                    ilshp.Delete();
            }
            object saveOption = Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges;
            object originalFormat = Microsoft.Office.Interop.Word.WdOriginalFormat.wdOriginalDocumentFormat;
            object routeDocument = false;
            docs.Close(ref saveOption, ref originalFormat, ref routeDocument);
            // docs.Close(ref miss, ref miss, ref miss);
            docs = null;
            GC.Collect();
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
    }

这是代码,请告诉我这两个方法都要求参数docs.Save()docs.Close()。我已经将参数传递给docs.Close(),但我不知道应该将什么参数传递给docs.Save(),所以我没有在我的代码中使用docs.Save(),所以请明确定义我这些东西,我将非常高兴你

从.doc文件中删除表格、形状和图像

如果您只是想从word文档中删除表格、形状和图像,您可以尝试下面的代码片段:

try
{
    // create word application
    Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.ApplicationClass();
    // create object of missing value
    object miss = System.Reflection.Missing.Value;
    // create object of selected file path
    object path = filePath;
    // set file path mode
    object readOnly = false;
    // open document                
    Microsoft.Office.Interop.Word.Document docs = word.Documents.Open(ref path, ref miss, ref readOnly, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss);
    foreach (Microsoft.Office.Interop.Word.Table tbl in docs.Tables)
    {
        tbl.Delete();
    }
    foreach (Microsoft.Office.Interop.Word.Shape shp in docs.Shapes)
    {
        shp.Delete();
    }
    foreach (Microsoft.Office.Interop.Word.InlineShape ilshp in docs.InlineShapes)
    {
        if (ilshp.Type == Microsoft.Office.Interop.Word.WdInlineShapeType.wdInlineShapePicture)
            ilshp.Delete();
    }
    docs.Close();
}

你可以保存docs一旦你完成。

如果你想删除更多的对象,你可以在Microsoft.Office.Interop.Word.WdInlineShapeType中看到更多的选项,这将允许删除链接图片

注::这不是复制粘贴代码。