天天看點

c#讀取word内容,c#提取word内容

1:

對項目添加引用,Microsoft Word 11.0 Object Library

2:

在程式中添加 using Word = Microsoft.Office.Interop.Word; 

3:

程式中添加

Word.Application app = new Microsoft.Office.Interop.Word.Application(); //可以打開word程式

Word.Document doc = null;  //一會要記錄word打開的文檔

注意:word文檔和word程式可不是一回事哦!

4:

一般來說,對于抽取word内容,用的方法很少

public override void openFile(object fileName){} //打開文檔

public override object readPar(int i){} //讀取word文檔的第i段

public override int getParCount(){} //傳回word文檔一共幾段

public override void closeFile(){}  //關閉文檔

public override void quit(){}  //關閉word程式

//從網頁上拷貝的目錄有時候會出現手動換行符^l,需要先将其換成回車段落标記^p,才能正确讀取

public void replaceChar(){}

5:代碼

/// <summary>
/// Opens the Word document identified by <paramref name="fileName"/> in the
/// Word application held by <c>app</c> and stores it in <c>doc</c>.
/// </summary>
/// <remarks>
/// If the application already has a document open, the user is asked whether
/// to close it (optionally saving first). Answering "No" leaves everything
/// untouched and returns without opening anything. If talking to the existing
/// application fails, a fresh Word application instance is created.
/// On any open failure the error is shown in a message box and <c>doc</c> is
/// reset to null so that it never refers to a document that was just closed.
/// </remarks>
/// <param name="fileName">Value passed as the FileName argument of Documents.Open.</param>
public override void openFile(object fileName)
         {
             try
             {
                 if (app.Documents.Count > 0)
                 {
                     // Guard clause: the user declined to replace the open document.
                     if (MessageBox.Show("已經打開了一個word文檔,你想關閉重新打開該文檔嗎?", "提示", MessageBoxButtons.YesNo) != DialogResult.Yes)
                     {
                         return;
                     }

                     if (MessageBox.Show("你想儲存嗎?", "儲存", MessageBoxButtons.YesNo) == DialogResult.Yes)
                     {
                         app.ActiveDocument.Save();
                     }

                     object missing = Type.Missing;
                     app.ActiveDocument.Close(ref missing, ref missing, ref missing);

                     // The previously tracked document is gone; do not keep a stale reference.
                     doc = null;
                     app.Visible = false;
                 }
             }
             catch (Exception)
             {
                 // Any failure while querying/closing through the current application
                 // object is handled by starting over with a new Word instance.
                 app = new Microsoft.Office.Interop.Word.Application();
                 doc = null;
             }

             try
             {
                 object missing = Type.Missing;
                 app.Visible = true;
                 doc = app.Documents.Open(ref fileName,
                                          ref missing, ref missing, ref missing, ref missing, ref missing,
                                          ref missing, ref missing, ref missing, ref missing, ref missing,
                                          ref missing, ref missing, ref missing, ref missing, ref missing);
             }
             catch (Exception ex)
             {
                 // Opening failed: make sure callers do not see a document from an earlier call.
                 doc = null;
                 MessageBox.Show("出現錯誤:" + ex.ToString());
             }
         }
 /// <summary>
 /// Reads paragraph <paramref name="i"/> of the current document and returns
 /// its text with leading and trailing whitespace trimmed.
 /// </summary>
 /// <param name="i">Index passed to <c>doc.Paragraphs[...]</c>.</param>
 /// <returns>
 /// The trimmed paragraph text, or null after showing a message box if any
 /// exception occurs while reading.
 /// </returns>
 public override object readPar(int i)
         {
             try
             {
                 string paragraphText = doc.Paragraphs[i].Range.Text;
                 return paragraphText.Trim();
             }
             catch (Exception ex)
             {
                 MessageBox.Show("Error:" + ex.ToString());
                 return null;
             }
         }
 
 /// <summary>
 /// Returns the number of paragraphs in the current document.
 /// </summary>
 /// <returns>
 /// <c>doc.Paragraphs.Count</c>, or 0 when no document is currently tracked
 /// (<c>doc</c> is null), so callers looping over paragraphs simply do nothing
 /// instead of hitting a NullReferenceException.
 /// </returns>
 public override int getParCount()
         {
             if (doc == null)
             {
                 return 0;
             }

             return doc.Paragraphs.Count;
         }
 
 /// <summary>
 /// Closes the application's active document, passing
 /// <c>WdSaveOptions.wdPromptToSaveChanges</c> as the save option.
 /// Any exception is reported to the user in a message box.
 /// </summary>
 public override void closeFile()
         {
             try
             {
                 object promptToSave = Word.WdSaveOptions.wdPromptToSaveChanges;
                 object missing = Type.Missing;

                 // Remaining Close arguments are left at their defaults.
                 app.ActiveDocument.Close(ref promptToSave, ref missing, ref missing);
             }
             catch (Exception closeError)
             {
                 MessageBox.Show("Error:" + closeError.ToString());
             }
         }
 
 /// <summary>
 /// Quits the Word application, passing <c>WdSaveOptions.wdSaveChanges</c>
 /// as the save option. Failures are ignored.
 /// </summary>
 public override void quit()
         {
             try
             {
                 object saveOnExit = Word.WdSaveOptions.wdSaveChanges;
                 object missing = Type.Missing;

                 app.Quit(ref saveOnExit, ref missing, ref missing);
             }
             catch (Exception)
             {
                 // Intentionally best-effort: any error raised while quitting is ignored.
             }
         }
 
 /// <summary>
 /// Replaces every manual line break ("^l") with a paragraph mark ("^p")
 /// using the application's Selection.Find, so that text pasted from web
 /// pages splits into real paragraphs.
 /// Any exception is reported with a generic message box.
 /// </summary>
 public void replaceChar()
         {
             try
             {
                 object missing = Type.Missing;
                 object replaceEverything = Word.WdReplace.wdReplaceAll;

                 // Search side: no formatting constraints, look for manual line breaks.
                 app.Selection.Find.ClearFormatting();
                 app.Selection.Find.Text = "^l";

                 // Replacement side: no formatting constraints, insert paragraph marks.
                 app.Selection.Find.Replacement.ClearFormatting();
                 app.Selection.Find.Replacement.Text = "^p";

                 // Only the 11th argument (Replace) is supplied; everything else is defaulted.
                 app.Selection.Find.Execute(
                     ref missing, ref missing, ref missing, ref missing, ref missing,
                     ref missing, ref missing, ref missing, ref missing, ref missing,
                     ref replaceEverything, ref missing, ref missing, ref missing, ref missing);
             }
             catch (Exception)
             {
                 MessageBox.Show("文檔出現錯誤,請重新操作");
             }
         }
           

6:

剛才是用讀取一段做的例子,如果要讀取一句或整篇,隻需要把readPar中的doc.Paragraphs[i].Range改成doc.Sentences[i]或doc.Content即可(這兩者本身就是Range,注意C#區分大小寫,要寫成Content)。因為都是微軟的東東,所以用起來沒有一點障礙,再加上現在的vs2005做得很智能,所以我先從java轉到了c#上。

7:

實際上,c#中讀取word是不用那麼麻煩的,但是考慮到可能還要抽取txt、ppt等多種格式,所以就寫了一個抽象類,調用起來也方便。這就是為什麼我的程式方法開頭會有override的原因:總要考慮到通用,所以多了一些代碼。

繼續閱讀