Public Sub GetFirst()
GetDataFromWord "初检"
End Sub
Public Sub GetDataFromWord(ByVal SheetName As String)
AppSettings
‘On Error GoTo ErrHandler
Dim StartTime, UsedTime As Variant
StartTime = VBA.Timer
‘Input code here
Dim Wb As Workbook
Dim Sht As Worksheet
Dim oSht As Worksheet
Dim Rng As Range
Dim Arr As Variant
Dim wdApp As Word.Application
Dim wdDoc As Word.Document
Dim wdRng As Word.Range
‘Const SHEET_NAME As String = "提取信息"
Set Wb = Application.ThisWorkbook
Set Sht = Wb.Worksheets(SheetName)
Dim FilePath As String
With Application.FileDialog(msoFileDialogFilePicker)
.AllowMultiSelect = False
.InitialFileName = Wb.Path
.Title = "提取" & SheetName & "数据"
.Filters.Clear
.Filters.Add "Word文档", "*.rtf*"
If .Show = -1 Then
FilePath = .SelectedItems(1)
Else
MsgBox "您没有选中任何文件夹,本次汇总中断!"
Exit Sub
End If
End With
Debug.Print FilePath
Set wdApp = New Word.Application
Set wdDoc = wdApp.Documents.Open(FilePath)
Application.StatusBar = ">>>>>>>>Positioning & Replacing >>>>>>>>"
PositioningClear wdDoc, 5 ‘定位删除英文行 避免正则提取造成干扰
Application.StatusBar = ">>>>>>>>Regexpress Getting array >>>>>>>>"
Arr = RegGetArray(wdDoc.Content.Text) ‘正则从全文提取内容 存入数组
wdDoc.Close False ‘关闭doc
wdApp.Quit ‘退出app
Set wdApp = Nothing
Set wdDoc = Nothing
With Sht
.Cells.Clear
.Range("A1:D1").Value = Array("大项", "小项", "D值", "E值")
Set Rng = .Range("A2").Resize(UBound(Arr, 2), UBound(Arr))
Rng.Value = Application.WorksheetFunction.Transpose(Arr)
Sort2003 .UsedRange
End With
UsedTime = VBA.Timer - StartTime
Debug.Print "UsedTime:" & Format(UsedTime, "0.000 Seconds")
‘MsgBox "UsedTime:" & Format(UsedTime, "0.000 Seconds"), vbOKOnly, "NextSeven QQ "
ErrorExit:
Set Wb = Nothing
Set Sht = Nothing
Set Rng = Nothing
AppSettings False
On Error Resume Next
wdApp.Quit
Exit Sub
ErrHandler:
If Err.Number <> 0 Then
MsgBox Err.Description & "!", vbCritical, "NextSeven QQ "
Debug.Print Err.Description
Err.Clear
Resume ErrorExit
End If
End Sub
Public Sub AppSettings(Optional IsStart As Boolean = True)
If IsStart Then
Application.ScreenUpdating = False
Application.DisplayAlerts = False
Application.Calculation = xlCalculationManual
Application.StatusBar = ">>>>>>>>Macro Is Running>>>>>>>>"
Else
Application.ScreenUpdating = True
Application.DisplayAlerts = True
Application.Calculation = xlCalculationAutomatic
Application.StatusBar = False
End If
End Sub
Function RegGetArray(ByVal OrgText As String) As String()
Dim Reg As Object, Mh As Object, OneMh As Object
Dim Reg2 As Object
Dim Arr() As String, Index As Long
Dim Elm As String
Set Reg = CreateObject("Vbscript.Regexp")
Set Reg2 = CreateObject("Vbscript.Regexp")
Reg2.Global = True
With Reg
‘OrgText = Application.ActiveDocument.Content
.MultiLine = True
.Global = True
.Ignorecase = False
‘可用
‘.Pattern = "(?:\s)?(\S*?)?\s? *" & "(?:[ ])([^ ][^\r\n\v]*?)\s+?(D=[\d\.]+)\s+(E=[\d\.]+)[\s]+?"
.Pattern = "(?:\s+?)([一-龥;,,]*?)?\s? *" & "(?:[ ])([^ ][^\r\n\v]*?)\s+?(D=[\d\.]+)\s+(E=[\d\.]+)[\s]+?"
Set Mh = .Execute(OrgText)
Index = 0
ReDim Arr(1 To 4, 1 To 1)
For Each OneMh In Mh
Index = Index + 1
ReDim Preserve Arr(1 To 4, 1 To Index)
If OneMh.submatches(0) <> "" Then Elm = OneMh.submatches(0)
Reg2.Pattern = "[;,,]?(左视图|前视图|纵切面)+[;,,]?"
Arr(1, Index) = Reg2.Replace(Elm, "")
Reg2.Pattern = "[\s#G]"
Arr(2, Index) = Reg2.Replace(OneMh.submatches(1), "")
‘Debug.Print OneMh.submatches(2)
Arr(3, Index) = Split(OneMh.submatches(2), "=")(1)
‘Debug.Print OneMh.submatches(3)
Arr(4, Index) = Split(OneMh.submatches(3), "=")(1)
Next OneMh
End With
RegGetArray = Arr
Set Reg = Nothing: Set Mh = Nothing
Set Reg2 = Nothing
End Function
Public Sub PositioningClear(ByVal OpenDoc As Word.Document, ByVal Times As Long)
Dim wdRng As Word.Range
Dim lngStart As Long
Dim lngEnd As Long
Dim lngTime As Long
For lngTime = 1 To Times
lngEnd = OpenDoc.Content.End
With OpenDoc.Content.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "ALIMENTARY SYSTEM"
.Replacement.Text = ""
If .Execute Then
lngStart = .Parent.Start
Set wdRng = OpenDoc.Range(lngStart, lngEnd)
End If
End With
If Not wdRng Is Nothing Then
With wdRng.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "[^l^13][A-Za-z0-9\- ,;:.]@[^l^13]"
.MatchWildcards = True
.Wrap = wdFindStop
.Forward = True
.Replacement.Text = "^l"
‘n = 0
.Execute Replace:=wdReplaceAll
‘Do While .Execute
‘ n = n + 1
‘ Debug.Print n; "____________"; .Parent.Text
‘ If n > 1000 Then Exit Do
‘Loop
End With
End If
Set wdRng = Nothing
Next lngTime
End Sub
Sub Sort2003(ByVal RngWithTitle As Range, Optional SortColumnNo As Long = 1)
‘key1代表第一个排序的列的关键字
‘Order1表示第一字段的排序方式,赋值为xlAscending表示升序,改为xlDescending表示降序。
‘Header表示是否包含标题,赋值为xlYes表示标题不参与排序,赋值为xlNo表示标题也参数排序
‘MatchCase表示排序时是否区分大小写,赋值为False表示不区分大小写
‘Orientation表示排序方向,赋值为xlTopToBottom或者xlSortColumns表示按列排序,赋值为xlSortRows 表示排行排序
‘SortMethod用于限制对汉字排序时的排序方式,赋值为xlPinYin表示按拼音排序,赋值为xlStroke表示按笔划排序
With RngWithTitle
.Sort Key1:=RngWithTitle.Cells(1, SortColumnNo), Order1:=xlAscending, Header:=xlYes, _
MatchCase:=False, Orientation:=xlTopToBottom, SortMethod:=xlPinYin
End With
End Sub
原文:http://www.cnblogs.com/nextseven/p/7129186.html