Programming - Extracting Text from Office Documents
Make sure you include the relative office document library in your project references.
1. Powerpoint Presentation to Text
Private Sub convert_ppt(filename As String)
Dim pptapp As PowerPoint.Application
Dim pptdoc As PowerPoint.Presentation
Set pptapp = CreateObject("PowerPoint.Application")
Set pptdoc = pptapp.Presentations.Open(filename, msoCTrue, msoCTrue, msoFalse)
pptdoc.SaveAs App.Path & "\extract.rtf", ppSaveAsRTF
pptdoc.Close
pptapp.Quit
Set pptdoc = Nothing
Set pptapp = Nothing
' open the rtf now in word and convert to text.
' (you will need to include the code for Convert_WordDoc
' in your project also for this function to work properly).
Call Convert_WordDoc(App.Path & "\extract.rtf")
End Sub
2. Access Database to Text
Private Sub Convert_AccessAll(filename As String)
Dim AccessApp As Access.Application
Dim oConn As New ADODB.Connection
Dim oSchema As New ADODB.Recordset
Dim ofilesystem As Scripting.FileSystemObject
Dim oFile As Scripting.TextStream
Dim strfile As String
Dim pos As Long
Dim DefDir As String
Set ofilesystem = CreateObject("Scripting.FileSystemObject")
Set AccessApp = CreateObject("Access.Application")
AccessApp.OpenCurrentDatabase (filename)
pos = InStrRev(filename, "\")
DefDir = Mid(filename, 1, pos - 1)
oConn.Open "DRIVER=Microsoft Access Driver (*.mdb); UID=admin; UserCommitSync=Yes; " & _
"Threads=3; SafeTransactions=0; PageTimeout=5; MaxScanRows=8; MaxBufferSize=512; " & _
"ImplicitCommitSync=Yes; FIL=MS Access; DriverId=25; DefaultDir=" & DefDir & _
"; DBQ=" & filename
Set oSchema = oConn.OpenSchema(adSchemaTables)
strfile = ""
If ofilesystem.FileExists(App.Path & "\extract.txt") Then
ofilesystem.DeleteFile (App.Path & "\extract.txt")
End If
If ofilesystem.FileExists(App.Path & "\temp.txt") Then
ofilesystem.DeleteFile (App.Path & "\temp.txt")
End If
oSchema.Filter = "TABLE_TYPE='TABLE'"
Do While Not oSchema.EOF
AccessApp.DoCmd.TransferText acExportDelim, , oSchema("TABLE_NAME"), App.Path & _
"\temp.txt"
Set oFile = ofilesystem.OpenTextFile(App.Path & "\temp.txt")
If Not oFile.AtEndOfStream Then
strfile = oFile.ReadAll
End If
oFile.Close
ofilesystem.DeleteFile (App.Path & "\temp.txt")
If strfile <> "" Then
Set oFile = ofilesystem.OpenTextFile(App.Path & "\extract.txt", _
ForAppending, True)
oFile.Write strfile
oFile.Close
End If
strfile = ""
oSchema.MoveNext
Loop
Set oFile = Nothing
Set ofilesystem = Nothing
oSchema.Close
oConn.Close
Set oConn = Nothing
AccessApp.CloseCurrentDatabase
AccessApp.Quit
Set AccessApp = Nothing
End Sub
3. Excel Spreadsheet to Text
Private Sub Convert_ExcelAll(filename As String)
Dim ExcelApp As Excel.Application
Dim ExcelWrkbook As Excel.Workbook
Set ExcelApp = CreateObject("Excel.Application")
Set ExcelWrkbook = ExcelApp.Workbooks.Open(filename)
ExcelWrkbook.SaveAs App.Path & "\extract.txt", 20
ExcelWrkbook.Close False
ExcelApp.Quit
Set ExcelWrkbook = Nothing
Set ExcelApp = Nothing
End Sub
4. Word Document to Text
Private Sub Convert_WordDoc(filename As String)
Dim wordapp As Word.Application
Dim worddoc As Word.Document
Set wordapp = CreateObject("Word.Application")
Set worddoc = wordapp.Documents.Open(filename)
worddoc.SaveAs App.Path & "\extract.txt", 2
worddoc.Close
wordapp.Quit
Set worddoc = Nothing
Set wordapp = Nothing
End Sub