selenium 在VBA-Selenium中从嵌套的div元素提取动态加载的表格并导出到Excel

oxiaedzo  于 2022-11-10  发布在  其他
关注(0)|答案(1)|浏览(178)

我有以下代码,用于导航到网站、登录,然后保存所显示网页的数据。但是在保存到.txt文件的html.body.innerHTML中看不到表格,只看到下面的div内容(一个"Loading..."占位符)。当我在浏览器中检查元素时,表格位于该div下多层嵌套的div元素中,我想把这个表格输出到Excel。如果能有任何帮助来实现这一点,我将不胜感激。

<div id="container_div" style="height: 100%;">
    <table style="width: 100%; height: 100%;">
        <tbody><tr>
            <td align="center" valign="middle">
                <img alt="progress" src="/ria/images/progress.gif">
                <br>
                <span>Loading...</span>
            </td>
        </tr>
    </tbody></table>
</div>

VBA代码:

Option Explicit

' Module-level driver so other procedures in this module can reach the session.
Private ch          As Selenium.ChromeDriver

' Logs in to the site, waits until the dynamically loaded content has replaced
' the "Loading..." placeholder, then dumps the rendered <body> HTML to a text file.
'
' Required references: Selenium Type Library, Microsoft HTML Object Library,
' Microsoft Scripting Runtime.
'
' The browser is always closed on exit, including when an error occurs.
Sub TestSelenium()

    Const FILE_PATH As String = "C:\temp\MyTestFile.txt"
    Const PAGE_LOAD_TIMEOUT_SEC As Long = 60
    Const POLL_INTERVAL_MS As Long = 500

    ' True once the document has finished loading, the login form is gone and the
    ' progress placeholder (<img alt="progress">) is no longer in the DOM.
    ' NOTE(review): if the real page signals readiness differently, adjust this script.
    Const CONTENT_READY_JS As String = _
        "return document.readyState === 'complete'" & _
        " && document.getElementById('logInForm') === null" & _
        " && document.querySelector('img[alt=progress]') === null;"

    Dim html        As New HTMLDocument        ' Requires Microsoft HTML Library
    Dim fso         As FileSystemObject
    Dim fileStream  As TextStream
    Dim deadline    As Date

    On Error GoTo CleanFail

    Set ch = New Selenium.ChromeDriver

    ' Browser arguments only take effect when they are set BEFORE the session starts.
    ch.AddArgument "--headless"
    ch.Start
    ch.Timeouts.ImplicitWait = 20000        ' 20 seconds (value is in milliseconds)
    ch.Get "https://siteaddress"

    With ch.FindElementById("logInForm")
        .FindElementById("j_username").SendKeys "username"
        .FindElementById("j_password").SendKeys "password"
        .FindElementById("submitButton", timeout:=1000000).Click
    End With

    ' If the table lives inside an iframe, switch to it before reading the DOM:
    ' ch.SwitchToFrame ch.FindElementByTag("iframe", timeout:=10000)

    ' The page renders its content asynchronously; reading the DOM right after the
    ' click only returns the "Loading..." placeholder. Poll until the real content
    ' is in place, and give up with an error after PAGE_LOAD_TIMEOUT_SEC.
    deadline = DateAdd("s", PAGE_LOAD_TIMEOUT_SEC, Now)
    Do Until CBool(ch.ExecuteScript(CONTENT_READY_JS))
        If Now > deadline Then
            Err.Raise vbObjectError + 513, "TestSelenium", _
                      "Timed out waiting for the page content to finish loading."
        End If
        ch.Wait POLL_INTERVAL_MS
    Loop

    html.body.innerHTML = ch.ExecuteScript("return document.body.innerHTML;")
    Debug.Print html.body.innerText

    ' Overwrite:=True replaces any previous dump, so no separate delete is needed.
    ' Unicode:=True avoids run-time error 5 when the HTML contains non-ANSI characters.
    Set fso = New FileSystemObject
    Set fileStream = fso.CreateTextFile(FILE_PATH, Overwrite:=True, Unicode:=True)
    fileStream.WriteLine html.body.innerHTML
    fileStream.Close
    Set fileStream = Nothing

CleanExit:
    ' Best-effort cleanup: never let a cleanup failure mask the original error.
    On Error Resume Next
    If Not fileStream Is Nothing Then fileStream.Close
    If Not ch Is Nothing Then ch.Quit
    On Error GoTo 0
    Exit Sub

CleanFail:
    Debug.Print "TestSelenium failed: " & Err.Number & " - " & Err.Description
    MsgBox "TestSelenium failed: " & Err.Description, vbExclamation
    Resume CleanExit

End Sub
aurhwmvo

aurhwmvo1#

我没有找到能够循环遍历页面上所有表格的解决方案,但可以用以下代码,通过完整的XPath从某个特定表格中提取数据:

' Copies one specific HTML table, located by its absolute XPath, into Sheet1:
' the <thead> rows first, then the <tbody> rows, starting at worksheet row Lastrow.
' Expects ch to be a started Selenium driver that is already showing the loaded page.
Const TABLE_XPATH As String = "/html/body/div[1]/div/div[2]/div/div[4]/div/table/tbody/tr/td/div/table/tbody/tr[1]/td/div/div/div/div/div/div/div[1]/div[3]/div/div[4]/div/div/div/div/div[8]/div/div/div/div[1]/div/table"

' In VBA every variable needs its own "As" clause; otherwise it is a Variant.
Dim tbl     As WebElement
Dim tr      As WebElement
Dim cell    As WebElement
Dim ws      As Worksheet
Dim c       As Long        ' current column (1-based)
Dim r       As Long        ' row offset from Lastrow (0-based); Long avoids overflow past 32767 rows
Dim Lastrow As Long

Set ws = Sheets("Sheet1")
Lastrow = 1

' Locate the table once and reuse it for both the header and the body.
Set tbl = ch.FindElementByXPath(TABLE_XPATH)

'' Print header
For Each tr In tbl.FindElementByTag("thead").FindElementsByTag("tr")
    c = 1
    For Each cell In tr.FindElementsByTag("th")
        ws.Cells(Lastrow + r, c).Value = cell.Text
        c = c + 1
    Next cell
    r = r + 1
Next tr

'' Print table data (r keeps counting, so the data follows the header rows)
For Each tr In tbl.FindElementByTag("tbody").FindElementsByTag("tr")
    c = 1
    For Each cell In tr.FindElementsByTag("td")
        ws.Cells(Lastrow + r, c).Value = cell.Text
        c = c + 1
    Next cell
    r = r + 1
Next tr

相关问题