尝试将网页文本复制到Excel中

chhqkbe1  于 2023-01-14  发布在  其他
关注(0)|答案(2)|浏览(108)

我使用这段代码打开Edge,这样我就可以抓取网页的文本。我可以让Edge打开并显示页面,我可以手动复制页面的文本,然后执行完代码,它会根据需要粘贴文本。但我的技能水平还不足以让Excel复制文本。这是我需要的最后一步。
任何帮助都将不胜感激。
我想复制整个页面的文本,而不是其中的一部分。

' Opens a Google Maps Directions API URL in Microsoft Edge, pauses on a message
' box so the page text can be copied by hand, then pastes into sheet1 cell A1.
Sub LOADEdge()
    ' Launch the URL through the Windows shell; the "microsoft-edge:" prefix routes it to Edge.
    Set obj = CreateObject("Shell.Application")
    obj.ShellExecute "microsoft-edge:https://maps.googleapis.com/maps/api/directions/json?origin=Disneyland&destination=Universal+Studios+HOllywood&key=your_API_Key"
   
    'This is where I manually grab the text from the page.
    ' The message box holds the macro here until it is dismissed.
    MsgBox "wiating"
    
    ' NOTE(review): "Page = xlpagevalues" is a comparison expression, not a named
    ' argument (named arguments are written Name:=value), and neither identifier is
    ' declared in this Sub - confirm which PasteSpecial argument was intended.
    Worksheets("sheet1").Range("A1").PasteSpecial Page = xlpagevalues
    
End Sub

我试着研究如何复制网页,但大多数需要安装Selenium,我不能让我的IT部门为我做这件事。我找到了如何打开浏览器的变通方法,但我找不到任何东西告诉我如何复制网页的文本,除了:

' Fragment only: IE is not defined here (presumably an InternetExplorer automation object - confirm).
IE.ExecWB 17, 0 '// SelectAll
IE.ExecWB 12, 2 '// Copy selection

但那没用。

gjmwrych

gjmwrych1#

虽然这是可行的,但我不确定VBA是否是一个很好的网页抓取工具。我试过几次,它从来没有给出我想要的结果。这里有一些可以考虑的替代方案:

2uluyalo

2uluyalo2#

这是一段使用 exceljs 和 axios 库的 Node.js 代码。
此代码将写入excel文件。
它需要在第4行粘贴'API_KEY'字符串。

保存为get-map.js

const axios = require('axios')
const Excel = require('exceljs');

const API_KEY ='your_API_Key'

/**
 * Converts a 1-based column index into Excel column letters.
 *
 * Examples: 1 -> "A", 26 -> "Z", 27 -> "AA", 702 -> "ZZ", 703 -> "AAA".
 *
 * @param {number} index - 1-based column index (a positive integer).
 * @returns {string} The column letters; an empty string when index is below 1.
 */
const columnName = (index) => {
    let cname = ''
    // Column letters are bijective base-26 (there is no "zero" letter), so
    // shift by one before extracting each letter and before moving on to the
    // next, more significant, letter.
    for (let n = index; n > 0; n = Math.floor((n - 1) / 26)) {
        cname = String.fromCharCode(65 + ((n - 1) % 26)) + cname
    }
    return cname;
}

/**
 * Requests directions between two places from the Google Maps Directions
 * JSON endpoint.
 *
 * @param {string} origin - Start place; spaces are sent as '+'.
 * @param {string} destination - End place; spaces are sent as '+'.
 * @returns {Promise<object>} Resolves with the response body (`resp.data`).
 *   Rejects with the original error if the request fails, so the caller sees
 *   the real cause.
 */
const getMapInformation = async (origin, destination) => {
    // Use locals instead of reassigning the parameters.
    const from = origin.replace(/ /g, "+")  // replace from space to '+'
    const to = destination.replace(/ /g, "+") // replace from space to '+'
    const resp = await axios.get(
        `https://maps.googleapis.com/maps/api/directions/json?origin=${from}&destination=${to}&key=${API_KEY}`
    );
    // An async function already wraps its return value in a promise and turns
    // a thrown error into a rejection, so no try/catch re-wrapping is needed.
    return resp.data;
}

/**
 * Writes selected parts of a Directions API response into a new workbook
 * with a single worksheet named "map", then saves it to `fileName`.
 *
 * Layout (starting at A1, one key/value pair per row):
 *   - every key/value of each `geocoded_waypoints` entry, with a blank row
 *     after each entry;
 *   - for each route: `bounds` (one row per sub-key), `summary`, and the
 *     first leg of `legs` (its steps are flattened, skipping
 *     `html_instructions` and `polyline`). Other route keys are ignored.
 *
 * Errors are logged with console.log rather than thrown.
 *
 * @param {string} fileName - Path of the .xlsx file to create.
 * @param {object} data - Parsed Directions API response.
 * @returns {Promise<void>} Settles once the write has finished or failed.
 */
const writeMapInformation = (fileName, data) => {
    try {
        const workbook = new Excel.Workbook();
        // make workbook with 'map' name
        const ws = workbook.addWorksheet("map")

        // Start Cell A1 for title column
        const headerColumn = 1
        let row_number = 1

        // Writes `values` into consecutive cells of the current row, starting
        // `offset` columns to the right of the title column.
        const writeCells = (offset, ...values) => {
            values.forEach((value, i) => {
                ws.getCell(columnName(headerColumn + offset + i) + String(row_number)).value = value
            })
        }

        // Writes every leg field except 'steps' as a row, then one row per
        // step field.
        const writeLeg = (leg) => {
            for (const [legKey, legValue] of Object.entries(leg || {})) {
                if (legKey !== 'steps') {
                    writeCells(1, legKey, legValue)
                    row_number++;
                    continue
                }
                for (const step of Object.values(legValue || {})) {
                    for (const [stepKey, stepValue] of Object.entries(step || {})) {
                        // skip two fields (html_instructions & polyline)
                        if (stepKey === 'html_instructions' || stepKey === 'polyline') {
                            continue
                        }
                        writeCells(1, stepKey, stepValue)
                        row_number++;
                    }
                }
            }
        }

        // #1 geocoded_waypoints section
        for (const waypoint of Object.values(data.geocoded_waypoints || {})) {
            for (const [key, value] of Object.entries(waypoint || {})) {
                writeCells(0, key, value)
                row_number++;
            }
            row_number++;   // skip one row
        }

        // #2 routes section
        for (const route of Object.values(data.routes || {})) {
            for (const [key, value] of Object.entries(route || {})) {
                if (key === 'bounds') {
                    writeCells(0, key)
                    for (const [corner, coords] of Object.entries(value || {})) {
                        writeCells(1, corner, coords)
                        row_number++;
                    }
                } else if (key === 'summary') {
                    writeCells(0, key, value)
                    // Advance past the summary row; otherwise whatever is
                    // written next (e.g. the next route's bounds) lands on
                    // the same row and overwrites it.
                    row_number++;
                } else if (key === 'legs') {
                    writeCells(0, key)
                    row_number++;
                    // Only the first leg is exported.
                    writeLeg((value || [])[0])
                    row_number++;
                }
            }
        }

        // Return the write promise so callers can wait for the file.
        return workbook.xlsx
            .writeFile(fileName)
            .then(() => {
                console.log('file created');
            })
            .catch(err => {
                console.log(err.message);
            });
    } catch (err) {
        console.log(err.message);
        return Promise.resolve();
    }
}
// get map direction (from, to) and write it to map.xlsx
getMapInformation('Disneyland', 'Universal Studios Hollywood')
    .then((result) => {
        return writeMapInformation('map.xlsx', result)
    })
    .catch((err) => {
        // Handle a failed request here instead of leaving an unhandled
        // rejection; keep a non-zero exit code so the failure stays visible.
        console.log(err.message);
        process.exitCode = 1;
    })

安装库

npm install axios exceljs

运行它

node get-map.js

结果

相关问题