Chrome Puppeteer - response.status()找不到300个状态码

7ajki6be  于 2023-05-04  发布在  Go
关注(0)|答案(2)|浏览(145)

我有一个 puppet 项目与node.js,我想找到所有的链接,目前在一个页面与他们的状态码。在这个页面jumptastic.com/duluth/中,我故意放置了一个带有301的链接(/blg_redirect/),但我的代码一直将其计数为200。帮助?

const puppeteer = require('puppeteer');
const express = require('express');

const Router = express.Router();

async function categorizeLinks(req, res, url) {
    const browser = await puppeteer.launch({ headless: 'false', args: ['--no-sandbox'] })

    const page = await browser.newPage();
    await page.setViewport({ width: 1366, height: 1068 });
    await page.goto(url);

    // Extract all links from the page
    const links = await page.$$eval('a', (anchors) => {
        return anchors.map((anchor) => anchor.href);
    });
    console.log(links);

    const categorizedLinks = {
        '200': [],
        '300': [],
        '400': [],
        '500': [],
    };

    // Iterate through each link and fetch its status code
    for (const link of links) {
        try {
            const response = await page.goto(link, { timeout: 10000 });
            const statusCode = response.status();

            // Categorize the link based on its status code
            if (statusCode >= 200 && statusCode <= 299) {
                categorizedLinks['200'].push(link);
            } else if (statusCode >= 300 && statusCode <= 399) {
                categorizedLinks['300'].push(link);
            } else if (statusCode >= 400 && statusCode <= 499) {
                categorizedLinks['400'].push(link);
            } else if (statusCode >= 500) {
                categorizedLinks['500'].push(link);
            }
        }
        catch (error) {
            console.error(`Failed to fetch link: ${link}`, error);
        }
    }
    await browser.close();

    console.log("categorizedLinks", categorizedLinks);
}

Router.get('/', async function findLinkStatus(req, res) {
    if (!req.query.url) return res.status(400).json({ error: "url is required" })

    const url = req.query.url;

    try {
        await categorizeLinks(req, res, url)
        res.status(200).json({ status: 'Pass', url: url});
        // res.status(200).json({ status: 'Pass', clientsID: Number(clientsID) });
    } catch (err) {
        console.error(err)
        // console.error(`Failed to fetch link: ${link}`, error);
        res.status(500).json({ message: "Something went wrong", status: '500' })
    }
});

module.exports = Router;

输出应该找到一个301重定向链接。它发现所有其他的状态码罚款我目前的知识。

gj3fmq9x

gj3fmq9x1#

您可以使用拦截器,如

await page.setRequestInterception(true);
page.on('request', request => {
  if (request.isNavigationRequest() && request.redirectChain().length)
    request.abort();
  else
    request.continue();
});
await page.goto('https://example.com');

GitHub:如何停止puppeteer跟随重定向#1132
您可以将其更新为使用request.respond

await page.setRequestInterception(true);
page.on('request', request => {
  if (request.isNavigationRequest() && request.redirectChain().length)
    request.respond({
      status: 300, // You might have to check this 2XX seems to be working but never tried with 3XX
      contentType: 'text/plain',
      body: 'Redirects!',
    });
  else
    request.continue();
});
await page.goto('https://example.com');
tvz2xvvm

tvz2xvvm2#

你必须记住,puppeteer是'前端',它是一个浏览器。
因此,当浏览器在服务器响应中遇到重定向状态码时,它将遵循该重定向,并且希望最终结果是浏览器成功加载重定向重定向到的页面,这意味着我们将获得状态码200。

相关问题