NodeJS Puppeteer -无法在酒店网站中拉取正确的CSS选择器

5ssjco0h  于 2023-04-29  发布在  Node.js
关注(0)|答案(1)|浏览(142)

**目标:**将所有酒店名称保存到一个.txt文件中(在网站上完成输入和点击操作之后)。
**问题:**我相信我的CSS选择器('[class^="js-hotel-location"] > [class^="js-hotel-name"] > span.l-property-name')可能不正确,因为我的candidates函数返回一个空数组,而且,没有if(!nameEl) return false;行,我得到一个TypeError: Cannot read properties of undefined (reading 'trim')
candidates_distance.txt的预期输出:["Aloft Chicago Downtown River North", "The Gwen, a Luxury Collection Hotel, Michigan Avenue Chicago", "Sheraton Grand Chicago Riverwalk"]
实际输出:[]

导致Issue的代码:candidates函数返回空数组:

// ! STUCK: candidates function returning empty array - likely CSS selector is incorrect
// Excerpt from the full script: `page` is the Puppeteer page and `fs` is the module required there.
// $$eval runs the callback inside the page with every element matched by the selector.
    const candidates = await page.$$eval('[class^="js-hotel-location"] > [class^="js-hotel-name"] > span.l-property-name', elements => {
        return elements
          .filter(el => {
            // NOTE(review): `el` is already a span matched by the outer selector, and
            // querySelector only returns descendants of `el`, so `nameEl` is null unless
            // a matching span is nested inside another one - this drops every element.
            const nameEl = el.querySelector('[class^="js-hotel-location"] > [class^="js-hotel-name"] > span.l-property-name');
            if(!nameEl) return false;
            const name = nameEl.textContent.trim();
            // Keep only elements whose name text is non-empty.
            return name !== "";
          })
          .map(el => el.textContent.trim());
    });
    // Serialize the collected names as JSON into candidates_name.txt.
    // NOTE(review): the full script requires 'fs/promises', whose writeFile returns a
    // promise; presumably this callback is never invoked there - confirm.
    fs.writeFile('candidates_name.txt', JSON.stringify(candidates), function (err) {
        if (err) throw err;
    });

复现问题的代码:

constants.js

/**
 * Search settings read by index.js: the start page, the destination typed
 * into the search box, the stay dates picked in the calendar and the promo
 * code entered in the special-rates popup.
 */
const searchSettings = {
  URL: 'https://www.marriott.com/default.mi',
  DESTINATION: 'Chicago, IL, USA',
  START_MONTH: 'August',
  START_DAY: 7,
  END_MONTH: 'August',
  END_DAY: 10,
  PROMO_CODE: 'MMP',
};

module.exports = searchSettings;

index.js

const puppeteer = require('puppeteer-extra');
const constants = require('./marriot_constants.js');
const blockResources = require('puppeteer-extra-plugin-block-resources');
const fs = require('fs/promises');

// ! NOT WORKING (as reported by the author): register the block-resources
// plugin so that requests for images and stylesheets are blocked.
const blockedTypes = new Set(['image', 'stylesheet']);
const pluginConfig = blockResources({ blockedTypes });
puppeteer.use(pluginConfig);

// Kept outside the async function so the final `finally` can close the
// browser even when a step in the middle throws.
let browser;

/**
 * Drives the hotel search end to end: opens the start page, fills in the
 * destination, the stay dates and the promo code, submits the search, and
 * writes the hotel names found on the result page to candidates_name.txt
 * as a JSON array.
 */
(async () => {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();
    const ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36";
    await page.setUserAgent(ua);
    await page.setViewport({width: 800, height: 10700});
    await page.goto(constants.URL, {waitUntil: "domcontentloaded"});

    await page.type('[id$="-search-destination"]', constants.DESTINATION);

    // Open the date picker.
    await page.click(".search__calendar-value");
    await page.waitForSelector(".search-dates-popup");

    // Click "Next Month" (at most 12 times) until the first displayed month
    // contains the requested month name.
    const findMonth = async targetMonth => {
        for (let i = 0; i < 12; i++) {
            const month = await page.$eval(".DayPicker-Month", el => el.textContent);
            if (month.includes(targetMonth)) break;
            await page.click('[aria-label="Next Month"]');
        }
    };

    // Click the calendar cell whose trimmed text equals the requested day.
    // Fails with a descriptive error instead of an opaque
    // "Cannot read properties of undefined" when no such cell exists.
    const chooseDay = day => page.$$eval(".DayPicker-Day-value", (els, day) => {
        const target = els.find(e => e.textContent.trim() === day);
        if (!target) throw new Error(`Day "${day}" not found in the calendar`);
        target.click();
    }, String(day));

    await findMonth(constants.START_MONTH);
    await chooseDay(constants.START_DAY);
    await findMonth(constants.END_MONTH);
    await chooseDay(constants.END_DAY);
    await page.click('button.m-button-secondary');

    // Open the special-rates popup, pick the 5th radio option and type the promo code.
    await page.click('[class^="StyledSpecialRatesDiv"] > [class="t-font-s"]');
    await page.waitForSelector('[class^="StyledPopupMain"]');
    await page.click('[class^="StyledRadioButtonDiv"]:nth-child(5)');
    await page.type('[class^="StyledRadioButtonDiv"]:nth-child(5)', constants.PROMO_CODE);
    await page.waitForSelector('[class="crop-promo-code"] > [class="m-button-secondary m-button-s"]');
    await page.click('[class="crop-promo-code"] > [class="m-button-secondary m-button-s"]');

    // Start waiting for the navigation BEFORE clicking, otherwise the
    // navigation can finish before page.waitForNavigation() is registered.
    await Promise.all([
        page.waitForNavigation(),
        page.click('[id$="find-a-hotel-homePage-form"] > [class^="StyledFindBtn"]'),
    ]);

    // The hotel-name spans are not direct children of the js-hotel-location
    // elements, so the child-combinator selector matched nothing; select them
    // through the property record container instead. Each matched element is
    // already the name span, so its text is read directly.
    const candidates = await page.$$eval(
        '[id^="property-record-map"] span.l-property-name',
        elements => elements
            .map(el => el.textContent.trim())
            .filter(name => name !== "")
    );

    // fs is 'fs/promises': writeFile takes no callback and returns a promise.
    // Await it so the file is written (and errors reach the catch below)
    // before the browser is closed.
    await fs.writeFile('candidates_name.txt', JSON.stringify(candidates));

})()
.catch(err => console.error(err))
.finally(() => browser?.close());
vq8itlhq

vq8itlhq1#

您可以在浏览器控制台中测试选择器,按照以下格式编写并运行它。它将显示您的选择器所选择的内容。

// Run in the browser DevTools console: $$ lists every element matching the selector.
$$('span.l-property-name');

在本例中,[class^="js-hotel-name"]并不直接位于[class^="js-hotel-location"]之下,因此它不选择任何内容。
应该是

[class^="js-hotel-location"] [class^="js-hotel-name"] > span.l-property-name

但这个选择器还会选中额外的度假屋(vacation home)优惠,因此您的选择器应该是

[id^="property-record-map"] span.l-property-name

你也不需要在你的代码中使用filter部分:

// Read the trimmed text of every hotel-name span inside a property record.
const candidates = await page.$$eval(
    '[id^="property-record-map"] span.l-property-name',
    (nameSpans) => nameSpans.map((span) => span.textContent.trim()),
);

按照constants.js文件中的设置进行搜索时,选择器返回的酒店比您列出的预期结果要多,而我能得到以下输出的唯一方法是

["Aloft Chicago Downtown River North", "The Gwen, a Luxury Collection Hotel, Michigan Avenue Chicago", "Sheraton Grand Chicago Riverwalk"]

如果我做下面的额外步骤,按品牌 (豪华精选,喜来登,雅乐轩) 和价格 (200+美元) 过滤:

// add after -> await page.waitForNavigation();

/**
 * Waits until an element matching `selector` exists on `page`, then clicks it.
 *
 * @param {string} selector - CSS selector of the element to click.
 * @returns {Promise<void>} Resolves once the click has been performed.
 */
async function waitClick(selector) {
    const handle = await page.waitForSelector(selector);
    await handle.click();
}

// Open the "All Filters" panel, then its Brands section.
await waitClick('[data-component-name="searchFilters"] a.m-button');
await waitClick('a[data-code=Brands]');

// Brand toggles to enable (presumably Luxury Collection, Sheraton and Aloft,
// going by the brand codes - confirm against the page).
const brandToggles = [
    'a[data-for=brands_LC]',
    'a[data-for=brands_SI]',
    'a[data-for=brands_AL]',
];
for (const toggle of brandToggles) {
    await waitClick(toggle);
    // Wait for the filter form to be visible before continuing.
    await page.waitForSelector('div.js-sort-and-filter-form', { visible: true });
}

// Switch to the Price section and pick the "200+ USD" option.
await waitClick('a[data-code=price]');
await waitClick('label[for=price_3]');

await page.waitForSelector('div.js-sort-and-filter-form', { visible: true });

// Apply the selected filters.
await waitClick('form.js-submit-form button[type=Submit]');

await page.waitForSelector('body');

// Read the trimmed text of every hotel-name span inside a property record.
const candidates = await page.$$eval(
    '[id^="property-record-map"] span.l-property-name',
    (nameSpans) => nameSpans.map((span) => span.textContent.trim()),
);

// next comes the fs.writeFile part

相关问题