我有一个 Puppeteer 爬虫,每隔几个月就会因为网站的细微改动而失效。我希望加快调试过程,而不是每次出问题时都陷入无底洞。针对下面的代码,有哪些具体的方法可以做到这一点?
const puppeteer = require("puppeteer");
const delay = require("./delay.js")
const fs = require("fs");
// URL(s) of some event in the future.
const url = ["https://www.oddsportal.com/baseball/usa/mlb/oakland-athletics-seattle-mariners-bFxLtxG9/"];
// Collected results, one entry per successfully scraped URL.
const res = [];

// Every CSS attribute selector the scraper depends on, keyed by a descriptive label
// (labels are derived from how each selector's matches are used below). Keeping them
// in one place lets a failure report name the exact selector that stopped matching,
// and makes a site change a one-line fix.
const SELECTORS = {
  averageOdds: '[class="flex flex-col items-center justify-center gap-1 border-r border-[#E0E0E0] min-w-[60px] max-sm:min-w-[55px] max-sm:max-w-[55px]"]',
  userPredictions: '[class="absolute w-full text-center cursor-pointer height-content"]',
  exchangeToggles: '[class="height-content min-mt:!hidden text-black-main font-bold text-xs leading-[18px]"]',
  exchangeLogo: '[class="w-[75px] bg-cover bg-no-repeat"]',
  oddsColumns: '[class="flex flex-col gap-1 text-xs"]',
  openingOdds: '[class="flex gap-1"]',
};

// `slowMo` (previously misspelled `sloMo`, which Puppeteer ignores) delays each
// Puppeteer operation by the given number of milliseconds.
puppeteer
  .launch({ headless: false, slowMo: 1000 })
  .then(async (browser) => {
    try {
      const page = await browser.newPage();
      await page.goto("https://www.oddsportal.com/");
      await delay(1000);
      console.log("Getting data..");

      for (const target of url) {
        try {
          await page.goto(target);
          await page.waitForSelector(SELECTORS.averageOdds);
          // Alternative class string noted by the original author:
          // relative flex flex-col items-center justify-center font-bold text-black-main
          await delay(500);

          // Runs inside the page. `sel` is the SELECTORS map, serialized across by
          // Puppeteer. Returns the scraped record plus the names of optional
          // selectors that matched nothing.
          const { scraped, missing } = await page.evaluate(async (sel) => {
            debugger; // only pauses when DevTools is open for the page (e.g. launch with devtools: true)
            const pause = (milliseconds) => new Promise((resolve) => setTimeout(resolve, milliseconds));
            const missing = [];

            // Required lookup: throws an error naming the selector instead of a
            // generic "cannot read properties of undefined".
            const requireAtLeast = (name, count) => {
              const nodes = document.querySelectorAll(sel[name]);
              if (nodes.length < count) {
                throw new Error(
                  `Selector "${name}" matched ${nodes.length} element(s), expected at least ${count}: ${sel[name]}`
                );
              }
              return nodes;
            };

            // Optional lookup: yields undefined when absent (as the original `?.`
            // chains did) but records which selector/index was missing.
            const optionalAt = (name, index, context) => {
              const node = document.querySelectorAll(sel[name])[index];
              if (node === undefined) {
                missing.push(`${context}: ${name}[${index}]`);
              }
              return node;
            };

            const avgNodes = requireAtLeast("averageOdds", 2);
            const average = {
              avg_odds_lt: avgNodes[0].innerText,
              avg_odds_rt: avgNodes[1].innerText,
            };
            console.log(average);

            const predictionNodes = requireAtLeast("userPredictions", 2);
            const UserPredictions = {
              pxtx_lt: predictionNodes[0].innerText,
              pxtx_rt: predictionNodes[1].innerText,
            };

            const bettingExchange = [];
            const toggles = document.querySelectorAll(sel.exchangeToggles);
            if (toggles.length === 0) {
              missing.push("exchangeToggles");
            }
            for (let i = 0; i < toggles.length; i++) {
              toggles[i].click();
              await pause(500);
              // Even-indexed toggles are stored under the left key, odd-indexed
              // under the right key; everything else is read identically.
              const movementKey = i % 2 === 0 ? "left_OddsMovement" : "right_OddsMovement";
              const context = `toggle ${i}`;
              const exch_name = optionalAt("exchangeLogo", 0, context)?.alt;
              const odds = optionalAt("oddsColumns", 1, context)?.innerText.split("\n");
              const volume = optionalAt("oddsColumns", 2, context)?.innerText.split("\n");
              const opOdds = optionalAt("openingOdds", 1, context)?.innerText.split("\n");
              bettingExchange.push({
                exch_name,
                [movementKey]: { odds, volume },
                OpeningOdds: opOdds,
              });
            }

            return {
              scraped: { average, UserPredictions, bettingExchange },
              missing,
            };
          }, SELECTORS);

          if (missing.length > 0) {
            // Non-fatal: the record still gets stored, but these lookups came back empty.
            console.warn(`${target} =>[WARN] no match for: ${missing.join(", ")}`);
          }
          scraped.url = page.url();
          console.log(`${target} =>[Success]`);
          res.push(scraped);
        } catch (err) {
          // Print the error itself so the failing selector (or timeout) is visible.
          console.error(`${target} =>[ERROR]`, err);
        }
      }
    } finally {
      // Close the browser even when navigation or setup throws.
      await browser.close();
    }
  })
  .catch((err) => {
    console.error("Scraper aborted:", err);
    process.exitCode = 1;
  });
几个小时前脚本还输出 [Success],所以我推测在这期间 11 个样式(CSS 类选择器)中的某一个发生了变化。我该如何更好地调试,才能准确知道是哪个选择器导致脚本失败,而不必逐一排查每个选择器?
我对 Puppeteer 还比较陌生,因为我更习惯使用 Python 的 BeautifulSoup。谢谢!
1条答案
按热度按时间x33g5p2x1#
我有个主意:你可以保存每个 CSS 文件,并在下一次执行时将它们与上一个版本进行比较。
我使用 AI 生成了此代码片段的*部分*内容。