在Puppeteer浏览器中,点击Cookie接受按钮后的意外关闭行为
<p>我正在尝试使用node.js和puppeteer爬取一个网站以获取实时足球信息,但是在console.log("trying to select team element")之后,我的浏览器立即关闭。</p>
<pre class="brush:php;toolbar:false;">const puppeteer = require("puppeteer");
/**
 * Launches a headless browser, opens the LiveScore landing page and
 * clicks the cookie-consent button.
 *
 * @returns {Promise<object>} The page that was opened; its browser is left
 *   running so the caller can keep working with it.
 */
async function openPage() {
  const launchOptions = { headless: true };
  const viewport = { width: 1000, height: 926 };

  const browser = await puppeteer.launch(launchOptions);
  const page = await browser.newPage();
  await page.setViewport(viewport);
  await page.goto("https://www.livescore.com/en/");

  // Accept cookies
  const consentButton = await page.waitForSelector('#onetrust-accept-btn-handler');
  if (!consentButton) {
    return page;
  }

  await consentButton.click();
  console.log("clicked cookie button");
  return page;
}
/**
 * Collects the text content of match elements found on the page.
 *
 * @param {object} page - The page to read from (the caller passes the result
 *   of openPage()).
 * @returns {Promise<string[]>} The text gathered into `content`.
 */
async function scrapeData(page) {
let content = [];
// Get the match elements
// NOTE(review): waitForSelector resolves to a single element handle, not an
// array, so `elements.length` below is presumably undefined and the loop body
// never runs — confirm against the Puppeteer documentation.
let elements = await page.waitForSelector(".Ip")
console.log("trying to select team element")
for (let i=0; i < elements.length; i++) {
// Looks for a ".Ip" descendant inside the current element.
let homeTeamElement = await elements[i].$(".Ip")
if (homeTeamElement) {
const homeTeamText = await homeTeamElement.evaluate(node => node.textContent);
content.push(homeTeamText);
}
};
return content;
};
// Entry point: open the page, scrape it, print the result, then close the browser.
// NOTE(review): close() is only reached when both awaits succeed; nothing in
// this block handles a rejection from openPage() or scrapeData().
(async () => {
const page = await openPage();
const dataScraped = await scrapeData(page);
console.log(dataScraped)
await page.browser().close();
})();</pre>
<p>对于为什么会这样以及对我的代码的进一步批评,欢迎提出任何想法!</p>
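await page.waitForSelector(".Ip") 只返回单个元素(ElementHandle),而不是数组,所以无法对它进行循环遍历。这里也不会出现明确的错误消息:elements.length 是 undefined,循环体一次都不执行,函数返回空数组,随后脚本正常关闭浏览器。可以改用 page.$$eval(如果想尝试最新的 API,也可以使用定位器 locator)来提取数据:
const puppeteer = require("puppeteer"); // ^21.0.2
const url = "https://www.livescore.com/en/";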
// Kept at module scope so the final `.finally` can close the browser even
// when an earlier step rejects.
let browser;

/**
 * Converts the ".Ip" match rows into plain records.
 *
 * This function is handed to `page.$$eval`, which evaluates it in the page
 * context, so it must stay self-contained: it may not reference any name
 * defined in this Node.js script.
 *
 * @param {Element[]} els - Elements matched by the ".Ip" selector.
 * @returns {Array<{time: ?string, home: ?string, away: ?string,
 *   homeTeamScore: ?number, awayTeamScore: ?number}>} One record per row.
 *   A text field is null when its sub-element is missing; a score is null
 *   when its sub-element is missing or its text is empty or not numeric.
 */
function extractMatches(els) {
  return els.map((e) => {
    // Trimmed text of the descendant whose id contains `id`, or null.
    const text = (id) => {
      const node = e.querySelector(`[id*="${id}"]`);
      return node ? node.textContent.trim() : null;
    };

    // Unary plus would turn an empty string into 0, reporting a score the
    // page never showed; report null instead.
    const score = (id) => {
      const raw = text(id);
      if (raw === null || raw === "") {
        return null;
      }
      const value = Number(raw);
      return Number.isFinite(value) ? value : null;
    };

    return {
      time: text("status-or-time"),
      home: text("home-team-name"),
      away: text("away-team-name"),
      homeTeamScore: score("home-team-score"),
      awayTeamScore: score("away-team-score"),
    };
  });
}

(async () => {
  browser = await puppeteer.launch();
  const [page] = await browser.pages();
  await page.setViewport({ width: 1000, height: 926 });
  await page.goto(url, { waitUntil: "domcontentloaded" });

  // not really necessary
  const button = await page.waitForSelector("#onetrust-accept-btn-handler");
  await button.click();

  // Wait until at least one match row exists before reading all of them.
  await page.waitForSelector(".Ip");
  const content = await page.$$eval(".Ip", extractMatches);
  console.log(content);
})()
  .catch((err) => console.error(err))
  .finally(() => browser?.close());
注:
无需用 if (button) 检查 waitForSelector 的返回值:它保证返回该元素,否则如果在规定时间内找不到,就会抛出异常。
await elements[i].$(".Ip") 无法帮你取到任何内容,因为在你已经持有的 .Ip 元素内部并没有嵌套的 .Ip。