Wait for HTML to stabilize before considering the page loaded

This commit is contained in:
Saoud Rizwan 2024-09-21 16:01:34 -04:00
parent 99a82e5563
commit 2e272a1ad3
2 changed files with 38 additions and 2 deletions

View File

@ -2,7 +2,7 @@
"name": "claude-dev",
"displayName": "Claude Dev",
"description": "Autonomous coding agent right in your IDE, capable of creating/editing files, executing commands, and more with your permission every step of the way.",
"version": "1.9.1",
"version": "1.9.2",
"icon": "icons/icon.png",
"galleryBanner": {
"color": "#1E1E1E",

View File

@ -7,6 +7,7 @@ import TurndownService from "turndown"
// @ts-ignore
import PCR from "puppeteer-chromium-resolver"
import pWaitFor from "p-wait-for"
import delay from "delay"
interface PCRStats {
puppeteer: { launch: typeof launch }
@ -114,7 +115,9 @@ export class UrlContentFetcher {
try {
// networkidle2 isn't good enough since page may take some time to load. we can assume locally running dev sites will reach networkidle0 in a reasonable amount of time
await this.page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle0"] })
await this.page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] })
// await this.page.goto(url, { timeout: 10_000, waitUntil: "load" })
await this.waitTillHTMLStable(this.page) // in case the page is loading more resources
} catch (err) {
if (!(err instanceof TimeoutError)) {
logs.push(`[Navigation Error] ${err.toString()}`)
@ -143,4 +146,37 @@ export class UrlContentFetcher {
logs: logs.join("\n"),
}
}
// page.goto { waitUntil: "networkidle0" } may not ever resolve, and not waiting could return page content too early before js has loaded
// https://stackoverflow.com/questions/52497252/puppeteer-wait-until-page-is-completely-loaded/61304202#61304202
/**
 * Polls the page's serialized HTML until its length stops changing, or until
 * the check budget derived from `timeout` is used up.
 *
 * The HTML is sampled every 500 ms. The page is considered stable once the
 * previous sample was non-empty and the length has matched the previous
 * sample on 3 consecutive checks. Running out of checks is not treated as an
 * error: the method simply returns and the caller proceeds with whatever the
 * page currently contains.
 *
 * @param page - Page whose HTML is sampled via `page.content()`.
 * @param timeout - Polling budget in milliseconds. It bounds the number of
 *   checks (`timeout / 500`), not wall-clock time, so time spent inside
 *   `page.content()` is not counted against it.
 */
private async waitTillHTMLStable(page: Page, timeout = 5_000): Promise<void> {
	const checkDurationMsecs = 500
	const maxChecks = timeout / checkDurationMsecs
	const minStableSizeIterations = 3
	let lastHTMLSize = 0
	let countStableSizeIterations = 0

	for (let checkCount = 1; checkCount <= maxChecks; checkCount++) {
		const html = await page.content()
		const currentHTMLSize = html.length
		console.log("last: ", lastHTMLSize, " <> curr: ", currentHTMLSize)

		// A zero-length previous sample means "no baseline yet", so it never counts as stable.
		if (lastHTMLSize !== 0 && currentHTMLSize === lastHTMLSize) {
			countStableSizeIterations++
		} else {
			countStableSizeIterations = 0 // size changed: start counting again
		}

		if (countStableSizeIterations >= minStableSizeIterations) {
			console.log("Page rendered fully...")
			break
		}

		lastHTMLSize = currentHTMLSize

		// Only sleep when another check will actually follow; sleeping after the
		// final check would just delay the caller without observing anything.
		if (checkCount + 1 <= maxChecks) {
			await delay(checkDurationMsecs)
		}
	}
}
}