diff --git a/package.json b/package.json index 442855b21..a9a00b38d 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "claude-dev", "displayName": "Claude Dev", "description": "Autonomous coding agent right in your IDE, capable of creating/editing files, executing commands, and more with your permission every step of the way.", - "version": "1.9.1", + "version": "1.9.2", "icon": "icons/icon.png", "galleryBanner": { "color": "#1E1E1E", diff --git a/src/utils/UrlContentFetcher.ts b/src/utils/UrlContentFetcher.ts index fb0788e12..a3408e9b5 100644 --- a/src/utils/UrlContentFetcher.ts +++ b/src/utils/UrlContentFetcher.ts @@ -7,6 +7,7 @@ import TurndownService from "turndown" // @ts-ignore import PCR from "puppeteer-chromium-resolver" import pWaitFor from "p-wait-for" +import delay from "delay" interface PCRStats { puppeteer: { launch: typeof launch } @@ -114,7 +115,9 @@ export class UrlContentFetcher { try { // networkidle2 isn't good enough since page may take some time to load. 
we can assume locally running dev sites will reach networkidle0 in a reasonable amount of time
-			await this.page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle0"] })
+			await this.page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] })
+			// await this.page.goto(url, { timeout: 10_000, waitUntil: "load" })
+			await this.waitTillHTMLStable(this.page) // in case the page is loading more resources
 		} catch (err) {
 			if (!(err instanceof TimeoutError)) {
 				logs.push(`[Navigation Error] ${err.toString()}`)
@@ -143,4 +146,44 @@ export class UrlContentFetcher {
 			logs: logs.join("\n"),
 		}
 	}
+
+	/**
+	 * Polls the page's serialized HTML until its length stops changing, or until the sample budget runs out.
+	 *
+	 * page.goto { waitUntil: "networkidle0" } may not ever resolve, and not waiting could return page
+	 * content too early before js has loaded, so the HTML length is used as a "still rendering?" signal.
+	 * https://stackoverflow.com/questions/52497252/puppeteer-wait-until-page-is-completely-loaded/61304202#61304202
+	 *
+	 * @param page - page whose markup is sampled through `page.content()`
+	 * @param timeout - polling budget in ms; one sample is taken per 500 ms, so it allows `timeout / 500` samples
+	 * @returns resolves once the length matched the previous sample three times in a row, or after the last sample
+	 */
+	private async waitTillHTMLStable(page: Page, timeout = 5_000): Promise<void> {
+		const checkDurationMsecs = 500
+		const minStableSizeIterations = 3
+		const maxChecks = Math.floor(timeout / checkDurationMsecs)
+		let lastHTMLSize = 0
+		let countStableSizeIterations = 0
+
+		for (let check = 1; check <= maxChecks; check++) {
+			const currentHTMLSize = (await page.content()).length
+
+			// lastHTMLSize is still 0 on the first sample, so there is nothing to compare against yet
+			if (lastHTMLSize !== 0 && currentHTMLSize === lastHTMLSize) {
+				countStableSizeIterations++
+			} else {
+				countStableSizeIterations = 0 // size changed, start counting again
+			}
+
+			if (countStableSizeIterations >= minStableSizeIterations) {
+				return
+			}
+
+			lastHTMLSize = currentHTMLSize
+			// skip the sleep after the final sample: no further check would follow it
+			if (check < maxChecks) {
+				await delay(checkDurationMsecs)
+			}
+		}
+	}
 }