diff --git a/src/crawler.ts b/src/crawler.ts index 0ea9ba56..ff26f24d 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -932,7 +932,12 @@ self.__bx_behaviors.selectMainBehavior(); // run custom driver here await this.driver({ page, data, crawler: this }); - data.title = await page.title(); + data.title = await timedRun( + page.title(), + PAGE_OP_TIMEOUT_SECS, + "Timed out getting page title, something is likely wrong", + logDetails, + ); data.favicon = await this.getFavicon(page, logDetails); await this.doPostLoadActions(opts); diff --git a/src/util/browser.ts b/src/util/browser.ts index 091834e7..df4a2293 100644 --- a/src/util/browser.ts +++ b/src/util/browser.ts @@ -371,8 +371,8 @@ export class Browser { } } - addInitScript(page: Page, script: string) { - return page.evaluateOnNewDocument(script); + async addInitScript(page: Page, script: string) { + await page.evaluateOnNewDocument(script); } async checkScript(cdp: CDPSession, filename: string, script: string) { diff --git a/src/util/recorder.ts b/src/util/recorder.ts index 0f16a4ff..f137ba46 100644 --- a/src/util/recorder.ts +++ b/src/util/recorder.ts @@ -1249,14 +1249,19 @@ export class Recorder { return { fetched, mime, ts }; } - async getCookieString(cdp: CDPSession, url: string) { - const cookieList: string[] = []; - const { cookies } = await cdp.send("Network.getCookies", { urls: [url] }); - for (const { name, value } of cookies) { - cookieList.push(`${name}=${value}`); - } + async getCookieString(cdp: CDPSession, url: string): Promise<string> { + try { + const cookieList: string[] = []; + const { cookies } = await cdp.send("Network.getCookies", { urls: [url] }); + for (const { name, value } of cookies) { + cookieList.push(`${name}=${value}`); + } - return cookieList.join(";"); + return cookieList.join(";"); + } catch (e) { + logger.warn("Error getting cookies", { page: url, e }, "recorder"); + return ""; + } } } diff --git a/src/util/screencaster.ts b/src/util/screencaster.ts index 80c94057..f87dc877 100644 --- 
a/src/util/screencaster.ts +++ b/src/util/screencaster.ts @@ -123,7 +123,9 @@ class RedisPubSubTransport { this.castChannel = `c:${crawlId}:cast`; this.ctrlChannel = `c:${crawlId}:ctrl`; - void this.init(redisUrl); + this.init(redisUrl).catch((e) => + logger.warn("error starting cast", e, "screencast"), + ); } async init(redisUrl: string) { diff --git a/src/util/timing.ts b/src/util/timing.ts index dfafaf3c..714345e3 100644 --- a/src/util/timing.ts +++ b/src/util/timing.ts @@ -26,7 +26,7 @@ export function timedRun( return Promise.race([promise, rejectPromiseOnTimeout(timeout)]).catch( (err) => { - if (err == "timeout reached") { + if (err === "timeout reached") { const logFunc = isWarn ? logger.warn : logger.error; logFunc.call( logger,