diff --git a/index.js b/index.js
index c0d9f91a..51e73a89 100644
--- a/index.js
+++ b/index.js
@@ -19,6 +19,9 @@ const defaultOptions = {
   destination: null,
   concurrency: 4,
   include: ["/"],
+  // When true, a page navigation error (e.g. a timeout) is rethrown as a failure.
+  // Set to false to log the error, skip that page and move on.
+  fastFail: true,
   userAgent: "ReactSnap",
   // 4 params below will be refactored to one: `puppeteer: {}`
   // https://github.com/stereobooster/react-snap/issues/120
diff --git a/src/puppeteer_utils.js b/src/puppeteer_utils.js
index 820cded0..734f142a 100644
--- a/src/puppeteer_utils.js
+++ b/src/puppeteer_utils.js
@@ -157,7 +157,9 @@ const crawl = async opt => {
 
   const onUnhandledRejection = error => {
     console.log("🔥 UnhandledPromiseRejectionWarning", error);
-    shuttingDown = true;
+    if (options.fastFail) {
+      shuttingDown = true;
+    }
   };
   process.on("unhandledRejection", onUnhandledRejection);
 
@@ -237,20 +239,30 @@ const crawl = async opt => {
         beforeFetch && beforeFetch({ page, route });
         await page.setUserAgent(options.userAgent);
         const tracker = createTracker(page);
+        // Set when navigation fails and fastFail is disabled, so the
+        // per-page processing below is skipped for this page.
+        let skipPage = false;
         try {
           await page.goto(pageUrl, { waitUntil: "networkidle0" });
         } catch (e) {
           e.message = augmentTimeoutError(e.message, tracker);
-          throw e;
+          if (options.fastFail) {
+            throw e;
+          }
+          // fastFail is off: report the failure and keep going.
+          console.log(`🔥 failed to crawl page: ${pageUrl}`, e);
+          skipPage = true;
         } finally {
           tracker.dispose();
         }
-        if (options.waitFor) await page.waitFor(options.waitFor);
-        if (options.crawl) {
-          const links = await getLinks({ page });
-          links.forEach(addToQueue);
+        if (!skipPage) {
+          if (options.waitFor) await page.waitFor(options.waitFor);
+          if (options.crawl) {
+            const links = await getLinks({ page });
+            links.forEach(addToQueue);
+          }
+          afterFetch && (await afterFetch({ page, route, browser, addToQueue }));
         }
-        afterFetch && (await afterFetch({ page, route, browser, addToQueue }));
         await page.close();
         console.log(`✅ crawled ${processed + 1} out of ${enqued} (${route})`);
       } catch (e) {