From f41ad5e8d17035f56bb3c3a4c27f9e0bcd2845d5 Mon Sep 17 00:00:00 2001 From: Jo Espina Date: Mon, 26 Dec 2022 23:11:42 +0800 Subject: [PATCH 1/3] Add exclude option Simple logic to exclude specific URLs from crawl queue --- index.js | 1 + src/puppeteer_utils.js | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index c0d9f91a..ab01addd 100644 --- a/index.js +++ b/index.js @@ -19,6 +19,7 @@ const defaultOptions = { destination: null, concurrency: 4, include: ["/"], + exclude: [], userAgent: "ReactSnap", // 4 params below will be refactored to one: `puppeteer: {}` // https://github.com/stereobooster/react-snap/issues/120 diff --git a/src/puppeteer_utils.js b/src/puppeteer_utils.js index 820cded0..c897f3a0 100644 --- a/src/puppeteer_utils.js +++ b/src/puppeteer_utils.js @@ -184,7 +184,10 @@ const crawl = async opt => { // Port can be null, therefore we need the null check const isOnAppPort = port && port.toString() === options.port.toString(); - if (hostname === "localhost" && isOnAppPort && !uniqueUrls.has(newUrl) && !streamClosed) { + // Do not add excluded urls to the queue + const isExcluded = options.exclude.includes(newUrl) + + if (hostname === "localhost" && isOnAppPort && !uniqueUrls.has(newUrl) && !streamClosed && !isExcluded) { uniqueUrls.add(newUrl); enqued++; queue.write(newUrl); From b823fcdd730329f03036d13defe1e15ba9da41f6 Mon Sep 17 00:00:00 2001 From: Jo Espina Date: Mon, 26 Dec 2022 23:48:48 +0800 Subject: [PATCH 2/3] Remove exclude from default options --- index.js | 1 - src/puppeteer_utils.js | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/index.js b/index.js index ab01addd..c0d9f91a 100644 --- a/index.js +++ b/index.js @@ -19,7 +19,6 @@ const defaultOptions = { destination: null, concurrency: 4, include: ["/"], - exclude: [], userAgent: "ReactSnap", // 4 params below will be refactored to one: `puppeteer: {}` // https://github.com/stereobooster/react-snap/issues/120 diff --git a/src/puppeteer_utils.js b/src/puppeteer_utils.js index c897f3a0..bc387187 100644 --- a/src/puppeteer_utils.js +++ b/src/puppeteer_utils.js @@ -185,7 +185,7 @@ const crawl = async opt => { const isOnAppPort = port && port.toString() === options.port.toString(); // Do not add excluded urls to the queue - const isExcluded = options.exclude.includes(newUrl) + const isExcluded = options.exclude && options.exclude.includes(newUrl) if (hostname === "localhost" && isOnAppPort && !uniqueUrls.has(newUrl) && !streamClosed && !isExcluded) { uniqueUrls.add(newUrl); From f322a5a907638ad130d999c0cb7e804c21c47632 Mon Sep 17 00:00:00 2001 From: Jo Espina Date: Mon, 26 Dec 2022 23:48:53 +0800 Subject: [PATCH 3/3] Add test --- tests/examples/other/exclude-url.html | 13 +++++++++++++ tests/run.test.js | 21 +++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 tests/examples/other/exclude-url.html diff --git a/tests/examples/other/exclude-url.html b/tests/examples/other/exclude-url.html new file mode 100644 index 00000000..58495720 --- /dev/null +++ b/tests/examples/other/exclude-url.html @@ -0,0 +1,13 @@ + + + + + + + + Foo + Bar + Baz + + + \ No newline at end of file diff --git a/tests/run.test.js b/tests/run.test.js index a39a63ae..32170c8b 100644 --- a/tests/run.test.js +++ b/tests/run.test.js @@ -589,6 +589,27 @@ describe("history.pushState two redirects to the same file", () => { }); }); +describe("excludes urls in options.exclude array", () => { + const source = "tests/examples/other"; + const include = ["/exclude-url.html"]; + + const exclude = ["http://localhost:45671/bar.html", "http://localhost:45671/baz.html"] + + const { fs, filesCreated, names } = mockFs(); + + beforeAll(() => snapRun(fs, { source, include, exclude, port: 45671 })); + test("should not crawl urls in exclude", () => { + expect(filesCreated()).toEqual(3); + expect(names()).toEqual( + expect.arrayContaining([ + `/${source}/exclude-url.html`, + `/${source}/foo.html`, + `/${source}/404.html`, + ]) + ); + }); +}); + describe.skip("publicPath", () => {}); describe.skip("skipThirdPartyRequests", () => {});