Skip to content

Commit 9ef9121

Browse files
authored
fix: setting maxPagesPerCrawl to 0 sets no limit (#454)
Using 0 as the "no limit" value for `maxRequestsPerCrawl` stopped working in the generic Actors due to changes in Crawlee. This PR handles the logic directly in the Actors by passing `undefined` as `maxRequestsPerCrawl` when `maxPagesPerCrawl` is 0, so we don't rely on undefined behaviour from Crawlee. Closes #453.
1 parent faca4ea commit 9ef9121

File tree

6 files changed

+24
-6
lines changed

6 files changed

+24
-6
lines changed

packages/actor-scraper/camoufox-scraper/src/internals/crawler_setup.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,10 @@ export class CrawlerSetup implements CrawlerSetupOptions {
241241
respectRobotsTxtFile: this.input.respectRobotsTxtFile,
242242
maxConcurrency: this.input.maxConcurrency,
243243
maxRequestRetries: this.input.maxRequestRetries,
244-
maxRequestsPerCrawl: this.input.maxPagesPerCrawl,
244+
maxRequestsPerCrawl:
245+
this.input.maxPagesPerCrawl === 0
246+
? undefined
247+
: this.input.maxPagesPerCrawl,
245248
proxyConfiguration: (await Actor.createProxyConfiguration(
246249
this.input.proxyConfiguration,
247250
)) as any as ProxyConfiguration,

packages/actor-scraper/cheerio-scraper/src/internals/crawler_setup.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -231,7 +231,10 @@ export class CrawlerSetup implements CrawlerSetupOptions {
231231
failedRequestHandler: this._failedRequestHandler.bind(this),
232232
respectRobotsTxtFile: this.input.respectRobotsTxtFile,
233233
maxRequestRetries: this.input.maxRequestRetries,
234-
maxRequestsPerCrawl: this.input.maxPagesPerCrawl,
234+
maxRequestsPerCrawl:
235+
this.input.maxPagesPerCrawl === 0
236+
? undefined
237+
: this.input.maxPagesPerCrawl,
235238
additionalMimeTypes: this.input.additionalMimeTypes,
236239
autoscaledPoolOptions: {
237240
maxConcurrency: this.input.maxConcurrency,

packages/actor-scraper/jsdom-scraper/src/internals/crawler_setup.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,10 @@ export class CrawlerSetup implements CrawlerSetupOptions {
232232
failedRequestHandler: this._failedRequestHandler.bind(this),
233233
respectRobotsTxtFile: this.input.respectRobotsTxtFile,
234234
maxRequestRetries: this.input.maxRequestRetries,
235-
maxRequestsPerCrawl: this.input.maxPagesPerCrawl,
235+
maxRequestsPerCrawl:
236+
this.input.maxPagesPerCrawl === 0
237+
? undefined
238+
: this.input.maxPagesPerCrawl,
236239
additionalMimeTypes: this.input.additionalMimeTypes,
237240
autoscaledPoolOptions: {
238241
maxConcurrency: this.input.maxConcurrency,

packages/actor-scraper/playwright-scraper/src/internals/crawler_setup.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -275,7 +275,10 @@ export class CrawlerSetup implements CrawlerSetupOptions {
275275
respectRobotsTxtFile: this.input.respectRobotsTxtFile,
276276
maxConcurrency: this.input.maxConcurrency,
277277
maxRequestRetries: this.input.maxRequestRetries,
278-
maxRequestsPerCrawl: this.input.maxPagesPerCrawl,
278+
maxRequestsPerCrawl:
279+
this.input.maxPagesPerCrawl === 0
280+
? undefined
281+
: this.input.maxPagesPerCrawl,
279282
proxyConfiguration: (await Actor.createProxyConfiguration(
280283
this.input.proxyConfiguration,
281284
)) as any as ProxyConfiguration,

packages/actor-scraper/puppeteer-scraper/src/internals/crawler_setup.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,10 @@ export class CrawlerSetup implements CrawlerSetupOptions {
272272
respectRobotsTxtFile: this.input.respectRobotsTxtFile,
273273
maxConcurrency: this.input.maxConcurrency,
274274
maxRequestRetries: this.input.maxRequestRetries,
275-
maxRequestsPerCrawl: this.input.maxPagesPerCrawl,
275+
maxRequestsPerCrawl:
276+
this.input.maxPagesPerCrawl === 0
277+
? undefined
278+
: this.input.maxPagesPerCrawl,
276279
proxyConfiguration: (await Actor.createProxyConfiguration(
277280
this.input.proxyConfiguration,
278281
)) as any as ProxyConfiguration,

packages/actor-scraper/web-scraper/src/internals/crawler_setup.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -301,7 +301,10 @@ export class CrawlerSetup implements CrawlerSetupOptions {
301301
? MAX_CONCURRENCY_IN_DEVELOPMENT
302302
: this.input.maxConcurrency,
303303
maxRequestRetries: this.input.maxRequestRetries,
304-
maxRequestsPerCrawl: this.input.maxPagesPerCrawl,
304+
maxRequestsPerCrawl:
305+
this.input.maxPagesPerCrawl === 0
306+
? undefined
307+
: this.input.maxPagesPerCrawl,
305308
proxyConfiguration: (await Actor.createProxyConfiguration(
306309
this.input.proxyConfiguration,
307310
)) as any as ProxyConfiguration,

0 commit comments

Comments
 (0)