refactor!: Introduce new storage client system #1194

Merged
merged 45 commits from new-storage-clients into master on Jul 1, 2025
Commits (45)
f285707
refactor!: Introduce new storage client system
vdusek May 10, 2025
dd9be6e
Cleanup
vdusek May 10, 2025
89bfa5b
Address feedback
vdusek May 15, 2025
4050c75
Add purge_if_needed method and improve some typing based on Pylance
vdusek May 16, 2025
26f46e2
Address more feedback
vdusek May 20, 2025
c83a36a
RQ FS client improvements
vdusek Jun 4, 2025
c967fe5
Add caching to RQ FS client
vdusek Jun 5, 2025
7df046f
RQ FS performance optimization in add_requests
vdusek Jun 5, 2025
3555565
RQ FS performance issues in fetch_next_request
vdusek Jun 6, 2025
946d1e2
RQ FS fetch performance for is_empty
vdusek Jun 6, 2025
9f10b95
rm code duplication for open methods
vdusek Jun 6, 2025
0864ff8
Request loaders use async getters for handled/total req cnt
vdusek Jun 9, 2025
af0d129
Add missing_ok when removing files
vdusek Jun 9, 2025
9998a58
Improve is_empty
vdusek Jun 10, 2025
fdee111
Optimize RQ memory storage client
vdusek Jun 10, 2025
79cdfc0
Add upgrading guide and skip problematic test
vdusek Jun 11, 2025
3d2fd73
Merge branch 'master' into new-storage-clients
vdusek Jun 11, 2025
e818585
chore: update `docusaurus-plugin-typedoc-api`, fix failing docs build
barjin Jun 11, 2025
65db9ac
fix docs
vdusek Jun 11, 2025
2b786f7
add retries to atomic write
vdusek Jun 12, 2025
2cb04c5
chore(deps): update dependency pytest-cov to ~=6.2.0 (#1244)
renovate[bot] Jun 12, 2025
0c8c4ec
Fix atomic write on Windows
vdusek Jun 12, 2025
ce1eeb1
resolve write function during import time
vdusek Jun 14, 2025
4c05cee
Merge branch 'master' into new-storage-clients
vdusek Jun 14, 2025
8c80513
Update file utils
vdusek Jun 16, 2025
70bc071
revert unintentional makefile changes
vdusek Jun 16, 2025
78efb4d
Address Honza's comments (p1)
vdusek Jun 18, 2025
fa18d19
Introduce storage instance manager
vdusek Jun 19, 2025
c783dac
Utilize recoverable state for the FS RQ state
vdusek Jun 20, 2025
437071e
Details
vdusek Jun 20, 2025
df4bfa7
Rm default_"storage"_id options (were not used at all)
vdusek Jun 23, 2025
e133fcd
Update storages guide and add storage clients guide
vdusek Jun 23, 2025
76f1ffb
Docs guides - code examples
vdusek Jun 24, 2025
fa48644
Docs guides polishment
vdusek Jun 24, 2025
5c935af
docs fix lint & type checks for py 3.9
vdusek Jun 24, 2025
ac259ce
Address Honza's feedback
vdusek Jun 24, 2025
1cbf15e
SDK fixes
vdusek Jun 25, 2025
bc50990
Add KVS record_exists method
vdusek Jun 26, 2025
d1cf967
reduce test duplicities for storages & storage clients
vdusek Jun 26, 2025
aa9bfd3
Create locks in async context only
vdusek Jun 27, 2025
d6c9877
rm open methods from base storage clients
vdusek Jun 27, 2025
3b133ce
update storage clients inits
vdusek Jun 30, 2025
43b9fe9
async metadata getter
vdusek Jul 1, 2025
b628fbb
better typing in storage instance manager
vdusek Jul 1, 2025
9dfac4b
update upgrading guide
vdusek Jul 1, 2025
15 changes: 7 additions & 8 deletions docs/deployment/code_examples/google/cloud_run_example.py
@@ -5,24 +5,23 @@
import uvicorn
from litestar import Litestar, get

from crawlee import service_locator
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext

# highlight-start
# Disable writing storage data to the file system
configuration = service_locator.get_configuration()
configuration.persist_storage = False
configuration.write_metadata = False
# highlight-end
from crawlee.storage_clients import MemoryStorageClient


@get('/')
async def main() -> str:
"""The crawler entry point that will be called when the HTTP endpoint is accessed."""
# highlight-start
# Disable writing storage data to the file system
storage_client = MemoryStorageClient()
# highlight-end

crawler = PlaywrightCrawler(
headless=True,
max_requests_per_crawl=10,
browser_type='firefox',
storage_client=storage_client,
)

@crawler.router.default_handler
15 changes: 7 additions & 8 deletions docs/deployment/code_examples/google/google_example.py
@@ -6,22 +6,21 @@
import functions_framework
from flask import Request, Response

from crawlee import service_locator
from crawlee.crawlers import (
BeautifulSoupCrawler,
BeautifulSoupCrawlingContext,
)

# highlight-start
# Disable writing storage data to the file system
configuration = service_locator.get_configuration()
configuration.persist_storage = False
configuration.write_metadata = False
# highlight-end
from crawlee.storage_clients import MemoryStorageClient


async def main() -> str:
# highlight-start
# Disable writing storage data to the file system
storage_client = MemoryStorageClient()
# highlight-end

crawler = BeautifulSoupCrawler(
storage_client=storage_client,
max_request_retries=1,
request_handler_timeout=timedelta(seconds=30),
max_requests_per_crawl=10,
@@ -30,7 +30,7 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
await crawler.run(['https://crawlee.dev'])

# Export the entire dataset to a CSV file.
await crawler.export_data_csv(path='results.csv')
await crawler.export_data(path='results.csv')


if __name__ == '__main__':
@@ -30,7 +30,7 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
await crawler.run(['https://crawlee.dev'])

# Export the entire dataset to a JSON file.
await crawler.export_data_json(path='results.json')
await crawler.export_data(path='results.json')


if __name__ == '__main__':
2 changes: 1 addition & 1 deletion docs/examples/code_examples/parsel_crawler.py
@@ -40,7 +40,7 @@ async def some_hook(context: BasicCrawlingContext) -> None:
await crawler.run(['https://github.com'])

# Export the entire dataset to a JSON file.
await crawler.export_data_json(path='results.json')
await crawler.export_data(path='results.json')


if __name__ == '__main__':
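Note: all three export examples above swap the format-specific export_data_csv / export_data_json calls for a single export_data call, which suggests the new method picks the output format from the file extension. A minimal end-to-end sketch under that assumption (the handler and crawler setup here are illustrative, not part of this diff):

import asyncio

from crawlee.crawlers import ParselCrawler, ParselCrawlingContext


async def main() -> None:
    crawler = ParselCrawler(max_requests_per_crawl=10)

    @crawler.router.default_handler
    async def request_handler(context: ParselCrawlingContext) -> None:
        await context.push_data({'url': context.request.url})

    await crawler.run(['https://crawlee.dev'])

    # Assuming export_data infers the format from the path suffix,
    # one method covers both of the replaced format-specific calls.
    await crawler.export_data(path='results.csv')
    await crawler.export_data(path='results.json')


if __name__ == '__main__':
    asyncio.run(main())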
@@ -0,0 +1,65 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from crawlee.storage_clients import StorageClient
from crawlee.storage_clients._base import (
    DatasetClient,
    KeyValueStoreClient,
    RequestQueueClient,
)

if TYPE_CHECKING:
    from crawlee.configuration import Configuration

# Implement the storage type clients with your backend logic.


class CustomDatasetClient(DatasetClient):
    # Implement methods like push_data, get_data, iterate_items, etc.
    pass


class CustomKeyValueStoreClient(KeyValueStoreClient):
    # Implement methods like get_value, set_value, delete, etc.
    pass


class CustomRequestQueueClient(RequestQueueClient):
    # Implement methods like add_request, fetch_next_request, etc.
    pass


# Implement the storage client factory.


class CustomStorageClient(StorageClient):
    async def create_dataset_client(
        self,
        *,
        id: str | None = None,
        name: str | None = None,
        configuration: Configuration | None = None,
    ) -> CustomDatasetClient:
        # Create and return your custom dataset client.
        pass

    async def create_kvs_client(
        self,
        *,
        id: str | None = None,
        name: str | None = None,
        configuration: Configuration | None = None,
    ) -> CustomKeyValueStoreClient:
        # Create and return your custom key-value store client.
        pass

    async def create_rq_client(
        self,
        *,
        id: str | None = None,
        name: str | None = None,
        configuration: Configuration | None = None,
    ) -> CustomRequestQueueClient:
        # Create and return your custom request queue client.
        pass
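The snippet above only defines the custom clients and their factory; it does not show them in use. A minimal sketch of how such a client could be plugged in, assuming CustomStorageClient from above is importable and following the same storage_client= pattern as the built-in client examples below:

from crawlee.crawlers import ParselCrawler

# Instantiate the custom storage client defined above (hypothetical usage,
# mirroring the FileSystemStorageClient and MemoryStorageClient examples below).
storage_client = CustomStorageClient()

# And pass it to the crawler.
crawler = ParselCrawler(storage_client=storage_client)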
@@ -0,0 +1,8 @@
from crawlee.crawlers import ParselCrawler
from crawlee.storage_clients import FileSystemStorageClient

# Create a new instance of storage client.
storage_client = FileSystemStorageClient()

# And pass it to the crawler.
crawler = ParselCrawler(storage_client=storage_client)
@@ -0,0 +1,18 @@
from crawlee.configuration import Configuration
from crawlee.crawlers import ParselCrawler
from crawlee.storage_clients import FileSystemStorageClient

# Create a new instance of storage client.
storage_client = FileSystemStorageClient()

# Create a configuration with custom settings.
configuration = Configuration(
    storage_dir='./my_storage',
    purge_on_start=False,
)

# And pass them to the crawler.
crawler = ParselCrawler(
    storage_client=storage_client,
    configuration=configuration,
)
@@ -0,0 +1,8 @@
from crawlee.crawlers import ParselCrawler
from crawlee.storage_clients import MemoryStorageClient

# Create a new instance of storage client.
storage_client = MemoryStorageClient()

# And pass it to the crawler.
crawler = ParselCrawler(storage_client=storage_client)
@@ -0,0 +1,29 @@
import asyncio

from crawlee import service_locator
from crawlee.crawlers import ParselCrawler
from crawlee.storage_clients import MemoryStorageClient
from crawlee.storages import Dataset


async def main() -> None:
    # Create custom storage client, MemoryStorageClient for example.
    storage_client = MemoryStorageClient()

    # Register it globally via the service locator.
    service_locator.set_storage_client(storage_client)

    # Or pass it directly to the crawler, it will be registered globally
    # to the service locator under the hood.
    crawler = ParselCrawler(storage_client=storage_client)

    # Or just provide it when opening a storage (e.g. dataset), it will be used
    # for this storage only, not globally.
    dataset = await Dataset.open(
        name='my_dataset',
        storage_client=storage_client,
    )


if __name__ == '__main__':
    asyncio.run(main())
@@ -1,20 +1,19 @@
import asyncio

from crawlee.crawlers import HttpCrawler
from crawlee.storage_clients import MemoryStorageClient
from crawlee.storages import Dataset


async def main() -> None:
storage_client = MemoryStorageClient.from_config()
# Create storage client with configuration
dataset = await Dataset.open(name='my-dataset')

# Call the purge_on_start method to explicitly purge the storage.
# highlight-next-line
await storage_client.purge_on_start()
# Purge the dataset explicitly - purging will remove all items from the dataset.
# But keeps the dataset itself and its metadata.
await dataset.purge()

# Pass the storage client to the crawler.
crawler = HttpCrawler(storage_client=storage_client)

# ...
# Or you can drop the dataset completely, which will remove the dataset
# and all its items.
await dataset.drop()


if __name__ == '__main__':
@@ -6,7 +6,7 @@
async def main() -> None:
# Open the dataset, if it does not exist, it will be created.
# Leave name empty to use the default dataset.
dataset = await Dataset.open()
dataset = await Dataset.open(name='my-dataset')

# Push a single row of data.
await dataset.push_data({'foo': 'bar'})
@@ -7,7 +7,7 @@
async def main() -> None:
# Open the dataset, if it does not exist, it will be created.
# Leave name empty to use the default dataset.
dataset = await Dataset.open()
dataset = await Dataset.open(name='my-dataset')

# Create a new crawler (it can be any subclass of BasicCrawler).
crawler = BeautifulSoupCrawler()
2 changes: 1 addition & 1 deletion docs/guides/code_examples/storages/kvs_basic_example.py
@@ -6,7 +6,7 @@
async def main() -> None:
# Open the key-value store, if it does not exist, it will be created.
# Leave name empty to use the default KVS.
kvs = await KeyValueStore.open()
kvs = await KeyValueStore.open(name='my-key-value-store')

# Set a value associated with 'some-key'.
await kvs.set_value(key='some-key', value={'foo': 'bar'})
@@ -7,7 +7,7 @@
async def main() -> None:
# Open the key-value store, if it does not exist, it will be created.
# Leave name empty to use the default KVS.
kvs = await KeyValueStore.open()
kvs = await KeyValueStore.open(name='my-key-value-store')

# Create a new Playwright crawler.
crawler = PlaywrightCrawler()
2 changes: 1 addition & 1 deletion docs/guides/code_examples/storages/rq_basic_example.py
@@ -12,7 +12,7 @@ async def main() -> None:
await request_queue.add_request('https://apify.com/')

# Add multiple requests as a batch.
await request_queue.add_requests_batched(
await request_queue.add_requests(
['https://crawlee.dev/', 'https://crawlee.dev/python/']
)

@@ -10,12 +10,10 @@ async def main() -> None:
request_queue = await RequestQueue.open(name='my-request-queue')

# Interact with the request queue directly, e.g. add a batch of requests.
await request_queue.add_requests_batched(
['https://apify.com/', 'https://crawlee.dev/']
)
await request_queue.add_requests(['https://apify.com/', 'https://crawlee.dev/'])

# Create a new crawler (it can be any subclass of BasicCrawler) and pass the request
# list as request manager to it. It will be managed by the crawler.
# queue as request manager to it. It will be managed by the crawler.
crawler = HttpCrawler(request_manager=request_queue)

# Define the default request handler, which will be called for every request.
8 changes: 4 additions & 4 deletions docs/guides/request_loaders.mdx
@@ -42,7 +42,7 @@ classDiagram
%% Abstract classes
%% ========================

class BaseStorage {
class Storage {
<<abstract>>
+ id
+ name
@@ -52,12 +52,12 @@ class BaseStorage {

class RequestLoader {
<<abstract>>
+ handled_count
+ total_count
+ fetch_next_request()
+ mark_request_as_handled()
+ is_empty()
+ is_finished()
+ get_handled_count()
+ get_total_count()
+ to_tandem()
}

@@ -92,7 +92,7 @@ class RequestManagerTandem {
%% Inheritance arrows
%% ========================

BaseStorage <|-- RequestQueue
Storage <|-- RequestQueue
RequestManager <|-- RequestQueue

RequestLoader <|-- RequestManager
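The diagram change above pairs with the commit "Request loaders use async getters for handled/total req cnt": the handled_count / total_count properties become awaitable get_handled_count() / get_total_count() methods. A minimal sketch of what calling code might look like under that assumption, using RequestList as one concrete RequestLoader (its import path and constructor signature are assumed here, not taken from this diff):

import asyncio

from crawlee.request_loaders import RequestList


async def main() -> None:
    # Any RequestLoader implementation exposes the interface from the diagram above.
    request_list = RequestList(['https://crawlee.dev/', 'https://apify.com/'])

    # The counters are now async getters rather than plain properties,
    # so they have to be awaited.
    total = await request_list.get_total_count()
    handled = await request_list.get_handled_count()
    print(f'{handled}/{total} requests handled')


if __name__ == '__main__':
    asyncio.run(main())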