From b680c969ade84ac138fed356f585e147510dfceb Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Wed, 22 Oct 2025 15:49:07 +0200 Subject: [PATCH] fix: Fix type of cloud_storage_client in SmartApifyStorageClient --- src/apify/storage_clients/_apify/_models.py | 2 +- .../storage_clients/_apify/_storage_client.py | 39 +++++++-------- .../_smart_apify/_storage_client.py | 49 +++++++++---------- 3 files changed, 44 insertions(+), 46 deletions(-) diff --git a/src/apify/storage_clients/_apify/_models.py b/src/apify/storage_clients/_apify/_models.py index f6917843..9e74a939 100644 --- a/src/apify/storage_clients/_apify/_models.py +++ b/src/apify/storage_clients/_apify/_models.py @@ -120,7 +120,7 @@ class RequestQueueStats(BaseModel): """The number of request queue reads.""" storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)] - """Storage size in Bytes.""" + """Storage size in bytes.""" write_count: Annotated[int, Field(alias='writeCount', default=0)] """The number of request queue writes.""" diff --git a/src/apify/storage_clients/_apify/_storage_client.py b/src/apify/storage_clients/_apify/_storage_client.py index cab538f8..02c06258 100644 --- a/src/apify/storage_clients/_apify/_storage_client.py +++ b/src/apify/storage_clients/_apify/_storage_client.py @@ -57,6 +57,12 @@ class ApifyStorageClient(StorageClient): should be used when multiple consumers need to process requests from the same queue simultaneously. """ + _LSP_ERROR_MSG = 'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.' + """This class (intentionally) violates the Liskov Substitution Principle. + + It requires a specialized `Configuration` instance compared to its parent class. + """ + def __init__(self, *, request_queue_access: Literal['single', 'shared'] = 'single') -> None: """Initialize a new instance. @@ -68,23 +74,6 @@ def __init__(self, *, request_queue_access: Literal['single', 'shared'] = 'singl """ self._request_queue_access = request_queue_access - # This class breaches Liskov Substitution Principle. It requires specialized Configuration compared to its parent. - _lsp_violation_error_message_template = ( - 'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.' - ) - - @override - def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable: - if isinstance(configuration, ApifyConfiguration): - # It is not supported to open exactly same queue with 'single' and 'shared' client at the same time. - # Whichever client variation gets used first, wins. - return super().get_storage_client_cache_key(configuration), hash_api_base_url_and_token(configuration) - - config_class = type(configuration) - raise TypeError( - self._lsp_violation_error_message_template.format(f'{config_class.__module__}.{config_class.__name__}') - ) - @override async def create_dataset_client( self, @@ -98,7 +87,7 @@ async def create_dataset_client( if isinstance(configuration, ApifyConfiguration): return await ApifyDatasetClient.open(id=id, name=name, alias=alias, configuration=configuration) - raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__)) + raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__)) @override async def create_kvs_client( @@ -113,7 +102,7 @@ async def create_kvs_client( if isinstance(configuration, ApifyConfiguration): return await ApifyKeyValueStoreClient.open(id=id, name=name, alias=alias, configuration=configuration) - raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__)) + raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__)) @override async def create_rq_client( @@ -130,4 +119,14 @@ async def create_rq_client( id=id, name=name, alias=alias, configuration=configuration, access=self._request_queue_access ) - raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__)) + raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__)) + + @override + def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable: + if isinstance(configuration, ApifyConfiguration): + # It is not supported to open exactly same queue with 'single' and 'shared' client at the same time. + # Whichever client variation gets used first, wins. + return super().get_storage_client_cache_key(configuration), hash_api_base_url_and_token(configuration) + + config_class = type(configuration) + raise TypeError(self._LSP_ERROR_MSG.format(f'{config_class.__module__}.{config_class.__name__}')) diff --git a/src/apify/storage_clients/_smart_apify/_storage_client.py b/src/apify/storage_clients/_smart_apify/_storage_client.py index 92b3d1e9..f2e75310 100644 --- a/src/apify/storage_clients/_smart_apify/_storage_client.py +++ b/src/apify/storage_clients/_smart_apify/_storage_client.py @@ -8,8 +8,7 @@ from apify._configuration import Configuration as ApifyConfiguration from apify._utils import docs_group -from apify.storage_clients import ApifyStorageClient -from apify.storage_clients._file_system import ApifyFileSystemStorageClient +from apify.storage_clients import ApifyStorageClient, FileSystemStorageClient if TYPE_CHECKING: from collections.abc import Hashable @@ -36,7 +35,7 @@ class SmartApifyStorageClient(StorageClient): def __init__( self, *, - cloud_storage_client: ApifyStorageClient | None = None, + cloud_storage_client: StorageClient | None = None, local_storage_client: StorageClient | None = None, ) -> None: """Initialize a new instance. @@ -47,8 +46,8 @@ def __init__( local_storage_client: Storage client used when an Actor is not running on the Apify platform and when `force_cloud` flag is not set. Defaults to `FileSystemStorageClient`. """ - self._cloud_storage_client = cloud_storage_client or ApifyStorageClient(request_queue_access='single') - self._local_storage_client = local_storage_client or ApifyFileSystemStorageClient() + self._cloud_storage_client = cloud_storage_client or ApifyStorageClient() + self._local_storage_client = local_storage_client or FileSystemStorageClient() def __str__(self) -> str: return ( @@ -56,26 +55,6 @@ def __str__(self) -> str: f' local_storage_client={self._local_storage_client.__class__.__name__})' ) - def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient: - """Get a suitable storage client based on the global configuration and the value of the force_cloud flag. - - Args: - force_cloud: If True, return `cloud_storage_client`. - """ - if ApifyConfiguration.get_global_configuration().is_at_home: - return self._cloud_storage_client - - configuration = ApifyConfiguration.get_global_configuration() - if force_cloud: - if configuration.token is None: - raise RuntimeError( - 'In order to use the Apify cloud storage from your computer, ' - 'you need to provide an Apify token using the APIFY_TOKEN environment variable.' - ) - return self._cloud_storage_client - - return self._local_storage_client - @override def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable: if ApifyConfiguration.get_global_configuration().is_at_home: @@ -123,3 +102,23 @@ async def create_rq_client( return await self.get_suitable_storage_client().create_rq_client( id=id, name=id, alias=alias, configuration=configuration ) + + def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient: + """Get a suitable storage client based on the global configuration and the value of the force_cloud flag. + + Args: + force_cloud: If True, return `cloud_storage_client`. + """ + if ApifyConfiguration.get_global_configuration().is_at_home: + return self._cloud_storage_client + + configuration = ApifyConfiguration.get_global_configuration() + if force_cloud: + if configuration.token is None: + raise RuntimeError( + 'In order to use the Apify cloud storage from your computer, ' + 'you need to provide an Apify token using the APIFY_TOKEN environment variable.' + ) + return self._cloud_storage_client + + return self._local_storage_client