diff --git a/pyproject.toml b/pyproject.toml index 27ba25fd3..d07ec46b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath" -version = "2.1.118" +version = "2.1.119" description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools." readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.10" diff --git a/src/uipath/_services/documents_service.py b/src/uipath/_services/documents_service.py index 27cd77ed0..5b67f42c4 100644 --- a/src/uipath/_services/documents_service.py +++ b/src/uipath/_services/documents_service.py @@ -13,6 +13,8 @@ from .._utils import Endpoint from ..models.documents import ( ActionPriority, + ClassificationResponse, + ClassificationResult, ExtractionResponse, ExtractionResponseIXP, ProjectType, @@ -26,6 +28,58 @@ POLLING_TIMEOUT = 300 # seconds +def _is_provided(arg: Any) -> bool: + return arg is not None + + +def _must_not_be_provided(**kwargs: Any) -> None: + for name, value in kwargs.items(): + if value is not None: + raise ValueError(f"`{name}` must not be provided") + + +def _must_be_provided(**kwargs: Any) -> None: + for name, value in kwargs.items(): + if value is None: + raise ValueError(f"`{name}` must be provided") + + +def _exactly_one_must_be_provided(**kwargs: Any) -> None: + provided = [name for name, value in kwargs.items() if value is not None] + if len(provided) != 1: + raise ValueError( + f"Exactly one of `{', '.join(kwargs.keys())}` must be provided" + ) + + +def _validate_extract_params_and_get_project_type( + project_name: Optional[str], + file: Optional[FileContent], + file_path: Optional[str], + classification_result: Optional[ClassificationResult], + project_type: Optional[ProjectType], + document_type_name: Optional[str], +) -> ProjectType: + if _is_provided(project_name): + _must_be_provided(project_type=project_type) + _exactly_one_must_be_provided(file=file, 
file_path=file_path) + _must_not_be_provided(classification_result=classification_result) + if project_type == ProjectType.MODERN: + _must_be_provided(document_type_name=document_type_name) + else: + _must_not_be_provided( + project_name=project_name, + project_type=project_type, + file=file, + file_path=file_path, + document_type_name=document_type_name, + ) + _must_be_provided(classification_result=classification_result) + project_type = ProjectType.MODERN + + return project_type # type: ignore + + class DocumentsService(FolderContext, BaseService): """Service for managing UiPath DocumentUnderstanding Document Operations. @@ -100,10 +154,65 @@ async def _get_project_tags_async(self, project_id: str) -> Set[str]: ) return {tag["name"] for tag in response.json().get("tags", [])} + def _get_document_id( + self, + project_id: Optional[str], + file: Optional[FileContent], + file_path: Optional[str], + classification_result: Optional[ClassificationResult], + ) -> str: + if classification_result is not None: + return classification_result.document_id + + document_id = self._start_digitization( + project_id=project_id, # type: ignore + file=file, + file_path=file_path, + ) + self._wait_for_digitization( + project_id=project_id, # type: ignore + document_id=document_id, + ) + + return document_id + + async def _get_document_id_async( + self, + project_id: Optional[str], + file: Optional[FileContent], + file_path: Optional[str], + classification_result: Optional[ClassificationResult], + ) -> str: + if classification_result is not None: + return classification_result.document_id + + document_id = await self._start_digitization_async( + project_id=project_id, # type: ignore + file=file, + file_path=file_path, + ) + await self._wait_for_digitization_async( + project_id=project_id, # type: ignore + document_id=document_id, + ) + + return document_id + def _get_project_id_and_validate_tag( - self, project_name: str, project_type: ProjectType, tag: str + self, + tag: str, + 
project_name: Optional[str], + project_type: Optional[ProjectType], + classification_result: Optional[ClassificationResult], ) -> str: - project_id = self._get_project_id_by_name(project_name, project_type) + if project_name is not None: + project_id = self._get_project_id_by_name( + project_name, + project_type, # type: ignore + ) + else: + project_id = classification_result.project_id # type: ignore + tags = self._get_project_tags(project_id) if tag not in tags: raise ValueError( @@ -113,11 +222,20 @@ def _get_project_id_and_validate_tag( return project_id async def _get_project_id_and_validate_tag_async( - self, project_name: str, project_type: ProjectType, tag: str + self, + tag: str, + project_name: Optional[str], + project_type: Optional[ProjectType], + classification_result: Optional[ClassificationResult], ) -> str: - project_id = await self._get_project_id_by_name_async( - project_name, project_type - ) + if project_name is not None: + project_id = await self._get_project_id_by_name_async( + project_name, + project_type, # type: ignore + ) + else: + project_id = classification_result.project_id # type: ignore + tags = await self._get_project_tags_async(project_id) if tag not in tags: raise ValueError( @@ -129,44 +247,100 @@ async def _get_project_id_and_validate_tag_async( def _start_digitization( self, project_id: str, - file: FileContent, + file: Optional[FileContent] = None, + file_path: Optional[str] = None, ) -> str: - return self.request( - "POST", - url=Endpoint( - f"/du_/api/framework/projects/{project_id}/digitization/start" - ), - params={"api-version": 1.1}, - headers=self._get_common_headers(), - files={"File": file}, - ).json()["documentId"] + with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle: + return self.request( + "POST", + url=Endpoint( + f"/du_/api/framework/projects/{project_id}/digitization/start" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + files={"File": handle}, + 
).json()["documentId"] async def _start_digitization_async( self, project_id: str, - file: FileContent, + file: Optional[FileContent] = None, + file_path: Optional[str] = None, ) -> str: - return ( - await self.request_async( - "POST", + with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle: + return ( + await self.request_async( + "POST", + url=Endpoint( + f"/du_/api/framework/projects/{project_id}/digitization/start" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + files={"File": handle}, + ) + ).json()["documentId"] + + def _wait_for_digitization(self, project_id: str, document_id: str) -> None: + def result_getter() -> Tuple[str, Optional[str], Optional[str]]: + result = self.request( + method="GET", url=Endpoint( - f"/du_/api/framework/projects/{project_id}/digitization/start" + f"/du_/api/framework/projects/{project_id}/digitization/result/{document_id}" ), params={"api-version": 1.1}, headers=self._get_common_headers(), - files={"File": file}, + ).json() + return ( + result["status"], + result.get("error", None), + result.get("result", None), ) - ).json()["documentId"] + + self._wait_for_operation( + result_getter=result_getter, + wait_statuses=["NotStarted", "Running"], + success_status="Succeeded", + ) + + async def _wait_for_digitization_async( + self, project_id: str, document_id: str + ) -> None: + async def result_getter() -> Tuple[str, Optional[str], Optional[str]]: + result = ( + await self.request_async( + method="GET", + url=Endpoint( + f"/du_/api/framework/projects/{project_id}/digitization/result/{document_id}" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + ) + ).json() + return ( + result["status"], + result.get("error", None), + result.get("result", None), + ) + + await self._wait_for_operation_async( + result_getter=result_getter, + wait_statuses=["NotStarted", "Running"], + success_status="Succeeded", + ) def _get_document_type_id( self, project_id: str, 
document_type_name: Optional[str], project_type: ProjectType, + classification_result: Optional[ClassificationResult], ) -> str: if project_type == ProjectType.IXP: return str(UUID(int=0)) + if classification_result is not None: + return classification_result.document_type_id + response = self.request( "GET", url=Endpoint(f"/du_/api/framework/projects/{project_id}/document-types"), @@ -190,10 +364,14 @@ async def _get_document_type_id_async( project_id: str, document_type_name: Optional[str], project_type: ProjectType, + classification_result: Optional[ClassificationResult], ) -> str: if project_type == ProjectType.IXP: return str(UUID(int=0)) + if classification_result is not None: + return classification_result.document_type_id + response = await self.request_async( "GET", url=Endpoint(f"/du_/api/framework/projects/{project_id}/document-types"), @@ -380,25 +558,225 @@ async def result_getter() -> Tuple[str, str, Any]: return ExtractionResponse.model_validate(extraction_response) + def _start_classification( + self, + project_id: str, + tag: str, + document_id: str, + ) -> str: + return self.request( + "POST", + url=Endpoint( + f"/du_/api/framework/projects/{project_id}/{tag}/classification/start" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + json={"documentId": document_id}, + ).json()["operationId"] + + async def _start_classification_async( + self, + project_id: str, + tag: str, + document_id: str, + ) -> str: + return ( + await self.request_async( + "POST", + url=Endpoint( + f"/du_/api/framework/projects/{project_id}/{tag}/classification/start" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + json={"documentId": document_id}, + ) + ).json()["operationId"] + + def _wait_for_classification( + self, + project_id: str, + tag: str, + operation_id: str, + ) -> List[ClassificationResult]: + def result_getter() -> Tuple[str, Optional[str], Optional[str]]: + result = self.request( + method="GET", + url=Endpoint( 
+ f"/du_/api/framework/projects/{project_id}/{tag}/classification/result/{operation_id}" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + ).json() + return ( + result["status"], + result.get("error", None), + result.get("result", None), + ) + + classification_response = self._wait_for_operation( + result_getter=result_getter, + wait_statuses=["NotStarted", "Running"], + success_status="Succeeded", + ) + for classification_result in classification_response["classificationResults"]: + classification_result["ProjectId"] = project_id + + return ClassificationResponse.model_validate( + classification_response + ).classification_results + + async def _wait_for_classification_async( + self, + project_id: str, + tag: str, + operation_id: str, + ) -> List[ClassificationResult]: + async def result_getter() -> Tuple[str, Optional[str], Optional[str]]: + result = ( + await self.request_async( + method="GET", + url=Endpoint( + f"/du_/api/framework/projects/{project_id}/{tag}/classification/result/{operation_id}" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + ) + ).json() + return ( + result["status"], + result.get("error", None), + result.get("result", None), + ) + + classification_response = await self._wait_for_operation_async( + result_getter=result_getter, + wait_statuses=["NotStarted", "Running"], + success_status="Succeeded", + ) + for classification_result in classification_response["classificationResults"]: + classification_result["ProjectId"] = project_id + + return ClassificationResponse.model_validate( + classification_response + ).classification_results + + @traced(name="documents_classify", run_type="uipath") + def classify( + self, + tag: str, + project_name: str, + file: Optional[FileContent] = None, + file_path: Optional[str] = None, + ) -> List[ClassificationResult]: + """Classify a document using a DU Modern project. 
+ + Args: + project_name (str): Name of the [DU Modern](https://docs.uipath.com/document-understanding/automation-cloud/latest/user-guide/about-document-understanding) project. + tag (str): Tag of the published project version. + file (FileContent, optional): The document file to be classified. + file_path (str, optional): Path to the document file to be classified. + + Note: + Either `file` or `file_path` must be provided, but not both. + + Returns: + List[ClassificationResult]: A list of classification results. + + Examples: + ```python + with open("path/to/document.pdf", "rb") as file: + classification_results = service.classify( + project_name="MyModernProjectName", + tag="Production", + file=file, + ) + ``` + """ + _exactly_one_must_be_provided(file=file, file_path=file_path) + + project_id = self._get_project_id_and_validate_tag( + tag=tag, + project_name=project_name, + project_type=ProjectType.MODERN, + classification_result=None, + ) + + document_id = self._get_document_id( + project_id=project_id, + file=file, + file_path=file_path, + classification_result=None, + ) + + operation_id = self._start_classification( + project_id=project_id, + tag=tag, + document_id=document_id, + ) + + return self._wait_for_classification( + project_id=project_id, + tag=tag, + operation_id=operation_id, + ) + + @traced(name="documents_classify_async", run_type="uipath") + async def classify_async( + self, + tag: str, + project_name: str, + file: Optional[FileContent] = None, + file_path: Optional[str] = None, + ) -> List[ClassificationResult]: + """Asynchronous version of the [`classify`][uipath._services.documents_service.DocumentsService.classify] method.""" + _exactly_one_must_be_provided(file=file, file_path=file_path) + + project_id = await self._get_project_id_and_validate_tag_async( + tag=tag, + project_name=project_name, + project_type=ProjectType.MODERN, + classification_result=None, + ) + + document_id = await self._get_document_id_async( + project_id=project_id, 
+ file=file, + file_path=file_path, + classification_result=None, + ) + + operation_id = await self._start_classification_async( + project_id=project_id, + tag=tag, + document_id=document_id, + ) + + return await self._wait_for_classification_async( + project_id=project_id, + tag=tag, + operation_id=operation_id, + ) + @traced(name="documents_extract", run_type="uipath") def extract( self, - project_name: str, tag: str, + project_name: Optional[str] = None, file: Optional[FileContent] = None, file_path: Optional[str] = None, - project_type: ProjectType = ProjectType.IXP, + classification_result: Optional[ClassificationResult] = None, + project_type: Optional[ProjectType] = None, document_type_name: Optional[str] = None, ) -> Union[ExtractionResponse, ExtractionResponseIXP]: - """Extract predicted data from a document using an IXP project. + """Extract predicted data from a document using a DU Modern/IXP project. Args: - project_name (str): Name of the [IXP](https://docs.uipath.com/ixp/automation-cloud/latest/overview/managing-projects#creating-a-new-project)/[DU Modern](https://docs.uipath.com/document-understanding/automation-cloud/latest/user-guide/about-document-understanding) project. + project_name (str, optional): Name of the [IXP](https://docs.uipath.com/ixp/automation-cloud/latest/overview/managing-projects#creating-a-new-project)/[DU Modern](https://docs.uipath.com/document-understanding/automation-cloud/latest/user-guide/about-document-understanding) project. Must be provided if `classification_result` is not provided. tag (str): Tag of the published project version. - file (FileContent, optional): The document file to be processed. - file_path (str, optional): Path to the document file to be processed. - project_type (ProjectType, optional): Type of the project. Defaults to `ProjectType.IXP`. - document_type_name (str, optional): Document type name associated with the extractor to be used for extraction. 
Required if `project_type` is `ProjectType.MODERN`. + file (FileContent, optional): The document file to be processed. Must be provided if `classification_result` is not provided. + file_path (str, optional): Path to the document file to be processed. Must be provided if `classification_result` is not provided. + project_type (ProjectType, optional): Type of the project. Must be provided if `project_name` is provided. + document_type_name (str, optional): Document type name associated with the extractor to be used for extraction. Required if `project_type` is `ProjectType.MODERN` and `project_name` is provided. Note: Either `file` or `file_path` must be provided, but not both. @@ -417,7 +795,7 @@ def extract( ) ``` - DU Modern projects: + DU Modern projects (providing document type name): ```python with open("path/to/document.pdf", "rb") as file: extraction_response = service.extract( @@ -428,27 +806,50 @@ def extract( document_type_name="Receipts", ) ``` - """ - if file is None and file_path is None: - raise ValueError("Either `file` or `file_path` must be provided") - if file is not None and file_path is not None: - raise ValueError("`file` and `file_path` are mutually exclusive") - if project_type == ProjectType.MODERN and document_type_name is None: - raise ValueError( - "`document_type_name` must be provided when `project_type` is `ProjectType.MODERN`" + + DU Modern projects (using existing classification result): + ```python + with open("path/to/document.pdf", "rb") as file: + classification_results = uipath.documents.classify( + tag="Production", + project_name="MyModernProjectName", + file=file, + ) + + extraction_result = uipath.documents.extract( + tag="Production", + classification_result=max(classification_results, key=lambda result: result.confidence), ) + ``` + """ + project_type = _validate_extract_params_and_get_project_type( + project_name=project_name, + file=file, + file_path=file_path, + classification_result=classification_result, + 
project_type=project_type, + document_type_name=document_type_name, + ) project_id = self._get_project_id_and_validate_tag( - project_name=project_name, project_type=project_type, tag=tag + tag=tag, + project_name=project_name, + project_type=project_type, + classification_result=classification_result, ) - with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle: - document_id = self._start_digitization(project_id=project_id, file=handle) # type: ignore + document_id = self._get_document_id( + project_id=project_id, + file=file, + file_path=file_path, + classification_result=classification_result, + ) document_type_id = self._get_document_type_id( project_id=project_id, document_type_name=document_type_name, project_type=project_type, + classification_result=classification_result, ) operation_id = self._start_extraction( @@ -469,37 +870,43 @@ def extract( @traced(name="documents_extract_async", run_type="uipath") async def extract_async( self, - project_name: str, tag: str, + project_name: Optional[str] = None, file: Optional[FileContent] = None, file_path: Optional[str] = None, - project_type: ProjectType = ProjectType.IXP, + classification_result: Optional[ClassificationResult] = None, + project_type: Optional[ProjectType] = None, document_type_name: Optional[str] = None, ) -> Union[ExtractionResponse, ExtractionResponseIXP]: """Asynchronously version of the [`extract`][uipath._services.documents_service.DocumentsService.extract] method.""" - if file is None and file_path is None: - raise ValueError("Either `file` or `file_path` must be provided") - if file is not None and file_path is not None: - raise ValueError("`file` and `file_path` are mutually exclusive") - if project_type == ProjectType.MODERN and document_type_name is None: - raise ValueError( - "`document_type_name` must be provided when `project_type` is `ProjectType.MODERN`" - ) + project_type = _validate_extract_params_and_get_project_type( + project_name=project_name, + 
file=file, + file_path=file_path, + classification_result=classification_result, + project_type=project_type, + document_type_name=document_type_name, + ) project_id = await self._get_project_id_and_validate_tag_async( - project_name=project_name, project_type=project_type, tag=tag + tag=tag, + project_name=project_name, + project_type=project_type, + classification_result=classification_result, ) - with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle: - document_id = await self._start_digitization_async( - project_id=project_id, - file=handle, # type: ignore - ) + document_id = await self._get_document_id_async( + project_id=project_id, + file=file, + file_path=file_path, + classification_result=classification_result, + ) document_type_id = await self._get_document_type_id_async( project_id=project_id, document_type_name=document_type_name, project_type=project_type, + classification_result=classification_result, ) operation_id = await self._start_extraction_async( diff --git a/src/uipath/models/documents.py b/src/uipath/models/documents.py index 2280344fe..ee676bec0 100644 --- a/src/uipath/models/documents.py +++ b/src/uipath/models/documents.py @@ -151,3 +151,67 @@ class ValidatedResult(BaseModel): document_id: str = Field(alias="DocumentId") results_document: dict = Field(alias="ResultsDocument") # type: ignore + + +class Reference(BaseModel): + model_config = ConfigDict( + serialize_by_alias=True, + validate_by_alias=True, + ) + + text_start_index: int = Field(alias="TextStartIndex") + text_length: int = Field(alias="TextLength") + tokens: List[str] = Field(alias="Tokens") + + +class DocumentBounds(BaseModel): + model_config = ConfigDict( + serialize_by_alias=True, + validate_by_alias=True, + ) + + start_page: int = Field(alias="StartPage") + page_count: int = Field(alias="PageCount") + text_start_index: int = Field(alias="TextStartIndex") + text_length: int = Field(alias="TextLength") + page_range: str = Field(alias="PageRange") + + 
+class ClassificationResult(BaseModel): + """A model representing the result of a document classification. + + Attributes: + document_id (str): The ID of the classified document. + document_type_id (str): The ID of the predicted document type. + confidence (float): The confidence score of the classification. + ocr_confidence (float): The OCR confidence score of the document. + reference (Reference): The reference information for the classified document. + document_bounds (DocumentBounds): The bounds of the document in terms of pages and text. + classifier_name (str): The name of the classifier used. + project_id (str): The ID of the project associated with the classification. + """ + + model_config = ConfigDict( + serialize_by_alias=True, + validate_by_alias=True, + ) + + document_id: str = Field(alias="DocumentId") + document_type_id: str = Field(alias="DocumentTypeId") + confidence: float = Field(alias="Confidence") + ocr_confidence: float = Field(alias="OcrConfidence") + reference: Reference = Field(alias="Reference") + document_bounds: DocumentBounds = Field(alias="DocumentBounds") + classifier_name: str = Field(alias="ClassifierName") + project_id: str = Field(alias="ProjectId") + + +class ClassificationResponse(BaseModel): + model_config = ConfigDict( + serialize_by_alias=True, + validate_by_alias=True, + ) + + classification_results: List[ClassificationResult] = Field( + alias="classificationResults" + ) diff --git a/tests/sdk/services/test_documents_service.py b/tests/sdk/services/test_documents_service.py index 281d62728..12d7095bf 100644 --- a/tests/sdk/services/test_documents_service.py +++ b/tests/sdk/services/test_documents_service.py @@ -11,6 +11,7 @@ from uipath._services.documents_service import DocumentsService from uipath.models.documents import ( ActionPriority, + ClassificationResult, ExtractionResponse, ProjectType, ValidationAction, @@ -27,6 +28,12 @@ def documents_tests_data_path(tests_data_path: Path) -> Path: return tests_data_path / 
"documents_service" +@pytest.fixture +def classification_response(documents_tests_data_path: Path) -> dict: # type: ignore + with open(documents_tests_data_path / "classification_response.json", "r") as f: + return json.load(f) + + @pytest.fixture def ixp_extraction_response(documents_tests_data_path: Path) -> dict: # type: ignore with open(documents_tests_data_path / "ixp_extraction_response.json", "r") as f: @@ -58,6 +65,301 @@ def validated_result(documents_tests_data_path: Path) -> dict: # type: ignore class TestDocumentsService: + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.parametrize( + "file,file_path", + [ + (None, None), + (b"something", "something"), + ], + ) + @pytest.mark.asyncio + async def test_classify_with_invalid_parameters( + self, + service: DocumentsService, + mode: str, + file, + file_path, + ): + # ACT & ASSERT + with pytest.raises( + ValueError, + match="Exactly one of `file, file_path` must be provided", + ): + if mode == "async": + await service.classify_async( + tag="Production", + project_name="TestProject", + file=file, + file_path=file_path, + ) + else: + service.classify( + tag="Production", + project_name="TestProject", + file=file, + file_path=file_path, + ) + + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + async def test_extract_with_classification_result( + self, + httpx_mock: HTTPXMock, + service: DocumentsService, + base_url: str, + org: str, + tenant: str, + mode: str, + classification_response: dict, # type: ignore + modern_extraction_response: dict, # type: ignore + ): + # ARRANGE + project_id = str(uuid4()) + document_id = str(uuid4()) + document_type_id = str(uuid4()) + classification_response["classificationResults"][0]["ProjectId"] = project_id + classification_response["classificationResults"][0]["DocumentId"] = document_id + classification_response["classificationResults"][0]["DocumentTypeId"] = ( + document_type_id + ) + classification_result = 
ClassificationResult.model_validate( + classification_response["classificationResults"][0] + ) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/tags?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={ + "tags": [ + {"name": "Staging"}, + {"name": "Production"}, + ] + }, + ) + operation_id = str(uuid4()) + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/Production/document-types/{document_type_id}/extraction/start?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + match_json={"documentId": document_id}, + json={"operationId": operation_id}, + ) + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/Production/document-types/{document_type_id}/extraction/result/{operation_id}?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={"status": "Succeeded", "result": modern_extraction_response}, + ) + + # ACT + if mode == "async": + response = await service.extract_async( + tag="Production", classification_result=classification_result + ) + else: + response = service.extract( + tag="Production", classification_result=classification_result + ) + + # ASSERT + modern_extraction_response["projectId"] = project_id + modern_extraction_response["tag"] = "Production" + modern_extraction_response["documentTypeId"] = document_type_id + assert response.model_dump() == modern_extraction_response + + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + async def test_classify( + self, + httpx_mock: HTTPXMock, + service: DocumentsService, + base_url: str, + org: str, + tenant: str, + mode: str, + classification_response: dict, # type: ignore + ): + # ARRANGE + project_id = str(uuid4()) + document_id = 
str(uuid4()) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects?api-version=1.1&type=Modern", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={ + "projects": [ + {"id": str(uuid4()), "name": "OtherProject"}, + {"id": project_id, "name": "TestProject"}, + {"id": str(uuid4()), "name": "AnotherProject"}, + ] + }, + ) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/tags?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={ + "tags": [ + {"name": "Production"}, + ] + }, + ) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/digitization/start?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + match_files={"File": b"test content"}, + json={"documentId": document_id}, + ) + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/digitization/result/{document_id}?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={"status": "Succeeded", "result": {}}, + ) + + operation_id = str(uuid4()) + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/Production/classification/start?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + match_json={"documentId": document_id}, + json={"operationId": operation_id}, + ) + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/Production/classification/result/{operation_id}?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={"status": "Succeeded", "result": 
classification_response}, + ) + + # ACT + if mode == "async": + response = await service.classify_async( + tag="Production", project_name="TestProject", file=b"test content" + ) + else: + response = service.classify( + tag="Production", project_name="TestProject", file=b"test content" + ) + + # ASSERT + classification_response["classificationResults"][0]["ProjectId"] = project_id + assert ( + response[0].model_dump() + == classification_response["classificationResults"][0] + ) + + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + @pytest.mark.parametrize( + "project_name,file,file_path,classification_result,project_type,document_type_name, error", + [ + ( + None, + None, + None, + None, + None, + None, + "`classification_result` must be provided", + ), + ( + "TestProject", + None, + None, + None, + None, + None, + "`project_type` must be provided", + ), + ( + "TestProject", + None, + None, + None, + ProjectType.IXP, + None, + "Exactly one of `file, file_path` must be provided", + ), + ( + "TestProject", + b"something", + None, + None, + ProjectType.MODERN, + None, + "`document_type_name` must be provided", + ), + ( + "TestProject", + b"something", + None, + "dummy classification result", + ProjectType.MODERN, + "dummy doctype", + "`classification_result` must not be provided", + ), + ], + ) + async def test_extract_with_invalid_parameters( + self, + service: DocumentsService, + mode: str, + project_name, + file, + file_path, + classification_result, + project_type, + document_type_name, + error, + ): + # ACT & ASSERT + with pytest.raises(ValueError, match=error): + if mode == "async": + await service.extract_async( + tag="live", + project_name=project_name, + project_type=project_type, + file=file, + file_path=file_path, + classification_result=classification_result, + document_type_name=document_type_name, + ) + else: + service.extract( + tag="live", + project_name=project_name, + project_type=project_type, + file=file, + 
file_path=file_path, + classification_result=classification_result, + document_type_name=document_type_name, + ) + @pytest.mark.parametrize("mode", ["sync", "async"]) @pytest.mark.asyncio async def test_extract_ixp( @@ -112,6 +414,14 @@ async def test_extract_ixp( match_files={"File": b"test content"}, json={"documentId": document_id}, ) + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/digitization/result/{document_id}?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={"status": "Succeeded", "result": {}}, + ) httpx_mock.add_response( url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/live/document-types/{UUID(int=0)}/extraction/start?api-version=1.1", status_code=200, @@ -150,11 +460,17 @@ async def test_extract_ixp( # ACT if mode == "async": response = await service.extract_async( - project_name="TestProjectIXP", tag="live", file=b"test content" + project_name="TestProjectIXP", + project_type=ProjectType.IXP, + tag="live", + file=b"test content", ) else: response = service.extract( - project_name="TestProjectIXP", tag="live", file=b"test content" + project_name="TestProjectIXP", + project_type=ProjectType.IXP, + tag="live", + file=b"test content", ) # ASSERT @@ -162,7 +478,7 @@ async def test_extract_ixp( expected_response["projectId"] = project_id expected_response["tag"] = "live" expected_response["documentTypeId"] = str(UUID(int=0)) - assert response.model_dump() == ixp_extraction_response + assert response.model_dump() == expected_response @pytest.mark.parametrize("mode", ["sync", "async"]) @pytest.mark.asyncio @@ -221,6 +537,14 @@ async def test_extract_modern( match_files={"File": b"test content"}, json={"documentId": document_id}, ) + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/digitization/result/{document_id}?api-version=1.1", + status_code=200, + 
match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={"status": "Succeeded", "result": {}}, + ) httpx_mock.add_response( url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/document-types?api-version=1.1", @@ -260,7 +584,7 @@ async def test_extract_modern( # ACT if mode == "async": - response = service.extract( + response = await service.extract_async( project_name="TestProjectModern", tag="Production", file=b"test content", @@ -268,7 +592,7 @@ async def test_extract_modern( document_type_name="Invoice", ) else: - response = await service.extract_async( + response = service.extract( project_name="TestProjectModern", tag="Production", file=b"test content", @@ -281,7 +605,7 @@ async def test_extract_modern( expected_response["projectId"] = project_id expected_response["tag"] = "Production" expected_response["documentTypeId"] = document_type_id - assert response.model_dump() == modern_extraction_response + assert response.model_dump() == expected_response @pytest.mark.parametrize("mode", ["sync", "async"]) @pytest.mark.asyncio @@ -291,7 +615,7 @@ async def test_extract_modern_without_document_type_name( # ACT & ASSERT with pytest.raises( ValueError, - match="`document_type_name` must be provided when `project_type` is `ProjectType.MODERN`", + match="`document_type_name` must be provided", ): if mode == "async": await service.extract_async( @@ -343,12 +667,14 @@ async def test_get_document_type_id_not_found( project_id="dummy_project_id", document_type_name="NonExistentType", project_type=ProjectType.MODERN, + classification_result=None, ) else: service._get_document_type_id( project_id="dummy_project_id", document_type_name="NonExistentType", project_type=ProjectType.MODERN, + classification_result=None, ) @pytest.mark.parametrize("mode", ["sync", "async"]) @@ -361,11 +687,12 @@ async def test_extract_with_both_file_and_file_path_provided( # ACT & ASSERT with pytest.raises( ValueError, - match="`file` and 
`file_path` are mutually exclusive", + match="Exactly one of `file, file_path` must be provided", ): if mode == "async": await service.extract_async( project_name="TestProject", + project_type=ProjectType.IXP, tag="live", file=b"test content", file_path="path/to/file.pdf", @@ -373,6 +700,7 @@ async def test_extract_with_both_file_and_file_path_provided( else: service.extract( project_name="TestProject", + project_type=ProjectType.IXP, tag="live", file=b"test content", file_path="path/to/file.pdf", @@ -388,16 +716,18 @@ async def test_extract_with_neither_file_nor_file_path_provided( # ACT & ASSERT with pytest.raises( ValueError, - match="Either `file` or `file_path` must be provided", + match="Exactly one of `file, file_path` must be provided", ): if mode == "async": await service.extract_async( project_name="TestProject", + project_type=ProjectType.IXP, tag="live", ) else: service.extract( project_name="TestProject", + project_type=ProjectType.IXP, tag="live", ) @@ -430,11 +760,17 @@ async def test_extract_with_wrong_project_name( with pytest.raises(ValueError, match="Project 'TestProject' not found."): if mode == "async": await service.extract_async( - project_name="TestProject", tag="live", file=b"test content" + project_name="TestProject", + project_type=ProjectType.IXP, + tag="live", + file=b"test content", ) else: service.extract( - project_name="TestProject", tag="live", file=b"test content" + project_name="TestProject", + project_type=ProjectType.IXP, + tag="live", + file=b"test content", ) @pytest.mark.parametrize("mode", ["sync", "async"]) @@ -473,11 +809,17 @@ async def test_extract_with_wrong_tag( with pytest.raises(ValueError, match="Tag 'live' not found."): if mode == "async": await service.extract_async( - project_name="TestProject", tag="live", file=b"test content" + project_name="TestProject", + project_type=ProjectType.IXP, + tag="live", + file=b"test content", ) else: service.extract( - project_name="TestProject", tag="live", file=b"test content" 
+ project_name="TestProject", + project_type=ProjectType.IXP, + tag="live", + file=b"test content", ) @pytest.mark.parametrize("mode", ["sync", "async"]) diff --git a/tests/sdk/services/tests_data/documents_service/classification_response.json b/tests/sdk/services/tests_data/documents_service/classification_response.json new file mode 100644 index 000000000..a815fc783 --- /dev/null +++ b/tests/sdk/services/tests_data/documents_service/classification_response.json @@ -0,0 +1,23 @@ +{ + "classificationResults": [ + { + "DocumentTypeId": "0d209d75-9afd-ef11-aaa7-000d3a234147", + "DocumentId": "0a9f9927-e6af-f011-8e60-6045bd9ba6d0", + "Confidence": 0.53288215, + "OcrConfidence": -1.0, + "Reference": { + "TextStartIndex": 0, + "TextLength": 0, + "Tokens": [] + }, + "DocumentBounds": { + "StartPage": 0, + "PageCount": 1, + "TextStartIndex": 0, + "TextLength": 629, + "PageRange": "1" + }, + "ClassifierName": "Production_classifier" + } + ] +} diff --git a/uv.lock b/uv.lock index bd0a2f315..3431c4588 100644 --- a/uv.lock +++ b/uv.lock @@ -3047,7 +3047,7 @@ wheels = [ [[package]] name = "uipath" -version = "2.1.118" +version = "2.1.119" source = { editable = "." } dependencies = [ { name = "azure-monitor-opentelemetry" },