diff --git a/docs/core/documents_models.md b/docs/core/documents_models.md index 74654323f..16dc499dd 100644 --- a/docs/core/documents_models.md +++ b/docs/core/documents_models.md @@ -1 +1,3 @@ ::: uipath.models.documents + options: + show_bases: true diff --git a/pyproject.toml b/pyproject.toml index 94948df71..0223d3a5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath" -version = "2.1.111" +version = "2.1.112" description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools." readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.10" diff --git a/src/uipath/_services/documents_service.py b/src/uipath/_services/documents_service.py index 02c8eea64..27cd77ed0 100644 --- a/src/uipath/_services/documents_service.py +++ b/src/uipath/_services/documents_service.py @@ -1,7 +1,8 @@ import asyncio import time +from contextlib import nullcontext from pathlib import Path -from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, Tuple +from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, Tuple, Union from uuid import UUID from httpx._types import FileContent @@ -13,6 +14,8 @@ from ..models.documents import ( ActionPriority, ExtractionResponse, + ExtractionResponseIXP, + ProjectType, ValidatedResult, ValidationAction, ) @@ -41,11 +44,13 @@ def _get_common_headers(self) -> Dict[str, str]: "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", } - def _get_project_id_by_name(self, project_name: str) -> str: + def _get_project_id_by_name( + self, project_name: str, project_type: ProjectType + ) -> str: response = self.request( "GET", url=Endpoint("/du_/api/framework/projects"), - params={"api-version": 1.1, "type": "IXP"}, + params={"api-version": 1.1, "type": project_type.value}, headers=self._get_common_headers(), ) @@ -58,11 +63,13 @@ def _get_project_id_by_name(self, 
project_name: str) -> str: except StopIteration: raise ValueError(f"Project '{project_name}' not found.") from None - async def _get_project_id_by_name_async(self, project_name: str) -> str: + async def _get_project_id_by_name_async( + self, project_name: str, project_type: ProjectType + ) -> str: response = await self.request_async( "GET", url=Endpoint("/du_/api/framework/projects"), - params={"api-version": 1.1, "type": "IXP"}, + params={"api-version": 1.1, "type": project_type.value}, headers=self._get_common_headers(), ) @@ -93,8 +100,10 @@ async def _get_project_tags_async(self, project_id: str) -> Set[str]: ) return {tag["name"] for tag in response.json().get("tags", [])} - def _get_project_id_and_validate_tag(self, project_name: str, tag: str) -> str: - project_id = self._get_project_id_by_name(project_name) + def _get_project_id_and_validate_tag( + self, project_name: str, project_type: ProjectType, tag: str + ) -> str: + project_id = self._get_project_id_by_name(project_name, project_type) tags = self._get_project_tags(project_id) if tag not in tags: raise ValueError( @@ -104,9 +113,11 @@ def _get_project_id_and_validate_tag(self, project_name: str, tag: str) -> str: return project_id async def _get_project_id_and_validate_tag_async( - self, project_name: str, tag: str + self, project_name: str, project_type: ProjectType, tag: str ) -> str: - project_id = await self._get_project_id_by_name_async(project_name) + project_id = await self._get_project_id_by_name_async( + project_name, project_type + ) tags = await self._get_project_tags_async(project_id) if tag not in tags: raise ValueError( @@ -147,16 +158,71 @@ async def _start_digitization_async( ) ).json()["documentId"] + def _get_document_type_id( + self, + project_id: str, + document_type_name: Optional[str], + project_type: ProjectType, + ) -> str: + if project_type == ProjectType.IXP: + return str(UUID(int=0)) + + response = self.request( + "GET", + 
url=Endpoint(f"/du_/api/framework/projects/{project_id}/document-types"), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + ) + + try: + return next( + extractor["id"] + for extractor in response.json().get("documentTypes", []) + if extractor["name"].lower() == document_type_name.lower() # type: ignore + ) + except StopIteration: + raise ValueError( + f"Document type '{document_type_name}' not found." + ) from None + + async def _get_document_type_id_async( + self, + project_id: str, + document_type_name: Optional[str], + project_type: ProjectType, + ) -> str: + if project_type == ProjectType.IXP: + return str(UUID(int=0)) + + response = await self.request_async( + "GET", + url=Endpoint(f"/du_/api/framework/projects/{project_id}/document-types"), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + ) + + try: + return next( + extractor["id"] + for extractor in response.json().get("documentTypes", []) + if extractor["name"].lower() == document_type_name.lower() # type: ignore + ) + except StopIteration: + raise ValueError( + f"Document type '{document_type_name}' not found." 
+ ) from None + def _start_extraction( self, project_id: str, tag: str, + document_type_id: str, document_id: str, ) -> str: return self.request( "POST", url=Endpoint( - f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/extraction/start" + f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/start" ), params={"api-version": 1.1}, headers=self._get_common_headers(), @@ -167,13 +233,14 @@ async def _start_extraction_async( self, project_id: str, tag: str, + document_type_id: str, document_id: str, ) -> str: return ( await self.request_async( "POST", url=Endpoint( - f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/extraction/start" + f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/start" ), params={"api-version": 1.1}, headers=self._get_common_headers(), @@ -183,7 +250,7 @@ async def _start_extraction_async( def _wait_for_operation( self, - result_getter: Callable[[], Tuple[str, Any]], + result_getter: Callable[[], Tuple[Any, Optional[Any], Optional[Any]]], wait_statuses: List[str], success_status: str, ) -> Any: @@ -195,19 +262,23 @@ def _wait_for_operation( status in wait_statuses and (time.monotonic() - start_time) < POLLING_TIMEOUT ): - status, result = result_getter() + status, error, result = result_getter() time.sleep(POLLING_INTERVAL) if status != success_status: if time.monotonic() - start_time >= POLLING_TIMEOUT: raise TimeoutError("Operation timed out.") - raise RuntimeError(f"Operation failed with status: {status}") + raise RuntimeError( + f"Operation failed with status: {status}, error: {error}" + ) return result async def _wait_for_operation_async( self, - result_getter: Callable[[], Awaitable[Tuple[str, Any]]], + result_getter: Callable[ + [], Awaitable[Tuple[Any, Optional[Any], Optional[Any]]] + ], wait_statuses: List[str], success_status: str, ) -> Any: @@ -219,55 +290,80 @@ async def 
_wait_for_operation_async( status in wait_statuses and (time.monotonic() - start_time) < POLLING_TIMEOUT ): - status, result = await result_getter() + status, error, result = await result_getter() await asyncio.sleep(POLLING_INTERVAL) if status != success_status: if time.monotonic() - start_time >= POLLING_TIMEOUT: raise TimeoutError("Operation timed out.") - raise RuntimeError(f"Operation failed with status: {status}") + raise RuntimeError( + f"Operation failed with status: {status}, error: {error}" + ) return result def _wait_for_extraction( - self, project_id: str, tag: str, operation_id: str - ) -> ExtractionResponse: - extraction_response = self._wait_for_operation( - result_getter=lambda: ( - ( - result := self.request( - method="GET", - url=Endpoint( - f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/extraction/result/{operation_id}" - ), - params={"api-version": 1.1}, - headers=self._get_common_headers(), - ).json() - )["status"], + self, + project_id: str, + tag: str, + document_type_id: str, + operation_id: str, + project_type: ProjectType, + ) -> Union[ExtractionResponse, ExtractionResponseIXP]: + def result_getter() -> Tuple[str, str, Any]: + result = self.request( + method="GET", + url=Endpoint( + f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + ).json() + return ( + result["status"], + result.get("error", None), result.get("result", None), - ), + ) + + extraction_response = self._wait_for_operation( + result_getter=result_getter, wait_statuses=["NotStarted", "Running"], success_status="Succeeded", ) extraction_response["projectId"] = project_id extraction_response["tag"] = tag + extraction_response["documentTypeId"] = document_type_id + + if project_type == ProjectType.IXP: + return ExtractionResponseIXP.model_validate(extraction_response) + return 
ExtractionResponse.model_validate(extraction_response) async def _wait_for_extraction_async( - self, project_id: str, tag: str, operation_id: str - ) -> ExtractionResponse: - async def result_getter() -> Tuple[str, Any]: - result = await self.request_async( - method="GET", - url=Endpoint( - f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/extraction/result/{operation_id}" - ), - params={"api-version": 1.1}, - headers=self._get_common_headers(), + self, + project_id: str, + tag: str, + document_type_id: str, + operation_id: str, + project_type: ProjectType, + ) -> Union[ExtractionResponse, ExtractionResponseIXP]: + async def result_getter() -> Tuple[str, str, Any]: + result = ( + await self.request_async( + method="GET", + url=Endpoint( + f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + ) + ).json() + return ( + result["status"], + result.get("error", None), + result.get("result", None), ) - json_result = result.json() - return json_result["status"], json_result.get("result", None) extraction_response = await self._wait_for_operation_async( result_getter=result_getter, @@ -277,6 +373,11 @@ async def result_getter() -> Tuple[str, Any]: extraction_response["projectId"] = project_id extraction_response["tag"] = tag + extraction_response["documentTypeId"] = document_type_id + + if project_type == ProjectType.IXP: + return ExtractionResponseIXP.model_validate(extraction_response) + return ExtractionResponse.model_validate(extraction_response) @traced(name="documents_extract", run_type="uipath") @@ -286,14 +387,18 @@ def extract( tag: str, file: Optional[FileContent] = None, file_path: Optional[str] = None, - ) -> ExtractionResponse: + project_type: ProjectType = ProjectType.IXP, + document_type_name: Optional[str] = None, + ) -> Union[ExtractionResponse, ExtractionResponseIXP]: """Extract predicted 
data from a document using an IXP project. Args: - project_name (str): Name of the IXP project. Details about IXP projects can be found in the [official documentation](https://docs.uipath.com/ixp/automation-cloud/latest/overview/managing-projects#creating-a-new-project). + project_name (str): Name of the [IXP](https://docs.uipath.com/ixp/automation-cloud/latest/overview/managing-projects#creating-a-new-project)/[DU Modern](https://docs.uipath.com/document-understanding/automation-cloud/latest/user-guide/about-document-understanding) project. tag (str): Tag of the published project version. file (FileContent, optional): The document file to be processed. file_path (str, optional): Path to the document file to be processed. + project_type (ProjectType, optional): Type of the project. Defaults to `ProjectType.IXP`. + document_type_name (str, optional): Document type name associated with the extractor to be used for extraction. Required if `project_type` is `ProjectType.MODERN`. Note: Either `file` or `file_path` must be provided, but not both. @@ -302,38 +407,63 @@ def extract( ExtractionResponse: The extraction result containing predicted data. 
Examples: + IXP projects: ```python with open("path/to/document.pdf", "rb") as file: extraction_response = service.extract( - project_name="MyProject", + project_name="MyIXPProjectName", tag="live", file=file, ) ``` + + DU Modern projects: + ```python + with open("path/to/document.pdf", "rb") as file: + extraction_response = service.extract( + project_name="MyModernProjectName", + tag="Production", + file=file, + project_type=ProjectType.MODERN, + document_type_name="Receipts", + ) + ``` """ if file is None and file_path is None: raise ValueError("Either `file` or `file_path` must be provided") if file is not None and file_path is not None: raise ValueError("`file` and `file_path` are mutually exclusive") + if project_type == ProjectType.MODERN and document_type_name is None: + raise ValueError( + "`document_type_name` must be provided when `project_type` is `ProjectType.MODERN`" + ) project_id = self._get_project_id_and_validate_tag( - project_name=project_name, tag=tag + project_name=project_name, project_type=project_type, tag=tag ) - if file_path is not None: - with open(Path(file_path), "rb") as handle: - document_id = self._start_digitization( - project_id=project_id, file=handle - ) - else: - document_id = self._start_digitization(project_id=project_id, file=file) # type: ignore + with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle: + document_id = self._start_digitization(project_id=project_id, file=handle) # type: ignore + + document_type_id = self._get_document_type_id( + project_id=project_id, + document_type_name=document_type_name, + project_type=project_type, + ) operation_id = self._start_extraction( - project_id=project_id, tag=tag, document_id=document_id + project_id=project_id, + tag=tag, + document_type_id=document_type_id, + document_id=document_id, ) return self._wait_for_extraction( - project_id=project_id, tag=tag, operation_id=operation_id + project_id=project_id, + tag=tag, + document_type_id=document_type_id, + 
operation_id=operation_id, + project_type=project_type, ) @traced(name="documents_extract_async", run_type="uipath") @@ -343,40 +473,55 @@ async def extract_async( tag: str, file: Optional[FileContent] = None, file_path: Optional[str] = None, - ) -> ExtractionResponse: - """Asynchronously extract predicted data from a document using an IXP project.""" + project_type: ProjectType = ProjectType.IXP, + document_type_name: Optional[str] = None, + ) -> Union[ExtractionResponse, ExtractionResponseIXP]: + """Asynchronous version of the [`extract`][uipath._services.documents_service.DocumentsService.extract] method.""" if file is None and file_path is None: raise ValueError("Either `file` or `file_path` must be provided") if file is not None and file_path is not None: raise ValueError("`file` and `file_path` are mutually exclusive") + if project_type == ProjectType.MODERN and document_type_name is None: + raise ValueError( + "`document_type_name` must be provided when `project_type` is `ProjectType.MODERN`" + ) project_id = await self._get_project_id_and_validate_tag_async( - project_name=project_name, tag=tag + project_name=project_name, project_type=project_type, tag=tag ) - if file_path is not None: - with open(Path(file_path), "rb") as handle: - document_id = await self._start_digitization_async( - project_id=project_id, file=handle - ) - else: + with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle: document_id = await self._start_digitization_async( project_id=project_id, - file=file, # type: ignore + file=handle, # type: ignore ) + document_type_id = await self._get_document_type_id_async( + project_id=project_id, + document_type_name=document_type_name, + project_type=project_type, + ) + operation_id = await self._start_extraction_async( - project_id=project_id, tag=tag, document_id=document_id + project_id=project_id, + tag=tag, + document_type_id=document_type_id, + document_id=document_id, ) return await 
self._wait_for_extraction_async( - project_id=project_id, tag=tag, operation_id=operation_id + project_id=project_id, + tag=tag, + document_type_id=document_type_id, + operation_id=operation_id, + project_type=project_type, ) def _start_validation( self, project_id: str, tag: str, + document_type_id: str, action_title: str, action_priority: ActionPriority, action_catalog: str, @@ -388,7 +533,7 @@ def _start_validation( return self.request( "POST", url=Endpoint( - f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/validation/start" + f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/validation/start" ), params={"api-version": 1.1}, headers=self._get_common_headers(), @@ -409,6 +554,7 @@ async def _start_validation_async( self, project_id: str, tag: str, + document_type_id: str, action_title: str, action_priority: ActionPriority, action_catalog: str, @@ -421,7 +567,7 @@ async def _start_validation_async( await self.request_async( "POST", url=Endpoint( - f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/validation/start" + f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/validation/start" ), params={"api-version": 1.1}, headers=self._get_common_headers(), @@ -440,25 +586,25 @@ async def _start_validation_async( ).json()["operationId"] def _get_validation_result( - self, project_id: str, tag: str, operation_id: str + self, project_id: str, tag: str, document_type_id: str, operation_id: str ) -> Dict: # type: ignore return self.request( method="GET", url=Endpoint( - f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/validation/result/{operation_id}" + f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/validation/result/{operation_id}" ), params={"api-version": 1.1}, headers=self._get_common_headers(), ).json() async def _get_validation_result_async( - self, project_id: str, tag: str, 
operation_id: str + self, project_id: str, tag: str, document_type_id: str, operation_id: str ) -> Dict: # type: ignore return ( await self.request_async( method="GET", url=Endpoint( - f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/validation/result/{operation_id}" + f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/validation/result/{operation_id}" ), params={"api-version": 1.1}, headers=self._get_common_headers(), @@ -466,43 +612,58 @@ async def _get_validation_result_async( ).json() def _wait_for_create_validation_action( - self, project_id: str, tag: str, operation_id: str + self, project_id: str, tag: str, document_type_id: str, operation_id: str ) -> ValidationAction: - response = self._wait_for_operation( - lambda: ( - ( - result := self._get_validation_result( - project_id=project_id, tag=tag, operation_id=operation_id - ) - )["status"], + def result_getter() -> Tuple[Any, Optional[Any], Optional[Any]]: + result = self._get_validation_result( + project_id=project_id, + tag=tag, + document_type_id=document_type_id, + operation_id=operation_id, + ) + return ( + result["status"], + result.get("error", None), result.get("result", None), - ), + ) + + response = self._wait_for_operation( + result_getter=result_getter, wait_statuses=["NotStarted", "Running"], success_status="Succeeded", ) response["projectId"] = project_id response["tag"] = tag + response["documentTypeId"] = document_type_id response["operationId"] = operation_id return ValidationAction.model_validate(response) async def _wait_for_create_validation_action_async( - self, project_id: str, tag: str, operation_id: str + self, project_id: str, tag: str, document_type_id: str, operation_id: str ) -> ValidationAction: - async def result_getter() -> Tuple[str, Any]: + async def result_getter_async() -> Tuple[Any, Optional[Any], Optional[Any]]: result = await self._get_validation_result_async( - project_id=project_id, tag=tag, 
operation_id=operation_id + project_id=project_id, + tag=tag, + document_type_id=document_type_id, + operation_id=operation_id, + ) + return ( + result["status"], + result.get("error", None), + result.get("result", None), ) - return result["status"], result.get("result", None) response = await self._wait_for_operation_async( - result_getter=result_getter, + result_getter=result_getter_async, wait_statuses=["NotStarted", "Running"], success_status="Succeeded", ) response["projectId"] = project_id response["tag"] = tag + response["documentTypeId"] = document_type_id response["operationId"] = operation_id return ValidationAction.model_validate(response) @@ -546,7 +707,8 @@ def create_validation_action( """ operation_id = self._start_validation( project_id=extraction_response.project_id, - tag=extraction_response.tag, # should I validate tag again? + tag=extraction_response.tag, + document_type_id=extraction_response.document_type_id, action_title=action_title, action_priority=action_priority, action_catalog=action_catalog, @@ -559,6 +721,7 @@ def create_validation_action( return self._wait_for_create_validation_action( project_id=extraction_response.project_id, tag=extraction_response.tag, + document_type_id=extraction_response.document_type_id, operation_id=operation_id, ) @@ -573,11 +736,11 @@ async def create_validation_action_async( storage_bucket_directory_path: str, extraction_response: ExtractionResponse, ) -> ValidationAction: - """Asynchronously create a validation action for a document based on the extraction response.""" - # Add reference to sync method docstring + """Asynchronous version of the [`create_validation_action`][uipath._services.documents_service.DocumentsService.create_validation_action] method.""" operation_id = await self._start_validation_async( project_id=extraction_response.project_id, - tag=extraction_response.tag, # should I validate tag again? 
+ tag=extraction_response.tag, + document_type_id=extraction_response.document_type_id, action_title=action_title, action_priority=action_priority, action_catalog=action_catalog, @@ -590,6 +753,7 @@ async def create_validation_action_async( return await self._wait_for_create_validation_action_async( project_id=extraction_response.project_id, tag=extraction_response.tag, + document_type_id=extraction_response.document_type_id, operation_id=operation_id, ) @@ -613,17 +777,22 @@ def get_validation_result( validated_result = service.get_validation_result(validation_action) ``` """ - response = self._wait_for_operation( - result_getter=lambda: ( - ( - result := self._get_validation_result( - project_id=validation_action.project_id, - tag=validation_action.tag, - operation_id=validation_action.operation_id, - ) - )["result"]["actionStatus"], + + def result_getter() -> Tuple[str, None, Any]: + result = self._get_validation_result( + project_id=validation_action.project_id, + tag=validation_action.tag, + document_type_id=validation_action.document_type_id, + operation_id=validation_action.operation_id, + ) + return ( + result["result"]["actionStatus"], + None, result["result"].get("validatedExtractionResults", None), - ), + ) + + response = self._wait_for_operation( + result_getter=result_getter, wait_statuses=["Unassigned", "Pending"], success_status="Completed", ) @@ -634,16 +803,19 @@ def get_validation_result( async def get_validation_result_async( self, validation_action: ValidationAction ) -> ValidatedResult: - """Asynchronously get the result of a validation action.""" + """Asynchronous version of the [`get_validation_result`][uipath._services.documents_service.DocumentsService.get_validation_result] method.""" - async def result_getter() -> Tuple[str, Any]: + async def result_getter() -> Tuple[str, None, Any]: result = await self._get_validation_result_async( project_id=validation_action.project_id, tag=validation_action.tag, + 
document_type_id=validation_action.document_type_id, operation_id=validation_action.operation_id, ) - return result["result"]["actionStatus"], result["result"].get( - "validatedExtractionResults", None + return ( + result["result"]["actionStatus"], + None, + result["result"].get("validatedExtractionResults", None), ) response = await self._wait_for_operation_async( diff --git a/src/uipath/models/documents.py b/src/uipath/models/documents.py index 42d782248..2280344fe 100644 --- a/src/uipath/models/documents.py +++ b/src/uipath/models/documents.py @@ -18,10 +18,25 @@ class FieldType(str, Enum): class ActionPriority(str, Enum): + """Priority levels for validation actions. More details can be found in the [official documentation](https://docs.uipath.com/action-center/automation-cloud/latest/user-guide/create-document-validation-action#configuration).""" + LOW = "Low" + """Low priority""" MEDIUM = "Medium" + """Medium priority""" HIGH = "High" + """High priority""" CRITICAL = "Critical" + """Critical priority""" + + +class ProjectType(str, Enum): + """Project types available and supported by Documents Service.""" + + IXP = "IXP" + """Represents an [IXP](https://docs.uipath.com/ixp/automation-cloud/latest/overview/managing-projects#creating-a-new-project) project type.""" + MODERN = "Modern" + """Represents a [DU Modern](https://docs.uipath.com/document-understanding/automation-cloud/latest/user-guide/about-document-understanding) project type.""" class FieldValueProjection(BaseModel): @@ -71,9 +86,9 @@ class ExtractionResponse(BaseModel): Attributes: extraction_result (ExtractionResult): The result of the extraction process. - data_projection (List[FieldGroupValueProjection]): A simplified projection of the extracted data. project_id (str): The ID of the project associated with the extraction. tag (str): The tag associated with the published model version. + document_type_id (str): The ID of the document type associated with the extraction. 
""" model_config = ConfigDict( @@ -82,9 +97,19 @@ class ExtractionResponse(BaseModel): ) extraction_result: ExtractionResult = Field(alias="extractionResult") - data_projection: List[FieldGroupValueProjection] = Field(alias="dataProjection") project_id: str = Field(alias="projectId") tag: str + document_type_id: str = Field(alias="documentTypeId") + + +class ExtractionResponseIXP(ExtractionResponse): + """A model representing the response from a document extraction process for IXP projects. + + Attributes: + data_projection (List[FieldGroupValueProjection]): A simplified projection of the extracted data. + """ + + data_projection: List[FieldGroupValueProjection] = Field(alias="dataProjection") class ValidationAction(BaseModel): @@ -107,6 +132,7 @@ class ValidationAction(BaseModel): action_status: str = Field(alias="actionStatus") project_id: str = Field(alias="projectId") tag: str + document_type_id: str = Field(alias="documentTypeId") operation_id: str = Field(alias="operationId") diff --git a/tests/sdk/services/test_documents_service.py b/tests/sdk/services/test_documents_service.py index 4ceb31891..281d62728 100644 --- a/tests/sdk/services/test_documents_service.py +++ b/tests/sdk/services/test_documents_service.py @@ -9,7 +9,12 @@ from uipath._config import Config from uipath._execution_context import ExecutionContext from uipath._services.documents_service import DocumentsService -from uipath.models.documents import ActionPriority, ExtractionResponse, ValidationAction +from uipath.models.documents import ( + ActionPriority, + ExtractionResponse, + ProjectType, + ValidationAction, +) @pytest.fixture @@ -23,8 +28,14 @@ def documents_tests_data_path(tests_data_path: Path) -> Path: @pytest.fixture -def extraction_response(documents_tests_data_path: Path) -> dict: # type: ignore - with open(documents_tests_data_path / "extraction_response.json", "r") as f: +def ixp_extraction_response(documents_tests_data_path: Path) -> dict: # type: ignore + with 
open(documents_tests_data_path / "ixp_extraction_response.json", "r") as f: + return json.load(f) + + +@pytest.fixture +def modern_extraction_response(documents_tests_data_path: Path) -> dict: # type: ignore + with open(documents_tests_data_path / "modern_extraction_response.json", "r") as f: return json.load(f) @@ -49,14 +60,14 @@ def validated_result(documents_tests_data_path: Path) -> dict: # type: ignore class TestDocumentsService: @pytest.mark.parametrize("mode", ["sync", "async"]) @pytest.mark.asyncio - async def test_extract( + async def test_extract_ixp( self, httpx_mock: HTTPXMock, service: DocumentsService, base_url: str, org: str, tenant: str, - extraction_response: dict, # type: ignore + ixp_extraction_response: dict, # type: ignore mode: str, ): # ARRANGE @@ -73,7 +84,7 @@ async def test_extract( json={ "projects": [ {"id": str(uuid4()), "name": "OtherProject"}, - {"id": project_id, "name": "TestProject"}, + {"id": project_id, "name": "TestProjectIXP"}, {"id": str(uuid4()), "name": "AnotherProject"}, ] }, @@ -117,7 +128,7 @@ async def test_extract( match_headers={ "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", }, - json={"status": "NotStarted", "result": extraction_response}, + json={"status": "NotStarted", "result": ixp_extraction_response}, ) httpx_mock.add_response( url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/live/document-types/{UUID(int=0)}/extraction/result/{operation_id}?api-version=1.1", @@ -125,7 +136,7 @@ async def test_extract( match_headers={ "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", }, - json={"status": "Running", "result": extraction_response}, + json={"status": "Running", "result": ixp_extraction_response}, ) httpx_mock.add_response( url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/live/document-types/{UUID(int=0)}/extraction/result/{operation_id}?api-version=1.1", @@ -133,24 +144,212 @@ async def test_extract( match_headers={ 
"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", }, - json={"status": "Succeeded", "result": extraction_response}, + json={"status": "Succeeded", "result": ixp_extraction_response}, ) # ACT if mode == "async": response = await service.extract_async( - project_name="TestProject", tag="live", file=b"test content" + project_name="TestProjectIXP", tag="live", file=b"test content" ) else: response = service.extract( - project_name="TestProject", tag="live", file=b"test content" + project_name="TestProjectIXP", tag="live", file=b"test content" ) # ASSERT - expected_response = extraction_response + expected_response = ixp_extraction_response expected_response["projectId"] = project_id expected_response["tag"] = "live" - assert response.model_dump() == extraction_response + expected_response["documentTypeId"] = str(UUID(int=0)) + assert response.model_dump() == ixp_extraction_response + + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + async def test_extract_modern( + self, + httpx_mock: HTTPXMock, + service: DocumentsService, + base_url: str, + org: str, + tenant: str, + modern_extraction_response: dict, # type: ignore + mode: str, + ): + # ARRANGE + project_id = str(uuid4()) + document_type_id = str(uuid4()) + document_id = str(uuid4()) + operation_id = str(uuid4()) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects?api-version=1.1&type=Modern", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={ + "projects": [ + {"id": str(uuid4()), "name": "OtherProject"}, + {"id": project_id, "name": "TestProjectModern"}, + {"id": str(uuid4()), "name": "AnotherProject"}, + ] + }, + ) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/tags?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={ + "tags": [ + {"name": "Development"}, 
+ {"name": "Staging"}, + {"name": "Production"}, + ] + }, + ) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/digitization/start?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + match_files={"File": b"test content"}, + json={"documentId": document_id}, + ) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/document-types?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={ + "documentTypes": [ + {"id": str(uuid4()), "name": "Receipt"}, + {"id": document_type_id, "name": "Invoice"}, + {"id": str(uuid4()), "name": "Contract"}, + ] + }, + ) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/Production/document-types/{document_type_id}/extraction/start?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + match_json={"documentId": document_id}, + json={"operationId": operation_id}, + ) + + statuses = ["NotStarted", "Running", "Succeeded"] + for status in statuses: + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/Production/document-types/{document_type_id}/extraction/result/{operation_id}?api-version=1.1", + status_code=200, + match_headers={ + "X-UiPath-Internal-ConsumptionSourceType": "CodedAgents", + }, + json={"status": status, "result": modern_extraction_response}, + ) + + # ACT + if mode == "async": + response = service.extract( + project_name="TestProjectModern", + tag="Production", + file=b"test content", + project_type=ProjectType.MODERN, + document_type_name="Invoice", + ) + else: + response = await service.extract_async( + project_name="TestProjectModern", + tag="Production", + file=b"test content", + project_type=ProjectType.MODERN, + 
document_type_name="Invoice", + ) + + # ASSERT + expected_response = modern_extraction_response + expected_response["projectId"] = project_id + expected_response["tag"] = "Production" + expected_response["documentTypeId"] = document_type_id + assert response.model_dump() == modern_extraction_response + + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + async def test_extract_modern_without_document_type_name( + self, service: DocumentsService, mode: str + ): + # ACT & ASSERT + with pytest.raises( + ValueError, + match="`document_type_name` must be provided when `project_type` is `ProjectType.MODERN`", + ): + if mode == "async": + await service.extract_async( + project_name="TestProjectModern", + tag="Production", + file=b"test content", + project_type=ProjectType.MODERN, + ) + else: + service.extract( + project_name="TestProjectModern", + tag="Production", + file=b"test content", + project_type=ProjectType.MODERN, + ) + + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + async def test_get_document_type_id_not_found( + self, + httpx_mock: HTTPXMock, + service: DocumentsService, + base_url: str, + org: str, + tenant: str, + mode: str, + ): + # ARRANGE + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/dummy_project_id/document-types?api-version=1.1", + status_code=200, + match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, + json={ + "documentTypes": [ + {"id": str(uuid4()), "name": "Receipt"}, + {"id": str(uuid4()), "name": "Invoice"}, + {"id": str(uuid4()), "name": "Contract"}, + ] + }, + ) + + # ACT & ASSERT + with pytest.raises( + ValueError, + match="Document type 'NonExistentType' not found.", + ): + if mode == "async": + await service._get_document_type_id_async( + project_id="dummy_project_id", + document_type_name="NonExistentType", + project_type=ProjectType.MODERN, + ) + else: + service._get_document_type_id( + project_id="dummy_project_id", + 
document_type_name="NonExistentType", + project_type=ProjectType.MODERN, + ) @pytest.mark.parametrize("mode", ["sync", "async"]) @pytest.mark.asyncio @@ -290,13 +489,14 @@ async def test_create_validation_action( base_url: str, org: str, tenant: str, - extraction_response: dict, # type: ignore + ixp_extraction_response: dict, # type: ignore create_validation_action_response: dict, # type: ignore mode: str, ): # ARRANGE project_id = str(uuid4()) operation_id = str(uuid4()) + document_type_id = str(UUID(int=0)) tag = "live" action_title = "TestAction" action_priority = ActionPriority.HIGH @@ -310,8 +510,8 @@ async def test_create_validation_action( status_code=200, match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, match_json={ - "extractionResult": extraction_response["extractionResult"], - "documentId": extraction_response["extractionResult"]["DocumentId"], + "extractionResult": ixp_extraction_response["extractionResult"], + "documentId": ixp_extraction_response["extractionResult"]["DocumentId"], "actionTitle": action_title, "actionPriority": action_priority, "actionCatalog": action_catalog, @@ -343,8 +543,9 @@ async def test_create_validation_action( json={"status": "Succeeded", "result": create_validation_action_response}, ) - extraction_response["projectId"] = project_id - extraction_response["tag"] = tag + ixp_extraction_response["projectId"] = project_id + ixp_extraction_response["tag"] = tag + ixp_extraction_response["documentTypeId"] = document_type_id # ACT if mode == "async": @@ -356,7 +557,7 @@ async def test_create_validation_action( storage_bucket_name=storage_bucket_name, storage_bucket_directory_path=storage_bucket_directory_path, extraction_response=ExtractionResponse.model_validate( - extraction_response + ixp_extraction_response ), ) else: @@ -368,13 +569,14 @@ async def test_create_validation_action( storage_bucket_name=storage_bucket_name, storage_bucket_directory_path=storage_bucket_directory_path, 
extraction_response=ExtractionResponse.model_validate( - extraction_response + ixp_extraction_response ), ) # ASSERT create_validation_action_response["projectId"] = project_id create_validation_action_response["tag"] = tag + create_validation_action_response["documentTypeId"] = document_type_id create_validation_action_response["operationId"] = operation_id assert response.model_dump() == create_validation_action_response @@ -394,9 +596,11 @@ async def test_get_validation_result( # ARRANGE project_id = str(uuid4()) operation_id = str(uuid4()) + document_type_id = str(UUID(int=0)) create_validation_action_response["projectId"] = project_id create_validation_action_response["tag"] = "live" + create_validation_action_response["documentTypeId"] = document_type_id create_validation_action_response["operationId"] = operation_id create_validation_action_response["actionStatus"] = "Completed" create_validation_action_response["validatedExtractionResults"] = ( @@ -404,7 +608,7 @@ async def test_get_validation_result( ) httpx_mock.add_response( - url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/live/document-types/{UUID(int=0)}/validation/result/{operation_id}?api-version=1.1", + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/live/document-types/{document_type_id}/validation/result/{operation_id}?api-version=1.1", status_code=200, match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, json={"status": "Succeeded", "result": create_validation_action_response}, @@ -440,10 +644,10 @@ async def test_wait_for_operation_timeout( mock_time.monotonic.side_effect = [0, 10, 30, 60, 200, 280, 310, 350] def mock_result_getter(): - return "Running", None + return "Running", None, None async def mock_result_getter_async(): - return "Running", None + return "Running", None, None # ACT & ASSERT with pytest.raises(TimeoutError, match="Operation timed out."): @@ -470,13 +674,15 @@ async def test_wait_for_operation_failed( # 
ARRANGE def mock_result_getter(): - return "Failed", None + return "Failed", "Dummy error", None async def mock_result_getter_async(): - return "Failed", None + return "Failed", "Dummy error", None # ACT & ASSERT - with pytest.raises(Exception, match="Operation failed with status: Failed"): + with pytest.raises( + Exception, match="Operation failed with status: Failed, error: Dummy error" + ): if mode == "async": await service._wait_for_operation_async( result_getter=mock_result_getter_async, diff --git a/tests/sdk/services/tests_data/documents_service/extraction_response.json b/tests/sdk/services/tests_data/documents_service/ixp_extraction_response.json similarity index 100% rename from tests/sdk/services/tests_data/documents_service/extraction_response.json rename to tests/sdk/services/tests_data/documents_service/ixp_extraction_response.json diff --git a/tests/sdk/services/tests_data/documents_service/modern_extraction_response.json b/tests/sdk/services/tests_data/documents_service/modern_extraction_response.json new file mode 100644 index 000000000..141919c16 --- /dev/null +++ b/tests/sdk/services/tests_data/documents_service/modern_extraction_response.json @@ -0,0 +1,192 @@ +{ + "extractionResult": { + "DocumentId": "da303456-7ba3-f011-8e60-6045bd9ba6d0", + "ResultsVersion": 0, + "ResultsDocument": { + "Bounds": { + "StartPage": 0, + "PageCount": 1, + "TextStartIndex": 0, + "TextLength": 629, + "PageRange": "1" + }, + "Language": "eng", + "DocumentGroup": "", + "DocumentCategory": "", + "DocumentTypeId": "2e4e0ad9-72a3-f011-8e61-000d3a395253", + "DocumentTypeName": "receipts", + "DocumentTypeDataVersion": 0, + "DataVersion": 0, + "DocumentTypeSource": "Automatic", + "DocumentTypeField": { + "Components": [], + "Value": "receipts", + "UnformattedValue": "", + "Reference": { + "TextStartIndex": 0, + "TextLength": 0, + "Tokens": [] + }, + "DerivedFields": [], + "Confidence": 1.0, + "OperatorConfirmed": false, + "OcrConfidence": -1.0, + "TextType": "Unknown", + 
"ValidatorNotes": "", + "ValidatorNotesInfo": "" + }, + "Fields": [ + { + "FieldId": "field-2", + "FieldName": "To", + "FieldType": "Text", + "IsMissing": true, + "DataSource": "Automatic", + "Values": [], + "DataVersion": 0, + "OperatorConfirmed": false, + "ValidatorNotes": "", + "ValidatorNotesInfo": "" + }, + { + "FieldId": "field-1", + "FieldName": "From", + "FieldType": "Text", + "IsMissing": false, + "DataSource": "Automatic", + "Values": [ + { + "Components": [], + "Value": "with", + "UnformattedValue": "with", + "Reference": { + "TextStartIndex": 275, + "TextLength": 4, + "Tokens": [ + { + "TextStartIndex": 275, + "TextLength": 4, + "Page": 0, + "PageWidth": 595.0, + "PageHeight": 842.0, + "Boxes": [[378.08, 96.32, 12.27, 8.54]] + } + ] + }, + "DerivedFields": [], + "Confidence": 0.56592697, + "OperatorConfirmed": false, + "OcrConfidence": 1.0, + "TextType": "Text", + "ValidatorNotes": "", + "ValidatorNotesInfo": "" + } + ], + "DataVersion": 0, + "OperatorConfirmed": false, + "ValidatorNotes": "", + "ValidatorNotesInfo": "" + }, + { + "FieldId": "date", + "FieldName": "Transaction Date", + "FieldType": "Date", + "IsMissing": false, + "DataSource": "Automatic", + "Values": [ + { + "Components": [], + "Value": "2025-12-01", + "UnformattedValue": "December", + "Reference": { + "TextStartIndex": 559, + "TextLength": 8, + "Tokens": [ + { + "TextStartIndex": 559, + "TextLength": 8, + "Page": 0, + "PageWidth": 595.0, + "PageHeight": 842.0, + "Boxes": [[45.09, 464.79, 33.09, 9.87]] + } + ] + }, + "DerivedFields": [ + { + "FieldId": "Year", + "Value": "2025" + }, + { + "FieldId": "Month", + "Value": "12" + }, + { + "FieldId": "Day", + "Value": "1" + } + ], + "Confidence": 0.8514132, + "OperatorConfirmed": false, + "OcrConfidence": 1.0, + "TextType": "Text", + "ValidatorNotes": "", + "ValidatorNotesInfo": "" + } + ], + "DataVersion": 0, + "OperatorConfirmed": false, + "ValidatorNotes": "", + "ValidatorNotesInfo": "" + }, + { + "FieldId": "total", + "FieldName": 
"Total Amount", + "FieldType": "Number", + "IsMissing": false, + "DataSource": "Automatic", + "Values": [ + { + "Components": [], + "Value": "66.79", + "UnformattedValue": "66.79", + "Reference": { + "TextStartIndex": 582, + "TextLength": 5, + "Tokens": [ + { + "TextStartIndex": 582, + "TextLength": 5, + "Page": 0, + "PageWidth": 595.0, + "PageHeight": 842.0, + "Boxes": [[143.28, 500.81, 26.95, 14.68]] + } + ] + }, + "DerivedFields": [ + { + "FieldId": "Value", + "Value": "66.79" + } + ], + "Confidence": 0.9245848, + "OperatorConfirmed": false, + "OcrConfidence": 1.0, + "TextType": "Text", + "ValidatorNotes": "", + "ValidatorNotesInfo": "" + } + ], + "DataVersion": 0, + "OperatorConfirmed": false, + "ValidatorNotes": "", + "ValidatorNotesInfo": "" + } + ], + "Tables": [] + }, + "ExtractorPayloads": null, + "BusinessRulesResults": null + } +}