UiPath · alexandrujircan · Oct 15, 2025 · Oct 16, 2025
diff --git a/src/uipath/_services/documents_service.py b/src/uipath/_services/documents_service.py
@@ -13,6 +13,7 @@
 from .._utils import Endpoint
 from ..models.documents import (
     ActionPriority,
+    DigitizationResult,
     ExtractionResponse,
     ExtractionResponseIXP,
     ProjectType,
@@ -26,6 +27,51 @@
 POLLING_TIMEOUT = 300  # seconds
 
 
+def _is_provided(arg: Any) -> bool:
+    """Tell whether an optional argument was supplied (is anything but None)."""
+    return not (arg is None)
+
+
+def _must_not_be_provided(**kwargs: Any) -> None:
+    """Reject any keyword argument that carries a value.
+
+    Raises:
+        ValueError: Naming the first keyword (in call order) whose value is
+            not None.
+    """
+    offending = next((key for key, val in kwargs.items() if val is not None), None)
+    if offending is not None:
+        raise ValueError(f"`{offending}` should not be provided")
+
+
+def _must_be_provided(**kwargs: Any) -> None:
+    """Require every keyword argument to carry a value.
+
+    Raises:
+        ValueError: Naming the first keyword (in call order) whose value is
+            None.
+    """
+    missing = [key for key, val in kwargs.items() if val is None]
+    if missing:
+        raise ValueError(f"`{missing[0]}` should be provided")
+
+
+def _are_mutually_exclusive(**kwargs: Any) -> None:
+    """Ensure at most one of the given keyword arguments is provided.
+
+    Despite the predicate-like name, this is a validator: it returns None
+    when zero or one argument is not None and raises otherwise.
+
+    Raises:
+        ValueError: If more than one keyword argument is not None. Every
+            offending name is quoted individually, e.g.
+            "`file` and `file_path` are mutually exclusive".
+    """
+    provided = [name for name, value in kwargs.items() if value is not None]
+    if len(provided) > 1:
+        # Quote each name on its own so the message reads as separate
+        # parameters rather than one backticked "a, b" blob.
+        names = " and ".join(f"`{name}`" for name in provided)
+        raise ValueError(f"{names} are mutually exclusive")
+
+
+def _validate_extract_params(
+    project_name: Optional[str] = None,
+    file: Optional[FileContent] = None,
+    file_path: Optional[str] = None,
+    digitization_result: Optional[DigitizationResult] = None,
+    project_type: Optional[ProjectType] = ProjectType.IXP,
+    document_type_name: Optional[str] = None,
+) -> None:
+    """Validate the argument combination accepted by `extract`.
+
+    Two modes are supported:
+
+    * `project_name` given: the document is digitized as part of the call, so
+      exactly one of `file` / `file_path` and a `project_type` are required,
+      and `digitization_result` must be omitted.
+    * `project_name` omitted: an existing `digitization_result` is reused, so
+      it is required while `project_type`, `file` and `file_path` must all be
+      None (the project type is read from the digitization result).
+
+    In both modes `document_type_name` is required for `ProjectType.MODERN`
+    and forbidden for any other project type.
+
+    Raises:
+        ValueError: If the provided arguments violate any rule above.
+    """
+    _are_mutually_exclusive(file=file, file_path=file_path)
+
+    if _is_provided(project_name):
+        _must_not_be_provided(digitization_result=digitization_result)
+        # No digitization result exists to supply the project type later on.
+        _must_be_provided(project_type=project_type)
+        # A new digitization needs an actual document to upload.
+        if file is None and file_path is None:
+            raise ValueError("Either `file` or `file_path` must be provided")
+    else:
+        _must_be_provided(digitization_result=digitization_result)
+        _must_not_be_provided(project_type=project_type, file=file, file_path=file_path)
+        project_type = digitization_result.project_type
+
+    if project_type == ProjectType.MODERN:
+        _must_be_provided(document_type_name=document_type_name)
+    else:
+        _must_not_be_provided(document_type_name=document_type_name)
+
+
 class DocumentsService(FolderContext, BaseService):
     """Service for managing UiPath DocumentUnderstanding Document Operations.
 
@@ -96,10 +142,32 @@ async def _get_project_tags_async(self, project_id: str) -> Set[str]:
         )
         return {tag["name"] for tag in response.json().get("tags", [])}
 
+    def _get_document_id(
+        self,
+        project_id: Optional[str] = None,
+        file: Optional[FileContent] = None,
+        file_path: Optional[str] = None,
+        digitization_result: Optional[DigitizationResult] = None,
+    ) -> str:
+        """Resolve the document id to run an operation against.
+
+        When no `digitization_result` is given, a new digitization is started
+        for `file` / `file_path` and its document id is returned; otherwise the
+        id stored in the result's document object model is reused.
+        """
+        if digitization_result is None:
+            return self._start_digitization(
+                project_id=project_id, file=file, file_path=file_path
+            )
+
+        return digitization_result.document_object_model.document_id
+
     def _get_project_id_and_validate_tag(
-        self, project_name: str, project_type: ProjectType, tag: str
+        self,
+        tag: str,
+        project_name: Optional[str],
+        project_type: Optional[ProjectType],
+        digitization_result: Optional[DigitizationResult],
     ) -> str:
-        project_id = self._get_project_id_by_name(project_name, project_type)
+        if digitization_result is None:
+            project_id = self._get_project_id_by_name(project_name, project_type)
+        else:
+            project_id = digitization_result.project_id
+
         tags = self._get_project_tags(project_id)
         if tag not in tags:
             raise ValueError(
@@ -125,17 +193,50 @@ async def _get_project_id_and_validate_tag_async(
     def _start_digitization(
         self,
         project_id: str,
-        file: FileContent,
+        file: Optional[FileContent] = None,
+        file_path: Optional[str] = None,
     ) -> str:
+        """Start digitization of a document and return its document id.
+
+        Args:
+            project_id: Id of the project used in the digitization endpoint URL.
+            file: Content uploaded as-is when `file_path` is falsy.
+            file_path: Path opened in binary mode and uploaded when truthy;
+                the file is closed once the request returns.
+
+        Returns:
+            The `documentId` field of the JSON response.
+        """
-        return self.request(
-            "POST",
-            url=Endpoint(
-                f"/du_/api/framework/projects/{project_id}/digitization/start"
-            ),
-            params={"api-version": 1.1},
-            headers=self._get_common_headers(),
-            files={"File": file},
-        ).json()["documentId"]
+        # `nullcontext(file)` lets a caller-supplied `file` share the `with`
+        # block without being closed here.
+        # NOTE(review): nothing in this method rejects the case where both
+        # `file` and `file_path` are None; `handle` is then None and
+        # `files={"File": None}` is sent. Callers must validate beforehand.
+        with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle:
+            return self.request(
+                "POST",
+                url=Endpoint(
+                    f"/du_/api/framework/projects/{project_id}/digitization/start"
+                ),
+                params={"api-version": 1.1},
+                headers=self._get_common_headers(),
+                files={"File": handle},
+            ).json()["documentId"]
+
+    def _wait_for_digitization(
+        self,
+        project_id: str,
+        document_id: str,
+        project_type: ProjectType,
+    ) -> DigitizationResult:
+        """Wait for a started digitization and return it as a `DigitizationResult`.
+
+        Polling is delegated to `_wait_for_operation`, which is given
+        "NotStarted" / "Running" as the statuses to wait on and "Succeeded" as
+        the success status.
+
+        Args:
+            project_id: Id of the project the document was digitized in.
+            document_id: Id returned when the digitization was started.
+            project_type: Type of the project; stored on the returned result.
+
+        Returns:
+            The digitization payload validated as `DigitizationResult`, with
+            `projectId` and `projectType` added so that the result alone
+            identifies the project it belongs to.
+        """
+
+        # The third element is the JSON result payload (used as a mapping
+        # below), not a string.
+        def result_getter() -> Tuple[str, Optional[str], Any]:
+            result = self.request(
+                method="GET",
+                url=Endpoint(
+                    f"/du_/api/framework/projects/{project_id}/digitization/result/{document_id}"
+                ),
+                params={"api-version": 1.1},
+                headers=self._get_common_headers(),
+            ).json()
+            return (
+                result["status"],
+                result.get("error", None),
+                result.get("result", None),
+            )
+
+        digitization_response = self._wait_for_operation(
+            result_getter=result_getter,
+            wait_statuses=["NotStarted", "Running"],
+            success_status="Succeeded",
+        )
+        # Project information is added client-side before validation because
+        # `DigitizationResult` declares `projectId` / `projectType` as
+        # required fields.
+        digitization_response["projectId"] = project_id
+        digitization_response["projectType"] = project_type.value
+
+        return DigitizationResult.model_validate(digitization_response)
 
     async def _start_digitization_async(
         self,
@@ -376,13 +477,34 @@ async def result_getter() -> Tuple[str, str, Any]:
 
         return ExtractionResponse.model_validate(extraction_response)
 
+    @traced(name="documents_digitize", run_type="uipath")
+    def digitize(
+        self,
+        project_name: str,
+        file: Optional[FileContent] = None,
+        file_path: Optional[str] = None,
+        project_type: ProjectType = ProjectType.IXP,
+    ) -> DigitizationResult:
+        """Digitize a document and wait for the digitization to finish.
+
+        Args:
+            project_name: Name of the project to digitize the document in.
+            file: Content of the document. Mutually exclusive with `file_path`.
+            file_path: Path of the document. Mutually exclusive with `file`.
+            project_type: Type of the project. Defaults to `ProjectType.IXP`.
+
+        Returns:
+            DigitizationResult: The digitization result, which can be passed
+            to `extract` through its `digitization_result` parameter.
+
+        Raises:
+            ValueError: If both or neither of `file` and `file_path` are
+                provided.
+        """
+        _are_mutually_exclusive(file=file, file_path=file_path)
+        # Fail early: without a document the upload would send an empty
+        # `File` part to the digitization endpoint.
+        if file is None and file_path is None:
+            raise ValueError("Either `file` or `file_path` must be provided")
+
+        project_id = self._get_project_id_by_name(project_name, project_type)
+
+        document_id = self._start_digitization(
+            project_id=project_id, file=file, file_path=file_path
+        )
+
+        return self._wait_for_digitization(
+            project_id=project_id, document_id=document_id, project_type=project_type
+        )
+
     @traced(name="documents_extract", run_type="uipath")
     def extract(
         self,
-        project_name: str,
         tag: str,
+        project_name: Optional[str] = None,
         file: Optional[FileContent] = None,
         file_path: Optional[str] = None,
+        digitization_result: Optional[DigitizationResult] = None,
         project_type: ProjectType = ProjectType.IXP,
         document_type_name: Optional[str] = None,
     ) -> Union[ExtractionResponse, ExtractionResponseIXP]:
@@ -414,32 +536,58 @@ def extract(
             ```
 
             DU Modern projects:
-            ```python
-            with open("path/to/document.pdf", "rb") as file:
+                Automatic digitization:
+                ```python
+                with open("path/to/document.pdf", "rb") as file:
+                    extraction_response = service.extract(
+                        project_name="MyModernProjectName",
+                        tag="Production",
+                        file=file,
+                        project_type=ProjectType.MODERN,
+                        document_type_name="Receipts",
+                    )
+                ```
+                Using existing digitization result:
+                ```python
+                with open("path/to/document.pdf", "rb") as file:
+                    digitization_result = service.digitize(
+                        project_name="MyModernProjectName",
+                        file=file,
+                        project_type=ProjectType.MODERN,
+                    )
+
                 extraction_response = service.extract(
-                    project_name="MyModernProjectName",
                     tag="Production",
-                    file=file,
-                    project_type=ProjectType.MODERN,
+                    digitization_result=digitization_result,
                     document_type_name="Receipts",
+                    project_type=None,
                 )
-            ```
+                ```
         """
-        if file is None and file_path is None:
-            raise ValueError("Either `file` or `file_path` must be provided")
-        if file is not None and file_path is not None:
-            raise ValueError("`file` and `file_path` are mutually exclusive")
-        if project_type == ProjectType.MODERN and document_type_name is None:
-            raise ValueError(
-                "`document_type_name` must be provided when `project_type` is `ProjectType.MODERN`"
-            )
+        _validate_extract_params(
+            project_name=project_name,
+            file=file,
+            file_path=file_path,
+            digitization_result=digitization_result,
+            project_type=project_type,
+            document_type_name=document_type_name,
+        )
 
         project_id = self._get_project_id_and_validate_tag(
-            project_name=project_name, project_type=project_type, tag=tag
+            tag=tag,
+            project_name=project_name,
+            project_type=project_type,
+            digitization_result=digitization_result,
         )
 
-        with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle:
-            document_id = self._start_digitization(project_id=project_id, file=handle)  # type: ignore
+        project_type = project_type or digitization_result.project_type
+
+        document_id = self._get_document_id(
+            project_id=project_id,
+            file=file,
+            file_path=file_path,
+            digitization_result=digitization_result,
+        )
 
         document_type_id = self._get_document_type_id(
             project_id=project_id,

diff --git a/src/uipath/models/documents.py b/src/uipath/models/documents.py
@@ -151,3 +151,38 @@ class ValidatedResult(BaseModel):
 
     document_id: str = Field(alias="DocumentId")
     results_document: dict = Field(alias="ResultsDocument")  # type: ignore
+
+
+class Metadata(BaseModel):
+    """A single key/value metadata entry.
+
+    Used as the element type of `DocumentObjectModel.documentMetadata`.
+    """
+
+    # Validate input and serialize output using field aliases.
+    model_config = ConfigDict(
+        serialize_by_alias=True,
+        validate_by_alias=True,
+    )
+
+    # Metadata entry name.
+    key: str
+    # Metadata entry value.
+    value: str
+
+
+class DocumentObjectModel(BaseModel):
+    """Document object model nested in a `DigitizationResult`.
+
+    Carries the `document_id` that is reused when an operation runs on an
+    existing digitization result.
+    """
+
+    # Validate input and serialize output using field aliases.
+    model_config = ConfigDict(
+        serialize_by_alias=True,
+        validate_by_alias=True,
+    )
+
+    # Id of the digitized document (payload key: "documentId").
+    document_id: str = Field(alias="documentId")
+    # NOTE(review): `contentType` and `documentMetadata` keep camelCase
+    # attribute names (their aliases equal the attribute names), unlike
+    # `document_id` above — confirm whether this is intentional.
+    contentType: str = Field(alias="contentType")
+    length: int
+    # One dict per page; the per-page schema is not declared here.
+    pages: List[dict]
+    documentMetadata: List[Metadata] = Field(alias="documentMetadata")
+
+
+class DigitizationResult(BaseModel):
+    """Result of a document digitization, tied to the project it was run in."""
+
+    # Validate input and serialize output using field aliases.
+    model_config = ConfigDict(
+        serialize_by_alias=True,
+        validate_by_alias=True,
+    )
+
+    # Structured representation of the document (payload key: "documentObjectModel").
+    document_object_model: DocumentObjectModel = Field(alias="documentObjectModel")
+    # Payload key: "documentText".
+    document_text: str = Field(alias="documentText")
+    # Id of the project the document was digitized in (payload key: "projectId").
+    project_id: str = Field(alias="projectId")
+    # Type of that project (payload key: "projectType").
+    project_type: ProjectType = Field(alias="projectType")