Skip to content

Commit f90f82c

Browse files
feat(Documents): add support for modern projects
1 parent dbffd67 commit f90f82c

File tree

2 files changed

+96
-23
lines changed

2 files changed

+96
-23
lines changed

src/uipath/_services/documents_service.py

Lines changed: 79 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from ..models.documents import (
1414
ActionPriority,
1515
ExtractionResponse,
16+
ProjectType,
1617
ValidatedResult,
1718
ValidationAction,
1819
)
@@ -37,11 +38,13 @@ def _get_common_headers(self) -> Dict[str, str]:
3738
"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents",
3839
}
3940

40-
def _get_project_id_by_name(self, project_name: str) -> str:
41+
def _get_project_id_by_name(
42+
self, project_name: str, project_type: ProjectType
43+
) -> str:
4144
response = self.request(
4245
"GET",
4346
url=Endpoint("/du_/api/framework/projects"),
44-
params={"api-version": 1.1, "type": "IXP"},
47+
params={"api-version": 1.1},
4548
headers=self._get_common_headers(),
4649
)
4750

@@ -89,8 +92,10 @@ async def _get_project_tags_async(self, project_id: str) -> Set[str]:
8992
)
9093
return {tag["name"] for tag in response.json().get("tags", [])}
9194

92-
def _get_project_id_and_validate_tag(self, project_name: str, tag: str) -> str:
93-
project_id = self._get_project_id_by_name(project_name)
95+
def _get_project_id_and_validate_tag(
96+
self, project_name: str, project_type: ProjectType, tag: str
97+
) -> str:
98+
project_id = self._get_project_id_by_name(project_name, project_type)
9499
tags = self._get_project_tags(project_id)
95100
if tag not in tags:
96101
raise ValueError(
@@ -143,16 +148,39 @@ async def _start_digitization_async(
143148
)
144149
).json()["documentId"]
145150

151+
def _get_document_type_id(
152+
self, project_id: str, extractor_name: Optional[str]
153+
) -> str:
154+
if extractor_name is None:
155+
return str(UUID(int=0))
156+
157+
response = self.request(
158+
"GET",
159+
url=Endpoint(f"/du_/api/framework/projects/{project_id}/extractors"),
160+
params={"api-version": 1.1},
161+
headers=self._get_common_headers(),
162+
)
163+
164+
try:
165+
return next(
166+
extractor["documentTypeId"]
167+
for extractor in response.json().get("extractors", [])
168+
if extractor["projectVersionName"] == extractor_name
169+
)
170+
except StopIteration:
171+
raise ValueError(f"Extractor '{extractor_name}' not found.") from None
172+
146173
def _start_extraction(
147174
self,
148175
project_id: str,
149176
tag: str,
177+
document_type_id: str,
150178
document_id: str,
151179
) -> str:
152180
return self.request(
153181
"POST",
154182
url=Endpoint(
155-
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/extraction/start"
183+
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/start"
156184
),
157185
params={"api-version": 1.1},
158186
headers=self._get_common_headers(),
@@ -179,7 +207,7 @@ async def _start_extraction_async(
179207

180208
def _wait_for_operation(
181209
self,
182-
result_getter: Callable[[], Tuple[str, Any]],
210+
result_getter: Callable[[], Tuple[str, dict, Any]],
183211
wait_statuses: List[str],
184212
success_status: str,
185213
) -> Any:
@@ -191,13 +219,15 @@ def _wait_for_operation(
191219
status in wait_statuses
192220
and (time.monotonic() - start_time) < POLLING_TIMEOUT
193221
):
194-
status, result = result_getter()
222+
status, error, result = result_getter()
195223
time.sleep(POLLING_INTERVAL)
196224

197225
if status != success_status:
198226
if time.monotonic() - start_time >= POLLING_TIMEOUT:
199227
raise TimeoutError("Operation timed out.")
200-
raise RuntimeError(f"Operation failed with status: {status}")
228+
raise RuntimeError(
229+
f"Operation failed with status: {status}, error: {error}"
230+
)
201231

202232
return result
203233

@@ -226,20 +256,21 @@ async def _wait_for_operation_async(
226256
return result
227257

228258
def _wait_for_extraction(
229-
self, project_id: str, tag: str, operation_id: str
259+
self, project_id: str, tag: str, document_type_id: str, operation_id: str
230260
) -> ExtractionResponse:
231261
extraction_response = self._wait_for_operation(
232262
result_getter=lambda: (
233263
(
234264
result := self.request(
235265
method="GET",
236266
url=Endpoint(
237-
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/extraction/result/{operation_id}"
267+
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}"
238268
),
239269
params={"api-version": 1.1},
240270
headers=self._get_common_headers(),
241271
).json()
242272
)["status"],
273+
result.get("error", None),
243274
result.get("result", None),
244275
),
245276
wait_statuses=["NotStarted", "Running"],
@@ -248,12 +279,13 @@ def _wait_for_extraction(
248279

249280
extraction_response["projectId"] = project_id
250281
extraction_response["tag"] = tag
282+
extraction_response["documentTypeId"] = document_type_id
251283
return ExtractionResponse.model_validate(extraction_response)
252284

253285
async def _wait_for_extraction_async(
254286
self, project_id: str, tag: str, operation_id: str
255287
) -> ExtractionResponse:
256-
async def result_getter() -> Tuple[str, Any]:
288+
async def result_getter() -> Tuple[str, str, Any]:
257289
result = await self.request_async(
258290
method="GET",
259291
url=Endpoint(
@@ -263,7 +295,11 @@ async def result_getter() -> Tuple[str, Any]:
263295
headers=self._get_common_headers(),
264296
)
265297
json_result = result.json()
266-
return json_result["status"], json_result.get("result", None)
298+
return (
299+
json_result["status"],
300+
json_result.get("error", None),
301+
json_result.get("result", None),
302+
)
267303

268304
extraction_response = await self._wait_for_operation_async(
269305
result_getter=result_getter,
@@ -282,14 +318,18 @@ def extract(
282318
tag: str,
283319
file: Optional[FileContent] = None,
284320
file_path: Optional[str] = None,
321+
project_type: ProjectType = ProjectType.IXP,
322+
extractor_name: Optional[str] = None,
285323
) -> ExtractionResponse:
286324
"""Extract predicted data from a document using an IXP project.
287325
288326
Args:
289-
project_name (str): Name of the IXP project. Details about IXP projects can be found in the [official documentation](https://docs.uipath.com/ixp/automation-cloud/latest/overview/managing-projects#creating-a-new-project).
327+
project_name (str): Name of the [IXP](https://docs.uipath.com/ixp/automation-cloud/latest/overview/managing-projects#creating-a-new-project)/[DU Modern](https://docs.uipath.com/document-understanding/automation-cloud/latest/user-guide/about-document-understanding) project.
290328
tag (str): Tag of the published project version.
291329
file (FileContent, optional): The document file to be processed.
292330
file_path (str, optional): Path to the document file to be processed.
331+
project_type (ProjectType, optional): Type of the project. Defaults to `ProjectType.IXP`.
332+
extractor_name (str, optional): Name of the extractor to be used. Necessary only for DU Modern projects.
293333
294334
Note:
295335
Either `file` or `file_path` must be provided, but not both.
@@ -313,7 +353,7 @@ def extract(
313353
raise ValueError("`file` and `file_path` are mutually exclusive")
314354

315355
project_id = self._get_project_id_and_validate_tag(
316-
project_name=project_name, tag=tag
356+
project_name=project_name, project_type=project_type, tag=tag
317357
)
318358

319359
if file_path is not None:
@@ -324,12 +364,22 @@ def extract(
324364
else:
325365
document_id = self._start_digitization(project_id=project_id, file=file) # type: ignore
326366

367+
document_type_id = self._get_document_type_id(
368+
project_id=project_id, extractor_name=extractor_name
369+
)
370+
327371
operation_id = self._start_extraction(
328-
project_id=project_id, tag=tag, document_id=document_id
372+
project_id=project_id,
373+
tag=tag,
374+
document_type_id=document_type_id,
375+
document_id=document_id,
329376
)
330377

331378
return self._wait_for_extraction(
332-
project_id=project_id, tag=tag, operation_id=operation_id
379+
project_id=project_id,
380+
tag=tag,
381+
document_type_id=document_type_id,
382+
operation_id=operation_id,
333383
)
334384

335385
@traced(name="documents_extract_async", run_type="uipath")
@@ -373,6 +423,7 @@ def _start_validation(
373423
self,
374424
project_id: str,
375425
tag: str,
426+
document_type_id: str,
376427
action_title: str,
377428
action_priority: ActionPriority,
378429
action_catalog: str,
@@ -384,7 +435,7 @@ def _start_validation(
384435
return self.request(
385436
"POST",
386437
url=Endpoint(
387-
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{UUID(int=0)}/validation/start"
438+
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/validation/start"
388439
),
389440
params={"api-version": 1.1},
390441
headers=self._get_common_headers(),
@@ -471,6 +522,7 @@ def _wait_for_create_validation_action(
471522
project_id=project_id, tag=tag, operation_id=operation_id
472523
)
473524
)["status"],
525+
result.get("error", None),
474526
result.get("result", None),
475527
),
476528
wait_statuses=["NotStarted", "Running"],
@@ -485,11 +537,15 @@ def _wait_for_create_validation_action(
485537
async def _wait_for_create_validation_action_async(
486538
self, project_id: str, tag: str, operation_id: str
487539
) -> ValidationAction:
488-
async def result_getter() -> Tuple[str, Any]:
540+
async def result_getter() -> Tuple[str, str, Any]:
489541
result = await self._get_validation_result_async(
490542
project_id=project_id, tag=tag, operation_id=operation_id
491543
)
492-
return result["status"], result.get("result", None)
544+
return (
545+
result["status"],
546+
result.get("error", None),
547+
result.get("result", None),
548+
)
493549

494550
response = await self._wait_for_operation_async(
495551
result_getter=result_getter,
@@ -542,7 +598,8 @@ def create_validation_action(
542598
"""
543599
operation_id = self._start_validation(
544600
project_id=extraction_response.project_id,
545-
tag=extraction_response.tag, # should I validate tag again?
601+
tag=extraction_response.tag,
602+
document_type_id=extraction_response.document_type_id,
546603
action_title=action_title,
547604
action_priority=action_priority,
548605
action_catalog=action_catalog,
@@ -573,7 +630,7 @@ async def create_validation_action_async(
573630
# Add reference to sync method docstring
574631
operation_id = await self._start_validation_async(
575632
project_id=extraction_response.project_id,
576-
tag=extraction_response.tag, # should I validate tag again?
633+
tag=extraction_response.tag,
577634
action_title=action_title,
578635
action_priority=action_priority,
579636
action_catalog=action_catalog,
@@ -618,6 +675,7 @@ def get_validation_result(
618675
operation_id=validation_action.operation_id,
619676
)
620677
)["result"]["actionStatus"],
678+
None,
621679
result["result"].get("validatedExtractionResults", None),
622680
),
623681
wait_statuses=["Unassigned", "Pending"],

src/uipath/models/documents.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ class ActionPriority(str, Enum):
2424
CRITICAL = "Critical"
2525

2626

27+
class ProjectType(str, Enum):
28+
IXP = "IXP"
29+
MODERN = "Modern"
30+
31+
2732
class FieldValueProjection(BaseModel):
2833
model_config = ConfigDict(
2934
serialize_by_alias=True,
@@ -71,9 +76,9 @@ class ExtractionResponse(BaseModel):
7176
7277
Attributes:
7378
extraction_result (ExtractionResult): The result of the extraction process.
74-
data_projection (List[FieldGroupValueProjection]): A simplified projection of the extracted data.
7579
project_id (str): The ID of the project associated with the extraction.
7680
tag (str): The tag associated with the published model version.
81+
document_type_id (str): The ID of the document type associated with the extraction.
7782
"""
7883

7984
model_config = ConfigDict(
@@ -82,9 +87,19 @@ class ExtractionResponse(BaseModel):
8287
)
8388

8489
extraction_result: ExtractionResult = Field(alias="extractionResult")
85-
data_projection: List[FieldGroupValueProjection] = Field(alias="dataProjection")
8690
project_id: str = Field(alias="projectId")
8791
tag: str
92+
document_type_id: str = Field(alias="documentTypeId")
93+
94+
95+
class ExtractionResponseIXP(ExtractionResponse):
96+
"""A model representing the response from a document extraction process for IXP projects.
97+
98+
Attributes:
99+
data_projection (List[FieldGroupValueProjection]): A simplified projection of the extracted data.
100+
"""
101+
102+
data_projection: List[FieldGroupValueProjection] = Field(alias="dataProjection")
88103

89104

90105
class ValidationAction(BaseModel):

0 commit comments

Comments
 (0)