1313from ..models .documents import (
1414 ActionPriority ,
1515 ExtractionResponse ,
16+ ProjectType ,
1617 ValidatedResult ,
1718 ValidationAction ,
1819)
@@ -37,11 +38,13 @@ def _get_common_headers(self) -> Dict[str, str]:
3738 "X-UiPath-Internal-ConsumptionSourceType" : "CodedAgents" ,
3839 }
3940
40- def _get_project_id_by_name (self , project_name : str ) -> str :
41+ def _get_project_id_by_name (
42+ self , project_name : str , project_type : ProjectType
43+ ) -> str :
4144 response = self .request (
4245 "GET" ,
4346 url = Endpoint ("/du_/api/framework/projects" ),
44- params = {"api-version" : 1.1 , "type" : "IXP" },
47+ params = {"api-version" : 1.1 },
4548 headers = self ._get_common_headers (),
4649 )
4750
@@ -89,8 +92,10 @@ async def _get_project_tags_async(self, project_id: str) -> Set[str]:
8992 )
9093 return {tag ["name" ] for tag in response .json ().get ("tags" , [])}
9194
92- def _get_project_id_and_validate_tag (self , project_name : str , tag : str ) -> str :
93- project_id = self ._get_project_id_by_name (project_name )
95+ def _get_project_id_and_validate_tag (
96+ self , project_name : str , project_type : ProjectType , tag : str
97+ ) -> str :
98+ project_id = self ._get_project_id_by_name (project_name , project_type )
9499 tags = self ._get_project_tags (project_id )
95100 if tag not in tags :
96101 raise ValueError (
@@ -143,16 +148,39 @@ async def _start_digitization_async(
143148 )
144149 ).json ()["documentId" ]
145150
151+ def _get_document_type_id (
152+ self , project_id : str , extractor_name : Optional [str ]
153+ ) -> str :
154+ if extractor_name is None :
155+ return str (UUID (int = 0 ))
156+
157+ response = self .request (
158+ "GET" ,
159+ url = Endpoint (f"/du_/api/framework/projects/{ project_id } /extractors" ),
160+ params = {"api-version" : 1.1 },
161+ headers = self ._get_common_headers (),
162+ )
163+
164+ try :
165+ return next (
166+ extractor ["documentTypeId" ]
167+ for extractor in response .json ().get ("extractors" , [])
168+ if extractor ["projectVersionName" ] == extractor_name
169+ )
170+ except StopIteration :
171+ raise ValueError (f"Extractor '{ extractor_name } ' not found." ) from None
172+
146173 def _start_extraction (
147174 self ,
148175 project_id : str ,
149176 tag : str ,
177+ document_type_id : str ,
150178 document_id : str ,
151179 ) -> str :
152180 return self .request (
153181 "POST" ,
154182 url = Endpoint (
155- f"/du_/api/framework/projects/{ project_id } /{ tag } /document-types/{ UUID ( int = 0 ) } /extraction/start"
183+ f"/du_/api/framework/projects/{ project_id } /{ tag } /document-types/{ document_type_id } /extraction/start"
156184 ),
157185 params = {"api-version" : 1.1 },
158186 headers = self ._get_common_headers (),
@@ -179,7 +207,7 @@ async def _start_extraction_async(
179207
180208 def _wait_for_operation (
181209 self ,
182- result_getter : Callable [[], Tuple [str , Any ]],
210+ result_getter : Callable [[], Tuple [str , dict , Any ]],
183211 wait_statuses : List [str ],
184212 success_status : str ,
185213 ) -> Any :
@@ -191,13 +219,15 @@ def _wait_for_operation(
191219 status in wait_statuses
192220 and (time .monotonic () - start_time ) < POLLING_TIMEOUT
193221 ):
194- status , result = result_getter ()
222+ status , error , result = result_getter ()
195223 time .sleep (POLLING_INTERVAL )
196224
197225 if status != success_status :
198226 if time .monotonic () - start_time >= POLLING_TIMEOUT :
199227 raise TimeoutError ("Operation timed out." )
200- raise RuntimeError (f"Operation failed with status: { status } " )
228+ raise RuntimeError (
229+ f"Operation failed with status: { status } , error: { error } "
230+ )
201231
202232 return result
203233
@@ -226,20 +256,21 @@ async def _wait_for_operation_async(
226256 return result
227257
228258 def _wait_for_extraction (
229- self , project_id : str , tag : str , operation_id : str
259+ self , project_id : str , tag : str , document_type_id : str , operation_id : str
230260 ) -> ExtractionResponse :
231261 extraction_response = self ._wait_for_operation (
232262 result_getter = lambda : (
233263 (
234264 result := self .request (
235265 method = "GET" ,
236266 url = Endpoint (
237- f"/du_/api/framework/projects/{ project_id } /{ tag } /document-types/{ UUID ( int = 0 ) } /extraction/result/{ operation_id } "
267+ f"/du_/api/framework/projects/{ project_id } /{ tag } /document-types/{ document_type_id } /extraction/result/{ operation_id } "
238268 ),
239269 params = {"api-version" : 1.1 },
240270 headers = self ._get_common_headers (),
241271 ).json ()
242272 )["status" ],
273+ result .get ("error" , None ),
243274 result .get ("result" , None ),
244275 ),
245276 wait_statuses = ["NotStarted" , "Running" ],
@@ -248,12 +279,13 @@ def _wait_for_extraction(
248279
249280 extraction_response ["projectId" ] = project_id
250281 extraction_response ["tag" ] = tag
282+ extraction_response ["documentTypeId" ] = document_type_id
251283 return ExtractionResponse .model_validate (extraction_response )
252284
253285 async def _wait_for_extraction_async (
254286 self , project_id : str , tag : str , operation_id : str
255287 ) -> ExtractionResponse :
256- async def result_getter () -> Tuple [str , Any ]:
288+ async def result_getter () -> Tuple [str , str , Any ]:
257289 result = await self .request_async (
258290 method = "GET" ,
259291 url = Endpoint (
@@ -263,7 +295,11 @@ async def result_getter() -> Tuple[str, Any]:
263295 headers = self ._get_common_headers (),
264296 )
265297 json_result = result .json ()
266- return json_result ["status" ], json_result .get ("result" , None )
298+ return (
299+ json_result ["status" ],
300+ json_result .get ("error" , None ),
301+ json_result .get ("result" , None ),
302+ )
267303
268304 extraction_response = await self ._wait_for_operation_async (
269305 result_getter = result_getter ,
@@ -282,14 +318,18 @@ def extract(
282318 tag : str ,
283319 file : Optional [FileContent ] = None ,
284320 file_path : Optional [str ] = None ,
321+ project_type : ProjectType = ProjectType .IXP ,
322+ extractor_name : Optional [str ] = None ,
285323 ) -> ExtractionResponse :
286324 """Extract predicted data from a document using an IXP project.
287325
288326 Args:
289- project_name (str): Name of the IXP project. Details about IXP projects can be found in the [official documentation ](https://docs.uipath.com/ixp/automation-cloud/latest/overview/managing-projects#creating-a-new-project).
327+ project_name (str): Name of the [IXP ](https://docs.uipath.com/ixp/automation-cloud/latest/overview/managing-projects#creating-a-new-project)/[DU Modern](https://docs.uipath.com/document-understanding/automation-cloud/latest/user-guide/about-document-understanding) project .
290328 tag (str): Tag of the published project version.
291329 file (FileContent, optional): The document file to be processed.
292330 file_path (str, optional): Path to the document file to be processed.
331+ project_type (ProjectType, optional): Type of the project. Defaults to `ProjectType.IXP`.
332+ extractor_name (str, optional): Name of the extractor to be used. Necessary only for DU Modern projects.
293333
294334 Note:
295335 Either `file` or `file_path` must be provided, but not both.
@@ -313,7 +353,7 @@ def extract(
313353 raise ValueError ("`file` and `file_path` are mutually exclusive" )
314354
315355 project_id = self ._get_project_id_and_validate_tag (
316- project_name = project_name , tag = tag
356+ project_name = project_name , project_type = project_type , tag = tag
317357 )
318358
319359 if file_path is not None :
@@ -324,12 +364,22 @@ def extract(
324364 else :
325365 document_id = self ._start_digitization (project_id = project_id , file = file ) # type: ignore
326366
367+ document_type_id = self ._get_document_type_id (
368+ project_id = project_id , extractor_name = extractor_name
369+ )
370+
327371 operation_id = self ._start_extraction (
328- project_id = project_id , tag = tag , document_id = document_id
372+ project_id = project_id ,
373+ tag = tag ,
374+ document_type_id = document_type_id ,
375+ document_id = document_id ,
329376 )
330377
331378 return self ._wait_for_extraction (
332- project_id = project_id , tag = tag , operation_id = operation_id
379+ project_id = project_id ,
380+ tag = tag ,
381+ document_type_id = document_type_id ,
382+ operation_id = operation_id ,
333383 )
334384
335385 @traced (name = "documents_extract_async" , run_type = "uipath" )
@@ -373,6 +423,7 @@ def _start_validation(
373423 self ,
374424 project_id : str ,
375425 tag : str ,
426+ document_type_id : str ,
376427 action_title : str ,
377428 action_priority : ActionPriority ,
378429 action_catalog : str ,
@@ -384,7 +435,7 @@ def _start_validation(
384435 return self .request (
385436 "POST" ,
386437 url = Endpoint (
387- f"/du_/api/framework/projects/{ project_id } /{ tag } /document-types/{ UUID ( int = 0 ) } /validation/start"
438+ f"/du_/api/framework/projects/{ project_id } /{ tag } /document-types/{ document_type_id } /validation/start"
388439 ),
389440 params = {"api-version" : 1.1 },
390441 headers = self ._get_common_headers (),
@@ -471,6 +522,7 @@ def _wait_for_create_validation_action(
471522 project_id = project_id , tag = tag , operation_id = operation_id
472523 )
473524 )["status" ],
525+ result .get ("error" , None ),
474526 result .get ("result" , None ),
475527 ),
476528 wait_statuses = ["NotStarted" , "Running" ],
@@ -485,11 +537,15 @@ def _wait_for_create_validation_action(
485537 async def _wait_for_create_validation_action_async (
486538 self , project_id : str , tag : str , operation_id : str
487539 ) -> ValidationAction :
488- async def result_getter () -> Tuple [str , Any ]:
540+ async def result_getter () -> Tuple [str , str , Any ]:
489541 result = await self ._get_validation_result_async (
490542 project_id = project_id , tag = tag , operation_id = operation_id
491543 )
492- return result ["status" ], result .get ("result" , None )
544+ return (
545+ result ["status" ],
546+ result .get ("error" , None ),
547+ result .get ("result" , None ),
548+ )
493549
494550 response = await self ._wait_for_operation_async (
495551 result_getter = result_getter ,
@@ -542,7 +598,8 @@ def create_validation_action(
542598 """
543599 operation_id = self ._start_validation (
544600 project_id = extraction_response .project_id ,
545- tag = extraction_response .tag , # should I validate tag again?
601+ tag = extraction_response .tag ,
602+ document_type_id = extraction_response .document_type_id ,
546603 action_title = action_title ,
547604 action_priority = action_priority ,
548605 action_catalog = action_catalog ,
@@ -573,7 +630,7 @@ async def create_validation_action_async(
573630 # Add reference to sync method docstring
574631 operation_id = await self ._start_validation_async (
575632 project_id = extraction_response .project_id ,
576- tag = extraction_response .tag , # should I validate tag again?
633+ tag = extraction_response .tag ,
577634 action_title = action_title ,
578635 action_priority = action_priority ,
579636 action_catalog = action_catalog ,
@@ -618,6 +675,7 @@ def get_validation_result(
618675 operation_id = validation_action .operation_id ,
619676 )
620677 )["result" ]["actionStatus" ],
678+ None ,
621679 result ["result" ].get ("validatedExtractionResults" , None ),
622680 ),
623681 wait_statuses = ["Unassigned" , "Pending" ],
0 commit comments