diff --git a/src/uipath/_services/context_grounding_service.py b/src/uipath/_services/context_grounding_service.py
index 323020198..febc2a9d1 100644
--- a/src/uipath/_services/context_grounding_service.py
+++ b/src/uipath/_services/context_grounding_service.py
@@ -1,4 +1,3 @@
-import json
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import httpx
@@ -10,16 +9,28 @@
 from .._folder_context import FolderContext
 from .._utils import Endpoint, RequestSpec, header_folder, infer_bindings
 from .._utils.constants import (
-    CONFLUENCE_DATA_SOURCE,
-    DROPBOX_DATA_SOURCE,
-    GOOGLE_DRIVE_DATA_SOURCE,
-    LLMV4,
-    ONEDRIVE_DATA_SOURCE,
+    LLMV4_REQUEST,
     ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE,
 )
 from ..models import IngestionInProgressException
 from ..models.context_grounding import ContextGroundingQueryResponse
 from ..models.context_grounding_index import ContextGroundingIndex
+from ..models.context_grounding_payloads import (
+    BucketDataSource,
+    BucketSourceConfig,
+    ConfluenceDataSource,
+    ConfluenceSourceConfig,
+    CreateIndexPayload,
+    DropboxDataSource,
+    DropboxSourceConfig,
+    GoogleDriveDataSource,
+    GoogleDriveSourceConfig,
+    OneDriveDataSource,
+    OneDriveSourceConfig,
+    PreProcessing,
+    SourceConfig,
+)
+from ..models.exceptions import UnsupportedDataSourceException
 from ..tracing._traced import traced
 from ._base_service import BaseService
 from .buckets_service import BucketsService
@@ -323,12 +334,12 @@ async def retrieve_by_id_async(
     def create_index(
         self,
         name: str,
-        source: Dict[str, Any],
+        source: SourceConfig,
         description: Optional[str] = None,
         cron_expression: Optional[str] = None,
         time_zone_id: Optional[str] = None,
         advanced_ingestion: Optional[bool] = True,
-        preprocessing_request: Optional[str] = LLMV4,
+        preprocessing_request: Optional[str] = LLMV4_REQUEST,
         folder_key: Optional[str] = None,
         folder_path: Optional[str] = None,
     ) -> ContextGroundingIndex:
@@ -336,12 +347,12 @@ def create_index(
         Args:
             name (str): The name of the index to create.
-            source (dict): Source configuration dictionary:
-                - For buckets: type="bucket", bucket_name, folder_path, directory_path="/" (optional), file_type (optional)
-                - For Google Drive: type="google", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
-                - For Dropbox: type="dropbox", connection_name, connection_id, directory_path, folder_path, file_type (optional)
-                - For OneDrive: type="onedrive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
-                - For Confluence: type="confluence", connection_name, connection_id, space_id, directory_path, folder_path, file_type (optional)
+            source (SourceConfig): Source configuration using one of:
+                - BucketSourceConfig: For storage buckets
+                - GoogleDriveSourceConfig: For Google Drive
+                - DropboxSourceConfig: For Dropbox
+                - OneDriveSourceConfig: For OneDrive
+                - ConfluenceSourceConfig: For Confluence
             description (Optional[str]): Description of the index.
             cron_expression (Optional[str]): Cron expression for scheduled indexing (e.g., "0 0 18 ? * 2" for Tuesdays at 6 PM).
             time_zone_id (Optional[str]): Valid Windows Timezone ID for the cron expression (e.g., "UTC", "Pacific Standard Time", "GTB Standard Time").
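Reviewer note: a minimal usage sketch of the new typed create_index API. The `service` handle and all values below are illustrative assumptions, not taken from this patch:

    from uipath.models.context_grounding_payloads import BucketSourceConfig

    # `service` is assumed to be an already-initialized ContextGroundingService.
    source = BucketSourceConfig(
        bucket_name="invoices-bucket",   # required
        folder_path="/Shared/Finance",   # required Orchestrator folder path
        directory_path="/",              # optional, defaults to "/"
        file_type="pdf",                 # optional; becomes fileNameGlob "**/*.pdf"
    )

    index = service.create_index(
        name="invoices-index",
        source=source,
        description="Invoices knowledge base",
    )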
@@ -362,7 +373,7 @@ def create_index(
             advanced_ingestion=advanced_ingestion
             if advanced_ingestion is not None
             else True,
-            preprocessing_request=preprocessing_request or LLMV4,
+            preprocessing_request=preprocessing_request or LLMV4_REQUEST,
             folder_path=folder_path,
             folder_key=folder_key,
         )
@@ -370,7 +381,7 @@ def create_index(
         response = self.request(
             spec.method,
             spec.endpoint,
-            content=spec.content,
+            json=spec.json,
             headers=spec.headers,
         )
 
@@ -381,12 +392,12 @@ def create_index(
     async def create_index_async(
         self,
         name: str,
-        source: Dict[str, Any],
+        source: SourceConfig,
         description: Optional[str] = None,
         cron_expression: Optional[str] = None,
         time_zone_id: Optional[str] = None,
         advanced_ingestion: Optional[bool] = True,
-        preprocessing_request: Optional[str] = LLMV4,
+        preprocessing_request: Optional[str] = LLMV4_REQUEST,
         folder_key: Optional[str] = None,
         folder_path: Optional[str] = None,
     ) -> ContextGroundingIndex:
@@ -394,12 +405,12 @@ async def create_index_async(
         Args:
             name (str): The name of the index to create.
-            source (dict): Source configuration dictionary:
-                - For buckets: type="bucket", bucket_name, folder_path, directory_path="/" (optional), file_type (optional)
-                - For Google Drive: type="google_drive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
-                - For Dropbox: type="dropbox", connection_name, connection_id, directory_path, folder_path, file_type (optional)
-                - For OneDrive: type="onedrive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
-                - For Confluence: type="confluence", connection_name, connection_id, space_id, directory_path, folder_path, file_type (optional)
+            source (SourceConfig): Source configuration using one of:
+                - BucketSourceConfig: For storage buckets
+                - GoogleDriveSourceConfig: For Google Drive
+                - DropboxSourceConfig: For Dropbox
+                - OneDriveSourceConfig: For OneDrive
+                - ConfluenceSourceConfig: For Confluence
             description (Optional[str]): Description of the index.
             cron_expression (Optional[str]): Cron expression for scheduled indexing (e.g., "0 0 18 ? * 2" for Tuesdays at 6 PM).
             time_zone_id (Optional[str]): Valid Windows Timezone ID for the cron expression (e.g., "UTC", "Pacific Standard Time", "GTB Standard Time").
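Reviewer note: with the typed configs, a malformed source now fails at construction time with a pydantic ValidationError instead of a ValueError raised later inside the service; a small sketch (values are illustrative assumptions):

    from pydantic import ValidationError

    from uipath.models.context_grounding_payloads import GoogleDriveSourceConfig

    try:
        # Missing connection_name, leaf_folder_id, and directory_path.
        GoogleDriveSourceConfig(
            connection_id="conn-123",
            folder_path="/test/folder",
        )
    except ValidationError as err:
        print(err)  # pydantic reports every missing required field at once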
@@ -420,7 +431,7 @@ async def create_index_async(
             advanced_ingestion=advanced_ingestion
             if advanced_ingestion is not None
             else True,
-            preprocessing_request=preprocessing_request or LLMV4,
+            preprocessing_request=preprocessing_request or LLMV4_REQUEST,
             folder_path=folder_path,
             folder_key=folder_key,
         )
@@ -428,7 +439,7 @@ async def create_index_async(
         response = await self.request_async(
             spec.method,
             spec.endpoint,
-            content=spec.content,
+            json=spec.json,
             headers=spec.headers,
         )
 
@@ -697,7 +708,7 @@ def _create_spec(
         self,
         name: str,
         description: Optional[str],
-        source: Dict[str, Any],
+        source: SourceConfig,
         advanced_ingestion: bool,
         preprocessing_request: str,
         cron_expression: Optional[str] = None,
@@ -710,7 +721,7 @@ def _create_spec(
         Args:
             name: Index name
             description: Index description
-            source: Source configuration dictionary
+            source: Source configuration (typed model)
             cron_expression: Optional cron expression for scheduled indexing
             time_zone_id: Optional timezone for cron expression
             advanced_ingestion: Whether to enable advanced ingestion with preprocessing
@@ -721,175 +732,103 @@ def _create_spec(
         Returns:
             RequestSpec for the create index request
         """
-        source_type = source.get("type", "").lower()
-
         folder_key = self._resolve_folder_key(folder_key, folder_path)
-        file_type = source.get("file_type")
-        file_name_glob = f"**/*.{file_type}" if file_type else "**/*"
-        data_source = self._build_data_source(source_type, source, file_name_glob)
+        data_source_dict = self._build_data_source(source)
 
         if cron_expression:
-            data_source["indexer"] = {
+            data_source_dict["indexer"] = {
                 "cronExpression": cron_expression,
                 "timeZoneId": time_zone_id or "UTC",
             }
 
-        payload = {
-            "name": name,
-            "description": description or "",
-            "dataSource": data_source,
-        }
-
-        if advanced_ingestion and preprocessing_request:
-            payload["preProcessing"] = {
-                "@odata.type": preprocessing_request,
-            }
+        payload = CreateIndexPayload(
+            name=name,
+            description=description or "",
+            data_source=data_source_dict,
+            pre_processing=(
+                PreProcessing(**{"@odata.type": preprocessing_request})
+                if advanced_ingestion and preprocessing_request
+                else None
+            ),
+        )
 
         return RequestSpec(
             method="POST",
             endpoint=Endpoint("/ecs_/v2/indexes/create"),
-            content=json.dumps(payload),
+            json=payload.model_dump(by_alias=True, exclude_none=True),
             headers={
                 **header_folder(folder_key, None),
-                "Content-Type": "application/json",
             },
         )
 
-    def _build_data_source(
-        self, source_type: str, source: Dict[str, Any], file_name_glob: str
-    ) -> Dict[str, Any]:
-        """Build data source configuration based on type."""
-        if source_type == "bucket":
-            return self._build_bucket_data_source(source, file_name_glob)
-        elif source_type in ["google_drive"]:
-            return self._build_google_drive_data_source(source, file_name_glob)
-        elif source_type == "dropbox":
-            return self._build_dropbox_data_source(source, file_name_glob)
-        elif source_type == "onedrive":
-            return self._build_onedrive_data_source(source, file_name_glob)
-        elif source_type == "confluence":
-            return self._build_confluence_data_source(source, file_name_glob)
+    def _build_data_source(self, source: SourceConfig) -> Dict[str, Any]:
+        """Build data source configuration from typed source config.
+
+        Args:
+            source: Typed source configuration model
+
+        Returns:
+            Dictionary with the data source configuration for the API
+        """
+        file_name_glob = f"**/*.{source.file_type}" if source.file_type else "**/*"
+
+        data_source: Union[
+            BucketDataSource,
+            GoogleDriveDataSource,
+            DropboxDataSource,
+            OneDriveDataSource,
+            ConfluenceDataSource,
+        ]
+
+        if isinstance(source, BucketSourceConfig):
+            data_source = BucketDataSource(
+                folder=source.folder_path,
+                bucketName=source.bucket_name,
+                fileNameGlob=file_name_glob,
+                directoryPath=source.directory_path,
+            )
+        elif isinstance(source, GoogleDriveSourceConfig):
+            data_source = GoogleDriveDataSource(
+                folder=source.folder_path,
+                connectionId=source.connection_id,
+                connectionName=source.connection_name,
+                leafFolderId=source.leaf_folder_id,
+                directoryPath=source.directory_path,
+                fileNameGlob=file_name_glob,
+            )
+        elif isinstance(source, DropboxSourceConfig):
+            data_source = DropboxDataSource(
+                folder=source.folder_path,
+                connectionId=source.connection_id,
+                connectionName=source.connection_name,
+                directoryPath=source.directory_path,
+                fileNameGlob=file_name_glob,
+            )
+        elif isinstance(source, OneDriveSourceConfig):
+            data_source = OneDriveDataSource(
+                folder=source.folder_path,
+                connectionId=source.connection_id,
+                connectionName=source.connection_name,
+                leafFolderId=source.leaf_folder_id,
+                directoryPath=source.directory_path,
+                fileNameGlob=file_name_glob,
+            )
+        elif isinstance(source, ConfluenceSourceConfig):
+            data_source = ConfluenceDataSource(
+                folder=source.folder_path,
+                connectionId=source.connection_id,
+                connectionName=source.connection_name,
+                directoryPath=source.directory_path,
+                fileNameGlob=file_name_glob,
+                spaceId=source.space_id,
+            )
         else:
             raise ValueError(
-                f"Unsupported data source type: {source_type}. "
-                f"Supported types: bucket, google_drive, dropbox, onedrive, confluence"
+                f"Unsupported source configuration type: {type(source).__name__}"
             )
 
-    def _build_bucket_data_source(
-        self, source: Dict[str, Any], file_name_glob: str
-    ) -> Dict[str, Any]:
-        """Build data source configuration for storage bucket."""
-        required_fields = ["bucket_name", "folder_path"]
-        for field in required_fields:
-            if not source.get(field):
-                raise ValueError(f"{field} is required for bucket data source")
-
-        return {
-            "@odata.type": ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE,
-            "folder": source["folder_path"],
-            "bucketName": source["bucket_name"],
-            "fileNameGlob": file_name_glob,
-            "directoryPath": source.get("directory_path", "/"),
-        }
-
-    def _build_google_drive_data_source(
-        self, source: Dict[str, Any], file_name_glob: str
-    ) -> Dict[str, Any]:
-        """Build data source configuration for Google Drive."""
-        required_fields = [
-            "connection_id",
-            "connection_name",
-            "leaf_folder_id",
-            "directory_path",
-            "folder_path",
-        ]
-        for field in required_fields:
-            if not source.get(field):
-                raise ValueError(f"{field} is required for Google Drive data source")
-
-        return {
-            "@odata.type": GOOGLE_DRIVE_DATA_SOURCE,
-            "folder": source["folder_path"],
-            "connectionId": source["connection_id"],
-            "connectionName": source["connection_name"],
-            "leafFolderId": source["leaf_folder_id"],
-            "directoryPath": source["directory_path"],
-            "fileNameGlob": file_name_glob,
-        }
-
-    def _build_dropbox_data_source(
-        self, source: Dict[str, Any], file_name_glob: str
-    ) -> Dict[str, Any]:
-        """Build data source configuration for Dropbox."""
-        required_fields = [
-            "connection_id",
-            "connection_name",
-            "directory_path",
-            "folder_path",
-        ]
-        for field in required_fields:
-            if not source.get(field):
-                raise ValueError(f"{field} is required for Dropbox data source")
-
-        return {
-            "@odata.type": DROPBOX_DATA_SOURCE,
-            "folder": source["folder_path"],
-            "connectionId": source["connection_id"],
-            "connectionName": source["connection_name"],
-            "directoryPath": source["directory_path"],
-            "fileNameGlob": file_name_glob,
-        }
-
-    def _build_onedrive_data_source(
-        self, source: Dict[str, Any], file_name_glob: str
-    ) -> Dict[str, Any]:
-        """Build data source configuration for OneDrive."""
-        required_fields = [
-            "connection_id",
-            "connection_name",
-            "leaf_folder_id",
-            "directory_path",
-            "folder_path",
-        ]
-        for field in required_fields:
-            if not source.get(field):
-                raise ValueError(f"{field} is required for OneDrive data source")
-
-        return {
-            "@odata.type": ONEDRIVE_DATA_SOURCE,
-            "folder": source["folder_path"],
-            "connectionId": source["connection_id"],
-            "connectionName": source["connection_name"],
-            "leafFolderId": source["leaf_folder_id"],
-            "directoryPath": source["directory_path"],
-            "fileNameGlob": file_name_glob,
-        }
-
-    def _build_confluence_data_source(
-        self, source: Dict[str, Any], file_name_glob: str
-    ) -> Dict[str, Any]:
-        """Build data source configuration for Confluence."""
-        required_fields = [
-            "connection_id",
-            "connection_name",
-            "directory_path",
-            "folder_path",
-            "space_id",
-        ]
-        for field in required_fields:
-            if not source.get(field):
-                raise ValueError(f"{field} is required for Confluence data source")
-
-        return {
-            "@odata.type": CONFLUENCE_DATA_SOURCE,
-            "folder": source["folder_path"],
-            "connectionId": source["connection_id"],
-            "connectionName": source["connection_name"],
-            "directoryPath": source["directory_path"],
-            "fileNameGlob": file_name_glob,
-            "spaceId": source["space_id"],
-        }
+        return data_source.model_dump(by_alias=True, exclude_none=True)
 
     def _retrieve_by_id_spec(
         self,
@@ -962,9 +901,38 @@ def _resolve_folder_key(self, folder_key, folder_path):
         return folder_key
 
     def _extract_bucket_info(self, index: ContextGroundingIndex) -> Tuple[str, str]:
-        try:
-            return index.data_source.bucketName, index.data_source.folder  # type: ignore
-        except AttributeError as e:
-            raise Exception(
-                "ContextGrounding: Cannot extract bucket data from index"
-            ) from e
+        """Extract bucket information from the index, validating it's a storage bucket data source.
+
+        Args:
+            index: The context grounding index
+
+        Returns:
+            Tuple of (bucket_name, folder_path)
+
+        Raises:
+            UnsupportedDataSourceException: If the data source is not an Orchestrator Storage Bucket
+        """
+        if not index.data_source:
+            raise UnsupportedDataSourceException("add_to_index")
+
+        # Check if the data source has the @odata.type field indicating it's a storage bucket
+        data_source_dict = (
+            index.data_source.model_dump(by_alias=True)
+            if hasattr(index.data_source, "model_dump")
+            else index.data_source.__dict__
+        )
+        odata_type = data_source_dict.get("@odata.type") or data_source_dict.get(
+            "odata.type"
+        )
+
+        if odata_type and odata_type != ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE:
+            raise UnsupportedDataSourceException("add_to_index", odata_type)
+
+        # Try to extract bucket information
+        bucket_name = getattr(index.data_source, "bucketName", None)
+        folder = getattr(index.data_source, "folder", None)
+
+        if not bucket_name or not folder:
+            raise UnsupportedDataSourceException("add_to_index")
+
+        return bucket_name, folder
diff --git a/src/uipath/_utils/constants.py b/src/uipath/_utils/constants.py
index c55d92a42..eeaf1c894 100644
--- a/src/uipath/_utils/constants.py
+++ b/src/uipath/_utils/constants.py
@@ -21,21 +21,36 @@
 HEADER_JOB_KEY = "x-uipath-jobkey"
 HEADER_SW_LOCK_KEY = "x-uipath-sw-lockkey"
 
-# Data sources
-ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE = (
+# Data sources (request types)
+ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE_REQUEST = (
     "#UiPath.Vdbs.Domain.Api.V20Models.StorageBucketDataSourceRequest"
 )
-CONFLUENCE_DATA_SOURCE = "#UiPath.Vdbs.Domain.Api.V20Models.ConfluenceDataSourceRequest"
-DROPBOX_DATA_SOURCE = "#UiPath.Vdbs.Domain.Api.V20Models.DropboxDataSourceRequest"
-GOOGLE_DRIVE_DATA_SOURCE = (
+CONFLUENCE_DATA_SOURCE_REQUEST = (
+    "#UiPath.Vdbs.Domain.Api.V20Models.ConfluenceDataSourceRequest"
+)
+DROPBOX_DATA_SOURCE_REQUEST = (
+    "#UiPath.Vdbs.Domain.Api.V20Models.DropboxDataSourceRequest"
+)
+GOOGLE_DRIVE_DATA_SOURCE_REQUEST = (
     "#UiPath.Vdbs.Domain.Api.V20Models.GoogleDriveDataSourceRequest"
 )
-ONEDRIVE_DATA_SOURCE = "#UiPath.Vdbs.Domain.Api.V20Models.OneDriveDataSourceRequest"
+ONEDRIVE_DATA_SOURCE_REQUEST = (
+    "#UiPath.Vdbs.Domain.Api.V20Models.OneDriveDataSourceRequest"
+)
+
+# Data sources
+ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE = (
+    "#UiPath.Vdbs.Domain.Api.V20Models.StorageBucketDataSource"
+)
+CONFLUENCE_DATA_SOURCE = "#UiPath.Vdbs.Domain.Api.V20Models.ConfluenceDataSource"
+DROPBOX_DATA_SOURCE = "#UiPath.Vdbs.Domain.Api.V20Models.DropboxDataSource"
+GOOGLE_DRIVE_DATA_SOURCE = "#UiPath.Vdbs.Domain.Api.V20Models.GoogleDriveDataSource"
+ONEDRIVE_DATA_SOURCE = "#UiPath.Vdbs.Domain.Api.V20Models.OneDriveDataSource"
 
 # Preprocessing request types
-LLMV3Mini = "#UiPath.Vdbs.Domain.Api.V20Models.LLMV3MiniPreProcessingRequest"
-LLMV4 = "#UiPath.Vdbs.Domain.Api.V20Models.LLMV4PreProcessingRequest"
-NativeV1 = "#UiPath.Vdbs.Domain.Api.V20Models.NativeV1PreProcessingRequest"
+LLMV3Mini_REQUEST = "#UiPath.Vdbs.Domain.Api.V20Models.LLMV3MiniPreProcessingRequest"
+LLMV4_REQUEST = "#UiPath.Vdbs.Domain.Api.V20Models.LLMV4PreProcessingRequest"
+NativeV1_REQUEST = "#UiPath.Vdbs.Domain.Api.V20Models.NativeV1PreProcessingRequest"
 
 # Local storage
diff --git a/src/uipath/models/context_grounding_payloads.py b/src/uipath/models/context_grounding_payloads.py
new file mode 100644
index 000000000..b234cb15d
--- /dev/null
+++ b/src/uipath/models/context_grounding_payloads.py
@@ -0,0 +1,185 @@
+import re
+from typing import Any, Dict, Literal, Optional, Union
+
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+from pydantic.alias_generators import to_camel
+
+from uipath._utils.constants import (
+    CONFLUENCE_DATA_SOURCE_REQUEST,
+    DROPBOX_DATA_SOURCE_REQUEST,
+    GOOGLE_DRIVE_DATA_SOURCE_REQUEST,
+    ONEDRIVE_DATA_SOURCE_REQUEST,
+    ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE_REQUEST,
+)
+
+
+class DataSourceBase(BaseModel):
+    folder: str = Field(alias="folder", description="Folder path")
+    file_name_glob: str = Field(
+        alias="fileNameGlob", description="File name glob pattern"
+    )
+    directory_path: str = Field(alias="directoryPath", description="Directory path")
+
+
+class BucketDataSource(DataSourceBase):
+    odata_type: str = Field(
+        alias="@odata.type",
+        default=ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE_REQUEST,
+    )
+    bucket_name: str = Field(alias="bucketName", description="Storage bucket name")
+
+
+class GoogleDriveDataSource(DataSourceBase):
+    odata_type: str = Field(
+        alias="@odata.type",
+        default=GOOGLE_DRIVE_DATA_SOURCE_REQUEST,
+    )
+    connection_id: str = Field(alias="connectionId", description="Connection ID")
+    connection_name: str = Field(alias="connectionName", description="Connection name")
+    leaf_folder_id: str = Field(alias="leafFolderId", description="Leaf folder ID")
+
+
+class DropboxDataSource(DataSourceBase):
+    odata_type: str = Field(
+        alias="@odata.type",
+        default=DROPBOX_DATA_SOURCE_REQUEST,
+    )
+    connection_id: str = Field(alias="connectionId", description="Connection ID")
+    connection_name: str = Field(alias="connectionName", description="Connection name")
+
+
+class OneDriveDataSource(DataSourceBase):
+    odata_type: str = Field(
+        alias="@odata.type",
+        default=ONEDRIVE_DATA_SOURCE_REQUEST,
+    )
+    connection_id: str = Field(alias="connectionId", description="Connection ID")
+    connection_name: str = Field(alias="connectionName", description="Connection name")
+    leaf_folder_id: str = Field(alias="leafFolderId", description="Leaf folder ID")
+
+
+class ConfluenceDataSource(DataSourceBase):
+    odata_type: str = Field(
+        alias="@odata.type",
+        default=CONFLUENCE_DATA_SOURCE_REQUEST,
+    )
+    connection_id: str = Field(alias="connectionId", description="Connection ID")
+    connection_name: str = Field(alias="connectionName", description="Connection name")
+    space_id: str = Field(alias="spaceId", description="Space ID")
+
+
+class Indexer(BaseModel):
+    """Configuration for periodic indexing of data sources."""
+
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+
+    cron_expression: str = Field(description="Cron expression for scheduling")
+    time_zone_id: str = Field(default="UTC", description="Time zone ID")
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_cron(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """Validate cron expression format."""
+        cron_expr = values.get("cron_expression") or values.get("cronExpression")
+        if not cron_expr:
+            return values
+
+        # Supports @aliases, @every durations, and standard cron expressions
+        # with 5-7 fields (including "?" and "*/n" steps)
+        cron_pattern = r"^(@(annually|yearly|monthly|weekly|daily|hourly|reboot)|@every (\d+(ns|us|µs|ms|s|m|h))+|(((\d+,)+\d+|\d+(/|-)\d+|\*/\d+|\d+|\*|\?) ?){5,7})$"
+
+        if not re.match(cron_pattern, cron_expr.strip(), re.IGNORECASE):
+            raise ValueError(f"Invalid cron expression format: '{cron_expr}'")
+
+        return values
+
+
+class PreProcessing(BaseModel):
+    odata_type: str = Field(
+        alias="@odata.type", description="OData type for preprocessing"
+    )
+
+
+class CreateIndexPayload(BaseModel):
+    """Payload for creating a context grounding index.
+
+    Note: data_source is Dict[str, Any] because it may contain additional
+    fields like 'indexer' that are added dynamically based on configuration.
+    The data source is still validated through the _build_data_source method
+    which uses typed models internally.
+    """
+
+    name: str = Field(description="Index name")
+    description: str = Field(default="", description="Index description")
+    data_source: Dict[str, Any] = Field(
+        alias="dataSource", description="Data source configuration"
+    )
+    pre_processing: Optional[PreProcessing] = Field(
+        default=None, alias="preProcessing", description="Preprocessing configuration"
+    )
+
+    model_config = ConfigDict(populate_by_name=True)
+
+
+# User-facing source configuration models
+class BaseSourceConfig(BaseModel):
+    """Base configuration for all source types."""
+
+    folder_path: str = Field(description="Folder path in orchestrator")
+    directory_path: str = Field(description="Directory path")
+    file_type: Optional[str] = Field(
+        default=None, description="File type filter (e.g., 'pdf', 'txt')"
+    )
+    indexer: Optional[Indexer] = Field(
+        default=None, description="Optional indexer configuration for periodic updates"
+    )
+
+
+class ConnectionSourceConfig(BaseSourceConfig):
+    """Base configuration for sources that use connections."""
+
+    connection_id: str = Field(description="Connection ID")
+    connection_name: str = Field(description="Connection name")
+
+
+class BucketSourceConfig(BaseSourceConfig):
+    type: Literal["bucket"] = Field(
+        default="bucket", description="Source type identifier"
+    )
+    bucket_name: str = Field(description="Storage bucket name")
+    directory_path: str = Field(default="/", description="Directory path in bucket")
+
+
+class GoogleDriveSourceConfig(ConnectionSourceConfig):
+    type: Literal["google_drive"] = Field(
+        default="google_drive", description="Source type identifier"
+    )
+    leaf_folder_id: str = Field(description="Leaf folder ID in Google Drive")
+
+
+class DropboxSourceConfig(ConnectionSourceConfig):
+    type: Literal["dropbox"] = Field(
+        default="dropbox", description="Source type identifier"
+    )
+
+
+class OneDriveSourceConfig(ConnectionSourceConfig):
+    type: Literal["onedrive"] = Field(
+        default="onedrive", description="Source type identifier"
+    )
+    leaf_folder_id: str = Field(description="Leaf folder ID in OneDrive")
+
+
+class ConfluenceSourceConfig(ConnectionSourceConfig):
+    type: Literal["confluence"] = Field(
+        default="confluence", description="Source type identifier"
+    )
+    space_id: str = Field(description="Confluence space ID")
+
+
+SourceConfig = Union[
+    BucketSourceConfig,
+    GoogleDriveSourceConfig,
+    DropboxSourceConfig,
+    OneDriveSourceConfig,
+    ConfluenceSourceConfig,
+]
diff --git a/src/uipath/models/exceptions.py b/src/uipath/models/exceptions.py
index 3da8e00a7..cf5bc05ee 100644
--- a/src/uipath/models/exceptions.py
+++ b/src/uipath/models/exceptions.py
@@ -3,6 +3,17 @@
 from httpx import HTTPStatusError
 
 
+class UnsupportedDataSourceException(Exception):
+    """Exception raised when attempting to use an operation with an unsupported data source type."""
+
+    def __init__(self, operation: str, data_source_type: Optional[str] = None):
+        if data_source_type:
+            message = f"Operation '{operation}' is not supported for data source type: {data_source_type}. Only Orchestrator Storage Bucket data sources are supported."
+        else:
+            message = f"Operation '{operation}' requires an Orchestrator Storage Bucket data source."
+        super().__init__(message)
+
+
 class IngestionInProgressException(Exception):
     """An exception that is triggered when a search is attempted on an index that is currently undergoing ingestion."""
diff --git a/tests/sdk/services/test_context_grounding_service.py b/tests/sdk/services/test_context_grounding_service.py
index d03732fb2..9b18d6c87 100644
--- a/tests/sdk/services/test_context_grounding_service.py
+++ b/tests/sdk/services/test_context_grounding_service.py
@@ -1,4 +1,7 @@
+import json
+
 import pytest
+from pydantic import ValidationError
 from pytest_httpx import HTTPXMock
 
 from uipath._config import Config
@@ -6,8 +9,15 @@
 from uipath._services.buckets_service import BucketsService
 from uipath._services.context_grounding_service import ContextGroundingService
 from uipath._services.folder_service import FolderService
-from uipath._utils.constants import HEADER_USER_AGENT
+from uipath._utils.constants import HEADER_USER_AGENT, LLMV3Mini_REQUEST
 from uipath.models import ContextGroundingIndex, ContextGroundingQueryResponse
+from uipath.models.context_grounding_payloads import (
+    BucketSourceConfig,
+    ConfluenceSourceConfig,
+    DropboxSourceConfig,
+    GoogleDriveSourceConfig,
+    OneDriveSourceConfig,
+)
 
 
 @pytest.fixture
@@ -367,13 +377,12 @@ def test_create_index_bucket(
             },
         )
 
-        source = {
-            "type": "bucket",
-            "bucket_name": "test-bucket",
-            "folder_path": "/test/folder",
-            "directory_path": "/",
-            "file_type": "pdf",
-        }
+        source = BucketSourceConfig(
+            bucket_name="test-bucket",
+            folder_path="/test/folder",
+            directory_path="/",
+            file_type="pdf",
+        )
 
         index = service.create_index(
             name="test-bucket-index",
@@ -400,8 +409,6 @@ def test_create_index_bucket(
             == f"UiPath.Python.Sdk/UiPath.Python.Sdk.Activities.ContextGroundingService.create_index/{version}"
         )
 
-        import json
-
         request_data = json.loads(create_request.content)
         assert request_data["name"] == "test-bucket-index"
         assert request_data["description"] == "Test bucket index"
@@ -451,15 +458,14 @@ def test_create_index_google_drive(
             },
         )
 
-        source = {
-            "type": "google_drive",
-            "connection_id": "conn-123",
-            "connection_name": "Google Drive Connection",
-            "leaf_folder_id": "folder-456",
-            "directory_path": "/shared-docs",
-            "folder_path": "/test/folder",
-            "file_type": "docx",
-        }
+        source = GoogleDriveSourceConfig(
+            connection_id="conn-123",
+            connection_name="Google Drive Connection",
+            leaf_folder_id="folder-456",
+            directory_path="/shared-docs",
+            folder_path="/test/folder",
+            file_type="docx",
+        )
 
         index = service.create_index(
             name="test-google-index",
@@ -476,8 +482,6 @@ def test_create_index_google_drive(
         sent_requests = httpx_mock.get_requests()
         create_request = sent_requests[1]
 
-        import json
-
         request_data = json.loads(create_request.content)
         assert (
             request_data["dataSource"]["@odata.type"]
@@ -525,13 +529,12 @@ def test_create_index_dropbox(
             },
         )
 
-        source = {
-            "type": "dropbox",
-            "connection_id": "dropbox-conn-789",
-            "connection_name": "Dropbox Connection",
-            "directory_path": "/company-files",
-            "folder_path": "/test/folder",
-        }
+        source = DropboxSourceConfig(
+            connection_id="dropbox-conn-789",
+            connection_name="Dropbox Connection",
+            directory_path="/company-files",
+            folder_path="/test/folder",
+        )
 
         index = service.create_index(
             name="test-dropbox-index", source=source, advanced_ingestion=False
         )
@@ -543,8 +546,6 @@ def test_create_index_dropbox(
         sent_requests = httpx_mock.get_requests()
         create_request = sent_requests[1]
 
-        import json
-
         request_data = json.loads(create_request.content)
         assert (
             request_data["dataSource"]["@odata.type"]
@@ -587,15 +588,14 @@ def test_create_index_onedrive(
             },
         )
 
-        source = {
-            "type": "onedrive",
-            "connection_id": "onedrive-conn-101",
-            "connection_name": "OneDrive Connection",
-            "leaf_folder_id": "onedrive-folder-202",
-            "directory_path": "/reports",
-            "folder_path": "/test/folder",
-            "file_type": "xlsx",
-        }
+        source = OneDriveSourceConfig(
+            connection_id="onedrive-conn-101",
+            connection_name="OneDrive Connection",
+            leaf_folder_id="onedrive-folder-202",
+            directory_path="/reports",
+            folder_path="/test/folder",
+            file_type="xlsx",
+        )
 
         index = service.create_index(name="test-onedrive-index", source=source)
@@ -605,8 +605,6 @@ def test_create_index_onedrive(
         sent_requests = httpx_mock.get_requests()
         create_request = sent_requests[1]
 
-        import json
-
         request_data = json.loads(create_request.content)
         assert (
             request_data["dataSource"]["@odata.type"]
@@ -647,14 +645,13 @@ def test_create_index_confluence(
             },
         )
 
-        source = {
-            "type": "confluence",
-            "connection_id": "confluence-conn-303",
-            "connection_name": "Confluence Connection",
-            "space_id": "space-404",
-            "directory_path": "/wiki-docs",
-            "folder_path": "/test/folder",
-        }
+        source = ConfluenceSourceConfig(
+            connection_id="confluence-conn-303",
+            connection_name="Confluence Connection",
+            space_id="space-404",
+            directory_path="/wiki-docs",
+            folder_path="/test/folder",
+        )
 
         index = service.create_index(name="test-confluence-index", source=source)
@@ -664,8 +661,6 @@ def test_create_index_confluence(
         sent_requests = httpx_mock.get_requests()
         create_request = sent_requests[1]
 
-        import json
-
         request_data = json.loads(create_request.content)
         assert (
             request_data["dataSource"]["@odata.type"]
@@ -708,11 +703,10 @@ async def test_create_index_async(
             },
         )
 
-        source = {
-            "type": "bucket",
-            "bucket_name": "async-bucket",
-            "folder_path": "/async/folder",
-        }
+        source = BucketSourceConfig(
+            bucket_name="async-bucket",
+            folder_path="/async/folder",
+        )
 
         index = await service.create_index_async(
             name="test-async-index", description="Test async index", source=source
@@ -739,25 +733,9 @@ def test_create_index_missing_bucket_name(
         org: str,
         tenant: str,
     ) -> None:
-        httpx_mock.add_response(
-            url=f"{base_url}{org}{tenant}/orchestrator_/api/FoldersNavigation/GetFoldersForCurrentUser?searchText=test-folder-path&skip=0&take=20",
-            status_code=200,
-            json={
-                "PageItems": [
-                    {
-                        "Key": "test-folder-key",
-                        "FullyQualifiedName": "test-folder-path",
-                    }
-                ]
-            },
-        )
-
-        source = {"type": "bucket", "folder_path": "/test/folder"}
-
-        with pytest.raises(
-            ValueError, match="bucket_name is required for bucket data source"
-        ):
-            service.create_index(name="test-invalid-bucket", source=source)
+        # Pydantic will raise ValidationError for missing required fields
+        with pytest.raises(ValidationError, match="bucket_name"):
+            BucketSourceConfig(folder_path="/test/folder")  # type: ignore[call-arg]
 
     def test_create_index_missing_google_drive_fields(
         self,
@@ -767,57 +745,12 @@ def test_create_index_missing_google_drive_fields(
         org: str,
         tenant: str,
     ) -> None:
-        httpx_mock.add_response(
-            url=f"{base_url}{org}{tenant}/orchestrator_/api/FoldersNavigation/GetFoldersForCurrentUser?searchText=test-folder-path&skip=0&take=20",
-            status_code=200,
-            json={
-                "PageItems": [
-                    {
-                        "Key": "test-folder-key",
-                        "FullyQualifiedName": "test-folder-path",
-                    }
-                ]
-            },
-        )
-
-        source = {
-            "type": "google_drive",
-            "connection_id": "conn-123",
-            "folder_path": "/test/folder",
-        }
-
-        with pytest.raises(
-            ValueError, match="connection_name is required for Google Drive data source"
-        ):
-            service.create_index(name="test-invalid-google", source=source)
-
-    def test_create_index_unsupported_source_type(
-        self,
-        httpx_mock: HTTPXMock,
-        service: ContextGroundingService,
-        base_url: str,
-        org: str,
-        tenant: str,
-    ) -> None:
-        httpx_mock.add_response(
-            url=f"{base_url}{org}{tenant}/orchestrator_/api/FoldersNavigation/GetFoldersForCurrentUser?searchText=test-folder-path&skip=0&take=20",
-            status_code=200,
-            json={
-                "PageItems": [
-                    {
-                        "Key": "test-folder-key",
-                        "FullyQualifiedName": "test-folder-path",
-                    }
-                ]
-            },
-        )
-
-        source = {"type": "unsupported", "folder_path": "/test/folder"}
-
-        with pytest.raises(
-            ValueError, match="Unsupported data source type: unsupported"
-        ):
-            service.create_index(name="test-unsupported", source=source)
+        # Pydantic will raise ValidationError for missing required fields
+        with pytest.raises(ValidationError, match="connection_name"):
+            GoogleDriveSourceConfig(  # type: ignore[call-arg]
+                connection_id="conn-123",
+                folder_path="/test/folder",
+            )
 
     def test_create_index_custom_preprocessing(
         self,
@@ -827,8 +760,6 @@ def test_create_index_custom_preprocessing(
         org: str,
         tenant: str,
     ) -> None:
-        from uipath._utils.constants import LLMV3Mini
-
         httpx_mock.add_response(
             url=f"{base_url}{org}{tenant}/orchestrator_/api/FoldersNavigation/GetFoldersForCurrentUser?searchText=test-folder-path&skip=0&take=20",
             status_code=200,
@@ -852,16 +783,15 @@ def test_create_index_custom_preprocessing(
             },
         )
 
-        source = {
-            "type": "bucket",
-            "bucket_name": "test-bucket",
-            "folder_path": "/test/folder",
-        }
+        source = BucketSourceConfig(
+            bucket_name="test-bucket",
+            folder_path="/test/folder",
+        )
 
         index = service.create_index(
             name="test-custom-prep-index",
             source=source,
-            preprocessing_request=LLMV3Mini,
+            preprocessing_request=LLMV3Mini_REQUEST,
         )
 
         assert isinstance(index, ContextGroundingIndex)
@@ -869,7 +799,5 @@ def test_create_index_custom_preprocessing(
         sent_requests = httpx_mock.get_requests()
         create_request = sent_requests[1]
 
-        import json
-
         request_data = json.loads(create_request.content)
-        assert request_data["preProcessing"]["@odata.type"] == LLMV3Mini
+        assert request_data["preProcessing"]["@odata.type"] == LLMV3Mini_REQUEST
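Reviewer note: a sketch of how the typed request models serialize, matching the wire format the old hand-built dicts produced. The values are illustrative assumptions; the model and dump options are from this patch:

    from uipath.models.context_grounding_payloads import BucketDataSource

    ds = BucketDataSource(
        folder="/test/folder",
        bucketName="test-bucket",   # fields are populated via their aliases
        fileNameGlob="**/*.pdf",
        directoryPath="/",
    )

    # model_dump(by_alias=True, exclude_none=True) is exactly what
    # _create_spec sends as the request JSON.
    print(ds.model_dump(by_alias=True, exclude_none=True))
    # {'folder': '/test/folder', 'fileNameGlob': '**/*.pdf', 'directoryPath': '/',
    #  '@odata.type': '#UiPath.Vdbs.Domain.Api.V20Models.StorageBucketDataSourceRequest',
    #  'bucketName': 'test-bucket'}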