
Commit ae49baf

Merge branch 'validator' into async_query

2 parents: 94c626a + d422bcf

File tree

3 files changed: +47 −9 lines changed


src/cleanlab_codex/response_validation.py

Lines changed: 2 additions & 0 deletions
@@ -1,4 +1,6 @@
 """
+This module is now superseded by this [Validator API](/codex/api/validator/).
+
 Validation functions for evaluating LLM responses and determining if they should be replaced with Codex-generated alternatives.
 """
 

src/cleanlab_codex/validator.py

Lines changed: 44 additions & 8 deletions
@@ -24,6 +24,11 @@
 class BadResponseThresholds(BaseModel):
     """Config for determining if a response is bad.
     Each key is an evaluation metric and the value is a threshold such that if the score is below the threshold, the response is bad.
+
+    Default Thresholds:
+    - trustworthiness: 0.5
+    - response_helpfulness: 0.5
+    - Any custom eval: 0.5 (if not explicitly specified in bad_response_thresholds)
     """
 
     trustworthiness: float = Field(
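For context, a minimal sketch of overriding these defaults when constructing the Validator, based only on the signature visible in this diff (the access keys are placeholders; any eval omitted from the dict falls back to its 0.5 default, per the docstring above):

from cleanlab_codex.validator import Validator

# Sketch: scores below these thresholds flag a response as bad.
validator = Validator(
    codex_access_key="<your-codex-access-key>",  # placeholder
    tlm_api_key="<your-tlm-api-key>",            # placeholder
    bad_response_thresholds={
        "trustworthiness": 0.7,       # stricter than the 0.5 default
        "response_helpfulness": 0.5,  # matches the default
    },
)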
@@ -83,15 +88,41 @@ def __init__(
         trustworthy_rag_config: Optional[dict[str, Any]] = None,
         bad_response_thresholds: Optional[dict[str, float]] = None,
     ):
-        """Evaluates the quality of responses generated in RAG applications and remediates them if needed.
+        """Real-time detection and remediation of bad responses in RAG applications, powered by Cleanlab's TrustworthyRAG and Codex.
 
-        This object combines Cleanlab's various Evals with thresholding to detect bad responses and remediates them with Codex.
+        This object combines Cleanlab's TrustworthyRAG evaluation scores with configurable thresholds to detect potentially bad responses
+        in your RAG application. When a bad response is detected, it automatically attempts to remediate by retrieving an expert-provided
+        answer from your Codex project.
+
+        For most use cases, we recommend using the `validate()` method which provides a complete validation workflow including
+        both detection and Codex remediation. The `detect()` method is available separately for testing and threshold tuning purposes
+        without triggering a Codex lookup.
+
+        By default, this uses the same default configurations as [`TrustworthyRAG`](/tlm/api/python/utils.rag/#class-trustworthyrag), except:
+        - Explanations are returned in logs for better debugging
+        - Only the `response_helpfulness` eval is run
 
         Args:
-            codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project.
-            tlm_api_key (Optional[str]): The API key for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag).
-            trustworthy_rag_config (Optional[dict[str, Any]]): Optional initialization arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag), which is used to detect response issues.
-            bad_response_thresholds (Optional[dict[str, float]]): Detection score thresholds used to flag whether or not a response is considered bad. Each key in this dict corresponds to an Eval from TrustworthyRAG, and the value indicates a threshold below which scores from this Eval are considered detected issues. A response is flagged as bad if any issues are detected for it.
+            codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. Used to retrieve expert-provided answers
+                when bad responses are detected.
+
+            tlm_api_key (str, optional): API key for accessing [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). If not provided, this must be specified
+                in trustworthy_rag_config.
+
+            trustworthy_rag_config (dict[str, Any], optional): Optional initialization arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag),
+                which is used to detect response issues. If not provided, default configuration will be used.
+
+            bad_response_thresholds (dict[str, float], optional): Detection score thresholds used to flag whether
+                a response is considered bad. Each key corresponds to an Eval from TrustworthyRAG, and the value
+                indicates a threshold (between 0 and 1) below which scores are considered detected issues. A response
+                is flagged as bad if any issues are detected. If not provided, default thresholds will be used. See
+                [`BadResponseThresholds`](/codex/api/python/validator/#class-badresponsethresholds) for more details.
+
+        Raises:
+            ValueError: If both tlm_api_key and api_key in trustworthy_rag_config are provided.
+            ValueError: If bad_response_thresholds contains thresholds for non-existent evaluation metrics.
+            TypeError: If any threshold value is not a number.
+            ValueError: If any threshold value is not between 0 and 1.
         """
         trustworthy_rag_config = trustworthy_rag_config or get_default_trustworthyrag_config()
         if tlm_api_key is not None and "api_key" in trustworthy_rag_config:
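To make the recommended workflow concrete, here is a hedged sketch of the `validate()` path described in the new docstring. The exact signature and return shape of `validate()` are not shown in this diff, so the query/context/response arguments and the "is_bad_response" and "expert_answer" keys below are assumptions:

from cleanlab_codex.validator import Validator

validator = Validator(codex_access_key="<your-codex-access-key>")  # placeholder key

draft_response = "<draft LLM response>"

# Assumed return shape: eval scores plus "is_bad_response" and "expert_answer".
result = validator.validate(
    query="How do I reset my password?",
    context="<retrieved context>",
    response=draft_response,
)

if result["is_bad_response"] and result["expert_answer"] is not None:
    final_response = result["expert_answer"]  # serve the SME-provided answer
else:
    final_response = draft_response           # keep the original response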
@@ -171,7 +202,12 @@ def detect(
         prompt: Optional[str] = None,
         form_prompt: Optional[Callable[[str, str], str]] = None,
     ) -> tuple[ThresholdedTrustworthyRAGScore, bool]:
-        """Evaluate the response quality using TrustworthyRAG and determine if it is a bad response via thresholding.
+        """Score response quality using TrustworthyRAG and flag bad responses based on configured thresholds.
+
+        Note:
+            This method is primarily intended for testing and threshold tuning purposes. For production use cases,
+            we recommend using the `validate()` method which provides a complete validation workflow including
+            Codex remediation.
 
         Args:
             query (str): The user query that was used to generate the response.
@@ -201,7 +237,7 @@ def detect(
         is_bad_response = any(score_dict["is_bad"] for score_dict in thresholded_scores.values())
         return thresholded_scores, is_bad_response
 
-    def remediate(self, query: str) -> str | None:
+    def _remediate(self, query: str) -> str | None:
         """Request a SME-provided answer for this query, if one is available in Codex.
 
         Args:
tests/test_validator.py

Lines changed: 1 addition & 1 deletion
@@ -133,7 +133,7 @@ def test_remediate(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None
         mock_project.from_access_key.return_value.query.return_value = ("expert answer", None)
 
         validator = Validator(codex_access_key="test")
-        result = validator.remediate("test query")
+        result = validator._remediate("test query")
 
         # Verify project.query was called
         mock_project.from_access_key.return_value.query.assert_called_once_with(question="test query")
