
Commit ae49baf

Merge branch 'validator' into async_query

2 parents: 94c626a + d422bcf

File tree

3 files changed: +47 −9 lines changed


src/cleanlab_codex/response_validation.py

Lines changed: 2 additions & 0 deletions
@@ -1,4 +1,6 @@
 """
+This module is now superseded by this [Validator API](/codex/api/validator/).
+
 Validation functions for evaluating LLM responses and determining if they should be replaced with Codex-generated alternatives.
 """
 

src/cleanlab_codex/validator.py

Lines changed: 44 additions & 8 deletions
@@ -24,6 +24,11 @@
 class BadResponseThresholds(BaseModel):
     """Config for determining if a response is bad.
     Each key is an evaluation metric and the value is a threshold such that if the score is below the threshold, the response is bad.
+
+    Default Thresholds:
+    - trustworthiness: 0.5
+    - response_helpfulness: 0.5
+    - Any custom eval: 0.5 (if not explicitly specified in bad_response_thresholds)
     """
 
     trustworthiness: float = Field(
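For context, a minimal sketch of overriding these defaults when constructing the Validator, based only on the signature visible in this diff (the access keys are placeholders; any eval omitted from the dict falls back to its 0.5 default, per the docstring above):

from cleanlab_codex.validator import Validator

# Sketch: scores below these thresholds flag a response as bad.
validator = Validator(
    codex_access_key="<your-codex-access-key>",  # placeholder
    tlm_api_key="<your-tlm-api-key>",            # placeholder
    bad_response_thresholds={
        "trustworthiness": 0.7,       # stricter than the 0.5 default
        "response_helpfulness": 0.5,  # matches the default
    },
)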
@@ -83,15 +88,41 @@ def __init__(
         trustworthy_rag_config: Optional[dict[str, Any]] = None,
         bad_response_thresholds: Optional[dict[str, float]] = None,
     ):
-        """Evaluates the quality of responses generated in RAG applications and remediates them if needed.
+        """Real-time detection and remediation of bad responses in RAG applications, powered by Cleanlab's TrustworthyRAG and Codex.
 
-        This object combines Cleanlab's various Evals with thresholding to detect bad responses and remediates them with Codex.
+        This object combines Cleanlab's TrustworthyRAG evaluation scores with configurable thresholds to detect potentially bad responses
+        in your RAG application. When a bad response is detected, it automatically attempts to remediate by retrieving an expert-provided
+        answer from your Codex project.
+
+        For most use cases, we recommend using the `validate()` method which provides a complete validation workflow including
+        both detection and Codex remediation. The `detect()` method is available separately for testing and threshold tuning purposes
+        without triggering a Codex lookup.
+
+        By default, this uses the same default configurations as [`TrustworthyRAG`](/tlm/api/python/utils.rag/#class-trustworthyrag), except:
+        - Explanations are returned in logs for better debugging
+        - Only the `response_helpfulness` eval is run
 
         Args:
-            codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project.
-            tlm_api_key (Optional[str]): The API key for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag).
-            trustworthy_rag_config (Optional[dict[str, Any]]): Optional initialization arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag), which is used to detect response issues.
-            bad_response_thresholds (Optional[dict[str, float]]): Detection score thresholds used to flag whether or not a response is considered bad. Each key in this dict corresponds to an Eval from TrustworthyRAG, and the value indicates a threshold below which scores from this Eval are considered detected issues. A response is flagged as bad if any issues are detected for it.
+            codex_access_key (str): The [access key](/codex/web_tutorials/create_project/#access-keys) for a Codex project. Used to retrieve expert-provided answers
+                when bad responses are detected.
+
+            tlm_api_key (str, optional): API key for accessing [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag). If not provided, this must be specified
+                in trustworthy_rag_config.
+
+            trustworthy_rag_config (dict[str, Any], optional): Optional initialization arguments for [TrustworthyRAG](/tlm/api/python/utils.rag/#class-trustworthyrag),
+                which is used to detect response issues. If not provided, default configuration will be used.
+
+            bad_response_thresholds (dict[str, float], optional): Detection score thresholds used to flag whether
+                a response is considered bad. Each key corresponds to an Eval from TrustworthyRAG, and the value
+                indicates a threshold (between 0 and 1) below which scores are considered detected issues. A response
+                is flagged as bad if any issues are detected. If not provided, default thresholds will be used. See
+                [`BadResponseThresholds`](/codex/api/python/validator/#class-badresponsethresholds) for more details.
+
+        Raises:
+            ValueError: If both tlm_api_key and api_key in trustworthy_rag_config are provided.
+            ValueError: If bad_response_thresholds contains thresholds for non-existent evaluation metrics.
+            TypeError: If any threshold value is not a number.
+            ValueError: If any threshold value is not between 0 and 1.
         """
         trustworthy_rag_config = trustworthy_rag_config or get_default_trustworthyrag_config()
         if tlm_api_key is not None and "api_key" in trustworthy_rag_config:
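To make the recommended workflow concrete, here is a hedged sketch of the `validate()` path described in the new docstring. The exact signature and return shape of `validate()` are not shown in this diff, so the query/context/response arguments and the "is_bad_response" and "expert_answer" keys below are assumptions:

from cleanlab_codex.validator import Validator

validator = Validator(codex_access_key="<your-codex-access-key>")  # placeholder key

draft_response = "<draft LLM response>"

# Assumed return shape: eval scores plus "is_bad_response" and "expert_answer".
result = validator.validate(
    query="How do I reset my password?",
    context="<retrieved context>",
    response=draft_response,
)

if result["is_bad_response"] and result["expert_answer"] is not None:
    final_response = result["expert_answer"]  # serve the SME-provided answer
else:
    final_response = draft_response           # keep the original response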
@@ -171,7 +202,12 @@ def detect(
         prompt: Optional[str] = None,
         form_prompt: Optional[Callable[[str, str], str]] = None,
     ) -> tuple[ThresholdedTrustworthyRAGScore, bool]:
-        """Evaluate the response quality using TrustworthyRAG and determine if it is a bad response via thresholding.
+        """Score response quality using TrustworthyRAG and flag bad responses based on configured thresholds.
+
+        Note:
+            This method is primarily intended for testing and threshold tuning purposes. For production use cases,
+            we recommend using the `validate()` method which provides a complete validation workflow including
+            Codex remediation.
 
         Args:
             query (str): The user query that was used to generate the response.
@@ -201,7 +237,7 @@ def detect(
         is_bad_response = any(score_dict["is_bad"] for score_dict in thresholded_scores.values())
         return thresholded_scores, is_bad_response
 
-    def remediate(self, query: str) -> str | None:
+    def _remediate(self, query: str) -> str | None:
         """Request a SME-provided answer for this query, if one is available in Codex.
 
         Args:
tests/test_validator.py

Lines changed: 1 addition & 1 deletion
@@ -133,7 +133,7 @@ def test_remediate(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None
         mock_project.from_access_key.return_value.query.return_value = ("expert answer", None)
 
         validator = Validator(codex_access_key="test")
-        result = validator.remediate("test query")
+        result = validator._remediate("test query")
 
         # Verify project.query was called
         mock_project.from_access_key.return_value.query.assert_called_once_with(question="test query")
