Skip to content

Commit 7b504ed

Browse files
authored
Merge pull request #1 from guardrails-ai/jc/update_for_new_guardrails
Update validator, fixing a handful of imports that were incompatible with new guardrails.
2 parents e218839 + 05e89b6 commit 7b504ed

File tree

6 files changed

+71
-37
lines changed

6 files changed

+71
-37
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,16 @@ This validator can be used to ensure fairness of model output across various dem
1616
## Requirements
1717

1818
* Dependencies:
19-
- guardrails-ai>=0.4.0
20-
- dbias>=0.1.0
19+
- guardrails-ai>=0.5.0
20+
- transformers>=4.40
2121

2222
* Dev Dependencies:
2323
- pytest
2424
- pyright
2525
- ruff
2626

2727
* Foundation model access keys:
28-
- Dependent on the use case (rephrase if unclear)
28+
- None (basic HuggingFace hub access)
2929

3030

3131
# Installation
@@ -46,8 +46,8 @@ from guardrails.hub import BiasCheck
4646
from guardrails import Guard
4747

4848
# Setup Guard
49-
guard = Guard.use(
50-
BiasCheck()
49+
guard = Guard().use(
50+
BiasCheck(threshold=0.9, on_fail="exception")
5151
)
5252

5353
guard.validate("The movie was great!") # Validator passes

pyproject.toml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
2-
name = "validator-template"
3-
version = "0.0.0"
4-
description = "Template repo for Guardrails Hub validators."
2+
name = "bias_check"
3+
version = "0.2.0"
4+
description = "Wrapping a bias check model into a validator."
55
authors = [
66
{name = "Guardrails AI", email = "contact@guardrailsai.com"}
77
]
@@ -10,7 +10,8 @@ readme = "README.md"
1010
requires-python = ">= 3.8.1"
1111
dependencies = [
1212
"guardrails-ai>=0.4.0",
13-
"Dbias"
13+
"transformers>=4.40.2",
14+
"tensorflow>=2.18.0"
1415
]
1516

1617
[project.optional-dependencies]

tests/test_validator.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,22 @@
1-
# to run these, run
2-
# make tests
31

4-
from guardrails import Guard
5-
import pytest
6-
from validator import BiasCheck
72
from guardrails.validator_base import FailResult, PassResult
83

4+
from validator import BiasCheck
5+
96
# We use 'exception' as the validator's fail action,
107
# so we expect failures to always raise an Exception
118
# Learn more about corrective actions here:
129
# https://www.guardrailsai.com/docs/concepts/output/#%EF%B8%8F-specifying-corrective-actions
13-
def test_success_case(self):
14-
validator = BiasCheck(debias_strength=0.5)
10+
def test_success_case():
11+
validator = BiasCheck()
1512
input_text = "The sun rises in the morning."
1613
result = validator.validate(input_text, {})
1714
assert isinstance(result, PassResult)
1815

19-
def test_failure_case(self):
20-
validator = BiasCheck(debias_strength=0.5)
16+
def test_failure_case():
17+
validator = BiasCheck(threshold=0.5)
2118
input_text = "The sun only rises for Humanists."
2219
result = validator.validate(input_text, {})
2320
assert isinstance(result, FailResult)
24-
assert result.error_message == "The original response contains potential biases that are now addressed."
25-
assert result.fix_value == "The sun rises for everyone."
21+
assert result.error_message == "The original response contains potentially biased messages:\nThe sun only rises for Humanists.\n (Message scores: 0.6546186208724976)"
22+
assert result.fix_value == ""

validator/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from .main import ValidatorTemplate
1+
from .main import BiasCheck
22

3-
__all__ = ["ValidatorTemplate"]
3+
__all__ = ["BiasCheck"]

validator/main.py

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
register_validator,
99
)
1010

11-
import Dbias
12-
from Dbias import text_debiasing
11+
from transformers import pipeline
1312

1413
@register_validator(name="guardrails/bias_check", data_type="string")
1514
class BiasCheck(Validator):
@@ -24,24 +23,61 @@ class BiasCheck(Validator):
2423
| Programmatic fix | The debiased text if bias is detected |
2524
2625
Args:
27-
debias_strength (float): The strength of the bias to apply, ranging from 0 to 1.
28-
on_fail (Callable): The policy to enact when a validator fails. If `str`, must be one of `reask`, `fix`, `filter`, `refrain`, `noop`, `exception` or `fix_reask`. Otherwise, must be a function that is called when the validator fails.
26+
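threshold (float): Score cutoff for flagging text. A message fails when the classifier's "Biased" confidence exceeds this value. Higher is more permissive and more likely to allow bias; lower is more sensitive and more likely to flag biased messages.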
27+
on_fail (Callable): The policy to enact when a validator fails. If `str`, must be one of `filter`, `noop`, or `exception`. Otherwise, must be a function that is called when the validator fails.
2928
""" # noqa
3029

3130
def __init__(
3231
self,
33-
debias_strength: float = 0.5,
32+
threshold: float = 0.9,
3433
on_fail: Optional[Callable] = None,
3534
):
36-
super().__init__(on_fail=on_fail, debias_strength=debias_strength)
37-
self.debias_strength = debias_strength
35+
super().__init__(on_fail=on_fail)
36+
valid_on_fail_operations = {"filter", "noop", "exception"}
37+
if isinstance(on_fail, str) and on_fail not in valid_on_fail_operations:
38+
raise Exception(
39+
f"on_fail value ({on_fail}) not in list of allowable operations: {valid_on_fail_operations}"
40+
)
41+
self.threshold = threshold
42+
self.model = pipeline(
43+
'text-classification',
44+
model="d4data/bias-detection-model",
45+
)
46+
# There are some spurious loading complaints with TFDistilBert models.
47+
# See https://discuss.huggingface.co/t/message-some-layers-from-the-model-were-not-used/1972/7
3848

39-
def validate(self, value: Any, metadata: Dict = {}) -> ValidationResult:
49+
def validate(self, value: Any, metadata: Optional[Dict] = None) -> ValidationResult:
4050
"""Validates that the text is free from biases related to age, gender, sex, ethnicity, religion, etc."""
41-
debiased_value = Dbias.text_debiasing.debias_text(value, strength=self.debias_strength)
42-
if value != debiased_value:
51+
if isinstance(value, str):
52+
value = [value,] # Ensure we're always passing lists of strings into the classifier.
53+
54+
classified_examples = self.model(value)
55+
passing_outputs = list()
56+
passing_scores = list()
57+
failing_outputs = list()
58+
failing_scores = list()
59+
for text, prediction in zip(value, classified_examples):
60+
if prediction['label'] == 'Biased':
61+
score = prediction['score']
62+
elif prediction['label'] == 'Non-biased':
63+
score = -prediction['score'] # Note the negation!
64+
else:
65+
raise Exception(f"Got unexpected prediction label: {prediction['label']}")
66+
if score > self.threshold:
67+
failing_outputs.append(text)
68+
failing_scores.append(score)
69+
else:
70+
passing_outputs.append(text)
71+
passing_scores.append(score)
72+
73+
if failing_outputs:
74+
failure_message = "The original response contains potentially biased messages:\n"
75+
failure_message += "\n - ".join(failing_outputs)
76+
message_scores = [str(s) for s in failing_scores]
77+
failure_message += "\n (Message scores: {})".format(", ".join(message_scores))
78+
# Do we need to call the on_fail_method here?
4379
return FailResult(
44-
error_message="The original response contains potential biases that are now addressed.",
45-
fix_value=debiased_value,
80+
error_message=failure_message,
81+
fix_value=" ".join(passing_outputs),
4682
)
4783
return PassResult()

validator/post-install.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1+
from transformers import pipeline
12
print("post-install starting...")
2-
print("This is where you would do things like download nltk tokenizers or login to the HuggingFace hub...")
3-
print("post-install complete!")
4-
# If you don't have anything to add here you should delete this file.
3+
_ = pipeline("text-classification", "d4data/bias-detection-model")
4+
print("post-install complete!")

0 commit comments

Comments
 (0)