Skip to content

Commit e77ebb0

Browse files
authored
✨ add RAG metadata and better errors (#371)
1 parent 4b4fe59 commit e77ebb0

File tree

10 files changed

+154
-22
lines changed

10 files changed

+154
-22
lines changed

mindee/error/mindee_http_error_v2.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,42 @@
22
from typing import Optional
33

44
from mindee.parsing.common.string_dict import StringDict
5+
from mindee.parsing.v2 import ErrorItem, ErrorResponse
56

67

7-
class MindeeHTTPErrorV2(RuntimeError):
8+
class MindeeHTTPErrorV2(RuntimeError, ErrorResponse):
89
"""An exception relating to HTTP calls."""
910

10-
status: int
11-
detail: Optional[str]
12-
13-
def __init__(self, status: int, detail: Optional[str]) -> None:
11+
def __init__(self, response: ErrorResponse) -> None:
1412
"""
1513
Base exception for HTTP calls.
1614
17-
:param status: HTTP code for the error
18-
:param detail: Error details.
15+
:param response:
1916
"""
20-
self.status = status
21-
self.detail = detail
22-
super().__init__(f"HTTP error {status} - {detail}")
17+
self.status = response.status
18+
self.title = response.title
19+
self.code = response.code
20+
self.detail = response.detail
21+
self.errors: list[ErrorItem] = response.errors
22+
super().__init__(
23+
f"HTTP {self.status} - {self.title} :: {self.code} - {self.detail}"
24+
)
2325

2426

2527
class MindeeHTTPUnknownErrorV2(MindeeHTTPErrorV2):
2628
"""HTTP error with unknown status code."""
2729

2830
def __init__(self, detail: Optional[str]) -> None:
29-
super().__init__(-1, f"Couldn't deserialize server error. Found: {detail}")
31+
super().__init__(
32+
ErrorResponse(
33+
{
34+
"status": -1,
35+
"code": "000-000",
36+
"title": "Unknown Error",
37+
"detail": f"Couldn't deserialize server error. Found: {detail}",
38+
}
39+
)
40+
)
3041

3142

3243
def handle_error_v2(raw_response: StringDict) -> None:
@@ -38,7 +49,4 @@ def handle_error_v2(raw_response: StringDict) -> None:
3849
"""
3950
if "status" not in raw_response or "detail" not in raw_response:
4051
raise MindeeHTTPUnknownErrorV2(json.dumps(raw_response, indent=2))
41-
raise MindeeHTTPErrorV2(
42-
raw_response["status"],
43-
raw_response["detail"],
44-
)
52+
raise MindeeHTTPErrorV2(ErrorResponse(raw_response))

mindee/parsing/v2/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from mindee.parsing.v2.error_item import ErrorItem
12
from mindee.parsing.v2.error_response import ErrorResponse
23
from mindee.parsing.v2.inference import Inference
34
from mindee.parsing.v2.inference_active_options import InferenceActiveOptions
@@ -16,4 +17,5 @@
1617
"InferenceResult",
1718
"JobResponse",
1819
"ErrorResponse",
20+
"ErrorItem",
1921
]

mindee/parsing/v2/error_item.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from typing import Optional
2+
3+
from mindee.parsing.common.string_dict import StringDict
4+
5+
6+
class ErrorItem:
7+
"""Explicit details on a problem."""
8+
9+
pointer: Optional[str]
10+
"""A JSON Pointer to the location of the body property."""
11+
detail: str
12+
"""Explicit information on the issue."""
13+
14+
def __init__(self, raw_response: StringDict):
15+
self.pointer = raw_response.get("pointer", None)
16+
self.detail = raw_response["detail"]

mindee/parsing/v2/error_response.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,32 @@
1+
from typing import List
2+
13
from mindee.parsing.common.string_dict import StringDict
4+
from mindee.parsing.v2.error_item import ErrorItem
25

36

47
class ErrorResponse:
5-
"""Error response info."""
8+
"""Error response detailing a problem. The format adheres to RFC 9457."""
69

7-
detail: str
8-
"""Detail relevant to the error."""
910
status: int
10-
"""Http error code."""
11+
"""The HTTP status code returned by the server."""
12+
detail: str
13+
"""A human-readable explanation specific to the occurrence of the problem."""
14+
title: str
15+
"""A short, human-readable summary of the problem."""
16+
code: str
17+
"""A machine-readable code specific to the occurrence of the problem."""
18+
errors: List[ErrorItem]
19+
"""A list of explicit error details."""
1120

1221
def __init__(self, raw_response: StringDict):
13-
self.detail = raw_response["detail"]
1422
self.status = raw_response["status"]
23+
self.detail = raw_response["detail"]
24+
self.title = raw_response["title"]
25+
self.code = raw_response["code"]
26+
try:
27+
self.errors = [ErrorItem(error) for error in raw_response["errors"]]
28+
except KeyError:
29+
self.errors = []
1530

1631
def __str__(self):
17-
return f"HTTP Status: {self.status} - {self.detail}"
32+
return f"HTTP {self.status} - {self.title} :: {self.code} - {self.detail}"

mindee/parsing/v2/inference_result.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from mindee.parsing.common.string_dict import StringDict
44
from mindee.parsing.v2.field.inference_fields import InferenceFields
5+
from mindee.parsing.v2.rag_metadata import RagMetadata
56
from mindee.parsing.v2.raw_text import RawText
67

78

@@ -12,11 +13,15 @@ class InferenceResult:
1213
"""Fields contained in the inference."""
1314
raw_text: Optional[RawText] = None
1415
"""Potential options retrieved alongside the inference."""
16+
rag: Optional[RagMetadata] = None
17+
"""RAG metadata."""
1518

1619
def __init__(self, raw_response: StringDict) -> None:
1720
self.fields = InferenceFields(raw_response["fields"])
1821
if raw_response.get("raw_text"):
1922
self.raw_text = RawText(raw_response["raw_text"])
23+
if raw_response.get("rag"):
24+
self.rag = RagMetadata(raw_response["rag"])
2025

2126
def __str__(self) -> str:
2227
out_str = f"Fields\n======{self.fields}"

mindee/parsing/v2/rag_metadata.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from typing import Optional
2+
3+
from mindee.parsing.common.string_dict import StringDict
4+
5+
6+
class RagMetadata:
7+
"""Metadata about the RAG operation."""
8+
9+
retrieved_document_id: Optional[str]
10+
11+
def __init__(self, raw_response: StringDict):
12+
self.retrieved_document_id = raw_response["retrieved_document_id"]

tests/v2/parsing/test_inference_response.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from mindee.parsing.v2.inference import Inference
1111
from mindee.parsing.v2.inference_file import InferenceFile
1212
from mindee.parsing.v2.inference_model import InferenceModel
13+
from mindee.parsing.v2.rag_metadata import RagMetadata
1314
from tests.utils import V2_DATA_DIR
1415

1516

@@ -198,6 +199,26 @@ def test_raw_texts():
198199
)
199200

200201

202+
@pytest.mark.v2
203+
def test_rag_metadata_when_matched():
204+
"""RAG metadata when matched."""
205+
json_sample, _ = _get_inference_samples("rag_matched")
206+
response = InferenceResponse(json_sample)
207+
rag = response.inference.result.rag
208+
assert isinstance(rag, RagMetadata)
209+
assert rag.retrieved_document_id == "12345abc-1234-1234-1234-123456789abc"
210+
211+
212+
@pytest.mark.v2
213+
def test_rag_metadata_when_not_matched():
214+
"""RAG metadata when not matched."""
215+
json_sample, _ = _get_inference_samples("rag_not_matched")
216+
response = InferenceResponse(json_sample)
217+
rag = response.inference.result.rag
218+
assert isinstance(rag, RagMetadata)
219+
assert rag.retrieved_document_id is None
220+
221+
201222
@pytest.mark.v2
202223
def test_full_inference_response():
203224
json_sample, rst_sample = _get_product_samples("financial_document", "complete")
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import json
2+
3+
import pytest
4+
5+
from mindee import JobResponse
6+
from mindee.parsing.v2 import ErrorItem, ErrorResponse
7+
from tests.utils import V2_DATA_DIR
8+
9+
10+
def _get_job_samples(json_file: str) -> dict:
11+
json_path = V2_DATA_DIR / "job" / json_file
12+
with json_path.open("r", encoding="utf-8") as fh:
13+
json_sample = json.load(fh)
14+
return json_sample
15+
16+
17+
@pytest.mark.v2
18+
def test_should_load_when_status_is_processing():
19+
"""Should load when status is Processing."""
20+
json_sample = _get_job_samples("ok_processing.json")
21+
response = JobResponse(json_sample)
22+
23+
assert response.job is not None
24+
assert response.job.error is None
25+
26+
27+
@pytest.mark.v2
28+
def test_should_load_with_422_error():
29+
"""Should load with 422 error."""
30+
json_sample = _get_job_samples("fail_422.json")
31+
response = JobResponse(json_sample)
32+
33+
assert response.job is not None
34+
assert isinstance(response.job.error, ErrorResponse)
35+
assert response.job.error.status == 422
36+
assert response.job.error.code.startswith("422-")
37+
assert isinstance(response.job.error.errors, list)
38+
assert len(response.job.error.errors) == 1
39+
assert isinstance(response.job.error.errors[0], ErrorItem)

tests/v2/test_client.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,12 @@ class _FakePostRespError:
2727

2828
def json(self):
2929
# Shape must match what handle_error_v2 expects
30-
return {"status": -1, "detail": "forced failure from test"}
30+
return {
31+
"status": 0,
32+
"code": "000-000",
33+
"title": "From Test",
34+
"detail": "forced failure from test",
35+
}
3136

3237
class _FakeOkProcessingJobResp:
3338
status_code = 200

tests/v2/test_client_integration.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ def test_invalid_uuid_must_throw_error(v2_client: ClientV2) -> None:
179179

180180
exc: MindeeHTTPErrorV2 = exc_info.value
181181
assert exc.status == 422
182+
assert exc.title is not None
183+
assert exc.code.startswith("422-")
184+
assert isinstance(exc.errors, list)
182185

183186

184187
@pytest.mark.integration
@@ -197,6 +200,9 @@ def test_unknown_model_must_throw_error(v2_client: ClientV2) -> None:
197200

198201
exc: MindeeHTTPErrorV2 = exc_info.value
199202
assert exc.status == 404
203+
assert exc.title is not None
204+
assert exc.code.startswith("404-")
205+
assert isinstance(exc.errors, list)
200206

201207

202208
@pytest.mark.integration
@@ -227,6 +233,9 @@ def test_unknown_webhook_ids_must_throw_error(
227233

228234
exc: MindeeHTTPErrorV2 = exc_info.value
229235
assert exc.status == 422
236+
assert exc.title is not None
237+
assert exc.code.startswith("422-")
238+
assert isinstance(exc.errors, list)
230239
assert "no matching webhooks" in exc.detail.lower()
231240

232241

0 commit comments

Comments
 (0)