Skip to content

Commit 86a393c

Browse files
♻️ refactor page options usage, make fields subscriptable (#330)
1 parent 77b7ad9 commit 86a393c

File tree

13 files changed

+65
-98
lines changed

13 files changed

+65
-98
lines changed

mindee/client_v2.py

Lines changed: 17 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,7 @@
66
from mindee.error.mindee_http_error_v2 import handle_error_v2
77
from mindee.input.inference_predict_options import InferencePredictOptions
88
from mindee.input.local_response import LocalResponse
9-
from mindee.input.page_options import PageOptions
10-
from mindee.input.polling_options_v2 import PollingOptionsV2
9+
from mindee.input.polling_options import PollingOptions
1110
from mindee.input.sources.local_input_source import LocalInputSource
1211
from mindee.logger import logger
1312
from mindee.mindee_http.mindee_api_v2 import MindeeApiV2
@@ -39,11 +38,7 @@ def __init__(self, api_key: Optional[str] = None) -> None:
3938
self.mindee_api = MindeeApiV2(api_key)
4039

4140
def enqueue(
42-
self,
43-
input_source: LocalInputSource,
44-
options: InferencePredictOptions,
45-
page_options: Optional[PageOptions] = None,
46-
close_file: bool = True,
41+
self, input_source: LocalInputSource, options: InferencePredictOptions
4742
) -> PollingResponse:
4843
"""
4944
Enqueues a document to a given model.
@@ -52,28 +47,19 @@ def enqueue(
5247
Has to be created beforehand.
5348
5449
:param options: Options for the prediction.
55-
56-
:param close_file: Whether to ``close()`` the file after parsing it.
57-
Set to ``False`` if you need to access the file after this operation.
58-
59-
:param page_options: If set, remove pages from the document as specified.
60-
This is done before sending the file to the server.
61-
It is useful to avoid page limitations.
6250
:return: A valid inference response.
6351
"""
6452
logger.debug("Enqueuing document to '%s'", options.model_id)
6553

66-
if page_options and input_source.is_pdf():
54+
if options.page_options and input_source.is_pdf():
6755
input_source.process_pdf(
68-
page_options.operation,
69-
page_options.on_min_pages,
70-
page_options.page_indexes,
56+
options.page_options.operation,
57+
options.page_options.on_min_pages,
58+
options.page_options.page_indexes,
7159
)
7260

7361
response = self.mindee_api.predict_async_req_post(
74-
input_source=input_source,
75-
options=options,
76-
close_file=close_file,
62+
input_source=input_source, options=options
7763
)
7864
dict_response = response.json()
7965

@@ -103,12 +89,7 @@ def parse_queued(
10389
return InferenceResponse(dict_response)
10490

10591
def enqueue_and_parse(
106-
self,
107-
input_source: LocalInputSource,
108-
options: InferencePredictOptions,
109-
polling_options: Optional[PollingOptionsV2] = None,
110-
page_options: Optional[PageOptions] = None,
111-
close_file: bool = True,
92+
self, input_source: LocalInputSource, options: InferencePredictOptions
11293
) -> InferenceResponse:
11394
"""
11495
Enqueues to an asynchronous endpoint and automatically polls for a response.
@@ -118,39 +99,25 @@ def enqueue_and_parse(
11899
119100
:param options: Options for the prediction.
120101
121-
:param polling_options: Options for polling.
122-
123-
:param close_file: Whether to ``close()`` the file after parsing it.
124-
Set to ``False`` if you need to access the file after this operation.
125-
126-
:param page_options: If set, remove pages from the document as specified.
127-
This is done before sending the file to the server.
128-
It is useful to avoid page limitations.
129-
130102
:return: A valid inference response.
131103
"""
132-
if not polling_options:
133-
polling_options = PollingOptionsV2()
104+
if not options.polling_options:
105+
options.polling_options = PollingOptions()
134106
self._validate_async_params(
135-
polling_options.initial_delay_sec,
136-
polling_options.delay_sec,
137-
polling_options.max_retries,
138-
)
139-
queue_result = self.enqueue(
140-
input_source,
141-
options,
142-
page_options,
143-
close_file,
107+
options.polling_options.initial_delay_sec,
108+
options.polling_options.delay_sec,
109+
options.polling_options.max_retries,
144110
)
111+
queue_result = self.enqueue(input_source, options)
145112
logger.debug(
146113
"Successfully enqueued document with job id: %s", queue_result.job.id
147114
)
148-
sleep(polling_options.initial_delay_sec)
115+
sleep(options.polling_options.initial_delay_sec)
149116
retry_counter = 1
150117
poll_results = self.parse_queued(
151118
queue_result.job.id,
152119
)
153-
while retry_counter < polling_options.max_retries:
120+
while retry_counter < options.polling_options.max_retries:
154121
if not isinstance(poll_results, PollingResponse):
155122
break
156123
if poll_results.job.status == "Failed":
@@ -160,7 +127,7 @@ def enqueue_and_parse(
160127
queue_result.job.id,
161128
)
162129
retry_counter += 1
163-
sleep(polling_options.delay_sec)
130+
sleep(options.polling_options.delay_sec)
164131
poll_results = self.parse_queued(queue_result.job.id)
165132

166133
if not isinstance(poll_results, InferenceResponse):

mindee/input/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from mindee.input.inference_predict_options import InferencePredictOptions
22
from mindee.input.local_response import LocalResponse
33
from mindee.input.page_options import PageOptions
4-
from mindee.input.polling_options_v2 import PollingOptionsV2
4+
from mindee.input.polling_options import PollingOptions
55
from mindee.input.sources.base_64_input import Base64Input
66
from mindee.input.sources.bytes_input import BytesInput
77
from mindee.input.sources.file_input import FileInput

mindee/input/inference_predict_options.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
from dataclasses import dataclass
22
from typing import List, Optional
33

4+
from mindee.input.page_options import PageOptions
5+
from mindee.input.polling_options import PollingOptions
6+
47

58
@dataclass
69
class InferencePredictOptions:
@@ -19,3 +22,9 @@ class InferencePredictOptions:
1922
"""Optional alias for the file."""
2023
webhook_ids: Optional[List[str]] = None
2124
"""IDs of webhooks to propagate the API response to."""
25+
page_options: Optional[PageOptions] = None
26+
"""Options for page-level inference."""
27+
polling_options: Optional[PollingOptions] = None
28+
"""Options for polling."""
29+
close_file: bool = True
30+
"""Whether to close the file after parsing."""

mindee/input/polling_options_v2.py renamed to mindee/input/polling_options.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
class PollingOptionsV2:
1+
class PollingOptions:
22
"""Options for asynchronous polling."""
33

44
initial_delay_sec: float

mindee/mindee_http/mindee_api_v2.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -68,17 +68,13 @@ def set_from_env(self) -> None:
6868
logger.debug("Value was set from env: %s", name)
6969

7070
def predict_async_req_post(
71-
self,
72-
input_source: LocalInputSource,
73-
options: InferencePredictOptions,
74-
close_file: bool = True,
71+
self, input_source: LocalInputSource, options: InferencePredictOptions
7572
) -> requests.Response:
7673
"""
7774
Make an asynchronous request to POST a document for prediction on the V2 API.
7875
7976
:param input_source: Input object.
8077
:param options: Options for the enqueueing of the document.
81-
:param close_file: Whether to `close()` the file after parsing it.
8278
:return: requests response.
8379
"""
8480
data = {"model_id": options.model_id}
@@ -93,7 +89,7 @@ def predict_async_req_post(
9389
if options.alias and len(options.alias):
9490
data["alias"] = options.alias
9591

96-
files = {"file": input_source.read_contents(close_file)}
92+
files = {"file": input_source.read_contents(options.close_file)}
9793
response = requests.post(
9894
url=url,
9995
files=files,

mindee/parsing/v2/__init__.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,12 @@
1-
from mindee.parsing.v2.base_field import ListField, ObjectField, SimpleField
1+
from mindee.parsing.v2.base_field import (
2+
InferenceFields,
3+
ListField,
4+
ObjectField,
5+
SimpleField,
6+
)
27
from mindee.parsing.v2.common_response import CommonResponse
38
from mindee.parsing.v2.error_response import ErrorResponse
49
from mindee.parsing.v2.inference import Inference
5-
from mindee.parsing.v2.inference_fields import InferenceFields
610
from mindee.parsing.v2.inference_file import InferenceFile
711
from mindee.parsing.v2.inference_model import InferenceModel
812
from mindee.parsing.v2.inference_options import InferenceOptions

mindee/parsing/v2/base_field.py

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,28 @@ def create_field(
2929
raise MindeeApiV2Error(f"Unrecognized field format {raw_response}.")
3030

3131

32+
class InferenceFields(Dict[str, Union["SimpleField", "ObjectField", "ListField"]]):
33+
"""Inference fields dict."""
34+
35+
def __init__(self, raw_response: StringDict, indent_level: int = 0) -> None:
36+
super().__init__()
37+
for key, value in raw_response.items():
38+
field_obj = BaseField.create_field(value, indent_level)
39+
self[key] = field_obj
40+
41+
def __getattr__(self, item):
42+
try:
43+
return self[item]
44+
except KeyError:
45+
raise AttributeError(item) from None
46+
47+
def __str__(self) -> str:
48+
str_fields = ""
49+
for field_key, field_value in self.items():
50+
str_fields += f":{field_key}: {field_value}"
51+
return str_fields
52+
53+
3254
class ListField(BaseField):
3355
"""List field containing multiple fields."""
3456

@@ -55,21 +77,14 @@ def __str__(self) -> str:
5577
class ObjectField(BaseField):
5678
"""Object field containing multiple fields."""
5779

58-
fields: Dict[str, Union[ListField, "ObjectField", "SimpleField"]]
80+
fields: InferenceFields
5981
"""Fields contained in the object."""
6082

6183
def __init__(self, raw_response: StringDict, indent_level: int = 0):
6284
super().__init__(indent_level)
6385
inner_fields = raw_response.get("fields", raw_response)
6486

65-
self.fields: Dict[str, Union["ListField", "ObjectField", "SimpleField"]] = {}
66-
for field_key, field_value in inner_fields.items():
67-
if isinstance(field_value, dict):
68-
self.fields[field_key] = BaseField.create_field(
69-
field_value, self._indent_level + 1
70-
)
71-
else:
72-
raise MindeeApiV2Error(f"Unrecognized field format '{field_value}'.")
87+
self.fields = InferenceFields(inner_fields, self._indent_level + 1)
7388

7489
def __str__(self) -> str:
7590
out_str = ""

mindee/parsing/v2/inference_fields.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

mindee/parsing/v2/inference_result.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import Optional
22

33
from mindee.parsing.common.string_dict import StringDict
4-
from mindee.parsing.v2.inference_fields import InferenceFields
4+
from mindee.parsing.v2.base_field import InferenceFields
55
from mindee.parsing.v2.inference_options import InferenceOptions
66

77

mindee/tests/product/fr/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)