Commit c186f33

fix errors
1 parent e7b0725 commit c186f33

File tree

3 files changed: +21 -55 lines changed

.github/workflows/actions.yml

Lines changed: 0 additions & 2 deletions
@@ -23,8 +23,6 @@ jobs:
           - backend: jax
             version: keras-3.8
           - backend: jax
             version: keras-nightly
-          - backend: openvino
-            python-version: '3.10'
     runs-on: ubuntu-latest
     env:
       KERAS_BACKEND: ${{ matrix.backend }}

keras_hub/src/models/causal_lm.py

Lines changed: 20 additions & 53 deletions
@@ -133,69 +133,29 @@ def make_generate_function(self):
 
         self.generate_function = self.generate_step
         if keras.config.backend() == "openvino":
-            import os
-            from multiprocessing import Pipe
-            from multiprocessing import Process
-
             import openvino as ov
             import openvino.runtime.opset14 as ov_opset
-            import psutil
 
             from keras_hub.src.utils.keras_utils import print_msg
 
             def ov_infer(inputs, stop_token_ids, fn):
-                def isolated_infer(pipe, compiled_model, flat_inputs):
-                    outputs = compiled_model(flat_inputs)
-                    outputs = outputs.to_tuple()
-                    pipe.send(outputs)
-                    pipe.close()
-
                 def get_outputs(inputs, struct_outputs, compiled_ov_model):
                     flatten_inputs = tree.flatten(inputs)
-                    free_mem = psutil.virtual_memory().available / (1024**3)
-                    # On average OpenVINO needs about 2 GB to run
-                    # an inference, also it is wrapped by an env var,
-                    # to be tuned.
-                    threshold = float(
-                        os.getenv("OV_INFER_FREE_MEM_THRESHOLD", 2)
-                    )
-                    if free_mem > threshold:
-                        """Run inference in a separate process only if
-                        free memory usage is above a certain threshold.
-                        This threshold is calculated to ensure that
-                        swap memory won't be triggered. When swap is
-                        likely to be used, fallback to normal inference
-                        to avoid severe performance degradation.
-                        Running inference in a subprocess prevents OpenVINO from
-                        allocating extra memory in the main process during its
-                        internal infer request creation. This can reduce memory
-                        usage by 0.5–2 GB depending on the model size.
-                        However, using a subprocess introduces an extra
-                        overhead, increasing latency by around 1–2 seconds
-                        per inference.
-                        """
-                        parent_conn, child_conn = Pipe()
-                        p = Process(
-                            target=isolated_infer,
-                            args=(
-                                child_conn,
-                                compiled_ov_model,
-                                flatten_inputs,
-                            ),
-                        )
-                        p.start()
-                        outputs = parent_conn.recv()
-                        p.join()
-                    else:
-                        outputs = compiled_ov_model(flatten_inputs)
-                    outputs = outputs.to_tuple()
+                    outputs = compiled_ov_model(flatten_inputs).to_tuple()
                     outputs = self._unpack_singleton(
                         tree.pack_sequence_as(struct_outputs, outputs)
                     )
                     return outputs
 
+                core = ov.Core()
+                device = "GPU" if "GPU" in core.available_devices else "CPU"
+
                 # Try using the existing compiled model
-                if self.ov_compiled_model is not None:
+                if (
+                    self.ov_compiled_model is not None
+                    and getattr(self, "ov_device", None) is not None
+                    and device == self.ov_device
+                ):
                     try:
                         return get_outputs(
                             inputs, self.struct_outputs, self.ov_compiled_model
@@ -228,10 +188,17 @@ def get_outputs(inputs, struct_outputs, compiled_ov_model):
                         ov.PartialShape([-1] * rank)
                     )
                 ov_model.validate_nodes_and_infer_types()
-                core = ov.Core()
-                device = "CPU"
-                # OpenVINO supports only compiling with 'CPU' devices.
-                self.ov_compiled_model = core.compile_model(ov_model, device)
+
+                self.ov_device = device
+                model_dtype = (
+                    "f16"
+                    if self.dtype == "float16" or self.dtype == "bfloat16"
+                    else "f32"
+                )
+                config = {"INFERENCE_PRECISION_HINT": model_dtype}
+                self.ov_compiled_model = core.compile_model(
+                    ov_model, device, config
+                )
                 return get_outputs(
                     inputs, self.struct_outputs, self.ov_compiled_model
                 )
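
The new code path above relies on a handful of OpenVINO runtime calls: ov.Core() to query available devices, core.compile_model(model, device, config) with an INFERENCE_PRECISION_HINT, and calling the compiled model on flattened inputs. The following is a minimal, self-contained sketch of that pattern, not part of the commit: the toy relu graph with a fixed [2, 3] input merely stands in for the traced generate_step model that the diff actually compiles.

import numpy as np
import openvino as ov
import openvino.runtime.opset14 as ov_opset

core = ov.Core()
# Prefer the GPU plugin when OpenVINO reports one, otherwise fall back to
# CPU, mirroring the device selection added in causal_lm.py.
device = "GPU" if "GPU" in core.available_devices else "CPU"

# Toy stand-in graph: y = relu(x) with a fixed [2, 3] float32 input.
param = ov_opset.parameter([2, 3], dtype=np.float32, name="x")
ov_model = ov.Model([ov_opset.relu(param)], [param], "toy")

# The diff picks "f16" for float16/bfloat16 Keras dtypes and "f32" otherwise;
# here we hard-code "f32" for the toy graph.
config = {"INFERENCE_PRECISION_HINT": "f32"}
compiled = core.compile_model(ov_model, device, config)

# Calling the compiled model returns a result mapping; .to_tuple() matches
# how get_outputs() unpacks it in the diff above.
outputs = compiled([np.ones((2, 3), dtype=np.float32)]).to_tuple()
print(outputs[0].shape)  # (2, 3)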

openvino_excluded_tests.txt

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ keras_hub/src/models/dinov2
 keras_hub/src/models/distil_bert
 keras_hub/src/models/efficientnet
 keras_hub/src/models/electra
+keras_hub/src/models/esm
 keras_hub/src/models/falcon
 keras_hub/src/models/flux
 keras_hub/src/models/f_net
