add openvino_utils

Mohamed-Ashraf273 · Mohamed-Ashraf273 · commit 553c3d7293a3 · 2025-08-14T04:24:28.000+03:00
diff --git a/keras_hub/src/models/causal_lm.py b/keras_hub/src/models/causal_lm.py
@@ -133,80 +133,14 @@ def make_generate_function(self):
 
         self.generate_function = self.generate_step
         if keras.config.backend() == "openvino":
-            import openvino as ov
-            import openvino.runtime.opset14 as ov_opset
-
-            from keras_hub.src.utils.keras_utils import print_msg
-
-            def ov_infer(inputs, stop_token_ids, fn):
-                def get_outputs(inputs, struct_outputs, compiled_ov_model):
-                    flatten_inputs = tree.flatten(inputs)
-                    outputs = compiled_ov_model(flatten_inputs).to_tuple()
-                    outputs = self._unpack_singleton(
-                        tree.pack_sequence_as(struct_outputs, outputs)
-                    )
-                    return outputs
-
-                core = ov.Core()
-                device = "GPU" if "GPU" in core.available_devices else "CPU"
-
-                # Try using the existing compiled model
-                if (
-                    self.ov_compiled_model is not None
-                    and getattr(self, "ov_device", None) is not None
-                    and device == self.ov_device
-                ):
-                    try:
-                        return get_outputs(
-                            inputs, self.struct_outputs, self.ov_compiled_model
-                        )
-                    except RuntimeError as e:
-                        # Delete previous model and struct outputs, then
-                        # Fall through to recompilation if inference fails
-                        print_msg(
-                            "WARNING: OpenVINO inference \033[1mFAILED\033[0m, "
-                            "so we'll Rebuild and compile the model then "
-                            f"try again.\n{e}"
-                        )
-                        del self.ov_compiled_model
-                        del self.struct_outputs
-
-                # Rebuild and compile the OpenVINO model
-                struct_params = self._parameterize_data(inputs)
-                self.struct_outputs = fn(struct_params, stop_token_ids)
-                parameters = [
-                    p.output.get_node() for p in tree.flatten(struct_params)
-                ]
-                results = [
-                    ov_opset.result(r.output)
-                    for r in tree.flatten(self.struct_outputs)
-                ]
-                ov_model = ov.Model(results=results, parameters=parameters)
-                for ov_input in ov_model.inputs:
-                    rank = ov_input.get_partial_shape().rank.get_length()
-                    ov_input.get_node().set_partial_shape(
-                        ov.PartialShape([-1] * rank)
-                    )
-                ov_model.validate_nodes_and_infer_types()
-
-                self.ov_device = device
-                model_dtype = (
-                    "f16"
-                    if self.dtype == "float16" or self.dtype == "bfloat16"
-                    else "f32"
-                )
-                config = {"INFERENCE_PRECISION_HINT": model_dtype}
-                self.ov_compiled_model = core.compile_model(
-                    ov_model, device, config
-                )
-                return get_outputs(
-                    inputs, self.struct_outputs, self.ov_compiled_model
-                )
+            from keras_hub.src.utils.openvino_utils import ov_infer
 
             def wrapped_generate_function(inputs, stop_token_ids=None):
-                # ops.array converts to numpy in openvino backend
+                # Convert to numpy for OpenVINO backend
                 inputs = tree.map_structure(ops.array, inputs)
-                return ov_infer(inputs, stop_token_ids, self.generate_step)
+                return ov_infer(
+                    self, inputs, stop_token_ids, self.generate_step
+                )
 
             self.generate_function = wrapped_generate_function
         if keras.config.backend() == "torch":
diff --git a/keras_hub/src/utils/openvino_utils.py b/keras_hub/src/utils/openvino_utils.py
@@ -0,0 +1,126 @@
+from keras import tree
+
+from keras_hub.src.utils.keras_utils import print_msg
+
+try:
+    import openvino as ov
+    import openvino.opset14 as ov_opset
+    from openvino import Core
+
+    core = Core()
+except ImportError:
+    ov = None
+    ov_opset = None
+    core = None
+
+
+def get_device():
+    """Detect and return the best available OpenVINO device.
+
+    Returns:
+        str: "GPU" if "GPU" is in `core.available_devices`, else "CPU".
+    """
+    return "GPU" if "GPU" in core.available_devices else "CPU"
+
+
+def compile_model(struct_params, struct_outputs, device, model_dtype):
+    """Compile OpenVINO model with dynamic shapes and precision hints.
+
+    Args:
+        struct_params: Nested structure of inputs, flattened into parameters.
+        struct_outputs: Nested structure of outputs, flattened into results.
+        device: Target device ("GPU" or "CPU").
+        model_dtype: Value for `INFERENCE_PRECISION_HINT` ("f16" or "f32").
+
+    Returns:
+        Compiled OpenVINO model ready for inference.
+    """
+    parameters = [p.output.get_node() for p in tree.flatten(struct_params)]
+    results = [ov_opset.result(r.output) for r in tree.flatten(struct_outputs)]
+    ov_model = ov.Model(results=results, parameters=parameters)
+
+    # Mark every input dimension as dynamic (-1), keeping only the rank
+    for ov_input in ov_model.inputs:
+        rank = ov_input.get_partial_shape().rank.get_length()
+        ov_input.get_node().set_partial_shape(ov.PartialShape([-1] * rank))
+
+    ov_model.validate_nodes_and_infer_types()
+
+    config = {"INFERENCE_PRECISION_HINT": model_dtype}
+    compiled_model = core.compile_model(ov_model, device, config)
+    return compiled_model
+
+
+def get_outputs(inputs, struct_outputs, compiled_ov_model, unpack_singleton):
+    """Execute compiled OpenVINO model and return structured outputs.
+
+    Args:
+        inputs: Nested structure of inputs; flattened before the call.
+        struct_outputs: Structure the flat outputs are packed back into.
+        compiled_ov_model: Compiled OpenVINO model, called on the flat inputs.
+        unpack_singleton: Callable applied to the re-packed outputs.
+
+    Returns:
+        Result of `unpack_singleton` on the outputs packed as `struct_outputs`.
+    """
+    flatten_inputs = tree.flatten(inputs)
+    outputs = compiled_ov_model(flatten_inputs).to_tuple()
+    outputs = unpack_singleton(tree.pack_sequence_as(struct_outputs, outputs))
+    return outputs
+
+
+def ov_infer(model, inputs, stop_token_ids, fn):
+    """High-level OpenVINO inference with model reuse and compilation.
+
+    Reuses the compiled model cached on `model` when the detected device is
+    unchanged; otherwise (or if inference raises `RuntimeError`) calls `fn`,
+    compiles a new model and caches it on `model`.
+
+    Args:
+        model: Keras model with OpenVINO backend support.
+        inputs: Input tensors for inference.
+        stop_token_ids: Token IDs, forwarded unchanged to `fn`.
+        fn: Callable invoked as `fn(struct_params, stop_token_ids)`.
+
+    Returns:
+        Model outputs from OpenVINO inference.
+    """
+    device = get_device()
+
+    # Reuse the cached compiled model only if it targets the same device
+    if (
+        getattr(model, "ov_compiled_model", None) is not None
+        and getattr(model, "ov_device", None) is not None
+        and device == model.ov_device
+    ):
+        try:
+            return get_outputs(
+                inputs,
+                model.struct_outputs,
+                model.ov_compiled_model,
+                model._unpack_singleton,
+            )
+        except RuntimeError as e:
+            print_msg(
+                "WARNING: OpenVINO inference \033[1mFAILED\033[0m, "
+                "recompiling model and trying again.\n" + str(e)
+            )
+            del model.ov_compiled_model
+            del model.struct_outputs
+
+    # Call `fn` on parameterized inputs, then compile and cache the result
+    struct_params = model._parameterize_data(inputs)
+    model.struct_outputs = fn(struct_params, stop_token_ids)
+    model.ov_device = device
+    model_dtype = "f16" if model.dtype in ("float16", "bfloat16") else "f32"
+
+    model.ov_compiled_model = compile_model(
+        struct_params, model.struct_outputs, device, model_dtype
+    )
+
+    return get_outputs(
+        inputs,
+        model.struct_outputs,
+        model.ov_compiled_model,
+        model._unpack_singleton,
+    )
diff --git a/keras_hub/src/utils/openvino_utils_test.py b/keras_hub/src/utils/openvino_utils_test.py