keras-team
diff --git a/‎.github/workflows/actions.yml
Lines changed: 21 additions & 5 deletions b/‎.github/workflows/actions.yml
Lines changed: 21 additions & 5 deletions
diff --git a/‎keras_hub/src/models/causal_lm.py
Lines changed: 147 additions & 1 deletion b/‎keras_hub/src/models/causal_lm.py
Lines changed: 147 additions & 1 deletion
diff --git a/‎keras_hub/src/models/gemma/gemma_causal_lm.py
Lines changed: 77 additions & 11 deletions b/‎keras_hub/src/models/gemma/gemma_causal_lm.py
Lines changed: 77 additions & 11 deletions
@@ -16,22 +16,25 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        backend: [tensorflow, jax, torch]
+        backend: [tensorflow, jax, torch, openvino]
         version: [keras-stable]
         include:
           - backend: jax
             version: keras-3.5
           - backend: jax
             version: keras-nightly
+          - backend: openvino
+            version: keras-stable
+            python-version: '3.10'
     runs-on: ubuntu-latest
     env:
       KERAS_BACKEND: ${{ matrix.backend }}
     steps:
     - uses: actions/checkout@v4
-    - name: Set up Python 3.9
+    - name: Set up Python
       uses: actions/setup-python@v5
       with:
-        python-version: 3.9
+        python-version: ${{ matrix.python-version || '3.9' }}
     - name: Get pip cache dir
       id: pip-cache
       run: |
@@ -48,6 +51,10 @@ jobs:
       run: |
           pip install -r requirements.txt --progress-bar off
           pip install --no-deps -e "." --progress-bar off
+          if [[ "${{ matrix.backend }}" == "openvino" ]]; then
+            pip uninstall -y keras
+            pip install git+https://github.com/keras-team/keras.git@master --upgrade --force-reinstall --progress-bar off
+          fi
     - name: Pin Keras 3.5
       if: ${{ matrix.version == 'keras-3.5'}}
       run: |
@@ -60,11 +67,20 @@ jobs:
         pip install keras-nightly --progress-bar off
     - name: Test with pytest
       run: |
-        pytest keras_hub/
+        # KERAS_BACKEND is exported from matrix.backend by the job-level
+        # env, so the script reads it instead of inlining the expression.
+        # The openvino backend runs only the Gemma causal LM test file.
+        if [[ "$KERAS_BACKEND" == "openvino" ]]; then
+          pytest keras_hub/src/models/gemma/gemma_causal_lm_test.py
+        else
+          pytest keras_hub/
+        fi
     - name: Run integration tests
       run: |
         python pip_build.py --install
-        cd integration_tests && pytest . -k "not NoTensorflow"
+        cd integration_tests
+        # KERAS_BACKEND is exported from matrix.backend by the job-level
+        # env, so the script reads it instead of inlining the expression.
+        # The openvino backend additionally skips basic_usage_test.py.
+        if [[ "$KERAS_BACKEND" == "openvino" ]]; then
+          pytest . --ignore=basic_usage_test.py -k "not NoTensorflow"
+        else
+          pytest . -k "not NoTensorflow"
+        fi
     - name: Run no tensorflow integration test
       if: ${{ matrix.backend != 'tensorflow'}}
       run: |
 
@@ -58,6 +58,11 @@ class CausalLM(Task):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        # Only the OpenVINO backend uses these attributes:
+        # - `_ov_models` maps a model name to its compiled OpenVINO model.
+        # - `struct_outputs` holds the output structure produced the last
+        #   time a function was run on parameter placeholders.
+        # - `ov_infer` is assigned in `make_generate_function()`.
+        if keras.config.backend() == "openvino":
+            self._ov_models = {}
+            self.struct_outputs = None
+            self.ov_infer = None
 
     def compile(
         self,
@@ -132,6 +137,144 @@ def make_generate_function(self):
             return self.generate_function
 
         self.generate_function = self.generate_step
+        if keras.config.backend() == "openvino":
+            import os
+            import shutil
+
+            import numpy as np
+            import openvino as ov
+            import openvino.runtime.opset14 as ov_opset
+            from keras.src.backend.openvino.core import OPENVINO_DTYPES
+            from keras.src.backend.openvino.core import OpenVINOKerasTensor
+
+            def unpack_singleton(x):
+                """Unwrap a one-element list/tuple; otherwise return `x`."""
+                is_singleton = isinstance(x, (list, tuple)) and len(x) == 1
+                return x[0] if is_singleton else x
+
+            def parameterize_inputs(inputs):
+                """Mirror `inputs` with OpenVINO parameter placeholders.
+
+                Lists/tuples (returned as lists) and dicts are walked
+                recursively. A numpy array becomes a parameter with the
+                array's shape and mapped dtype; an integer scalar becomes a
+                scalar `i32` parameter and a float scalar a scalar `f32`
+                parameter.
+
+                Raises:
+                    TypeError: if a leaf is none of the supported types.
+                """
+
+                def as_placeholder(shape, dtype):
+                    node = ov_opset.parameter(shape=shape, dtype=dtype)
+                    return OpenVINOKerasTensor(node.output(0))
+
+                if isinstance(inputs, (list, tuple)):
+                    return [parameterize_inputs(item) for item in inputs]
+                if isinstance(inputs, dict):
+                    return {
+                        key: parameterize_inputs(val)
+                        for key, val in inputs.items()
+                    }
+                if isinstance(inputs, np.ndarray):
+                    element_type = OPENVINO_DTYPES[str(inputs.dtype)]
+                    dims = list(inputs.shape)
+                    return as_placeholder(dims, element_type)
+                if isinstance(inputs, (int, np.integer)):
+                    return as_placeholder([], ov.Type.i32)
+                if isinstance(inputs, (float, np.floating)):
+                    return as_placeholder([], ov.Type.f32)
+                raise TypeError(f"Unknown input type: {type(inputs)}")
+
+            def set_struct_outputs(inputs, fn):
+                """Call `fn` on placeholders built from `inputs`.
+
+                The result is stored on `self.struct_outputs`; the function
+                returns `(placeholders, result)`.
+                """
+                placeholders = parameterize_inputs(inputs)
+                self.struct_outputs = fn(placeholders)
+                return placeholders, self.struct_outputs
+
+            def get_outputs_from_model(inputs, model):
+                """Run `model` on `inputs` and restore the output structure.
+
+                Args:
+                    inputs: nested structure of concrete input values; it
+                        must not contain `OpenVINOKerasTensor` placeholders.
+                    model: callable applied to the flattened inputs; its
+                        result must provide `to_tuple()`.
+
+                Returns:
+                    The outputs packed like `self.struct_outputs`, unwrapped
+                    when that is a one-element list/tuple.
+                """
+                flatten_inputs = tree.flatten(inputs)
+                # Inspect every leaf. A membership test of the class itself
+                # (`OpenVINOKerasTensor not in inputs`) compares the class
+                # object against elements/keys and never detects an
+                # instance of it.
+                assert not any(
+                    isinstance(leaf, OpenVINOKerasTensor)
+                    for leaf in flatten_inputs
+                ), "inputs should be numpy arrays"
+                outputs = model(flatten_inputs)
+                outputs = unpack_singleton(
+                    tree.pack_sequence_as(
+                        self.struct_outputs, outputs.to_tuple()
+                    )
+                )
+                return outputs
+
+            def get_model(inputs, fn, ov_model=None, compiled=False):
+                """Build, and optionally compile, an OpenVINO model for `fn`.
+
+                `fn` is always run on placeholders first so that
+                `self.struct_outputs` is refreshed. When `ov_model` is
+                given it is compiled as-is (and `compiled` must be True);
+                otherwise a new `ov.Model` is assembled from the
+                placeholders and the outputs of `fn`.
+
+                Returns:
+                    The uncompiled `ov.Model` when `compiled` is False,
+                    otherwise the model compiled for "CPU".
+                """
+                struct_params, _ = set_struct_outputs(inputs, fn)
+                compile_config = {"CACHE_DIR": "openvino_cache"}
+
+                if ov_model is None:
+                    param_nodes = [
+                        p.output.get_node()
+                        for p in tree.flatten(struct_params)
+                    ]
+                    result_nodes = [
+                        ov_opset.result(r.output)
+                        for r in tree.flatten(self.struct_outputs)
+                    ]
+                    ov_model = ov.Model(
+                        results=result_nodes, parameters=param_nodes
+                    )
+                    if not compiled:
+                        return ov_model
+                else:
+                    assert compiled, (
+                        "if you pass a model, you should make compiled=True"
+                    )
+
+                return ov.compile_model(ov_model, "CPU", compile_config)
+
+            def compile_model_disc(inputs, fn, name):
+                """Compile `fn` by way of the file `./run_dir/{name}.xml`.
+
+                The model is built and saved only when that file does not
+                exist yet; it is then read back from the file and compiled.
+                """
+                xml_path = f"./run_dir/{name}.xml"
+                if not os.path.exists(xml_path):
+                    ov.save_model(get_model(inputs, fn), xml_path)
+                loaded_model = ov.Core().read_model(xml_path)
+                return get_model(
+                    inputs, fn, ov_model=loaded_model, compiled=True
+                )
+
+            def ov_infer(
+                inputs,
+                fn,
+                cache=False,
+                disc=False,
+                name=None,
+            ):
+                """Run `fn` on `inputs` through a compiled OpenVINO model.
+
+                Args:
+                    inputs: nested structure of concrete input values.
+                    fn: function the model is built from.
+                    cache: if True, reuse the compiled model stored in
+                        `self._ov_models[name]`, compiling it on first use.
+                    disc: if True (and `cache` is False), compile through
+                        `compile_model_disc`.
+                    name: model identifier; required when `cache` or `disc`
+                        is set.
+
+                Returns:
+                    The model outputs as returned by
+                    `get_outputs_from_model`.
+                """
+                if cache:
+                    assert name is not None, (
+                        "you should provide name of the model being cached"
+                    )
+                    if self._ov_models.get(name) is None:
+                        self._ov_models[name] = get_model(
+                            inputs, fn, compiled=True
+                        )
+                    else:
+                        # The compiled model is reused, but
+                        # `self.struct_outputs` still has to be refreshed
+                        # for this `fn` before the outputs are unpacked.
+                        set_struct_outputs(inputs, fn)
+                    compiled_model = self._ov_models[name]
+                elif disc:
+                    assert name is not None, (
+                        "you should provide the name of the model"
+                    )
+                    compiled_model = compile_model_disc(inputs, fn, name)
+                else:
+                    compiled_model = get_model(inputs, fn, compiled=True)
+                return get_outputs_from_model(inputs, compiled_model)
+
+            def delete_ov_cache():
+                """Remove `openvino_cache` and `run_dir` when they exist."""
+                scratch_dirs = ("openvino_cache", "run_dir")
+                for scratch_dir in filter(os.path.exists, scratch_dirs):
+                    shutil.rmtree(scratch_dir, ignore_errors=True)
+
+            self.ov_infer = ov_infer
+
+            def wrapped_generate_function(inputs, stop_token_ids=None):
+                """Run `generate_step` on every batch in `inputs`.
+
+                Args:
+                    inputs: iterable of batches, each passed to
+                        `self.generate_step`.
+                    stop_token_ids: forwarded to `self.generate_step`.
+
+                Returns:
+                    A list with one output dict per batch, with every value
+                    converted to a numpy array.
+                """
+                final_outputs = []
+                os.makedirs("./run_dir", exist_ok=True)
+                try:
+                    for batch in inputs:
+                        outputs = self.generate_step(batch, stop_token_ids)
+                        for k, v in outputs.items():
+                            outputs[k] = ops.convert_to_numpy(v)
+                        final_outputs.append(outputs)
+                finally:
+                    # Always clean up: model files left in `run_dir` after a
+                    # failure would be picked up again by
+                    # `compile_model_disc` on a later call.
+                    delete_ov_cache()
+                return final_outputs
+
+            self.generate_function = wrapped_generate_function
         if keras.config.backend() == "torch":
             import torch
 
@@ -386,7 +529,10 @@ def postprocess(x):
         if strip_prompt:
-            outputs = [strip_prompt_function(generate(x), x) for x in inputs]
+            if keras.config.backend() == "openvino":
+                # The OpenVINO generate function iterates over a list of
+                # batches and returns a list, so wrap each batch in a
+                # one-element list and take the single result.
+                outputs = [
+                    strip_prompt_function(generate([x])[0], x)
+                    for x in inputs
+                ]
+            else:
+                outputs = [
+                    strip_prompt_function(generate(x), x) for x in inputs
+                ]
         else:
-            outputs = [generate(x) for x in inputs]
+            if keras.config.backend() == "openvino":
+                # The OpenVINO generate function takes the whole list of
+                # batches and returns one output per batch.
+                outputs = generate(inputs)
+            else:
+                outputs = [generate(x) for x in inputs]
 
         if self.preprocessor is not None:
             outputs = [postprocess(x) for x in outputs]
 
@@ -196,22 +196,88 @@ def call_with_cache(
             the final hidden representation of the input tokens, and `cache` is
             the decoding cache.
         """
-        x = self.backbone.token_embedding(token_ids)
-        x = x * ops.cast(ops.sqrt(self.backbone.hidden_dim), x.dtype)
-        # Each decoder layer has a cache; we update them separately.
+
+        def embed_and_scale_tokens(token_ids):
+            """Embed `token_ids` and scale by sqrt of the hidden dim."""
+            embedded = self.backbone.token_embedding(token_ids)
+            scale = ops.cast(
+                ops.sqrt(self.backbone.hidden_dim), embedded.dtype
+            )
+            return embedded * scale
+
+        def make_apply_fn(layer):
+            """Wrap `layer` as a function taking one dict of inputs.
+
+            The returned function reads the keys `"x"`, `"current_cache"`
+            and `"cache_update_index"` and returns the layer's
+            `(x, next_cache)` pair.
+            """
+
+            def apply_transformer_layer(inputs):
+                hidden, updated_cache = layer(
+                    inputs["x"],
+                    cache=inputs["current_cache"],
+                    cache_update_index=inputs["cache_update_index"],
+                )
+                return hidden, updated_cache
+
+            return apply_transformer_layer
+
+        def finalize_generation_step(inputs):
+            """Normalize `inputs["x"]`, stack caches and compute logits.
+
+            Returns:
+                `(logits, hidden_states, cache)`, where `cache` stacks
+                `inputs["caches"]` along axis 1.
+            """
+            hidden_states = self.backbone.layer_norm(inputs["x"])
+            stacked_cache = ops.stack(inputs["caches"], axis=1)
+            logits = self.backbone.token_embedding(
+                hidden_states, reverse=True
+            )
+            return logits, hidden_states, stacked_cache
+
+        use_openvino = keras.config.backend() == "openvino"
+
+        if use_openvino:
+            token_ids = ops.convert_to_numpy(token_ids)
+            cache = ops.convert_to_numpy(cache)
+            if token_ids.shape[1] == 1:
+                x = self.ov_infer(
+                    token_ids,
+                    embed_and_scale_tokens,
+                    cache=True,
+                    name="embed_and_scale_tokens",
+                )
+            else:
+                x = self.ov_infer(token_ids, embed_and_scale_tokens)
+        else:
+            x = embed_and_scale_tokens(token_ids)
+
         caches = []
         for i, transformer_layer in enumerate(self.backbone.transformer_layers):
             current_cache = cache[:, i, ...]
-            x, next_cache = transformer_layer(
-                x,
-                cache=current_cache,
-                cache_update_index=cache_update_index,
-            )
+            inputs = {
+                "x": x,
+                "current_cache": current_cache,
+                "cache_update_index": cache_update_index,
+            }
+
+            apply_fn = make_apply_fn(transformer_layer)
+
+            if use_openvino:
+                if token_ids.shape[1] == 1:
+                    x, next_cache = self.ov_infer(
+                        inputs,
+                        apply_fn,
+                        disc=True,
+                        name=f"layer_{i}",
+                    )
+                else:
+                    x, next_cache = self.ov_infer(inputs, apply_fn)
+            else:
+                x, next_cache = apply_fn(inputs)
+
             caches.append(next_cache)
 
-        cache = ops.stack(caches, axis=1)
-        hidden_states = x = self.backbone.layer_norm(x)
-        logits = self.backbone.token_embedding(x, reverse=True)
+        inputs = {"x": x, "caches": caches}
+        if use_openvino:
+            if token_ids.shape[1] == 1:
+                logits, hidden_states, cache = self.ov_infer(
+                    inputs,
+                    finalize_generation_step,
+                    cache=True,
+                    name="finalize_generation_step",
+                )
+            else:
+                logits, hidden_states, cache = self.ov_infer(
+                    inputs, finalize_generation_step
+                )
+        else:
+            logits, hidden_states, cache = finalize_generation_step(inputs)
+
         return logits, hidden_states, cache
 
     def _build_cache(self, token_ids):