Skip to content

Commit 9f91fdb

Browse files
Add quantization and model reuse for the OpenVINO backend
1 parent b798a4f commit 9f91fdb

File tree

3 files changed

+53
-14
lines changed

3 files changed

+53
-14
lines changed

keras_hub/src/models/causal_lm.py

Lines changed: 44 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -133,8 +133,13 @@ def make_generate_function(self):
133133

134134
self.generate_function = self.generate_step
135135
if keras.config.backend() == "openvino":
136+
import os
137+
import tempfile
138+
136139
import openvino as ov
137140
import openvino.runtime.opset14 as ov_opset
141+
from nncf import CompressWeightsMode
142+
from nncf import compress_weights
138143

139144
from keras_hub.src.utils.openvino_utils import get_outputs
140145
from keras_hub.src.utils.openvino_utils import get_struct_outputs
@@ -143,17 +148,45 @@ def ov_infer(inputs, stop_token_ids, fn):
143148
struct_params, struct_outputs = get_struct_outputs(
144149
inputs, stop_token_ids, fn
145150
)
146-
parameters = [
147-
p.output.get_node() for p in tree.flatten(struct_params)
148-
]
149-
results = [
150-
ov_opset.result(r.output)
151-
for r in tree.flatten(struct_outputs)
152-
]
153-
core = ov.Core()
154-
ov_model = ov.Model(results=results, parameters=parameters)
155-
compile_ov_model = core.compile_model(ov_model, "CPU")
156-
return get_outputs(inputs, struct_outputs, compile_ov_model)
151+
if not hasattr(ov_infer, "compiled_model"):
152+
ov_infer.compiled_model = None
153+
parameters = [
154+
p.output.get_node() for p in tree.flatten(struct_params)
155+
]
156+
results = [
157+
ov_opset.result(r.output)
158+
for r in tree.flatten(struct_outputs)
159+
]
160+
core = ov.Core()
161+
ov_model = ov.Model(results=results, parameters=parameters)
162+
for ov_input in ov_model.inputs:
163+
rank = ov_input.get_partial_shape().rank.get_length()
164+
ov_input.get_node().set_partial_shape(
165+
ov.PartialShape([-1] * rank)
166+
)
167+
ov_model.validate_nodes_and_infer_types()
168+
with tempfile.TemporaryDirectory() as tmpdir:
169+
path = os.path.join(tmpdir, "ov_model.xml")
170+
ov.serialize(ov_model, path)
171+
del ov_model
172+
ov_model = core.read_model(path)
173+
group_sizes = [128, 64, 16, 4]
174+
for group_size in group_sizes:
175+
try:
176+
final_model = compress_weights(
177+
ov_model,
178+
mode=CompressWeightsMode.INT4_SYM,
179+
group_size=group_size,
180+
)
181+
break
182+
except Exception:
183+
continue
184+
ov_infer.compile_ov_model = core.compile_model(
185+
final_model, "CPU"
186+
)
187+
return get_outputs(
188+
inputs, struct_outputs, ov_infer.compile_ov_model
189+
)
157190

158191
def wrapped_generate_function(inputs, stop_token_ids=None):
159192
inputs = tree.map_structure(ops.array, inputs)

keras_hub/src/utils/openvino_utils.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,21 +30,26 @@ def unpack_singleton(x):
3030
return x
3131

3232

33-
def parameterize_inputs(inputs):
33+
def parameterize_inputs(inputs, prefix=""):
3434
if isinstance(inputs, (list, tuple)):
35-
return [parameterize_inputs(e) for e in inputs]
35+
return [
36+
parameterize_inputs(e, f"{prefix}{i}") for i, e in enumerate(inputs)
37+
]
3638
elif isinstance(inputs, dict):
37-
return {k: parameterize_inputs(v) for k, v in inputs.items()}
39+
return {k: parameterize_inputs(v, k) for k, v in inputs.items()}
3840
elif isinstance(inputs, np.ndarray):
3941
ov_type = OPENVINO_DTYPES[str(inputs.dtype)]
4042
ov_shape = list(inputs.shape)
4143
param = ov_opset.parameter(shape=ov_shape, dtype=ov_type)
44+
param.set_friendly_name(prefix)
4245
return ops.convert_to_tensor(param.output(0))
4346
elif isinstance(inputs, (int, np.integer)):
4447
param = ov_opset.parameter(shape=[], dtype=ov.Type.i32)
48+
param.set_friendly_name(prefix)
4549
return ops.convert_to_tensor(param.output(0))
4650
elif isinstance(inputs, (float, np.floating)):
4751
param = ov_opset.parameter(shape=[], dtype=ov.Type.f32)
52+
param.set_friendly_name(prefix)
4853
return ops.convert_to_tensor(param.output(0))
4954
else:
5055
raise TypeError(f"Unknown input type: {type(inputs)}")

requirements-common.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,4 +19,5 @@ tensorflow-datasets
1919
safetensors
2020
pillow
2121
openvino
22+
nncf
2223
transformers

0 commit comments

Comments
 (0)