Initial commit

nikita-savelyevv · nikita-savelyevv · commit 03ce84cfbb23 · 2025-12-02T18:58:46.000+01:00
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
@@ -165,14 +165,29 @@ def __init__(
         self.apply_caching = apply_caching
         self.inference_result_mock = inference_result_mock
         self.tensor_cache = {}
+        self.stateful = len(request.query_state()) > 0
+        self._reset_state_called = False
 
     def collect_inputs(self, inputs):
+        if self.stateful:
+            if isinstance(inputs, dict) and is_nncf_version(">", "2.19"):
+                from nncf.definitions import NNCF_DATASET_RESET_STATE_KEY
+
+                # To reflect the state resetting during NNCF calibration, we add a special key to the input dict
+                # The shallow copy is on purpose: we only need to add a key to the top-level dict
+                inputs = inputs.copy()
+                inputs[NNCF_DATASET_RESET_STATE_KEY] = self._reset_state_called
+            self._reset_state_called = False
+
         if not self.apply_caching or not isinstance(inputs, dict):
             self.collected_inputs.append(copy.deepcopy(inputs))
             return
 
         copied_inputs = {}
         for k, v in inputs.items():
+            if isinstance(v, bool):
+                copied_inputs[k] = v
+                continue
             data = v
             if isinstance(data, openvino.Tensor):
                 data = data.data
@@ -221,6 +236,10 @@ def wait(self):
     def get_tensor(self, name: str):
         return Tensor(self.request.results[name])
 
+    def reset_state(self):
+        self.request.reset_state()
+        self._reset_state_called = True
+
     def __getattr__(self, attr):
         if attr in self.__dict__:
             return getattr(self, attr)
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
@@ -2143,10 +2143,12 @@ def _generate_random_audio_data(processor):
         ).input_features
         return input_features
 
-    @parameterized.expand(itertools.product(MODEL_NAME, APPLY_CACHING))
-    def test_calibration_data_uniqueness(self, model_name, apply_caching):
+    @parameterized.expand(itertools.product(MODEL_NAME, STATEFUL, APPLY_CACHING))
+    def test_calibration_data_uniqueness(self, model_name, stateful, apply_caching):
         model_id = MODEL_NAMES[model_name]
-        ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True, compile=True, device=OPENVINO_DEVICE)
+        ov_model = OVModelForSpeechSeq2Seq.from_pretrained(
+            model_id, export=True, compile=True, stateful=stateful, device=OPENVINO_DEVICE
+        )
         processor = AutoProcessor.from_pretrained(model_id)
 
         calibration_data = []
@@ -2158,13 +2160,28 @@ def test_calibration_data_uniqueness(self, model_name, apply_caching):
             ov_model.decoder.request = InferRequestWrapper(
                 ov_model.decoder.request, calibration_data, apply_caching=apply_caching
             )
-        for _ in range(2):
+        n_samples = 3
+        for _ in range(n_samples):
             input_features = self._generate_random_audio_data(processor)
             ov_model.generate(input_features, max_new_tokens=10, min_new_tokens=10)
 
         data_hashes_per_key = defaultdict(list)
         data_id_per_key = defaultdict(set)
 
+        # Check that reset state flag is present and correctly set in collected inputs
+        if stateful and is_nncf_version(">", "2.19"):
+            from nncf.definitions import NNCF_DATASET_RESET_STATE_KEY
+
+            # All inputs should have reset state key
+            self.assertTrue(all(NNCF_DATASET_RESET_STATE_KEY in inputs_dict for inputs_dict in calibration_data))
+            # The number of times reset state flag is set to True should be equal to (2 * n_samples), because
+            # for each sequence generation, the state is reset twice
+            self.assertEqual(
+                sum(int(inputs_dict[NNCF_DATASET_RESET_STATE_KEY]) for inputs_dict in calibration_data), 2 * n_samples
+            )
+            # Remove reset state key from inputs to avoid affecting data uniqueness checks
+            [input_dict.pop(NNCF_DATASET_RESET_STATE_KEY) for input_dict in calibration_data]
+
         for inputs_dict in calibration_data:
             for k, v in inputs_dict.items():
                 if k in ["input_ids", "beam_idx"]:
@@ -2174,14 +2191,14 @@ def test_calibration_data_uniqueness(self, model_name, apply_caching):
                 data_hashes_per_key[k].append(hash(x.tobytes()))
                 data_id_per_key[k].add(id(v))
         for k, data_hashes in data_hashes_per_key.items():
-            # All hashes can not be equal because calibration dataset contains at least 2 different samples
+            # The hashes cannot all be equal because the calibration dataset contains at least n_samples different samples
             self.assertTrue(any(data_hashes[0] != it for it in data_hashes))
         if apply_caching:
-            # With caching, encoder hidden states tensors should be cached, resulting in only 2 tensors stored
-            self.assertEqual(len(data_id_per_key["encoder_hidden_states"]), 2)
+            # With caching, encoder hidden states tensors should be cached, resulting in only n_samples tensors stored
+            self.assertEqual(len(data_id_per_key["encoder_hidden_states"]), n_samples)
         else:
             # Without caching, encoder hidden states tensors will be unique for each collected input
-            self.assertGreater(len(data_id_per_key["encoder_hidden_states"]), 2)
+            self.assertGreater(len(data_id_per_key["encoder_hidden_states"]), n_samples)
 
 
 def check_optimization_not_applicable_to_optimized_model(model, quantization_config):