huggingface · nikita-savelyevv · Dec 9, 2025 · Dec 2, 2025
diff --git a/docs/source/openvino/export.mdx b/docs/source/openvino/export.mdx
@@ -109,14 +109,14 @@ Optional arguments:
                         without zero point. 'int8_asym' stands for 8-bit integer asymmetric quantization with zero
                         points per each quantization group.
   --dataset DATASET     The dataset used for data-aware compression or quantization with NNCF. For language models you
-                        can use the one from the list ['auto','wikitext2','c4','c4-new']. With 'auto' the dataset will
-                        be collected from model's generations. For diffusion models it should be on of
+                        can use the one from the list ['auto','wikitext2','c4','c4-new','gsm8k']. With 'auto' the
+                        dataset will be collected from model's generations. For diffusion models it should be one of
                         ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit']. For
                         visual language models the dataset must be set to 'contextual'. Note: if none of the data-aware
                         compression algorithms are selected and ratio parameter is omitted or equals 1.0, the dataset
                         argument will not have an effect on the resulting model. Note: for text generation task,
-                        datasets with English texts such as 'wikitext2','c4' or 'c4-new' usually work fine even for
-                        non-English models.
+                        datasets with English texts such as 'wikitext2','gsm8k','c4' or 'c4-new' usually work fine even
+                        for non-English models.
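-  --all-layers          Whether embeddings and last MatMul layers should be compressed to INT4. If not provided an
+  --all-layers          Whether embeddings and last MatMul layers should be compressed to INT4. If not provided and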
                         weight compression is applied, they are compressed to INT8.
   --awq                 Whether to apply AWQ algorithm. AWQ improves generation quality of INT4-compressed LLMs. If

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
@@ -160,14 +160,14 @@ def parse_args_openvino(parser: "ArgumentParser"):
         default=None,
         help=(
             "The dataset used for data-aware compression or quantization with NNCF. "
-            "For language models you can use the one from the list ['auto','wikitext2','c4','c4-new']. With 'auto' the "
+            "For language models you can use the one from the list ['auto','wikitext2','c4','c4-new','gsm8k']. With 'auto' the "
             "dataset will be collected from model's generations. "
             "For diffusion models it should be on of ['conceptual_captions',"
             "'laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit']. "
             "For visual language models the dataset must be set to 'contextual'. "
             "Note: if none of the data-aware compression algorithms are selected and ratio parameter is omitted or "
             "equals 1.0, the dataset argument will not have an effect on the resulting model."
-            "Note: for text generation task, datasets with English texts such as 'wikitext2','c4' or 'c4-new' usually "
+            "Note: for text generation task, datasets with English texts such as 'wikitext2','gsm8k','c4' or 'c4-new' usually "
             "work fine even for non-English models."
         ),
     )

diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
@@ -655,7 +655,7 @@ def _prepare_decoder_calibration_data(
         return OVCalibrationDataset(nncf.Dataset(collected_inputs))
 
     def _prepare_causal_lm_calibration_data(
-        self, config: OVQuantizationConfigBase, seqlen: int = 32
+        self, config: OVQuantizationConfigBase, seqlen: Optional[int] = None
     ) -> OVCalibrationDataset:
         """
         Prepares calibration data for causal language models. Relies on `optimum.gptq.data` module.
@@ -671,7 +671,22 @@ def _prepare_causal_lm_calibration_data(
             if config.dataset == "auto":
                 generated_data = nncf.data.generate_text_data(self.model, tokenizer, dataset_size=nsamples)
                 calibration_dataset = [tokenizer(text, return_tensors="pt") for text in generated_data]
+            elif config.dataset == "gsm8k":
+                seqlen = seqlen or 256
+                dataset = self.load_dataset(
+                    "openai/gsm8k",
+                    dataset_config_name="main",
+                    dataset_split="train",
+                    num_samples=nsamples,
+                    preprocess_function=lambda x: {"text": f"Question: {x['question']}\nAnswer: {x['answer']}"},
+                    preprocess_batch=False,
+                )
+                calibration_dataset = [
+                    tokenizer(text, return_tensors="pt", truncation=True, max_length=seqlen)
+                    for text in dataset["text"]
+                ]
             else:
+                seqlen = seqlen or 32
                 calibration_dataset = get_dataset(config.dataset, tokenizer, seqlen=seqlen, nsamples=nsamples)
         elif isinstance(config.dataset, list) and all(isinstance(it, str) for it in config.dataset):
             calibration_dataset = [tokenizer(text, return_tensors="pt") for text in config.dataset[:nsamples]]

diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py
@@ -150,7 +150,7 @@
     "text-to-audio": "OVModelForTextToSpeechSeq2Seq",
 }
 
-PREDEFINED_CAUSAL_LANGUAGE_DATASETS = {"wikitext2", "c4", "c4-new", "auto"}
+PREDEFINED_CAUSAL_LANGUAGE_DATASETS = {"wikitext2", "c4", "c4-new", "auto", "gsm8k"}
 
 PREDEFINED_LANGUAGE_DATASETS = {
     "wikitext2": {"id": "wikitext", "name": "wikitext-2-raw-v1", "split": "train", "streaming": False},

diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
@@ -318,7 +318,7 @@ class OVCLIExportTestCase(unittest.TestCase):
             "text-generation",
             "llama",
             "int4_f8e5m2",
-            "--dataset wikitext2 --num-samples 1 --group-size 16 --trust-remote-code",
+            "--dataset gsm8k --num-samples 1 --group-size 16 --trust-remote-code",
             {
                 "model": 15,
             },
@@ -531,7 +531,7 @@ class OVCLIExportTestCase(unittest.TestCase):
         (
             "text-generation-with-past",
             "llama_awq",
-            "int4 --ratio 1.0 --sym --group-size 16 --awq --dataset wikitext2 --num-samples 100 "
+            "int4 --ratio 1.0 --sym --group-size 16 --awq --dataset gsm8k --num-samples 100 "
             "--sensitivity-metric max_activation_variance",
             {"model": {"int8": 4, "int4": 14}},
         ),

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
@@ -206,7 +206,7 @@ class OVQuantizerTest(unittest.TestCase):
             OVMixedQuantizationConfig(
                 weight_quantization_config=OVWeightQuantizationConfig(bits=4, group_size=16),
                 full_quantization_config=OVQuantizationConfig(dtype="f8e5m2"),
-                dataset="wikitext2",
+                dataset="gsm8k",
                 num_samples=1,
             ),
             {
@@ -1816,7 +1816,7 @@ class OVQuantizationConfigTest(unittest.TestCase):
                     dtype="f8e4m3", ignored_scope={"patterns": [f"{pattern_prefix}.layers.0.mlp"]}
                 ),
                 ignored_scope={"patterns": [f"{pattern_prefix}.layers.1.self_attn"]},
-                dataset="wikitext2",
+                dataset="gsm8k",
                 num_samples=1,
             ),
         ),
@@ -1907,6 +1907,11 @@ class OVQuantizationConfigTest(unittest.TestCase):
             OVWeightQuantizationConfig,
             None,
         ),
+        (
+            dict(bits=4, dataset="gsm8k"),
+            OVWeightQuantizationConfig,
+            None,
+        ),
         (dict(bits=8, fast_bias_correction=True), OVQuantizationConfig, None),
         (
             dict(