Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/source/openvino/export.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,14 @@ Optional arguments:
without zero point. 'int8_asym' stands for 8-bit integer asymmetric quantization with zero
points per each quantization group.
--dataset DATASET The dataset used for data-aware compression or quantization with NNCF. For language models you
can use the one from the list ['auto','wikitext2','c4','c4-new']. With 'auto' the dataset will
be collected from model's generations. For diffusion models it should be on of
can use the one from the list ['auto','wikitext2','c4','c4-new','gsm8k']. With 'auto' the
dataset will be collected from model's generations. For diffusion models it should be one of
['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit']. For
visual language models the dataset must be set to 'contextual'. Note: if none of the data-aware
compression algorithms are selected and ratio parameter is omitted or equals 1.0, the dataset
argument will not have an effect on the resulting model. Note: for text generation task,
datasets with English texts such as 'wikitext2','c4' or 'c4-new' usually work fine even for
non-English models.
datasets with English texts such as 'wikitext2','gsm8k','c4' or 'c4-new' usually work fine even
for non-English models.
--all-layers Whether embeddings and last MatMul layers should be compressed to INT4. If not provided and
weight compression is applied, they are compressed to INT8.
--awq Whether to apply AWQ algorithm. AWQ improves generation quality of INT4-compressed LLMs. If
Expand Down
4 changes: 2 additions & 2 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,14 +160,14 @@ def parse_args_openvino(parser: "ArgumentParser"):
default=None,
help=(
"The dataset used for data-aware compression or quantization with NNCF. "
"For language models you can use the one from the list ['auto','wikitext2','c4','c4-new']. With 'auto' the "
"For language models you can use the one from the list ['auto','wikitext2','c4','c4-new','gsm8k']. With 'auto' the "
"dataset will be collected from model's generations. "
"For diffusion models it should be on of ['conceptual_captions',"
"'laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit']. "
"For visual language models the dataset must be set to 'contextual'. "
"Note: if none of the data-aware compression algorithms are selected and ratio parameter is omitted or "
"equals 1.0, the dataset argument will not have an effect on the resulting model."
"Note: for text generation task, datasets with English texts such as 'wikitext2','c4' or 'c4-new' usually "
"Note: for text generation task, datasets with English texts such as 'wikitext2','gsm8k','c4' or 'c4-new' usually "
"work fine even for non-English models."
),
)
Expand Down
17 changes: 16 additions & 1 deletion optimum/intel/openvino/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ def _prepare_decoder_calibration_data(
return OVCalibrationDataset(nncf.Dataset(collected_inputs))

def _prepare_causal_lm_calibration_data(
self, config: OVQuantizationConfigBase, seqlen: int = 32
self, config: OVQuantizationConfigBase, seqlen: Optional[int] = None
) -> OVCalibrationDataset:
"""
Prepares calibration data for causal language models. Relies on `optimum.gptq.data` module.
Expand All @@ -671,7 +671,22 @@ def _prepare_causal_lm_calibration_data(
if config.dataset == "auto":
generated_data = nncf.data.generate_text_data(self.model, tokenizer, dataset_size=nsamples)
calibration_dataset = [tokenizer(text, return_tensors="pt") for text in generated_data]
elif config.dataset == "gsm8k":
seqlen = seqlen or 256
dataset = self.load_dataset(
"openai/gsm8k",
dataset_config_name="main",
dataset_split="train",
num_samples=nsamples,
preprocess_function=lambda x: {"text": f"Question: {x['question']}\nAnswer: {x['answer']}"},
preprocess_batch=False,
)
calibration_dataset = [
tokenizer(text, return_tensors="pt", truncation=True, max_length=seqlen)
for text in dataset["text"]
]
else:
seqlen = seqlen or 32
calibration_dataset = get_dataset(config.dataset, tokenizer, seqlen=seqlen, nsamples=nsamples)
elif isinstance(config.dataset, list) and all(isinstance(it, str) for it in config.dataset):
calibration_dataset = [tokenizer(text, return_tensors="pt") for text in config.dataset[:nsamples]]
Expand Down
2 changes: 1 addition & 1 deletion optimum/intel/openvino/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@
"text-to-audio": "OVModelForTextToSpeechSeq2Seq",
}

PREDEFINED_CAUSAL_LANGUAGE_DATASETS = {"wikitext2", "c4", "c4-new", "auto"}
PREDEFINED_CAUSAL_LANGUAGE_DATASETS = {"wikitext2", "c4", "c4-new", "auto", "gsm8k"}

PREDEFINED_LANGUAGE_DATASETS = {
"wikitext2": {"id": "wikitext", "name": "wikitext-2-raw-v1", "split": "train", "streaming": False},
Expand Down
4 changes: 2 additions & 2 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ class OVCLIExportTestCase(unittest.TestCase):
"text-generation",
"llama",
"int4_f8e5m2",
"--dataset wikitext2 --num-samples 1 --group-size 16 --trust-remote-code",
"--dataset gsm8k --num-samples 1 --group-size 16 --trust-remote-code",
{
"model": 15,
},
Expand Down Expand Up @@ -531,7 +531,7 @@ class OVCLIExportTestCase(unittest.TestCase):
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 16 --awq --dataset wikitext2 --num-samples 100 "
"int4 --ratio 1.0 --sym --group-size 16 --awq --dataset gsm8k --num-samples 100 "
"--sensitivity-metric max_activation_variance",
{"model": {"int8": 4, "int4": 14}},
),
Expand Down
9 changes: 7 additions & 2 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ class OVQuantizerTest(unittest.TestCase):
OVMixedQuantizationConfig(
weight_quantization_config=OVWeightQuantizationConfig(bits=4, group_size=16),
full_quantization_config=OVQuantizationConfig(dtype="f8e5m2"),
dataset="wikitext2",
dataset="gsm8k",
num_samples=1,
),
{
Expand Down Expand Up @@ -1816,7 +1816,7 @@ class OVQuantizationConfigTest(unittest.TestCase):
dtype="f8e4m3", ignored_scope={"patterns": [f"{pattern_prefix}.layers.0.mlp"]}
),
ignored_scope={"patterns": [f"{pattern_prefix}.layers.1.self_attn"]},
dataset="wikitext2",
dataset="gsm8k",
num_samples=1,
),
),
Expand Down Expand Up @@ -1907,6 +1907,11 @@ class OVQuantizationConfigTest(unittest.TestCase):
OVWeightQuantizationConfig,
None,
),
(
dict(bits=4, dataset="gsm8k"),
OVWeightQuantizationConfig,
None,
),
(dict(bits=8, fast_bias_correction=True), OVQuantizationConfig, None),
(
dict(
Expand Down
Loading