Commit 189e9d5

Author: George Ohashi
Message: revert example script
Signed-off-by: George Ohashi <[email protected]>
Parent: c2a2016


examples/quantization_kv_cache/llama3_fp8_kv_example.py
19 additions, 34 deletions
@@ -6,7 +6,6 @@
 
 # Select model and load it.
 MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
-
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
@@ -20,7 +19,7 @@
 
 # Select number of samples. 512 samples is a good place to start.
 # Increasing the number of samples can improve accuracy.
-NUM_CALIBRATION_SAMPLES = 10
+NUM_CALIBRATION_SAMPLES = 512
 MAX_SEQUENCE_LENGTH = 2048
 
 # Load dataset and preprocess.
@@ -50,43 +49,29 @@ def process_and_tokenize(example):
 quant_stage:
     quant_modifiers:
         QuantizationModifier:
+            ignore: ["lm_head"]
             config_groups:
-                fp8_attention:
-                    output_activations:
+                group_0:
+                    weights:
+                        num_bits: 8
+                        type: float
+                        strategy: tensor
+                        dynamic: false
+                        symmetric: true
+                    input_activations:
                         num_bits: 8
                         type: float
-                        strategy: channel
+                        strategy: tensor
                         dynamic: false
                         symmetric: true
-                    # targets: ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
-                    targets: ['re:.*q_proj',]
-
+                    targets: ["Linear"]
+            kv_cache_scheme:
+                num_bits: 8
+                type: float
+                strategy: tensor
+                dynamic: false
+                symmetric: true
 """
-# recipe = """
-# quant_stage:
-#     quant_modifiers:
-#         QuantizationModifier:
-#             config_groups:
-#                 fp8_attention_q_proj:
-#                     output_activations:
-#                         num_bits: 8
-#                         type: float
-#                         strategy: channel
-#                         # group_size: 512
-#                         dynamic: false
-#                         symmetric: true
-#                     targets: ['re:.*q_proj']
-#                 # fp8_attention_kv_proj:
-#                 #     output_activations:
-#                 #         num_bits: 8
-#                 #         type: float
-#                 #         strategy: group
-#                 #         group_size: 128
-#                 #         dynamic: false
-#                 #         symmetric: true
-#                 #     targets: ['re:.*k_proj', 're:.*v_proj']
-
-# """
 
 # Apply algorithms.
 oneshot(
@@ -111,6 +96,6 @@ def process_and_tokenize(example):
 print("==========================================\n\n")
 
 # Save to disk compressed.
-SAVE_DIR = MODEL_ID.split("/")[1] + "-AttnQuantOnly-Group"
+SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-KV"
 model.save_pretrained(SAVE_DIR, save_compressed=True)
 tokenizer.save_pretrained(SAVE_DIR)
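
For context, the script this commit restores runs llm-compressor's one-shot calibration flow end to end. The sketch below reassembles the pieces visible in the hunks above into a self-contained script; everything not shown in the diff (the imports, the ultrachat_200k calibration set, the process_and_tokenize body, and the exact oneshot keyword names) is an assumption drawn from typical llm-compressor examples, not something this commit confirms.

# Hedged reconstruction of the restored example script. Only the lines that
# appear in the diff above are known; the dataset choice, the preprocessing,
# and the oneshot keywords are assumptions (import path may vary by version).
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor.transformers import oneshot  # assumed import path

# Select model and load it (as in the diff).
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Calibration settings restored by this commit.
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 2048

# Assumed calibration set; the diff only shows "# Load dataset and preprocess."
ds = load_dataset(
    "HuggingFaceH4/ultrachat_200k",
    split=f"train_sft[:{NUM_CALIBRATION_SAMPLES}]",
)

def process_and_tokenize(example):
    # Assumed body: render the chat and tokenize to the calibration length.
    text = tokenizer.apply_chat_template(example["messages"], tokenize=False)
    return tokenizer(
        text,
        padding=False,
        max_length=MAX_SEQUENCE_LENGTH,
        truncation=True,
        add_special_tokens=False,
    )

ds = ds.map(process_and_tokenize, remove_columns=ds.column_names)

# The FP8 weight/activation + KV-cache recipe restored by this commit.
recipe = """
quant_stage:
    quant_modifiers:
        QuantizationModifier:
            ignore: ["lm_head"]
            config_groups:
                group_0:
                    weights:
                        num_bits: 8
                        type: float
                        strategy: tensor
                        dynamic: false
                        symmetric: true
                    input_activations:
                        num_bits: 8
                        type: float
                        strategy: tensor
                        dynamic: false
                        symmetric: true
                    targets: ["Linear"]
            kv_cache_scheme:
                num_bits: 8
                type: float
                strategy: tensor
                dynamic: false
                symmetric: true
"""

# Apply algorithms (keyword names assumed from typical examples).
oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
)

# Save to disk compressed (as in the diff).
SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-KV"
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)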
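
Because the recipe includes a kv_cache_scheme, the checkpoint saved with save_compressed=True carries calibrated FP8 scales for the attention KV cache alongside the weight-quantization metadata. A minimal serving sketch, assuming a vLLM build that supports compressed-tensors checkpoints and FP8 KV cache (the model path is the SAVE_DIR produced above):

from vllm import LLM, SamplingParams

# Point vLLM at the compressed checkpoint; kv_cache_dtype="fp8" asks it to
# run the KV cache in FP8 using the calibrated scales (assumed support).
llm = LLM(model="Meta-Llama-3-8B-Instruct-FP8-KV", kv_cache_dtype="fp8")
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)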
