
Commit dee5a5a

Updating example files in Llama4 (#530)

Signed-off-by: Mohit Soni <[email protected]>
1 parent 2c27cc1

4 files changed: 10 additions, 26 deletions

QEfficient/transformers/models/llama4/modeling_llama4.py

Lines changed: 0 additions & 8 deletions
@@ -929,14 +929,6 @@ def get_specializations(
         )
         vision_size = num_features_per_tile * max_num_tiles

-        downsample_ratio = int(round(1.0 / (self.config.vision_config.pixel_shuffle_ratio**2)))
-        num_features_per_tile = int(
-            (img_size // self.config.vision_config.patch_size)
-            * (img_size // self.config.vision_config.patch_size)
-            // downsample_ratio
-        )
-        vision_size = num_features_per_tile * max_num_tiles
-
         vision = [
             {
                 "batch_size": batch_size,

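The deleted block was a verbatim duplicate of the computation performed immediately above it (old lines 929-931), so vision_size was being derived twice with identical results; the commit simply drops the dead copy. For reference, a minimal sketch of the surviving arithmetic, using assumed (not verified) Llama 4 vision-config values:

# Sketch of the vision_size computation kept by this commit. The config
# values below (img_size=336, patch_size=14, pixel_shuffle_ratio=0.5) and
# max_num_tiles=17 are illustrative assumptions, not read from the model.
img_size, patch_size, pixel_shuffle_ratio = 336, 14, 0.5
max_num_tiles = 17

downsample_ratio = int(round(1.0 / (pixel_shuffle_ratio**2)))  # 1 / 0.25 -> 4
num_features_per_tile = int(
    (img_size // patch_size) * (img_size // patch_size) // downsample_ratio
)  # (24 * 24) // 4 -> 144
vision_size = num_features_per_tile * max_num_tiles  # 144 * 17 -> 2448
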
QEfficient/transformers/models/modeling_auto.py

Lines changed: 0 additions & 4 deletions
@@ -866,10 +866,6 @@ def kv_offload_generate(
         chunk_inputs = lang_inputs.copy()
         prefill_start = perf_counter()

-        # Prepare inputs for prefill
-        chunk_inputs = lang_inputs.copy()
-        prefill_start = perf_counter()
-
         # Run prefill
         chunk_inputs = lang_inputs.copy()
         for i in range(num_chunks):
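Same pattern here: the removed lines repeated the prefill setup that had just run (old lines 866-867), so this hunk is pure dead-code removal inside kv_offload_generate. For orientation, a rough sketch of the chunked-prefill loop this setup feeds; the names session, run, and prefill_seq_len are stand-ins for the real QEfficient internals, not the actual API:

def chunked_prefill(session, lang_inputs, prefill_seq_len, num_chunks):
    # Copy once, then overwrite the per-chunk slices each iteration,
    # mirroring the chunk_inputs = lang_inputs.copy() line in the diff.
    chunk_inputs = lang_inputs.copy()
    for i in range(num_chunks):
        start, end = i * prefill_seq_len, (i + 1) * prefill_seq_len
        chunk_inputs["input_ids"] = lang_inputs["input_ids"][:, start:end]
        chunk_inputs["position_ids"] = lang_inputs["position_ids"][:, start:end]
        outputs = session.run(chunk_inputs)  # hypothetical runner call
    return outputs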

examples/llama4_example.py

Lines changed: 5 additions & 7 deletions
@@ -7,7 +7,7 @@

 import torch
 import transformers
-from transformers import AutoConfig, AutoModelForImageTextToText, AutoProcessor, TextStreamer
+from transformers import AutoConfig, AutoProcessor, TextStreamer

 from QEfficient import QEFFAutoModelForImageTextToText

@@ -17,14 +17,12 @@
 config.text_config.num_hidden_layers = 4
 config.vision_config.num_hidden_layers = 2

-model = AutoModelForImageTextToText.from_pretrained(model_id, attn_implementation="eager", config=config)
-model.eval()
-tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+qeff_model = QEFFAutoModelForImageTextToText.from_pretrained(
+    model_id, attn_implementation="eager", kv_offload=True, config=config
+)
+tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
 processor = AutoProcessor.from_pretrained(model_id)

-### For running the model in single QPC approach use kv_offload=False. For Dual QPC approach use kv_offload=True ###
-qeff_model = QEFFAutoModelForImageTextToText(model, kv_offload=True)
-
 ### use skip_vision=Ture, if want to run only text, ow false ###
 skip_vision = True

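The substantive change in both example files is the construction path: instead of loading a HuggingFace model with AutoModelForImageTextToText and wrapping it, the examples now go through QEFFAutoModelForImageTextToText.from_pretrained directly, folding in the kv_offload flag (per the removed comment, kv_offload=False selects the single-QPC approach and kv_offload=True the dual-QPC approach). Condensed side by side, with model_id assumed since it falls outside the hunks shown:

from transformers import AutoConfig, AutoModelForImageTextToText
from QEfficient import QEFFAutoModelForImageTextToText

model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"  # assumed model id
config = AutoConfig.from_pretrained(model_id)

# Old path (removed by this commit): load via transformers, then wrap.
model = AutoModelForImageTextToText.from_pretrained(
    model_id, attn_implementation="eager", config=config
)
qeff_model = QEFFAutoModelForImageTextToText(model, kv_offload=True)

# New path: load through the QEfficient class in one call.
qeff_model = QEFFAutoModelForImageTextToText.from_pretrained(
    model_id, attn_implementation="eager", kv_offload=True, config=config
)
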
examples/llama4_multi_image_example.py

Lines changed: 5 additions & 7 deletions
@@ -7,7 +7,7 @@

 import torch
 import transformers
-from transformers import AutoConfig, AutoModelForImageTextToText, AutoProcessor, TextStreamer
+from transformers import AutoConfig, AutoProcessor, TextStreamer

 from QEfficient import QEFFAutoModelForImageTextToText

@@ -17,14 +17,12 @@
 config.text_config.num_hidden_layers = 4
 config.vision_config.num_hidden_layers = 2

-model = AutoModelForImageTextToText.from_pretrained(model_id, attn_implementation="eager", config=config)
-model.eval()
-tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+qeff_model = QEFFAutoModelForImageTextToText.from_pretrained(
+    model_id, attn_implementation="eager", kv_offload=True, config=config
+)
+tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
 processor = AutoProcessor.from_pretrained(model_id)

-### For running the model in single QPC approach use kv_offload=False. For Dual QPC approach use kv_offload=True ###
-qeff_model = QEFFAutoModelForImageTextToText(model, kv_offload=True)
-
 ### For multi-image, the value of max_num_tiles should be the sum of the num_tiles values across all the images ###
 qeff_model.compile(
     prefill_seq_len=128,
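The multi-image variant adds one rule worth noting: max_num_tiles passed at compile time should equal the sum of per-image tile counts. A hypothetical illustration of that arithmetic (the tile counts and the max_num_tiles kwarg are assumptions inferred from the example's comment, not confirmed API):

# qeff_model as constructed in the example above. Two input images split
# into 9 and 8 tiles respectively (illustrative numbers; the processor
# derives real tile counts from each image's resolution).
num_tiles_per_image = [9, 8]
max_num_tiles = sum(num_tiles_per_image)  # 9 + 8 -> 17

qeff_model.compile(
    prefill_seq_len=128,
    max_num_tiles=max_num_tiles,  # assumed kwarg, per the example's comment
)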
