2 files changed: +10 -1 lines changed

@@ -367,9 +367,11 @@ def test_auto_dtype_tp8(self):
     @skip_pre_hopper
     def test_fp8_tp4(self):
         model_path = f"{llm_models_root()}/modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8"
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
         with LLM(model_path,
                  tensor_parallel_size=4,
+                 max_seq_len=8192,
+                 max_batch_size=32,
                  kv_cache_config=kv_cache_config) as llm:
             assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
             task = MMLU(self.MODEL_NAME)
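
For reference, below is a minimal standalone sketch of how the settings introduced in this hunk are passed through the TensorRT-LLM LLM API. The import paths, the placeholder checkpoint path, and the generate() call are assumptions based on the public LLM API quickstart, not part of this diff; the actual test runs the MMLU task instead of free-form generation.

    # Minimal sketch, assuming the public tensorrt_llm / tensorrt_llm.llmapi imports
    # and a locally available FP8 checkpoint; the values mirror the change above.
    from tensorrt_llm import LLM
    from tensorrt_llm.llmapi import KvCacheConfig

    # Reserve half of the free GPU memory for the KV cache (was 0.6 before this change).
    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)

    with LLM("/path/to/Llama-3.3-70B-Instruct-fp8",  # placeholder checkpoint path
             tensor_parallel_size=4,
             max_seq_len=8192,    # sequence-length cap added by this change
             max_batch_size=32,   # batch-size cap added by this change
             kv_cache_config=kv_cache_config) as llm:
        # Simple smoke-test generation to show the configured engine in use.
        for output in llm.generate(["Briefly explain tensor parallelism."]):
            print(output.outputs[0].text)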

@@ -442,3 +442,10 @@ unittest/trt/attention/test_gpt_attention.py -k "partition3" SKIP (https://nvbug
 test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False] SKIP (https://nvbugs/5414909)
 unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep1-disable_adp-enable_graph-tp8-trtllm-scout] SKIP (https://nvbugs/5418673)
 unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep4-enable_adp-enable_graph-tp8-trtllm-scout] SKIP (https://nvbugs/5418673)
+examples/test_llama.py::test_llm_api_lookahead_decoding_1gpu[Llama-3.1-8B-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct] SKIP (https://nvbugs/5419066)
+examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5360086)
+examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5141288)
+examples/test_qwen.py::test_llm_qwen_7b_int8_kv_1node_1gpus[qwen2_vl_7b_instruct-enable_gemm_plugin-enable_weight_only] SKIP (https://nvbugs/5419067)
+examples/test_qwen.py::test_llm_qwen_awq_single_gpu_summary[qwen2_vl_7b_instruct-nb:4] SKIP (https://nvbugs/5419068)
+examples/test_qwen.py::test_llm_qwen_smooth_quant_single_gpu_summary[qwen2_vl_7b_instruct-enable_ptpc-nb:4] SKIP (https://nvbugs/5419069)
+examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-fp8-float16-enable_attn_plugin-enable_gemm_plugin] SKIP (https://nvbugs/5419070)