|
138 | 138 | "disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-True-DeepSeek-V3-Lite-fp8/fp8]": 67.32832619687542,
|
139 | 139 | "disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[True-False-TinyLlama-1.1B-Chat-v1.0]": 46.302398771978915,
|
140 | 140 | "disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[True-True-TinyLlama-1.1B-Chat-v1.0]": 38.81214914191514,
|
141 |
| - "test_unittests.py::test_unittests_v2[unittest/_torch -k \"not (modeling or multi_gpu or auto_deploy)\"]": 1186.6702785710804, |
| 141 | + "test_unittests.py::test_unittests_v2[unittest/_torch/attention]": 588.56, |
| 142 | + "test_unittests.py::test_unittests_v2[unittest/_torch/compilation]": 31.94, |
| 143 | + "test_unittests.py::test_unittests_v2[unittest/_torch/debugger]": 36.69, |
| 144 | + "test_unittests.py::test_unittests_v2[unittest/_torch/executor]": 170.86, |
| 145 | + "test_unittests.py::test_unittests_v2[unittest/_torch/misc]": 600.50, |
| 146 | + "test_unittests.py::test_unittests_v2[unittest/_torch/modules]": 158.50, |
| 147 | + "test_unittests.py::test_unittests_v2[unittest/_torch/multimodal]": 23.54, |
| 148 | + "test_unittests.py::test_unittests_v2[unittest/_torch/sampler]": 107.66, |
| 149 | + "test_unittests.py::test_unittests_v2[unittest/_torch/speculative]": 1850.16, |
| 150 | + "test_unittests.py::test_unittests_v2[unittest/_torch/thop]": 852.56, |
142 | 151 | "test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_mixtral\"]": 208.1838396479725,
|
143 | 152 | "test_unittests.py::test_unittests_v2[unittest/_torch/multi_gpu_modeling -k \"deepseek\"]": 393.0210295501165,
|
144 | 153 | "cpp/test_e2e.py::test_model[-gpt_executor-80]": 4016.7569622844458,
|
|
238 | 247 | "disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-False-TinyLlama-1.1B-Chat-v1.0]": 48.16434509307146,
|
239 | 248 | "test_e2e.py::test_trtllm_bench_iteration_log[PyTorch-non-streaming-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]": 163.86223009089008,
|
240 | 249 | "test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-instruct-hf-fp8-True-True]": 115.74023819994181,
|
241 |
| - "test_unittests.py::test_unittests_v2[unittest/_torch -k \"modeling_llama\"]": 718.749935634085, |
| 250 | + "test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_llama\"]": 718.749935634085, |
242 | 251 | "accuracy/test_cli_flow.py::TestGpt2::test_int8_kv_cache": 399.65961667895317,
|
243 | 252 | "accuracy/test_cli_flow.py::TestLlama3_2_1B::test_int4_awq_int8_kv_cache": 392.90223736315966,
|
244 | 253 | "accuracy/test_cli_flow.py::TestQwen2_7BInstruct::test_int4_awq_prequantized": 604.7383968606591,
|
|
280 | 289 | "disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]": 67.3897166326642,
|
281 | 290 | "disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]": 98.97588296607137,
|
282 | 291 | "disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]": 67.9668476767838,
|
283 |
| - "test_unittests.py::test_unittests_v2[unittest/_torch/test_attention_mla.py]": 26.32902159006335, |
| 292 | + "test_unittests.py::test_unittests_v2[unittest/_torch/attention/test_attention_mla.py]": 26.32902159006335, |
284 | 293 | "accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False-enable_chunked_prefill=False]": 591.2785023800097,
|
285 | 294 | "accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]": 306.84709841990843,
|
286 | 295 | "accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False-enable_chunked_prefill=False]": 220.57452515885234,
|
|
292 | 301 | "test_e2e.py::test_ptp_quickstart_advanced_eagle3[Llama-3.1-8b-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct-EAGLE3-LLaMA3.1-Instruct-8B]": 109.26379436196294,
|
293 | 302 | "test_e2e.py::test_ptp_quickstart_advanced_mixed_precision": 80.88908524392173,
|
294 | 303 | "test_e2e.py::test_ptp_quickstart_advanced_mtp[DeepSeek-V3-Lite-BF16-DeepSeek-V3-Lite/bf16]": 99.42739840806462,
|
295 |
| - "test_unittests.py::test_unittests_v2[unittest/_torch/speculative/test_eagle3.py]": 317.8708840459585, |
296 | 304 | "accuracy/test_cli_flow.py::TestLlama7B::test_auto_dtype": 402.75543826818466,
|
297 | 305 | "examples/test_bert.py::test_llm_bert_general[compare_hf-disable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float32-BertModel-bert/bert-base-uncased]": 111.17977902293205,
|
298 | 306 | "examples/test_mamba.py::test_llm_mamba_1gpu[mamba-130m-float16-enable_gemm_plugin]": 112.04011878371239,
|
|
0 commit comments