File tree Expand file tree Collapse file tree 5 files changed +6
-7
lines changed Expand file tree Collapse file tree 5 files changed +6
-7
lines changed Original file line number Diff line number Diff line change @@ -40,6 +40,8 @@ microsoft/Phi-3-small-128k-instruct:
40
40
- accuracy : 27.208
41
41
microsoft/Phi-3.5-mini-instruct :
42
42
- accuracy : 31.354
43
+ microsoft/Phi-4-mini-instruct :
44
+ - accuracy : 32.921
43
45
state-spaces/mamba-130m-hf :
44
46
- accuracy : 19.470
45
47
lmsys/vicuna-7b-v1.3 :
Original file line number Diff line number Diff line change @@ -126,3 +126,5 @@ mistralai/Mistral-Small-3.1-24B-Instruct-2503:
126
126
- accuracy : 89.23
127
127
microsoft/Phi-4-multimodal-instruct :
128
128
- accuracy : 81.19
129
+ microsoft/Phi-4-mini-instruct :
130
+ - accuracy : 82.30
Original file line number Diff line number Diff line change @@ -1920,10 +1920,6 @@ class TestPhi4MiniInstruct(LlmapiAccuracyTestHarness):
1920
1920
MODEL_NAME = "microsoft/Phi-4-mini-instruct"
1921
1921
MODEL_PATH = f"{ llm_models_root ()} /Phi-4-mini-instruct"
1922
1922
1923
- @pytest .mark .skip (
1924
- reason =
1925
- "Temporarily skipping test_auto_dtype while resolving Phi-4's architecture issue."
1926
- )
1927
1923
def test_auto_dtype (self ):
1928
1924
with LLM (self .MODEL_PATH ) as llm :
1929
1925
task = CnnDailymail (self .MODEL_NAME )
@@ -1932,9 +1928,6 @@ def test_auto_dtype(self):
1932
1928
task .evaluate (llm )
1933
1929
task = GSM8K (self .MODEL_NAME )
1934
1930
task .evaluate (llm )
1935
- task = GPQADiamond (self .MODEL_NAME )
1936
- task .evaluate (llm ,
1937
- extra_evaluator_kwargs = dict (apply_chat_template = True ))
1938
1931
1939
1932
1940
1933
class TestKanana_Instruct (LlmapiAccuracyTestHarness ):
Original file line number Diff line number Diff line change @@ -495,6 +495,7 @@ accuracy/test_llm_api_pytorch.py::TestBielik11BInstruct::test_fp8
495
495
accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_auto_dtype
496
496
accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8
497
497
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
498
+ accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
498
499
499
500
test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding-]
500
501
test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding-]
Original file line number Diff line number Diff line change @@ -63,6 +63,7 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[laten
63
63
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass]
64
64
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_trtllm]
65
65
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency]
66
+ accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
66
67
disaggregated/test_disaggregated.py::test_disaggregated_cache_aware_balance[TinyLlama-1.1B-Chat-v1.0]
67
68
disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]
68
69
disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one_mtp[DeepSeek-V3-Lite-fp8]
You can’t perform that action at this time.
0 commit comments