Skip to content

Commit a6c917e

Browse files
committed
revert changes of debugging
Signed-off-by: bhsueh <[email protected]>
1 parent f7a4465 commit a6c917e

File tree

3 files changed

+3
-8
lines changed

3 files changed

+3
-8
lines changed

tensorrt_llm/evaluate/lm_eval.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -398,10 +398,6 @@ def evaluate(self,
398 398   system_instruction=self.system_prompt)
399 399   # Normalize scores to range 0~100
400 400   scores = results["results"][self.task_name]
401     - if self.task_name == "gsm8k":
402     - print(f"scores: {scores}, results: {results}")
403     - import sys
404     - sys.stdout.flush()
405 401   for metric in scores.keys():
406 402   if isinstance(scores[metric], (float, int)):
407 403   scores[metric] *= 100

tests/integration/defs/accuracy/accuracy_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ class GSM8K(AccuracyTask):
297 297   ALPHA = 0.05
298 298   BETA = 0.2
299 299   SIGMA = 50
300     - NUM_SAMPLES = 2 # Full sample
    300 + NUM_SAMPLES = 1319 # Full sample
301 301
302 302   MAX_INPUT_LEN = 4096
303 303   MAX_OUTPUT_LEN = 256

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2308,11 +2308,10 @@ def test_nvfp4(
2308 2308   **pytorch_config,
2309 2309   enable_attention_dp=attention_dp,
2310 2310   max_batch_size=32) as llm:
2311      - # task = MMLU(self.MODEL_NAME)
2312      - # task.evaluate(llm)
     2311 + task = MMLU(self.MODEL_NAME)
     2312 + task.evaluate(llm)
2313 2313   task = GSM8K(self.MODEL_NAME)
2314 2314   task.evaluate(llm)
2315      - assert False
2316 2315
2317 2316   def test_eagle3(self):
2318 2317   pytorch_config = dict(

0 commit comments

Comments
 (0)