@@ -1828,6 +1828,9 @@ def data_preparation_thread(
             **reward_metrics,
         }

+        total_tokens = result.token_statistics.num_prompt_tokens + result.token_statistics.num_response_tokens
+        metrics["val/actor_tokens_per_second"] = total_tokens / result.token_statistics.generation_time
+
         if args.save_traces:
             traces = {
                 "scores": scores.tolist(),
@@ -2287,8 +2290,8 @@ def one_training_step(
             "val/num_total_tokens": num_total_tokens,
             "val/num_step_tokens": num_step_tokens,
             "epoch": episode / args.num_samples_per_prompt_rollout / len(train_dataset),
-            "tokens_per_second_overall": num_total_tokens / total_training_time if total_training_time > 0 else 0,
-            "tokens_per_second_step": num_step_tokens / step_time if step_time > 0 else 0,
+            "learner_tokens_per_second_overall": num_total_tokens / total_training_time,
+            "learner_tokens_per_second_step": num_step_tokens / step_time,
             "time/total": step_time,
             "time/training": train_timer.duration,
             "time/saving": save_time,
@@ -2374,6 +2377,12 @@ def maybe_evaluate(
         }
         if "time/generation" in eval_generate_metrics:
             eval_metrics["eval/generation_time"] = eval_generate_metrics["time/generation"]
+
+        total_tokens = (
+            eval_result.token_statistics.num_prompt_tokens + eval_result.token_statistics.num_response_tokens
+        )
+        eval_metrics["eval/actor_tokens_per_second"] = total_tokens / eval_result.token_statistics.generation_time
+
         print_rich_single_line_metrics(eval_metrics)

         table = {}
0 commit comments