We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent fc6152d, commit bec0673. Copy full SHA for bec0673
verifiers/trainers/grpo_trainer.py
@@ -1327,6 +1327,7 @@ def compute_loss( # type: ignore
1327
if self.log_policy_entropy:
1328
masked_entropy = per_token_entropy * completion_mask
1329
total_completion_tokens = completion_mask.sum()
1330
+
1331
if total_completion_tokens > 0:
1332
mean_entropy = masked_entropy.sum() / total_completion_tokens
1333
gathered_entropy = self.accelerator.gather_for_metrics(mean_entropy)
0 commit comments