Minor cleanup: drop `List` import, remove stray blank line, tidy docstrings

Haichao-Zhang · Haichao-Zhang · commit d89cb3ea5512 · 2025-05-23T12:30:03.000-07:00
diff --git a/alf/algorithms/algorithm.py b/alf/algorithms/algorithm.py
@@ -1469,7 +1469,6 @@ def train_from_replay_buffer(self, update_global_counter=False):
                 ``True``, it will affect the counter only if
                 ``config.update_counter_every_mini_batch=True``.
         """
-
         config: TrainerConfig = self._config
 
         # returns 0 if haven't started training yet, when ``_replay_buffer`` is
diff --git a/alf/algorithms/rl_algorithm.py b/alf/algorithms/rl_algorithm.py
@@ -19,7 +19,7 @@
 import os
 import time
 import torch
-from typing import Callable, List, Optional
+from typing import Callable, Optional
 from absl import logging
 
 import alf
@@ -603,15 +603,14 @@ def _async_unroll(self, unroll_length: int):
 
     def post_process_experience(self, rollout_info, step_type: StepType,
                                 experiences: Experience):
-        """A function for postprocessing experience.  By default, it returns the input
+        """A function for postprocessing experience. By default, it returns the input
         experience unmodified. Users can customize this function in the derived
         class to achieve different effects. For example:
         - per-step processing: return the current step of experience unmodified (by default)
-            or a modified version according to the customized ``post_process_experience`` function.
+            or a modified version according to the customized ``post_process_experience``.
             As another example, task filtering can be simply achieved by returning ``[]``
-            in ``post_process_experience`` for that particular task.
-        - per-episode processing: ``should_post_process_experience`` returns True on episode
-            end and ``post_process_experience`` can return a list of processed
+            for that particular task.
+        - per-episode processing: this can be achieved by returning a list of processed
             experiences. For example, this can be used for success episode labeling.
 
         Args:
@@ -637,9 +636,7 @@ def _process_unroll_step(self, policy_step, action, time_step,
         to create customized post processing behaviors.
 
         Args:
-            experiences: a list of experience, containing the experience starting from the
-            initial time when ``should_post_process_experience`` is False to the step where
-            ``should_post_process_experience`` is True.
+            experiences: a list of experiences
         """
 
         self.observe_for_metrics(time_step.cpu())