
Commit 38b755e

Fixing tests
1 parent 353ceb3 commit 38b755e


10 files changed: +593 -562 lines changed


debug_gym/agents/base_agent.py

Lines changed: 12 additions & 16 deletions
@@ -1,16 +1,13 @@
 import json
 import os
-import subprocess
 import uuid
-from collections import namedtuple
-from copy import copy
 from dataclasses import MISSING, asdict, dataclass, field, fields
 from typing import Any, Dict

 import numpy as np
 from jinja2 import Environment, Template

-from debug_gym.agents.history_tracker import HistoryTracker, build_history_prompt
+from debug_gym.agents.history_tracker import HistoryTracker
 from debug_gym.gym.envs.env import EnvInfo, RepoEnv
 from debug_gym.gym.utils import filter_non_utf8
 from debug_gym.llms.base import LLM
@@ -142,15 +139,13 @@ def _load_system_prompt_template(self) -> Template | None:
         """Load system prompt template from config if specified and register custom filters.
         If no template is specified, return None.
         """
-        system_prompt_template_file = self.args.system_prompt_template_file
-        if system_prompt_template_file:
-            if not os.path.isfile(system_prompt_template):
-                error_msg = (
-                    f"System prompt template file `{system_prompt_template}` not found."
-                )
+        system_prompt_template = None
+        if self.args.system_prompt_template_file is not None:
+            if not os.path.isfile(self.args.system_prompt_template_file):
+                error_msg = f"System prompt template file `{self.args.system_prompt_template_file}` not found."
                 self.logger.error(error_msg)
                 raise FileNotFoundError(error_msg)
-            with open(system_prompt_template, "r") as f:
+            with open(self.args.system_prompt_template_file, "r") as f:
                 system_prompt_template = f.read()

             system_prompt_template = (
@@ -162,19 +157,20 @@ def _load_system_prompt_template(self) -> Template | None:
             env.filters["to_pretty_json"] = self.to_pretty_json
             env.filters["trim_message"] = self.trim_message
             return env.from_string(system_prompt_template)
+
         return None

     def _load_instance_prompt_template(self) -> Template | None:
         """Load instance prompt template from config if specified and register custom filters.
         If no template is specified, return None.
         """
-        instance_prompt_template_file = self.args.instance_prompt_template_file
-        if instance_prompt_template_file:
-            if not os.path.isfile(instance_prompt_template_file):
-                error_msg = f"Instance prompt template file `{instance_prompt_template_file}` not found."
+        instance_prompt_template = None
+        if self.args.instance_prompt_template_file is not None:
+            if not os.path.isfile(self.args.instance_prompt_template_file):
+                error_msg = f"Instance prompt template file `{self.args.instance_prompt_template_file}` not found."
                 self.logger.error(error_msg)
                 raise FileNotFoundError(error_msg)
-            with open(instance_prompt_template_file, "r") as f:
+            with open(self.args.instance_prompt_template_file, "r") as f:
                 instance_prompt_template = f.read()

             instance_prompt_template = (
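
Note on this change: the original `_load_system_prompt_template` read the configured path into `system_prompt_template_file` but then checked and opened an undefined `system_prompt_template` name; both loaders now validate and open `self.args.*_template_file` directly. A minimal standalone sketch of the resulting pattern (the `load_template` helper name is illustrative, not part of the repo):

import os

from jinja2 import Environment, Template


def load_template(path: str | None) -> Template | None:
    """Read a Jinja2 template from `path`, or return None when no path is configured."""
    template_source = None
    if path is not None:
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Template file `{path}` not found.")
        with open(path, "r") as f:
            template_source = f.read()
        return Environment().from_string(template_source)
    return None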

debug_gym/agents/froggy_agent.py

Lines changed: 39 additions & 44 deletions
@@ -1,51 +1,9 @@
-import json
-import subprocess
 from dataclasses import dataclass
 from typing import Any, Dict

-from jinja2 import Template
-
-from debug_gym.agents.base_agent import (
-    LLM,
-    AgentArgs,
-    BaseAgent,
-    Environment,
-    register_agent,
-)
+from debug_gym.agents.base_agent import LLM, AgentArgs, BaseAgent, register_agent
 from debug_gym.agents.history_tracker import HistoryTracker
 from debug_gym.gym.envs.env import EnvInfo
-from debug_gym.gym.utils import filter_non_utf8
-from debug_gym.llms.utils import trim
-
-
-def build_history_prompt(
-    history: HistoryTracker, llm: LLM, reset_prompt_history_after_rewrite: bool = False
-):
-    env_observations, llm_responses = history.get()
-    latest_rewrite_step = 0
-    # Find the latest rewrite step if reset_prompt_history_after_rewrite
-    if reset_prompt_history_after_rewrite:
-        for i, obs in enumerate(env_observations):
-            if obs.rewrite_counter == env_observations[-1].rewrite_counter:
-                latest_rewrite_step = i
-                break
-
-    env_observations = env_observations[latest_rewrite_step:]
-    llm_responses = llm_responses[latest_rewrite_step:]
-
-    messages = []
-    for obs, response in zip(env_observations, llm_responses):
-        # environment observation
-        messages.extend(
-            llm.convert_observation_to_message(
-                obs.step_observation.observation,
-                obs.action_tool_call.id if obs.action_tool_call else None,
-                obs.action_tool_call.name if obs.action_tool_call else None,
-            )
-        )
-        # llm response
-        messages.extend(llm.convert_response_to_message(response))
-    return messages


 @dataclass
@@ -67,7 +25,6 @@ def __init__(
         *args,
         **kwargs,
     ):
-
         agent_args = (
             FroggyAgentArgs.from_dict(agent_args)
             if isinstance(agent_args, dict)
@@ -80,6 +37,7 @@ def build_history_prompt(self):
             self.history,
             self.llm,
             self.args.reset_prompt_history_after_rewrite,
+            history_cutoff=self.args.memory_size,
         )
         return messages

@@ -159,3 +117,40 @@ def _default_system_prompt(self, info) -> str:
         system_prompt_dict["Shortcut features"] = shortcut_features

         return self.to_pretty_json(system_prompt_dict)
+
+
+def build_history_prompt(
+    history: HistoryTracker,
+    llm: LLM,
+    reset_prompt_history_after_rewrite: bool = False,
+    history_cutoff: int = None,
+):
+    env_observations, llm_responses = history.get()
+    if history_cutoff is not None:
+        env_observations = env_observations[-history_cutoff:]
+        llm_responses = llm_responses[-history_cutoff:]
+
+    latest_rewrite_step = 0
+    # Find the latest rewrite step if reset_prompt_history_after_rewrite
+    if reset_prompt_history_after_rewrite:
+        for i, obs in enumerate(env_observations):
+            if obs.rewrite_counter == env_observations[-1].rewrite_counter:
+                latest_rewrite_step = i
+                break
+
+    env_observations = env_observations[latest_rewrite_step:]
+    llm_responses = llm_responses[latest_rewrite_step:]
+
+    messages = []
+    for obs, response in zip(env_observations, llm_responses):
+        # llm response
+        messages.append(llm.convert_response_to_message(response))
+        # environment observation
+        messages.append(
+            llm.convert_observation_to_message(
+                obs.step_observation.observation,
+                obs.action_tool_call.id if obs.action_tool_call else None,
+                obs.action_tool_call.name if obs.action_tool_call else None,
+            )
+        )
+    return messages
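
Note on this change: `build_history_prompt` now lives in `froggy_agent.py`, takes an optional `history_cutoff` (wired to `self.args.memory_size` at the call site), and emits the LLM response before the matching environment observation. A small self-contained sketch of the cutoff behavior, using plain lists in place of the `HistoryTracker` data (illustrative, not repo code):

def truncate_history(observations: list, responses: list, history_cutoff: int | None = None):
    """Keep only the last `history_cutoff` steps; None means keep everything."""
    if history_cutoff is not None:
        observations = observations[-history_cutoff:]
        responses = responses[-history_cutoff:]
    return observations, responses


obs, resp = truncate_history(list(range(10)), list(range(10)), history_cutoff=3)
assert obs == [7, 8, 9] and resp == [7, 8, 9]
obs, resp = truncate_history(list(range(10)), list(range(10)))  # no cutoff keeps everything
assert len(obs) == 10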

debug_gym/agents/history_tracker.py

Lines changed: 12 additions & 12 deletions
@@ -30,33 +30,33 @@ def init(

     def step(
         self,
-        llm_response: LLMResponse,
         env_observation: EnvInfo,
+        llm_response: LLMResponse,
     ) -> None:
         """llm_responses can be None since the initial state does not have prompt and response"""
-        self.llm_responses.append(copy.deepcopy(llm_response))
         self.env_observations.append(copy.deepcopy(env_observation))
+        self.llm_responses.append(copy.deepcopy(llm_response))

     def get(self):
-        # return the history_steps latest steps
+        """Returns the full history of environment observations and LLM responses."""
         return (
             self.env_observations,
             self.llm_responses,
         )

-    def json(self, game_step=None):
-        if len(self.env_observations) == 0 and self.env_init is None:
+    def json(self, game_step: int | None = None):
+        if len(self.env_observations) == 0 and self.env_initial_observation is None:
             return {}

-        if game_step >= len(self.env_observations):
+        # Retrieve the most recent step by default.
+        game_step = (
+            game_step if game_step is not None else len(self.env_observations) - 1
+        )
+        if game_step < 0 or game_step >= len(self.env_observations):
             raise ValueError(
-                f"Invalid game_step: {game_step}. Max step: {len(self.env_observations)-1}"
+                f"Invalid game_step: {game_step}; should be between [0, {len(self.env_observations)-1}]."
             )

-        if game_step is None:
-            # retrieve the most recent step
-            game_step = len(self.env_observations) - 1
-
         if game_step == 0:
             # initial state
             json_out = {
9191
return json_out
9292

9393
def score(self):
94-
return sum([memory.score for memory in self.env_observations])
94+
return sum([obs.score for obs in self.env_observations])
9595

9696
def __len__(self):
9797
return len(self.env_observations)
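
Note on this change: `json()` now defaults `game_step` to the latest step before validating it; the old order compared `None >= len(...)` (a TypeError) and the defaulting branch after the check could never run. A standalone sketch of the new defaulting and bounds check, with `n_steps` standing in for `len(self.env_observations)` (helper name is illustrative):

def resolve_game_step(game_step: int | None, n_steps: int) -> int:
    """Default to the most recent step, then validate the requested index."""
    game_step = game_step if game_step is not None else n_steps - 1
    if game_step < 0 or game_step >= n_steps:
        raise ValueError(
            f"Invalid game_step: {game_step}; should be between [0, {n_steps - 1}]."
        )
    return game_step


assert resolve_game_step(None, 5) == 4  # defaults to the latest step
assert resolve_game_step(0, 5) == 0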

debug_gym/agents/utils.py

Lines changed: 2 additions & 2 deletions
@@ -157,10 +157,10 @@ def save_patch(env, problem_path: Path, logger: DebugGymLogger):
     logger.debug(f"Patch saved in {patch_path}")


-def save_trajectory(agent, problem: str, problem_path: Path, logger: DebugGymLogger):
+def save_trajectory(agent, problem_path: Path, logger: DebugGymLogger):
     """Persist the agent trajectory to disk."""
     problem_path.mkdir(parents=True, exist_ok=True)
-    trajectory = agent.build_trajectory(task_name=problem)
+    trajectory = agent._build_trajectory()
     json_file = problem_path / "trajectory.json"
     with open(json_file, "w") as f:
         json.dump(trajectory, f, indent=4)
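
Note on this change: `save_trajectory` drops the `problem` argument because the agent now builds its own trajectory via `_build_trajectory()`, so callers pass only the agent, the output directory, and the logger. A hedged standalone sketch of the updated helper (assuming `_build_trajectory()` returns a JSON-serializable dict, which is what the call implies):

import json
from pathlib import Path


def save_trajectory_sketch(agent, problem_path: Path) -> Path:
    """Mirror of the updated helper: the agent supplies its own trajectory dict."""
    problem_path.mkdir(parents=True, exist_ok=True)
    trajectory = agent._build_trajectory()  # assumed to be JSON-serializable
    json_file = problem_path / "trajectory.json"
    with open(json_file, "w") as f:
        json.dump(trajectory, f, indent=4)
    return json_file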

debug_gym/llms/openai.py

Lines changed: 6 additions & 6 deletions
@@ -191,25 +191,25 @@ def parse_tool_call_response(self, response) -> ToolCall:
             )

     def convert_response_to_message(self, response: LLMResponse) -> dict:
-        response = {
+        message = {
             "role": "assistant",
             "tool_calls": [
                 {
                     "type": "function",
                     "id": response.tool.id,
                     "function": {
-                        "name": response[0].tool.name,
-                        "arguments": json.dumps(response[0].tool.arguments),
+                        "name": response.tool.name,
+                        "arguments": json.dumps(response.tool.arguments),
                     },
                 },
             ],
-            "content": filter_non_utf8(f"{response[0].response}"),
+            "content": filter_non_utf8(f"{response.response}"),
         }
         if response.reasoning_response:
-            response["reasoning_content"] = filter_non_utf8(
+            message["reasoning_content"] = filter_non_utf8(
                 f"{response.reasoning_response}"
             )
-        return response
+        return message

     def convert_observation_to_message(
         self, observation: str, last_tool_call_id=None, last_tool_call_name=None
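
Note on this change: the converter previously shadowed its `response` argument with the output dict and indexed it as `response[0]`; building a separate `message` dict fixes both. The result follows the OpenAI chat format for assistant tool calls, roughly as below (all values are placeholders, not real tool-call data):

example_message = {
    "role": "assistant",
    "tool_calls": [
        {
            "type": "function",
            "id": "call_123",  # placeholder tool-call id
            "function": {
                "name": "pdb",
                "arguments": '{"command": "b main.py:10"}',
            },
        },
    ],
    "content": "Setting a breakpoint before re-running the tests.",
    # "reasoning_content" is only added when response.reasoning_response is set.
}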

scripts/run.py

Lines changed: 1 addition & 1 deletion
@@ -141,7 +141,7 @@ def run_agent(args, task_name: str, task_data: dict, config: dict):
         raise

     # save trajectory
-    save_trajectory(agent, task_name, task_path, task_logger)
+    save_trajectory(agent, task_path, task_logger)

     # optionally apply patch
     if config["save_patch"]:
