From 68a2c663fdd51117ab47aa99fb3edf6d1e8bc924 Mon Sep 17 00:00:00 2001
From: Noam Gariani <91398631+noamgariani11@users.noreply.github.com>
Date: Mon, 30 Sep 2024 19:36:34 -0500
Subject: [PATCH 1/2] Update Monte_Carlo.py

---
 Solvers/Monte_Carlo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Solvers/Monte_Carlo.py b/Solvers/Monte_Carlo.py
index f3d848a..403c3c6 100644
--- a/Solvers/Monte_Carlo.py
+++ b/Solvers/Monte_Carlo.py
@@ -76,7 +76,7 @@ def make_epsilon_greedy_policy(self):
         Use:
             self.Q: A dictionary that maps from state -> action-values.
                 Each value is a numpy array of length nA
-            self.options.epsilon: Chance the sample a random action. Float betwen 0 and 1.
+            self.options.epsilon: Chance the sample a random action. Float between 0 and 1.
             self.env.action_space.n: Number of actions in the environment.

         Returns:

From b9c887e8fa2de93d7bdc522677949fb9a4756441 Mon Sep 17 00:00:00 2001
From: Noam Gariani <91398631+noamgariani11@users.noreply.github.com>
Date: Mon, 30 Sep 2024 19:39:35 -0500
Subject: [PATCH 2/2] Update Monte_Carlo.py

---
 Solvers/Monte_Carlo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Solvers/Monte_Carlo.py b/Solvers/Monte_Carlo.py
index 403c3c6..ab83a43 100644
--- a/Solvers/Monte_Carlo.py
+++ b/Solvers/Monte_Carlo.py
@@ -146,7 +146,7 @@ def train_episode(self):
         Run a single episode of Monte Carlo Control Off-Policy Control using Weighted Importance Sampling.

         Use:
-            elf.env: OpenAI environment.
+            self.env: OpenAI environment.
             self.options.steps: steps per episode
             self.behavior_policy(state): returns a soft policy which is the behavior policy (act according to this policy)
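
For context on the docstring touched by the first patch: it describes building an epsilon-greedy policy from self.Q, self.options.epsilon, and self.env.action_space.n. Below is a minimal standalone sketch of that idea; the names Q, epsilon, nA, and policy_fn are illustrative assumptions, not the repository's actual implementation.

import numpy as np

def make_epsilon_greedy_policy(Q, epsilon, nA):
    """Return a function state -> numpy array of action probabilities (length nA).

    Illustrative sketch only: with probability epsilon an action is drawn
    uniformly at random; otherwise the greedy action under Q receives the
    remaining probability mass.
    """
    def policy_fn(state):
        probs = np.ones(nA) * (epsilon / nA)      # exploration mass spread over all actions
        best_action = int(np.argmax(Q[state]))    # greedy action w.r.t. current Q-values
        probs[best_action] += 1.0 - epsilon       # remaining mass on the greedy action
        return probs
    return policy_fn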
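
The second patch touches the train_episode docstring, which describes off-policy Monte Carlo control with weighted importance sampling. The following is a minimal sketch of the textbook weighted-importance-sampling update over one recorded episode; episode, Q, C, and behavior_prob are hypothetical names, and a greedy target policy is assumed, so this is not necessarily how the solver in Solvers/Monte_Carlo.py is written.

import numpy as np

def weighted_is_update(episode, Q, C, gamma=1.0):
    """Backward pass over one episode using weighted importance sampling.

    episode: list of (state, action, reward, behavior_prob) tuples collected
             by following the soft behavior policy.
    Q[s]:    numpy array of action-value estimates for state s.
    C[s]:    numpy array of cumulative importance-weight sums for state s.
    """
    G, W = 0.0, 1.0
    for state, action, reward, behavior_prob in reversed(episode):
        G = gamma * G + reward                    # return from this step onward
        C[state][action] += W                     # accumulate importance weight
        Q[state][action] += (W / C[state][action]) * (G - Q[state][action])
        if action != int(np.argmax(Q[state])):    # greedy target policy disagrees,
            break                                 # so all earlier weights would be zero
        W /= behavior_prob                        # target probability is 1 for the greedy action
    return Q, C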