1
1
import argparse
2
- import gym
2
+ import gymnasium as gym
3
3
import os
4
4
import threading
5
5
import time
@@ -68,7 +68,7 @@ class Observer:
68
68
def __init__ (self , batch = True ):
69
69
self .id = rpc .get_worker_info ().id - 1
70
70
self .env = gym .make ('CartPole-v1' )
71
- self .env .seed ( args .seed )
71
+ self .env .reset ( seed = args .seed )
72
72
self .select_action = Agent .select_action_batch if batch else Agent .select_action
73
73
74
74
def run_episode (self , agent_rref , n_steps ):
@@ -92,10 +92,10 @@ def run_episode(self, agent_rref, n_steps):
92
92
)
93
93
94
94
# apply the action to the environment, and get the reward
95
- state , reward , done , _ = self .env .step (action )
95
+ state , reward , terminated , truncated , _ = self .env .step (action )
96
96
rewards [step ] = reward
97
97
98
- if done or step + 1 >= n_steps :
98
+ if terminated or truncated or step + 1 >= n_steps :
99
99
curr_rewards = rewards [start_step :(step + 1 )]
100
100
R = 0
101
101
for i in range (curr_rewards .numel () - 1 , - 1 , - 1 ):
@@ -226,8 +226,7 @@ def run_worker(rank, world_size, n_episode, batch, print_log=True):
226
226
last_reward , running_reward = agent .run_episode (n_steps = NUM_STEPS )
227
227
228
228
if print_log :
229
- print ('Episode {}\t Last reward: {:.2f}\t Average reward: {:.2f}' .format (
230
- i_episode , last_reward , running_reward ))
229
+ print (f'Episode { i_episode } \t Last reward: { last_reward :.2f} \t Average reward: { running_reward :.2f} ' )
231
230
else :
232
231
# other ranks are the observer
233
232
rpc .init_rpc (OBSERVER_NAME .format (rank ), rank = rank , world_size = world_size )
0 commit comments