Skip to content

Commit 0f5bb4d

Browse files
author
Ervin T
authored
Merge pull request #2704 from Unity-Technologies/hotfix-0.10.1
Merge Hotfix 0.10.1
2 parents 056b4d3 + 8912a86 commit 0f5bb4d

File tree

21 files changed

+151
-86
lines changed

21 files changed

+151
-86
lines changed

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Brains/PushBlockLearning.asset

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,10 @@ MonoBehaviour:
1212
m_Name: PushBlockLearning
1313
m_EditorClassIdentifier:
1414
brainParameters:
15-
vectorObservationSize: 0
15+
vectorObservationSize: 70
1616
numStackedVectorObservations: 3
1717
vectorActionSize: 07000000
18-
cameraResolutions:
19-
- width: 84
20-
height: 84
21-
blackAndWhite: 0
18+
cameraResolutions: []
2219
vectorActionDescriptions:
2320
-
2421
vectorActionSpaceType: 0

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3196,7 +3196,7 @@ MonoBehaviour:
31963196
m_Script: {fileID: 11500000, guid: b8db44472779248d3be46895c4d562d5, type: 3}
31973197
m_Name:
31983198
m_EditorClassIdentifier:
3199-
brain: {fileID: 11400000, guid: 59a04e208fb8a423586adf25bf1fecd0, type: 2}
3199+
brain: {fileID: 11400000, guid: 60f0ffcd08c3b43a6bdc746cfc0c4059, type: 2}
32003200
agentParameters:
32013201
agentCameras:
32023202
- {fileID: 20712684238256298}

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -609,9 +609,7 @@ MonoBehaviour:
609609
broadcastHub:
610610
broadcastingBrains:
611611
- {fileID: 11400000, guid: 60f0ffcd08c3b43a6bdc746cfc0c4059, type: 2}
612-
m_BrainsToControl:
613-
- {fileID: 11400000, guid: 60f0ffcd08c3b43a6bdc746cfc0c4059, type: 2}
614-
m_MaxSteps: 0
612+
m_BrainsToControl: []
615613
m_TrainingConfiguration:
616614
width: 80
617615
height: 80

config/sac_trainer_config.yaml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ FoodCollectorLearning:
3434

3535
BouncerLearning:
3636
normalize: true
37-
beta: 0.0
3837
max_steps: 5.0e5
3938
num_layers: 2
4039
hidden_units: 64
@@ -43,7 +42,6 @@ BouncerLearning:
4342
PushBlockLearning:
4443
max_steps: 5.0e4
4544
init_entcoef: 0.05
46-
beta: 1.0e-2
4745
hidden_units: 256
4846
summary_freq: 2000
4947
time_horizon: 64
@@ -70,7 +68,6 @@ BigWallJumpLearning:
7068
StrikerLearning:
7169
max_steps: 5.0e5
7270
learning_rate: 1e-3
73-
beta: 1.0e-2
7471
hidden_units: 256
7572
summary_freq: 2000
7673
time_horizon: 128
@@ -81,7 +78,6 @@ StrikerLearning:
8178
GoalieLearning:
8279
max_steps: 5.0e5
8380
learning_rate: 1e-3
84-
beta: 1.0e-2
8581
hidden_units: 256
8682
summary_freq: 2000
8783
time_horizon: 128
@@ -119,7 +115,6 @@ VisualPyramidsLearning:
119115
hidden_units: 256
120116
buffer_init_steps: 1000
121117
num_layers: 1
122-
beta: 1.0e-2
123118
max_steps: 5.0e5
124119
buffer_size: 500000
125120
init_entcoef: 0.01
@@ -216,7 +211,6 @@ HallwayLearning:
216211
num_layers: 2
217212
hidden_units: 128
218213
memory_size: 256
219-
beta: 0.0
220214
init_entcoef: 0.1
221215
max_steps: 5.0e5
222216
summary_freq: 1000
@@ -229,7 +223,6 @@ VisualHallwayLearning:
229223
num_layers: 1
230224
hidden_units: 128
231225
memory_size: 256
232-
beta: 1.0e-2
233226
gamma: 0.99
234227
batch_size: 64
235228
max_steps: 5.0e5
@@ -243,7 +236,6 @@ VisualPushBlockLearning:
243236
num_layers: 1
244237
hidden_units: 128
245238
memory_size: 256
246-
beta: 1.0e-2
247239
gamma: 0.99
248240
buffer_size: 1024
249241
batch_size: 64

gym-unity/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@
1111
author_email="[email protected]",
1212
url="https://github.com/Unity-Technologies/ml-agents",
1313
packages=find_packages(),
14-
install_requires=["gym", "mlagents_envs==0.10.0"],
14+
install_requires=["gym", "mlagents_envs==0.10.1"],
1515
)

markdown-link-check.config.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
{
88
"pattern": "^https://developer.nvidia.com/compute/machine-learning/cudnn/secure",
99
"comment": "Requires login"
10+
},
11+
{
12+
"pattern": "^https?://bair.berkeley.edu",
13+
"comment": "Temporary berkeley outage"
1014
}
1115
]
1216
}

ml-agents-envs/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="mlagents_envs",
8-
version="0.10.0",
8+
version="0.10.1",
99
description="Unity Machine Learning Agents Interface",
1010
url="https://github.com/Unity-Technologies/ml-agents",
1111
author="Unity Technologies",

ml-agents/mlagents/trainers/bc/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def __init__(
4040
for size in self.act_size:
4141
policy_branches.append(
4242
tf.layers.dense(
43-
hidden,
43+
hidden_reg,
4444
size,
4545
activation=None,
4646
use_bias=False,

ml-agents/mlagents/trainers/bc/trainer.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,12 @@ def update_policy(self):
129129
len(self.demonstration_buffer.update_buffer["actions"]) // self.n_sequences,
130130
self.batches_per_epoch,
131131
)
132-
for i in range(num_batches):
132+
133+
batch_size = self.n_sequences * self.policy.sequence_length
134+
135+
for i in range(0, num_batches * batch_size, batch_size):
133136
update_buffer = self.demonstration_buffer.update_buffer
134-
start = i * self.n_sequences
135-
end = (i + 1) * self.n_sequences
136-
mini_batch = update_buffer.make_mini_batch(start, end)
137+
mini_batch = update_buffer.make_mini_batch(i, i + batch_size)
137138
run_out = self.policy.update(mini_batch, self.n_sequences)
138139
loss = run_out["policy_loss"]
139140
batch_losses.append(loss)

ml-agents/mlagents/trainers/ppo/policy.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import logging
22
import numpy as np
3-
from typing import Any, Dict
3+
from typing import Any, Dict, Optional
44
import tensorflow as tf
55

66
from mlagents.envs.timers import timed
7-
from mlagents.envs.brain import BrainInfo
7+
from mlagents.envs.brain import BrainInfo, BrainParameters
88
from mlagents.trainers.models import EncoderType, LearningRateSchedule
99
from mlagents.trainers.ppo.models import PPOModel
1010
from mlagents.trainers.tf_policy import TFPolicy
@@ -17,7 +17,14 @@
1717

1818

1919
class PPOPolicy(TFPolicy):
20-
def __init__(self, seed, brain, trainer_params, is_training, load):
20+
def __init__(
21+
self,
22+
seed: int,
23+
brain: BrainParameters,
24+
trainer_params: Dict[str, Any],
25+
is_training: bool,
26+
load: bool,
27+
):
2128
"""
2229
Policy for Proximal Policy Optimization Networks.
2330
:param seed: Random seed.
@@ -29,8 +36,8 @@ def __init__(self, seed, brain, trainer_params, is_training, load):
2936
super().__init__(seed, brain, trainer_params)
3037

3138
reward_signal_configs = trainer_params["reward_signals"]
32-
self.inference_dict = {}
33-
self.update_dict = {}
39+
self.inference_dict: Dict[str, tf.Tensor] = {}
40+
self.update_dict: Dict[str, tf.Tensor] = {}
3441
self.stats_name_to_update_name = {
3542
"Losses/Value Loss": "value_loss",
3643
"Losses/Policy Loss": "policy_loss",
@@ -42,6 +49,7 @@ def __init__(self, seed, brain, trainer_params, is_training, load):
4249
self.create_reward_signals(reward_signal_configs)
4350

4451
with self.graph.as_default():
52+
self.bc_module: Optional[BCModule] = None
4553
# Create pretrainer if needed
4654
if "pretraining" in trainer_params:
4755
BCModule.check_config(trainer_params["pretraining"])
@@ -52,8 +60,6 @@ def __init__(self, seed, brain, trainer_params, is_training, load):
5260
default_num_epoch=trainer_params["num_epoch"],
5361
**trainer_params["pretraining"],
5462
)
55-
else:
56-
self.bc_module = None
5763

5864
if load:
5965
self._load_graph()

0 commit comments

Comments (0)