Commit 881f0ca

unlimblue and shuaipengli authored

Change lr_min to min_lr_factor (#1471)

[fixed] #1457

Co-authored-by: shuaipengli <[email protected]>

1 parent 327a99c commit 881f0ca

File tree

11 files changed: +30, -26 lines


tests/unit_tests/test_lr_scheduler.py

Lines changed: 12 additions & 8 deletions
@@ -37,7 +37,7 @@ def create_job_config(
         warmup_steps=None,
         decay_ratio=None,
         decay_type=None,
-        lr_min=None,
+        min_lr_factor=None,
     ):
         # Create a job config with the specified parameters
         args = [
@@ -58,7 +58,11 @@ def create_job_config(
         args += (
             ["--lr_scheduler.decay_type", decay_type] if decay_type is not None else []
         )
-        args += ["--lr_scheduler.lr_min", str(lr_min)] if lr_min is not None else []
+        args += (
+            ["--lr_scheduler.min_lr_factor", str(min_lr_factor)]
+            if min_lr_factor is not None
+            else []
+        )

         config_manager = ConfigManager()
         # Create base config with parameters passed directly
@@ -74,7 +78,7 @@ def test_linear_warmup_decay(self):
             warmup_steps=2,
             decay_ratio=None,  # Use default decay: start decay immediately
             decay_type=None,
-            lr_min=None,
+            min_lr_factor=None,
         )

         # Build the lr scheduler
@@ -116,7 +120,7 @@ def test_warmup_stable_decay(self):
             warmup_steps=2,
             decay_ratio=0.5,  # 50% of steps for decay
             decay_type="linear",
-            lr_min=0.0,
+            min_lr_factor=0.0,
         )

         # Build the lr scheduler
@@ -157,7 +161,7 @@ def test_min_lr(self):
             warmup_steps=2,
             decay_ratio=None,
             decay_type="linear",
-            lr_min=0.2,  # 20% of base LR as minimum
+            min_lr_factor=0.2,  # 20% of base LR as minimum
         )

         # Build the lr scheduler
@@ -180,7 +184,7 @@ def test_warmup_exceeds_training(self):
             warmup_steps=10,  # More than training steps
             decay_ratio=None,
             decay_type="linear",
-            lr_min=0.0,
+            min_lr_factor=0.0,
         )

         # Build the lr scheduler - should adjust warmup steps
@@ -216,7 +220,7 @@ def test_warmup_stable_only(self):
             warmup_steps=2,
             decay_ratio=0.0,  # 0% of steps for decay (no decay)
             decay_type="linear",
-            lr_min=0.0,
+            min_lr_factor=0.0,
         )

         # Build the lr scheduler
@@ -258,7 +262,7 @@ def test_warmup_plus_decay_exceeds_training(self):
             warmup_steps=5,
             decay_ratio=0.8,  # 80% of steps for decay (8 steps)
             decay_type="linear",
-            lr_min=0.0,
+            min_lr_factor=0.0,
         )

         # Build the lr scheduler - should adjust warmup steps
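As a side note, the renamed CLI flag follows the same optional-argument pattern as the other scheduler flags in this helper. A minimal, self-contained sketch of that pattern (the option string matches the diff; the surrounding values are illustrative only):

    # Build CLI-style args, appending the flag only when a value is given.
    min_lr_factor = 0.2

    args = ["--lr_scheduler.warmup_steps", "2"]
    args += (
        ["--lr_scheduler.min_lr_factor", str(min_lr_factor)]
        if min_lr_factor is not None
        else []
    )
    print(args)
    # ['--lr_scheduler.warmup_steps', '2', '--lr_scheduler.min_lr_factor', '0.2']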

torchtitan/components/lr_scheduler.py

Lines changed: 5 additions & 5 deletions
@@ -127,15 +127,15 @@ def build_lr_schedulers(
     # Add a vitual last step to prevent the learning rate from dropping to 0
     stable_steps = training_steps + 1 - warmup_steps - decay_steps
     lr_decay_type = lr_scheduler_config.decay_type
-    lr_min = lr_scheduler_config.lr_min
+    min_lr_factor = lr_scheduler_config.min_lr_factor

     def linear_warmup_stable_decay(
         current_step: int,
         warmup_steps: int,
         stable_steps: int,
         decay_steps: int,
         lr_decay_type: str,
-        lr_min: float,
+        min_lr_factor: float,
     ):
         """
         Computes linear warmup followed by stable learning rate for a while,
@@ -150,7 +150,7 @@ def linear_warmup_stable_decay(
         2. `sqrt`: decays as 1 minus the square root of the decay progress.
         3. `cosine`: follows a cosine curve, decaying according to the values of the half-period of the cosine function.

-        If `lr_min` is specified, the decay range is scaled from 1 to `lr_min`
+        If `min_lr_factor` is specified, the decay range is scaled from 1 to `min_lr_factor`
         to ensure the learning rate does not drop below this minimum value.
         """
         warmup_stable_steps = warmup_steps + stable_steps
@@ -176,7 +176,7 @@ def linear_warmup_stable_decay(
             curr_adjustment = 1 - math.sqrt(progress)
         elif lr_decay_type == "cosine":
             curr_adjustment = 0.5 * (1.0 + math.cos(math.pi * progress))
-        curr_adjustment = lr_min + (1 - lr_min) * curr_adjustment
+        curr_adjustment = min_lr_factor + (1 - min_lr_factor) * curr_adjustment
         return curr_adjustment

     lr_lambda = functools.partial(
@@ -185,6 +185,6 @@ def linear_warmup_stable_decay(
         stable_steps=stable_steps,
         decay_steps=decay_steps,
         lr_decay_type=lr_decay_type,
-        lr_min=lr_min,
+        min_lr_factor=min_lr_factor,
     )
     return LRSchedulersContainer(optimizers, lr_lambda)
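For intuition, here is a minimal, self-contained sketch of the schedule multiplier this patch renames. It mirrors the decay math visible in the diff; the warmup expression and the helper name `schedule_factor` are assumptions for illustration, not torchtitan's exact code:

    import math

    def schedule_factor(
        current_step: int,
        warmup_steps: int,
        stable_steps: int,
        decay_steps: int,
        lr_decay_type: str = "cosine",
        min_lr_factor: float = 0.1,
    ) -> float:
        """Multiplier applied to optimizer.lr at a given step (sketch)."""
        warmup_stable_steps = warmup_steps + stable_steps
        if current_step < warmup_steps:
            # Linear warmup toward the base learning rate (assumed form).
            return current_step / max(warmup_steps, 1)
        if current_step < warmup_stable_steps:
            # Stable phase: hold the base learning rate.
            return 1.0
        # Decay phase: progress runs from 0 to 1 over decay_steps.
        progress = (current_step - warmup_stable_steps) / max(decay_steps, 1)
        if lr_decay_type == "linear":
            adj = 1 - progress
        elif lr_decay_type == "sqrt":
            adj = 1 - math.sqrt(progress)
        else:  # "cosine"
            adj = 0.5 * (1.0 + math.cos(math.pi * progress))
        # Rescale the decay from [0, 1] to [min_lr_factor, 1] so the LR
        # never drops below optimizer.lr * min_lr_factor.
        return min_lr_factor + (1 - min_lr_factor) * adj

    # warmup=2, stable=4, decay=6: mid-warmup, stable phase, end of decay.
    for step in (1, 4, 12):
        print(step, round(schedule_factor(step, 2, 4, 6), 4))
    # 1 0.5
    # 4 1.0
    # 12 0.1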

torchtitan/config/job_config.py

Lines changed: 3 additions & 3 deletions
@@ -155,11 +155,11 @@ class LRScheduler:
     - 'cosine': smoothly decays learning rate following a cosine curve
     """

-    lr_min: float = 0.0
+    min_lr_factor: float = 0.0
     """
     Min lr ratio for lr scheduler.
-    If provided, the range of decay factor is scaled from 1 to `lr_min`
-    to ensure the learning rate does not drop below `optimizer.lr * lr_scheduler.lr_min`.
+    If provided, the range of decay factor is scaled from 1 to `min_lr_factor`
+    to ensure the learning rate does not drop below `optimizer.lr * lr_scheduler.min_lr_factor`.
     """
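In other words, the field is a multiplier on the base learning rate, not an absolute value. A one-line check, with an illustrative base LR:

    base_lr = 2.2e-4
    min_lr_factor = 0.1
    print(base_lr * min_lr_factor)  # 2.2e-05 -- the lowest LR the schedule can reach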

torchtitan/experiments/deepseek_v3/train_configs/deepseek_v2.toml

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ implementation = "foreach"
 warmup_steps = 100  # lr scheduler warm up, normally 20% of the train steps
 decay_ratio = 0.8  # lr scheduler decay ratio, 80% of the train steps
 decay_type = "linear"
-lr_min = 0.1
+min_lr_factor = 0.1

 [training]
 local_batch_size = 2  # 8

torchtitan/experiments/llama4/train_configs/debug_model.toml

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ eps = 1e-15
 warmup_steps = 2  # lr scheduler warm up, normally 20% of the train steps
 decay_ratio = 0.8  # lr scheduler decay ratio, 80% of the train steps
 decay_type = "linear"
-lr_min = 0.1
+min_lr_factor = 0.1

 [training]
 local_batch_size = 8

torchtitan/experiments/llama4/train_configs/llama4_17bx128e.toml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ eps = 1e-15

 [lr_scheduler]
 warmup_steps = 600
-lr_min = 0.1
+min_lr_factor = 0.1

 [training]
 local_batch_size = 1

torchtitan/experiments/llama4/train_configs/llama4_17bx16e.toml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ eps = 1e-15

 [lr_scheduler]
 warmup_steps = 600
-lr_min = 0.1
+min_lr_factor = 0.1

 [training]
 local_batch_size = 8

torchtitan/models/deepseek_v3/train_configs/debug_model.toml

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ eps = 1e-8
 warmup_steps = 2  # lr scheduler warm up, normally 20% of the train steps
 decay_ratio = 0.8  # lr scheduler decay ratio, 80% of the train steps
 decay_type = "linear"
-lr_min = 0.0
+min_lr_factor = 0.0

 [training]
 local_batch_size = 8

torchtitan/models/deepseek_v3/train_configs/deepseek_v3_16b.toml

Lines changed: 2 additions & 2 deletions
@@ -33,8 +33,8 @@ eps = 1e-8
 [lr_scheduler]
 warmup_steps = 200  # lr scheduler warm up, normally 20% of the train steps
 decay_ratio = 0.8  # lr scheduler decay ratio, 80% of the train steps
-decay_type = "linear"
-lr_min = 2.2e-5
+decay_type = "cosine"
+min_lr_factor = 0.1

 [training]
 local_batch_size = 8
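Note that this config and the 671b one below replace the absolute-looking `lr_min = 2.2e-5` with `min_lr_factor = 0.1`; if the accompanying optimizer learning rate is 2.2e-4, the new factor reproduces the same 2.2e-5 floor, now expressed in the units the scheduler actually uses.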

torchtitan/models/deepseek_v3/train_configs/deepseek_v3_671b.toml

Lines changed: 2 additions & 2 deletions
@@ -33,8 +33,8 @@ eps = 1e-8
 [lr_scheduler]
 warmup_steps = 2_000  # lr scheduler warm up, normally 20% of the train steps
 decay_ratio = 0.8  # lr scheduler decay ratio, 80% of the train steps
-decay_type = "linear"
-lr_min = 2.2e-5
+decay_type = "cosine"
+min_lr_factor = 0.1

 [training]
 local_batch_size = 4
