@@ -134,8 +134,7 @@ def execute_backward_optimizers_( # noqa C901
134
134
]
135
135
)
136
136
and (
137
- use_cpu
138
- or optimizer != OptimType .EXACT_ROWWISE_ADAGRAD
137
+ optimizer != OptimType .EXACT_ROWWISE_ADAGRAD
139
138
or weight_decay_mode
140
139
not in [
141
140
WeightDecayMode .COUNTER ,
@@ -1205,7 +1204,7 @@ def test_backward_optimizers_partial_rowwise_adam_bf16_momentum( # noqa C901
1205
1204
deadline = None ,
1206
1205
suppress_health_check = [HealthCheck .filter_too_much , HealthCheck .data_too_large ],
1207
1206
)
1208
- @unittest .skipIf (* gpu_unavailable )
1207
+ # @unittest.skipIf(*gpu_unavailable)
1209
1208
def test_backward_optimizers_adagrad ( # noqa C901
1210
1209
self ,
1211
1210
T : int ,
@@ -1247,6 +1246,79 @@ def test_backward_optimizers_adagrad( # noqa C901
1247
1246
counter_halflife = counter_halflife ,
1248
1247
)
1249
1248
1249
@given(
    T=st.integers(min_value=1, max_value=5),
    D=st.integers(min_value=2, max_value=256),
    B=st.integers(min_value=1, max_value=128),
    log_E=st.integers(min_value=3, max_value=5),
    L=st.integers(min_value=2, max_value=20),
    weighted=st.booleans(),
    mixed=st.booleans(),
    mixed_B=st.booleans(),
    long_segments=st.booleans(),
    pooling_mode=st.sampled_from(
        [PoolingMode.SUM, PoolingMode.MEAN, PoolingMode.NONE]
    ),
    weight_decay_mode=st.sampled_from(
        [WeightDecayMode.COUNTER, WeightDecayMode.COWCLIP]
    ),
    counter_weight_decay_mode=st.sampled_from(
        [
            CounterWeightDecayMode.NONE,
            CounterWeightDecayMode.L2,
            CounterWeightDecayMode.DECOUPLE,
            CounterWeightDecayMode.ADAGRADW,
        ]
    ),
)
@settings(
    verbosity=VERBOSITY,
    max_examples=MAX_EXAMPLES_LONG_RUNNING,
    deadline=None,
    suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.data_too_large],
)
# No GPU-availability skip here: this test always passes use_cpu=True below.
def test_backward_optimizers_adagrad_with_counter_cpu(  # noqa C901
    self,
    T: int,
    D: int,
    B: int,
    log_E: int,
    L: int,
    weighted: bool,
    mixed: bool,
    mixed_B: bool,
    long_segments: bool,
    pooling_mode: PoolingMode,
    weight_decay_mode: WeightDecayMode,
    counter_weight_decay_mode: CounterWeightDecayMode,
) -> None:
    """Exercise EXACT_ROWWISE_ADAGRAD on CPU with counter-style weight decay.

    Hypothesis draws the problem sizes, the pooling mode, one of the
    COUNTER / COWCLIP weight-decay modes and a counter weight-decay mode,
    then delegates to ``execute_backward_optimizers_`` with ``use_cpu``
    fixed to ``True``.
    """
    # mixed_B is forced off whenever the pooling mode is NONE.
    effective_mixed_B = False if pooling_mode == PoolingMode.NONE else mixed_B

    self.execute_backward_optimizers_(
        T,
        D,
        B,
        log_E,
        L,
        weighted,
        mixed,
        effective_mixed_B,
        OptimType.EXACT_ROWWISE_ADAGRAD,
        long_segments,
        pooling_mode,
        True,  # use_cpu
        weight_decay_mode,
        counter_weight_decay_mode=counter_weight_decay_mode,
    )
1321
+
1250
1322
@given (
1251
1323
T = st .integers (min_value = 1 , max_value = 5 ),
1252
1324
D = st .sampled_from ([16 , 32 , 40 , 48 , 64 ]),
0 commit comments