We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
use_custom_sort_vjp
False
1 parent ca2ae66, commit 2c9501a. Copy full SHA for 2c9501a
src/MaxText/configs/base.yml
@@ -167,7 +167,7 @@ sparse_matmul: True
167
capacity_factor: -1.0 # a factor to decide expert capacity for token dropping, and no dropping by default
168
load_balance_loss_weight: 0.01 # weight for the load balance loss
169
use_random_routing: False # whether to use random routing for debug/test purpose
170
-use_custom_sort_vjp: True # whether to use a custom sort vjp for sparse matmul ops
+use_custom_sort_vjp: False # whether to use a custom sort vjp for sparse matmul ops
171
use_ring_of_experts: False # whether to use ring of experts for sparse matmul expert parallelism
172
# Tunable tiling dimensions used for Megablox
173
tile_batch_seq: 512
0 commit comments