@@ -69,16 +69,15 @@ def __init__(
         is_training=True,
         text_config={
             "vocab_size": 99,
-            "hidden_size": 32,
-            "intermediate_size": 37,
-            "num_hidden_layers": 4,
-            "num_attention_heads": 4,
-            "num_key_value_heads": 2,
+            "hidden_size": 16,
+            "intermediate_size": 22,
+            "num_hidden_layers": 2,
+            "num_attention_heads": 2,
+            "num_key_value_heads": 1,
             "output_channels": 64,
             "hidden_act": "silu",
             "max_position_embeddings": 512,
             "rope_scaling": {"type": "default", "mrope_section": [2, 1, 1]},
-            "max_window_layers": 3,
             "rope_theta": 10000,
             "tie_word_embeddings": True,
             "bos_token_id": 0,
@@ -87,11 +86,10 @@ def __init__(
         },
         vision_config={
             "depth": 2,
-            "embed_dim": 32,
             "hidden_act": "silu",
-            "hidden_size": 32,
-            "mlp_ratio": 4,
-            "num_heads": 4,
+            "hidden_size": 48,
+            "out_hidden_size": 16,
+            "intermediate_size": 22,
             "patch_size": 14,
             "spatial_merge_size": 1,
             "temporal_patch_size": 2,
@@ -239,10 +237,6 @@ def test_sdpa_can_dispatch_on_flash(self):
     def test_multi_gpu_data_parallel_forward(self):
         pass
 
-    @unittest.skip(reason="We cannot configure to output a smaller model.")
-    def test_model_is_small(self):
-        pass
-
     @unittest.skip("Error with compilation")
     def test_generate_from_inputs_embeds_with_static_cache(self):
         pass
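
Why the test_model_is_small skip can now be dropped: the shrunken text_config cuts the dummy model's parameter count several-fold. A rough back-of-the-envelope check, as a minimal sketch (the approx_text_params helper is hypothetical and not part of the test file; it assumes a standard decoder layer with grouped-query attention and a gated SiLU MLP, and ignores biases, norms, and the vision tower):

# Rough parameter count for a decoder-only text config with
# grouped-query attention and a gated (SiLU) MLP; the embedding
# matrix is counted once, matching "tie_word_embeddings": True.
# Hypothetical helper for illustration only, not part of the diff.
def approx_text_params(vocab, hidden, inter, layers, heads, kv_heads):
    head_dim = hidden // heads
    attn = hidden * hidden                    # q_proj
    attn += 2 * hidden * kv_heads * head_dim  # k_proj + v_proj
    attn += hidden * hidden                   # o_proj
    mlp = 3 * hidden * inter                  # gate, up, down projections
    return vocab * hidden + layers * (attn + mlp)

old = approx_text_params(99, 32, 37, 4, 4, 2)  # text_config before this diff
new = approx_text_params(99, 16, 22, 2, 2, 1)  # text_config after this diff
print(old, new)  # ~29.7k vs ~5.2k weights, roughly a 5-6x reduction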
0 commit comments