@@ -1054,11 +1054,13 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_EMBD_CLS: (
             "vision_tower.vision_model.embeddings.class_embedding",
+            "model.vision_tower.embeddings.cls_token",  # Intern-S1
             "vision_model.class_embedding",  # llama 4
         ),
 
         MODEL_TENSOR.V_ENC_EMBD_PATCH: (
             "vision_tower.vision_model.embeddings.patch_embedding",
+            "model.vision_tower.embeddings.patch_embeddings.projection",  # Intern-S1
             "vpm.embeddings.patch_embedding",
             "model.vision_model.embeddings.patch_embedding",  # SmolVLM
             "vision_tower.patch_conv",  # pixtral
@@ -1068,13 +1070,15 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_EMBD_POS: (
             "vision_tower.vision_model.embeddings.position_embedding",
+            "model.vision_tower.embeddings.position_embeddings",  # Intern-S1
             "vpm.embeddings.position_embedding",
             "model.vision_model.embeddings.position_embedding",  # SmolVLM
             "vision_model.positional_embedding_vlm",  # llama 4
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_Q: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
+            "model.vision_tower.encoder.layer.{bid}.attention.q_proj",  # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.q_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.q_proj",  # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.q_proj",  # llama4
@@ -1084,10 +1088,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_ATTN_Q_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.attn.q_norm",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.attention.q_norm",  # Intern-S1
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_K: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
+            "model.vision_tower.encoder.layer.{bid}.attention.k_proj",  # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.k_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.k_proj",  # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.k_proj",  # llama4
@@ -1097,10 +1103,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_ATTN_K_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.attn.k_norm",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.attention.k_norm",  # Intern-S1
         ),
 
         MODEL_TENSOR.V_ENC_ATTN_V: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
+            "model.vision_tower.encoder.layer.{bid}.attention.v_proj",  # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.v_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.v_proj",  # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.v_proj",  # llama4
@@ -1111,6 +1119,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_ENC_INPUT_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.layer_norm1",
             "vision_tower.vision_model.encoder.layers.{bid}.norm1",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.layernorm_before",  # Intern-S1
             "vpm.encoder.layers.{bid}.layer_norm1",
             "model.vision_model.encoder.layers.{bid}.layer_norm1",  # SmolVLM
             "vision_tower.transformer.layers.{bid}.attention_norm",  # pixtral
@@ -1121,6 +1130,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_ENC_ATTN_O: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
             "vision_tower.vision_model.encoder.layers.{bid}.attn.proj",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.attention.projection_layer",  # Intern-S1
             "vpm.encoder.layers.{bid}.self_attn.out_proj",
             "model.vision_model.encoder.layers.{bid}.self_attn.out_proj",  # SmolVLM
             "vision_model.model.layers.{bid}.self_attn.o_proj",  # llama4
@@ -1131,6 +1141,7 @@ class TensorNameMap:
         MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (
             "vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
             "vision_tower.vision_model.encoder.layers.{bid}.norm2",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.layernorm_after",  # Intern-S1
             "vpm.encoder.layers.{bid}.layer_norm2",
             "model.vision_model.encoder.layers.{bid}.layer_norm2",  # SmolVLM
             "vision_model.model.layers.{bid}.post_attention_layernorm",  # llama4
@@ -1140,6 +1151,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_FFN_UP: (
             "vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1",
+            "model.vision_tower.encoder.layer.{bid}.mlp.fc1",  # Intern-S1
             "vpm.encoder.layers.{bid}.mlp.fc1",
             "model.vision_model.encoder.layers.{bid}.mlp.fc1",  # SmolVLM, gemma3
             "vision_tower.transformer.layers.{bid}.feed_forward.up_proj",  # pixtral
@@ -1155,6 +1167,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_ENC_FFN_DOWN: (
             "vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2",
+            "model.vision_tower.encoder.layer.{bid}.mlp.fc2",  # Intern-S1
             "vpm.encoder.layers.{bid}.mlp.fc2",
             "model.vision_model.encoder.layers.{bid}.mlp.fc2",  # SmolVLM, gemma3
             "vision_tower.transformer.layers.{bid}.feed_forward.down_proj",  # pixtral
@@ -1165,10 +1178,12 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_LAYER_SCALE_1: (
             "vision_tower.vision_model.encoder.layers.{bid}.ls1",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.lambda_1",  # Intern-S1
         ),
 
         MODEL_TENSOR.V_LAYER_SCALE_2: (
             "vision_tower.vision_model.encoder.layers.{bid}.ls2",  # InternVL
+            "model.vision_tower.encoder.layer.{bid}.lambda_2",  # Intern-S1
         ),
 
         MODEL_TENSOR.V_PRE_NORM: (
@@ -1190,6 +1205,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.V_MM_INP_NORM: (
             "multi_modal_projector.norm",
+            "model.multi_modal_projector.layer_norm",  # Intern-S1
         ),
 
         MODEL_TENSOR.V_MM_SOFT_EMB_NORM: (
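
Note (not part of the diff): each entry above is a pattern containing a "{bid}" block-index placeholder, which TensorNameMap expands per block so that a checkpoint tensor name can be looked up directly. A minimal self-contained sketch of that expansion follows; the GGUF-side names, the expand helper, and the block count are illustrative assumptions, not taken from the diff or from the actual gguf-py implementation.

intern_s1_patterns: dict[str, str] = {
    # hypothetical subset of the table above: HF checkpoint pattern -> GGUF pattern
    "model.vision_tower.encoder.layer.{bid}.attention.q_proj": "v.blk.{bid}.attn_q",
    "model.vision_tower.encoder.layer.{bid}.attention.k_proj": "v.blk.{bid}.attn_k",
    "model.vision_tower.encoder.layer.{bid}.attention.v_proj": "v.blk.{bid}.attn_v",
}

def expand(patterns: dict[str, str], n_blocks: int) -> dict[str, str]:
    # Substitute every block index into both sides of each mapping entry.
    out: dict[str, str] = {}
    for src, dst in patterns.items():
        for bid in range(n_blocks):
            out[src.format(bid=bid)] = dst.format(bid=bid)
    return out

mapping = expand(intern_s1_patterns, n_blocks=2)
print(mapping["model.vision_tower.encoder.layer.1.attention.k_proj"])  # v.blk.1.attn_k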