Skip to content

Commit 42113d1

Browse files
committed
Fix broken rebase
1 parent 6bc2158 commit 42113d1

File tree

4 files changed

+35
-54
lines changed

4 files changed

+35
-54
lines changed

gguf-py/gguf/tensor_mapping.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -420,8 +420,8 @@ class TensorNameMap:
420420
"model.layers.{bid}.residual_mlp.w1", # arctic
421421
"transformer.h.{bid}.mlp.c_fc_0", # exaone
422422
"model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba granite-hybrid
423-
"model.layers.{bid}.block_sparse_moe.gate", # smallthinker
424-
"model.transformer.blocks.{bid}.ff_proj", # llada
423+
"model.layers.{bid}.block_sparse_moe.gate", # smallthinker
424+
"model.transformer.blocks.{bid}.ff_proj", # llada
425425
"model.layers.{bid}.mlp.language_mlp.gate_proj", # cogvlm
426426
),
427427

src/llama-arch.cpp

Lines changed: 24 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1959,27 +1959,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
19591959
},
19601960
{
19611961
LLM_ARCH_DREAM,
1962-
{
1963-
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1964-
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1965-
{ LLM_TENSOR_OUTPUT, "output" },
1966-
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1967-
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1968-
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1969-
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1970-
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1971-
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1972-
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1973-
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1974-
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1975-
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1976-
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1977-
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1978-
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }
1979-
},
1980-
},
1981-
{
1982-
LLM_ARCH_COGVLM,
19831962
{
19841963
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
19851964
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
@@ -1993,13 +1972,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
19931972
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
19941973
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
19951974
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1996-
{ LLM_TENSOR_VISEXP_ATTN_WQ, "blk.%d.vis_attn_q" },
1997-
{ LLM_TENSOR_VISEXP_ATTN_WK, "blk.%d.vis_attn_k" },
1998-
{ LLM_TENSOR_VISEXP_ATTN_WV, "blk.%d.vis_attn_v" },
1999-
{ LLM_TENSOR_VISEXP_ATTN_OUT, "blk.%d.vis_attn_output" },
2000-
{ LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
2001-
{ LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
2002-
{ LLM_TENSOR_VISEXP_FFN_UP, "blk.%d.vis_up" },
20031975
},
20041976
},
20051977
{
@@ -2019,6 +1991,30 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
20191991
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
20201992
},
20211993
},
1994+
{
1995+
LLM_ARCH_COGVLM,
1996+
{
1997+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1998+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1999+
{ LLM_TENSOR_OUTPUT, "output" },
2000+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2001+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2002+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2003+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2004+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2005+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2006+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2007+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2008+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2009+
{ LLM_TENSOR_VISEXP_ATTN_WQ, "blk.%d.vis_attn_q" },
2010+
{ LLM_TENSOR_VISEXP_ATTN_WK, "blk.%d.vis_attn_k" },
2011+
{ LLM_TENSOR_VISEXP_ATTN_WV, "blk.%d.vis_attn_v" },
2012+
{ LLM_TENSOR_VISEXP_ATTN_OUT, "blk.%d.vis_attn_output" },
2013+
{ LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
2014+
{ LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
2015+
{ LLM_TENSOR_VISEXP_FFN_UP, "blk.%d.vis_up" },
2016+
},
2017+
},
20222018
{
20232019
LLM_ARCH_UNKNOWN,
20242020
{

src/llama-model.cpp

Lines changed: 1 addition & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -5307,12 +5307,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
53075307

53085308
// output
53095309
output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
5310-
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
5311-
5312-
// if output is NULL, init from the input tok embed
5313-
if (output == NULL) {
5314-
output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
5315-
}
5310+
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0);
53165311

53175312
for (int i = 0; i < n_layer; ++i) {
53185313
auto & layer = layers[i];
@@ -17477,7 +17472,6 @@ struct llm_build_cogvlm : public llm_graph_context {
1747717472
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
1747817473
Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
1747917474

17480-
// TODO: Check Rope because this might not be the same as cogvlm
1748117475
Qcur = ggml_rope(ctx0, Qcur, inp_pos, n_embd_head, GGML_ROPE_TYPE_NEOX);
1748217476
Kcur = ggml_rope(ctx0, Kcur, inp_pos, n_embd_head, GGML_ROPE_TYPE_NEOX);
1748317477

@@ -17491,12 +17485,6 @@ struct llm_build_cogvlm : public llm_graph_context {
1749117485
cur = build_norm(ffn_inp, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il);
1749217486
cb(cur, "ffn_norm", il);
1749317487

17494-
// Make a standard ffn without the build_ffn function
17495-
//ggml_tensor * tmp = build_lora_mm(ffn_up, cur);
17496-
//ggml_tensor * gate = build_lora_mm(ffn_gate, cur);
17497-
//gate = ggml_silu(ctx0, gate);
17498-
//cur = ggml_mul(ctx0, gate, tmp);
17499-
//cur = build_lora_mm(ffn_down, cur);
1750017488
cur = build_ffn(cur,
1750117489
ffn_up, NULL, NULL,
1750217490
ffn_gate, NULL, NULL,

tools/mtmd/clip.cpp

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1669,11 +1669,8 @@ struct clip_graph {
16691669
auto & layer = model.layers[il];
16701670
ggml_tensor * cur = inpL; // inpL = residual, cur = hidden_states
16711671

1672-
// Check if this is COGVLM projector type for post-norm layernorm order
1673-
const bool is_cogvlm = ctx->proj_type() == PROJECTOR_TYPE_COGVLM;
1674-
1675-
// layernorm1 (only for non-COGVLM)
1676-
if (!is_cogvlm) {
1672+
// layernorm1
1673+
if (ctx->proj_type() != PROJECTOR_TYPE_COGVLM) {
16771674
cur = build_norm(cur, layer.ln_1_w, layer.ln_1_b, norm_t, eps, il);
16781675
cb(cur, "layer_inp_normed", il);
16791676
}
@@ -1730,8 +1727,8 @@ struct clip_graph {
17301727
cb(cur, "attn_out_scaled", il);
17311728
}
17321729

1733-
// Apply layernorm AFTER attention for COGVLM (post-norm)
1734-
if (is_cogvlm) {
1730+
// Apply layernorm after attention for cogvlm
1731+
if (ctx->proj_type() == PROJECTOR_TYPE_COGVLM) {
17351732
cur = build_norm(cur, layer.ln_1_w, layer.ln_1_b, norm_t, eps, il);
17361733
cb(cur, "attn_post_norm", il);
17371734
}
@@ -1743,8 +1740,8 @@ struct clip_graph {
17431740

17441741
cb(cur, "ffn_inp", il);
17451742

1746-
// layernorm2 (only for non-COGVLM)
1747-
if (!is_cogvlm) {
1743+
// layernorm2
1744+
if (ctx->proj_type() != PROJECTOR_TYPE_COGVLM) {
17481745
cur = build_norm(cur, layer.ln_2_w, layer.ln_2_b, norm_t, eps, il);
17491746
cb(cur, "ffn_inp_normed", il);
17501747
}
@@ -1763,8 +1760,8 @@ struct clip_graph {
17631760
cb(cur, "ffn_out_scaled", il);
17641761
}
17651762

1766-
// Apply layernorm AFTER MLP for COGVLM (post-norm)
1767-
if (is_cogvlm) {
1763+
// Apply layernorm after mlp for cogvlm
1764+
if (ctx->proj_type() == PROJECTOR_TYPE_COGVLM) {
17681765
cur = build_norm(cur, layer.ln_2_w, layer.ln_2_b, norm_t, eps, il);
17691766
cb(cur, "ffn_post_norm", il);
17701767
}

0 commit comments

Comments
 (0)