Skip to content

Commit 272c1f0

Browse files

feat: support GLM 4.5 family of models

1 parent: fdf3da0 · commit: 272c1f0

File tree

2 files changed: +11 additions, −9 deletions

convert_hf_to_gguf.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6645,10 +6645,11 @@ def set_gguf_parameters(self):
66456645
def modify_tensors(
66466646
self, data_torch: Tensor, name: str, bid: int | None
66476647
) -> Iterable[tuple[str, Tensor]]:
6648-
# Handle special GLM4_MOE layer 46 tensors (nextn prediction layer)
6648+
# Handle layer 46 tensors - preserve all for future MTP support
66496649
if bid is not None and bid == 46:
6650-
# Layer 46 is the nextn prediction layer - skip all tensors
6651-
return []
6650+
# Convert layer 46 tensors to GGUF naming but don't try to map them
6651+
new_name = name.replace("model.layers.", "blk.")
6652+
return [(new_name, data_torch)]
66526653

66536654
if name.startswith("model.visual."): # ignore visual part
66546655
return []
@@ -6659,8 +6660,8 @@ def modify_tensors(
66596660
if name == "model.embed_tokens.weight":
66606661
return [(self.map_tensor_name("token_embd.weight"), data_torch)]
66616662

6662-
# Handle routed experts
6663-
if name.find("mlp.experts") != -1 and "shared_experts" not in name:
6663+
# Handle routed experts (skip for NextN layer 46)
6664+
if name.find("mlp.experts") != -1 and "shared_experts" not in name and bid != 46:
66646665
n_experts = self.hparams["n_routed_experts"]
66656666
assert bid is not None
66666667

@@ -6727,16 +6728,17 @@ def modify_tensors(
67276728
new_name = name
67286729
return [(self.map_tensor_name(new_name), data_torch)]
67296730

6730-
# Handle other special GLM4_MOE tensors (nextn prediction)
6731+
# Handle special NextN tensors - preserve for future MTP support
67316732
if (
67326733
".embed_tokens." in name
67336734
or ".shared_head." in name
67346735
or ".eh_proj." in name
67356736
or ".enorm." in name
67366737
or ".hnorm." in name
67376738
):
6738-
# Skip these special tensors - they are for nextn prediction
6739-
return []
6739+
# For NextN tensors, convert to GGUF naming convention
6740+
new_name = name.replace("model.layers.", "blk.").replace("model.", "")
6741+
return [(new_name, data_torch)]
67406742

67416743
return super().modify_tensors(data_torch, name, bid)
67426744

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4399,7 +4399,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
43994399
layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, TENSOR_NOT_REQUIRED);
44004400
layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, TENSOR_NOT_REQUIRED);
44014401

4402-
layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd, n_embd }, 0);
4402+
layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, 0);
44034403
layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), { n_embd }, 0);
44044404

44054405
// K/Q norm tensors (optional for GLM-4.5 355B variant)

Comments (0)