@@ -2851,6 +2851,159 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         yield from super().modify_tensors(data_torch, name, bid)


+@ModelBase.register("LLaDAModelLM")
+class LLaDAModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.LLADA
+    undo_permute = True
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # fix for SmolVLM2, missing `num_attention_heads` in config.json
+        if self.hf_arch == "VLlama3ForCausalLM":
+            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
+
+    def get_vocab_base(self) -> tuple[list[str], list[int], str]:
+        tokens: list[str] = []
+        toktypes: list[int] = []
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+
+        vocab_dict = tokenizer.get_vocab()
+        vocab_size = self.hparams.get("vocab_size", len(vocab_dict))
+        assert max(vocab_dict.values()) < vocab_size
+
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab_dict.items()}
+        added_vocab = tokenizer.get_added_vocab()
+
+        for i in range(vocab_size):
+            if i not in reverse_vocab:
+                tokens.append(f"[PAD{i}]")
+                toktypes.append(gguf.TokenType.UNUSED)
+            elif reverse_vocab[i] in added_vocab:
+                tokens.append(reverse_vocab[i])
+                # Check if it's a special token - treat special tokens as CONTROL tokens
+                if hasattr(tokenizer, 'added_tokens_decoder') and i in tokenizer.added_tokens_decoder:
+                    if tokenizer.added_tokens_decoder[i].special:
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.USER_DEFINED)
+                else:
+                    # Fallback: treat all added vocab as control tokens for special tokens like <|im_start|>
+                    toktypes.append(gguf.TokenType.CONTROL)
+            else:
+                tokens.append(reverse_vocab[i])
+                toktypes.append(gguf.TokenType.NORMAL)
+
+        return tokens, toktypes, tokpre
+
+    def set_vocab(self):
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            try:
+                self._set_vocab_llama_hf()
+            except (FileNotFoundError, TypeError):
+                # Llama 3
+                self._set_vocab_gpt2()
+
+        # Apply to CodeLlama only (and ignore for Llama 3 with a vocab size of 128256)
+        if self.hparams.get("vocab_size", 32000) == 32016:
+            special_vocab = gguf.SpecialVocab(
+                self.dir_model, load_merges=False,
+                special_token_types=['prefix', 'suffix', 'middle', 'eot']
+            )
+            special_vocab._set_special_token("prefix", 32007)
+            special_vocab._set_special_token("suffix", 32008)
+            special_vocab._set_special_token("middle", 32009)
+            special_vocab._set_special_token("eot",    32010)
+            special_vocab.add_to_gguf(self.gguf_writer)
+
+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        if tokenizer_config_file.is_file():
+            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+                tokenizer_config_json = json.load(f)
+                if "add_prefix_space" in tokenizer_config_json:
+                    self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
+
+        # Apply to granite small models only
+        if self.hparams.get("vocab_size", 32000) == 49152:
+            self.gguf_writer.add_add_bos_token(False)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self._try_set_pooling_type()
+
+        # Add parameters similar to LlamaModel
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+
+        if (rope_dim := hparams.get("head_dim")) is None:
+            n_heads = hparams.get("num_attention_heads", hparams.get("n_heads"))
+            rope_dim = hparams.get("hidden_size", hparams.get("d_model")) // n_heads
+        self.gguf_writer.add_rope_dimension_count(rope_dim)
+
+        # Set context length for LLaDA
+        context_length = self.hparams.get("max_sequence_length")
+        self.gguf_writer.add_context_length(context_length)
+
+        # Set embedding length (dimension size)
+        embedding_length = self.hparams.get("d_model")
+        self.gguf_writer.add_embedding_length(embedding_length)
+
+        # Set feed forward length (MLP hidden size)
+        feed_forward_length = self.hparams.get("mlp_hidden_size")
+        self.gguf_writer.add_feed_forward_length(feed_forward_length)
+
+        # Set RoPE parameters
+        if "rope_theta" in self.hparams:
+            self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
+
+        # Set RMS norm epsilon
+        if "rms_norm_eps" in self.hparams:
+            self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+
+        # LLaDA models use non-causal attention for diffusion, similar to Dream
+        self.gguf_writer.add_causal_attention(False)
+        # Handle RoPE scaling similar to LlamaModel and Dream
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+        elif rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
+
+        # Add LLaDA-specific parameters
+        mask_token_id = self.hparams.get("mask_token_id")
+        if mask_token_id is not None:
+            self.gguf_writer.add_mask_token_id(mask_token_id)
+
+    @staticmethod
+    def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
+        if n_head_kv is not None and n_head != n_head_kv:
+            n_head = n_head_kv
+        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+                .swapaxes(1, 2)
+                .reshape(weights.shape))
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        n_head = self.hparams.get("num_attention_heads", self.hparams.get("n_heads"))
+        n_kv_head = self.hparams.get("num_key_value_heads", self.hparams.get("n_kv_heads"))
+
+        if self.undo_permute:
+            if name.endswith(("q_proj.weight", "q_proj.bias")):
+                data_torch = LLaDAModel.permute(data_torch, n_head, n_head)
+            if name.endswith(("k_proj.weight", "k_proj.bias")):
+                data_torch = LLaDAModel.permute(data_torch, n_head, n_kv_head)
+
+        # LLaDA model tensors should be mapped directly since it's the base model
+        yield from super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("Ernie4_5_ForCausalLM")
 class Ernie4_5Model(TextModel):
     model_arch = gguf.MODEL_ARCH.ERNIE4_5
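Illustrative sketch (not part of the diff above): a minimal, self-contained check of the row reordering that LLaDAModel.permute applies to q_proj/k_proj weights when undo_permute is set. The tensor sizes and values (n_head, head_dim, n_embd, w) are made up purely for demonstration; the permute body is copied from the diff.

import torch

n_head, head_dim, n_embd = 2, 4, 8
# toy projection weight: row i is filled with the value i so the reordering is visible
w = torch.arange(n_head * head_dim, dtype=torch.float32).repeat_interleave(n_embd).reshape(n_head * head_dim, n_embd)

def permute(weights, n_head, n_head_kv):
    # same operation as LLaDAModel.permute in the diff above
    if n_head_kv is not None and n_head != n_head_kv:
        n_head = n_head_kv
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
            .swapaxes(1, 2)
            .reshape(weights.shape))

out = permute(w, n_head, n_head)
# Within each head the two half-blocks of rows are interleaved:
# head 0 rows [0, 1, 2, 3] come out as [0, 2, 1, 3].
print(out[:, 0])  # tensor([0., 2., 1., 3., 4., 6., 5., 7.])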