@@ -2848,6 +2848,159 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         yield from super().modify_tensors(data_torch, name, bid)
 
 
+@ModelBase.register("LLaDAModelLM")
+class LLaDAModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.LLADA
+    undo_permute = True
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # fix for SmolVLM2, missing `num_attention_heads` in config.json
+        if self.hf_arch == "VLlama3ForCausalLM":
+            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
+
+    def get_vocab_base(self) -> tuple[list[str], list[int], str]:
+        tokens: list[str] = []
+        toktypes: list[int] = []
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+
+        vocab_dict = tokenizer.get_vocab()
+        vocab_size = self.hparams.get("vocab_size", len(vocab_dict))
+        assert max(vocab_dict.values()) < vocab_size
+
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab_dict.items()}
+        added_vocab = tokenizer.get_added_vocab()
+
+        for i in range(vocab_size):
+            if i not in reverse_vocab:
+                tokens.append(f"[PAD{i}]")
+                toktypes.append(gguf.TokenType.UNUSED)
+            elif reverse_vocab[i] in added_vocab:
+                tokens.append(reverse_vocab[i])
+                # Check if it's a special token - treat special tokens as CONTROL tokens
+                if hasattr(tokenizer, 'added_tokens_decoder') and i in tokenizer.added_tokens_decoder:
+                    if tokenizer.added_tokens_decoder[i].special:
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.USER_DEFINED)
+                else:
+                    # Fallback: treat all added vocab as control tokens for special tokens like <|im_start|>
+                    toktypes.append(gguf.TokenType.CONTROL)
+            else:
+                tokens.append(reverse_vocab[i])
+                toktypes.append(gguf.TokenType.NORMAL)
+
+        return tokens, toktypes, tokpre
+
+    def set_vocab(self):
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            try:
+                self._set_vocab_llama_hf()
+            except (FileNotFoundError, TypeError):
+                # Llama 3
+                self._set_vocab_gpt2()
+
+        # Apply to CodeLlama only (and ignore for Llama 3 with a vocab size of 128256)
+        if self.hparams.get("vocab_size", 32000) == 32016:
+            special_vocab = gguf.SpecialVocab(
+                self.dir_model, load_merges=False,
+                special_token_types=['prefix', 'suffix', 'middle', 'eot']
+            )
+            special_vocab._set_special_token("prefix", 32007)
+            special_vocab._set_special_token("suffix", 32008)
+            special_vocab._set_special_token("middle", 32009)
+            special_vocab._set_special_token("eot",    32010)
+            special_vocab.add_to_gguf(self.gguf_writer)
+
+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        if tokenizer_config_file.is_file():
+            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+                tokenizer_config_json = json.load(f)
+                if "add_prefix_space" in tokenizer_config_json:
+                    self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
+
+        # Apply to granite small models only
+        if self.hparams.get("vocab_size", 32000) == 49152:
+            self.gguf_writer.add_add_bos_token(False)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self._try_set_pooling_type()
+
+        # Add parameters similar to LlamaModel
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+
+        if (rope_dim := hparams.get("head_dim")) is None:
+            n_heads = hparams.get("num_attention_heads", hparams.get("n_heads"))
+            rope_dim = hparams.get("hidden_size", hparams.get("d_model")) // n_heads
+        self.gguf_writer.add_rope_dimension_count(rope_dim)
+
+        # Set context length for LLaDA
+        context_length = self.hparams.get("max_sequence_length")
+        self.gguf_writer.add_context_length(context_length)
+
+        # Set embedding length (dimension size)
+        embedding_length = self.hparams.get("d_model")
+        self.gguf_writer.add_embedding_length(embedding_length)
+
+        # Set feed forward length (MLP hidden size)
+        feed_forward_length = self.hparams.get("mlp_hidden_size")
+        self.gguf_writer.add_feed_forward_length(feed_forward_length)
+
+        # Set RoPE parameters
+        if "rope_theta" in self.hparams:
+            self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
+
+        # Set RMS norm epsilon
+        if "rms_norm_eps" in self.hparams:
+            self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+
+        # LLaDA models use non-causal attention for diffusion, similar to Dream
+        self.gguf_writer.add_causal_attention(False)
+        # Handle RoPE scaling similar to LlamaModel and Dream
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+        elif rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
+
+        # Add LLaDA-specific parameters
+        mask_token_id = self.hparams.get("mask_token_id")
+        if mask_token_id is not None:
+            self.gguf_writer.add_mask_token_id(mask_token_id)
+
+    @staticmethod
+    def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
+        if n_head_kv is not None and n_head != n_head_kv:
+            n_head = n_head_kv
+        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+                .swapaxes(1, 2)
+                .reshape(weights.shape))
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        n_head = self.hparams.get("num_attention_heads", self.hparams.get("n_heads"))
+        n_kv_head = self.hparams.get("num_key_value_heads", self.hparams.get("n_kv_heads"))
+
+        if self.undo_permute:
+            if name.endswith(("q_proj.weight", "q_proj.bias")):
+                data_torch = LLaDAModel.permute(data_torch, n_head, n_head)
+            if name.endswith(("k_proj.weight", "k_proj.bias")):
+                data_torch = LLaDAModel.permute(data_torch, n_head, n_kv_head)
+
+        # LLaDA model tensors should be mapped directly since it's the base model
+        yield from super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("Ernie4_5_ForCausalLM")
 class Ernie4_5Model(TextModel):
     model_arch = gguf.MODEL_ARCH.ERNIE4_5