Skip to content

Commit c3deb47

Browse files
kanpuriyanawab authored and mehtamansi29 committed
Qwen3 causal lm (keras-team#2311)
* init
* update
* bug fixes
* add qwen causal lm test
* fix qwen3 tests
1 parent e39b128 commit c3deb47

File tree

5 files changed

+564
-5
lines changed

5 files changed

+564
-5
lines changed

keras_hub/api/models/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,19 @@
322322
from keras_hub.src.models.qwen.qwen_tokenizer import (
323323
QwenTokenizer as Qwen2Tokenizer,
324324
)
325+
<<<<<<< HEAD
325326
from keras_hub.src.models.qwen3.qwen3_backbone import Qwen3Backbone
327+
=======
328+
from keras_hub.src.models.qwen.qwen_tokenizer import (
329+
QwenTokenizer as QwenTokenizer,
330+
)
331+
from keras_hub.src.models.qwen3.qwen3_backbone import (
332+
Qwen3Backbone as Qwen3Backbone,
333+
)
334+
from keras_hub.src.models.qwen3.qwen3_causal_lm import (
335+
Qwen3CausalLM as Qwen3CausalLM,
336+
)
337+
>>>>>>> 5fd34b15 (Qwen3 causal lm (#2311))
326338
from keras_hub.src.models.qwen3.qwen3_causal_lm_preprocessor import (
327339
Qwen3CausalLMPreprocessor,
328340
)

keras_hub/src/models/qwen3/qwen3_attention.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ def _compute_attention(
303303
attention_mask = self._mask_sliding_window(
304304
attention_mask,
305305
cache_update_index=cache_update_index
306-
if cache_update_index
306+
if cache_update_index is not None
307307
else 0,
308308
)
309309
attention_scores = self._masked_softmax(

0 commit comments

Comments (0)