import keras

from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.models.backbone import Backbone
from keras_hub.src.models.hgnetv2.hgnetv2_encoder import HGNetV2Encoder
from keras_hub.src.models.hgnetv2.hgnetv2_layers import HGNetV2Embeddings
from keras_hub.src.utils.keras_utils import standardize_data_format


@keras_hub_export("keras_hub.models.HGNetV2Backbone")
class HGNetV2Backbone(Backbone):
    """HGNetV2 backbone model.

    This class implements the HGNetV2 backbone architecture, a convolutional
    neural network (CNN) optimized for GPU efficiency. HGNetV2 is frequently
    used as a lightweight backbone in object detection pipelines such as
    RT-DETR and YOLO variants, delivering strong performance on
    classification and detection tasks with speed and accuracy gains over
    larger CNN backbones.

    Args:
        depths: list of ints, the number of blocks in each stage.
        embedding_size: int, the size of the embedding layer.
        hidden_sizes: list of ints, the sizes of the hidden layers.
        stem_channels: list of ints, the channels for the stem part.
        hidden_act: str, the activation function for hidden layers.
        use_learnable_affine_block: bool, whether to use learnable affine
            transformations.
        stackwise_stage_filters: list of tuples, where each tuple contains
            the configuration for a stage: (stage_in_channels,
            stage_mid_channels, stage_out_channels, stage_num_blocks,
            stage_num_of_layers, stage_kernel_size).
            - stage_in_channels: int, input channels for the stage.
            - stage_mid_channels: int, middle channels for the stage.
            - stage_out_channels: int, output channels for the stage.
            - stage_num_blocks: int, number of blocks in the stage.
            - stage_num_of_layers: int, number of layers in each block.
            - stage_kernel_size: int, kernel size for the stage.
        apply_downsample: list of bools, whether to downsample in each stage.
        use_lightweight_conv_block: list of bools, whether to use HGNetV2
            lightweight convolutional blocks in each stage.
        image_shape: tuple, the shape of the input image without the batch
            size. Defaults to `(None, None, 3)`.
        data_format: `None` or str, the data format (`"channels_last"` or
            `"channels_first"`). If not specified, defaults to the
            `image_data_format` value in your Keras config.
        out_features: list of str or `None`, the names of the output features
            to return. If `None`, returns all available features from all
            stages. Defaults to `None`.
        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`, the data
            type for computations and weights.

    Examples:
    ```python
    import numpy as np

    import keras_hub

    input_data = np.ones(shape=(8, 224, 224, 3))

    # Pretrained backbone.
    model = keras_hub.models.HGNetV2Backbone.from_preset(
        "hgnetv2_b5_ssld_stage2_ft_in1k"
    )
    model(input_data)

    # Randomly initialized backbone with a custom config.
    model = keras_hub.models.HGNetV2Backbone(
        depths=[1, 2, 4],
        embedding_size=32,
        hidden_sizes=[64, 128, 256],
        stem_channels=[3, 16, 32],
        hidden_act="relu",
        use_learnable_affine_block=False,
        stackwise_stage_filters=[
            (32, 16, 64, 1, 1, 3),  # Stage 0
            (64, 32, 128, 2, 1, 3),  # Stage 1
            (128, 64, 256, 4, 1, 3),  # Stage 2
        ],
        apply_downsample=[False, True, True],
        use_lightweight_conv_block=[False, False, False],
        image_shape=(224, 224, 3),
    )
    model(input_data)
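
    # The backbone returns a dict that maps each requested stage name
    # ("stem", "stage1", ...) to its feature map, which is convenient for
    # feeding detection necks. A minimal sketch of inspecting the outputs:
    feature_maps = model(input_data)
    print({name: tuple(fm.shape) for name, fm in feature_maps.items()})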
    ```
    """

    def __init__(
        self,
        depths,
        embedding_size,
        hidden_sizes,
        stem_channels,
        hidden_act,
        use_learnable_affine_block,
        stackwise_stage_filters,
        apply_downsample,
        use_lightweight_conv_block,
        image_shape=(None, None, 3),
        data_format=None,
        out_features=None,
        dtype=None,
        **kwargs,
    ):
        name = kwargs.get("name", None)
        data_format = standardize_data_format(data_format)
        channel_axis = -1 if data_format == "channels_last" else 1
        self.image_shape = image_shape
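        # Unpack the per-stage filter configuration into parallel tuples,
        # one entry per stage.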
        (
            stage_in_channels,
            stage_mid_channels,
            stage_out_filters,
            stage_num_blocks,
            stage_num_of_layers,
            stage_kernel_size,
        ) = zip(*stackwise_stage_filters)

        # === Layers ===
        self.embedder_layer = HGNetV2Embeddings(
            stem_channels=stem_channels,
            hidden_act=hidden_act,
            use_learnable_affine_block=use_learnable_affine_block,
            data_format=data_format,
            channel_axis=channel_axis,
            name=f"{name}_embedder" if name else "embedder",
            dtype=dtype,
        )
        self.encoder_layer = HGNetV2Encoder(
            stage_in_channels=stage_in_channels,
            stage_mid_channels=stage_mid_channels,
            stage_out_channels=stage_out_filters,
            stage_num_blocks=stage_num_blocks,
            stage_num_of_layers=stage_num_of_layers,
            apply_downsample=apply_downsample,
            use_lightweight_conv_block=use_lightweight_conv_block,
            stage_kernel_size=stage_kernel_size,
            use_learnable_affine_block=use_learnable_affine_block,
            data_format=data_format,
            channel_axis=channel_axis,
            name=f"{name}_encoder" if name else "encoder",
            dtype=dtype,
        )
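        # Feature maps are keyed by name: the stem output plus one entry
        # per stage.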
        self.stage_names = ["stem"] + [
            f"stage{i + 1}" for i in range(len(stackwise_stage_filters))
        ]
        self.out_features = (
            out_features if out_features is not None else self.stage_names
        )

        # === Functional Model ===
        pixel_values = keras.layers.Input(
            shape=image_shape, name="pixel_values_input"
        )
        embedding_output = self.embedder_layer(pixel_values)
        all_encoder_hidden_states_tuple = self.encoder_layer(embedding_output)
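        # The encoder returns hidden states for the stem and every stage;
        # keep only the features requested in `out_features`, keyed by name.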
        feature_maps_output = {
            stage_name: all_encoder_hidden_states_tuple[idx]
            for idx, stage_name in enumerate(self.stage_names)
            if stage_name in self.out_features
        }
        super().__init__(
            inputs=pixel_values,
            outputs=feature_maps_output,
            dtype=dtype,
            **kwargs,
        )

        # === Config ===
        self.depths = depths
        self.embedding_size = embedding_size
        self.hidden_sizes = hidden_sizes
        self.stem_channels = stem_channels
        self.hidden_act = hidden_act
        self.use_learnable_affine_block = use_learnable_affine_block
        self.stackwise_stage_filters = stackwise_stage_filters
        self.apply_downsample = apply_downsample
        self.use_lightweight_conv_block = use_lightweight_conv_block
        self.data_format = data_format

    def get_config(self):
        config = super().get_config()
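        # Only the arguments that the base `Backbone` config does not
        # already serialize need to be added here.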
        config.update(
            {
                "depths": self.depths,
                "embedding_size": self.embedding_size,
                "hidden_sizes": self.hidden_sizes,
                "stem_channels": self.stem_channels,
                "hidden_act": self.hidden_act,
                "use_learnable_affine_block": self.use_learnable_affine_block,
                "stackwise_stage_filters": self.stackwise_stage_filters,
                "apply_downsample": self.apply_downsample,
                "use_lightweight_conv_block": self.use_lightweight_conv_block,
                "image_shape": self.image_shape,
                "out_features": self.out_features,
                "data_format": self.data_format,
            }
        )
        return config