
Add HGNetV2 to KerasHub #2293


Merged · 8 commits · Jul 11, 2025
3 changes: 3 additions & 0 deletions keras_hub/api/layers/__init__.py
@@ -90,6 +90,9 @@
from keras_hub.src.models.gemma3.gemma3_image_converter import (
Gemma3ImageConverter as Gemma3ImageConverter,
)
from keras_hub.src.models.hgnetv2.hgnetv2_image_converter import (
HGNetV2ImageConverter as HGNetV2ImageConverter,
)
from keras_hub.src.models.mit.mit_image_converter import (
MiTImageConverter as MiTImageConverter,
)
9 changes: 9 additions & 0 deletions keras_hub/api/models/__init__.py
@@ -294,6 +294,15 @@
from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import (
GPTNeoXTokenizer as GPTNeoXTokenizer,
)
from keras_hub.src.models.hgnetv2.hgnetv2_backbone import (
HGNetV2Backbone as HGNetV2Backbone,
)
from keras_hub.src.models.hgnetv2.hgnetv2_image_classifier import (
HGNetV2ImageClassifier as HGNetV2ImageClassifier,
)
from keras_hub.src.models.hgnetv2.hgnetv2_image_classifier_preprocessor import (
HGNetV2ImageClassifierPreprocessor as HGNetV2ImageClassifierPreprocessor,
)
from keras_hub.src.models.image_classifier import (
ImageClassifier as ImageClassifier,
)
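
The new exports can be exercised end to end through the task API. A minimal smoke-test sketch — the preset name is borrowed from the backbone docstring below, and it is an assumption that it is registered for the classifier task as well:

```python
import numpy as np
import keras_hub

# Assumption: the backbone preset is also registered for the
# image-classification task.
classifier = keras_hub.models.HGNetV2ImageClassifier.from_preset(
    "hgnetv2_b5_ssld_stage2_ft_in1k"
)
images = np.ones((2, 224, 224, 3), dtype="float32")
preds = classifier.predict(images)  # shape: (batch, num_classes)
```
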
Empty file.
193 changes: 193 additions & 0 deletions keras_hub/src/models/hgnetv2/hgnetv2_backbone.py
@@ -0,0 +1,193 @@
import keras

from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.models.backbone import Backbone
from keras_hub.src.models.hgnetv2.hgnetv2_encoder import HGNetV2Encoder
from keras_hub.src.models.hgnetv2.hgnetv2_layers import HGNetV2Embeddings
from keras_hub.src.utils.keras_utils import standardize_data_format


@keras_hub_export("keras_hub.models.HGNetV2Backbone")
class HGNetV2Backbone(Backbone):
"""This class represents a Keras Backbone of the HGNetV2 model.

This class implements an HGNetV2 backbone architecture, a convolutional
neural network (CNN) optimized for GPU efficiency. HGNetV2 is frequently
used as a lightweight CNN backbone in object detection pipelines like
RT-DETR and YOLO variants, delivering strong performance on classification
and detection tasks, with speed-ups and accuracy gains compared to larger
CNN backbones.

Args:
depths: list of ints, the number of blocks in each stage.
embedding_size: int, the size of the embedding layer.
hidden_sizes: list of ints, the sizes of the hidden layers.
stem_channels: list of ints, the channels for the stem part.
hidden_act: str, the activation function for hidden layers.
use_learnable_affine_block: bool, whether to use learnable affine
transformations.
stackwise_stage_filters: list of tuples, where each tuple contains
configuration for a stage: (stage_in_channels, stage_mid_channels,
stage_out_channels, stage_num_blocks, stage_num_of_layers,
stage_kernel_size).
- stage_in_channels: int, input channels for the stage
- stage_mid_channels: int, middle channels for the stage
- stage_out_channels: int, output channels for the stage
- stage_num_blocks: int, number of blocks in the stage
- stage_num_of_layers: int, number of layers in each block
- stage_kernel_size: int, kernel size for the stage
apply_downsample: list of bools, whether to downsample in each stage.
use_lightweight_conv_block: list of bools, whether to use HGNetV2
lightweight convolutional blocks in each stage.
image_shape: tuple, the shape of the input image without the batch size.
Defaults to `(None, None, 3)`.
data_format: `None` or str, the data format ('channels_last' or
'channels_first'). If not specified, defaults to the
`image_data_format` value in your Keras config.
out_features: list of str or `None`, the names of the output features to
return. If `None`, returns all available features from all stages.
Defaults to `None`.
dtype: `None` or str or `keras.mixed_precision.DTypePolicy`, the data
type for computations and weights.

Examples:
```python
import keras_hub
import numpy as np
input_data = np.ones(shape=(8, 224, 224, 3))

# Pretrained backbone.
model = keras_hub.models.HGNetV2Backbone.from_preset(
"hgnetv2_b5_ssld_stage2_ft_in1k"
)
model(input_data)

# Randomly initialized backbone with a custom config.
model = keras_hub.models.HGNetV2Backbone(
depths=[1, 2, 4],
embedding_size=32,
hidden_sizes=[64, 128, 256],
stem_channels=[3, 16, 32],
hidden_act="relu",
use_learnable_affine_block=False,
stackwise_stage_filters=[
(32, 16, 64, 1, 1, 3), # Stage 0
(64, 32, 128, 2, 1, 3), # Stage 1
(128, 64, 256, 4, 1, 3), # Stage 2
],
apply_downsample=[False, True, True],
use_lightweight_conv_block=[False, False, False],
image_shape=(224, 224, 3),
)
model(input_data)
```
"""

def __init__(
self,
depths,
embedding_size,
hidden_sizes,
stem_channels,
hidden_act,
use_learnable_affine_block,
stackwise_stage_filters,
apply_downsample,
use_lightweight_conv_block,
image_shape=(None, None, 3),
data_format=None,
out_features=None,
dtype=None,
**kwargs,
):
name = kwargs.get("name", None)
data_format = standardize_data_format(data_format)
channel_axis = -1 if data_format == "channels_last" else 1
self.image_shape = image_shape
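# Unpack the per-stage configuration tuples:
# (in_channels, mid_channels, out_channels, num_blocks,
# num_layers, kernel_size).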
(
stage_in_channels,
stage_mid_channels,
stage_out_filters,
stage_num_blocks,
stage_num_of_layers,
stage_kernel_size,
) = zip(*stackwise_stage_filters)

# === Layers ===
self.embedder_layer = HGNetV2Embeddings(
stem_channels=stem_channels,
hidden_act=hidden_act,
use_learnable_affine_block=use_learnable_affine_block,
data_format=data_format,
channel_axis=channel_axis,
name=f"{name}_embedder" if name else "embedder",
dtype=dtype,
)
self.encoder_layer = HGNetV2Encoder(
stage_in_channels=stage_in_channels,
stage_mid_channels=stage_mid_channels,
stage_out_channels=stage_out_filters,
stage_num_blocks=stage_num_blocks,
stage_num_of_layers=stage_num_of_layers,
apply_downsample=apply_downsample,
use_lightweight_conv_block=use_lightweight_conv_block,
stage_kernel_size=stage_kernel_size,
use_learnable_affine_block=use_learnable_affine_block,
data_format=data_format,
channel_axis=channel_axis,
name=f"{name}_encoder" if name else "encoder",
dtype=dtype,
)
self.stage_names = ["stem"] + [
f"stage{i + 1}" for i in range(len(stackwise_stage_filters))
]
self.out_features = (
out_features if out_features is not None else self.stage_names
)

# === Functional Model ===
pixel_values = keras.layers.Input(
shape=image_shape, name="pixel_values_input"
)
embedding_output = self.embedder_layer(pixel_values)
all_encoder_hidden_states_tuple = self.encoder_layer(embedding_output)
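# The encoder returns one feature map per entry in `self.stage_names`
# (stem output first, then each stage), so index and name line up.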
feature_maps_output = {
stage_name: all_encoder_hidden_states_tuple[idx]
for idx, stage_name in enumerate(self.stage_names)
if stage_name in self.out_features
}
super().__init__(
inputs=pixel_values, outputs=feature_maps_output, **kwargs
)

# === Config ===
self.depths = depths
self.embedding_size = embedding_size
self.hidden_sizes = hidden_sizes
self.stem_channels = stem_channels
self.hidden_act = hidden_act
self.use_learnable_affine_block = use_learnable_affine_block
self.stackwise_stage_filters = stackwise_stage_filters
self.apply_downsample = apply_downsample
self.use_lightweight_conv_block = use_lightweight_conv_block
self.data_format = data_format

def get_config(self):
config = super().get_config()
config.update(
{
"depths": self.depths,
"embedding_size": self.embedding_size,
"hidden_sizes": self.hidden_sizes,
"stem_channels": self.stem_channels,
"hidden_act": self.hidden_act,
"use_learnable_affine_block": self.use_learnable_affine_block,
"stackwise_stage_filters": self.stackwise_stage_filters,
"apply_downsample": self.apply_downsample,
"use_lightweight_conv_block": self.use_lightweight_conv_block,
"image_shape": self.image_shape,
"out_features": self.out_features,
"data_format": self.data_format,
}
)
return config
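
Because the functional model returns a dict keyed by stage name, downstream users (e.g. detection necks) can select pyramid levels via `out_features`. A minimal sketch reusing the custom config from the docstring above; the printed shapes assume the 4x stem stride seen in the tests below:

```python
import numpy as np
from keras_hub.src.models.hgnetv2.hgnetv2_backbone import HGNetV2Backbone

backbone = HGNetV2Backbone(
    depths=[1, 2, 4],
    embedding_size=32,
    hidden_sizes=[64, 128, 256],
    stem_channels=[3, 16, 32],
    hidden_act="relu",
    use_learnable_affine_block=False,
    stackwise_stage_filters=[
        (32, 16, 64, 1, 1, 3),
        (64, 32, 128, 2, 1, 3),
        (128, 64, 256, 4, 1, 3),
    ],
    apply_downsample=[False, True, True],
    use_lightweight_conv_block=[False, False, False],
    image_shape=(224, 224, 3),
    out_features=["stage2", "stage3"],  # Skip the stem and stage1 maps.
)
features = backbone(np.ones((1, 224, 224, 3), dtype="float32"))
for name, fmap in features.items():
    print(name, fmap.shape)
# e.g. stage2: (1, 28, 28, 128), stage3: (1, 14, 14, 256)
```
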
133 changes: 133 additions & 0 deletions keras_hub/src/models/hgnetv2/hgnetv2_backbone_test.py
@@ -0,0 +1,133 @@
import keras
import numpy as np
import pytest
from absl.testing import parameterized

from keras_hub.src.models.hgnetv2.hgnetv2_backbone import HGNetV2Backbone
from keras_hub.src.tests.test_case import TestCase


class HGNetV2BackboneTest(TestCase):
def setUp(self):
self.default_input_shape = (64, 64, 3)
self.num_channels = self.default_input_shape[-1]
self.batch_size = 2
self.stem_channels = [self.num_channels, 16, 32]
self.default_stage_out_filters = [64, 128]
self.default_num_stages = 2
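# Each entry: [in_channels, mid_channels, out_channels, num_blocks,
# num_layers, kernel_size], matching the backbone's
# `stackwise_stage_filters` contract.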
self.stackwise_stage_filters = [
[32, 16, 64, 1, 1, 3],
[64, 32, 128, 1, 1, 3],
]
self.init_kwargs = {
"embedding_size": self.stem_channels[-1],
"stem_channels": self.stem_channels,
"hidden_act": "relu",
"use_learnable_affine_block": False,
"image_shape": self.default_input_shape,
"depths": [1] * self.default_num_stages,
"hidden_sizes": [
stage[2] for stage in self.stackwise_stage_filters
],
"stackwise_stage_filters": self.stackwise_stage_filters,
"apply_downsample": [False, True],
"use_lightweight_conv_block": [False, False],
# Explicitly pass the out_features arg to ensure comprehensive
# test coverage for D-FINE.
"out_features": ["stem", "stage1", "stage2"],
}
self.input_data = keras.ops.convert_to_tensor(
np.random.rand(self.batch_size, *self.default_input_shape).astype(
np.float32
)
)

@parameterized.named_parameters(
(
"default",
[False, True],
[False, False],
2,
{
"stem": (2, 16, 16, 32),
"stage1": (2, 16, 16, 64),
"stage2": (2, 8, 8, 128),
},
),
(
"early_downsample_light_blocks",
[True, True],
[True, True],
2,
{
"stem": (2, 16, 16, 32),
"stage1": (2, 8, 8, 64),
"stage2": (2, 4, 4, 128),
},
),
(
"single_stage_no_downsample",
[False],
[False],
1,
{
"stem": (2, 16, 16, 32),
"stage1": (2, 16, 16, 64),
},
),
(
"all_no_downsample",
[False, False],
[False, False],
2,
{
"stem": (2, 16, 16, 32),
"stage1": (2, 16, 16, 64),
"stage2": (2, 16, 16, 128),
},
),
)
def test_backbone_basics(
self,
apply_downsample,
use_lightweight_conv_block,
num_stages,
expected_shapes,
):
test_filters = self.stackwise_stage_filters[:num_stages]
hidden_sizes = [stage[2] for stage in test_filters]
test_kwargs = {
**self.init_kwargs,
"depths": [1] * num_stages,
"hidden_sizes": hidden_sizes,
"stackwise_stage_filters": test_filters,
"apply_downsample": apply_downsample,
"use_lightweight_conv_block": use_lightweight_conv_block,
"out_features": ["stem"]
+ [f"stage{i + 1}" for i in range(num_stages)],
}
self.run_vision_backbone_test(
cls=HGNetV2Backbone,
init_kwargs=test_kwargs,
input_data=self.input_data,
expected_output_shape=expected_shapes,
run_mixed_precision_check=False,
run_data_format_check=False,
)

@pytest.mark.large
def test_saved_model(self):
self.run_model_saving_test(
cls=HGNetV2Backbone,
init_kwargs=self.init_kwargs,
input_data=self.input_data,
)

@pytest.mark.extra_large
def test_all_presets(self):
for preset in HGNetV2Backbone.presets:
self.run_preset_test(
cls=HGNetV2Backbone,
preset=preset,
input_data=self.input_data,
)
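
A note on the expected shapes in the parameterized cases above: they follow from the stem reducing spatial dims by 4x (64 -> 16 here), with each stage that has `apply_downsample=True` halving them again and setting channels to that stage's out_channels. A hypothetical helper that reproduces the arithmetic:

```python
def expected_feature_shapes(batch, size, stem_out, stage_out, downsample):
    # Stem reduces spatial dims by 4x (64 -> 16 in these tests).
    shapes = {"stem": (batch, size // 4, size // 4, stem_out)}
    current = size // 4
    for i, (out_ch, down) in enumerate(zip(stage_out, downsample)):
        if down:
            current //= 2
        shapes[f"stage{i + 1}"] = (batch, current, current, out_ch)
    return shapes

# Reproduces the "default" case above:
assert expected_feature_shapes(2, 64, 32, [64, 128], [False, True]) == {
    "stem": (2, 16, 16, 32),
    "stage1": (2, 16, 16, 64),
    "stage2": (2, 8, 8, 128),
}
```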