diff --git a/label_studio_ml/examples/d_fine/Dockerfile b/label_studio_ml/examples/d_fine/Dockerfile
new file mode 100644
index 00000000..01ef979b
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/Dockerfile
@@ -0,0 +1,72 @@
+# Use a PyTorch base image with CUDA support
+FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-runtime
+ARG DEBIAN_FRONTEND=noninteractive
+ARG TEST_ENV
+
+WORKDIR /app
+
+# Install essential packages and D-FINE dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ git \
+ wget \
+ curl \
+    # System libraries for OpenCV, in case D-FINE or its dependencies need it (the Python OpenCV package itself comes via pip)
+ libgl1-mesa-glx \
+ libglib2.0-0 \
+ && rm -rf /var/lib/apt/lists/*
+
+ENV PYTHONUNBUFFERED=1 \
+ PYTHONDONTWRITEBYTECODE=1 \
+ PIP_CACHE_DIR=/.cache \
+ PORT=9090 \
+ WORKERS=1 \
+ THREADS=4 \
+ CUDA_HOME=/usr/local/cuda \
+ DFINE_CODE_DIR=/app/d-fine-code
+
+# For conda-based PyTorch images, CUDA_HOME may need to point at the conda prefix instead (see the commented line below).
+# For official pytorch/pytorch images, the system CUDA_HOME set above is usually fine.
+# ENV CUDA_HOME=/opt/conda
+
+ENV PATH="${CUDA_HOME}/bin:${PATH}"
+ENV TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6+PTX;8.9;9.0"
+
+# Install base requirements for Label Studio ML Backend
+COPY requirements-base.txt .
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip install -r requirements-base.txt
+
+# --- D-FINE specific setup ---
+# 1. Copy D-FINE's requirements.txt
+COPY d_fine_requirements.txt .
+# 2. Install D-FINE's Python dependencies
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip install -r d_fine_requirements.txt
+
+# 3. Copy D-FINE's 'src' and 'configs' directories
+COPY d-fine-code/src ${DFINE_CODE_DIR}/src
+COPY d-fine-code/configs ${DFINE_CODE_DIR}/configs
+# --- End D-FINE specific setup ---
+
+# Install ML backend example specific requirements (if any, usually empty for this setup)
+#COPY requirements.txt .
+#RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+# pip install --no-cache-dir -r requirements.txt
+
+# install test requirements if needed
+COPY requirements-test.txt .
+# build only when TEST_ENV="true"
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+ if [ "$TEST_ENV" = "true" ]; then \
+ pip3 install -r requirements-test.txt; \
+ fi
+
+# Copy the rest of the ML backend example files
+COPY . ./
+
+# Set PYTHONPATH to include the D-FINE source code directory
+ENV PYTHONPATH=${DFINE_CODE_DIR}:${PYTHONPATH}
+
+EXPOSE ${PORT}
+
+CMD gunicorn --preload --bind :${PORT} --workers ${WORKERS} --threads ${THREADS} --timeout 0 _wsgi:app
\ No newline at end of file
diff --git a/label_studio_ml/examples/d_fine/README.md b/label_studio_ml/examples/d_fine/README.md
new file mode 100644
index 00000000..efbd1079
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/README.md
@@ -0,0 +1,111 @@
+# D-FINE ML Backend for Label Studio
+
+This ML backend integrates the [D-FINE](https://github.com/Peterande/D-FINE) object detection model with Label Studio. It allows you to get pre-annotations for object detection tasks using pre-trained D-FINE models.
+
+## Features
+
+- Loads pre-trained D-FINE models (e.g., COCO-trained models).
+- Provides bounding box predictions for `RectangleLabels` in Label Studio.
+- Configurable via environment variables for model paths, device, and thresholds.
+
+## Prerequisites
+
+1. **Docker and Docker Compose**: For building and running the ML backend.
+2. **D-FINE Model Files**:
+ * **Source Code**: You need the `src` and `configs` directories from the [official D-FINE repository](https://github.com/Peterande/D-FINE).
+ * **Model Weights**: Download the desired `.pth` model weights (e.g., `dfine_l_coco.pth`).
+3. **Label Studio**: A running instance of Label Studio.
+
+## Setup
+
+1. **Clone this repository** (if you haven't already) and navigate to this example directory:
+ ```bash
+ # Assuming you are in the root of label-studio-ml-backend
+ cd label_studio_ml/examples/d_fine
+ ```
+
+2. **Prepare D-FINE code**:
+ * Create a directory named `d-fine-code` within the current `label_studio_ml/examples/d_fine` directory.
+ * Copy the `src` and `configs` directories from your clone of the [D-FINE repository](https://github.com/Peterande/D-FINE) into this newly created `d-fine-code` directory.
+ Your structure should look like:
+ ```
+ label_studio_ml/examples/d_fine/
+ ├── d-fine-code/
+ │ ├── src/
+ │ └── configs/
+ ├── Dockerfile
+ ├── docker-compose.yml
+ ├── model.py
+ └── ... (other files in this example)
+ ```
+
+3. **Prepare D-FINE model weights**:
+ * Create a directory named `models` within the current `label_studio_ml/examples/d_fine` directory.
+ * Place your downloaded D-FINE `.pth` model weights file (e.g., `dfine_l_coco.pth`) into this `models` directory.
+ Your structure should look like:
+ ```
+ label_studio_ml/examples/d_fine/
+ ├── models/
+ │ └── dfine_l_coco.pth (or your chosen model weights)
+ └── ... (other files)
+ ```
+
+4. **Configure `docker-compose.yml`**:
+   * Adjust environment variables as needed (an example snippet follows this list), especially:
+ * `DFINE_CONFIG_FILE`: Name of the D-FINE `.yml` config file (e.g., `dfine_hgnetv2_l_coco.yml`). This file must exist in `d-fine-code/configs/dfine/`.
+ * `DFINE_MODEL_WEIGHTS`: Name of the D-FINE `.pth` weights file (e.g., `dfine_l_coco.pth`). This file must exist in the `models` directory you created.
+ * `DEVICE`: Set to `cuda` if you have a GPU and want to use it, otherwise `cpu`.
+     * `LABEL_STUDIO_URL` and `LABEL_STUDIO_API_KEY`: required if the backend must fetch image data from Label Studio, e.g., for locally uploaded files or cloud storage that the backend cannot access directly.
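+
+   A minimal sketch of the corresponding `environment` section in `docker-compose.yml` (the values shown are placeholders; point them at your actual config file, weights file, and Label Studio instance):
+   ```yaml
+   environment:
+     - DFINE_CONFIG_FILE=dfine_hgnetv2_l_coco.yml    # must exist in d-fine-code/configs/dfine/
+     - DFINE_MODEL_WEIGHTS=dfine_l_coco.pth          # must exist in the models/ directory
+     - DEVICE=cuda                                   # or cpu
+     - LABEL_STUDIO_URL=http://host.docker.internal:8080
+     - LABEL_STUDIO_API_KEY=your-label-studio-api-key
+   ```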
+
+## Running the ML Backend
+
+1. **Build and start the Docker container**:
+ ```bash
+ docker-compose up --build
+ ```
+ If you have a GPU and configured it in `docker-compose.yml`, it should be utilized.
+
+2. **Verify the backend is running**:
+ Open your browser or use `curl` to check the health endpoint:
+ ```bash
+ curl http://localhost:9090/health
+ ```
+ You should see `{"status":"UP","model_class":"DFINEModel"}`.
+
+## Connecting to Label Studio
+
+1. Open your Label Studio project.
+2. Go to **Settings > Machine Learning**.
+3. Click **Add Model**.
+4. Enter a **Title** for your ML backend (e.g., "D-FINE Detector").
+5. Set the **URL** to `http://localhost:9090` (adjust the host and port if the backend runs elsewhere; if Label Studio itself runs inside Docker, use a host reachable from its container, e.g., `http://host.docker.internal:9090`).
+6. Enable **Interactive preannotations** if desired.
+7. Click **Validate and Save**.
+
+## Labeling Configuration
+
+This ML backend expects a labeling configuration with an `Image` object tag and a `RectangleLabels` control tag.
+
+Example (the label values below are illustrative; use the class names your model predicts, e.g., COCO class names for COCO-trained weights):
+```xml
+<View>
+  <Image name="image" value="$image"/>
+  <RectangleLabels name="label" toName="image">
+    <Label value="person" background="green"/>
+    <Label value="car" background="blue"/>
+    <Label value="dog" background="red"/>
+    <Label value="cat" background="orange"/>
+    <Label value="bicycle" background="purple"/>
+  </RectangleLabels>
+</View>
+```
\ No newline at end of file
diff --git a/label_studio_ml/examples/d_fine/_wsgi.py b/label_studio_ml/examples/d_fine/_wsgi.py
new file mode 100644
index 00000000..033fe276
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/_wsgi.py
@@ -0,0 +1,140 @@
+import os
+import argparse
+import json
+import logging
+import logging.config
+
+# Set a default log level if LOG_LEVEL is not defined
+log_level = os.getenv("LOG_LEVEL", "INFO")
+
+logging.config.dictConfig({
+ "version": 1,
+ "disable_existing_loggers": False,
+ "formatters": {
+ "standard": {
+ "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
+ }
+ },
+ "handlers": {
+ "console": {
+ "class": "logging.StreamHandler",
+ "level": log_level,
+ "stream": "ext://sys.stdout",
+ "formatter": "standard"
+ }
+ },
+ "root": {
+ "level": log_level,
+ "handlers": [
+ "console"
+ ],
+ "propagate": True
+ }
+})
+
+from label_studio_ml.api import init_app
+from model import DFINEModel # Changed from NewModel to DFINEModel
+
+
+_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json')
+
+
+def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH):
+ if not os.path.exists(config_path):
+ return dict()
+ with open(config_path) as f:
+ config = json.load(f)
+ assert isinstance(config, dict)
+ return config
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description='Label Studio ML Backend for D-FINE')
+ parser.add_argument(
+ '-p', '--port', dest='port', type=int, default=9090,
+ help='Server port')
+ parser.add_argument(
+ '--host', dest='host', type=str, default='0.0.0.0',
+ help='Server host')
+ parser.add_argument(
+ '--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('='),
+ help='Additional LabelStudioMLBase model initialization kwargs')
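+    # e.g. --kwargs model_dir=/data/models score_threshold=0.5 (hypothetical values; coerced to int/float/bool/str by parse_kwargs below)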
+ parser.add_argument(
+ '-d', '--debug', dest='debug', action='store_true',
+ help='Switch debug mode')
+ parser.add_argument(
+ '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=log_level,
+ help='Logging level')
+ parser.add_argument(
+ '--model-dir', dest='model_dir', default=os.getenv('MODEL_DIR', '/data/models'), # Default from Docker env
+ help='Directory where models (.pth weights) are stored')
+ parser.add_argument(
+ '--check', dest='check', action='store_true',
+ help='Validate model instance before launching server')
+ parser.add_argument('--basic-auth-user',
+ default=os.environ.get('ML_SERVER_BASIC_AUTH_USER', None),
+ help='Basic auth user')
+
+ parser.add_argument('--basic-auth-pass',
+ default=os.environ.get('ML_SERVER_BASIC_AUTH_PASS', None),
+ help='Basic auth pass')
+
+ args = parser.parse_args()
+
+ # setup logging level
+ if args.log_level:
+ logging.root.setLevel(args.log_level)
+
+ def isfloat(value):
+ try:
+ float(value)
+ return True
+ except ValueError:
+ return False
+
+ def parse_kwargs():
+ param = dict()
+ if args.kwargs:
+ for k, v in args.kwargs:
+ if v.isdigit():
+ param[k] = int(v)
+ elif v == 'True' or v == 'true':
+ param[k] = True
+ elif v == 'False' or v == 'false':
+ param[k] = False
+ elif isfloat(v):
+ param[k] = float(v)
+ else:
+ param[k] = v
+ return param
+
+ kwargs_parsed = get_kwargs_from_config()
+ kwargs_parsed.update(parse_kwargs())
+
+ # Pass MODEL_DIR to the model constructor if needed, or rely on env vars within the model
+ if args.model_dir:
+ kwargs_parsed['model_dir'] = args.model_dir
+ # Also update environment variable if model relies on it directly and it's not already set
+ if not os.getenv('MODEL_DIR'):
+ os.environ['MODEL_DIR'] = args.model_dir
+
+
+ if args.check:
+ print('Check "' + DFINEModel.__name__ + '" instance creation..')
+ model = DFINEModel(**kwargs_parsed)
+
+ app = init_app(model_class=DFINEModel, **kwargs_parsed) # Pass parsed kwargs here
+
+ app.run(host=args.host, port=args.port, debug=args.debug)
+
+else:
+    # for gunicorn / uWSGI use
+ # Ensure MODEL_DIR is available for the model initialization
+ kwargs_for_init = get_kwargs_from_config()
+ if not os.getenv('MODEL_DIR') and 'model_dir' not in kwargs_for_init:
+ kwargs_for_init['model_dir'] = os.getenv('MODEL_DIR', '/data/models')
+ if not os.getenv('MODEL_DIR'):
+ os.environ['MODEL_DIR'] = kwargs_for_init['model_dir']
+
+ app = init_app(model_class=DFINEModel, **kwargs_for_init)
\ No newline at end of file
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/coco_detection.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/coco_detection.yml
new file mode 100644
index 00000000..670c3e33
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/coco_detection.yml
@@ -0,0 +1,41 @@
+task: detection
+
+evaluator:
+ type: CocoEvaluator
+ iou_types: ['bbox', ]
+
+num_classes: 80
+remap_mscoco_category: True
+
+train_dataloader:
+ type: DataLoader
+ dataset:
+ type: CocoDetection
+ img_folder: /data/COCO2017/train2017/
+ ann_file: /data/COCO2017/annotations/instances_train2017.json
+ return_masks: False
+ transforms:
+ type: Compose
+ ops: ~
+ shuffle: True
+ num_workers: 4
+ drop_last: True
+ collate_fn:
+ type: BatchImageCollateFunction
+
+
+val_dataloader:
+ type: DataLoader
+ dataset:
+ type: CocoDetection
+ img_folder: /data/COCO2017/val2017/
+ ann_file: /data/COCO2017/annotations/instances_val2017.json
+ return_masks: False
+ transforms:
+ type: Compose
+ ops: ~
+ shuffle: False
+ num_workers: 4
+ drop_last: False
+ collate_fn:
+ type: BatchImageCollateFunction
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/crowdhuman_detection.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/crowdhuman_detection.yml
new file mode 100644
index 00000000..0a4bbad3
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/crowdhuman_detection.yml
@@ -0,0 +1,41 @@
+task: detection
+
+evaluator:
+ type: CocoEvaluator
+ iou_types: ['bbox', ]
+
+num_classes: 1 # your dataset classes
+remap_mscoco_category: False
+
+train_dataloader:
+ type: DataLoader
+ dataset:
+ type: CocoDetection
+ img_folder: /data/CrowdHuman/coco/CrowdHuman_train
+ ann_file: /data/CrowdHuman/coco/Chuman-train.json
+ return_masks: False
+ transforms:
+ type: Compose
+ ops: ~
+ shuffle: True
+ num_workers: 4
+ drop_last: True
+ collate_fn:
+ type: BatchImageCollateFunction
+
+
+val_dataloader:
+ type: DataLoader
+ dataset:
+ type: CocoDetection
+ img_folder: /data/CrowdHuman/coco/CrowdHuman_val
+ ann_file: /data/CrowdHuman/coco/Chuman-val.json
+ return_masks: False
+ transforms:
+ type: Compose
+ ops: ~
+ shuffle: False
+ num_workers: 4
+ drop_last: False
+ collate_fn:
+ type: BatchImageCollateFunction
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/custom_detection.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/custom_detection.yml
new file mode 100644
index 00000000..35435ad6
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/custom_detection.yml
@@ -0,0 +1,41 @@
+task: detection
+
+evaluator:
+ type: CocoEvaluator
+ iou_types: ['bbox', ]
+
+num_classes: 777 # your dataset classes
+remap_mscoco_category: False
+
+train_dataloader:
+ type: DataLoader
+ dataset:
+ type: CocoDetection
+ img_folder: /data/yourdataset/train
+ ann_file: /data/yourdataset/train/train.json
+ return_masks: False
+ transforms:
+ type: Compose
+ ops: ~
+ shuffle: True
+ num_workers: 4
+ drop_last: True
+ collate_fn:
+ type: BatchImageCollateFunction
+
+
+val_dataloader:
+ type: DataLoader
+ dataset:
+ type: CocoDetection
+ img_folder: /data/yourdataset/val
+ ann_file: /data/yourdataset/val/val.json
+ return_masks: False
+ transforms:
+ type: Compose
+ ops: ~
+ shuffle: False
+ num_workers: 4
+ drop_last: False
+ collate_fn:
+ type: BatchImageCollateFunction
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/obj365_detection.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/obj365_detection.yml
new file mode 100644
index 00000000..e58fdbc1
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/obj365_detection.yml
@@ -0,0 +1,41 @@
+task: detection
+
+evaluator:
+ type: CocoEvaluator
+ iou_types: ['bbox', ]
+
+num_classes: 366
+remap_mscoco_category: False
+
+train_dataloader:
+ type: DataLoader
+ dataset:
+ type: CocoDetection
+ img_folder: /data/Objects365/data/train
+ ann_file: /data/Objects365/data/train/new_zhiyuan_objv2_train_resized.json
+ return_masks: False
+ transforms:
+ type: Compose
+ ops: ~
+ shuffle: True
+ num_workers: 4
+ drop_last: True
+ collate_fn:
+ type: BatchImageCollateFunction
+
+
+val_dataloader:
+ type: DataLoader
+ dataset:
+ type: CocoDetection
+ img_folder: /data/Objects365/data/val/
+ ann_file: /data/Objects365/data/val/new_zhiyuan_objv2_val_resized.json
+ return_masks: False
+ transforms:
+ type: Compose
+ ops: ~
+ shuffle: False
+ num_workers: 4
+ drop_last: False
+ collate_fn:
+ type: BatchImageCollateFunction
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/voc_detection.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/voc_detection.yml
new file mode 100644
index 00000000..1f9ceeb8
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dataset/voc_detection.yml
@@ -0,0 +1,40 @@
+task: detection
+
+evaluator:
+ type: CocoEvaluator
+ iou_types: ['bbox', ]
+
+num_classes: 20
+
+train_dataloader:
+ type: DataLoader
+ dataset:
+ type: VOCDetection
+ root: ./dataset/voc/
+ ann_file: trainval.txt
+ label_file: label_list.txt
+ transforms:
+ type: Compose
+ ops: ~
+ shuffle: True
+ num_workers: 4
+ drop_last: True
+ collate_fn:
+ type: BatchImageCollateFunction
+
+
+val_dataloader:
+ type: DataLoader
+ dataset:
+ type: VOCDetection
+ root: ./dataset/voc/
+ ann_file: test.txt
+ label_file: label_list.txt
+ transforms:
+ type: Compose
+ ops: ~
+ shuffle: False
+ num_workers: 4
+ drop_last: False
+ collate_fn:
+ type: BatchImageCollateFunction
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_l_ch.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_l_ch.yml
new file mode 100644
index 00000000..761bcf8c
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_l_ch.yml
@@ -0,0 +1,44 @@
+__include__: [
+ '../../dataset/crowdhuman_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_l_crowdhuman
+
+
+HGNetv2:
+ name: 'B4'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000125
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epochs: 140
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 120
+ collate_fn:
+ stop_epoch: 120
+ ema_restart_decay: 0.9999
+ base_size_repeat: 4
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_m_ch.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_m_ch.yml
new file mode 100644
index 00000000..48d37f11
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_m_ch.yml
@@ -0,0 +1,60 @@
+__include__: [
+ '../../dataset/crowdhuman_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_m_crowdhuman
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B2'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+DFINETransformer:
+ num_layers: 4 # 5 6
+ eval_idx: -1 # -2 -3
+
+HybridEncoder:
+ in_channels: [384, 768, 1536]
+ hidden_dim: 256
+ depth_mult: 0.67
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.000025
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.000025
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epochs: 220
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 200
+ collate_fn:
+ stop_epoch: 200
+ ema_restart_decay: 0.9999
+ base_size_repeat: 6
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_n_ch.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_n_ch.yml
new file mode 100644
index 00000000..51528bcf
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_n_ch.yml
@@ -0,0 +1,82 @@
+__include__: [
+ '../../dataset/crowdhuman_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_n_crowdhuman
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+
+HybridEncoder:
+ in_channels: [512, 1024]
+ feat_strides: [16, 32]
+
+ # intra
+ hidden_dim: 128
+ use_encoder_idx: [1]
+ dim_feedforward: 512
+
+ # cross
+ expansion: 0.34
+ depth_mult: 0.5
+
+
+DFINETransformer:
+ feat_channels: [128, 128]
+ feat_strides: [16, 32]
+ hidden_dim: 128
+ dim_feedforward: 512
+ num_levels: 2
+
+ num_layers: 3
+ eval_idx: -1
+
+ num_points: [6, 6]
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0004
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.0004
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.0008
+ betas: [0.9, 0.999]
+ weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epochs: 220
+train_dataloader:
+ total_batch_size: 128
+ dataset:
+ transforms:
+ policy:
+ epoch: 200
+ collate_fn:
+ stop_epoch: 200
+ ema_restart_decay: 0.9999
+ base_size_repeat: ~
+
+val_dataloader:
+ total_batch_size: 256
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_s_ch.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_s_ch.yml
new file mode 100644
index 00000000..55ab8b87
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_s_ch.yml
@@ -0,0 +1,65 @@
+__include__: [
+ '../../dataset/crowdhuman_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_s_crowdhuman
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+DFINETransformer:
+ num_layers: 3 # 4 5 6
+ eval_idx: -1 # -2 -3 -4
+
+HybridEncoder:
+ in_channels: [256, 512, 1024]
+ hidden_dim: 256
+ depth_mult: 0.34
+ expansion: 0.5
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0002
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.0002
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.0004
+ betas: [0.9, 0.999]
+ weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epochs: 220
+train_dataloader:
+ total_batch_size: 64
+ dataset:
+ transforms:
+ policy:
+ epoch: 200
+ collate_fn:
+ stop_epoch: 200
+ ema_restart_decay: 0.9999
+ base_size_repeat: 20
+
+val_dataloader:
+ total_batch_size: 128
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_x_ch.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_x_ch.yml
new file mode 100644
index 00000000..2ab49a02
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/crowdhuman/dfine_hgnetv2_x_ch.yml
@@ -0,0 +1,55 @@
+__include__: [
+ '../../dataset/crowdhuman_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_x_crowdhuman
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B5'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+
+HybridEncoder:
+ hidden_dim: 384
+ dim_feedforward: 2048
+
+DFINETransformer:
+ feat_channels: [384, 384, 384]
+ reg_scale: 8
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000025
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epochs: 140
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 120
+ collate_fn:
+ stop_epoch: 120
+ ema_restart_decay: 0.9998
+ base_size_repeat: 3
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_l_custom.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_l_custom.yml
new file mode 100644
index 00000000..7526401f
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_l_custom.yml
@@ -0,0 +1,44 @@
+__include__: [
+ '../../dataset/custom_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_l_custom
+
+
+HGNetv2:
+ name: 'B4'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000125
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epochs: 80 # 72 + 2n
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 72
+ collate_fn:
+ stop_epoch: 72
+ ema_restart_decay: 0.9999
+ base_size_repeat: 4
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_m_custom.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_m_custom.yml
new file mode 100644
index 00000000..bf31c1c4
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_m_custom.yml
@@ -0,0 +1,60 @@
+__include__: [
+ '../../dataset/custom_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_m_custom
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B2'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+DFINETransformer:
+ num_layers: 4 # 5 6
+ eval_idx: -1 # -2 -3
+
+HybridEncoder:
+ in_channels: [384, 768, 1536]
+ hidden_dim: 256
+ depth_mult: 0.67
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.000025
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.000025
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epochs: 132 # 120 + 4n
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 120
+ collate_fn:
+ stop_epoch: 120
+ ema_restart_decay: 0.9999
+ base_size_repeat: 6
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_n_custom.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_n_custom.yml
new file mode 100644
index 00000000..6da77026
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_n_custom.yml
@@ -0,0 +1,82 @@
+__include__: [
+ '../../dataset/custom_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_n_custom
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+
+HybridEncoder:
+ in_channels: [512, 1024]
+ feat_strides: [16, 32]
+
+ # intra
+ hidden_dim: 128
+ use_encoder_idx: [1]
+ dim_feedforward: 512
+
+ # cross
+ expansion: 0.34
+ depth_mult: 0.5
+
+
+DFINETransformer:
+ feat_channels: [128, 128]
+ feat_strides: [16, 32]
+ hidden_dim: 128
+ dim_feedforward: 512
+ num_levels: 2
+
+ num_layers: 3
+ eval_idx: -1
+
+ num_points: [6, 6]
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0004
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.0004
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.0008
+ betas: [0.9, 0.999]
+ weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epochs: 220
+train_dataloader:
+ total_batch_size: 128
+ dataset:
+ transforms:
+ policy:
+ epoch: 200
+ collate_fn:
+ stop_epoch: 200
+ ema_restart_decay: 0.9999
+ base_size_repeat: ~
+
+val_dataloader:
+ total_batch_size: 256
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_s_custom.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_s_custom.yml
new file mode 100644
index 00000000..285cb77b
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_s_custom.yml
@@ -0,0 +1,65 @@
+__include__: [
+ '../../dataset/custom_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_s_custom
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+DFINETransformer:
+ num_layers: 3 # 4 5 6
+ eval_idx: -1 # -2 -3 -4
+
+HybridEncoder:
+ in_channels: [256, 512, 1024]
+ hidden_dim: 256
+ depth_mult: 0.34
+ expansion: 0.5
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0002
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.0002
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.0004
+ betas: [0.9, 0.999]
+ weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epochs: 220
+train_dataloader:
+ total_batch_size: 64
+ dataset:
+ transforms:
+ policy:
+ epoch: 200
+ collate_fn:
+ stop_epoch: 200
+ ema_restart_decay: 0.9999
+ base_size_repeat: 20
+
+val_dataloader:
+ total_batch_size: 128
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_x_custom.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_x_custom.yml
new file mode 100644
index 00000000..2ba56460
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/dfine_hgnetv2_x_custom.yml
@@ -0,0 +1,55 @@
+__include__: [
+ '../../dataset/custom_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_x_custom
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B5'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+
+HybridEncoder:
+ hidden_dim: 384
+ dim_feedforward: 2048
+
+DFINETransformer:
+ feat_channels: [384, 384, 384]
+ reg_scale: 8
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000025
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epochs: 80 # 72 + 2n
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 72
+ collate_fn:
+ stop_epoch: 72
+ ema_restart_decay: 0.9998
+ base_size_repeat: 3
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_l_obj2custom.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_l_obj2custom.yml
new file mode 100644
index 00000000..ef6a95df
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_l_obj2custom.yml
@@ -0,0 +1,53 @@
+__include__: [
+ '../../../dataset/custom_detection.yml',
+ '../../../runtime.yml',
+ '../../include/dataloader.yml',
+ '../../include/optimizer.yml',
+ '../../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_l_obj2custom
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B4'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+ pretrained: False
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000125
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+epochs: 36 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 30
+ collate_fn:
+ stop_epoch: 30
+ ema_restart_decay: 0.9999
+ base_size_repeat: 4
+
+ema:
+ warmups: 0
+
+lr_warmup_scheduler:
+ warmup_duration: 0
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_m_obj2custom.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_m_obj2custom.yml
new file mode 100644
index 00000000..f21fbbc7
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_m_obj2custom.yml
@@ -0,0 +1,66 @@
+__include__: [
+ '../../../dataset/custom_detection.yml',
+ '../../../runtime.yml',
+ '../../include/dataloader.yml',
+ '../../include/optimizer.yml',
+ '../../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_m_obj2custom
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B2'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+ pretrained: False
+
+DFINETransformer:
+ num_layers: 4 # 5 6
+ eval_idx: -1 # -2 -3
+
+HybridEncoder:
+ in_channels: [384, 768, 1536]
+ hidden_dim: 256
+ depth_mult: 0.67
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.000025
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.000025
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+epochs: 56 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 48
+ collate_fn:
+ stop_epoch: 48
+ ema_restart_decay: 0.9999
+ base_size_repeat: 6
+
+ema:
+ warmups: 0
+
+lr_warmup_scheduler:
+ warmup_duration: 0
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_s_obj2custom.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_s_obj2custom.yml
new file mode 100644
index 00000000..9a74e34d
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_s_obj2custom.yml
@@ -0,0 +1,67 @@
+__include__: [
+ '../../../dataset/custom_detection.yml',
+ '../../../runtime.yml',
+ '../../include/dataloader.yml',
+ '../../include/optimizer.yml',
+ '../../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_s_obj2custom
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+ pretrained: False
+
+DFINETransformer:
+ num_layers: 3 # 4 5 6
+ eval_idx: -1 # -2 -3 -4
+
+HybridEncoder:
+ in_channels: [256, 512, 1024]
+ hidden_dim: 256
+ depth_mult: 0.34
+ expansion: 0.5
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.000125
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.000125
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+epochs: 64 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 56
+ collate_fn:
+ stop_epoch: 56
+ ema_restart_decay: 0.9999
+ base_size_repeat: 10
+
+ema:
+ warmups: 0
+
+lr_warmup_scheduler:
+ warmup_duration: 0
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_x_obj2custom.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_x_obj2custom.yml
new file mode 100644
index 00000000..717812f2
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/custom/objects365/dfine_hgnetv2_x_obj2custom.yml
@@ -0,0 +1,62 @@
+__include__: [
+ '../../../dataset/custom_detection.yml',
+ '../../../runtime.yml',
+ '../../include/dataloader.yml',
+ '../../include/optimizer.yml',
+ '../../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_x_obj2custom
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B5'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+ pretrained: False
+
+HybridEncoder:
+ # intra
+ hidden_dim: 384
+ dim_feedforward: 2048
+
+DFINETransformer:
+ feat_channels: [384, 384, 384]
+ reg_scale: 8
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000025
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+epochs: 36 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 30
+ collate_fn:
+ stop_epoch: 30
+ ema_restart_decay: 0.9999
+ base_size_repeat: 3
+
+ema:
+ warmups: 0
+
+lr_warmup_scheduler:
+ warmup_duration: 0
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_l_coco.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_l_coco.yml
new file mode 100644
index 00000000..718074c3
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_l_coco.yml
@@ -0,0 +1,44 @@
+__include__: [
+ '../dataset/coco_detection.yml',
+ '../runtime.yml',
+ './include/dataloader.yml',
+ './include/optimizer.yml',
+ './include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_l_coco
+
+
+HGNetv2:
+ name: 'B4'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000125
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epochs: 80 # 72 + 2n
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 72
+ collate_fn:
+ stop_epoch: 72
+ ema_restart_decay: 0.9999
+ base_size_repeat: 4
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_m_coco.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_m_coco.yml
new file mode 100644
index 00000000..9837eacc
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_m_coco.yml
@@ -0,0 +1,60 @@
+__include__: [
+ '../dataset/coco_detection.yml',
+ '../runtime.yml',
+ './include/dataloader.yml',
+ './include/optimizer.yml',
+ './include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_m_coco
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B2'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+DFINETransformer:
+ num_layers: 4 # 5 6
+ eval_idx: -1 # -2 -3
+
+HybridEncoder:
+ in_channels: [384, 768, 1536]
+ hidden_dim: 256
+ depth_mult: 0.67
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.00002
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.00002
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.0002
+ betas: [0.9, 0.999]
+ weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epochs: 132 # 120 + 4n
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 120
+ collate_fn:
+ stop_epoch: 120
+ ema_restart_decay: 0.9999
+ base_size_repeat: 6
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_n_coco.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_n_coco.yml
new file mode 100644
index 00000000..f0c3a1a4
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_n_coco.yml
@@ -0,0 +1,82 @@
+__include__: [
+ '../dataset/coco_detection.yml',
+ '../runtime.yml',
+ './include/dataloader.yml',
+ './include/optimizer.yml',
+ './include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_n_coco
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+
+HybridEncoder:
+ in_channels: [512, 1024]
+ feat_strides: [16, 32]
+
+ # intra
+ hidden_dim: 128
+ use_encoder_idx: [1]
+ dim_feedforward: 512
+
+ # cross
+ expansion: 0.34
+ depth_mult: 0.5
+
+
+DFINETransformer:
+ feat_channels: [128, 128]
+ feat_strides: [16, 32]
+ hidden_dim: 128
+ dim_feedforward: 512
+ num_levels: 2
+
+ num_layers: 3
+ eval_idx: -1
+
+ num_points: [6, 6]
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0004
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.0004
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.0008
+ betas: [0.9, 0.999]
+ weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epochs: 160 # 148 + 4n
+train_dataloader:
+ total_batch_size: 128
+ dataset:
+ transforms:
+ policy:
+ epoch: 148
+ collate_fn:
+ stop_epoch: 148
+ ema_restart_decay: 0.9999
+ base_size_repeat: ~
+
+val_dataloader:
+ total_batch_size: 256
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_s_coco.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_s_coco.yml
new file mode 100644
index 00000000..e7aaa22e
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_s_coco.yml
@@ -0,0 +1,61 @@
+__include__: [
+ '../dataset/coco_detection.yml',
+ '../runtime.yml',
+ './include/dataloader.yml',
+ './include/optimizer.yml',
+ './include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_s_coco
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+DFINETransformer:
+ num_layers: 3 # 4 5 6
+ eval_idx: -1 # -2 -3 -4
+
+HybridEncoder:
+ in_channels: [256, 512, 1024]
+ hidden_dim: 256
+ depth_mult: 0.34
+ expansion: 0.5
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0001
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.0001
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.0002
+ betas: [0.9, 0.999]
+ weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epochs: 132 # 120 + 4n
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 120
+ collate_fn:
+ stop_epoch: 120
+ ema_restart_decay: 0.9999
+ base_size_repeat: 20
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_x_coco.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_x_coco.yml
new file mode 100644
index 00000000..38d976b3
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/dfine_hgnetv2_x_coco.yml
@@ -0,0 +1,56 @@
+__include__: [
+ '../dataset/coco_detection.yml',
+ '../runtime.yml',
+ './include/dataloader.yml',
+ './include/optimizer.yml',
+ './include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_x_coco
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B5'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+
+HybridEncoder:
+ # intra
+ hidden_dim: 384
+ dim_feedforward: 2048
+
+DFINETransformer:
+ feat_channels: [384, 384, 384]
+ reg_scale: 8
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000025
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+# Increase to search for the optimal ema
+epochs: 80 # 72 + 2n
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 72
+ collate_fn:
+ stop_epoch: 72
+ ema_restart_decay: 0.9998
+ base_size_repeat: 3
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/include/dataloader.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/include/dataloader.yml
new file mode 100644
index 00000000..3f7c67c8
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/include/dataloader.yml
@@ -0,0 +1,39 @@
+
+train_dataloader:
+ dataset:
+ transforms:
+ ops:
+ - {type: RandomPhotometricDistort, p: 0.5}
+ - {type: RandomZoomOut, fill: 0}
+ - {type: RandomIoUCrop, p: 0.8}
+ - {type: SanitizeBoundingBoxes, min_size: 1}
+ - {type: RandomHorizontalFlip}
+ - {type: Resize, size: [640, 640], }
+ - {type: SanitizeBoundingBoxes, min_size: 1}
+ - {type: ConvertPILImage, dtype: 'float32', scale: True}
+ - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True}
+ policy:
+ name: stop_epoch
+ epoch: 72 # epoch in [71, ~) stop `ops`
+ ops: ['RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop']
+
+ collate_fn:
+ type: BatchImageCollateFunction
+ base_size: 640
+ base_size_repeat: 3
+ stop_epoch: 72 # epoch in [72, ~) stop `multiscales`
+
+ shuffle: True
+ total_batch_size: 32 # total batch size equals to 32 (4 * 8)
+ num_workers: 4
+
+
+val_dataloader:
+ dataset:
+ transforms:
+ ops:
+ - {type: Resize, size: [640, 640], }
+ - {type: ConvertPILImage, dtype: 'float32', scale: True}
+ shuffle: False
+ total_batch_size: 64
+ num_workers: 4
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/include/dfine_hgnetv2.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/include/dfine_hgnetv2.yml
new file mode 100644
index 00000000..91ad4a08
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/include/dfine_hgnetv2.yml
@@ -0,0 +1,82 @@
+task: detection
+
+model: DFINE
+criterion: DFINECriterion
+postprocessor: DFINEPostProcessor
+
+use_focal_loss: True
+eval_spatial_size: [640, 640] # h w
+
+DFINE:
+ backbone: HGNetv2
+ encoder: HybridEncoder
+ decoder: DFINETransformer
+
+HGNetv2:
+ pretrained: True
+ local_model_dir: weight/hgnetv2/
+
+HybridEncoder:
+ in_channels: [512, 1024, 2048]
+ feat_strides: [8, 16, 32]
+
+ # intra
+ hidden_dim: 256
+ use_encoder_idx: [2]
+ num_encoder_layers: 1
+ nhead: 8
+ dim_feedforward: 1024
+ dropout: 0.
+ enc_act: 'gelu'
+
+ # cross
+ expansion: 1.0
+ depth_mult: 1
+ act: 'silu'
+
+
+DFINETransformer:
+ feat_channels: [256, 256, 256]
+ feat_strides: [8, 16, 32]
+ hidden_dim: 256
+ num_levels: 3
+
+ num_layers: 6
+ eval_idx: -1
+ num_queries: 300
+
+ num_denoising: 100
+ label_noise_ratio: 0.5
+ box_noise_scale: 1.0
+
+ # NEW
+ reg_max: 32
+ reg_scale: 4
+
+ # Auxiliary decoder layers dimension scaling
+ # "eg. If num_layers: 6 eval_idx: -4,
+ # then layer 3, 4, 5 are auxiliary decoder layers."
+ layer_scale: 1 # 2
+
+
+ num_points: [3, 6, 3] # [4, 4, 4] [3, 6, 3]
+ cross_attn_method: default # default, discrete
+ query_select_method: default # default, agnostic
+
+
+DFINEPostProcessor:
+ num_top_queries: 300
+
+
+DFINECriterion:
+ weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2, loss_fgl: 0.15, loss_ddf: 1.5}
+ losses: ['vfl', 'boxes', 'local']
+ alpha: 0.75
+ gamma: 2.0
+ reg_max: 32
+
+ matcher:
+ type: HungarianMatcher
+ weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
+ alpha: 0.25
+ gamma: 2.0
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/include/optimizer.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/include/optimizer.yml
new file mode 100644
index 00000000..8ec87c77
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/include/optimizer.yml
@@ -0,0 +1,36 @@
+use_amp: True
+use_ema: True
+ema:
+ type: ModelEMA
+ decay: 0.9999
+ warmups: 1000
+ start: 0
+
+
+epochs: 72
+clip_max_norm: 0.1
+
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm).*$'
+ lr: 0.0000125
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+lr_scheduler:
+ type: MultiStepLR
+ milestones: [500]
+ gamma: 0.1
+
+lr_warmup_scheduler:
+ type: LinearWarmup
+ warmup_duration: 500
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_l_obj2coco.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_l_obj2coco.yml
new file mode 100644
index 00000000..f4fda630
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_l_obj2coco.yml
@@ -0,0 +1,52 @@
+__include__: [
+ '../../dataset/coco_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_l_obj2coco
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B4'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000125
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+epochs: 36 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 30
+ collate_fn:
+ stop_epoch: 30
+ ema_restart_decay: 0.9999
+ base_size_repeat: 4
+
+ema:
+ warmups: 0
+
+lr_warmup_scheduler:
+ warmup_duration: 0
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_l_obj365.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_l_obj365.yml
new file mode 100644
index 00000000..c7a2876b
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_l_obj365.yml
@@ -0,0 +1,49 @@
+__include__: [
+ '../../dataset/obj365_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_l_obj365
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B4'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000125
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+# weight_decay: 0.00005 # Faster convergence (optional)
+
+
+epochs: 24 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 500
+ collate_fn:
+ stop_epoch: 500
+ base_size_repeat: 4
+
+checkpoint_freq: 1
+print_freq: 1000
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_m_obj2coco.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_m_obj2coco.yml
new file mode 100644
index 00000000..fd77cca6
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_m_obj2coco.yml
@@ -0,0 +1,65 @@
+__include__: [
+ '../../dataset/coco_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_m_obj2coco
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B2'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+DFINETransformer:
+ num_layers: 4 # 5 6
+ eval_idx: -1 # -2 -3
+
+HybridEncoder:
+ in_channels: [384, 768, 1536]
+ hidden_dim: 256
+ depth_mult: 0.67
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.000025
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.000025
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+epochs: 56 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 48
+ collate_fn:
+ stop_epoch: 48
+ ema_restart_decay: 0.9999
+ base_size_repeat: 6
+
+ema:
+ warmups: 0
+
+lr_warmup_scheduler:
+ warmup_duration: 0
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_m_obj365.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_m_obj365.yml
new file mode 100644
index 00000000..108d4e6b
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_m_obj365.yml
@@ -0,0 +1,62 @@
+__include__: [
+ '../../dataset/obj365_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_m_obj365
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B2'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+DFINETransformer:
+ num_layers: 4 # 5 6
+ eval_idx: -1 # -2 -3
+
+HybridEncoder:
+ in_channels: [384, 768, 1536]
+ hidden_dim: 256
+ depth_mult: 0.67
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.000025
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.000025
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+# weight_decay: 0.00005 # Faster convergence (optional)
+
+
+epochs: 36 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 500
+ collate_fn:
+ stop_epoch: 500
+ base_size_repeat: 6
+
+checkpoint_freq: 1
+print_freq: 1000
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_n_obj2coco.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_n_obj2coco.yml
new file mode 100644
index 00000000..6febf658
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_n_obj2coco.yml
@@ -0,0 +1,88 @@
+__include__: [
+ '../../dataset/coco_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_n_obj2coco
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+
+HybridEncoder:
+ in_channels: [512, 1024]
+ feat_strides: [16, 32]
+
+ # intra
+ hidden_dim: 128
+ use_encoder_idx: [1]
+ dim_feedforward: 512
+
+ # cross
+ expansion: 0.34
+ depth_mult: 0.5
+
+
+DFINETransformer:
+ feat_channels: [128, 128]
+ feat_strides: [16, 32]
+ hidden_dim: 128
+ dim_feedforward: 512
+ num_levels: 2
+
+ num_layers: 3
+ eval_idx: -1
+
+ num_points: [6, 6]
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0004
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.0004
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.0008
+ betas: [0.9, 0.999]
+ weight_decay: 0.0001
+
+
+
+epochs: 64 # Early stop
+train_dataloader:
+ total_batch_size: 128
+ dataset:
+ transforms:
+ policy:
+ epoch: 56
+ collate_fn:
+ stop_epoch: 56
+ ema_restart_decay: 0.9999
+ base_size_repeat: ~
+
+ema:
+ warmups: 0
+
+lr_warmup_scheduler:
+ warmup_duration: 0
+
+val_dataloader:
+ total_batch_size: 256
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_n_obj365.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_n_obj365.yml
new file mode 100644
index 00000000..b5f148ab
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_n_obj365.yml
@@ -0,0 +1,84 @@
+__include__: [
+ '../../dataset/obj365_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_n_obj365
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+
+HybridEncoder:
+ in_channels: [512, 1024]
+ feat_strides: [16, 32]
+
+ # intra
+ hidden_dim: 128
+ use_encoder_idx: [1]
+ dim_feedforward: 512
+
+ # cross
+ expansion: 0.34
+ depth_mult: 0.5
+
+
+DFINETransformer:
+ feat_channels: [128, 128]
+ feat_strides: [16, 32]
+ hidden_dim: 128
+ dim_feedforward: 512
+ num_levels: 2
+
+ num_layers: 3
+ eval_idx: -1
+
+ num_points: [6, 6]
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0004
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.0004
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.0008
+ betas: [0.9, 0.999]
+ weight_decay: 0.0001
+
+
+
+epochs: 48 # Early stop
+train_dataloader:
+ total_batch_size: 128
+ dataset:
+ transforms:
+ policy:
+ epoch: 500
+ collate_fn:
+ stop_epoch: 500
+ base_size_repeat: ~
+
+checkpoint_freq: 1
+print_freq: 500
+
+val_dataloader:
+ total_batch_size: 256
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_s_obj2coco.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_s_obj2coco.yml
new file mode 100644
index 00000000..9fa01e2e
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_s_obj2coco.yml
@@ -0,0 +1,66 @@
+__include__: [
+ '../../dataset/coco_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_s_obj2coco
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+DFINETransformer:
+ num_layers: 3 # 4 5 6
+ eval_idx: -1 # -2 -3 -4
+
+HybridEncoder:
+ in_channels: [256, 512, 1024]
+ hidden_dim: 256
+ depth_mult: 0.34
+ expansion: 0.5
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.000125
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.000125
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+epochs: 64 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 56
+ collate_fn:
+ stop_epoch: 56
+ ema_restart_decay: 0.9999
+ base_size_repeat: 10
+
+ema:
+ warmups: 0
+
+lr_warmup_scheduler:
+ warmup_duration: 0
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_s_obj365.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_s_obj365.yml
new file mode 100644
index 00000000..1e98a0db
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_s_obj365.yml
@@ -0,0 +1,63 @@
+__include__: [
+ '../../dataset/obj365_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_s_obj365
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B0'
+ return_idx: [1, 2, 3]
+ freeze_at: -1
+ freeze_norm: False
+ use_lab: True
+
+DFINETransformer:
+ num_layers: 3 # 4 5 6
+ eval_idx: -1 # -2 -3 -4
+
+HybridEncoder:
+ in_channels: [256, 512, 1024]
+ hidden_dim: 256
+ depth_mult: 0.34
+ expansion: 0.5
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.000125
+ -
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
+ lr: 0.000125
+ weight_decay: 0.
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+# weight_decay: 0.00005 # Faster convergence (optional)
+
+
+epochs: 36 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 500
+ collate_fn:
+ stop_epoch: 500
+ base_size_repeat: 20
+
+checkpoint_freq: 1
+print_freq: 1000
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_x_obj2coco.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_x_obj2coco.yml
new file mode 100644
index 00000000..738c75d3
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_x_obj2coco.yml
@@ -0,0 +1,61 @@
+__include__: [
+ '../../dataset/coco_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_x_obj2coco
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B5'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+
+HybridEncoder:
+ # intra
+ hidden_dim: 384
+ dim_feedforward: 2048
+
+DFINETransformer:
+ feat_channels: [384, 384, 384]
+ reg_scale: 8
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000025
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+
+
+epochs: 36 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 30
+ collate_fn:
+ stop_epoch: 30
+ ema_restart_decay: 0.9999
+ base_size_repeat: 3
+
+ema:
+ warmups: 0
+
+lr_warmup_scheduler:
+ warmup_duration: 0
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_x_obj365.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_x_obj365.yml
new file mode 100644
index 00000000..0449d724
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/dfine/objects365/dfine_hgnetv2_x_obj365.yml
@@ -0,0 +1,58 @@
+__include__: [
+ '../../dataset/obj365_detection.yml',
+ '../../runtime.yml',
+ '../include/dataloader.yml',
+ '../include/optimizer.yml',
+ '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_x_obj365
+
+
+DFINE:
+ backbone: HGNetv2
+
+HGNetv2:
+ name: 'B5'
+ return_idx: [1, 2, 3]
+ freeze_stem_only: True
+ freeze_at: 0
+ freeze_norm: True
+
+HybridEncoder:
+ # intra
+ hidden_dim: 384
+ dim_feedforward: 2048
+
+DFINETransformer:
+ feat_channels: [384, 384, 384]
+ reg_scale: 8
+
+optimizer:
+ type: AdamW
+ params:
+ -
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
+ lr: 0.0000025
+ -
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+ weight_decay: 0.
+
+ lr: 0.00025
+ betas: [0.9, 0.999]
+ weight_decay: 0.000125
+# weight_decay: 0.00005 # Faster convergence (optional)
+
+
+epochs: 24 # Early stop
+train_dataloader:
+ dataset:
+ transforms:
+ policy:
+ epoch: 500
+ collate_fn:
+ stop_epoch: 500
+ base_size_repeat: 3
+
+checkpoint_freq: 1
+print_freq: 1000
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/configs/runtime.yml b/label_studio_ml/examples/d_fine/d-fine-code/configs/runtime.yml
new file mode 100644
index 00000000..b99af4af
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/configs/runtime.yml
@@ -0,0 +1,24 @@
+print_freq: 100
+output_dir: './logs'
+checkpoint_freq: 12
+
+
+sync_bn: True
+find_unused_parameters: False
+
+
+use_amp: False
+scaler:
+ type: GradScaler
+ enabled: True
+
+
+use_ema: False
+ema:
+ type: ModelEMA
+ decay: 0.9999
+ warmups: 1000
+
+use_wandb: False
+project_name: D-FINE # for wandb
+exp_name: baseline # wandb experiment name
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/__init__.py
new file mode 100644
index 00000000..15e3d5ad
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/__init__.py
@@ -0,0 +1,6 @@
+"""
+Copyright (c) 2024 The D-FINE Authors. All Rights Reserved.
+"""
+
+# imported for side effects: importing these subpackages registers their modules in the global workspace config
+from . import data, nn, optim, zoo
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/core/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/core/__init__.py
new file mode 100644
index 00000000..a938d563
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/core/__init__.py
@@ -0,0 +1,9 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from ._config import BaseConfig
+from .workspace import GLOBAL_CONFIG, create, register
+from .yaml_config import YAMLConfig
+from .yaml_utils import *
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/core/_config.py b/label_studio_ml/examples/d_fine/d-fine-code/src/core/_config.py
new file mode 100644
index 00000000..3bef0756
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/core/_config.py
@@ -0,0 +1,299 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from pathlib import Path
+from typing import Callable, Dict, List
+
+import torch
+import torch.nn as nn
+from torch.cuda.amp.grad_scaler import GradScaler
+from torch.optim import Optimizer
+from torch.optim.lr_scheduler import LRScheduler
+from torch.utils.data import DataLoader, Dataset
+from torch.utils.tensorboard import SummaryWriter
+
+__all__ = [
+ "BaseConfig",
+]
+
+
+class BaseConfig(object):
+ # TODO property
+
+ def __init__(self) -> None:
+ super().__init__()
+
+ self.task: str = None
+
+ # instance / function
+ self._model: nn.Module = None
+ self._postprocessor: nn.Module = None
+ self._criterion: nn.Module = None
+ self._optimizer: Optimizer = None
+ self._lr_scheduler: LRScheduler = None
+ self._lr_warmup_scheduler: LRScheduler = None
+ self._train_dataloader: DataLoader = None
+ self._val_dataloader: DataLoader = None
+ self._ema: nn.Module = None
+ self._scaler: GradScaler = None
+ self._train_dataset: Dataset = None
+ self._val_dataset: Dataset = None
+ self._collate_fn: Callable = None
+ self._evaluator: Callable[[nn.Module, DataLoader, str],] = None
+ self._writer: SummaryWriter = None
+
+ # dataset
+ self.num_workers: int = 0
+ self.batch_size: int = None
+ self._train_batch_size: int = None
+ self._val_batch_size: int = None
+ self._train_shuffle: bool = None
+ self._val_shuffle: bool = None
+
+ # runtime
+ self.resume: str = None
+ self.tuning: str = None
+
+ self.epochs: int = None
+ self.last_epoch: int = -1
+
+ self.use_amp: bool = False
+ self.use_ema: bool = False
+ self.ema_decay: float = 0.9999
+ self.ema_warmups: int = 2000
+ self.sync_bn: bool = False
+ self.clip_max_norm: float = 0.0
+ self.find_unused_parameters: bool = None
+
+ self.seed: int = None
+ self.print_freq: int = None
+ self.checkpoint_freq: int = 1
+ self.output_dir: str = None
+ self.summary_dir: str = None
+ self.device: str = ""
+
+ @property
+ def model(self) -> nn.Module:
+ return self._model
+
+ @model.setter
+ def model(self, m):
+ assert isinstance(m, nn.Module), f"{type(m)} != nn.Module, please check your model class"
+ self._model = m
+
+ @property
+ def postprocessor(self) -> nn.Module:
+ return self._postprocessor
+
+ @postprocessor.setter
+ def postprocessor(self, m):
+ assert isinstance(m, nn.Module), f"{type(m)} != nn.Module, please check your model class"
+ self._postprocessor = m
+
+ @property
+ def criterion(self) -> nn.Module:
+ return self._criterion
+
+ @criterion.setter
+ def criterion(self, m):
+ assert isinstance(m, nn.Module), f"{type(m)} != nn.Module, please check your model class"
+ self._criterion = m
+
+ @property
+ def optimizer(self) -> Optimizer:
+ return self._optimizer
+
+ @optimizer.setter
+ def optimizer(self, m):
+ assert isinstance(
+ m, Optimizer
+ ), f"{type(m)} != optim.Optimizer, please check your model class"
+ self._optimizer = m
+
+ @property
+ def lr_scheduler(self) -> LRScheduler:
+ return self._lr_scheduler
+
+ @lr_scheduler.setter
+ def lr_scheduler(self, m):
+ assert isinstance(
+ m, LRScheduler
+ ), f"{type(m)} != LRScheduler, please check your model class"
+ self._lr_scheduler = m
+
+ @property
+ def lr_warmup_scheduler(self) -> LRScheduler:
+ return self._lr_warmup_scheduler
+
+ @lr_warmup_scheduler.setter
+ def lr_warmup_scheduler(self, m):
+ self._lr_warmup_scheduler = m
+
+ @property
+ def train_dataloader(self) -> DataLoader:
+ if self._train_dataloader is None and self.train_dataset is not None:
+ loader = DataLoader(
+ self.train_dataset,
+ batch_size=self.train_batch_size,
+ num_workers=self.num_workers,
+ collate_fn=self.collate_fn,
+ shuffle=self.train_shuffle,
+ )
+ loader.shuffle = self.train_shuffle
+ self._train_dataloader = loader
+
+ return self._train_dataloader
+
+ @train_dataloader.setter
+ def train_dataloader(self, loader):
+ self._train_dataloader = loader
+
+ @property
+ def val_dataloader(self) -> DataLoader:
+ if self._val_dataloader is None and self.val_dataset is not None:
+ loader = DataLoader(
+ self.val_dataset,
+ batch_size=self.val_batch_size,
+ num_workers=self.num_workers,
+ drop_last=False,
+ collate_fn=self.collate_fn,
+ shuffle=self.val_shuffle,
+ persistent_workers=True,
+ )
+ loader.shuffle = self.val_shuffle
+ self._val_dataloader = loader
+
+ return self._val_dataloader
+
+ @val_dataloader.setter
+ def val_dataloader(self, loader):
+ self._val_dataloader = loader
+
+ @property
+ def ema(self) -> nn.Module:
+ if self._ema is None and self.use_ema and self.model is not None:
+ from ..optim import ModelEMA
+
+ self._ema = ModelEMA(self.model, self.ema_decay, self.ema_warmups)
+ return self._ema
+
+ @ema.setter
+ def ema(self, obj):
+ self._ema = obj
+
+ @property
+ def scaler(self) -> GradScaler:
+ if self._scaler is None and self.use_amp and torch.cuda.is_available():
+ self._scaler = GradScaler()
+ return self._scaler
+
+ @scaler.setter
+ def scaler(self, obj: GradScaler):
+ self._scaler = obj
+
+ @property
+ def val_shuffle(self) -> bool:
+ if self._val_shuffle is None:
+ print("warning: set default val_shuffle=False")
+ return False
+ return self._val_shuffle
+
+ @val_shuffle.setter
+ def val_shuffle(self, shuffle):
+ assert isinstance(shuffle, bool), "shuffle must be bool"
+ self._val_shuffle = shuffle
+
+ @property
+ def train_shuffle(self) -> bool:
+ if self._train_shuffle is None:
+ print("warning: set default train_shuffle=True")
+ return True
+ return self._train_shuffle
+
+ @train_shuffle.setter
+ def train_shuffle(self, shuffle):
+ assert isinstance(shuffle, bool), "shuffle must be bool"
+ self._train_shuffle = shuffle
+
+ @property
+ def train_batch_size(self) -> int:
+ if self._train_batch_size is None and isinstance(self.batch_size, int):
+ print(f"warning: set train_batch_size=batch_size={self.batch_size}")
+ return self.batch_size
+ return self._train_batch_size
+
+ @train_batch_size.setter
+ def train_batch_size(self, batch_size):
+ assert isinstance(batch_size, int), "batch_size must be int"
+ self._train_batch_size = batch_size
+
+ @property
+ def val_batch_size(self) -> int:
+ if self._val_batch_size is None:
+ print(f"warning: set val_batch_size=batch_size={self.batch_size}")
+ return self.batch_size
+ return self._val_batch_size
+
+ @val_batch_size.setter
+ def val_batch_size(self, batch_size):
+ assert isinstance(batch_size, int), "batch_size must be int"
+ self._val_batch_size = batch_size
+
+ @property
+ def train_dataset(self) -> Dataset:
+ return self._train_dataset
+
+ @train_dataset.setter
+ def train_dataset(self, dataset):
+ assert isinstance(dataset, Dataset), f"{type(dataset)} must be Dataset"
+ self._train_dataset = dataset
+
+ @property
+ def val_dataset(self) -> Dataset:
+ return self._val_dataset
+
+ @val_dataset.setter
+ def val_dataset(self, dataset):
+ assert isinstance(dataset, Dataset), f"{type(dataset)} must be Dataset"
+ self._val_dataset = dataset
+
+ @property
+ def collate_fn(self) -> Callable:
+ return self._collate_fn
+
+ @collate_fn.setter
+ def collate_fn(self, fn):
+ assert isinstance(fn, Callable), f"{type(fn)} must be Callable"
+ self._collate_fn = fn
+
+ @property
+ def evaluator(self) -> Callable:
+ return self._evaluator
+
+ @evaluator.setter
+ def evaluator(self, fn):
+ assert isinstance(fn, Callable), f"{type(fn)} must be Callable"
+ self._evaluator = fn
+
+ @property
+ def writer(self) -> SummaryWriter:
+ if self._writer is None:
+ if self.summary_dir:
+ self._writer = SummaryWriter(self.summary_dir)
+ elif self.output_dir:
+ self._writer = SummaryWriter(Path(self.output_dir) / "summary")
+ return self._writer
+
+ @writer.setter
+ def writer(self, m):
+ assert isinstance(m, SummaryWriter), f"{type(m)} must be SummaryWriter"
+ self._writer = m
+
+ def __repr__(self):
+ s = ""
+ for k, v in self.__dict__.items():
+ if not k.startswith("_"):
+ s += f"{k}: {v}\n"
+ return s
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/core/workspace.py b/label_studio_ml/examples/d_fine/d-fine-code/src/core/workspace.py
new file mode 100644
index 00000000..b99e317f
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/core/workspace.py
@@ -0,0 +1,178 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import functools
+import importlib
+import inspect
+from collections import defaultdict
+from typing import Any, Dict, List, Optional
+
+GLOBAL_CONFIG = defaultdict(dict)
+
+
+def register(dct: Any = GLOBAL_CONFIG, name=None, force=False):
+ """
+ dct:
+        if dct is a Dict, register foo into dct as a key-value pair
+        if dct is a Class, register foo as an attribute of that class
+    force
+        whether to force registration (overwrite an existing entry)
+ """
+
+ def decorator(foo):
+ register_name = foo.__name__ if name is None else name
+ if not force:
+ if inspect.isclass(dct):
+ assert not hasattr(dct, foo.__name__), f"module {dct.__name__} has {foo.__name__}"
+ else:
+ assert foo.__name__ not in dct, f"{foo.__name__} has been already registered"
+
+ if inspect.isfunction(foo):
+
+ @functools.wraps(foo)
+ def wrap_func(*args, **kwargs):
+ return foo(*args, **kwargs)
+
+ if isinstance(dct, dict):
+ dct[foo.__name__] = wrap_func
+ elif inspect.isclass(dct):
+ setattr(dct, foo.__name__, wrap_func)
+ else:
+ raise AttributeError("")
+ return wrap_func
+
+ elif inspect.isclass(foo):
+ dct[register_name] = extract_schema(foo)
+
+ else:
+ raise ValueError(f"Do not support {type(foo)} register")
+
+ return foo
+
+ return decorator
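+
+# Illustrative usage (not part of the original file): registering a class makes
+# it constructible by name via `create`, with constructor arguments taken from
+# the YAML config.
+#
+#   @register()
+#   class MyBackbone(nn.Module):
+#       def __init__(self, depth=50):
+#           super().__init__()
+#
+#   # later: model = create('MyBackbone', GLOBAL_CONFIG)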
+
+
+def extract_schema(module: type):
+ """
+    Args:
+        module (type): class whose __init__ signature is inspected
+    Return:
+        Dict: schema of the constructor arguments and their default values
+ """
+ argspec = inspect.getfullargspec(module.__init__)
+ arg_names = [arg for arg in argspec.args if arg != "self"]
+    num_defaults = len(argspec.defaults) if argspec.defaults is not None else 0
+    num_requires = len(arg_names) - num_defaults
+
+    schema = dict()
+    schema["_name"] = module.__name__
+    schema["_pymodule"] = importlib.import_module(module.__module__)
+    schema["_inject"] = getattr(module, "__inject__", [])
+    schema["_share"] = getattr(module, "__share__", [])
+    schema["_kwargs"] = {}
+    for i, name in enumerate(arg_names):
+        if name in schema["_share"]:
+            assert i >= num_requires, "share config must have default value."
+            value = argspec.defaults[i - num_requires]
+
+        elif i >= num_requires:
+            value = argspec.defaults[i - num_requires]
+
+        else:
+            value = None
+
+        schema[name] = value
+        schema["_kwargs"][name] = value
+
+    return schema
+
+
+def create(type_or_name, global_cfg=GLOBAL_CONFIG, **kwargs):
+ """ """
+    assert type(type_or_name) in (type, str), "create expects a class (type) or a registered name (str)."
+
+ name = type_or_name if isinstance(type_or_name, str) else type_or_name.__name__
+
+ if name in global_cfg:
+ if hasattr(global_cfg[name], "__dict__"):
+ return global_cfg[name]
+ else:
+ raise ValueError("The module {} is not registered".format(name))
+
+ cfg = global_cfg[name]
+
+ if isinstance(cfg, dict) and "type" in cfg:
+ _cfg: dict = global_cfg[cfg["type"]]
+ # clean args
+ _keys = [k for k in _cfg.keys() if not k.startswith("_")]
+ for _arg in _keys:
+ del _cfg[_arg]
+ _cfg.update(_cfg["_kwargs"]) # restore default args
+ _cfg.update(cfg) # load config args
+        _cfg.update(kwargs)  # TODO receive extra kwargs
+ name = _cfg.pop("type") # pop extra key `type` (from cfg)
+
+ return create(name, global_cfg)
+
+ module = getattr(cfg["_pymodule"], name)
+ module_kwargs = {}
+ module_kwargs.update(cfg)
+
+ # shared var
+ for k in cfg["_share"]:
+ if k in global_cfg:
+ module_kwargs[k] = global_cfg[k]
+ else:
+ module_kwargs[k] = cfg[k]
+
+ # inject
+ for k in cfg["_inject"]:
+ _k = cfg[k]
+
+ if _k is None:
+ continue
+
+ if isinstance(_k, str):
+ if _k not in global_cfg:
+ raise ValueError(f"Missing inject config of {_k}.")
+
+ _cfg = global_cfg[_k]
+
+ if isinstance(_cfg, dict):
+ module_kwargs[k] = create(_cfg["_name"], global_cfg)
+ else:
+ module_kwargs[k] = _cfg
+
+ elif isinstance(_k, dict):
+ if "type" not in _k.keys():
+ raise ValueError("Missing inject for `type` style.")
+
+ _type = str(_k["type"])
+ if _type not in global_cfg:
+ raise ValueError(f"Missing {_type} in inspect stage.")
+
+ # TODO
+ _cfg: dict = global_cfg[_type]
+ # clean args
+ _keys = [k for k in _cfg.keys() if not k.startswith("_")]
+ for _arg in _keys:
+ del _cfg[_arg]
+ _cfg.update(_cfg["_kwargs"]) # restore default values
+ _cfg.update(_k) # load config args
+ name = _cfg.pop("type") # pop extra key (`type` from _k)
+ module_kwargs[k] = create(name, global_cfg)
+
+ else:
+ raise ValueError(f"Inject does not support {_k}")
+
+ # TODO hard code
+ module_kwargs = {k: v for k, v in module_kwargs.items() if not k.startswith("_")}
+
+ # TODO for **kwargs
+ # extra_args = set(module_kwargs.keys()) - set(arg_names)
+ # if len(extra_args) > 0:
+ # raise RuntimeError(f'Error: unknown args {extra_args} for {module}')
+
+ return module(**module_kwargs)
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/core/yaml_config.py b/label_studio_ml/examples/d_fine/d-fine-code/src/core/yaml_config.py
new file mode 100644
index 00000000..0f7faf41
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/core/yaml_config.py
@@ -0,0 +1,187 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import copy
+import re
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader
+
+from ._config import BaseConfig
+from .workspace import create
+from .yaml_utils import load_config, merge_config, merge_dict
+
+
+class YAMLConfig(BaseConfig):
+ def __init__(self, cfg_path: str, **kwargs) -> None:
+ super().__init__()
+
+ cfg = load_config(cfg_path)
+ cfg = merge_dict(cfg, kwargs)
+
+ self.yaml_cfg = copy.deepcopy(cfg)
+
+ for k in super().__dict__:
+ if not k.startswith("_") and k in cfg:
+ self.__dict__[k] = cfg[k]
+
+ @property
+ def global_cfg(self):
+ return merge_config(self.yaml_cfg, inplace=False, overwrite=False)
+
+ @property
+ def model(self) -> torch.nn.Module:
+ if self._model is None and "model" in self.yaml_cfg:
+ self._model = create(self.yaml_cfg["model"], self.global_cfg)
+ return super().model
+
+ @property
+ def postprocessor(self) -> torch.nn.Module:
+ if self._postprocessor is None and "postprocessor" in self.yaml_cfg:
+ self._postprocessor = create(self.yaml_cfg["postprocessor"], self.global_cfg)
+ return super().postprocessor
+
+ @property
+ def criterion(self) -> torch.nn.Module:
+ if self._criterion is None and "criterion" in self.yaml_cfg:
+ self._criterion = create(self.yaml_cfg["criterion"], self.global_cfg)
+ return super().criterion
+
+ @property
+ def optimizer(self) -> optim.Optimizer:
+ if self._optimizer is None and "optimizer" in self.yaml_cfg:
+ params = self.get_optim_params(self.yaml_cfg["optimizer"], self.model)
+ self._optimizer = create("optimizer", self.global_cfg, params=params)
+ return super().optimizer
+
+ @property
+ def lr_scheduler(self) -> optim.lr_scheduler.LRScheduler:
+ if self._lr_scheduler is None and "lr_scheduler" in self.yaml_cfg:
+ self._lr_scheduler = create("lr_scheduler", self.global_cfg, optimizer=self.optimizer)
+ print(f"Initial lr: {self._lr_scheduler.get_last_lr()}")
+ return super().lr_scheduler
+
+ @property
+ def lr_warmup_scheduler(self) -> optim.lr_scheduler.LRScheduler:
+ if self._lr_warmup_scheduler is None and "lr_warmup_scheduler" in self.yaml_cfg:
+ self._lr_warmup_scheduler = create(
+ "lr_warmup_scheduler", self.global_cfg, lr_scheduler=self.lr_scheduler
+ )
+ return super().lr_warmup_scheduler
+
+ @property
+ def train_dataloader(self) -> DataLoader:
+ if self._train_dataloader is None and "train_dataloader" in self.yaml_cfg:
+ self._train_dataloader = self.build_dataloader("train_dataloader")
+ return super().train_dataloader
+
+ @property
+ def val_dataloader(self) -> DataLoader:
+ if self._val_dataloader is None and "val_dataloader" in self.yaml_cfg:
+ self._val_dataloader = self.build_dataloader("val_dataloader")
+ return super().val_dataloader
+
+ @property
+ def ema(self) -> torch.nn.Module:
+ if self._ema is None and self.yaml_cfg.get("use_ema", False):
+ self._ema = create("ema", self.global_cfg, model=self.model)
+ return super().ema
+
+ @property
+ def scaler(self):
+ if self._scaler is None and self.yaml_cfg.get("use_amp", False):
+ self._scaler = create("scaler", self.global_cfg)
+ return super().scaler
+
+ @property
+ def evaluator(self):
+ if self._evaluator is None and "evaluator" in self.yaml_cfg:
+ if self.yaml_cfg["evaluator"]["type"] == "CocoEvaluator":
+ from ..data import get_coco_api_from_dataset
+
+ base_ds = get_coco_api_from_dataset(self.val_dataloader.dataset)
+ self._evaluator = create("evaluator", self.global_cfg, coco_gt=base_ds)
+ else:
+ raise NotImplementedError(f"{self.yaml_cfg['evaluator']['type']}")
+ return super().evaluator
+
+ @property
+ def use_wandb(self) -> bool:
+ return self.yaml_cfg.get("use_wandb", False)
+
+ @staticmethod
+ def get_optim_params(cfg: dict, model: nn.Module):
+ """
+ E.g.:
+ ^(?=.*a)(?=.*b).*$ means including a and b
+ ^(?=.*(?:a|b)).*$ means including a or b
+ ^(?=.*a)(?!.*b).*$ means including a, but not b
+ """
+ assert "type" in cfg, ""
+ cfg = copy.deepcopy(cfg)
+
+ if "params" not in cfg:
+ return model.parameters()
+
+ assert isinstance(cfg["params"], list), ""
+
+ param_groups = []
+ visited = []
+ for pg in cfg["params"]:
+ pattern = pg["params"]
+ params = {
+ k: v
+ for k, v in model.named_parameters()
+ if v.requires_grad and len(re.findall(pattern, k)) > 0
+ }
+ pg["params"] = params.values()
+ param_groups.append(pg)
+ visited.extend(list(params.keys()))
+ # print(params.keys())
+
+ names = [k for k, v in model.named_parameters() if v.requires_grad]
+
+ if len(visited) < len(names):
+ unseen = set(names) - set(visited)
+ params = {k: v for k, v in model.named_parameters() if v.requires_grad and k in unseen}
+ param_groups.append({"params": params.values()})
+ visited.extend(list(params.keys()))
+ # print(params.keys())
+
+ assert len(visited) == len(names), ""
+
+ return param_groups
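+    # Illustrative example (parameter names are hypothetical): a parameter named
+    # 'backbone.stem.conv.weight' matches '^(?=.*backbone)(?!.*norm|bn).*$' and
+    # joins the reduced-lr backbone group, while 'decoder.layers.0.norm1.bias'
+    # matches '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' and is placed
+    # in the no-weight-decay group; parameters matched by no pattern form a
+    # final default group that uses the optimizer's top-level settings.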
+
+ @staticmethod
+ def get_rank_batch_size(cfg):
+ """compute batch size for per rank if total_batch_size is provided."""
+ assert ("total_batch_size" in cfg or "batch_size" in cfg) and not (
+ "total_batch_size" in cfg and "batch_size" in cfg
+        ), "specify exactly one of `batch_size` or `total_batch_size`"
+
+ total_batch_size = cfg.get("total_batch_size", None)
+ if total_batch_size is None:
+ bs = cfg.get("batch_size")
+ else:
+ from ..misc import dist_utils
+
+ assert (
+ total_batch_size % dist_utils.get_world_size() == 0
+ ), "total_batch_size should be divisible by world size"
+ bs = total_batch_size // dist_utils.get_world_size()
+ return bs
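+        # Example: with `total_batch_size: 128` in the config and 4 training
+        # processes (world size 4), each rank gets batch_size = 128 // 4 = 32.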
+
+ def build_dataloader(self, name: str):
+ bs = self.get_rank_batch_size(self.yaml_cfg[name])
+ global_cfg = self.global_cfg
+ if "total_batch_size" in global_cfg[name]:
+ # pop unexpected key for dataloader init
+ _ = global_cfg[name].pop("total_batch_size")
+ print(f"building {name} with batch_size={bs}...")
+ loader = create(name, global_cfg, batch_size=bs)
+ loader.shuffle = self.yaml_cfg[name].get("shuffle", False)
+ return loader
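+
+# Illustrative usage (path and kwargs are examples, not part of the original file):
+#   cfg = YAMLConfig('configs/dfine/dfine_hgnetv2_s_coco.yml', resume='model.pth')
+#   model, postprocessor = cfg.model, cfg.postprocessor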
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/core/yaml_utils.py b/label_studio_ml/examples/d_fine/d-fine-code/src/core/yaml_utils.py
new file mode 100644
index 00000000..b77e0719
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/core/yaml_utils.py
@@ -0,0 +1,126 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import copy
+import os
+from typing import Any, Dict, List, Optional
+
+import yaml
+
+from .workspace import GLOBAL_CONFIG
+
+__all__ = [
+ "load_config",
+ "merge_config",
+ "merge_dict",
+ "parse_cli",
+]
+
+
+INCLUDE_KEY = "__include__"
+
+
+def load_config(file_path, cfg=None):
+    """load config"""
+    # avoid a shared mutable default argument across calls
+    if cfg is None:
+        cfg = dict()
+    _, ext = os.path.splitext(file_path)
+ assert ext in [".yml", ".yaml"], "only support yaml files"
+
+ with open(file_path) as f:
+ file_cfg = yaml.load(f, Loader=yaml.Loader)
+ if file_cfg is None:
+ return {}
+
+ if INCLUDE_KEY in file_cfg:
+ base_yamls = list(file_cfg[INCLUDE_KEY])
+ for base_yaml in base_yamls:
+ if base_yaml.startswith("~"):
+ base_yaml = os.path.expanduser(base_yaml)
+
+ if not base_yaml.startswith("/"):
+ base_yaml = os.path.join(os.path.dirname(file_path), base_yaml)
+
+            # load_config opens the file itself, so no extra open() is needed here
+            base_cfg = load_config(base_yaml, cfg)
+            merge_dict(cfg, base_cfg)
+
+ return merge_dict(cfg, file_cfg)
+
+
+def merge_dict(dct, another_dct, inplace=True) -> Dict:
+ """merge another_dct into dct"""
+
+ def _merge(dct, another) -> Dict:
+ for k in another:
+ if k in dct and isinstance(dct[k], dict) and isinstance(another[k], dict):
+ _merge(dct[k], another[k])
+ else:
+ dct[k] = another[k]
+
+ return dct
+
+ if not inplace:
+ dct = copy.deepcopy(dct)
+
+ return _merge(dct, another_dct)
+
+
+def dictify(s: str, v: Any) -> Dict:
+ if "." not in s:
+ return {s: v}
+ key, rest = s.split(".", 1)
+ return {key: dictify(rest, v)}
+
+
+def parse_cli(nargs: List[str]) -> Dict:
+ """
+ parse command-line arguments
+ convert `a.c=3 b=10` to `{'a': {'c': 3}, 'b': 10}`
+ """
+ cfg = {}
+ if nargs is None or len(nargs) == 0:
+ return cfg
+
+ for s in nargs:
+ s = s.strip()
+ k, v = s.split("=", 1)
+ d = dictify(k, yaml.load(v, Loader=yaml.Loader))
+ cfg = merge_dict(cfg, d)
+
+ return cfg
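+
+# Example: parse_cli(['train_dataloader.total_batch_size=32', 'use_amp=True'])
+# returns {'train_dataloader': {'total_batch_size': 32}, 'use_amp': True}.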
+
+
+def merge_config(cfg, another_cfg=GLOBAL_CONFIG, inplace: bool = False, overwrite: bool = False):
+ """
+ Merge another_cfg into cfg, return the merged config
+
+ Example:
+
+ cfg1 = load_config('./dfine_r18vd_6x_coco.yml')
+    cfg1 = merge_config(cfg1, inplace=True)
+
+ cfg2 = load_config('./dfine_r50vd_6x_coco.yml')
+ cfg2 = merge_config(cfg2, inplace=True)
+
+ model1 = create(cfg1['model'], cfg1)
+ model2 = create(cfg2['model'], cfg2)
+ """
+
+ def _merge(dct, another):
+ for k in another:
+ if k not in dct:
+ dct[k] = another[k]
+
+ elif isinstance(dct[k], dict) and isinstance(another[k], dict):
+ _merge(dct[k], another[k])
+
+ elif overwrite:
+ dct[k] = another[k]
+
+ return cfg
+
+ if not inplace:
+ cfg = copy.deepcopy(cfg)
+
+ return _merge(cfg, another_cfg)
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/__init__.py
new file mode 100644
index 00000000..bb6b8d4b
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/__init__.py
@@ -0,0 +1,20 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from ._misc import convert_to_tv_tensor
+from .dataloader import *
+from .dataset import *
+from .transforms import *
+
+
+# def set_epoch(self, epoch) -> None:
+# self.epoch = epoch
+# def _set_epoch_func(datasets):
+# """Add `set_epoch` for datasets
+# """
+# from ..core import register
+# for ds in datasets:
+# register(ds)(set_epoch)
+# _set_epoch_func([CIFAR10, VOCDetection, CocoDetection])
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/_misc.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/_misc.py
new file mode 100644
index 00000000..74c1e3fb
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/_misc.py
@@ -0,0 +1,62 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import importlib.metadata
+
+from torch import Tensor
+
+if "0.15.2" in importlib.metadata.version("torchvision"):
+ import torchvision
+
+ torchvision.disable_beta_transforms_warning()
+
+ from torchvision.datapoints import BoundingBox as BoundingBoxes
+ from torchvision.datapoints import BoundingBoxFormat, Image, Mask, Video
+ from torchvision.transforms.v2 import SanitizeBoundingBox as SanitizeBoundingBoxes
+
+ _boxes_keys = ["format", "spatial_size"]
+
+elif "0.17" > importlib.metadata.version("torchvision") >= "0.16":
+ import torchvision
+
+ torchvision.disable_beta_transforms_warning()
+
+ from torchvision.transforms.v2 import SanitizeBoundingBoxes
+ from torchvision.tv_tensors import BoundingBoxes, BoundingBoxFormat, Image, Mask, Video
+
+ _boxes_keys = ["format", "canvas_size"]
+
+elif importlib.metadata.version("torchvision") >= "0.17":
+ import torchvision
+ from torchvision.transforms.v2 import SanitizeBoundingBoxes
+ from torchvision.tv_tensors import BoundingBoxes, BoundingBoxFormat, Image, Mask, Video
+
+ _boxes_keys = ["format", "canvas_size"]
+
+else:
+ raise RuntimeError("Please make sure torchvision version >= 0.15.2")
+
+
+def convert_to_tv_tensor(tensor: Tensor, key: str, box_format="xyxy", spatial_size=None) -> Tensor:
+ """
+ Args:
+ tensor (Tensor): input tensor
+        key (str): target tv_tensor kind, either 'boxes' or 'masks'
+
+    Return:
+        Tensor: the input wrapped as a torchvision BoundingBoxes or Mask tv_tensor
+ """
+ assert key in (
+ "boxes",
+ "masks",
+ ), "Only support 'boxes' and 'masks'"
+
+ if key == "boxes":
+ box_format = getattr(BoundingBoxFormat, box_format.upper())
+ _kwargs = dict(zip(_boxes_keys, [box_format, spatial_size]))
+ return BoundingBoxes(tensor, **_kwargs)
+
+ if key == "masks":
+ return Mask(tensor)
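+
+
+# Illustrative usage (not part of the original file):
+#   boxes = torch.tensor([[10.0, 20.0, 50.0, 80.0]])
+#   tv_boxes = convert_to_tv_tensor(boxes, key='boxes', box_format='xyxy',
+#                                   spatial_size=(480, 640))  # (height, width)
+#   # -> a torchvision BoundingBoxes tv_tensor in XYXY format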
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataloader.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataloader.py
new file mode 100644
index 00000000..ce2c0a16
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataloader.py
@@ -0,0 +1,122 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import random
+from functools import partial
+
+import torch
+import torch.nn.functional as F
+import torch.utils.data as data
+import torchvision
+import torchvision.transforms.v2 as VT
+from torch.utils.data import default_collate
+from torchvision.transforms.v2 import InterpolationMode
+from torchvision.transforms.v2 import functional as VF
+
+from ..core import register
+
+torchvision.disable_beta_transforms_warning()
+
+
+__all__ = [
+ "DataLoader",
+ "BaseCollateFunction",
+ "BatchImageCollateFunction",
+ "batch_image_collate_fn",
+]
+
+
+@register()
+class DataLoader(data.DataLoader):
+ __inject__ = ["dataset", "collate_fn"]
+
+ def __repr__(self) -> str:
+ format_string = self.__class__.__name__ + "("
+ for n in ["dataset", "batch_size", "num_workers", "drop_last", "collate_fn"]:
+ format_string += "\n"
+ format_string += " {0}: {1}".format(n, getattr(self, n))
+ format_string += "\n)"
+ return format_string
+
+ def set_epoch(self, epoch):
+ self._epoch = epoch
+ self.dataset.set_epoch(epoch)
+ self.collate_fn.set_epoch(epoch)
+
+ @property
+ def epoch(self):
+ return self._epoch if hasattr(self, "_epoch") else -1
+
+ @property
+ def shuffle(self):
+ return self._shuffle
+
+ @shuffle.setter
+ def shuffle(self, shuffle):
+ assert isinstance(shuffle, bool), "shuffle must be a boolean"
+ self._shuffle = shuffle
+
+
+@register()
+def batch_image_collate_fn(items):
+ """only batch image"""
+ return torch.cat([x[0][None] for x in items], dim=0), [x[1] for x in items]
+
+
+class BaseCollateFunction(object):
+ def set_epoch(self, epoch):
+ self._epoch = epoch
+
+ @property
+ def epoch(self):
+ return self._epoch if hasattr(self, "_epoch") else -1
+
+ def __call__(self, items):
+ raise NotImplementedError("")
+
+
+def generate_scales(base_size, base_size_repeat):
+ scale_repeat = (base_size - int(base_size * 0.75 / 32) * 32) // 32
+ scales = [int(base_size * 0.75 / 32) * 32 + i * 32 for i in range(scale_repeat)]
+ scales += [base_size] * base_size_repeat
+ scales += [int(base_size * 1.25 / 32) * 32 - i * 32 for i in range(scale_repeat)]
+ return scales
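+
+# Example: generate_scales(640, 3) returns
+# [480, 512, 544, 576, 608, 640, 640, 640, 800, 768, 736, 704, 672],
+# i.e. multiples of 32 from 0.75x to 1.25x of base_size, with base_size itself
+# repeated `base_size_repeat` times so it is sampled more often.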
+
+
+@register()
+class BatchImageCollateFunction(BaseCollateFunction):
+ def __init__(
+ self,
+ stop_epoch=None,
+ ema_restart_decay=0.9999,
+ base_size=640,
+ base_size_repeat=None,
+ ) -> None:
+ super().__init__()
+ self.base_size = base_size
+ self.scales = (
+ generate_scales(base_size, base_size_repeat) if base_size_repeat is not None else None
+ )
+ self.stop_epoch = stop_epoch if stop_epoch is not None else 100000000
+ self.ema_restart_decay = ema_restart_decay
+ # self.interpolation = interpolation
+
+ def __call__(self, items):
+ images = torch.cat([x[0][None] for x in items], dim=0)
+ targets = [x[1] for x in items]
+
+ if self.scales is not None and self.epoch < self.stop_epoch:
+ # sz = random.choice(self.scales)
+ # sz = [sz] if isinstance(sz, int) else list(sz)
+ # VF.resize(inpt, sz, interpolation=self.interpolation)
+
+ sz = random.choice(self.scales)
+ images = F.interpolate(images, size=sz)
+ if "masks" in targets[0]:
+ for tg in targets:
+ tg["masks"] = F.interpolate(tg["masks"], size=sz, mode="nearest")
+ raise NotImplementedError("")
+
+ return images, targets
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/__init__.py
new file mode 100644
index 00000000..eb758883
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/__init__.py
@@ -0,0 +1,17 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+# from ._dataset import DetDataset
+from .cifar_dataset import CIFAR10
+from .coco_dataset import (
+ CocoDetection,
+ mscoco_category2label,
+ mscoco_category2name,
+ mscoco_label2category,
+)
+from .coco_eval import CocoEvaluator
+from .coco_utils import get_coco_api_from_dataset
+from .voc_detection import VOCDetection
+from .voc_eval import VOCEvaluator
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/_dataset.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/_dataset.py
new file mode 100644
index 00000000..62b131ef
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/_dataset.py
@@ -0,0 +1,27 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch
+import torch.utils.data as data
+
+
+class DetDataset(data.Dataset):
+ def __getitem__(self, index):
+ img, target = self.load_item(index)
+ if self.transforms is not None:
+ img, target, _ = self.transforms(img, target, self)
+ return img, target
+
+ def load_item(self, index):
+ raise NotImplementedError(
+ "Please implement this function to return item before `transforms`."
+ )
+
+ def set_epoch(self, epoch) -> None:
+ self._epoch = epoch
+
+ @property
+ def epoch(self):
+ return self._epoch if hasattr(self, "_epoch") else -1
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/cifar_dataset.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/cifar_dataset.py
new file mode 100644
index 00000000..e825da7d
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/cifar_dataset.py
@@ -0,0 +1,25 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from typing import Callable, Optional
+
+import torchvision
+
+from ...core import register
+
+
+@register()
+class CIFAR10(torchvision.datasets.CIFAR10):
+ __inject__ = ["transform", "target_transform"]
+
+ def __init__(
+ self,
+ root: str,
+ train: bool = True,
+ transform: Optional[Callable] = None,
+ target_transform: Optional[Callable] = None,
+ download: bool = False,
+ ) -> None:
+ super().__init__(root, train, transform, target_transform, download)
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/coco_dataset.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/coco_dataset.py
new file mode 100644
index 00000000..51a55545
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/coco_dataset.py
@@ -0,0 +1,282 @@
+"""
+Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
+
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import faster_coco_eval
+import faster_coco_eval.core.mask as coco_mask
+import torch
+import torch.utils.data
+import torchvision
+import os
+from PIL import Image
+
+from ...core import register
+from .._misc import convert_to_tv_tensor
+from ._dataset import DetDataset
+
+torchvision.disable_beta_transforms_warning()
+faster_coco_eval.init_as_pycocotools()
+Image.MAX_IMAGE_PIXELS = None
+
+__all__ = ["CocoDetection"]
+
+
+@register()
+class CocoDetection(torchvision.datasets.CocoDetection, DetDataset):
+ __inject__ = [
+ "transforms",
+ ]
+ __share__ = ["remap_mscoco_category"]
+
+ def __init__(
+ self, img_folder, ann_file, transforms, return_masks=False, remap_mscoco_category=False
+ ):
+ super(CocoDetection, self).__init__(img_folder, ann_file)
+ self._transforms = transforms
+ self.prepare = ConvertCocoPolysToMask(return_masks)
+ self.img_folder = img_folder
+ self.ann_file = ann_file
+ self.return_masks = return_masks
+ self.remap_mscoco_category = remap_mscoco_category
+
+ def __getitem__(self, idx):
+ img, target = self.load_item(idx)
+ if self._transforms is not None:
+ img, target, _ = self._transforms(img, target, self)
+ return img, target
+
+ def load_item(self, idx):
+ image, target = super(CocoDetection, self).__getitem__(idx)
+ image_id = self.ids[idx]
+ image_path = os.path.join(self.img_folder, self.coco.loadImgs(image_id)[0]["file_name"])
+ target = {"image_id": image_id, "image_path": image_path, "annotations": target}
+
+ if self.remap_mscoco_category:
+ image, target = self.prepare(image, target, category2label=mscoco_category2label)
+ else:
+ image, target = self.prepare(image, target)
+
+ target["idx"] = torch.tensor([idx])
+
+ if "boxes" in target:
+ target["boxes"] = convert_to_tv_tensor(
+ target["boxes"], key="boxes", spatial_size=image.size[::-1]
+ )
+
+ if "masks" in target:
+ target["masks"] = convert_to_tv_tensor(target["masks"], key="masks")
+
+ return image, target
+
+ def extra_repr(self) -> str:
+ s = f" img_folder: {self.img_folder}\n ann_file: {self.ann_file}\n"
+ s += f" return_masks: {self.return_masks}\n"
+ if hasattr(self, "_transforms") and self._transforms is not None:
+ s += f" transforms:\n {repr(self._transforms)}"
+ if hasattr(self, "_preset") and self._preset is not None:
+ s += f" preset:\n {repr(self._preset)}"
+ return s
+
+ @property
+ def categories(
+ self,
+ ):
+ return self.coco.dataset["categories"]
+
+ @property
+ def category2name(
+ self,
+ ):
+ return {cat["id"]: cat["name"] for cat in self.categories}
+
+ @property
+ def category2label(
+ self,
+ ):
+ return {cat["id"]: i for i, cat in enumerate(self.categories)}
+
+ @property
+ def label2category(
+ self,
+ ):
+ return {i: cat["id"] for i, cat in enumerate(self.categories)}
+
+
+def convert_coco_poly_to_mask(segmentations, height, width):
+ masks = []
+ for polygons in segmentations:
+ rles = coco_mask.frPyObjects(polygons, height, width)
+ mask = coco_mask.decode(rles)
+ if len(mask.shape) < 3:
+ mask = mask[..., None]
+ mask = torch.as_tensor(mask, dtype=torch.uint8)
+ mask = mask.any(dim=2)
+ masks.append(mask)
+ if masks:
+ masks = torch.stack(masks, dim=0)
+ else:
+ masks = torch.zeros((0, height, width), dtype=torch.uint8)
+ return masks
+
+
+class ConvertCocoPolysToMask(object):
+ def __init__(self, return_masks=False):
+ self.return_masks = return_masks
+
+ def __call__(self, image: Image.Image, target, **kwargs):
+ w, h = image.size
+
+ image_id = target["image_id"]
+ image_id = torch.tensor([image_id])
+
+ image_path = target["image_path"]
+
+ anno = target["annotations"]
+
+ anno = [obj for obj in anno if "iscrowd" not in obj or obj["iscrowd"] == 0]
+
+ boxes = [obj["bbox"] for obj in anno]
+ # guard against no boxes via resizing
+ boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
+ boxes[:, 2:] += boxes[:, :2]
+ boxes[:, 0::2].clamp_(min=0, max=w)
+ boxes[:, 1::2].clamp_(min=0, max=h)
+
+ category2label = kwargs.get("category2label", None)
+ if category2label is not None:
+ labels = [category2label[obj["category_id"]] for obj in anno]
+ else:
+ labels = [obj["category_id"] for obj in anno]
+
+ labels = torch.tensor(labels, dtype=torch.int64)
+
+ if self.return_masks:
+ segmentations = [obj["segmentation"] for obj in anno]
+ masks = convert_coco_poly_to_mask(segmentations, h, w)
+
+ keypoints = None
+ if anno and "keypoints" in anno[0]:
+ keypoints = [obj["keypoints"] for obj in anno]
+ keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
+ num_keypoints = keypoints.shape[0]
+ if num_keypoints:
+ keypoints = keypoints.view(num_keypoints, -1, 3)
+
+ keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
+ boxes = boxes[keep]
+ labels = labels[keep]
+ if self.return_masks:
+ masks = masks[keep]
+ if keypoints is not None:
+ keypoints = keypoints[keep]
+
+ target = {}
+ target["boxes"] = boxes
+ target["labels"] = labels
+ if self.return_masks:
+ target["masks"] = masks
+ target["image_id"] = image_id
+ target["image_path"] = image_path
+ if keypoints is not None:
+ target["keypoints"] = keypoints
+
+ # for conversion to coco api
+ area = torch.tensor([obj["area"] for obj in anno])
+ iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
+ target["area"] = area[keep]
+ target["iscrowd"] = iscrowd[keep]
+
+ target["orig_size"] = torch.as_tensor([int(w), int(h)])
+ # target["size"] = torch.as_tensor([int(w), int(h)])
+
+ return image, target
+
+
+mscoco_category2name = {
+ 1: "person",
+ 2: "bicycle",
+ 3: "car",
+ 4: "motorcycle",
+ 5: "airplane",
+ 6: "bus",
+ 7: "train",
+ 8: "truck",
+ 9: "boat",
+ 10: "traffic light",
+ 11: "fire hydrant",
+ 13: "stop sign",
+ 14: "parking meter",
+ 15: "bench",
+ 16: "bird",
+ 17: "cat",
+ 18: "dog",
+ 19: "horse",
+ 20: "sheep",
+ 21: "cow",
+ 22: "elephant",
+ 23: "bear",
+ 24: "zebra",
+ 25: "giraffe",
+ 27: "backpack",
+ 28: "umbrella",
+ 31: "handbag",
+ 32: "tie",
+ 33: "suitcase",
+ 34: "frisbee",
+ 35: "skis",
+ 36: "snowboard",
+ 37: "sports ball",
+ 38: "kite",
+ 39: "baseball bat",
+ 40: "baseball glove",
+ 41: "skateboard",
+ 42: "surfboard",
+ 43: "tennis racket",
+ 44: "bottle",
+ 46: "wine glass",
+ 47: "cup",
+ 48: "fork",
+ 49: "knife",
+ 50: "spoon",
+ 51: "bowl",
+ 52: "banana",
+ 53: "apple",
+ 54: "sandwich",
+ 55: "orange",
+ 56: "broccoli",
+ 57: "carrot",
+ 58: "hot dog",
+ 59: "pizza",
+ 60: "donut",
+ 61: "cake",
+ 62: "chair",
+ 63: "couch",
+ 64: "potted plant",
+ 65: "bed",
+ 67: "dining table",
+ 70: "toilet",
+ 72: "tv",
+ 73: "laptop",
+ 74: "mouse",
+ 75: "remote",
+ 76: "keyboard",
+ 77: "cell phone",
+ 78: "microwave",
+ 79: "oven",
+ 80: "toaster",
+ 81: "sink",
+ 82: "refrigerator",
+ 84: "book",
+ 85: "clock",
+ 86: "vase",
+ 87: "scissors",
+ 88: "teddy bear",
+ 89: "hair drier",
+ 90: "toothbrush",
+}
+
+mscoco_category2label = {k: i for i, k in enumerate(mscoco_category2name.keys())}
+mscoco_label2category = {v: k for k, v in mscoco_category2label.items()}
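+
+# Example: mscoco_category2label maps the 80 non-contiguous COCO category ids to
+# contiguous training labels (category 1 'person' -> 0, category 90 'toothbrush'
+# -> 79); mscoco_label2category is the inverse mapping.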
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/coco_eval.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/coco_eval.py
new file mode 100644
index 00000000..7d292a28
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/coco_eval.py
@@ -0,0 +1,214 @@
+"""
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+COCO evaluator that works in distributed mode.
+Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py
+The difference is that there is less copy-pasting from pycocotools
+at the end of the file, as python3 can suppress prints with contextlib
+"""
+
+import contextlib
+import copy
+import os
+
+import faster_coco_eval.core.mask as mask_util
+import numpy as np
+import torch
+from faster_coco_eval import COCO, COCOeval_faster
+
+from ...core import register
+from ...misc import dist_utils
+
+__all__ = [
+ "CocoEvaluator",
+]
+
+
+@register()
+class CocoEvaluator(object):
+ def __init__(self, coco_gt, iou_types):
+ assert isinstance(iou_types, (list, tuple))
+ coco_gt = copy.deepcopy(coco_gt)
+ self.coco_gt: COCO = coco_gt
+ self.iou_types = iou_types
+
+ self.coco_eval = {}
+ for iou_type in iou_types:
+ self.coco_eval[iou_type] = COCOeval_faster(
+ coco_gt, iouType=iou_type, print_function=print, separate_eval=True
+ )
+
+ self.img_ids = []
+ self.eval_imgs = {k: [] for k in iou_types}
+
+ def cleanup(self):
+ self.coco_eval = {}
+ for iou_type in self.iou_types:
+ self.coco_eval[iou_type] = COCOeval_faster(
+ self.coco_gt, iouType=iou_type, print_function=print, separate_eval=True
+ )
+ self.img_ids = []
+ self.eval_imgs = {k: [] for k in self.iou_types}
+
+ def update(self, predictions):
+ img_ids = list(np.unique(list(predictions.keys())))
+ self.img_ids.extend(img_ids)
+
+ for iou_type in self.iou_types:
+ results = self.prepare(predictions, iou_type)
+ coco_eval = self.coco_eval[iou_type]
+
+ # suppress pycocotools prints
+ with open(os.devnull, "w") as devnull:
+ with contextlib.redirect_stdout(devnull):
+ coco_dt = self.coco_gt.loadRes(results) if results else COCO()
+ coco_eval.cocoDt = coco_dt
+ coco_eval.params.imgIds = list(img_ids)
+ coco_eval.evaluate()
+
+ self.eval_imgs[iou_type].append(
+ np.array(coco_eval._evalImgs_cpp).reshape(
+ len(coco_eval.params.catIds),
+ len(coco_eval.params.areaRng),
+ len(coco_eval.params.imgIds),
+ )
+ )
+
+ def synchronize_between_processes(self):
+ for iou_type in self.iou_types:
+ img_ids, eval_imgs = merge(self.img_ids, self.eval_imgs[iou_type])
+
+ coco_eval = self.coco_eval[iou_type]
+ coco_eval.params.imgIds = img_ids
+ coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
+ coco_eval._evalImgs_cpp = eval_imgs
+
+ def accumulate(self):
+ for coco_eval in self.coco_eval.values():
+ coco_eval.accumulate()
+
+ def summarize(self):
+ for iou_type, coco_eval in self.coco_eval.items():
+ print("IoU metric: {}".format(iou_type))
+ coco_eval.summarize()
+
+ def prepare(self, predictions, iou_type):
+ if iou_type == "bbox":
+ return self.prepare_for_coco_detection(predictions)
+ elif iou_type == "segm":
+ return self.prepare_for_coco_segmentation(predictions)
+ elif iou_type == "keypoints":
+ return self.prepare_for_coco_keypoint(predictions)
+ else:
+ raise ValueError("Unknown iou type {}".format(iou_type))
+
+ def prepare_for_coco_detection(self, predictions):
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ boxes = prediction["boxes"]
+ boxes = convert_to_xywh(boxes).tolist()
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+
+ coco_results.extend(
+ [
+ {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "bbox": box,
+ "score": scores[k],
+ }
+ for k, box in enumerate(boxes)
+ ]
+ )
+ return coco_results
+
+ def prepare_for_coco_segmentation(self, predictions):
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ scores = prediction["scores"]
+ labels = prediction["labels"]
+ masks = prediction["masks"]
+
+ masks = masks > 0.5
+
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+
+ rles = [
+ mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
+ for mask in masks
+ ]
+ for rle in rles:
+ rle["counts"] = rle["counts"].decode("utf-8")
+
+ coco_results.extend(
+ [
+ {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "segmentation": rle,
+ "score": scores[k],
+ }
+ for k, rle in enumerate(rles)
+ ]
+ )
+ return coco_results
+
+ def prepare_for_coco_keypoint(self, predictions):
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ boxes = prediction["boxes"]
+ boxes = convert_to_xywh(boxes).tolist()
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+ keypoints = prediction["keypoints"]
+ keypoints = keypoints.flatten(start_dim=1).tolist()
+
+ coco_results.extend(
+ [
+ {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "keypoints": keypoint,
+ "score": scores[k],
+ }
+ for k, keypoint in enumerate(keypoints)
+ ]
+ )
+ return coco_results
+
+
+def convert_to_xywh(boxes):
+ xmin, ymin, xmax, ymax = boxes.unbind(1)
+ return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
+
+
+def merge(img_ids, eval_imgs):
+ all_img_ids = dist_utils.all_gather(img_ids)
+ all_eval_imgs = dist_utils.all_gather(eval_imgs)
+
+ merged_img_ids = []
+ for p in all_img_ids:
+ merged_img_ids.extend(p)
+
+ merged_eval_imgs = []
+ for p in all_eval_imgs:
+ merged_eval_imgs.extend(p)
+
+ merged_img_ids = np.array(merged_img_ids)
+ merged_eval_imgs = np.concatenate(merged_eval_imgs, axis=2).ravel()
+ # merged_eval_imgs = np.array(merged_eval_imgs).T.ravel()
+
+ # keep only unique (and in sorted order) images
+ merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
+
+ return merged_img_ids.tolist(), merged_eval_imgs.tolist()
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/coco_utils.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/coco_utils.py
new file mode 100644
index 00000000..216816c7
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/coco_utils.py
@@ -0,0 +1,191 @@
+"""
+copy and modified https://github.com/pytorch/vision/blob/main/references/detection/coco_utils.py
+
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import faster_coco_eval.core.mask as coco_mask
+import torch
+import torch.utils.data
+import torchvision
+import torchvision.transforms.functional as TVF
+from faster_coco_eval import COCO
+
+
+def convert_coco_poly_to_mask(segmentations, height, width):
+ masks = []
+ for polygons in segmentations:
+ rles = coco_mask.frPyObjects(polygons, height, width)
+ mask = coco_mask.decode(rles)
+ if len(mask.shape) < 3:
+ mask = mask[..., None]
+ mask = torch.as_tensor(mask, dtype=torch.uint8)
+ mask = mask.any(dim=2)
+ masks.append(mask)
+ if masks:
+ masks = torch.stack(masks, dim=0)
+ else:
+ masks = torch.zeros((0, height, width), dtype=torch.uint8)
+ return masks
+
+
+class ConvertCocoPolysToMask:
+ def __call__(self, image, target):
+ w, h = image.size
+
+ image_id = target["image_id"]
+
+ anno = target["annotations"]
+
+ anno = [obj for obj in anno if obj["iscrowd"] == 0]
+
+ boxes = [obj["bbox"] for obj in anno]
+ # guard against no boxes via resizing
+ boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
+ boxes[:, 2:] += boxes[:, :2]
+ boxes[:, 0::2].clamp_(min=0, max=w)
+ boxes[:, 1::2].clamp_(min=0, max=h)
+
+ classes = [obj["category_id"] for obj in anno]
+ classes = torch.tensor(classes, dtype=torch.int64)
+
+ segmentations = [obj["segmentation"] for obj in anno]
+ masks = convert_coco_poly_to_mask(segmentations, h, w)
+
+ keypoints = None
+ if anno and "keypoints" in anno[0]:
+ keypoints = [obj["keypoints"] for obj in anno]
+ keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
+ num_keypoints = keypoints.shape[0]
+ if num_keypoints:
+ keypoints = keypoints.view(num_keypoints, -1, 3)
+
+ keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
+ boxes = boxes[keep]
+ classes = classes[keep]
+ masks = masks[keep]
+ if keypoints is not None:
+ keypoints = keypoints[keep]
+
+ target = {}
+ target["boxes"] = boxes
+ target["labels"] = classes
+ target["masks"] = masks
+ target["image_id"] = image_id
+ if keypoints is not None:
+ target["keypoints"] = keypoints
+
+ # for conversion to coco api
+ area = torch.tensor([obj["area"] for obj in anno])
+ iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
+ target["area"] = area
+ target["iscrowd"] = iscrowd
+
+ return image, target
+
+
+def _coco_remove_images_without_annotations(dataset, cat_list=None):
+ def _has_only_empty_bbox(anno):
+ return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
+
+ def _count_visible_keypoints(anno):
+ return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
+
+ min_keypoints_per_image = 10
+
+ def _has_valid_annotation(anno):
+ # if it's empty, there is no annotation
+ if len(anno) == 0:
+ return False
+ # if all boxes have close to zero area, there is no annotation
+ if _has_only_empty_bbox(anno):
+ return False
+        # keypoint tasks have a slightly different criterion for considering
+        # whether an annotation is valid
+ if "keypoints" not in anno[0]:
+ return True
+ # for keypoint detection tasks, only consider valid images those
+ # containing at least min_keypoints_per_image
+ if _count_visible_keypoints(anno) >= min_keypoints_per_image:
+ return True
+ return False
+
+ ids = []
+ for ds_idx, img_id in enumerate(dataset.ids):
+ ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
+ anno = dataset.coco.loadAnns(ann_ids)
+ if cat_list:
+ anno = [obj for obj in anno if obj["category_id"] in cat_list]
+ if _has_valid_annotation(anno):
+ ids.append(ds_idx)
+
+ dataset = torch.utils.data.Subset(dataset, ids)
+ return dataset
+
+
+def convert_to_coco_api(ds):
+ coco_ds = COCO()
+ # annotation IDs need to start at 1, not 0, see torchvision issue #1530
+ ann_id = 1
+ dataset = {"images": [], "categories": [], "annotations": []}
+ categories = set()
+ for img_idx in range(len(ds)):
+ # find better way to get target
+ # targets = ds.get_annotations(img_idx)
+ # img, targets = ds[img_idx]
+
+ img, targets = ds.load_item(img_idx)
+ width, height = img.size
+
+ image_id = targets["image_id"].item()
+ img_dict = {}
+ img_dict["id"] = image_id
+ img_dict["width"] = width
+ img_dict["height"] = height
+ dataset["images"].append(img_dict)
+ bboxes = targets["boxes"].clone()
+ bboxes[:, 2:] -= bboxes[:, :2] # xyxy -> xywh
+ bboxes = bboxes.tolist()
+ labels = targets["labels"].tolist()
+ areas = targets["area"].tolist()
+ iscrowd = targets["iscrowd"].tolist()
+ if "masks" in targets:
+ masks = targets["masks"]
+ # make masks Fortran contiguous for coco_mask
+ masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
+ if "keypoints" in targets:
+ keypoints = targets["keypoints"]
+ keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
+ num_objs = len(bboxes)
+ for i in range(num_objs):
+ ann = {}
+ ann["image_id"] = image_id
+ ann["bbox"] = bboxes[i]
+ ann["category_id"] = labels[i]
+ categories.add(labels[i])
+ ann["area"] = areas[i]
+ ann["iscrowd"] = iscrowd[i]
+ ann["id"] = ann_id
+ if "masks" in targets:
+ ann["segmentation"] = coco_mask.encode(masks[i].numpy())
+ if "keypoints" in targets:
+ ann["keypoints"] = keypoints[i]
+ ann["num_keypoints"] = sum(k != 0 for k in keypoints[i][2::3])
+ dataset["annotations"].append(ann)
+ ann_id += 1
+ dataset["categories"] = [{"id": i} for i in sorted(categories)]
+ coco_ds.dataset = dataset
+ coco_ds.createIndex()
+ return coco_ds
+
+
+def get_coco_api_from_dataset(dataset):
+ # FIXME: This is... awful?
+ for _ in range(10):
+ if isinstance(dataset, torchvision.datasets.CocoDetection):
+ break
+ if isinstance(dataset, torch.utils.data.Subset):
+ dataset = dataset.dataset
+ if isinstance(dataset, torchvision.datasets.CocoDetection):
+ return dataset.coco
+ return convert_to_coco_api(dataset)
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/voc_detection.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/voc_detection.py
new file mode 100644
index 00000000..e6735608
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/voc_detection.py
@@ -0,0 +1,86 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import os
+from typing import Callable, Optional
+
+import torch
+import torchvision
+import torchvision.transforms.functional as TVF
+from PIL import Image
+
+try:
+ from defusedxml.ElementTree import parse as ET_parse
+except ImportError:
+ from xml.etree.ElementTree import parse as ET_parse
+
+from ...core import register
+from .._misc import convert_to_tv_tensor
+from ._dataset import DetDataset
+
+
+@register()
+class VOCDetection(torchvision.datasets.VOCDetection, DetDataset):
+ __inject__ = [
+ "transforms",
+ ]
+
+ def __init__(
+ self,
+ root: str,
+ ann_file: str = "trainval.txt",
+ label_file: str = "label_list.txt",
+ transforms: Optional[Callable] = None,
+ ):
+ with open(os.path.join(root, ann_file), "r") as f:
+ lines = [x.strip() for x in f.readlines()]
+ lines = [x.split(" ") for x in lines]
+
+ self.images = [os.path.join(root, lin[0]) for lin in lines]
+ self.targets = [os.path.join(root, lin[1]) for lin in lines]
+ assert len(self.images) == len(self.targets)
+
+        with open(os.path.join(root, label_file), "r") as f:
+ labels = f.readlines()
+ labels = [lab.strip() for lab in labels]
+
+ self.transforms = transforms
+ self.labels_map = {lab: i for i, lab in enumerate(labels)}
+
+ def __getitem__(self, index: int):
+ image, target = self.load_item(index)
+ if self.transforms is not None:
+ image, target, _ = self.transforms(image, target, self)
+ # target["orig_size"] = torch.tensor(TVF.get_image_size(image))
+ return image, target
+
+ def load_item(self, index: int):
+ image = Image.open(self.images[index]).convert("RGB")
+        target = self.parse_voc_xml(ET_parse(self.targets[index]).getroot())
+
+ output = {}
+ output["image_id"] = torch.tensor([index])
+ for k in ["area", "boxes", "labels", "iscrowd"]:
+ output[k] = []
+
+ for blob in target["annotation"]["object"]:
+ box = [float(v) for v in blob["bndbox"].values()]
+ output["boxes"].append(box)
+ output["labels"].append(blob["name"])
+ output["area"].append((box[2] - box[0]) * (box[3] - box[1]))
+ output["iscrowd"].append(0)
+
+ w, h = image.size
+ boxes = torch.tensor(output["boxes"]) if len(output["boxes"]) > 0 else torch.zeros(0, 4)
+ output["boxes"] = convert_to_tv_tensor(
+ boxes, "boxes", box_format="xyxy", spatial_size=[h, w]
+ )
+ output["labels"] = torch.tensor([self.labels_map[lab] for lab in output["labels"]])
+ output["area"] = torch.tensor(output["area"])
+ output["iscrowd"] = torch.tensor(output["iscrowd"])
+ output["orig_size"] = torch.tensor([w, h])
+
+ return image, output
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/voc_eval.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/voc_eval.py
new file mode 100644
index 00000000..0bee50ae
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/dataset/voc_eval.py
@@ -0,0 +1,12 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch
+import torchvision
+
+
+class VOCEvaluator(object):
+ def __init__(self) -> None:
+ pass
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/__init__.py
new file mode 100644
index 00000000..f555c9f5
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/__init__.py
@@ -0,0 +1,21 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from ._transforms import (
+ ConvertBoxes,
+ ConvertPILImage,
+ EmptyTransform,
+ Normalize,
+ PadToSize,
+ RandomCrop,
+ RandomHorizontalFlip,
+ RandomIoUCrop,
+ RandomPhotometricDistort,
+ RandomZoomOut,
+ Resize,
+ SanitizeBoundingBoxes,
+)
+from .container import Compose
+from .mosaic import Mosaic
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/_transforms.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/_transforms.py
new file mode 100644
index 00000000..a3fe9173
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/_transforms.py
@@ -0,0 +1,161 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from typing import Any, Dict, List, Optional
+
+import PIL
+import PIL.Image
+import torch
+import torch.nn as nn
+import torchvision
+import torchvision.transforms.v2 as T
+import torchvision.transforms.v2.functional as F
+
+from ...core import register
+from .._misc import (
+ BoundingBoxes,
+ Image,
+ Mask,
+ SanitizeBoundingBoxes,
+ Video,
+ _boxes_keys,
+ convert_to_tv_tensor,
+)
+
+torchvision.disable_beta_transforms_warning()
+
+
+RandomPhotometricDistort = register()(T.RandomPhotometricDistort)
+RandomZoomOut = register()(T.RandomZoomOut)
+RandomHorizontalFlip = register()(T.RandomHorizontalFlip)
+Resize = register()(T.Resize)
+# ToImageTensor = register()(T.ToImageTensor)
+# ConvertDtype = register()(T.ConvertDtype)
+# PILToTensor = register()(T.PILToTensor)
+SanitizeBoundingBoxes = register(name="SanitizeBoundingBoxes")(SanitizeBoundingBoxes)
+RandomCrop = register()(T.RandomCrop)
+Normalize = register()(T.Normalize)
+
+
+@register()
+class EmptyTransform(T.Transform):
+ def __init__(
+ self,
+ ) -> None:
+ super().__init__()
+
+ def forward(self, *inputs):
+ inputs = inputs if len(inputs) > 1 else inputs[0]
+ return inputs
+
+
+@register()
+class PadToSize(T.Pad):
+ _transformed_types = (
+ PIL.Image.Image,
+ Image,
+ Video,
+ Mask,
+ BoundingBoxes,
+ )
+
+ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
+ sp = F.get_spatial_size(flat_inputs[0])
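+        # get_spatial_size returns (h, w); only the right/bottom are padded to reach the target size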
+ h, w = self.size[1] - sp[0], self.size[0] - sp[1]
+ self.padding = [0, 0, w, h]
+ return dict(padding=self.padding)
+
+ def __init__(self, size, fill=0, padding_mode="constant") -> None:
+ if isinstance(size, int):
+ size = (size, size)
+ self.size = size
+ super().__init__(0, fill, padding_mode)
+
+ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
+ fill = self._fill[type(inpt)]
+ padding = params["padding"]
+ return F.pad(inpt, padding=padding, fill=fill, padding_mode=self.padding_mode) # type: ignore[arg-type]
+
+ def __call__(self, *inputs: Any) -> Any:
+ outputs = super().forward(*inputs)
+ if len(outputs) > 1 and isinstance(outputs[1], dict):
+ outputs[1]["padding"] = torch.tensor(self.padding)
+ return outputs
+
+
+@register()
+class RandomIoUCrop(T.RandomIoUCrop):
+ def __init__(
+ self,
+ min_scale: float = 0.3,
+ max_scale: float = 1,
+ min_aspect_ratio: float = 0.5,
+ max_aspect_ratio: float = 2,
+ sampler_options: Optional[List[float]] = None,
+ trials: int = 40,
+ p: float = 1.0,
+ ):
+ super().__init__(
+ min_scale, max_scale, min_aspect_ratio, max_aspect_ratio, sampler_options, trials
+ )
+ self.p = p
+
+ def __call__(self, *inputs: Any) -> Any:
+ if torch.rand(1) >= self.p:
+ return inputs if len(inputs) > 1 else inputs[0]
+
+ return super().forward(*inputs)
+
+
+@register()
+class ConvertBoxes(T.Transform):
+ _transformed_types = (BoundingBoxes,)
+
+ def __init__(self, fmt="", normalize=False) -> None:
+ super().__init__()
+ self.fmt = fmt
+ self.normalize = normalize
+
+ def transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
+ return self._transform(inpt, params)
+
+ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
+ spatial_size = getattr(inpt, _boxes_keys[1])
+ if self.fmt:
+ in_fmt = inpt.format.value.lower()
+ inpt = torchvision.ops.box_convert(inpt, in_fmt=in_fmt, out_fmt=self.fmt.lower())
+ inpt = convert_to_tv_tensor(
+ inpt, key="boxes", box_format=self.fmt.upper(), spatial_size=spatial_size
+ )
+
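+        # spatial_size is (h, w); reversing and tiling gives (w, h, w, h), so boxes end up normalized to [0, 1]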
+ if self.normalize:
+ inpt = inpt / torch.tensor(spatial_size[::-1]).tile(2)[None]
+
+ return inpt
+
+
+@register()
+class ConvertPILImage(T.Transform):
+ _transformed_types = (PIL.Image.Image,)
+
+ def __init__(self, dtype="float32", scale=True) -> None:
+ super().__init__()
+ self.dtype = dtype
+ self.scale = scale
+
+ def transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
+ return self._transform(inpt, params)
+
+ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
+ inpt = F.pil_to_tensor(inpt)
+ if self.dtype == "float32":
+ inpt = inpt.float()
+
+ if self.scale:
+ inpt = inpt / 255.0
+
+ inpt = Image(inpt)
+
+ return inpt
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/container.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/container.py
new file mode 100644
index 00000000..b40a0d66
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/container.py
@@ -0,0 +1,99 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from typing import Any, Dict, List, Optional
+
+import torch
+import torch.nn as nn
+import torchvision
+import torchvision.transforms.v2 as T
+
+from ...core import GLOBAL_CONFIG, register
+from ._transforms import EmptyTransform
+
+torchvision.disable_beta_transforms_warning()
+
+
+@register()
+class Compose(T.Compose):
+ def __init__(self, ops, policy=None) -> None:
+ transforms = []
+ if ops is not None:
+ for op in ops:
+ if isinstance(op, dict):
+ name = op.pop("type")
+ transform = getattr(
+ GLOBAL_CONFIG[name]["_pymodule"], GLOBAL_CONFIG[name]["_name"]
+ )(**op)
+ transforms.append(transform)
+ op["type"] = name
+
+ elif isinstance(op, nn.Module):
+ transforms.append(op)
+
+ else:
+ raise ValueError("")
+ else:
+ transforms = [
+ EmptyTransform(),
+ ]
+
+ super().__init__(transforms=transforms)
+
+ if policy is None:
+ policy = {"name": "default"}
+
+ self.policy = policy
+ self.global_samples = 0
+
+ def forward(self, *inputs: Any) -> Any:
+ return self.get_forward(self.policy["name"])(*inputs)
+
+ def get_forward(self, name):
+ forwards = {
+ "default": self.default_forward,
+ "stop_epoch": self.stop_epoch_forward,
+ "stop_sample": self.stop_sample_forward,
+ }
+ return forwards[name]
+
+ def default_forward(self, *inputs: Any) -> Any:
+ sample = inputs if len(inputs) > 1 else inputs[0]
+ for transform in self.transforms:
+ sample = transform(sample)
+ return sample
+
+ def stop_epoch_forward(self, *inputs: Any):
+ sample = inputs if len(inputs) > 1 else inputs[0]
+ dataset = sample[-1]
+ cur_epoch = dataset.epoch
+ policy_ops = self.policy["ops"]
+ policy_epoch = self.policy["epoch"]
+
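+        # transforms named in policy["ops"] are skipped once the dataset epoch reaches policy["epoch"]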
+ for transform in self.transforms:
+ if type(transform).__name__ in policy_ops and cur_epoch >= policy_epoch:
+ pass
+ else:
+ sample = transform(sample)
+
+ return sample
+
+ def stop_sample_forward(self, *inputs: Any):
+ sample = inputs if len(inputs) > 1 else inputs[0]
+ dataset = sample[-1]
+
+ cur_epoch = dataset.epoch
+ policy_ops = self.policy["ops"]
+ policy_sample = self.policy["sample"]
+
+ for transform in self.transforms:
+ if type(transform).__name__ in policy_ops and self.global_samples >= policy_sample:
+ pass
+ else:
+ sample = transform(sample)
+
+ self.global_samples += 1
+
+ return sample
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/functional.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/functional.py
new file mode 100644
index 00000000..10ef8b58
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/functional.py
@@ -0,0 +1,172 @@
+from typing import List, Optional
+
+import torch
+
+# needed due to empty tensor bug in pytorch and torchvision 0.5
+import torchvision
+import torchvision.transforms.functional as F
+from packaging import version
+from torch import Tensor
+
+if version.parse(torchvision.__version__) < version.parse("0.7"):
+ from torchvision.ops import _new_empty_tensor
+ from torchvision.ops.misc import _output_size
+
+
+def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None):
+ # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
+ """
+ Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
+ This will eventually be supported natively by PyTorch, and this
+ class can go away.
+ """
+ if version.parse(torchvision.__version__) < version.parse("0.7"):
+ if input.numel() > 0:
+ return torch.nn.functional.interpolate(input, size, scale_factor, mode, align_corners)
+
+ output_shape = _output_size(2, input, size, scale_factor)
+ output_shape = list(input.shape[:-2]) + list(output_shape)
+ return _new_empty_tensor(input, output_shape)
+ else:
+ return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners)
+
+
+def crop(image, target, region):
+ cropped_image = F.crop(image, *region)
+
+ target = target.copy()
+ i, j, h, w = region
+
+ # should we do something wrt the original size?
+ target["size"] = torch.tensor([h, w])
+
+ fields = ["labels", "area", "iscrowd"]
+
+ if "boxes" in target:
+ boxes = target["boxes"]
+ max_size = torch.as_tensor([w, h], dtype=torch.float32)
+ cropped_boxes = boxes - torch.as_tensor([j, i, j, i])
+ cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
+ cropped_boxes = cropped_boxes.clamp(min=0)
+ area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
+ target["boxes"] = cropped_boxes.reshape(-1, 4)
+ target["area"] = area
+ fields.append("boxes")
+
+ if "masks" in target:
+ # FIXME should we update the area here if there are no boxes?
+ target["masks"] = target["masks"][:, i : i + h, j : j + w]
+ fields.append("masks")
+
+ # remove elements for which the boxes or masks that have zero area
+ if "boxes" in target or "masks" in target:
+ # favor boxes selection when defining which elements to keep
+ # this is compatible with previous implementation
+ if "boxes" in target:
+ cropped_boxes = target["boxes"].reshape(-1, 2, 2)
+ keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1)
+ else:
+ keep = target["masks"].flatten(1).any(1)
+
+ for field in fields:
+ target[field] = target[field][keep]
+
+ return cropped_image, target
+
+
+def hflip(image, target):
+ flipped_image = F.hflip(image)
+
+ w, h = image.size
+
+ target = target.copy()
+ if "boxes" in target:
+ boxes = target["boxes"]
+ boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor([-1, 1, -1, 1]) + torch.as_tensor(
+ [w, 0, w, 0]
+ )
+ target["boxes"] = boxes
+
+ if "masks" in target:
+ target["masks"] = target["masks"].flip(-1)
+
+ return flipped_image, target
+
+
+def resize(image, target, size, max_size=None):
+ # size can be min_size (scalar) or (w, h) tuple
+
+ def get_size_with_aspect_ratio(image_size, size, max_size=None):
+ w, h = image_size
+ if max_size is not None:
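+            # shrink the requested short-side size so the scaled long side does not exceed max_size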
+ min_original_size = float(min((w, h)))
+ max_original_size = float(max((w, h)))
+ if max_original_size / min_original_size * size > max_size:
+ size = int(round(max_size * min_original_size / max_original_size))
+
+ if (w <= h and w == size) or (h <= w and h == size):
+ return (h, w)
+
+ if w < h:
+ ow = size
+ oh = int(size * h / w)
+ else:
+ oh = size
+ ow = int(size * w / h)
+
+ # r = min(size / min(h, w), max_size / max(h, w))
+ # ow = int(w * r)
+ # oh = int(h * r)
+
+ return (oh, ow)
+
+ def get_size(image_size, size, max_size=None):
+ if isinstance(size, (list, tuple)):
+ return size[::-1]
+ else:
+ return get_size_with_aspect_ratio(image_size, size, max_size)
+
+ size = get_size(image.size, size, max_size)
+ rescaled_image = F.resize(image, size)
+
+ if target is None:
+ return rescaled_image, None
+
+ ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size))
+ ratio_width, ratio_height = ratios
+
+ target = target.copy()
+ if "boxes" in target:
+ boxes = target["boxes"]
+ scaled_boxes = boxes * torch.as_tensor(
+ [ratio_width, ratio_height, ratio_width, ratio_height]
+ )
+ target["boxes"] = scaled_boxes
+
+ if "area" in target:
+ area = target["area"]
+ scaled_area = area * (ratio_width * ratio_height)
+ target["area"] = scaled_area
+
+ h, w = size
+ target["size"] = torch.tensor([h, w])
+
+ if "masks" in target:
+ target["masks"] = (
+ interpolate(target["masks"][:, None].float(), size, mode="nearest")[:, 0] > 0.5
+ )
+
+ return rescaled_image, target
+
+
+def pad(image, target, padding):
+ # assumes that we only pad on the bottom right corners
+ padded_image = F.pad(image, (0, 0, padding[0], padding[1]))
+ if target is None:
+ return padded_image, None
+ target = target.copy()
+ # should we do something wrt the original size?
+ target["size"] = torch.tensor(padded_image.size[::-1])
+ if "masks" in target:
+ target["masks"] = torch.nn.functional.pad(target["masks"], (0, padding[0], 0, padding[1]))
+ return padded_image, target
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/mosaic.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/mosaic.py
new file mode 100644
index 00000000..413900c5
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/mosaic.py
@@ -0,0 +1,83 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import random
+
+import torch
+import torchvision
+import torchvision.transforms.v2 as T
+import torchvision.transforms.v2.functional as F
+from PIL import Image
+
+from ...core import register
+from .._misc import convert_to_tv_tensor
+
+torchvision.disable_beta_transforms_warning()
+
+
+@register()
+class Mosaic(T.Transform):
+ def __init__(
+ self,
+ size,
+ max_size=None,
+ ) -> None:
+ super().__init__()
+ self.resize = T.Resize(size=size, max_size=max_size)
+ self.crop = T.RandomCrop(size=max_size if max_size else size)
+
+ # TODO add arg `output_size` for affine`
+ # self.random_perspective = T.RandomPerspective(distortion_scale=0.5, p=1., )
+ self.random_affine = T.RandomAffine(
+ degrees=0, translate=(0.1, 0.1), scale=(0.5, 1.5), fill=114
+ )
+
+ def forward(self, *inputs):
+ inputs = inputs if len(inputs) > 1 else inputs[0]
+ image, target, dataset = inputs
+
+ images = []
+ targets = []
+ indices = random.choices(range(len(dataset)), k=3)
+ for i in indices:
+ image, target = dataset.load_item(i)
+ image, target = self.resize(image, target)
+ images.append(image)
+ targets.append(target)
+
+ h, w = F.get_spatial_size(images[0])
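+        # paste the resized tiles onto a (2w x 2h) canvas; offsets are the four quadrant corners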
+ offset = [[0, 0], [w, 0], [0, h], [w, h]]
+ image = Image.new(mode=images[0].mode, size=(w * 2, h * 2), color=0)
+ for i, im in enumerate(images):
+ image.paste(im, offset[i])
+
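+        # per-tile (dx, dy) offsets repeated to (dx, dy, dx, dy) so they can be added to xyxy boxes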
+ offset = torch.tensor([[0, 0], [w, 0], [0, h], [w, h]]).repeat(1, 2)
+ target = {}
+ for k in targets[0]:
+ if k == "boxes":
+ v = [t[k] + offset[i] for i, t in enumerate(targets)]
+ else:
+ v = [t[k] for t in targets]
+
+ if isinstance(v[0], torch.Tensor):
+ v = torch.cat(v, dim=0)
+
+ target[k] = v
+
+ if "boxes" in target:
+ # target['boxes'] = target['boxes'].clamp(0, 640 * 2 - 1)
+ w, h = image.size
+ target["boxes"] = convert_to_tv_tensor(
+ target["boxes"], "boxes", box_format="xyxy", spatial_size=[h, w]
+ )
+
+ if "masks" in target:
+ target["masks"] = convert_to_tv_tensor(target["masks"], "masks")
+
+ image, target = self.random_affine(image, target)
+ # image, target = self.resize(image, target)
+ image, target = self.crop(image, target)
+
+ return image, target, dataset
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/presets.py b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/presets.py
new file mode 100644
index 00000000..6f6d59c9
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/data/transforms/presets.py
@@ -0,0 +1,4 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/misc/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/__init__.py
new file mode 100644
index 00000000..3d1f9551
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/__init__.py
@@ -0,0 +1,9 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from .dist_utils import setup_print, setup_seed
+from .logger import *
+from .profiler_utils import stats
+from .visualizer import *
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/misc/box_ops.py b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/box_ops.py
new file mode 100644
index 00000000..64dff0dc
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/box_ops.py
@@ -0,0 +1,106 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from typing import List, Tuple
+
+import torch
+import torchvision
+from torch import Tensor
+
+
+def generalized_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor:
+ assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
+ assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
+ return torchvision.ops.generalized_box_iou(boxes1, boxes2)
+
+
+# elementwise
+def elementwise_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tuple[Tensor, Tensor]:
+ """
+ Args:
+ boxes1, [N, 4]
+ boxes2, [N, 4]
+ Returns:
+ iou, [N, ]
+ union, [N, ]
+ """
+ area1 = torchvision.ops.box_area(boxes1) # [N, ]
+ area2 = torchvision.ops.box_area(boxes2) # [N, ]
+ lt = torch.max(boxes1[:, :2], boxes2[:, :2]) # [N, 2]
+ rb = torch.min(boxes1[:, 2:], boxes2[:, 2:]) # [N, 2]
+ wh = (rb - lt).clamp(min=0) # [N, 2]
+ inter = wh[:, 0] * wh[:, 1] # [N, ]
+ union = area1 + area2 - inter
+ iou = inter / union
+ return iou, union
+
+
+def elementwise_generalized_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor:
+ """
+ Args:
+ boxes1, [N, 4] with [x1, y1, x2, y2]
+ boxes2, [N, 4] with [x1, y1, x2, y2]
+ Returns:
+ giou, [N, ]
+ """
+ assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
+ assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
+ iou, union = elementwise_box_iou(boxes1, boxes2)
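+    # GIoU = IoU - (enclosing_area - union) / enclosing_area, where the enclosing box covers both inputs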
+ lt = torch.min(boxes1[:, :2], boxes2[:, :2]) # [N, 2]
+ rb = torch.max(boxes1[:, 2:], boxes2[:, 2:]) # [N, 2]
+ wh = (rb - lt).clamp(min=0) # [N, 2]
+ area = wh[:, 0] * wh[:, 1]
+ return iou - (area - union) / area
+
+
+def check_point_inside_box(points: Tensor, boxes: Tensor, eps=1e-9) -> Tensor:
+ """
+ Args:
+ points, [K, 2], (x, y)
+        boxes, [N, 4], (x1, y1, x2, y2)
+ Returns:
+ Tensor (bool), [K, N]
+ """
+ x, y = [p.unsqueeze(-1) for p in points.unbind(-1)]
+ x1, y1, x2, y2 = [x.unsqueeze(0) for x in boxes.unbind(-1)]
+
+ l = x - x1
+ t = y - y1
+ r = x2 - x
+ b = y2 - y
+
+ ltrb = torch.stack([l, t, r, b], dim=-1)
+ mask = ltrb.min(dim=-1).values > eps
+
+ return mask
+
+
+def point_box_distance(points: Tensor, boxes: Tensor) -> Tensor:
+ """
+ Args:
+ boxes, [N, 4], (x1, y1, x2, y2)
+ points, [N, 2], (x, y)
+ Returns:
+ Tensor (N, 4), (l, t, r, b)
+ """
+ x1y1, x2y2 = torch.split(boxes, 2, dim=-1)
+ lt = points - x1y1
+ rb = x2y2 - points
+ return torch.concat([lt, rb], dim=-1)
+
+
+def point_distance_box(points: Tensor, distances: Tensor) -> Tensor:
+ """
+ Args:
+ points (Tensor), [N, 2], (x, y)
+ distances (Tensor), [N, 4], (l, t, r, b)
+ Returns:
+ boxes (Tensor), (N, 4), (x1, y1, x2, y2)
+ """
+ lt, rb = torch.split(distances, 2, dim=-1)
+ x1y1 = -lt + points
+ x2y2 = rb + points
+ boxes = torch.concat([x1y1, x2y2], dim=-1)
+ return boxes
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/misc/dist_utils.py b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/dist_utils.py
new file mode 100644
index 00000000..959ded69
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/dist_utils.py
@@ -0,0 +1,281 @@
+"""
+reference
+- https://github.com/pytorch/vision/blob/main/references/detection/utils.py
+- https://github.com/facebookresearch/detr/blob/master/util/misc.py#L406
+
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import atexit
+import os
+import random
+import time
+
+import numpy as np
+import torch
+import torch.backends.cudnn
+import torch.distributed
+import torch.nn as nn
+from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
+from torch.nn.parallel import DataParallel as DP
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.data import DistributedSampler
+
+# from torch.utils.data.dataloader import DataLoader
+from ..data import DataLoader
+
+
+def setup_distributed(
+ print_rank: int = 0,
+ print_method: str = "builtin",
+ seed: int = None,
+):
+ """
+ env setup
+ args:
+ print_rank,
+ print_method, (builtin, rich)
+ seed,
+ """
+ try:
+ # https://pytorch.org/docs/stable/elastic/run.html
+ RANK = int(os.getenv("RANK", -1))
+ LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1))
+ WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1))
+
+ # torch.distributed.init_process_group(backend=backend, init_method='env://')
+ torch.distributed.init_process_group(init_method="env://")
+ torch.distributed.barrier()
+
+ rank = torch.distributed.get_rank()
+ torch.cuda.set_device(rank)
+ torch.cuda.empty_cache()
+ enabled_dist = True
+ if get_rank() == print_rank:
+ print("Initialized distributed mode...")
+
+ except Exception:
+ enabled_dist = False
+ print("Not init distributed mode.")
+
+ setup_print(get_rank() == print_rank, method=print_method)
+ if seed is not None:
+ setup_seed(seed)
+
+ return enabled_dist
+
+
+def setup_print(is_main, method="builtin"):
+ """This function disables printing when not in master process"""
+ import builtins as __builtin__
+
+ if method == "builtin":
+ builtin_print = __builtin__.print
+
+ elif method == "rich":
+ import rich
+
+ builtin_print = rich.print
+
+ else:
+ raise AttributeError("")
+
+ def print(*args, **kwargs):
+ force = kwargs.pop("force", False)
+ if is_main or force:
+ builtin_print(*args, **kwargs)
+
+ __builtin__.print = print
+
+
+def is_dist_available_and_initialized():
+ if not torch.distributed.is_available():
+ return False
+ if not torch.distributed.is_initialized():
+ return False
+ return True
+
+
+@atexit.register
+def cleanup():
+ """cleanup distributed environment"""
+ if is_dist_available_and_initialized():
+ torch.distributed.barrier()
+ torch.distributed.destroy_process_group()
+
+
+def get_rank():
+ if not is_dist_available_and_initialized():
+ return 0
+ return torch.distributed.get_rank()
+
+
+def get_world_size():
+ if not is_dist_available_and_initialized():
+ return 1
+ return torch.distributed.get_world_size()
+
+
+def is_main_process():
+ return get_rank() == 0
+
+
+def save_on_master(*args, **kwargs):
+ if is_main_process():
+ torch.save(*args, **kwargs)
+
+
+def warp_model(
+ model: torch.nn.Module,
+ sync_bn: bool = False,
+ dist_mode: str = "ddp",
+ find_unused_parameters: bool = False,
+ compile: bool = False,
+ compile_mode: str = "reduce-overhead",
+ **kwargs,
+):
+ if is_dist_available_and_initialized():
+ rank = get_rank()
+ model = nn.SyncBatchNorm.convert_sync_batchnorm(model) if sync_bn else model
+ if dist_mode == "dp":
+ model = DP(model, device_ids=[rank], output_device=rank)
+ elif dist_mode == "ddp":
+ model = DDP(
+ model,
+ device_ids=[rank],
+ output_device=rank,
+ find_unused_parameters=find_unused_parameters,
+ )
+ else:
+ raise AttributeError("")
+
+ if compile:
+ model = torch.compile(model, mode=compile_mode)
+
+ return model
+
+
+def de_model(model):
+ return de_parallel(de_complie(model))
+
+
+def warp_loader(loader, shuffle=False):
+ if is_dist_available_and_initialized():
+ sampler = DistributedSampler(loader.dataset, shuffle=shuffle)
+ loader = DataLoader(
+ loader.dataset,
+ loader.batch_size,
+ sampler=sampler,
+ drop_last=loader.drop_last,
+ collate_fn=loader.collate_fn,
+ pin_memory=loader.pin_memory,
+ num_workers=loader.num_workers,
+ )
+ return loader
+
+
+def is_parallel(model) -> bool:
+ # Returns True if model is of type DP or DDP
+ return type(model) in (
+ torch.nn.parallel.DataParallel,
+ torch.nn.parallel.DistributedDataParallel,
+ )
+
+
+def de_parallel(model) -> nn.Module:
+ # De-parallelize a model: returns single-GPU model if model is of type DP or DDP
+ return model.module if is_parallel(model) else model
+
+
+def reduce_dict(data, avg=True):
+ """
+ Args
+ data dict: input, {k: v, ...}
+ avg bool: true
+ """
+ world_size = get_world_size()
+ if world_size < 2:
+ return data
+
+ with torch.no_grad():
+ keys, values = [], []
+ for k in sorted(data.keys()):
+ keys.append(k)
+ values.append(data[k])
+
+ values = torch.stack(values, dim=0)
+ torch.distributed.all_reduce(values)
+
+ if avg is True:
+ values /= world_size
+
+ return {k: v for k, v in zip(keys, values)}
+
+
+def all_gather(data):
+ """
+ Run all_gather on arbitrary picklable data (not necessarily tensors)
+ Args:
+ data: any picklable object
+ Returns:
+ list[data]: list of data gathered from each rank
+ """
+ world_size = get_world_size()
+ if world_size == 1:
+ return [data]
+ data_list = [None] * world_size
+ torch.distributed.all_gather_object(data_list, data)
+ return data_list
+
+
+def sync_time():
+ """sync_time"""
+ if torch.cuda.is_available():
+ torch.cuda.synchronize()
+
+ return time.time()
+
+
+def setup_seed(seed: int, deterministic=False):
+ """setup_seed for reproducibility
+ torch.manual_seed(3407) is all you need. https://arxiv.org/abs/2109.08203
+ """
+ seed = seed + get_rank()
+ random.seed(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+
+ if torch.cuda.is_available():
+ torch.cuda.manual_seed_all(seed)
+
+ # memory will be large when setting deterministic to True
+ if torch.backends.cudnn.is_available() and deterministic:
+ torch.backends.cudnn.deterministic = True
+
+
+# for torch.compile
+def check_compile():
+ import warnings
+
+ import torch
+
+ gpu_ok = False
+ if torch.cuda.is_available():
+ device_cap = torch.cuda.get_device_capability()
+ if device_cap in ((7, 0), (8, 0), (9, 0)):
+ gpu_ok = True
+ if not gpu_ok:
+ warnings.warn(
+ "GPU is not NVIDIA V100, A100, or H100. Speedup numbers may be lower " "than expected."
+ )
+ return gpu_ok
+
+
+def is_compile(model):
+ import torch._dynamo
+
+ return type(model) in (torch._dynamo.OptimizedModule,)
+
+
+def de_complie(model):
+ return model._orig_mod if is_compile(model) else model
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/misc/lazy_loader.py b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/lazy_loader.py
new file mode 100644
index 00000000..208fb123
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/lazy_loader.py
@@ -0,0 +1,70 @@
+"""
+https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/util/lazy_loader.py
+"""
+
+import importlib
+import types
+
+
+class LazyLoader(types.ModuleType):
+ """Lazily import a module, mainly to avoid pulling in large dependencies.
+
+ `paddle`, and `ffmpeg` are examples of modules that are large and not always
+ needed, and this allows them to only be loaded when they are used.
+ """
+
+ # The lint error here is incorrect.
+ def __init__(self, local_name, parent_module_globals, name, warning=None):
+ self._local_name = local_name
+ self._parent_module_globals = parent_module_globals
+ self._warning = warning
+
+        # These members allow doctest to correctly process this module member without
+        # triggering self._load(). self._load() mutates parent_module_globals and
+        # triggers a "dict mutated during iteration" error from doctest.py.
+ # - for from_module()
+ self.__module__ = name.rsplit(".", 1)[0]
+ # - for is_routine()
+ self.__wrapped__ = None
+
+ super(LazyLoader, self).__init__(name)
+
+ def _load(self):
+ """Load the module and insert it into the parent's globals."""
+ # Import the target module and insert it into the parent's namespace
+ module = importlib.import_module(self.__name__)
+ self._parent_module_globals[self._local_name] = module
+
+ # Emit a warning if one was specified
+ if self._warning:
+ # logging.warning(self._warning)
+ # Make sure to only warn once.
+ self._warning = None
+
+ # Update this object's dict so that if someone keeps a reference to the
+ # LazyLoader, lookups are efficient (__getattr__ is only called on lookups
+ # that fail).
+ self.__dict__.update(module.__dict__)
+
+ return module
+
+ def __getattr__(self, item):
+ module = self._load()
+ return getattr(module, item)
+
+ def __repr__(self):
+        # Be careful not to trigger _load(), since repr may be called in very
+        # sensitive places.
+ return f""
+
+ def __dir__(self):
+ module = self._load()
+ return dir(module)
+
+
+# import paddle.nn as nn
+# nn = LazyLoader("nn", globals(), "paddle.nn")
+
+# class M(nn.Layer):
+# def __init__(self) -> None:
+# super().__init__()
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/misc/logger.py b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/logger.py
new file mode 100644
index 00000000..0c1ca189
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/logger.py
@@ -0,0 +1,255 @@
+"""
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+https://github.com/facebookresearch/detr/blob/main/util/misc.py
+Mostly copy-paste from torchvision references.
+"""
+
+import datetime
+import pickle
+import time
+from collections import defaultdict, deque
+from typing import Dict
+
+import torch
+import torch.distributed as tdist
+
+from .dist_utils import get_world_size, is_dist_available_and_initialized
+
+
+class SmoothedValue(object):
+ """Track a series of values and provide access to smoothed values over a
+ window or the global series average.
+ """
+
+ def __init__(self, window_size=20, fmt=None):
+ if fmt is None:
+ fmt = "{median:.4f} ({global_avg:.4f})"
+ self.deque = deque(maxlen=window_size)
+ self.total = 0.0
+ self.count = 0
+ self.fmt = fmt
+
+ def update(self, value, n=1):
+ self.deque.append(value)
+ self.count += n
+ self.total += value * n
+
+ def synchronize_between_processes(self):
+ """
+ Warning: does not synchronize the deque!
+ """
+ if not is_dist_available_and_initialized():
+ return
+ t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
+ tdist.barrier()
+ tdist.all_reduce(t)
+ t = t.tolist()
+ self.count = int(t[0])
+ self.total = t[1]
+
+ @property
+ def median(self):
+ d = torch.tensor(list(self.deque))
+ return d.median().item()
+
+ @property
+ def avg(self):
+ d = torch.tensor(list(self.deque), dtype=torch.float32)
+ return d.mean().item()
+
+ @property
+ def global_avg(self):
+ return self.total / self.count
+
+ @property
+ def max(self):
+ return max(self.deque)
+
+ @property
+ def value(self):
+ return self.deque[-1]
+
+ def __str__(self):
+ return self.fmt.format(
+ median=self.median,
+ avg=self.avg,
+ global_avg=self.global_avg,
+ max=self.max,
+ value=self.value,
+ )
+
+
+def all_gather(data):
+ """
+ Run all_gather on arbitrary picklable data (not necessarily tensors)
+ Args:
+ data: any picklable object
+ Returns:
+ list[data]: list of data gathered from each rank
+ """
+ world_size = get_world_size()
+ if world_size == 1:
+ return [data]
+
+ # serialized to a Tensor
+ buffer = pickle.dumps(data)
+ storage = torch.ByteStorage.from_buffer(buffer)
+ tensor = torch.ByteTensor(storage).to("cuda")
+
+ # obtain Tensor size of each rank
+ local_size = torch.tensor([tensor.numel()], device="cuda")
+ size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
+ tdist.all_gather(size_list, local_size)
+ size_list = [int(size.item()) for size in size_list]
+ max_size = max(size_list)
+
+ # receiving Tensor from all ranks
+ # we pad the tensor because torch all_gather does not support
+ # gathering tensors of different shapes
+ tensor_list = []
+ for _ in size_list:
+ tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
+ if local_size != max_size:
+ padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
+ tensor = torch.cat((tensor, padding), dim=0)
+ tdist.all_gather(tensor_list, tensor)
+
+ data_list = []
+ for size, tensor in zip(size_list, tensor_list):
+ buffer = tensor.cpu().numpy().tobytes()[:size]
+ data_list.append(pickle.loads(buffer))
+
+ return data_list
+
+
+def reduce_dict(input_dict, average=True) -> Dict[str, torch.Tensor]:
+ """
+ Args:
+ input_dict (dict): all the values will be reduced
+ average (bool): whether to do average or sum
+ Reduce the values in the dictionary from all processes so that all processes
+ have the averaged results. Returns a dict with the same fields as
+ input_dict, after reduction.
+ """
+ world_size = get_world_size()
+ if world_size < 2:
+ return input_dict
+ with torch.no_grad():
+ names = []
+ values = []
+ # sort the keys so that they are consistent across processes
+ for k in sorted(input_dict.keys()):
+ names.append(k)
+ values.append(input_dict[k])
+ values = torch.stack(values, dim=0)
+ tdist.all_reduce(values)
+ if average:
+ values /= world_size
+ reduced_dict = {k: v for k, v in zip(names, values)}
+ return reduced_dict
+
+
+class MetricLogger(object):
+ def __init__(self, delimiter="\t"):
+ self.meters = defaultdict(SmoothedValue)
+ self.delimiter = delimiter
+
+ def update(self, **kwargs):
+ for k, v in kwargs.items():
+ if isinstance(v, torch.Tensor):
+ v = v.item()
+ assert isinstance(v, (float, int))
+ self.meters[k].update(v)
+
+ def __getattr__(self, attr):
+ if attr in self.meters:
+ return self.meters[attr]
+ if attr in self.__dict__:
+ return self.__dict__[attr]
+ raise AttributeError("'{}' object has no attribute '{}'".format(type(self).__name__, attr))
+
+ def __str__(self):
+ loss_str = []
+ for name, meter in self.meters.items():
+ loss_str.append("{}: {}".format(name, str(meter)))
+ return self.delimiter.join(loss_str)
+
+ def synchronize_between_processes(self):
+ for meter in self.meters.values():
+ meter.synchronize_between_processes()
+
+ def add_meter(self, name, meter):
+ self.meters[name] = meter
+
+ def log_every(self, iterable, print_freq, header=None):
+ i = 0
+ if not header:
+ header = ""
+ start_time = time.time()
+ end = time.time()
+ iter_time = SmoothedValue(fmt="{avg:.4f}")
+ data_time = SmoothedValue(fmt="{avg:.4f}")
+ space_fmt = ":" + str(len(str(len(iterable)))) + "d"
+ if torch.cuda.is_available():
+ log_msg = self.delimiter.join(
+ [
+ header,
+ "[{0" + space_fmt + "}/{1}]",
+ "eta: {eta}",
+ "{meters}",
+ "time: {time}",
+ "data: {data}",
+ "max mem: {memory:.0f}",
+ ]
+ )
+ else:
+ log_msg = self.delimiter.join(
+ [
+ header,
+ "[{0" + space_fmt + "}/{1}]",
+ "eta: {eta}",
+ "{meters}",
+ "time: {time}",
+ "data: {data}",
+ ]
+ )
+ MB = 1024.0 * 1024.0
+ for obj in iterable:
+ data_time.update(time.time() - end)
+ yield obj
+ iter_time.update(time.time() - end)
+ if i % print_freq == 0 or i == len(iterable) - 1:
+ eta_seconds = iter_time.global_avg * (len(iterable) - i)
+ eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
+ if torch.cuda.is_available():
+ print(
+ log_msg.format(
+ i,
+ len(iterable),
+ eta=eta_string,
+ meters=str(self),
+ time=str(iter_time),
+ data=str(data_time),
+ memory=torch.cuda.max_memory_allocated() / MB,
+ )
+ )
+ else:
+ print(
+ log_msg.format(
+ i,
+ len(iterable),
+ eta=eta_string,
+ meters=str(self),
+ time=str(iter_time),
+ data=str(data_time),
+ )
+ )
+ i += 1
+ end = time.time()
+ total_time = time.time() - start_time
+ total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+ print(
+ "{} Total time: {} ({:.4f} s / it)".format(
+ header, total_time_str, total_time / len(iterable)
+ )
+ )
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/misc/profiler_utils.py b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/profiler_utils.py
new file mode 100644
index 00000000..84db32a5
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/profiler_utils.py
@@ -0,0 +1,30 @@
+"""
+Copyright (c) 2024 The D-FINE Authors. All Rights Reserved.
+"""
+
+import copy
+from typing import Tuple
+
+from calflops import calculate_flops
+
+
+def stats(
+ cfg,
+ input_shape: Tuple = (1, 3, 640, 640),
+) -> Tuple[int, dict]:
+ base_size = cfg.train_dataloader.collate_fn.base_size
+ input_shape = (1, 3, base_size, base_size)
+
+ model_for_info = copy.deepcopy(cfg.model).deploy()
+
+ flops, macs, _ = calculate_flops(
+ model=model_for_info,
+ input_shape=input_shape,
+ output_as_string=True,
+ output_precision=4,
+ print_detailed=False,
+ )
+ params = sum(p.numel() for p in model_for_info.parameters())
+ del model_for_info
+
+ return params, {"Model FLOPs:%s MACs:%s Params:%s" % (flops, macs, params)}
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/misc/visualizer.py b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/visualizer.py
new file mode 100644
index 00000000..fa82f7c7
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/misc/visualizer.py
@@ -0,0 +1,121 @@
+""" "
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import PIL
+import numpy as np
+import torch
+import torch.utils.data
+import torchvision
+from typing import List, Dict
+
+torchvision.disable_beta_transforms_warning()
+
+__all__ = ["show_sample", "save_samples"]
+
+def save_samples(samples: torch.Tensor, targets: List[Dict], output_dir: str, split: str, normalized: bool, box_fmt: str):
+ '''
+ normalized: whether the boxes are normalized to [0, 1]
+ box_fmt: 'xyxy', 'xywh', 'cxcywh', D-FINE uses 'cxcywh' for training, 'xyxy' for validation
+ '''
+ from torchvision.transforms.functional import to_pil_image
+ from torchvision.ops import box_convert
+ from pathlib import Path
+ from PIL import ImageDraw, ImageFont
+ import os
+
+ os.makedirs(Path(output_dir) / Path(f"{split}_samples"), exist_ok=True)
+ # Predefined colors (standard color names recognized by PIL)
+ BOX_COLORS = [
+ "red", "blue", "green", "orange", "purple",
+ "cyan", "magenta", "yellow", "lime", "pink",
+ "teal", "lavender", "brown", "beige", "maroon",
+ "navy", "olive", "coral", "turquoise", "gold"
+ ]
+
+ LABEL_TEXT_COLOR = "white"
+
+    try:
+        font = ImageFont.load_default(size=32)  # Pillow >= 10.1 supports a sized default font
+    except TypeError:
+        font = ImageFont.load_default()  # older Pillow: fixed-size bitmap default font
+
+ for i, (sample, target) in enumerate(zip(samples, targets)):
+ sample_visualization = sample.clone().cpu()
+ target_boxes = target["boxes"].clone().cpu()
+ target_labels = target["labels"].clone().cpu()
+ target_image_id = target["image_id"].item()
+ target_image_path = target["image_path"]
+ target_image_path_stem = Path(target_image_path).stem
+
+ sample_visualization = to_pil_image(sample_visualization)
+ sample_visualization_w, sample_visualization_h = sample_visualization.size
+
+ # normalized to pixel space
+ if normalized:
+ target_boxes[:, 0] = target_boxes[:, 0] * sample_visualization_w
+ target_boxes[:, 2] = target_boxes[:, 2] * sample_visualization_w
+ target_boxes[:, 1] = target_boxes[:, 1] * sample_visualization_h
+ target_boxes[:, 3] = target_boxes[:, 3] * sample_visualization_h
+
+ # any box format -> xyxy
+ target_boxes = box_convert(target_boxes, in_fmt=box_fmt, out_fmt="xyxy")
+
+ # clip to image size
+ target_boxes[:, 0] = torch.clamp(target_boxes[:, 0], 0, sample_visualization_w)
+ target_boxes[:, 1] = torch.clamp(target_boxes[:, 1], 0, sample_visualization_h)
+ target_boxes[:, 2] = torch.clamp(target_boxes[:, 2], 0, sample_visualization_w)
+ target_boxes[:, 3] = torch.clamp(target_boxes[:, 3], 0, sample_visualization_h)
+
+ target_boxes = target_boxes.numpy().astype(np.int32)
+ target_labels = target_labels.numpy().astype(np.int32)
+
+ draw = ImageDraw.Draw(sample_visualization)
+
+ # draw target boxes
+ for box, label in zip(target_boxes, target_labels):
+ x1, y1, x2, y2 = box
+
+ # Select color based on class ID
+ box_color = BOX_COLORS[int(label) % len(BOX_COLORS)]
+
+ # Draw box (thick)
+ draw.rectangle([x1, y1, x2, y2], outline=box_color, width=3)
+
+ label_text = f"{label}"
+
+ # Measure text size
+ text_width, text_height = draw.textbbox((0, 0), label_text, font=font)[2:4]
+
+ # Draw text background
+ padding = 2
+ draw.rectangle(
+ [x1, y1 - text_height - padding * 2, x1 + text_width + padding * 2, y1],
+ fill=box_color
+ )
+
+ # Draw text (LABEL_TEXT_COLOR)
+ draw.text((x1 + padding, y1 - text_height - padding), label_text,
+ fill=LABEL_TEXT_COLOR, font=font)
+
+ save_path = Path(output_dir) / f"{split}_samples" / f"{target_image_id}_{target_image_path_stem}.webp"
+ sample_visualization.save(save_path)
+
+def show_sample(sample):
+ """for coco dataset/dataloader"""
+ import matplotlib.pyplot as plt
+ from torchvision.transforms.v2 import functional as F
+ from torchvision.utils import draw_bounding_boxes
+
+ image, target = sample
+ if isinstance(image, PIL.Image.Image):
+ image = F.to_image_tensor(image)
+
+ image = F.convert_dtype(image, torch.uint8)
+ annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3)
+
+ fig, ax = plt.subplots()
+ ax.imshow(annotated_image.permute(1, 2, 0).numpy())
+ ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
+ fig.tight_layout()
+ fig.show()
+ plt.show()
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/__init__.py
new file mode 100644
index 00000000..4544b3c8
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/__init__.py
@@ -0,0 +1,16 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from .arch import *
+
+#
+from .backbone import *
+from .backbone import (
+ FrozenBatchNorm2d,
+ freeze_batch_norm2d,
+ get_activation,
+)
+from .criterion import *
+from .postprocessor import *
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/arch/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/arch/__init__.py
new file mode 100644
index 00000000..29f908cd
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/arch/__init__.py
@@ -0,0 +1,7 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from .classification import ClassHead, Classification
+from .yolo import YOLO
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/arch/classification.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/arch/classification.py
new file mode 100644
index 00000000..d2875430
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/arch/classification.py
@@ -0,0 +1,45 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch
+import torch.nn as nn
+
+from ...core import register
+
+__all__ = ["Classification", "ClassHead"]
+
+
+@register()
+class Classification(torch.nn.Module):
+ __inject__ = ["backbone", "head"]
+
+ def __init__(self, backbone: nn.Module, head: nn.Module = None):
+ super().__init__()
+
+ self.backbone = backbone
+ self.head = head
+
+ def forward(self, x):
+ x = self.backbone(x)
+
+ if self.head is not None:
+ x = self.head(x)
+
+ return x
+
+
+@register()
+class ClassHead(nn.Module):
+ def __init__(self, hidden_dim, num_classes):
+ super().__init__()
+ self.pool = nn.AdaptiveAvgPool2d(1)
+ self.proj = nn.Linear(hidden_dim, num_classes)
+
+ def forward(self, x):
+ x = x[0] if isinstance(x, (list, tuple)) else x
+ x = self.pool(x)
+ x = x.reshape(x.shape[0], -1)
+ x = self.proj(x)
+ return x
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/arch/yolo.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/arch/yolo.py
new file mode 100644
index 00000000..c40a306f
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/arch/yolo.py
@@ -0,0 +1,42 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch
+
+from ...core import register
+
+__all__ = [
+ "YOLO",
+]
+
+
+@register()
+class YOLO(torch.nn.Module):
+ __inject__ = [
+ "backbone",
+ "neck",
+ "head",
+ ]
+
+ def __init__(self, backbone: torch.nn.Module, neck, head):
+ super().__init__()
+ self.backbone = backbone
+ self.neck = neck
+ self.head = head
+
+ def forward(self, x, **kwargs):
+ x = self.backbone(x)
+ x = self.neck(x)
+ x = self.head(x)
+ return x
+
+ def deploy(
+ self,
+ ):
+ self.eval()
+ for m in self.modules():
+ if m is not self and hasattr(m, "deploy"):
+ m.deploy()
+ return self
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/__init__.py
new file mode 100644
index 00000000..497d8dfb
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/__init__.py
@@ -0,0 +1,17 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from .common import (
+ FrozenBatchNorm2d,
+ freeze_batch_norm2d,
+ get_activation,
+)
+from .csp_darknet import CSPPAN, CSPDarkNet
+from .csp_resnet import CSPResNet
+from .hgnetv2 import HGNetv2
+from .presnet import PResNet
+from .test_resnet import MResNet
+from .timm_model import TimmModel
+from .torchvision_model import TorchVisionModel
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/common.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/common.py
new file mode 100644
index 00000000..fcdb9064
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/common.py
@@ -0,0 +1,117 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch
+import torch.nn as nn
+
+
+class ConvNormLayer(nn.Module):
+ def __init__(self, ch_in, ch_out, kernel_size, stride, padding=None, bias=False, act=None):
+ super().__init__()
+ self.conv = nn.Conv2d(
+ ch_in,
+ ch_out,
+ kernel_size,
+ stride,
+ padding=(kernel_size - 1) // 2 if padding is None else padding,
+ bias=bias,
+ )
+ self.norm = nn.BatchNorm2d(ch_out)
+ self.act = nn.Identity() if act is None else get_activation(act)
+
+ def forward(self, x):
+ return self.act(self.norm(self.conv(x)))
+
+
+class FrozenBatchNorm2d(nn.Module):
+ """copy and modified from https://github.com/facebookresearch/detr/blob/master/models/backbone.py
+ BatchNorm2d where the batch statistics and the affine parameters are fixed.
+ Copy-paste from torchvision.misc.ops with added eps before rqsrt,
+ without which any other models than torchvision.models.resnet[18,34,50,101]
+ produce nans.
+ """
+
+ def __init__(self, num_features, eps=1e-5):
+ super(FrozenBatchNorm2d, self).__init__()
+ n = num_features
+ self.register_buffer("weight", torch.ones(n))
+ self.register_buffer("bias", torch.zeros(n))
+ self.register_buffer("running_mean", torch.zeros(n))
+ self.register_buffer("running_var", torch.ones(n))
+ self.eps = eps
+ self.num_features = n
+
+ def _load_from_state_dict(
+ self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
+ ):
+ num_batches_tracked_key = prefix + "num_batches_tracked"
+ if num_batches_tracked_key in state_dict:
+ del state_dict[num_batches_tracked_key]
+
+ super(FrozenBatchNorm2d, self)._load_from_state_dict(
+ state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
+ )
+
+ def forward(self, x):
+ # move reshapes to the beginning
+ # to make it fuser-friendly
+ w = self.weight.reshape(1, -1, 1, 1)
+ b = self.bias.reshape(1, -1, 1, 1)
+ rv = self.running_var.reshape(1, -1, 1, 1)
+ rm = self.running_mean.reshape(1, -1, 1, 1)
+ scale = w * (rv + self.eps).rsqrt()
+ bias = b - rm * scale
+ return x * scale + bias
+
+ def extra_repr(self):
+ return "{num_features}, eps={eps}".format(**self.__dict__)
+
+
+def freeze_batch_norm2d(module: nn.Module) -> nn.Module:
+ if isinstance(module, nn.BatchNorm2d):
+ module = FrozenBatchNorm2d(module.num_features)
+ else:
+ for name, child in module.named_children():
+ _child = freeze_batch_norm2d(child)
+ if _child is not child:
+ setattr(module, name, _child)
+ return module
+
+
+def get_activation(act: str, inplace: bool = True):
+ """get activation"""
+ if act is None:
+ return nn.Identity()
+
+ elif isinstance(act, nn.Module):
+ return act
+
+ act = act.lower()
+
+ if act == "silu" or act == "swish":
+ m = nn.SiLU()
+
+ elif act == "relu":
+ m = nn.ReLU()
+
+ elif act == "leaky_relu":
+ m = nn.LeakyReLU()
+
+ elif act == "silu":
+ m = nn.SiLU()
+
+ elif act == "gelu":
+ m = nn.GELU()
+
+ elif act == "hardsigmoid":
+ m = nn.Hardsigmoid()
+
+ else:
+ raise RuntimeError("")
+
+ if hasattr(m, "inplace"):
+ m.inplace = inplace
+
+ return m
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/csp_darknet.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/csp_darknet.py
new file mode 100644
index 00000000..f6efa012
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/csp_darknet.py
@@ -0,0 +1,203 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import math
+import warnings
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ...core import register
+from .common import get_activation
+
+
+def autopad(k, p=None):
+ if p is None:
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
+ return p
+
+
+def make_divisible(c, d):
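+    # round the channel count up to the nearest multiple of d (keeps widths divisible for hardware)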
+ return math.ceil(c / d) * d
+
+
+class Conv(nn.Module):
+ def __init__(self, cin, cout, k=1, s=1, p=None, g=1, act="silu") -> None:
+ super().__init__()
+ self.conv = nn.Conv2d(cin, cout, k, s, autopad(k, p), groups=g, bias=False)
+ self.bn = nn.BatchNorm2d(cout)
+ self.act = get_activation(act, inplace=True)
+
+ def forward(self, x):
+ return self.act(self.bn(self.conv(x)))
+
+
+class Bottleneck(nn.Module):
+ # Standard bottleneck
+ def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, act="silu"):
+ super().__init__()
+ c_ = int(c2 * e) # hidden channels
+ self.cv1 = Conv(c1, c_, 1, 1, act=act)
+ self.cv2 = Conv(c_, c2, 3, 1, g=g, act=act)
+ self.add = shortcut and c1 == c2
+
+ def forward(self, x):
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+class C3(nn.Module):
+ # CSP Bottleneck with 3 convolutions
+ def __init__(
+ self, c1, c2, n=1, shortcut=True, g=1, e=0.5, act="silu"
+ ): # ch_in, ch_out, number, shortcut, groups, expansion
+ super().__init__()
+ c_ = int(c2 * e) # hidden channels
+ self.cv1 = Conv(c1, c_, 1, 1, act=act)
+ self.cv2 = Conv(c1, c_, 1, 1, act=act)
+ self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0, act=act) for _ in range(n)))
+ self.cv3 = Conv(2 * c_, c2, 1, act=act)
+
+ def forward(self, x):
+ return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
+
+
+class SPPF(nn.Module):
+ # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
+ def __init__(self, c1, c2, k=5, act="silu"): # equivalent to SPP(k=(5, 9, 13))
+ super().__init__()
+ c_ = c1 // 2 # hidden channels
+ self.cv1 = Conv(c1, c_, 1, 1, act=act)
+ self.cv2 = Conv(c_ * 4, c2, 1, 1, act=act)
+ self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
+
+ def forward(self, x):
+ x = self.cv1(x)
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore") # suppress torch 1.9.0 max_pool2d() warning
+ y1 = self.m(x)
+ y2 = self.m(y1)
+ return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
+
+
+@register()
+class CSPDarkNet(nn.Module):
+ __share__ = ["depth_multi", "width_multi"]
+
+ def __init__(
+ self,
+ in_channels=3,
+ width_multi=1.0,
+ depth_multi=1.0,
+ return_idx=[2, 3, -1],
+ act="silu",
+ ) -> None:
+ super().__init__()
+
+ channels = [64, 128, 256, 512, 1024]
+ channels = [make_divisible(c * width_multi, 8) for c in channels]
+
+ depths = [3, 6, 9, 3]
+ depths = [max(round(d * depth_multi), 1) for d in depths]
+
+ self.layers = nn.ModuleList([Conv(in_channels, channels[0], 6, 2, 2, act=act)])
+ for i, (c, d) in enumerate(zip(channels, depths), 1):
+ layer = nn.Sequential(
+ *[Conv(c, channels[i], 3, 2, act=act), C3(channels[i], channels[i], n=d, act=act)]
+ )
+ self.layers.append(layer)
+
+ self.layers.append(SPPF(channels[-1], channels[-1], k=5, act=act))
+
+ self.return_idx = return_idx
+ self.out_channels = [channels[i] for i in self.return_idx]
+ self.strides = [[2, 4, 8, 16, 32][i] for i in self.return_idx]
+ self.depths = depths
+ self.act = act
+
+ def forward(self, x):
+ outputs = []
+ for _, m in enumerate(self.layers):
+ x = m(x)
+ outputs.append(x)
+
+ return [outputs[i] for i in self.return_idx]
+
+
+@register()
+class CSPPAN(nn.Module):
+ """
+ P5 ---> 1x1 ---------------------------------> concat --> c3 --> det
+ | up | conv /2
+ P4 ---> concat ---> c3 ---> 1x1 --> concat ---> c3 -----------> det
+ | up | conv /2
+ P3 -----------------------> concat ---> c3 ---------------------> det
+ """
+
+ __share__ = [
+ "depth_multi",
+ ]
+
+ def __init__(self, in_channels=[256, 512, 1024], depth_multi=1.0, act="silu") -> None:
+ super().__init__()
+ depth = max(round(3 * depth_multi), 1)
+
+ self.out_channels = in_channels
+ self.fpn_stems = nn.ModuleList(
+ [
+ Conv(cin, cout, 1, 1, act=act)
+ for cin, cout in zip(in_channels[::-1], in_channels[::-1][1:])
+ ]
+ )
+ self.fpn_csps = nn.ModuleList(
+ [
+ C3(cin, cout, depth, False, act=act)
+ for cin, cout in zip(in_channels[::-1], in_channels[::-1][1:])
+ ]
+ )
+
+ self.pan_stems = nn.ModuleList([Conv(c, c, 3, 2, act=act) for c in in_channels[:-1]])
+ self.pan_csps = nn.ModuleList([C3(c, c, depth, False, act=act) for c in in_channels[1:]])
+
+ def forward(self, feats):
+ fpn_feats = []
+ for i, feat in enumerate(feats[::-1]):
+ if i == 0:
+ feat = self.fpn_stems[i](feat)
+ fpn_feats.append(feat)
+ else:
+ _feat = F.interpolate(fpn_feats[-1], scale_factor=2, mode="nearest")
+ feat = torch.concat([_feat, feat], dim=1)
+ feat = self.fpn_csps[i - 1](feat)
+ if i < len(self.fpn_stems):
+ feat = self.fpn_stems[i](feat)
+ fpn_feats.append(feat)
+
+ pan_feats = []
+ for i, feat in enumerate(fpn_feats[::-1]):
+ if i == 0:
+ pan_feats.append(feat)
+ else:
+ _feat = self.pan_stems[i - 1](pan_feats[-1])
+ feat = torch.concat([_feat, feat], dim=1)
+ feat = self.pan_csps[i - 1](feat)
+ pan_feats.append(feat)
+
+ return pan_feats
+
+
+if __name__ == "__main__":
+ data = torch.rand(1, 3, 320, 640)
+
+ width_multi = 0.75
+ depth_multi = 0.33
+
+ m = CSPDarkNet(3, width_multi=width_multi, depth_multi=depth_multi, act="silu")
+ outputs = m(data)
+ print([o.shape for o in outputs])
+
+ m = CSPPAN(in_channels=m.out_channels, depth_multi=depth_multi, act="silu")
+ outputs = m(outputs)
+ print([o.shape for o in outputs])
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/csp_resnet.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/csp_resnet.py
new file mode 100644
index 00000000..17887c78
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/csp_resnet.py
@@ -0,0 +1,302 @@
+"""
+https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/ppdet/modeling/backbones/cspresnet.py
+
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from collections import OrderedDict
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ...core import register
+from .common import get_activation
+
+__all__ = ["CSPResNet"]
+
+
+download_url = {
+ "s": "https://github.com/lyuwenyu/storage/releases/download/v0.1/CSPResNetb_s_pretrained_from_paddle.pth",
+ "m": "https://github.com/lyuwenyu/storage/releases/download/v0.1/CSPResNetb_m_pretrained_from_paddle.pth",
+ "l": "https://github.com/lyuwenyu/storage/releases/download/v0.1/CSPResNetb_l_pretrained_from_paddle.pth",
+ "x": "https://github.com/lyuwenyu/storage/releases/download/v0.1/CSPResNetb_x_pretrained_from_paddle.pth",
+}
+
+
+class ConvBNLayer(nn.Module):
+ def __init__(self, ch_in, ch_out, filter_size=3, stride=1, groups=1, padding=0, act=None):
+ super().__init__()
+ self.conv = nn.Conv2d(
+ ch_in, ch_out, filter_size, stride, padding, groups=groups, bias=False
+ )
+ self.bn = nn.BatchNorm2d(ch_out)
+ self.act = get_activation(act)
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ x = self.conv(x)
+ x = self.bn(x)
+ x = self.act(x)
+ return x
+
+
+class RepVggBlock(nn.Module):
+ def __init__(self, ch_in, ch_out, act="relu", alpha: bool = False):
+ super().__init__()
+ self.ch_in = ch_in
+ self.ch_out = ch_out
+ self.conv1 = ConvBNLayer(ch_in, ch_out, 3, stride=1, padding=1, act=None)
+ self.conv2 = ConvBNLayer(ch_in, ch_out, 1, stride=1, padding=0, act=None)
+ self.act = get_activation(act)
+
+ if alpha:
+ self.alpha = nn.Parameter(
+ torch.ones(
+ 1,
+ )
+ )
+ else:
+ self.alpha = None
+
+ def forward(self, x):
+ if hasattr(self, "conv"):
+ y = self.conv(x)
+ else:
+ if self.alpha:
+ y = self.conv1(x) + self.alpha * self.conv2(x)
+ else:
+ y = self.conv1(x) + self.conv2(x)
+ y = self.act(y)
+ return y
+
+ def convert_to_deploy(self):
+ if not hasattr(self, "conv"):
+ self.conv = nn.Conv2d(self.ch_in, self.ch_out, 3, 1, padding=1)
+
+ kernel, bias = self.get_equivalent_kernel_bias()
+ self.conv.weight.data = kernel
+ self.conv.bias.data = bias
+
+ def get_equivalent_kernel_bias(self):
+ kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
+ kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
+
+ if self.alpha:
+ return kernel3x3 + self.alpha * self._pad_1x1_to_3x3_tensor(
+ kernel1x1
+ ), bias3x3 + self.alpha * bias1x1
+ else:
+ return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1), bias3x3 + bias1x1
+
+ def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+ if kernel1x1 is None:
+ return 0
+ else:
+ return F.pad(kernel1x1, [1, 1, 1, 1])
+
+ def _fuse_bn_tensor(self, branch: ConvBNLayer):
+ if branch is None:
+ return 0, 0
+ kernel = branch.conv.weight
+        # ConvBNLayer registers its BatchNorm2d as `self.bn`
+        running_mean = branch.bn.running_mean
+        running_var = branch.bn.running_var
+        gamma = branch.bn.weight
+        beta = branch.bn.bias
+        eps = branch.bn.eps
+ std = (running_var + eps).sqrt()
+ t = (gamma / std).reshape(-1, 1, 1, 1)
+ return kernel * t, beta - running_mean * gamma / std
+
+
+class BasicBlock(nn.Module):
+ def __init__(self, ch_in, ch_out, act="relu", shortcut=True, use_alpha=False):
+ super().__init__()
+ assert ch_in == ch_out
+ self.conv1 = ConvBNLayer(ch_in, ch_out, 3, stride=1, padding=1, act=act)
+ self.conv2 = RepVggBlock(ch_out, ch_out, act=act, alpha=use_alpha)
+ self.shortcut = shortcut
+
+ def forward(self, x):
+ y = self.conv1(x)
+ y = self.conv2(y)
+ if self.shortcut:
+ return x + y
+ else:
+ return y
+
+
+class EffectiveSELayer(nn.Module):
+ """Effective Squeeze-Excitation
+ From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667
+ """
+
+ def __init__(self, channels, act="hardsigmoid"):
+ super(EffectiveSELayer, self).__init__()
+ self.fc = nn.Conv2d(channels, channels, kernel_size=1, padding=0)
+ self.act = get_activation(act)
+
+ def forward(self, x: torch.Tensor):
+ x_se = x.mean((2, 3), keepdim=True)
+ x_se = self.fc(x_se)
+ x_se = self.act(x_se)
+ return x * x_se
+
+
+class CSPResStage(nn.Module):
+ def __init__(self, block_fn, ch_in, ch_out, n, stride, act="relu", attn="eca", use_alpha=False):
+ super().__init__()
+ ch_mid = (ch_in + ch_out) // 2
+ if stride == 2:
+ self.conv_down = ConvBNLayer(ch_in, ch_mid, 3, stride=2, padding=1, act=act)
+ else:
+ self.conv_down = None
+ self.conv1 = ConvBNLayer(ch_mid, ch_mid // 2, 1, act=act)
+ self.conv2 = ConvBNLayer(ch_mid, ch_mid // 2, 1, act=act)
+ self.blocks = nn.Sequential(
+ *[
+ block_fn(ch_mid // 2, ch_mid // 2, act=act, shortcut=True, use_alpha=use_alpha)
+ for i in range(n)
+ ]
+ )
+ if attn:
+ self.attn = EffectiveSELayer(ch_mid, act="hardsigmoid")
+ else:
+ self.attn = None
+
+ self.conv3 = ConvBNLayer(ch_mid, ch_out, 1, act=act)
+
+ def forward(self, x):
+ if self.conv_down is not None:
+ x = self.conv_down(x)
+ y1 = self.conv1(x)
+ y2 = self.blocks(self.conv2(x))
+ y = torch.concat([y1, y2], dim=1)
+ if self.attn is not None:
+ y = self.attn(y)
+ y = self.conv3(y)
+ return y
+
+
+@register()
+class CSPResNet(nn.Module):
+ layers = [3, 6, 6, 3]
+ channels = [64, 128, 256, 512, 1024]
+ model_cfg = {
+ "s": {
+ "depth_mult": 0.33,
+ "width_mult": 0.50,
+ },
+ "m": {
+ "depth_mult": 0.67,
+ "width_mult": 0.75,
+ },
+ "l": {
+ "depth_mult": 1.00,
+ "width_mult": 1.00,
+ },
+ "x": {
+ "depth_mult": 1.33,
+ "width_mult": 1.25,
+ },
+ }
+
+ def __init__(
+ self,
+ name: str,
+ act="silu",
+ return_idx=[1, 2, 3],
+ use_large_stem=True,
+ use_alpha=False,
+ pretrained=False,
+ ):
+ super().__init__()
+ depth_mult = self.model_cfg[name]["depth_mult"]
+ width_mult = self.model_cfg[name]["width_mult"]
+
+ channels = [max(round(c * width_mult), 1) for c in self.channels]
+ layers = [max(round(l * depth_mult), 1) for l in self.layers]
+ act = get_activation(act)
+
+ if use_large_stem:
+ self.stem = nn.Sequential(
+ OrderedDict(
+ [
+ (
+ "conv1",
+ ConvBNLayer(3, channels[0] // 2, 3, stride=2, padding=1, act=act),
+ ),
+ (
+ "conv2",
+ ConvBNLayer(
+ channels[0] // 2, channels[0] // 2, 3, stride=1, padding=1, act=act
+ ),
+ ),
+ (
+ "conv3",
+ ConvBNLayer(
+ channels[0] // 2, channels[0], 3, stride=1, padding=1, act=act
+ ),
+ ),
+ ]
+ )
+ )
+ else:
+ self.stem = nn.Sequential(
+ OrderedDict(
+ [
+ (
+ "conv1",
+ ConvBNLayer(3, channels[0] // 2, 3, stride=2, padding=1, act=act),
+ ),
+ (
+ "conv2",
+ ConvBNLayer(
+ channels[0] // 2, channels[0], 3, stride=1, padding=1, act=act
+ ),
+ ),
+ ]
+ )
+ )
+
+ n = len(channels) - 1
+ self.stages = nn.Sequential(
+ OrderedDict(
+ [
+ (
+ str(i),
+ CSPResStage(
+ BasicBlock,
+ channels[i],
+ channels[i + 1],
+ layers[i],
+ 2,
+ act=act,
+ use_alpha=use_alpha,
+ ),
+ )
+ for i in range(n)
+ ]
+ )
+ )
+
+ self._out_channels = channels[1:]
+ self._out_strides = [4 * 2**i for i in range(n)]
+ self.return_idx = return_idx
+
+ if pretrained:
+ if isinstance(pretrained, bool) or "http" in pretrained:
+                state = torch.hub.load_state_dict_from_url(download_url[name], map_location="cpu")
+ else:
+ state = torch.load(pretrained, map_location="cpu")
+ self.load_state_dict(state)
+ print(f"Load CSPResNet_{name} state_dict")
+
+ def forward(self, x):
+ x = self.stem(x)
+ outs = []
+ for idx, stage in enumerate(self.stages):
+ x = stage(x)
+ if idx in self.return_idx:
+ outs.append(x)
+
+ return outs
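+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative): build the small variant without pretrained
+    # weights and inspect the three feature maps returned for return_idx=[1, 2, 3]
+    # (strides 8, 16 and 32).
+    model = CSPResNet("s", pretrained=False)
+    feats = model(torch.rand(1, 3, 640, 640))
+    print([f.shape for f in feats])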
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/hgnetv2.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/hgnetv2.py
new file mode 100644
index 00000000..85d99d6c
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/hgnetv2.py
@@ -0,0 +1,579 @@
+"""
+reference
+- https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
+
+Copyright (c) 2024 The D-FINE Authors. All Rights Reserved.
+"""
+
+import logging
+import os
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ...core import register
+from .common import FrozenBatchNorm2d
+
+# Constants for initialization
+kaiming_normal_ = nn.init.kaiming_normal_
+zeros_ = nn.init.zeros_
+ones_ = nn.init.ones_
+
+__all__ = ["HGNetv2"]
+
+def safe_barrier():
+ if torch.distributed.is_available() and torch.distributed.is_initialized():
+ torch.distributed.barrier()
+ else:
+ pass
+
+def safe_get_rank():
+ if torch.distributed.is_available() and torch.distributed.is_initialized():
+ return torch.distributed.get_rank()
+ else:
+ return 0
+
+class LearnableAffineBlock(nn.Module):
+ def __init__(self, scale_value=1.0, bias_value=0.0):
+ super().__init__()
+ self.scale = nn.Parameter(torch.tensor([scale_value]), requires_grad=True)
+ self.bias = nn.Parameter(torch.tensor([bias_value]), requires_grad=True)
+
+ def forward(self, x):
+ return self.scale * x + self.bias
+
+
+class ConvBNAct(nn.Module):
+ def __init__(
+ self,
+ in_chs,
+ out_chs,
+ kernel_size,
+ stride=1,
+ groups=1,
+ padding="",
+ use_act=True,
+ use_lab=False,
+ ):
+ super().__init__()
+ self.use_act = use_act
+ self.use_lab = use_lab
+ if padding == "same":
+ self.conv = nn.Sequential(
+ nn.ZeroPad2d([0, 1, 0, 1]),
+ nn.Conv2d(in_chs, out_chs, kernel_size, stride, groups=groups, bias=False),
+ )
+ else:
+ self.conv = nn.Conv2d(
+ in_chs,
+ out_chs,
+ kernel_size,
+ stride,
+ padding=(kernel_size - 1) // 2,
+ groups=groups,
+ bias=False,
+ )
+ self.bn = nn.BatchNorm2d(out_chs)
+ if self.use_act:
+ self.act = nn.ReLU()
+ else:
+ self.act = nn.Identity()
+ if self.use_act and self.use_lab:
+ self.lab = LearnableAffineBlock()
+ else:
+ self.lab = nn.Identity()
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ x = self.act(x)
+ x = self.lab(x)
+ return x
+
+
+class LightConvBNAct(nn.Module):
+ def __init__(
+ self,
+ in_chs,
+ out_chs,
+ kernel_size,
+ groups=1,
+ use_lab=False,
+ ):
+ super().__init__()
+ self.conv1 = ConvBNAct(
+ in_chs,
+ out_chs,
+ kernel_size=1,
+ use_act=False,
+ use_lab=use_lab,
+ )
+ self.conv2 = ConvBNAct(
+ out_chs,
+ out_chs,
+ kernel_size=kernel_size,
+ groups=out_chs,
+ use_act=True,
+ use_lab=use_lab,
+ )
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.conv2(x)
+ return x
+
+
+class StemBlock(nn.Module):
+ # for HGNetv2
+ def __init__(self, in_chs, mid_chs, out_chs, use_lab=False):
+ super().__init__()
+ self.stem1 = ConvBNAct(
+ in_chs,
+ mid_chs,
+ kernel_size=3,
+ stride=2,
+ use_lab=use_lab,
+ )
+ self.stem2a = ConvBNAct(
+ mid_chs,
+ mid_chs // 2,
+ kernel_size=2,
+ stride=1,
+ use_lab=use_lab,
+ )
+ self.stem2b = ConvBNAct(
+ mid_chs // 2,
+ mid_chs,
+ kernel_size=2,
+ stride=1,
+ use_lab=use_lab,
+ )
+ self.stem3 = ConvBNAct(
+ mid_chs * 2,
+ mid_chs,
+ kernel_size=3,
+ stride=2,
+ use_lab=use_lab,
+ )
+ self.stem4 = ConvBNAct(
+ mid_chs,
+ out_chs,
+ kernel_size=1,
+ stride=1,
+ use_lab=use_lab,
+ )
+ self.pool = nn.MaxPool2d(kernel_size=2, stride=1, ceil_mode=True)
+
+ def forward(self, x):
+ x = self.stem1(x)
+ x = F.pad(x, (0, 1, 0, 1))
+ x2 = self.stem2a(x)
+ x2 = F.pad(x2, (0, 1, 0, 1))
+ x2 = self.stem2b(x2)
+ x1 = self.pool(x)
+ x = torch.cat([x1, x2], dim=1)
+ x = self.stem3(x)
+ x = self.stem4(x)
+ return x
+
+
+class EseModule(nn.Module):
+ def __init__(self, chs):
+ super().__init__()
+ self.conv = nn.Conv2d(
+ chs,
+ chs,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+ self.sigmoid = nn.Sigmoid()
+
+ def forward(self, x):
+ identity = x
+ x = x.mean((2, 3), keepdim=True)
+ x = self.conv(x)
+ x = self.sigmoid(x)
+ return torch.mul(identity, x)
+
+
+class HG_Block(nn.Module):
+ def __init__(
+ self,
+ in_chs,
+ mid_chs,
+ out_chs,
+ layer_num,
+ kernel_size=3,
+ residual=False,
+ light_block=False,
+ use_lab=False,
+ agg="ese",
+ drop_path=0.0,
+ ):
+ super().__init__()
+ self.residual = residual
+
+ self.layers = nn.ModuleList()
+ for i in range(layer_num):
+ if light_block:
+ self.layers.append(
+ LightConvBNAct(
+ in_chs if i == 0 else mid_chs,
+ mid_chs,
+ kernel_size=kernel_size,
+ use_lab=use_lab,
+ )
+ )
+ else:
+ self.layers.append(
+ ConvBNAct(
+ in_chs if i == 0 else mid_chs,
+ mid_chs,
+ kernel_size=kernel_size,
+ stride=1,
+ use_lab=use_lab,
+ )
+ )
+
+ # feature aggregation
+ total_chs = in_chs + layer_num * mid_chs
+ if agg == "se":
+ aggregation_squeeze_conv = ConvBNAct(
+ total_chs,
+ out_chs // 2,
+ kernel_size=1,
+ stride=1,
+ use_lab=use_lab,
+ )
+ aggregation_excitation_conv = ConvBNAct(
+ out_chs // 2,
+ out_chs,
+ kernel_size=1,
+ stride=1,
+ use_lab=use_lab,
+ )
+ self.aggregation = nn.Sequential(
+ aggregation_squeeze_conv,
+ aggregation_excitation_conv,
+ )
+ else:
+ aggregation_conv = ConvBNAct(
+ total_chs,
+ out_chs,
+ kernel_size=1,
+ stride=1,
+ use_lab=use_lab,
+ )
+ att = EseModule(out_chs)
+ self.aggregation = nn.Sequential(
+ aggregation_conv,
+ att,
+ )
+
+ self.drop_path = nn.Dropout(drop_path) if drop_path else nn.Identity()
+
+ def forward(self, x):
+ identity = x
+ output = [x]
+ for layer in self.layers:
+ x = layer(x)
+ output.append(x)
+ x = torch.cat(output, dim=1)
+ x = self.aggregation(x)
+ if self.residual:
+ x = self.drop_path(x) + identity
+ return x
+
+
+class HG_Stage(nn.Module):
+ def __init__(
+ self,
+ in_chs,
+ mid_chs,
+ out_chs,
+ block_num,
+ layer_num,
+ downsample=True,
+ light_block=False,
+ kernel_size=3,
+ use_lab=False,
+ agg="se",
+ drop_path=0.0,
+ ):
+ super().__init__()
+ self.downsample = downsample
+ if downsample:
+ self.downsample = ConvBNAct(
+ in_chs,
+ in_chs,
+ kernel_size=3,
+ stride=2,
+ groups=in_chs,
+ use_act=False,
+ use_lab=use_lab,
+ )
+ else:
+ self.downsample = nn.Identity()
+
+ blocks_list = []
+ for i in range(block_num):
+ blocks_list.append(
+ HG_Block(
+ in_chs if i == 0 else out_chs,
+ mid_chs,
+ out_chs,
+ layer_num,
+ residual=False if i == 0 else True,
+ kernel_size=kernel_size,
+ light_block=light_block,
+ use_lab=use_lab,
+ agg=agg,
+ drop_path=drop_path[i] if isinstance(drop_path, (list, tuple)) else drop_path,
+ )
+ )
+ self.blocks = nn.Sequential(*blocks_list)
+
+ def forward(self, x):
+ x = self.downsample(x)
+ x = self.blocks(x)
+ return x
+
+
+@register()
+class HGNetv2(nn.Module):
+ """
+ HGNetV2
+ Args:
+ stem_channels: list. Number of channels for the stem block.
+ stage_type: str. The stage configuration of HGNet. such as the number of channels, stride, etc.
+ use_lab: boolean. Whether to use LearnableAffineBlock in network.
+ lr_mult_list: list. Control the learning rate of different stages.
+ Returns:
+ model: nn.Layer. Specific HGNetV2 model depends on args.
+ """
+
+ arch_configs = {
+ "B0": {
+ "stem_channels": [3, 16, 16],
+ "stage_config": {
+ # in_channels, mid_channels, out_channels, num_blocks, downsample, light_block, kernel_size, layer_num
+ "stage1": [16, 16, 64, 1, False, False, 3, 3],
+ "stage2": [64, 32, 256, 1, True, False, 3, 3],
+ "stage3": [256, 64, 512, 2, True, True, 5, 3],
+ "stage4": [512, 128, 1024, 1, True, True, 5, 3],
+ },
+ "url": "https://github.com/Peterande/storage/releases/download/dfinev1.0/PPHGNetV2_B0_stage1.pth",
+ },
+ "B1": {
+ "stem_channels": [3, 24, 32],
+ "stage_config": {
+ # in_channels, mid_channels, out_channels, num_blocks, downsample, light_block, kernel_size, layer_num
+ "stage1": [32, 32, 64, 1, False, False, 3, 3],
+ "stage2": [64, 48, 256, 1, True, False, 3, 3],
+ "stage3": [256, 96, 512, 2, True, True, 5, 3],
+ "stage4": [512, 192, 1024, 1, True, True, 5, 3],
+ },
+ "url": "https://github.com/Peterande/storage/releases/download/dfinev1.0/PPHGNetV2_B1_stage1.pth",
+ },
+ "B2": {
+ "stem_channels": [3, 24, 32],
+ "stage_config": {
+ # in_channels, mid_channels, out_channels, num_blocks, downsample, light_block, kernel_size, layer_num
+ "stage1": [32, 32, 96, 1, False, False, 3, 4],
+ "stage2": [96, 64, 384, 1, True, False, 3, 4],
+ "stage3": [384, 128, 768, 3, True, True, 5, 4],
+ "stage4": [768, 256, 1536, 1, True, True, 5, 4],
+ },
+ "url": "https://github.com/Peterande/storage/releases/download/dfinev1.0/PPHGNetV2_B2_stage1.pth",
+ },
+ "B3": {
+ "stem_channels": [3, 24, 32],
+ "stage_config": {
+ # in_channels, mid_channels, out_channels, num_blocks, downsample, light_block, kernel_size, layer_num
+ "stage1": [32, 32, 128, 1, False, False, 3, 5],
+ "stage2": [128, 64, 512, 1, True, False, 3, 5],
+ "stage3": [512, 128, 1024, 3, True, True, 5, 5],
+ "stage4": [1024, 256, 2048, 1, True, True, 5, 5],
+ },
+ "url": "https://github.com/Peterande/storage/releases/download/dfinev1.0/PPHGNetV2_B3_stage1.pth",
+ },
+ "B4": {
+ "stem_channels": [3, 32, 48],
+ "stage_config": {
+ # in_channels, mid_channels, out_channels, num_blocks, downsample, light_block, kernel_size, layer_num
+ "stage1": [48, 48, 128, 1, False, False, 3, 6],
+ "stage2": [128, 96, 512, 1, True, False, 3, 6],
+ "stage3": [512, 192, 1024, 3, True, True, 5, 6],
+ "stage4": [1024, 384, 2048, 1, True, True, 5, 6],
+ },
+ "url": "https://github.com/Peterande/storage/releases/download/dfinev1.0/PPHGNetV2_B4_stage1.pth",
+ },
+ "B5": {
+ "stem_channels": [3, 32, 64],
+ "stage_config": {
+ # in_channels, mid_channels, out_channels, num_blocks, downsample, light_block, kernel_size, layer_num
+ "stage1": [64, 64, 128, 1, False, False, 3, 6],
+ "stage2": [128, 128, 512, 2, True, False, 3, 6],
+ "stage3": [512, 256, 1024, 5, True, True, 5, 6],
+ "stage4": [1024, 512, 2048, 2, True, True, 5, 6],
+ },
+ "url": "https://github.com/Peterande/storage/releases/download/dfinev1.0/PPHGNetV2_B5_stage1.pth",
+ },
+ "B6": {
+ "stem_channels": [3, 48, 96],
+ "stage_config": {
+ # in_channels, mid_channels, out_channels, num_blocks, downsample, light_block, kernel_size, layer_num
+ "stage1": [96, 96, 192, 2, False, False, 3, 6],
+ "stage2": [192, 192, 512, 3, True, False, 3, 6],
+ "stage3": [512, 384, 1024, 6, True, True, 5, 6],
+ "stage4": [1024, 768, 2048, 3, True, True, 5, 6],
+ },
+ "url": "https://github.com/Peterande/storage/releases/download/dfinev1.0/PPHGNetV2_B6_stage1.pth",
+ },
+ }
+
+ def __init__(
+ self,
+ name,
+ use_lab=False,
+ return_idx=[1, 2, 3],
+ freeze_stem_only=True,
+ freeze_at=0,
+ freeze_norm=True,
+ pretrained=True,
+ local_model_dir="weight/hgnetv2/",
+ ):
+ super().__init__()
+ self.use_lab = use_lab
+ self.return_idx = return_idx
+
+ stem_channels = self.arch_configs[name]["stem_channels"]
+ stage_config = self.arch_configs[name]["stage_config"]
+ download_url = self.arch_configs[name]["url"]
+
+ self._out_strides = [4, 8, 16, 32]
+ self._out_channels = [stage_config[k][2] for k in stage_config]
+
+ # stem
+ self.stem = StemBlock(
+ in_chs=stem_channels[0],
+ mid_chs=stem_channels[1],
+ out_chs=stem_channels[2],
+ use_lab=use_lab,
+ )
+
+ # stages
+ self.stages = nn.ModuleList()
+ for i, k in enumerate(stage_config):
+ (
+ in_channels,
+ mid_channels,
+ out_channels,
+ block_num,
+ downsample,
+ light_block,
+ kernel_size,
+ layer_num,
+ ) = stage_config[k]
+ self.stages.append(
+ HG_Stage(
+ in_channels,
+ mid_channels,
+ out_channels,
+ block_num,
+ layer_num,
+ downsample,
+ light_block,
+ kernel_size,
+ use_lab,
+ )
+ )
+
+ if freeze_at >= 0:
+ self._freeze_parameters(self.stem)
+ if not freeze_stem_only:
+ for i in range(min(freeze_at + 1, len(self.stages))):
+ self._freeze_parameters(self.stages[i])
+
+ if freeze_norm:
+ self._freeze_norm(self)
+
+ if pretrained:
+ RED, GREEN, RESET = "\033[91m", "\033[92m", "\033[0m"
+ try:
+ # If the file doesn't exist locally, download from the URL
+ if safe_get_rank() == 0:
+ print(
+ GREEN
+ + "If the pretrained HGNetV2 can't be downloaded automatically. Please check your network connection."
+ + RESET
+ )
+ print(
+ GREEN
+ + "Please check your network connection. Or download the model manually from "
+ + RESET
+ + f"{download_url}"
+ + GREEN
+ + " to "
+ + RESET
+ + f"{local_model_dir}."
+ + RESET
+ )
+ state = torch.hub.load_state_dict_from_url(
+ download_url, map_location="cpu", model_dir=local_model_dir
+ )
+ print(f"Loaded stage1 {name} HGNetV2 from URL.")
+
+ # Wait for rank 0 to download the model
+ safe_barrier()
+
+ # All processes load the downloaded model
+ model_path = local_model_dir + "PPHGNetV2_" + name + "_stage1.pth"
+ state = torch.load(model_path, map_location="cpu")
+
+ self.load_state_dict(state)
+ print(f"Loaded stage1 {name} HGNetV2 from URL.")
+
+ except (Exception, KeyboardInterrupt) as e:
+ if safe_get_rank() == 0:
+ print(f"{str(e)}")
+ logging.error(
+ RED + "CRITICAL WARNING: Failed to load pretrained HGNetV2 model" + RESET
+ )
+ logging.error(
+ GREEN
+ + "Please check your network connection. Or download the model manually from "
+ + RESET
+ + f"{download_url}"
+ + GREEN
+ + " to "
+ + RESET
+ + f"{local_model_dir}."
+ + RESET
+ )
+ exit()
+
+ def _freeze_norm(self, m: nn.Module):
+ if isinstance(m, nn.BatchNorm2d):
+ m = FrozenBatchNorm2d(m.num_features)
+ else:
+ for name, child in m.named_children():
+ _child = self._freeze_norm(child)
+ if _child is not child:
+ setattr(m, name, _child)
+ return m
+
+ def _freeze_parameters(self, m: nn.Module):
+ for p in m.parameters():
+ p.requires_grad = False
+
+ def forward(self, x):
+ x = self.stem(x)
+ outs = []
+ for idx, stage in enumerate(self.stages):
+ x = stage(x)
+ if idx in self.return_idx:
+ outs.append(x)
+ return outs
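+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative): build a B0 backbone without pretrained
+    # weights and check the three feature maps returned for return_idx=[1, 2, 3]
+    # (strides 8, 16 and 32).
+    model = HGNetv2("B0", pretrained=False)
+    feats = model(torch.rand(1, 3, 640, 640))
+    print([f.shape for f in feats])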
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/presnet.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/presnet.py
new file mode 100644
index 00000000..5543fe83
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/presnet.py
@@ -0,0 +1,263 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from collections import OrderedDict
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ...core import register
+from .common import FrozenBatchNorm2d, get_activation
+
+__all__ = ["PResNet"]
+
+
+ResNet_cfg = {
+ 18: [2, 2, 2, 2],
+ 34: [3, 4, 6, 3],
+ 50: [3, 4, 6, 3],
+ 101: [3, 4, 23, 3],
+ # 152: [3, 8, 36, 3],
+}
+
+
+download_url = {
+ 18: "https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet18_vd_pretrained_from_paddle.pth",
+ 34: "https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet34_vd_pretrained_from_paddle.pth",
+ 50: "https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet50_vd_ssld_v2_pretrained_from_paddle.pth",
+ 101: "https://github.com/lyuwenyu/storage/releases/download/v0.1/ResNet101_vd_ssld_pretrained_from_paddle.pth",
+}
+
+
+class ConvNormLayer(nn.Module):
+ def __init__(self, ch_in, ch_out, kernel_size, stride, padding=None, bias=False, act=None):
+ super().__init__()
+ self.conv = nn.Conv2d(
+ ch_in,
+ ch_out,
+ kernel_size,
+ stride,
+ padding=(kernel_size - 1) // 2 if padding is None else padding,
+ bias=bias,
+ )
+ self.norm = nn.BatchNorm2d(ch_out)
+ self.act = get_activation(act)
+
+ def forward(self, x):
+ return self.act(self.norm(self.conv(x)))
+
+
+class BasicBlock(nn.Module):
+ expansion = 1
+
+ def __init__(self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"):
+ super().__init__()
+
+ self.shortcut = shortcut
+
+ if not shortcut:
+ if variant == "d" and stride == 2:
+ self.short = nn.Sequential(
+ OrderedDict(
+ [
+ ("pool", nn.AvgPool2d(2, 2, 0, ceil_mode=True)),
+ ("conv", ConvNormLayer(ch_in, ch_out, 1, 1)),
+ ]
+ )
+ )
+ else:
+ self.short = ConvNormLayer(ch_in, ch_out, 1, stride)
+
+ self.branch2a = ConvNormLayer(ch_in, ch_out, 3, stride, act=act)
+ self.branch2b = ConvNormLayer(ch_out, ch_out, 3, 1, act=None)
+ self.act = nn.Identity() if act is None else get_activation(act)
+
+ def forward(self, x):
+ out = self.branch2a(x)
+ out = self.branch2b(out)
+ if self.shortcut:
+ short = x
+ else:
+ short = self.short(x)
+
+ out = out + short
+ out = self.act(out)
+
+ return out
+
+
+class BottleNeck(nn.Module):
+ expansion = 4
+
+ def __init__(self, ch_in, ch_out, stride, shortcut, act="relu", variant="b"):
+ super().__init__()
+
+ if variant == "a":
+ stride1, stride2 = stride, 1
+ else:
+ stride1, stride2 = 1, stride
+
+ width = ch_out
+
+ self.branch2a = ConvNormLayer(ch_in, width, 1, stride1, act=act)
+ self.branch2b = ConvNormLayer(width, width, 3, stride2, act=act)
+ self.branch2c = ConvNormLayer(width, ch_out * self.expansion, 1, 1)
+
+ self.shortcut = shortcut
+ if not shortcut:
+ if variant == "d" and stride == 2:
+ self.short = nn.Sequential(
+ OrderedDict(
+ [
+ ("pool", nn.AvgPool2d(2, 2, 0, ceil_mode=True)),
+ ("conv", ConvNormLayer(ch_in, ch_out * self.expansion, 1, 1)),
+ ]
+ )
+ )
+ else:
+ self.short = ConvNormLayer(ch_in, ch_out * self.expansion, 1, stride)
+
+ self.act = nn.Identity() if act is None else get_activation(act)
+
+ def forward(self, x):
+ out = self.branch2a(x)
+ out = self.branch2b(out)
+ out = self.branch2c(out)
+
+ if self.shortcut:
+ short = x
+ else:
+ short = self.short(x)
+
+ out = out + short
+ out = self.act(out)
+
+ return out
+
+
+class Blocks(nn.Module):
+ def __init__(self, block, ch_in, ch_out, count, stage_num, act="relu", variant="b"):
+ super().__init__()
+
+ self.blocks = nn.ModuleList()
+ for i in range(count):
+ self.blocks.append(
+ block(
+ ch_in,
+ ch_out,
+ stride=2 if i == 0 and stage_num != 2 else 1,
+ shortcut=False if i == 0 else True,
+ variant=variant,
+ act=act,
+ )
+ )
+
+ if i == 0:
+ ch_in = ch_out * block.expansion
+
+ def forward(self, x):
+ out = x
+ for block in self.blocks:
+ out = block(out)
+ return out
+
+
+@register()
+class PResNet(nn.Module):
+ def __init__(
+ self,
+ depth,
+ variant="d",
+ num_stages=4,
+ return_idx=[0, 1, 2, 3],
+ act="relu",
+ freeze_at=-1,
+ freeze_norm=True,
+ pretrained=False,
+ ):
+ super().__init__()
+
+ block_nums = ResNet_cfg[depth]
+ ch_in = 64
+ if variant in ["c", "d"]:
+ conv_def = [
+ [3, ch_in // 2, 3, 2, "conv1_1"],
+ [ch_in // 2, ch_in // 2, 3, 1, "conv1_2"],
+ [ch_in // 2, ch_in, 3, 1, "conv1_3"],
+ ]
+ else:
+ conv_def = [[3, ch_in, 7, 2, "conv1_1"]]
+
+ self.conv1 = nn.Sequential(
+ OrderedDict(
+ [
+ (name, ConvNormLayer(cin, cout, k, s, act=act))
+ for cin, cout, k, s, name in conv_def
+ ]
+ )
+ )
+
+ ch_out_list = [64, 128, 256, 512]
+ block = BottleNeck if depth >= 50 else BasicBlock
+
+ _out_channels = [block.expansion * v for v in ch_out_list]
+ _out_strides = [4, 8, 16, 32]
+
+ self.res_layers = nn.ModuleList()
+ for i in range(num_stages):
+ stage_num = i + 2
+ self.res_layers.append(
+ Blocks(
+ block, ch_in, ch_out_list[i], block_nums[i], stage_num, act=act, variant=variant
+ )
+ )
+ ch_in = _out_channels[i]
+
+ self.return_idx = return_idx
+ self.out_channels = [_out_channels[_i] for _i in return_idx]
+ self.out_strides = [_out_strides[_i] for _i in return_idx]
+
+ if freeze_at >= 0:
+ self._freeze_parameters(self.conv1)
+ for i in range(min(freeze_at, num_stages)):
+ self._freeze_parameters(self.res_layers[i])
+
+ if freeze_norm:
+ self._freeze_norm(self)
+
+ if pretrained:
+ if isinstance(pretrained, bool) or "http" in pretrained:
+ state = torch.hub.load_state_dict_from_url(
+                    download_url[depth], map_location="cpu", model_dir="weight"
+ )
+ else:
+ state = torch.load(pretrained, map_location="cpu")
+ self.load_state_dict(state)
+ print(f"Load PResNet{depth} state_dict")
+
+ def _freeze_parameters(self, m: nn.Module):
+ for p in m.parameters():
+ p.requires_grad = False
+
+ def _freeze_norm(self, m: nn.Module):
+ if isinstance(m, nn.BatchNorm2d):
+ m = FrozenBatchNorm2d(m.num_features)
+ else:
+ for name, child in m.named_children():
+ _child = self._freeze_norm(child)
+ if _child is not child:
+ setattr(m, name, _child)
+ return m
+
+ def forward(self, x):
+ conv1 = self.conv1(x)
+ x = F.max_pool2d(conv1, kernel_size=3, stride=2, padding=1)
+ outs = []
+ for idx, stage in enumerate(self.res_layers):
+ x = stage(x)
+ if idx in self.return_idx:
+ outs.append(x)
+ return outs
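+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative): ResNet-50 (variant "d") backbone returning
+    # the C3/C4/C5 feature maps (strides 8, 16 and 32).
+    model = PResNet(depth=50, return_idx=[1, 2, 3], pretrained=False)
+    feats = model(torch.rand(1, 3, 640, 640))
+    print([f.shape for f in feats])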
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/test_resnet.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/test_resnet.py
new file mode 100644
index 00000000..ab877ff4
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/test_resnet.py
@@ -0,0 +1,83 @@
+from collections import OrderedDict
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ...core import register
+
+
+class BasicBlock(nn.Module):
+ expansion = 1
+
+ def __init__(self, in_planes, planes, stride=1):
+ super(BasicBlock, self).__init__()
+
+ self.conv1 = nn.Conv2d(
+ in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
+ )
+ self.bn1 = nn.BatchNorm2d(planes)
+
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(planes)
+
+ self.shortcut = nn.Sequential()
+ if stride != 1 or in_planes != self.expansion * planes:
+ self.shortcut = nn.Sequential(
+ nn.Conv2d(
+ in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False
+ ),
+ nn.BatchNorm2d(self.expansion * planes),
+ )
+
+ def forward(self, x):
+ out = F.relu(self.bn1(self.conv1(x)))
+ out = self.bn2(self.conv2(out))
+ out += self.shortcut(x)
+ out = F.relu(out)
+ return out
+
+
+class _ResNet(nn.Module):
+ def __init__(self, block, num_blocks, num_classes=10):
+ super().__init__()
+ self.in_planes = 64
+
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(64)
+
+ self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
+ self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+ self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+ self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+
+ self.linear = nn.Linear(512 * block.expansion, num_classes)
+
+ def _make_layer(self, block, planes, num_blocks, stride):
+ strides = [stride] + [1] * (num_blocks - 1)
+ layers = []
+ for stride in strides:
+ layers.append(block(self.in_planes, planes, stride))
+ self.in_planes = planes * block.expansion
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ out = F.relu(self.bn1(self.conv1(x)))
+ out = self.layer1(out)
+ out = self.layer2(out)
+ out = self.layer3(out)
+ out = self.layer4(out)
+ out = F.avg_pool2d(out, 4)
+ out = out.view(out.size(0), -1)
+ out = self.linear(out)
+ return out
+
+
+@register()
+class MResNet(nn.Module):
+ def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None:
+ super().__init__()
+ self.model = _ResNet(BasicBlock, num_blocks, num_classes)
+
+ def forward(self, x):
+ return self.model(x)
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/timm_model.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/timm_model.py
new file mode 100644
index 00000000..d2258a51
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/timm_model.py
@@ -0,0 +1,66 @@
+"""Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+
+https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055#0583
+"""
+
+import torch
+from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names
+
+from ...core import register
+from .utils import IntermediateLayerGetter
+
+
+@register()
+class TimmModel(torch.nn.Module):
+ def __init__(
+ self, name, return_layers, pretrained=False, exportable=True, features_only=True, **kwargs
+ ) -> None:
+ super().__init__()
+
+ import timm
+
+ model = timm.create_model(
+ name,
+ pretrained=pretrained,
+ exportable=exportable,
+ features_only=features_only,
+ **kwargs,
+ )
+ # nodes, _ = get_graph_node_names(model)
+ # print(nodes)
+ # features = {'': ''}
+ # model = create_feature_extractor(model, return_nodes=features)
+
+ assert set(return_layers).issubset(
+ model.feature_info.module_name()
+ ), f"return_layers should be a subset of {model.feature_info.module_name()}"
+
+ # self.model = model
+ self.model = IntermediateLayerGetter(model, return_layers)
+
+ return_idx = [model.feature_info.module_name().index(name) for name in return_layers]
+ self.strides = [model.feature_info.reduction()[i] for i in return_idx]
+ self.channels = [model.feature_info.channels()[i] for i in return_idx]
+ self.return_idx = return_idx
+ self.return_layers = return_layers
+
+ def forward(self, x: torch.Tensor):
+ outputs = self.model(x)
+ # outputs = [outputs[i] for i in self.return_idx]
+ return outputs
+
+
+if __name__ == "__main__":
+ model = TimmModel(name="resnet34", return_layers=["layer2", "layer3"])
+ data = torch.rand(1, 3, 640, 640)
+ outputs = model(data)
+
+ for output in outputs:
+ print(output.shape)
+
+ """
+ model:
+ type: TimmModel
+ name: resnet34
+ return_layers: ['layer2', 'layer4']
+ """
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/torchvision_model.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/torchvision_model.py
new file mode 100644
index 00000000..dd207d90
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/torchvision_model.py
@@ -0,0 +1,50 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch
+import torchvision
+
+from ...core import register
+from .utils import IntermediateLayerGetter
+
+__all__ = ["TorchVisionModel"]
+
+
+@register()
+class TorchVisionModel(torch.nn.Module):
+ def __init__(self, name, return_layers, weights=None, **kwargs) -> None:
+ super().__init__()
+
+ if weights is not None:
+ weights = getattr(torchvision.models.get_model_weights(name), weights)
+
+ model = torchvision.models.get_model(name, weights=weights, **kwargs)
+
+ # TODO hard code.
+ if hasattr(model, "features"):
+ model = IntermediateLayerGetter(model.features, return_layers)
+ else:
+ model = IntermediateLayerGetter(model, return_layers)
+
+ self.model = model
+
+ def forward(self, x):
+ return self.model(x)
+
+
+# TorchVisionModel('swin_t', return_layers=['5', '7'])
+# TorchVisionModel('resnet34', return_layers=['layer2','layer3', 'layer4'])
+
+# TorchVisionModel:
+# name: swin_t
+# return_layers: ['5', '7']
+# weights: DEFAULT
+
+
+# model:
+# type: TorchVisionModel
+# name: resnet34
+# return_layers: ['layer2','layer3', 'layer4']
+# weights: DEFAULT
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/utils.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/utils.py
new file mode 100644
index 00000000..58efeb38
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/backbone/utils.py
@@ -0,0 +1,56 @@
+"""
+https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py
+
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from collections import OrderedDict
+from typing import Dict, List
+
+import torch.nn as nn
+
+
+class IntermediateLayerGetter(nn.ModuleDict):
+ """
+ Module wrapper that returns intermediate layers from a model
+
+ It has a strong assumption that the modules have been registered
+ into the model in the same order as they are used.
+ This means that one should **not** reuse the same nn.Module
+ twice in the forward if you want this to work.
+
+ Additionally, it is only able to query submodules that are directly
+ assigned to the model. So if `model` is passed, `model.feature1` can
+ be returned, but not `model.feature1.layer2`.
+ """
+
+ _version = 3
+
+ def __init__(self, model: nn.Module, return_layers: List[str]) -> None:
+ if not set(return_layers).issubset([name for name, _ in model.named_children()]):
+ raise ValueError(
+ "return_layers are not present in model. {}".format(
+ [name for name, _ in model.named_children()]
+ )
+ )
+ orig_return_layers = return_layers
+ return_layers = {str(k): str(k) for k in return_layers}
+ layers = OrderedDict()
+ for name, module in model.named_children():
+ layers[name] = module
+ if name in return_layers:
+ del return_layers[name]
+ if not return_layers:
+ break
+
+ super().__init__(layers)
+ self.return_layers = orig_return_layers
+
+ def forward(self, x):
+ outputs = []
+ for name, module in self.items():
+ x = module(x)
+ if name in self.return_layers:
+ outputs.append(x)
+
+ return outputs
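+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative, assumes torchvision is installed):
+    # wrap a torchvision ResNet and collect the outputs of layer2 and layer3.
+    import torch
+    import torchvision
+
+    backbone = torchvision.models.resnet18(weights=None)
+    getter = IntermediateLayerGetter(backbone, return_layers=["layer2", "layer3"])
+    feats = getter(torch.rand(1, 3, 224, 224))
+    print([f.shape for f in feats])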
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/criterion/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/criterion/__init__.py
new file mode 100644
index 00000000..da74acf5
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/criterion/__init__.py
@@ -0,0 +1,11 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch.nn as nn
+
+from ...core import register
+from .det_criterion import DetCriterion
+
+CrossEntropyLoss = register()(nn.CrossEntropyLoss)
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/criterion/det_criterion.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/criterion/det_criterion.py
new file mode 100644
index 00000000..d5dbde5b
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/criterion/det_criterion.py
@@ -0,0 +1,188 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch
+import torch.distributed
+import torch.nn.functional as F
+import torchvision
+
+from ...core import register
+from ...misc import box_ops, dist_utils
+
+
+@register()
+class DetCriterion(torch.nn.Module):
+ """Default Detection Criterion"""
+
+ __share__ = ["num_classes"]
+ __inject__ = ["matcher"]
+
+ def __init__(
+ self,
+ losses,
+ weight_dict,
+ num_classes=80,
+ alpha=0.75,
+ gamma=2.0,
+ box_fmt="cxcywh",
+ matcher=None,
+ ):
+ """
+ Args:
+            losses (list[str]): requested losses, supports ['boxes', 'giou', 'vfl', 'focal']
+            weight_dict (dict[str, float]): corresponding loss weights, including
+                ['loss_bbox', 'loss_giou', 'loss_vfl', 'loss_focal']
+            box_fmt (str): input box format, 'cxcywh' or 'xyxy'
+            matcher (Matcher): matcher used to match predictions to targets
+ """
+ super().__init__()
+ self.losses = losses
+ self.weight_dict = weight_dict
+ self.alpha = alpha
+ self.gamma = gamma
+ self.num_classes = num_classes
+ self.box_fmt = box_fmt
+ assert matcher is not None, ""
+ self.matcher = matcher
+
+ def forward(self, outputs, targets, **kwargs):
+ """
+ Args:
+ outputs: Dict[Tensor], 'pred_boxes', 'pred_logits', 'meta'.
+ targets, List[Dict[str, Tensor]], len(targets) == batch_size.
+ kwargs, store other information such as current epoch id.
+ Return:
+ losses, Dict[str, Tensor]
+ """
+ matched = self.matcher(outputs, targets)
+ values = matched["values"]
+ indices = matched["indices"]
+ num_boxes = self._get_positive_nums(indices)
+
+ # Compute all the requested losses
+ losses = {}
+ for loss in self.losses:
+ l_dict = self.get_loss(loss, outputs, targets, indices, num_boxes)
+ l_dict = {k: l_dict[k] * self.weight_dict[k] for k in l_dict if k in self.weight_dict}
+ losses.update(l_dict)
+ return losses
+
+ def _get_src_permutation_idx(self, indices):
+ # permute predictions following indices
+ batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
+ src_idx = torch.cat([src for (src, _) in indices])
+ return batch_idx, src_idx
+
+ def _get_tgt_permutation_idx(self, indices):
+ # permute targets following indices
+ batch_idx = torch.cat([torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
+ tgt_idx = torch.cat([tgt for (_, tgt) in indices])
+ return batch_idx, tgt_idx
+
+ def _get_positive_nums(self, indices):
+ # number of positive samples
+ num_pos = sum(len(i) for (i, _) in indices)
+ num_pos = torch.as_tensor([num_pos], dtype=torch.float32, device=indices[0][0].device)
+ if dist_utils.is_dist_available_and_initialized():
+ torch.distributed.all_reduce(num_pos)
+ num_pos = torch.clamp(num_pos / dist_utils.get_world_size(), min=1).item()
+ return num_pos
+
+ def loss_labels_focal(self, outputs, targets, indices, num_boxes):
+ assert "pred_logits" in outputs
+ src_logits = outputs["pred_logits"]
+
+ idx = self._get_src_permutation_idx(indices)
+ target_classes_o = torch.cat([t["labels"][j] for t, (_, j) in zip(targets, indices)])
+ target_classes = torch.full(
+ src_logits.shape[:2], self.num_classes, dtype=torch.int64, device=src_logits.device
+ )
+ target_classes[idx] = target_classes_o
+
+ target = F.one_hot(target_classes, num_classes=self.num_classes + 1)[..., :-1].to(
+ src_logits.dtype
+ )
+ loss = torchvision.ops.sigmoid_focal_loss(
+ src_logits, target, self.alpha, self.gamma, reduction="none"
+ )
+ loss = loss.sum() / num_boxes
+ return {"loss_focal": loss}
+
+ def loss_labels_vfl(self, outputs, targets, indices, num_boxes):
+ assert "pred_boxes" in outputs
+ idx = self._get_src_permutation_idx(indices)
+
+ src_boxes = outputs["pred_boxes"][idx]
+ target_boxes = torch.cat([t["boxes"][j] for t, (_, j) in zip(targets, indices)], dim=0)
+
+ src_boxes = torchvision.ops.box_convert(src_boxes, in_fmt=self.box_fmt, out_fmt="xyxy")
+ target_boxes = torchvision.ops.box_convert(
+ target_boxes, in_fmt=self.box_fmt, out_fmt="xyxy"
+ )
+ iou, _ = box_ops.elementwise_box_iou(src_boxes.detach(), target_boxes)
+
+ src_logits: torch.Tensor = outputs["pred_logits"]
+ target_classes_o = torch.cat([t["labels"][j] for t, (_, j) in zip(targets, indices)])
+ target_classes = torch.full(
+ src_logits.shape[:2], self.num_classes, dtype=torch.int64, device=src_logits.device
+ )
+ target_classes[idx] = target_classes_o
+ target = F.one_hot(target_classes, num_classes=self.num_classes + 1)[..., :-1]
+
+ target_score_o = torch.zeros_like(target_classes, dtype=src_logits.dtype)
+ target_score_o[idx] = iou.to(src_logits.dtype)
+ target_score = target_score_o.unsqueeze(-1) * target
+
+ src_score = F.sigmoid(src_logits.detach())
+ weight = self.alpha * src_score.pow(self.gamma) * (1 - target) + target_score
+
+ loss = F.binary_cross_entropy_with_logits(
+ src_logits, target_score, weight=weight, reduction="none"
+ )
+ loss = loss.sum() / num_boxes
+ return {"loss_vfl": loss}
+
+ def loss_boxes(self, outputs, targets, indices, num_boxes):
+ assert "pred_boxes" in outputs
+ idx = self._get_src_permutation_idx(indices)
+ src_boxes = outputs["pred_boxes"][idx]
+ target_boxes = torch.cat([t["boxes"][i] for t, (_, i) in zip(targets, indices)], dim=0)
+
+ losses = {}
+ loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction="none")
+ losses["loss_bbox"] = loss_bbox.sum() / num_boxes
+
+ src_boxes = torchvision.ops.box_convert(src_boxes, in_fmt=self.box_fmt, out_fmt="xyxy")
+ target_boxes = torchvision.ops.box_convert(
+ target_boxes, in_fmt=self.box_fmt, out_fmt="xyxy"
+ )
+ loss_giou = 1 - box_ops.elementwise_generalized_box_iou(src_boxes, target_boxes)
+ losses["loss_giou"] = loss_giou.sum() / num_boxes
+ return losses
+
+ def loss_boxes_giou(self, outputs, targets, indices, num_boxes):
+ assert "pred_boxes" in outputs
+ idx = self._get_src_permutation_idx(indices)
+ src_boxes = outputs["pred_boxes"][idx]
+ target_boxes = torch.cat([t["boxes"][i] for t, (_, i) in zip(targets, indices)], dim=0)
+
+ losses = {}
+ src_boxes = torchvision.ops.box_convert(src_boxes, in_fmt=self.box_fmt, out_fmt="xyxy")
+ target_boxes = torchvision.ops.box_convert(
+ target_boxes, in_fmt=self.box_fmt, out_fmt="xyxy"
+ )
+ loss_giou = 1 - box_ops.elementwise_generalized_box_iou(src_boxes, target_boxes)
+ losses["loss_giou"] = loss_giou.sum() / num_boxes
+ return losses
+
+ def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs):
+ loss_map = {
+ "boxes": self.loss_boxes,
+ "giou": self.loss_boxes_giou,
+ "vfl": self.loss_labels_vfl,
+ "focal": self.loss_labels_focal,
+ }
+ assert loss in loss_map, f"do you really want to compute {loss} loss?"
+ return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs)
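+
+
+# Illustrative YAML-style configuration (a sketch; the matcher type and the loss
+# weights below are assumptions, not values shipped with D-FINE):
+#
+# DetCriterion:
+#   losses: ['vfl', 'boxes']
+#   weight_dict: {loss_vfl: 1.0, loss_bbox: 5.0, loss_giou: 2.0}
+#   matcher:
+#     type: HungarianMatcher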
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/__init__.py
new file mode 100644
index 00000000..0949998b
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/__init__.py
@@ -0,0 +1,6 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from .nms_postprocessor import DetNMSPostProcessor
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/box_revert.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/box_revert.py
new file mode 100644
index 00000000..c40f0c13
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/box_revert.py
@@ -0,0 +1,66 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from enum import Enum
+
+import torch
+import torchvision
+from torch import Tensor
+
+
+class BoxProcessFormat(Enum):
+ """Box process format
+
+ Available formats are
+ * ``RESIZE``
+ * ``RESIZE_KEEP_RATIO``
+ * ``RESIZE_KEEP_RATIO_PADDING``
+ """
+
+ RESIZE = 1
+ RESIZE_KEEP_RATIO = 2
+ RESIZE_KEEP_RATIO_PADDING = 3
+
+
+def box_revert(
+ boxes: Tensor,
+ orig_sizes: Tensor = None,
+ eval_sizes: Tensor = None,
+ inpt_sizes: Tensor = None,
+ inpt_padding: Tensor = None,
+ normalized: bool = True,
+ in_fmt: str = "cxcywh",
+ out_fmt: str = "xyxy",
+ process_fmt=BoxProcessFormat.RESIZE,
+) -> Tensor:
+ """
+ Args:
+ boxes(Tensor), [N, :, 4], (x1, y1, x2, y2), pred boxes.
+ inpt_sizes(Tensor), [N, 2], (w, h). input sizes.
+ orig_sizes(Tensor), [N, 2], (w, h). origin sizes.
+ inpt_padding (Tensor), [N, 2], (w_pad, h_pad, ...).
+ (inpt_sizes + inpt_padding) == eval_sizes
+ """
+ assert in_fmt in ("cxcywh", "xyxy"), ""
+
+ if normalized and eval_sizes is not None:
+ boxes = boxes * eval_sizes.repeat(1, 2).unsqueeze(1)
+
+ if inpt_padding is not None:
+ if in_fmt == "xyxy":
+ boxes -= inpt_padding[:, :2].repeat(1, 2).unsqueeze(1)
+ elif in_fmt == "cxcywh":
+ boxes[..., :2] -= inpt_padding[:, :2].repeat(1, 2).unsqueeze(1)
+
+ if orig_sizes is not None:
+ orig_sizes = orig_sizes.repeat(1, 2).unsqueeze(1)
+ if inpt_sizes is not None:
+ inpt_sizes = inpt_sizes.repeat(1, 2).unsqueeze(1)
+ boxes = boxes * (orig_sizes / inpt_sizes)
+ else:
+ boxes = boxes * orig_sizes
+
+ boxes = torchvision.ops.box_convert(boxes, in_fmt=in_fmt, out_fmt=out_fmt)
+ return boxes
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/detr_postprocessor.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/detr_postprocessor.py
new file mode 100644
index 00000000..6cccf721
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/detr_postprocessor.py
@@ -0,0 +1,86 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision
+
+__all__ = ["DetDETRPostProcessor"]
+
+from .box_revert import BoxProcessFormat, box_revert
+
+
+def mod(a, b):
+ out = a - a // b * b
+ return out
+
+
+class DetDETRPostProcessor(nn.Module):
+ def __init__(
+ self,
+ num_classes=80,
+ use_focal_loss=True,
+ num_top_queries=300,
+ box_process_format=BoxProcessFormat.RESIZE,
+ ) -> None:
+ super().__init__()
+ self.use_focal_loss = use_focal_loss
+ self.num_top_queries = num_top_queries
+ self.num_classes = int(num_classes)
+ self.box_process_format = box_process_format
+ self.deploy_mode = False
+
+ def extra_repr(self) -> str:
+ return f"use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}"
+
+ def forward(self, outputs, **kwargs):
+ logits, boxes = outputs["pred_logits"], outputs["pred_boxes"]
+
+ if self.use_focal_loss:
+ scores = F.sigmoid(logits)
+ scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1)
+ labels = index % self.num_classes
+ # labels = mod(index, self.num_classes) # for tensorrt
+ index = index // self.num_classes
+ boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1]))
+
+ else:
+            scores = F.softmax(logits, dim=-1)[:, :, :-1]
+ scores, labels = scores.max(dim=-1)
+ if scores.shape[1] > self.num_top_queries:
+ scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
+ labels = torch.gather(labels, dim=1, index=index)
+ boxes = torch.gather(
+ boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1])
+ )
+
+ if kwargs is not None:
+ boxes = box_revert(
+ boxes,
+ in_fmt="cxcywh",
+ out_fmt="xyxy",
+ process_fmt=self.box_process_format,
+ normalized=True,
+ **kwargs,
+ )
+
+ # TODO for onnx export
+ if self.deploy_mode:
+ return labels, boxes, scores
+
+ results = []
+ for lab, box, sco in zip(labels, boxes, scores):
+ result = dict(labels=lab, boxes=box, scores=sco)
+ results.append(result)
+
+ return results
+
+ def deploy(
+ self,
+ ):
+ self.eval()
+ self.deploy_mode = True
+ return self
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/nms_postprocessor.py b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/nms_postprocessor.py
new file mode 100644
index 00000000..260e08c0
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/nn/postprocessor/nms_postprocessor.py
@@ -0,0 +1,86 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from typing import Dict
+
+import torch
+import torch.distributed
+import torch.nn.functional as F
+import torchvision
+from torch import Tensor
+
+from ...core import register
+
+__all__ = [
+ "DetNMSPostProcessor",
+]
+
+
+@register()
+class DetNMSPostProcessor(torch.nn.Module):
+ def __init__(
+ self,
+ iou_threshold=0.7,
+ score_threshold=0.01,
+ keep_topk=300,
+ box_fmt="cxcywh",
+ logit_fmt="sigmoid",
+ ) -> None:
+ super().__init__()
+ self.iou_threshold = iou_threshold
+ self.score_threshold = score_threshold
+ self.keep_topk = keep_topk
+ self.box_fmt = box_fmt.lower()
+ self.logit_fmt = logit_fmt.lower()
+ self.logit_func = getattr(F, self.logit_fmt, None)
+ self.deploy_mode = False
+
+ def forward(self, outputs: Dict[str, Tensor], orig_target_sizes: Tensor):
+ logits, boxes = outputs["pred_logits"], outputs["pred_boxes"]
+ pred_boxes = torchvision.ops.box_convert(boxes, in_fmt=self.box_fmt, out_fmt="xyxy")
+ pred_boxes *= orig_target_sizes.repeat(1, 2).unsqueeze(1)
+
+ values, pred_labels = torch.max(logits, dim=-1)
+
+ if self.logit_func:
+ pred_scores = self.logit_func(values)
+ else:
+ pred_scores = values
+
+ # TODO for onnx export
+ if self.deploy_mode:
+ blobs = {
+ "pred_labels": pred_labels,
+ "pred_boxes": pred_boxes,
+ "pred_scores": pred_scores,
+ }
+ return blobs
+
+ results = []
+ for i in range(logits.shape[0]):
+ score_keep = pred_scores[i] > self.score_threshold
+ pred_box = pred_boxes[i][score_keep]
+ pred_label = pred_labels[i][score_keep]
+ pred_score = pred_scores[i][score_keep]
+
+ keep = torchvision.ops.batched_nms(pred_box, pred_score, pred_label, self.iou_threshold)
+ keep = keep[: self.keep_topk]
+
+ blob = {
+ "labels": pred_label[keep],
+ "boxes": pred_box[keep],
+ "scores": pred_score[keep],
+ }
+
+ results.append(blob)
+
+ return results
+
+ def deploy(
+ self,
+ ):
+ self.eval()
+ self.deploy_mode = True
+ return self
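+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative): post-process dummy detector outputs.
+    # Boxes are normalized cxcywh, as the default box_fmt expects.
+    dummy_outputs = {
+        "pred_logits": torch.randn(2, 300, 80),
+        "pred_boxes": torch.rand(2, 300, 4),
+    }
+    orig_sizes = torch.tensor([[640, 480], [800, 600]])
+    postprocessor = DetNMSPostProcessor(score_threshold=0.5)
+    results = postprocessor(dummy_outputs, orig_sizes)
+    print([r["boxes"].shape for r in results])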
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/optim/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/optim/__init__.py
new file mode 100644
index 00000000..fbc06c01
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/optim/__init__.py
@@ -0,0 +1,9 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from .amp import *
+from .ema import *
+from .optim import *
+from .warmup import *
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/optim/amp.py b/label_studio_ml/examples/d_fine/d-fine-code/src/optim/amp.py
new file mode 100644
index 00000000..53928993
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/optim/amp.py
@@ -0,0 +1,12 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch.cuda.amp as amp
+
+from ..core import register
+
+__all__ = ["GradScaler"]
+
+GradScaler = register()(amp.grad_scaler.GradScaler)
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/optim/ema.py b/label_studio_ml/examples/d_fine/d-fine-code/src/optim/ema.py
new file mode 100644
index 00000000..9e508b65
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/optim/ema.py
@@ -0,0 +1,108 @@
+"""
+D-FINE: Redefine Regression Task of DETRs as Fine-grained Distribution Refinement
+Copyright (c) 2024 The D-FINE Authors. All Rights Reserved.
+---------------------------------------------------------------------------------
+Modified from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright (c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import math
+from copy import deepcopy
+
+import torch
+import torch.nn as nn
+
+from ..core import register
+from ..misc import dist_utils
+
+__all__ = ["ModelEMA"]
+
+
+@register()
+class ModelEMA(object):
+ """
+ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
+ Keep a moving average of everything in the model state_dict (parameters and buffers).
+ This is intended to allow functionality like
+ https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
+ A smoothed version of the weights is necessary for some training schemes to perform well.
+    This class is sensitive to where it is initialized in the sequence of model init,
+    GPU assignment, and distributed training wrappers.
+ """
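+
+    # With the defaults (decay=0.9999, warmups=1000) the effective decay ramps up as
+    # d(t) = decay * (1 - exp(-t / warmups)), e.g. d(1000) ~= 0.632 * decay, so the
+    # EMA follows the live weights closely early on and only later settles near `decay`.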
+
+ def __init__(
+ self, model: nn.Module, decay: float = 0.9999, warmups: int = 1000, start: int = 0
+ ):
+ super().__init__()
+
+ self.module = deepcopy(dist_utils.de_parallel(model)).eval()
+ # if next(model.parameters()).device.type != 'cpu':
+ # self.module.half() # FP16 EMA
+
+ self.decay = decay
+ self.warmups = warmups
+ self.before_start = 0
+ self.start = start
+ self.updates = 0 # number of EMA updates
+ if warmups == 0:
+ self.decay_fn = lambda x: decay
+ else:
+ self.decay_fn = lambda x: decay * (
+ 1 - math.exp(-x / warmups)
+ ) # decay exponential ramp (to help early epochs)
+
+ for p in self.module.parameters():
+ p.requires_grad_(False)
+
+ def update(self, model: nn.Module):
+ if self.before_start < self.start:
+ self.before_start += 1
+ return
+ # Update EMA parameters
+ with torch.no_grad():
+ self.updates += 1
+ d = self.decay_fn(self.updates)
+ msd = dist_utils.de_parallel(model).state_dict()
+ for k, v in self.module.state_dict().items():
+ if v.dtype.is_floating_point:
+ v *= d
+ v += (1 - d) * msd[k].detach()
+
+ def to(self, *args, **kwargs):
+ self.module = self.module.to(*args, **kwargs)
+ return self
+
+ def state_dict(
+ self,
+ ):
+ return dict(module=self.module.state_dict(), updates=self.updates)
+
+ def load_state_dict(self, state, strict=True):
+ self.module.load_state_dict(state["module"], strict=strict)
+ if "updates" in state:
+ self.updates = state["updates"]
+
+    def forward(
+        self,
+    ):
+        raise RuntimeError("ModelEMA is not meant to be called directly; use .module instead")
+
+ def extra_repr(self) -> str:
+ return f"decay={self.decay}, warmups={self.warmups}"
+
+
+class ExponentialMovingAverage(torch.optim.swa_utils.AveragedModel):
+ """Maintains moving averages of model parameters using an exponential decay.
+ ``ema_avg = decay * avg_model_param + (1 - decay) * model_param``
+    ``torch.optim.swa_utils.AveragedModel``
+ is used to compute the EMA.
+ """
+
+ def __init__(self, model, decay, device="cpu", use_buffers=True):
+ self.decay_fn = lambda x: decay * (1 - math.exp(-x / 2000))
+
+ def ema_avg(avg_model_param, model_param, num_averaged):
+ decay = self.decay_fn(num_averaged)
+ return decay * avg_model_param + (1 - decay) * model_param
+
+ super().__init__(model, device, ema_avg, use_buffers=use_buffers)
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/optim/optim.py b/label_studio_ml/examples/d_fine/d-fine-code/src/optim/optim.py
new file mode 100644
index 00000000..cec30305
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/optim/optim.py
@@ -0,0 +1,22 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch.optim as optim
+import torch.optim.lr_scheduler as lr_scheduler
+
+from ..core import register
+
+__all__ = ["AdamW", "SGD", "Adam", "MultiStepLR", "CosineAnnealingLR", "OneCycleLR", "LambdaLR"]
+
+
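+# Wrap torch's optimizers and LR schedulers with register() so they can be
+# referenced by class name from the YAML configs.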
+SGD = register()(optim.SGD)
+Adam = register()(optim.Adam)
+AdamW = register()(optim.AdamW)
+
+
+MultiStepLR = register()(lr_scheduler.MultiStepLR)
+CosineAnnealingLR = register()(lr_scheduler.CosineAnnealingLR)
+OneCycleLR = register()(lr_scheduler.OneCycleLR)
+LambdaLR = register()(lr_scheduler.LambdaLR)
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/optim/warmup.py b/label_studio_ml/examples/d_fine/d-fine-code/src/optim/warmup.py
new file mode 100644
index 00000000..cd475ef6
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/optim/warmup.py
@@ -0,0 +1,56 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from torch.optim.lr_scheduler import LRScheduler
+
+from ..core import register
+
+
+class Warmup(object):
+ def __init__(
+ self, lr_scheduler: LRScheduler, warmup_duration: int, last_step: int = -1
+ ) -> None:
+ self.lr_scheduler = lr_scheduler
+ self.warmup_end_values = [pg["lr"] for pg in lr_scheduler.optimizer.param_groups]
+ self.last_step = last_step
+ self.warmup_duration = warmup_duration
+ self.step()
+
+ def state_dict(self):
+ return {k: v for k, v in self.__dict__.items() if k != "lr_scheduler"}
+
+ def load_state_dict(self, state_dict):
+ self.__dict__.update(state_dict)
+
+ def get_warmup_factor(self, step, **kwargs):
+ raise NotImplementedError
+
+ def step(
+ self,
+ ):
+ self.last_step += 1
+ if self.last_step >= self.warmup_duration:
+ return
+ factor = self.get_warmup_factor(self.last_step)
+ for i, pg in enumerate(self.lr_scheduler.optimizer.param_groups):
+ pg["lr"] = factor * self.warmup_end_values[i]
+
+ def finished(
+ self,
+ ):
+ if self.last_step >= self.warmup_duration:
+ return True
+ return False
+
+
+@register()
+class LinearWarmup(Warmup):
+ def __init__(
+ self, lr_scheduler: LRScheduler, warmup_duration: int, last_step: int = -1
+ ) -> None:
+ super().__init__(lr_scheduler, warmup_duration, last_step)
+
+ def get_warmup_factor(self, step):
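+        # linear ramp: e.g. warmup_duration=500 yields factors 1/500, 2/500, ..., 1.0
+        # over the first 500 steps, after which Warmup.step() leaves the lr untouched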
+ return min(1.0, (step + 1) / self.warmup_duration)
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/solver/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/__init__.py
new file mode 100644
index 00000000..440182a0
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/__init__.py
@@ -0,0 +1,15 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from typing import Dict, Type
+
+from ._solver import BaseSolver
+from .clas_solver import ClasSolver
+from .det_solver import DetSolver
+
+TASKS: Dict[str, Type[BaseSolver]] = {
+ "classification": ClasSolver,
+ "detection": DetSolver,
+}
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/solver/_solver.py b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/_solver.py
new file mode 100644
index 00000000..3120437b
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/_solver.py
@@ -0,0 +1,783 @@
+import atexit
+from datetime import datetime
+from pathlib import Path
+from typing import Dict
+
+import torch
+import torch.nn as nn
+
+from ..core import BaseConfig
+from ..misc import dist_utils
+
+
+def to(m: nn.Module, device: str):
+ if m is None:
+ return None
+ return m.to(device)
+
+
+def remove_module_prefix(state_dict):
+ new_state_dict = {}
+ for k, v in state_dict.items():
+ if k.startswith("module."):
+ new_state_dict[k[7:]] = v
+ else:
+ new_state_dict[k] = v
+ return new_state_dict
+
+
+class BaseSolver(object):
+ def __init__(self, cfg: BaseConfig) -> None:
+ self.cfg = cfg
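+        # Objects365 class indices corresponding to the COCO classes; used by
+        # map_class_weights() to remap classification-head weights when tuning a
+        # checkpoint across the two label spaces.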
+ self.obj365_ids = [
+ 0,
+ 46,
+ 5,
+ 58,
+ 114,
+ 55,
+ 116,
+ 65,
+ 21,
+ 40,
+ 176,
+ 127,
+ 249,
+ 24,
+ 56,
+ 139,
+ 92,
+ 78,
+ 99,
+ 96,
+ 144,
+ 295,
+ 178,
+ 180,
+ 38,
+ 39,
+ 13,
+ 43,
+ 120,
+ 219,
+ 148,
+ 173,
+ 165,
+ 154,
+ 137,
+ 113,
+ 145,
+ 146,
+ 204,
+ 8,
+ 35,
+ 10,
+ 88,
+ 84,
+ 93,
+ 26,
+ 112,
+ 82,
+ 265,
+ 104,
+ 141,
+ 152,
+ 234,
+ 143,
+ 150,
+ 97,
+ 2,
+ 50,
+ 25,
+ 75,
+ 98,
+ 153,
+ 37,
+ 73,
+ 115,
+ 132,
+ 106,
+ 61,
+ 163,
+ 134,
+ 277,
+ 81,
+ 133,
+ 18,
+ 94,
+ 30,
+ 169,
+ 70,
+ 328,
+ 226,
+ ]
+
+ def _setup(self):
+ """Avoid instantiating unnecessary classes"""
+ cfg = self.cfg
+ if cfg.device:
+ device = torch.device(cfg.device)
+ else:
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ self.model = cfg.model
+
+ # NOTE: Must load_tuning_state before EMA instance building
+ if self.cfg.tuning:
+ print(f"Tuning checkpoint from {self.cfg.tuning}")
+ self.load_tuning_state(self.cfg.tuning)
+
+ self.model = dist_utils.warp_model(
+ self.model.to(device),
+ sync_bn=cfg.sync_bn,
+ find_unused_parameters=cfg.find_unused_parameters,
+ )
+
+ self.criterion = self.to(cfg.criterion, device)
+ self.postprocessor = self.to(cfg.postprocessor, device)
+
+ self.ema = self.to(cfg.ema, device)
+ self.scaler = cfg.scaler
+
+ self.device = device
+ self.last_epoch = self.cfg.last_epoch
+
+ self.output_dir = Path(cfg.output_dir)
+ self.output_dir.mkdir(parents=True, exist_ok=True)
+ self.writer = cfg.writer
+
+ if self.writer:
+ atexit.register(self.writer.close)
+ if dist_utils.is_main_process():
+ self.writer.add_text("config", "{:s}".format(cfg.__repr__()), 0)
+ self.use_wandb = self.cfg.use_wandb
+ if self.use_wandb:
+ try:
+ import wandb
+ self.use_wandb = True
+ except ImportError:
+ self.use_wandb = False
+
+ def cleanup(self):
+ if self.writer:
+ atexit.register(self.writer.close)
+
+ def train(self):
+ self._setup()
+ self.optimizer = self.cfg.optimizer
+ self.lr_scheduler = self.cfg.lr_scheduler
+ self.lr_warmup_scheduler = self.cfg.lr_warmup_scheduler
+
+ self.train_dataloader = dist_utils.warp_loader(
+ self.cfg.train_dataloader, shuffle=self.cfg.train_dataloader.shuffle
+ )
+ self.val_dataloader = dist_utils.warp_loader(
+ self.cfg.val_dataloader, shuffle=self.cfg.val_dataloader.shuffle
+ )
+
+ self.evaluator = self.cfg.evaluator
+
+ # NOTE: Instantiating order
+ if self.cfg.resume:
+ print(f"Resume checkpoint from {self.cfg.resume}")
+ self.load_resume_state(self.cfg.resume)
+
+ def eval(self):
+ self._setup()
+
+ self.val_dataloader = dist_utils.warp_loader(
+ self.cfg.val_dataloader, shuffle=self.cfg.val_dataloader.shuffle
+ )
+
+ self.evaluator = self.cfg.evaluator
+
+ if self.cfg.resume:
+ print(f"Resume checkpoint from {self.cfg.resume}")
+ self.load_resume_state(self.cfg.resume)
+
+ def to(self, module, device):
+ return module.to(device) if hasattr(module, "to") else module
+
+ def state_dict(self):
+ """State dict, train/eval"""
+ state = {}
+ state["date"] = datetime.now().isoformat()
+
+ # For resume
+ state["last_epoch"] = self.last_epoch
+
+ for k, v in self.__dict__.items():
+ if hasattr(v, "state_dict"):
+ v = dist_utils.de_parallel(v)
+ state[k] = v.state_dict()
+
+ return state
+
+ def load_state_dict(self, state):
+ """Load state dict, train/eval"""
+ if "last_epoch" in state:
+ self.last_epoch = state["last_epoch"]
+ print("Load last_epoch")
+
+ for k, v in self.__dict__.items():
+ if hasattr(v, "load_state_dict") and k in state:
+ v = dist_utils.de_parallel(v)
+ v.load_state_dict(state[k])
+ print(f"Load {k}.state_dict")
+
+ if hasattr(v, "load_state_dict") and k not in state:
+ if k == "ema":
+ model = getattr(self, "model", None)
+ if model is not None:
+ ema = dist_utils.de_parallel(v)
+ model_state_dict = remove_module_prefix(model.state_dict())
+ ema.load_state_dict({"module": model_state_dict})
+ print(f"Load {k}.state_dict from model.state_dict")
+ else:
+ print(f"Not load {k}.state_dict")
+
+ def load_resume_state(self, path: str):
+ """Load resume"""
+ if path.startswith("http"):
+ state = torch.hub.load_state_dict_from_url(path, map_location="cpu")
+ else:
+ state = torch.load(path, map_location="cpu")
+
+ # state['model'] = remove_module_prefix(state['model'])
+ self.load_state_dict(state)
+
+ def load_tuning_state(self, path: str):
+ """Load model for tuning and adjust mismatched head parameters"""
+ if path.startswith("http"):
+ state = torch.hub.load_state_dict_from_url(path, map_location="cpu")
+ else:
+ state = torch.load(path, map_location="cpu")
+
+ module = dist_utils.de_parallel(self.model)
+
+ # Load the appropriate state dict
+ if "ema" in state:
+ pretrain_state_dict = state["ema"]["module"]
+ else:
+ pretrain_state_dict = state["model"]
+
+ # Adjust head parameters between datasets
+ try:
+ adjusted_state_dict = self._adjust_head_parameters(
+ module.state_dict(), pretrain_state_dict
+ )
+ stat, infos = self._matched_state(module.state_dict(), adjusted_state_dict)
+ except Exception:
+ stat, infos = self._matched_state(module.state_dict(), pretrain_state_dict)
+
+ module.load_state_dict(stat, strict=False)
+ print(f"Load model.state_dict, {infos}")
+
+ @staticmethod
+ def _matched_state(state: Dict[str, torch.Tensor], params: Dict[str, torch.Tensor]):
+ missed_list = []
+ unmatched_list = []
+ matched_state = {}
+ for k, v in state.items():
+ if k in params:
+ if v.shape == params[k].shape:
+ matched_state[k] = params[k]
+ else:
+ unmatched_list.append(k)
+ else:
+ missed_list.append(k)
+
+ return matched_state, {"missed": missed_list, "unmatched": unmatched_list}
+
+ def _adjust_head_parameters(self, cur_state_dict, pretrain_state_dict):
+ """Adjust head parameters between datasets."""
+ # List of parameters to adjust
+ if (
+ pretrain_state_dict["decoder.denoising_class_embed.weight"].size()
+ != cur_state_dict["decoder.denoising_class_embed.weight"].size()
+ ):
+ del pretrain_state_dict["decoder.denoising_class_embed.weight"]
+
+ head_param_names = ["decoder.enc_score_head.weight", "decoder.enc_score_head.bias"]
+ for i in range(8):
+ head_param_names.append(f"decoder.dec_score_head.{i}.weight")
+ head_param_names.append(f"decoder.dec_score_head.{i}.bias")
+
+ adjusted_params = []
+
+ for param_name in head_param_names:
+ if param_name in cur_state_dict and param_name in pretrain_state_dict:
+ cur_tensor = cur_state_dict[param_name]
+ pretrain_tensor = pretrain_state_dict[param_name]
+ adjusted_tensor = self.map_class_weights(cur_tensor, pretrain_tensor)
+ if adjusted_tensor is not None:
+ pretrain_state_dict[param_name] = adjusted_tensor
+ adjusted_params.append(param_name)
+ else:
+ print(f"Cannot adjust parameter '{param_name}' due to size mismatch.")
+
+ return pretrain_state_dict
+
+ def map_class_weights(self, cur_tensor, pretrain_tensor):
+ """Map class weights from pretrain model to current model based on class IDs."""
+ if pretrain_tensor.size() == cur_tensor.size():
+ return pretrain_tensor
+
+ adjusted_tensor = cur_tensor.clone()
+ adjusted_tensor.requires_grad = False
+
+ if pretrain_tensor.size() > cur_tensor.size():
+ for coco_id, obj_id in enumerate(self.obj365_ids):
+ adjusted_tensor[coco_id] = pretrain_tensor[obj_id + 1]
+ else:
+ for coco_id, obj_id in enumerate(self.obj365_ids):
+ adjusted_tensor[obj_id + 1] = pretrain_tensor[coco_id]
+
+ return adjusted_tensor
+
+ def fit(self):
+ raise NotImplementedError("")
+
+ def val(self):
+ raise NotImplementedError("")
+
+
+# obj365_classes = [
+# 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses',
+# 'Bottle', 'Desk', 'Cup', 'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel',
+# 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book', 'Gloves', 'Storage box',
+# 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
+# 'Pillow', 'Boots', 'Vase', 'Microphone', 'Necklace', 'Ring', 'SUV', 'Wine Glass',
+# 'Belt', 'Moniter/TV', 'Backpack', 'Umbrella', 'Traffic Light', 'Speaker', 'Watch',
+# 'Tie', 'Trash bin Can', 'Slippers', 'Bicycle', 'Stool', 'Barrel/bucket', 'Van',
+# 'Couch', 'Sandals', 'Bakset', 'Drum', 'Pen/Pencil', 'Bus', 'Wild Bird', 'High Heels',
+# 'Motorcycle', 'Guitar', 'Carpet', 'Cell Phone', 'Bread', 'Camera', 'Canned', 'Truck',
+# 'Traffic cone', 'Cymbal', 'Lifesaver', 'Towel', 'Stuffed Toy', 'Candle', 'Sailboat',
+# 'Laptop', 'Awning', 'Bed', 'Faucet', 'Tent', 'Horse', 'Mirror', 'Power outlet',
+# 'Sink', 'Apple', 'Air Conditioner', 'Knife', 'Hockey Stick', 'Paddle', 'Pickup Truck',
+# 'Fork', 'Traffic Sign', 'Ballon', 'Tripod', 'Dog', 'Spoon', 'Clock', 'Pot', 'Cow',
+# 'Cake', 'Dinning Table', 'Sheep', 'Hanger', 'Blackboard/Whiteboard', 'Napkin',
+# 'Other Fish', 'Orange/Tangerine', 'Toiletry', 'Keyboard', 'Tomato', 'Lantern',
+# 'Machinery Vehicle', 'Fan', 'Green Vegetables', 'Banana', 'Baseball Glove',
+# 'Airplane', 'Mouse', 'Train', 'Pumpkin', 'Soccer', 'Skiboard', 'Luggage', 'Nightstand',
+# 'Tea pot', 'Telephone', 'Trolley', 'Head Phone', 'Sports Car', 'Stop Sign', 'Dessert',
+# 'Scooter', 'Stroller', 'Crane', 'Remote', 'Refrigerator', 'Oven', 'Lemon', 'Duck',
+# 'Baseball Bat', 'Surveillance Camera', 'Cat', 'Jug', 'Broccoli', 'Piano', 'Pizza',
+# 'Elephant', 'Skateboard', 'Surfboard', 'Gun', 'Skating and Skiing shoes', 'Gas stove',
+# 'Donut', 'Bow Tie', 'Carrot', 'Toilet', 'Kite', 'Strawberry', 'Other Balls', 'Shovel',
+# 'Pepper', 'Computer Box', 'Toilet Paper', 'Cleaning Products', 'Chopsticks', 'Microwave',
+# 'Pigeon', 'Baseball', 'Cutting/chopping Board', 'Coffee Table', 'Side Table', 'Scissors',
+# 'Marker', 'Pie', 'Ladder', 'Snowboard', 'Cookies', 'Radiator', 'Fire Hydrant', 'Basketball',
+# 'Zebra', 'Grape', 'Giraffe', 'Potato', 'Sausage', 'Tricycle', 'Violin', 'Egg',
+# 'Fire Extinguisher', 'Candy', 'Fire Truck', 'Billards', 'Converter', 'Bathtub',
+# 'Wheelchair', 'Golf Club', 'Briefcase', 'Cucumber', 'Cigar/Cigarette ', 'Paint Brush',
+# 'Pear', 'Heavy Truck', 'Hamburger', 'Extractor', 'Extention Cord', 'Tong',
+# 'Tennis Racket', 'Folder', 'American Football', 'earphone', 'Mask', 'Kettle',
+# 'Tennis', 'Ship', 'Swing', 'Coffee Machine', 'Slide', 'Carriage', 'Onion',
+# 'Green beans', 'Projector', 'Frisbee', 'Washing Machine/Drying Machine', 'Chicken',
+# 'Printer', 'Watermelon', 'Saxophone', 'Tissue', 'Toothbrush', 'Ice cream',
+# 'Hotair ballon', 'Cello', 'French Fries', 'Scale', 'Trophy', 'Cabbage', 'Hot dog',
+# 'Blender', 'Peach', 'Rice', 'Wallet/Purse', 'Volleyball', 'Deer', 'Goose', 'Tape',
+# 'Tablet', 'Cosmetics', 'Trumpet', 'Pineapple', 'Golf Ball', 'Ambulance', 'Parking meter',
+# 'Mango', 'Key', 'Hurdle', 'Fishing Rod', 'Medal', 'Flute', 'Brush', 'Penguin',
+# 'Megaphone', 'Corn', 'Lettuce', 'Garlic', 'Swan', 'Helicopter', 'Green Onion',
+# 'Sandwich', 'Nuts', 'Speed Limit Sign', 'Induction Cooker', 'Broom', 'Trombone',
+# 'Plum', 'Rickshaw', 'Goldfish', 'Kiwi fruit', 'Router/modem', 'Poker Card', 'Toaster',
+# 'Shrimp', 'Sushi', 'Cheese', 'Notepaper', 'Cherry', 'Pliers', 'CD', 'Pasta', 'Hammer',
+# 'Cue', 'Avocado', 'Hamimelon', 'Flask', 'Mushroon', 'Screwdriver', 'Soap', 'Recorder',
+# 'Bear', 'Eggplant', 'Board Eraser', 'Coconut', 'Tape Measur/ Ruler', 'Pig',
+# 'Showerhead', 'Globe', 'Chips', 'Steak', 'Crosswalk Sign', 'Stapler', 'Campel',
+# 'Formula 1 ', 'Pomegranate', 'Dishwasher', 'Crab', 'Hoverboard', 'Meat ball',
+# 'Rice Cooker', 'Tuba', 'Calculator', 'Papaya', 'Antelope', 'Parrot', 'Seal',
+# 'Buttefly', 'Dumbbell', 'Donkey', 'Lion', 'Urinal', 'Dolphin', 'Electric Drill',
+# 'Hair Dryer', 'Egg tart', 'Jellyfish', 'Treadmill', 'Lighter', 'Grapefruit',
+# 'Game board', 'Mop', 'Radish', 'Baozi', 'Target', 'French', 'Spring Rolls', 'Monkey',
+# 'Rabbit', 'Pencil Case', 'Yak', 'Red Cabbage', 'Binoculars', 'Asparagus', 'Barbell',
+# 'Scallop', 'Noddles', 'Comb', 'Dumpling', 'Oyster', 'Table Teniis paddle',
+# 'Cosmetics Brush/Eyeliner Pencil', 'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra',
+# 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis '
+# ]
+
+# coco_classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+# 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+# 'stop sign', 'parking meter', 'bench', 'wild bird', 'cat', 'dog',
+# 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
+# 'backpack', 'umbrella', 'handbag/satchel', 'tie', 'luggage', 'frisbee',
+# 'skating and skiing shoes', 'snowboard', 'baseball', 'kite', 'baseball bat',
+# 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
+# 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl/basin',
+# 'banana', 'apple', 'sandwich', 'orange/tangerine', 'broccoli', 'carrot',
+# 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+# 'potted plant', 'bed', 'dinning table', 'toilet', 'moniter/tv', 'laptop',
+# 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
+# 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
+# 'vase', 'scissors', 'stuffed toy', 'hair dryer', 'toothbrush']
+
+
+# obj365_classes = [
+# (0, 'Person'),
+# (1, 'Sneakers'),
+# (2, 'Chair'),
+# (3, 'Other Shoes'),
+# (4, 'Hat'),
+# (5, 'Car'),
+# (6, 'Lamp'),
+# (7, 'Glasses'),
+# (8, 'Bottle'),
+# (9, 'Desk'),
+# (10, 'Cup'),
+# (11, 'Street Lights'),
+# (12, 'Cabinet/shelf'),
+# (13, 'Handbag/Satchel'),
+# (14, 'Bracelet'),
+# (15, 'Plate'),
+# (16, 'Picture/Frame'),
+# (17, 'Helmet'),
+# (18, 'Book'),
+# (19, 'Gloves'),
+# (20, 'Storage box'),
+# (21, 'Boat'),
+# (22, 'Leather Shoes'),
+# (23, 'Flower'),
+# (24, 'Bench'),
+# (25, 'Potted Plant'),
+# (26, 'Bowl/Basin'),
+# (27, 'Flag'),
+# (28, 'Pillow'),
+# (29, 'Boots'),
+# (30, 'Vase'),
+# (31, 'Microphone'),
+# (32, 'Necklace'),
+# (33, 'Ring'),
+# (34, 'SUV'),
+# (35, 'Wine Glass'),
+# (36, 'Belt'),
+# (37, 'Monitor/TV'),
+# (38, 'Backpack'),
+# (39, 'Umbrella'),
+# (40, 'Traffic Light'),
+# (41, 'Speaker'),
+# (42, 'Watch'),
+# (43, 'Tie'),
+# (44, 'Trash bin Can'),
+# (45, 'Slippers'),
+# (46, 'Bicycle'),
+# (47, 'Stool'),
+# (48, 'Barrel/bucket'),
+# (49, 'Van'),
+# (50, 'Couch'),
+# (51, 'Sandals'),
+# (52, 'Basket'),
+# (53, 'Drum'),
+# (54, 'Pen/Pencil'),
+# (55, 'Bus'),
+# (56, 'Wild Bird'),
+# (57, 'High Heels'),
+# (58, 'Motorcycle'),
+# (59, 'Guitar'),
+# (60, 'Carpet'),
+# (61, 'Cell Phone'),
+# (62, 'Bread'),
+# (63, 'Camera'),
+# (64, 'Canned'),
+# (65, 'Truck'),
+# (66, 'Traffic cone'),
+# (67, 'Cymbal'),
+# (68, 'Lifesaver'),
+# (69, 'Towel'),
+# (70, 'Stuffed Toy'),
+# (71, 'Candle'),
+# (72, 'Sailboat'),
+# (73, 'Laptop'),
+# (74, 'Awning'),
+# (75, 'Bed'),
+# (76, 'Faucet'),
+# (77, 'Tent'),
+# (78, 'Horse'),
+# (79, 'Mirror'),
+# (80, 'Power outlet'),
+# (81, 'Sink'),
+# (82, 'Apple'),
+# (83, 'Air Conditioner'),
+# (84, 'Knife'),
+# (85, 'Hockey Stick'),
+# (86, 'Paddle'),
+# (87, 'Pickup Truck'),
+# (88, 'Fork'),
+# (89, 'Traffic Sign'),
+# (90, 'Balloon'),
+# (91, 'Tripod'),
+# (92, 'Dog'),
+# (93, 'Spoon'),
+# (94, 'Clock'),
+# (95, 'Pot'),
+# (96, 'Cow'),
+# (97, 'Cake'),
+# (98, 'Dining Table'),
+# (99, 'Sheep'),
+# (100, 'Hanger'),
+# (101, 'Blackboard/Whiteboard'),
+# (102, 'Napkin'),
+# (103, 'Other Fish'),
+# (104, 'Orange/Tangerine'),
+# (105, 'Toiletry'),
+# (106, 'Keyboard'),
+# (107, 'Tomato'),
+# (108, 'Lantern'),
+# (109, 'Machinery Vehicle'),
+# (110, 'Fan'),
+# (111, 'Green Vegetables'),
+# (112, 'Banana'),
+# (113, 'Baseball Glove'),
+# (114, 'Airplane'),
+# (115, 'Mouse'),
+# (116, 'Train'),
+# (117, 'Pumpkin'),
+# (118, 'Soccer'),
+# (119, 'Skiboard'),
+# (120, 'Luggage'),
+# (121, 'Nightstand'),
+# (122, 'Tea pot'),
+# (123, 'Telephone'),
+# (124, 'Trolley'),
+# (125, 'Head Phone'),
+# (126, 'Sports Car'),
+# (127, 'Stop Sign'),
+# (128, 'Dessert'),
+# (129, 'Scooter'),
+# (130, 'Stroller'),
+# (131, 'Crane'),
+# (132, 'Remote'),
+# (133, 'Refrigerator'),
+# (134, 'Oven'),
+# (135, 'Lemon'),
+# (136, 'Duck'),
+# (137, 'Baseball Bat'),
+# (138, 'Surveillance Camera'),
+# (139, 'Cat'),
+# (140, 'Jug'),
+# (141, 'Broccoli'),
+# (142, 'Piano'),
+# (143, 'Pizza'),
+# (144, 'Elephant'),
+# (145, 'Skateboard'),
+# (146, 'Surfboard'),
+# (147, 'Gun'),
+# (148, 'Skating and Skiing Shoes'),
+# (149, 'Gas Stove'),
+# (150, 'Donut'),
+# (151, 'Bow Tie'),
+# (152, 'Carrot'),
+# (153, 'Toilet'),
+# (154, 'Kite'),
+# (155, 'Strawberry'),
+# (156, 'Other Balls'),
+# (157, 'Shovel'),
+# (158, 'Pepper'),
+# (159, 'Computer Box'),
+# (160, 'Toilet Paper'),
+# (161, 'Cleaning Products'),
+# (162, 'Chopsticks'),
+# (163, 'Microwave'),
+# (164, 'Pigeon'),
+# (165, 'Baseball'),
+# (166, 'Cutting/chopping Board'),
+# (167, 'Coffee Table'),
+# (168, 'Side Table'),
+# (169, 'Scissors'),
+# (170, 'Marker'),
+# (171, 'Pie'),
+# (172, 'Ladder'),
+# (173, 'Snowboard'),
+# (174, 'Cookies'),
+# (175, 'Radiator'),
+# (176, 'Fire Hydrant'),
+# (177, 'Basketball'),
+# (178, 'Zebra'),
+# (179, 'Grape'),
+# (180, 'Giraffe'),
+# (181, 'Potato'),
+# (182, 'Sausage'),
+# (183, 'Tricycle'),
+# (184, 'Violin'),
+# (185, 'Egg'),
+# (186, 'Fire Extinguisher'),
+# (187, 'Candy'),
+# (188, 'Fire Truck'),
+# (189, 'Billiards'),
+# (190, 'Converter'),
+# (191, 'Bathtub'),
+# (192, 'Wheelchair'),
+# (193, 'Golf Club'),
+# (194, 'Briefcase'),
+# (195, 'Cucumber'),
+# (196, 'Cigar/Cigarette'),
+# (197, 'Paint Brush'),
+# (198, 'Pear'),
+# (199, 'Heavy Truck'),
+# (200, 'Hamburger'),
+# (201, 'Extractor'),
+# (202, 'Extension Cord'),
+# (203, 'Tong'),
+# (204, 'Tennis Racket'),
+# (205, 'Folder'),
+# (206, 'American Football'),
+# (207, 'Earphone'),
+# (208, 'Mask'),
+# (209, 'Kettle'),
+# (210, 'Tennis'),
+# (211, 'Ship'),
+# (212, 'Swing'),
+# (213, 'Coffee Machine'),
+# (214, 'Slide'),
+# (215, 'Carriage'),
+# (216, 'Onion'),
+# (217, 'Green Beans'),
+# (218, 'Projector'),
+# (219, 'Frisbee'),
+# (220, 'Washing Machine/Drying Machine'),
+# (221, 'Chicken'),
+# (222, 'Printer'),
+# (223, 'Watermelon'),
+# (224, 'Saxophone'),
+# (225, 'Tissue'),
+# (226, 'Toothbrush'),
+# (227, 'Ice Cream'),
+# (228, 'Hot Air Balloon'),
+# (229, 'Cello'),
+# (230, 'French Fries'),
+# (231, 'Scale'),
+# (232, 'Trophy'),
+# (233, 'Cabbage'),
+# (234, 'Hot Dog'),
+# (235, 'Blender'),
+# (236, 'Peach'),
+# (237, 'Rice'),
+# (238, 'Wallet/Purse'),
+# (239, 'Volleyball'),
+# (240, 'Deer'),
+# (241, 'Goose'),
+# (242, 'Tape'),
+# (243, 'Tablet'),
+# (244, 'Cosmetics'),
+# (245, 'Trumpet'),
+# (246, 'Pineapple'),
+# (247, 'Golf Ball'),
+# (248, 'Ambulance'),
+# (249, 'Parking Meter'),
+# (250, 'Mango'),
+# (251, 'Key'),
+# (252, 'Hurdle'),
+# (253, 'Fishing Rod'),
+# (254, 'Medal'),
+# (255, 'Flute'),
+# (256, 'Brush'),
+# (257, 'Penguin'),
+# (258, 'Megaphone'),
+# (259, 'Corn'),
+# (260, 'Lettuce'),
+# (261, 'Garlic'),
+# (262, 'Swan'),
+# (263, 'Helicopter'),
+# (264, 'Green Onion'),
+# (265, 'Sandwich'),
+# (266, 'Nuts'),
+# (267, 'Speed Limit Sign'),
+# (268, 'Induction Cooker'),
+# (269, 'Broom'),
+# (270, 'Trombone'),
+# (271, 'Plum'),
+# (272, 'Rickshaw'),
+# (273, 'Goldfish'),
+# (274, 'Kiwi Fruit'),
+# (275, 'Router/Modem'),
+# (276, 'Poker Card'),
+# (277, 'Toaster'),
+# (278, 'Shrimp'),
+# (279, 'Sushi'),
+# (280, 'Cheese'),
+# (281, 'Notepaper'),
+# (282, 'Cherry'),
+# (283, 'Pliers'),
+# (284, 'CD'),
+# (285, 'Pasta'),
+# (286, 'Hammer'),
+# (287, 'Cue'),
+# (288, 'Avocado'),
+# (289, 'Hami Melon'),
+# (290, 'Flask'),
+# (291, 'Mushroom'),
+# (292, 'Screwdriver'),
+# (293, 'Soap'),
+# (294, 'Recorder'),
+# (295, 'Bear'),
+# (296, 'Eggplant'),
+# (297, 'Board Eraser'),
+# (298, 'Coconut'),
+# (299, 'Tape Measure/Ruler'),
+# (300, 'Pig'),
+# (301, 'Showerhead'),
+# (302, 'Globe'),
+# (303, 'Chips'),
+# (304, 'Steak'),
+# (305, 'Crosswalk Sign'),
+# (306, 'Stapler'),
+# (307, 'Camel'),
+# (308, 'Formula 1'),
+# (309, 'Pomegranate'),
+# (310, 'Dishwasher'),
+# (311, 'Crab'),
+# (312, 'Hoverboard'),
+# (313, 'Meatball'),
+# (314, 'Rice Cooker'),
+# (315, 'Tuba'),
+# (316, 'Calculator'),
+# (317, 'Papaya'),
+# (318, 'Antelope'),
+# (319, 'Parrot'),
+# (320, 'Seal'),
+# (321, 'Butterfly'),
+# (322, 'Dumbbell'),
+# (323, 'Donkey'),
+# (324, 'Lion'),
+# (325, 'Urinal'),
+# (326, 'Dolphin'),
+# (327, 'Electric Drill'),
+# (328, 'Hair Dryer'),
+# (329, 'Egg Tart'),
+# (330, 'Jellyfish'),
+# (331, 'Treadmill'),
+# (332, 'Lighter'),
+# (333, 'Grapefruit'),
+# (334, 'Game Board'),
+# (335, 'Mop'),
+# (336, 'Radish'),
+# (337, 'Baozi'),
+# (338, 'Target'),
+# (339, 'French'),
+# (340, 'Spring Rolls'),
+# (341, 'Monkey'),
+# (342, 'Rabbit'),
+# (343, 'Pencil Case'),
+# (344, 'Yak'),
+# (345, 'Red Cabbage'),
+# (346, 'Binoculars'),
+# (347, 'Asparagus'),
+# (348, 'Barbell'),
+# (349, 'Scallop'),
+# (350, 'Noodles'),
+# (351, 'Comb'),
+# (352, 'Dumpling'),
+# (353, 'Oyster'),
+# (354, 'Table Tennis Paddle'),
+# (355, 'Cosmetics Brush/Eyeliner Pencil'),
+# (356, 'Chainsaw'),
+# (357, 'Eraser'),
+# (358, 'Lobster'),
+# (359, 'Durian'),
+# (360, 'Okra'),
+# (361, 'Lipstick'),
+# (362, 'Cosmetics Mirror'),
+# (363, 'Curling'),
+# (364, 'Table Tennis')
+# ]
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/solver/clas_engine.py b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/clas_engine.py
new file mode 100644
index 00000000..fc5a6802
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/clas_engine.py
@@ -0,0 +1,74 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import torch
+import torch.nn as nn
+
+from ..misc import MetricLogger, SmoothedValue, reduce_dict
+
+
+def train_one_epoch(
+ model: nn.Module, criterion: nn.Module, dataloader, optimizer, ema, epoch, device
+):
+    """Train the classification model for one epoch and return averaged metrics."""
+ model.train()
+
+ metric_logger = MetricLogger(delimiter=" ")
+ metric_logger.add_meter("lr", SmoothedValue(window_size=1, fmt="{value:.6f}"))
+ print_freq = 100
+ header = "Epoch: [{}]".format(epoch)
+
+ for imgs, labels in metric_logger.log_every(dataloader, print_freq, header):
+ imgs = imgs.to(device)
+ labels = labels.to(device)
+
+ preds = model(imgs)
+ loss: torch.Tensor = criterion(preds, labels, epoch)
+
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
+ if ema is not None:
+ ema.update(model)
+
+ loss_reduced_values = {k: v.item() for k, v in reduce_dict({"loss": loss}).items()}
+ metric_logger.update(**loss_reduced_values)
+ metric_logger.update(lr=optimizer.param_groups[0]["lr"])
+
+ metric_logger.synchronize_between_processes()
+ print("Averaged stats:", metric_logger)
+
+ stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
+ return stats
+
+
+@torch.no_grad()
+def evaluate(model, criterion, dataloader, device):
+ model.eval()
+
+ metric_logger = MetricLogger(delimiter=" ")
+ # metric_logger.add_meter('acc', SmoothedValue(window_size=1, fmt='{global_avg:.4f}'))
+ # metric_logger.add_meter('loss', SmoothedValue(window_size=1, fmt='{value:.2f}'))
+ metric_logger.add_meter("acc", SmoothedValue(window_size=1))
+ metric_logger.add_meter("loss", SmoothedValue(window_size=1))
+
+ header = "Test:"
+ for imgs, labels in metric_logger.log_every(dataloader, 10, header):
+ imgs, labels = imgs.to(device), labels.to(device)
+ preds = model(imgs)
+
+ acc = (preds.argmax(dim=-1) == labels).sum() / preds.shape[0]
+ loss = criterion(preds, labels)
+
+ dict_reduced = reduce_dict({"acc": acc, "loss": loss})
+ reduced_values = {k: v.item() for k, v in dict_reduced.items()}
+ metric_logger.update(**reduced_values)
+
+ metric_logger.synchronize_between_processes()
+ print("Averaged stats:", metric_logger)
+
+ stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
+ return stats
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/solver/clas_solver.py b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/clas_solver.py
new file mode 100644
index 00000000..aefb3d0a
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/clas_solver.py
@@ -0,0 +1,75 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import datetime
+import json
+import time
+from pathlib import Path
+
+import torch
+import torch.nn as nn
+
+from ..misc import dist_utils
+from ._solver import BaseSolver
+from .clas_engine import evaluate, train_one_epoch
+
+
+class ClasSolver(BaseSolver):
+ def fit(
+ self,
+ ):
+ print("Start training")
+ self.train()
+ args = self.cfg
+
+ n_parameters = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+ print("Number of params:", n_parameters)
+
+ output_dir = Path(args.output_dir)
+ output_dir.mkdir(exist_ok=True)
+
+ start_time = time.time()
+ start_epoch = self.last_epoch + 1
+ for epoch in range(start_epoch, args.epochs):
+ if dist_utils.is_dist_available_and_initialized():
+ self.train_dataloader.sampler.set_epoch(epoch)
+
+ train_stats = train_one_epoch(
+ self.model,
+ self.criterion,
+ self.train_dataloader,
+ self.optimizer,
+ self.ema,
+ epoch=epoch,
+ device=self.device,
+ )
+ self.lr_scheduler.step()
+ self.last_epoch += 1
+
+ if output_dir:
+ checkpoint_paths = [output_dir / "checkpoint.pth"]
+                # extra checkpoint every `checkpoint_freq` epochs
+ if (epoch + 1) % args.checkpoint_freq == 0:
+ checkpoint_paths.append(output_dir / f"checkpoint{epoch:04}.pth")
+ for checkpoint_path in checkpoint_paths:
+                    dist_utils.save_on_master(self.state_dict(), checkpoint_path)
+
+ module = self.ema.module if self.ema else self.model
+ test_stats = evaluate(module, self.criterion, self.val_dataloader, self.device)
+
+ log_stats = {
+ **{f"train_{k}": v for k, v in train_stats.items()},
+ **{f"test_{k}": v for k, v in test_stats.items()},
+ "epoch": epoch,
+ "n_parameters": n_parameters,
+ }
+
+ if output_dir and dist_utils.is_main_process():
+ with (output_dir / "log.txt").open("a") as f:
+ f.write(json.dumps(log_stats) + "\n")
+
+ total_time = time.time() - start_time
+ total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+ print("Training time {}".format(total_time_str))
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/solver/det_engine.py b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/det_engine.py
new file mode 100644
index 00000000..b877b09b
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/det_engine.py
@@ -0,0 +1,259 @@
+"""
+D-FINE: Redefine Regression Task of DETRs as Fine-grained Distribution Refinement
+Copyright (c) 2024 The D-FINE Authors. All Rights Reserved.
+---------------------------------------------------------------------------------
+Modified from DETR (https://github.com/facebookresearch/detr/blob/main/engine.py)
+Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+"""
+
+import math
+import sys
+from typing import Dict, Iterable, List
+
+import numpy as np
+import torch
+import torch.amp
+from torch.cuda.amp.grad_scaler import GradScaler
+from torch.utils.tensorboard import SummaryWriter
+
+from ..data import CocoEvaluator
+from ..data.dataset import mscoco_category2label
+from ..misc import MetricLogger, SmoothedValue, dist_utils, save_samples
+from ..optim import ModelEMA, Warmup
+from .validator import Validator, scale_boxes
+
+
+def train_one_epoch(
+ model: torch.nn.Module,
+ criterion: torch.nn.Module,
+ data_loader: Iterable,
+ optimizer: torch.optim.Optimizer,
+ device: torch.device,
+ epoch: int,
+ use_wandb: bool,
+ max_norm: float = 0,
+ **kwargs,
+):
+ if use_wandb:
+ import wandb
+
+ model.train()
+ criterion.train()
+ metric_logger = MetricLogger(delimiter=" ")
+ metric_logger.add_meter("lr", SmoothedValue(window_size=1, fmt="{value:.6f}"))
+
+ epochs = kwargs.get("epochs", None)
+ header = "Epoch: [{}]".format(epoch) if epochs is None else "Epoch: [{}/{}]".format(epoch, epochs)
+
+ print_freq = kwargs.get("print_freq", 10)
+ writer: SummaryWriter = kwargs.get("writer", None)
+
+ ema: ModelEMA = kwargs.get("ema", None)
+ scaler: GradScaler = kwargs.get("scaler", None)
+ lr_warmup_scheduler: Warmup = kwargs.get("lr_warmup_scheduler", None)
+ losses = []
+
+ output_dir = kwargs.get("output_dir", None)
+ num_visualization_sample_batch = kwargs.get("num_visualization_sample_batch", 1)
+
+ for i, (samples, targets) in enumerate(
+ metric_logger.log_every(data_loader, print_freq, header)
+ ):
+ global_step = epoch * len(data_loader) + i
+ metas = dict(epoch=epoch, step=i, global_step=global_step, epoch_step=len(data_loader))
+
+ if global_step < num_visualization_sample_batch and output_dir is not None and dist_utils.is_main_process():
+ save_samples(samples, targets, output_dir, "train", normalized=True, box_fmt="cxcywh")
+
+ samples = samples.to(device)
+ targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]
+
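+        # Mixed-precision path: forward under autocast and scale the loss for backward
+        # (unscaling before gradient clipping); otherwise run a plain fp32 training step.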
+ if scaler is not None:
+ with torch.autocast(device_type=str(device), cache_enabled=True):
+ outputs = model(samples, targets=targets)
+
+ if torch.isnan(outputs["pred_boxes"]).any() or torch.isinf(outputs["pred_boxes"]).any():
+ print(outputs["pred_boxes"])
+ state = model.state_dict()
+ new_state = {}
+ for key, value in model.state_dict().items():
+                        # Strip the DistributedDataParallel 'module.' prefix from each key
+ new_key = key.replace("module.", "")
+ # Add the updated key-value pair to the state dictionary
+ state[new_key] = value
+ new_state["model"] = state
+ dist_utils.save_on_master(new_state, "./NaN.pth")
+
+ with torch.autocast(device_type=str(device), enabled=False):
+ loss_dict = criterion(outputs, targets, **metas)
+
+ loss = sum(loss_dict.values())
+ scaler.scale(loss).backward()
+
+ if max_norm > 0:
+ scaler.unscale_(optimizer)
+ torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
+
+ scaler.step(optimizer)
+ scaler.update()
+ optimizer.zero_grad()
+
+ else:
+ outputs = model(samples, targets=targets)
+ loss_dict = criterion(outputs, targets, **metas)
+
+ loss: torch.Tensor = sum(loss_dict.values())
+ optimizer.zero_grad()
+ loss.backward()
+
+ if max_norm > 0:
+ torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
+
+ optimizer.step()
+
+ # ema
+ if ema is not None:
+ ema.update(model)
+
+ if lr_warmup_scheduler is not None:
+ lr_warmup_scheduler.step()
+
+ loss_dict_reduced = dist_utils.reduce_dict(loss_dict)
+ loss_value = sum(loss_dict_reduced.values())
+ losses.append(loss_value.detach().cpu().numpy())
+
+ if not math.isfinite(loss_value):
+ print("Loss is {}, stopping training".format(loss_value))
+ print(loss_dict_reduced)
+ sys.exit(1)
+
+ metric_logger.update(loss=loss_value, **loss_dict_reduced)
+ metric_logger.update(lr=optimizer.param_groups[0]["lr"])
+
+ if writer and dist_utils.is_main_process() and global_step % 10 == 0:
+ writer.add_scalar("Loss/total", loss_value.item(), global_step)
+ for j, pg in enumerate(optimizer.param_groups):
+ writer.add_scalar(f"Lr/pg_{j}", pg["lr"], global_step)
+ for k, v in loss_dict_reduced.items():
+ writer.add_scalar(f"Loss/{k}", v.item(), global_step)
+
+ if use_wandb:
+ wandb.log(
+ {"lr": optimizer.param_groups[0]["lr"], "epoch": epoch, "train/loss": np.mean(losses)}
+ )
+ # gather the stats from all processes
+ metric_logger.synchronize_between_processes()
+ print("Averaged stats:", metric_logger)
+ return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
+
+
+@torch.no_grad()
+def evaluate(
+ model: torch.nn.Module,
+ criterion: torch.nn.Module,
+ postprocessor,
+ data_loader,
+ coco_evaluator: CocoEvaluator,
+ device,
+ epoch: int,
+ use_wandb: bool,
+ **kwargs,
+):
+ if use_wandb:
+ import wandb
+
+ model.eval()
+ criterion.eval()
+ coco_evaluator.cleanup()
+
+ metric_logger = MetricLogger(delimiter=" ")
+ # metric_logger.add_meter('class_error', SmoothedValue(window_size=1, fmt='{value:.2f}'))
+ header = "Test:"
+
+ # iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessor.keys())
+ iou_types = coco_evaluator.iou_types
+ # coco_evaluator = CocoEvaluator(base_ds, iou_types)
+ # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75]
+
+ gt: List[Dict[str, torch.Tensor]] = []
+ preds: List[Dict[str, torch.Tensor]] = []
+
+ output_dir = kwargs.get("output_dir", None)
+ num_visualization_sample_batch = kwargs.get("num_visualization_sample_batch", 1)
+
+ for i, (samples, targets) in enumerate(metric_logger.log_every(data_loader, 10, header)):
+ global_step = epoch * len(data_loader) + i
+
+ if global_step < num_visualization_sample_batch and output_dir is not None and dist_utils.is_main_process():
+ save_samples(samples, targets, output_dir, "val", normalized=False, box_fmt="xyxy")
+
+ samples = samples.to(device)
+ targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]
+
+ outputs = model(samples)
+ # with torch.autocast(device_type=str(device)):
+ # outputs = model(samples)
+
+ # TODO (lyuwenyu), fix dataset converted using `convert_to_coco_api`?
+ orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
+ # orig_target_sizes = torch.tensor([[samples.shape[-1], samples.shape[-2]]], device=samples.device)
+
+ results = postprocessor(outputs, orig_target_sizes)
+
+ # if 'segm' in postprocessor.keys():
+ # target_sizes = torch.stack([t["size"] for t in targets], dim=0)
+ # results = postprocessor['segm'](results, outputs, orig_target_sizes, target_sizes)
+
+ res = {target["image_id"].item(): output for target, output in zip(targets, results)}
+ if coco_evaluator is not None:
+ coco_evaluator.update(res)
+
+ # validator format for metrics
+ for idx, (target, result) in enumerate(zip(targets, results)):
+ gt.append(
+ {
+ "boxes": scale_boxes( # from model input size to original img size
+ target["boxes"],
+ (target["orig_size"][1], target["orig_size"][0]),
+ (samples[idx].shape[-1], samples[idx].shape[-2]),
+ ),
+ "labels": target["labels"],
+ }
+ )
+ labels = (
+ torch.tensor([mscoco_category2label[int(x.item())] for x in result["labels"].flatten()])
+ .to(result["labels"].device)
+ .reshape(result["labels"].shape)
+ ) if postprocessor.remap_mscoco_category else result["labels"]
+ preds.append(
+ {"boxes": result["boxes"], "labels": labels, "scores": result["scores"]}
+ )
+
+ # Conf matrix, F1, Precision, Recall, box IoU
+ metrics = Validator(gt, preds).compute_metrics()
+ print("Metrics:", metrics)
+ if use_wandb:
+ metrics = {f"metrics/{k}": v for k, v in metrics.items()}
+ metrics["epoch"] = epoch
+ wandb.log(metrics)
+
+ # gather the stats from all processes
+ metric_logger.synchronize_between_processes()
+ print("Averaged stats:", metric_logger)
+ if coco_evaluator is not None:
+ coco_evaluator.synchronize_between_processes()
+
+ # accumulate predictions from all images
+ if coco_evaluator is not None:
+ coco_evaluator.accumulate()
+ coco_evaluator.summarize()
+
+ stats = {}
+ # stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
+ if coco_evaluator is not None:
+ if "bbox" in iou_types:
+ stats["coco_eval_bbox"] = coco_evaluator.coco_eval["bbox"].stats.tolist()
+ if "segm" in iou_types:
+ stats["coco_eval_masks"] = coco_evaluator.coco_eval["segm"].stats.tolist()
+
+ return stats, coco_evaluator
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/solver/det_solver.py b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/det_solver.py
new file mode 100644
index 00000000..96b93764
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/det_solver.py
@@ -0,0 +1,228 @@
+"""
+D-FINE: Redefine Regression Task of DETRs as Fine-grained Distribution Refinement
+Copyright (c) 2024 The D-FINE Authors. All Rights Reserved.
+---------------------------------------------------------------------------------
+Modified from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright (c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+import datetime
+import json
+import time
+
+import torch
+
+from ..misc import dist_utils, stats
+from ._solver import BaseSolver
+from .det_engine import evaluate, train_one_epoch
+
+
+class DetSolver(BaseSolver):
+ def fit(self):
+ self.train()
+ args = self.cfg
+ metric_names = ["AP50:95", "AP50", "AP75", "APsmall", "APmedium", "APlarge"]
+
+ if self.use_wandb:
+ import wandb
+
+ wandb.init(
+ project=args.yaml_cfg["project_name"],
+ name=args.yaml_cfg["exp_name"],
+ config=args.yaml_cfg,
+ )
+ wandb.watch(self.model)
+
+ n_parameters, model_stats = stats(self.cfg)
+ print(model_stats)
+ print("-" * 42 + "Start training" + "-" * 43)
+ top1 = 0
+ best_stat = {
+ "epoch": -1,
+ }
+ if self.last_epoch > 0:
+ module = self.ema.module if self.ema else self.model
+ test_stats, coco_evaluator = evaluate(
+ module,
+ self.criterion,
+ self.postprocessor,
+ self.val_dataloader,
+ self.evaluator,
+ self.device,
+ self.last_epoch,
+ self.use_wandb
+ )
+ for k in test_stats:
+ best_stat["epoch"] = self.last_epoch
+ best_stat[k] = test_stats[k][0]
+ top1 = test_stats[k][0]
+ print(f"best_stat: {best_stat}")
+
+ best_stat_print = best_stat.copy()
+ start_time = time.time()
+ start_epoch = self.last_epoch + 1
+ for epoch in range(start_epoch, args.epochs):
+ self.train_dataloader.set_epoch(epoch)
+ # self.train_dataloader.dataset.set_epoch(epoch)
+ if dist_utils.is_dist_available_and_initialized():
+ self.train_dataloader.sampler.set_epoch(epoch)
+
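+            # When training reaches the collate_fn's stop_epoch, reload the best
+            # stage-1 checkpoint and restart EMA with ema_restart_decay.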
+ if epoch == self.train_dataloader.collate_fn.stop_epoch:
+ self.load_resume_state(str(self.output_dir / "best_stg1.pth"))
+ if self.ema:
+ self.ema.decay = self.train_dataloader.collate_fn.ema_restart_decay
+ print(f"Refresh EMA at epoch {epoch} with decay {self.ema.decay}")
+
+ train_stats = train_one_epoch(
+ self.model,
+ self.criterion,
+ self.train_dataloader,
+ self.optimizer,
+ self.device,
+ epoch,
+ epochs=args.epochs,
+ max_norm=args.clip_max_norm,
+ print_freq=args.print_freq,
+ ema=self.ema,
+ scaler=self.scaler,
+ lr_warmup_scheduler=self.lr_warmup_scheduler,
+ writer=self.writer,
+ use_wandb=self.use_wandb,
+ output_dir=self.output_dir,
+ )
+
+ if self.lr_warmup_scheduler is None or self.lr_warmup_scheduler.finished():
+ self.lr_scheduler.step()
+
+ self.last_epoch += 1
+
+ if self.output_dir and epoch < self.train_dataloader.collate_fn.stop_epoch:
+ checkpoint_paths = [self.output_dir / "last.pth"]
+                # extra checkpoint every `checkpoint_freq` epochs
+ if (epoch + 1) % args.checkpoint_freq == 0:
+ checkpoint_paths.append(self.output_dir / f"checkpoint{epoch:04}.pth")
+ for checkpoint_path in checkpoint_paths:
+ dist_utils.save_on_master(self.state_dict(), checkpoint_path)
+
+ module = self.ema.module if self.ema else self.model
+ test_stats, coco_evaluator = evaluate(
+ module,
+ self.criterion,
+ self.postprocessor,
+ self.val_dataloader,
+ self.evaluator,
+ self.device,
+ epoch,
+ self.use_wandb,
+ output_dir=self.output_dir,
+ )
+
+ # TODO
+ for k in test_stats:
+ if self.writer and dist_utils.is_main_process():
+ for i, v in enumerate(test_stats[k]):
+                        self.writer.add_scalar(f"Test/{k}_{i}", v, epoch)
+
+ if k in best_stat:
+ best_stat["epoch"] = (
+ epoch if test_stats[k][0] > best_stat[k] else best_stat["epoch"]
+ )
+ best_stat[k] = max(best_stat[k], test_stats[k][0])
+ else:
+ best_stat["epoch"] = epoch
+ best_stat[k] = test_stats[k][0]
+
+ if best_stat[k] > top1:
+ best_stat_print["epoch"] = epoch
+ top1 = best_stat[k]
+ if self.output_dir:
+ if epoch >= self.train_dataloader.collate_fn.stop_epoch:
+ dist_utils.save_on_master(
+ self.state_dict(), self.output_dir / "best_stg2.pth"
+ )
+ else:
+ dist_utils.save_on_master(
+ self.state_dict(), self.output_dir / "best_stg1.pth"
+ )
+
+ best_stat_print[k] = max(best_stat[k], top1)
+ print(f"best_stat: {best_stat_print}") # global best
+
+ if best_stat["epoch"] == epoch and self.output_dir:
+ if epoch >= self.train_dataloader.collate_fn.stop_epoch:
+ if test_stats[k][0] > top1:
+ top1 = test_stats[k][0]
+ dist_utils.save_on_master(
+ self.state_dict(), self.output_dir / "best_stg2.pth"
+ )
+ else:
+ top1 = max(test_stats[k][0], top1)
+ dist_utils.save_on_master(
+ self.state_dict(), self.output_dir / "best_stg1.pth"
+ )
+
+ elif epoch >= self.train_dataloader.collate_fn.stop_epoch:
+ best_stat = {
+ "epoch": -1,
+ }
+ if self.ema:
+ self.ema.decay -= 0.0001
+ self.load_resume_state(str(self.output_dir / "best_stg1.pth"))
+ print(f"Refresh EMA at epoch {epoch} with decay {self.ema.decay}")
+
+ log_stats = {
+ **{f"train_{k}": v for k, v in train_stats.items()},
+ **{f"test_{k}": v for k, v in test_stats.items()},
+ "epoch": epoch,
+ "n_parameters": n_parameters,
+ }
+
+ if self.use_wandb:
+ wandb_logs = {}
+ for idx, metric_name in enumerate(metric_names):
+ wandb_logs[f"metrics/{metric_name}"] = test_stats["coco_eval_bbox"][idx]
+ wandb_logs["epoch"] = epoch
+ wandb.log(wandb_logs)
+
+ if self.output_dir and dist_utils.is_main_process():
+ with (self.output_dir / "log.txt").open("a") as f:
+ f.write(json.dumps(log_stats) + "\n")
+
+ # for evaluation logs
+ if coco_evaluator is not None:
+ (self.output_dir / "eval").mkdir(exist_ok=True)
+ if "bbox" in coco_evaluator.coco_eval:
+ filenames = ["latest.pth"]
+ if epoch % 50 == 0:
+ filenames.append(f"{epoch:03}.pth")
+ for name in filenames:
+ torch.save(
+ coco_evaluator.coco_eval["bbox"].eval,
+ self.output_dir / "eval" / name,
+ )
+
+ total_time = time.time() - start_time
+ total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+ print("Training time {}".format(total_time_str))
+
+ def val(self):
+ self.eval()
+
+ module = self.ema.module if self.ema else self.model
+ test_stats, coco_evaluator = evaluate(
+ module,
+ self.criterion,
+ self.postprocessor,
+ self.val_dataloader,
+ self.evaluator,
+ self.device,
+ epoch=-1,
+ use_wandb=False,
+ )
+
+ if self.output_dir:
+ dist_utils.save_on_master(
+ coco_evaluator.coco_eval["bbox"].eval, self.output_dir / "eval.pth"
+ )
+
+ return
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/solver/validator.py b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/validator.py
new file mode 100644
index 00000000..e38308ce
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/solver/validator.py
@@ -0,0 +1,347 @@
+import copy
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, List
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from loguru import logger
+from torchvision.ops import box_iou
+
+
+class Validator:
+ def __init__(
+ self,
+ gt: List[Dict[str, torch.Tensor]],
+ preds: List[Dict[str, torch.Tensor]],
+ conf_thresh=0.5,
+ iou_thresh=0.5,
+ ) -> None:
+ """
+ Format example:
+ gt = [{'labels': tensor([0]), 'boxes': tensor([[561.0, 297.0, 661.0, 359.0]])}, ...]
+ len(gt) is the number of images
+ bboxes are in format [x1, y1, x2, y2], absolute values
+ """
+ self.gt = gt
+ self.preds = preds
+ self.conf_thresh = conf_thresh
+ self.iou_thresh = iou_thresh
+ self.thresholds = np.arange(0.2, 1.0, 0.05)
+ self.conf_matrix = None
+
+ def compute_metrics(self, extended=False) -> Dict[str, float]:
+ filtered_preds = filter_preds(copy.deepcopy(self.preds), self.conf_thresh)
+ metrics = self._compute_main_metrics(filtered_preds)
+ if not extended:
+ metrics.pop("extended_metrics", None)
+ return metrics
+
+ def _compute_main_metrics(self, preds):
+ (
+ self.metrics_per_class,
+ self.conf_matrix,
+ self.class_to_idx,
+ ) = self._compute_metrics_and_confusion_matrix(preds)
+ tps, fps, fns = 0, 0, 0
+ ious = []
+ extended_metrics = {}
+ for key, value in self.metrics_per_class.items():
+ tps += value["TPs"]
+ fps += value["FPs"]
+ fns += value["FNs"]
+ ious.extend(value["IoUs"])
+
+ extended_metrics[f"precision_{key}"] = (
+ value["TPs"] / (value["TPs"] + value["FPs"])
+ if value["TPs"] + value["FPs"] > 0
+ else 0
+ )
+ extended_metrics[f"recall_{key}"] = (
+ value["TPs"] / (value["TPs"] + value["FNs"])
+ if value["TPs"] + value["FNs"] > 0
+ else 0
+ )
+
+ extended_metrics[f"iou_{key}"] = np.mean(value["IoUs"])
+
+ precision = tps / (tps + fps) if (tps + fps) > 0 else 0
+ recall = tps / (tps + fns) if (tps + fns) > 0 else 0
+ f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
+ iou = np.mean(ious).item() if ious else 0
+ return {
+ "f1": f1,
+ "precision": precision,
+ "recall": recall,
+ "iou": iou,
+ "TPs": tps,
+ "FPs": fps,
+ "FNs": fns,
+ "extended_metrics": extended_metrics,
+ }
+
+ def _compute_matrix_multi_class(self, preds):
+ metrics_per_class = defaultdict(lambda: {"TPs": 0, "FPs": 0, "FNs": 0, "IoUs": []})
+ for pred, gt in zip(preds, self.gt):
+ pred_boxes = pred["boxes"]
+ pred_labels = pred["labels"]
+ gt_boxes = gt["boxes"]
+ gt_labels = gt["labels"]
+
+ # isolate each class
+ labels = torch.unique(torch.cat([pred_labels, gt_labels]))
+ for label in labels:
+ pred_cl_boxes = pred_boxes[pred_labels == label] # filter by bool mask
+ gt_cl_boxes = gt_boxes[gt_labels == label]
+
+ n_preds = len(pred_cl_boxes)
+ n_gts = len(gt_cl_boxes)
+ if not (n_preds or n_gts):
+ continue
+ if not n_preds:
+ metrics_per_class[label.item()]["FNs"] += n_gts
+ metrics_per_class[label.item()]["IoUs"].extend([0] * n_gts)
+ continue
+ if not n_gts:
+ metrics_per_class[label.item()]["FPs"] += n_preds
+ metrics_per_class[label.item()]["IoUs"].extend([0] * n_preds)
+ continue
+
+ ious = box_iou(pred_cl_boxes, gt_cl_boxes) # matrix of all IoUs
+ ious_mask = ious >= self.iou_thresh
+
+                # indices of boxes that have IoU >= threshold
+ pred_indices, gt_indices = torch.nonzero(ious_mask, as_tuple=True)
+
+ if not pred_indices.numel(): # no predicts matched gts
+ metrics_per_class[label.item()]["FNs"] += n_gts
+ metrics_per_class[label.item()]["IoUs"].extend([0] * n_gts)
+ metrics_per_class[label.item()]["FPs"] += n_preds
+ metrics_per_class[label.item()]["IoUs"].extend([0] * n_preds)
+ continue
+
+ iou_values = ious[pred_indices, gt_indices]
+
+                # sorting by IoU to match highest scores first
+ sorted_indices = torch.argsort(-iou_values)
+ pred_indices = pred_indices[sorted_indices]
+ gt_indices = gt_indices[sorted_indices]
+ iou_values = iou_values[sorted_indices]
+
+ matched_preds = set()
+ matched_gts = set()
+ for pred_idx, gt_idx, iou in zip(pred_indices, gt_indices, iou_values):
+ if gt_idx.item() not in matched_gts and pred_idx.item() not in matched_preds:
+ matched_preds.add(pred_idx.item())
+ matched_gts.add(gt_idx.item())
+ metrics_per_class[label.item()]["TPs"] += 1
+ metrics_per_class[label.item()]["IoUs"].append(iou.item())
+
+ unmatched_preds = set(range(n_preds)) - matched_preds
+ unmatched_gts = set(range(n_gts)) - matched_gts
+ metrics_per_class[label.item()]["FPs"] += len(unmatched_preds)
+ metrics_per_class[label.item()]["IoUs"].extend([0] * len(unmatched_preds))
+ metrics_per_class[label.item()]["FNs"] += len(unmatched_gts)
+ metrics_per_class[label.item()]["IoUs"].extend([0] * len(unmatched_gts))
+ return metrics_per_class
+
+ def _compute_metrics_and_confusion_matrix(self, preds):
+ # Initialize per-class metrics
+ metrics_per_class = defaultdict(lambda: {"TPs": 0, "FPs": 0, "FNs": 0, "IoUs": []})
+
+ # Collect all class IDs
+ all_classes = set()
+ for pred in preds:
+ all_classes.update(pred["labels"].tolist())
+ for gt in self.gt:
+ all_classes.update(gt["labels"].tolist())
+ all_classes = sorted(list(all_classes))
+ class_to_idx = {cls_id: idx for idx, cls_id in enumerate(all_classes)}
+ n_classes = len(all_classes)
+ conf_matrix = np.zeros((n_classes + 1, n_classes + 1), dtype=int) # +1 for background class
+
+ for pred, gt in zip(preds, self.gt):
+ pred_boxes = pred["boxes"]
+ pred_labels = pred["labels"]
+ gt_boxes = gt["boxes"]
+ gt_labels = gt["labels"]
+
+ n_preds = len(pred_boxes)
+ n_gts = len(gt_boxes)
+
+ if n_preds == 0 and n_gts == 0:
+ continue
+
+ ious = box_iou(pred_boxes, gt_boxes) if n_preds > 0 and n_gts > 0 else torch.tensor([])
+ # Assign matches between preds and gts
+ matched_pred_indices = set()
+ matched_gt_indices = set()
+
+ if ious.numel() > 0:
+ # For each pred box, find the gt box with highest IoU
+ ious_mask = ious >= self.iou_thresh
+ pred_indices, gt_indices = torch.nonzero(ious_mask, as_tuple=True)
+ iou_values = ious[pred_indices, gt_indices]
+
+ # Sorting by IoU to match highest scores first
+ sorted_indices = torch.argsort(-iou_values)
+ pred_indices = pred_indices[sorted_indices]
+ gt_indices = gt_indices[sorted_indices]
+ iou_values = iou_values[sorted_indices]
+
+ for pred_idx, gt_idx, iou in zip(pred_indices, gt_indices, iou_values):
+ if (
+ pred_idx.item() in matched_pred_indices
+ or gt_idx.item() in matched_gt_indices
+ ):
+ continue
+ matched_pred_indices.add(pred_idx.item())
+ matched_gt_indices.add(gt_idx.item())
+
+ pred_label = pred_labels[pred_idx].item()
+ gt_label = gt_labels[gt_idx].item()
+
+ pred_cls_idx = class_to_idx[pred_label]
+ gt_cls_idx = class_to_idx[gt_label]
+
+ # Update confusion matrix
+ conf_matrix[gt_cls_idx, pred_cls_idx] += 1
+
+ # Update per-class metrics
+ if pred_label == gt_label:
+ metrics_per_class[gt_label]["TPs"] += 1
+ metrics_per_class[gt_label]["IoUs"].append(iou.item())
+ else:
+ # Misclassification
+ metrics_per_class[gt_label]["FNs"] += 1
+ metrics_per_class[pred_label]["FPs"] += 1
+ metrics_per_class[gt_label]["IoUs"].append(0)
+ metrics_per_class[pred_label]["IoUs"].append(0)
+
+ # Unmatched predictions (False Positives)
+ unmatched_pred_indices = set(range(n_preds)) - matched_pred_indices
+ for pred_idx in unmatched_pred_indices:
+ pred_label = pred_labels[pred_idx].item()
+ pred_cls_idx = class_to_idx[pred_label]
+ # Update confusion matrix: background row
+ conf_matrix[n_classes, pred_cls_idx] += 1
+ # Update per-class metrics
+ metrics_per_class[pred_label]["FPs"] += 1
+ metrics_per_class[pred_label]["IoUs"].append(0)
+
+ # Unmatched ground truths (False Negatives)
+ unmatched_gt_indices = set(range(n_gts)) - matched_gt_indices
+ for gt_idx in unmatched_gt_indices:
+ gt_label = gt_labels[gt_idx].item()
+ gt_cls_idx = class_to_idx[gt_label]
+ # Update confusion matrix: background column
+ conf_matrix[gt_cls_idx, n_classes] += 1
+ # Update per-class metrics
+ metrics_per_class[gt_label]["FNs"] += 1
+ metrics_per_class[gt_label]["IoUs"].append(0)
+
+ return metrics_per_class, conf_matrix, class_to_idx
+
+ def save_plots(self, path_to_save) -> None:
+ path_to_save = Path(path_to_save)
+ path_to_save.mkdir(parents=True, exist_ok=True)
+
+ if self.conf_matrix is not None:
+ class_labels = [str(cls_id) for cls_id in self.class_to_idx.keys()] + ["background"]
+
+ plt.figure(figsize=(10, 8))
+ plt.imshow(self.conf_matrix, interpolation="nearest", cmap=plt.cm.Blues)
+ plt.title("Confusion Matrix")
+ plt.colorbar()
+ tick_marks = np.arange(len(class_labels))
+ plt.xticks(tick_marks, class_labels, rotation=45)
+ plt.yticks(tick_marks, class_labels)
+
+ # Add labels to each cell
+ thresh = self.conf_matrix.max() / 2.0
+ for i in range(self.conf_matrix.shape[0]):
+ for j in range(self.conf_matrix.shape[1]):
+ plt.text(
+ j,
+ i,
+ format(self.conf_matrix[i, j], "d"),
+ horizontalalignment="center",
+ color="white" if self.conf_matrix[i, j] > thresh else "black",
+ )
+
+ plt.ylabel("True label")
+ plt.xlabel("Predicted label")
+ plt.tight_layout()
+ plt.savefig(path_to_save / "confusion_matrix.png")
+ plt.close()
+
+ thresholds = self.thresholds
+ precisions, recalls, f1_scores = [], [], []
+
+ # Keep the original predictions so each threshold filters a fresh copy
+ original_preds = copy.deepcopy(self.preds)
+
+ for threshold in thresholds:
+ # Filter predictions based on the current threshold
+ filtered_preds = filter_preds(copy.deepcopy(original_preds), threshold)
+ # Compute metrics with the filtered predictions
+ metrics = self._compute_main_metrics(filtered_preds)
+ precisions.append(metrics["precision"])
+ recalls.append(metrics["recall"])
+ f1_scores.append(metrics["f1"])
+
+ # Plot Precision and Recall vs Threshold
+ plt.figure()
+ plt.plot(thresholds, precisions, label="Precision", marker="o")
+ plt.plot(thresholds, recalls, label="Recall", marker="o")
+ plt.xlabel("Threshold")
+ plt.ylabel("Value")
+ plt.title("Precision and Recall vs Threshold")
+ plt.legend()
+ plt.grid(True)
+ plt.savefig(path_to_save / "precision_recall_vs_threshold.png")
+ plt.close()
+
+ # Plot F1 Score vs Threshold
+ plt.figure()
+ plt.plot(thresholds, f1_scores, label="F1 Score", marker="o")
+ plt.xlabel("Threshold")
+ plt.ylabel("F1 Score")
+ plt.title("F1 Score vs Threshold")
+ plt.grid(True)
+ plt.savefig(path_to_save / "f1_score_vs_threshold.png")
+ plt.close()
+
+ # Find the best threshold based on F1 score (last occurrence of the maximum)
+ best_idx = len(f1_scores) - np.argmax(f1_scores[::-1]) - 1
+ best_threshold = thresholds[best_idx]
+ best_f1 = f1_scores[best_idx]
+
+ logger.info(
+ f"Best Threshold: {round(best_threshold, 2)} with F1 Score: {round(best_f1, 3)}"
+ )
+
+
+def filter_preds(preds, conf_thresh):
+ for pred in preds:
+ keep_idxs = pred["scores"] >= conf_thresh
+ pred["scores"] = pred["scores"][keep_idxs]
+ pred["boxes"] = pred["boxes"][keep_idxs]
+ pred["labels"] = pred["labels"][keep_idxs]
+ return preds
+
+
+def scale_boxes(boxes, orig_shape, resized_shape):
+ """
+ boxes in format: [x1, y1, x2, y2], absolute values
+ orig_shape: [height, width]
+ resized_shape: [height, width]
+ """
+ scale_x = orig_shape[1] / resized_shape[1]
+ scale_y = orig_shape[0] / resized_shape[0]
+ boxes[:, 0] *= scale_x
+ boxes[:, 2] *= scale_x
+ boxes[:, 1] *= scale_y
+ boxes[:, 3] *= scale_y
+ return boxes
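+
+
+# Hedged usage sketch (illustrative only): the tensor values, class ids, and image
+# sizes below are made up. It shows how `filter_preds` and `scale_boxes` can be
+# chained to post-process raw detections predicted on a resized model input.
+if __name__ == "__main__":
+    example_preds = [
+        {
+            "boxes": torch.tensor([[10.0, 20.0, 110.0, 220.0], [5.0, 5.0, 50.0, 60.0]]),
+            "scores": torch.tensor([0.90, 0.20]),
+            "labels": torch.tensor([1, 3]),
+        }
+    ]
+    kept = filter_preds(example_preds, conf_thresh=0.5)  # drops the 0.20-score box
+    restored = scale_boxes(kept[0]["boxes"], orig_shape=[1080, 1920], resized_shape=[640, 640])
+    print(restored)  # boxes mapped back to the 1080x1920 original image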
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/__init__.py
new file mode 100644
index 00000000..de38c3ac
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/__init__.py
@@ -0,0 +1,6 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from . import dfine
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/dfine/__init__.py b/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/dfine/__init__.py
new file mode 100644
index 00000000..f61af2a4
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/dfine/__init__.py
@@ -0,0 +1,11 @@
+"""
+Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR)
+Copyright(c) 2023 lyuwenyu. All Rights Reserved.
+"""
+
+from .dfine import DFINE
+from .dfine_criterion import DFINECriterion
+from .dfine_decoder import DFINETransformer
+from .hybrid_encoder import HybridEncoder
+from .matcher import HungarianMatcher
+from .postprocessor import DFINEPostProcessor
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/dfine/blog.md b/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/dfine/blog.md
new file mode 100644
index 00000000..24abf573
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/dfine/blog.md
@@ -0,0 +1,90 @@
+English Blog | [中文博客](blog_cn.md)
+
+## 🔥 Revolutionizing Real-Time Object Detection: D-FINE vs. YOLO and Other DETR Models
+
+In the rapidly evolving field of real-time object detection, **D-FINE** emerges as a revolutionary approach that significantly surpasses existing models such as **YOLOv10**, **YOLO11**, and **RT-DETR v1/v2/v3**, raising the performance ceiling of real-time detection. After pretraining on the large-scale Objects365 dataset, **D-FINE** far exceeds its competitor **LW-DETR**, achieving up to **59.3%** AP on COCO while keeping frame rate, parameter count, and computational cost highly competitive. This positions **D-FINE** as a leader in real-time object detection and lays the groundwork for future research.
+
+Currently, all code, weights, logs, compilation tools, and the FiftyOne visualization tool for **D-FINE** have been fully open-sourced, thanks to the codebase provided by **RT-DETR**. The release also includes pretraining tutorials, custom-dataset tutorials, and more. We will keep publishing improvement insights and tuning strategies, and we welcome issues that help move the **D-FINE** series forward. We also hope you will leave a ⭐; it is the best encouragement for us.
+
+**GitHub Repo**: https://github.com/Peterande/D-FINE
+
+**ArXiv Paper**: https://arxiv.org/abs/2410.13842
+
+---
+
+### 🔍 Exploring the Key Innovations Behind D-FINE
+
+**D-FINE** redefines the regression task in DETR-based object detectors as **FDR**, and based on this, develops a performance-enhancing self-distillation mechanism **GO-LSD**. Below is a brief introduction to **FDR** and **GO-LSD**:
+
+#### **FDR (Fine-grained Distribution Refinement)** Decouples the Bounding Box Generation Process:
+
+1. **Initial Box Prediction**: Similar to traditional DETR methods, the decoder of **D-FINE** transforms object queries into several initial bounding boxes in the first layer. These boxes do not need to be highly accurate and serve only as an initialization.
+2. **Fine-Grained Distribution Optimization**: Unlike traditional methods that directly decode new bounding boxes, **D-FINE** generates four sets of probability distributions based on these initial bounding boxes in the decoder layers and iteratively optimizes these distributions layer by layer. These distributions essentially act as a "fine-grained intermediate representation" of the detection boxes. Coupled with a carefully designed weighting function **W(n)**, **D-FINE** can adjust the initial bounding boxes by fine-tuning these representations, allowing for subtle modifications or significant shifts of the edges (top, bottom, left, right). The specific process is illustrated in the figure:
+
+*(Figure from the original blog: the FDR refinement process; image not included here.)*
+
+For readability, we will not elaborate on the mathematical formulas and the Fine-Grained Localization (FGL) Loss that aids optimization here. Interested readers can refer to the original paper for derivations.
+
+The main advantages of redefining the bounding box regression task as **FDR** are:
+
+1. **Simplified Supervision**: While optimizing detection boxes using traditional L1 loss and IoU loss, the "residual" between labels and predictions can be additionally used to constrain these intermediate probability distributions. This allows each decoder layer to more effectively focus on and address the localization errors it currently faces. As the number of layers increases, their optimization objectives become progressively simpler, thereby simplifying the overall optimization process.
+
+2. **Robustness in Complex Scenarios**: The values of these probability distributions inherently represent the confidence level of fine-tuning for each edge. This enables **D-FINE** to independently model the uncertainty of each edge at different network depths, thereby exhibiting stronger robustness in complex real-world scenarios such as occlusion, motion blur, and low-light conditions, compared to directly regressing four fixed values.
+
+3. **Flexible Optimization Mechanism**: The probability distributions are transformed into final bounding box offsets through a weighted sum. The carefully designed weighting function ensures fine-grained adjustments when the initial box is already accurate and provides significant corrections when necessary (see the sketch after this list).
+
+4. **Research Potential and Scalability**: By transforming the regression task into a probability distribution prediction problem consistent with classification tasks, **FDR** not only enhances compatibility with other tasks but also enables object detection models to benefit from innovations in areas such as knowledge distillation, multi-task learning, and distribution optimization, opening new avenues for future research.
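+
+As a rough illustration of the weighted-sum step mentioned in point 3, here is a minimal, self-contained PyTorch sketch. It is not D-FINE's actual code: the function name `distributions_to_offsets`, the bin count, and the symmetric `bin_weights` values are invented for illustration and merely stand in for the real weighting function W(n).
+
+```python
+import torch
+
+
+def distributions_to_offsets(edge_dists: torch.Tensor, bin_weights: torch.Tensor) -> torch.Tensor:
+    """Convert per-edge probability distributions into scalar edge offsets.
+
+    edge_dists:  [num_boxes, 4, num_bins] softmax scores over discrete offset bins,
+                 one distribution per edge (top, bottom, left, right).
+    bin_weights: [num_bins] the offset value each bin maps to; this plays the role
+                 of the weighting function W(n) (values here are invented).
+    Returns:     [num_boxes, 4] expected offset per edge (weighted sum over bins).
+    """
+    return (edge_dists * bin_weights).sum(dim=-1)
+
+
+# Toy usage: 2 boxes, 4 edges, 8 offset bins; all numbers are made up.
+dists = torch.softmax(torch.randn(2, 4, 8), dim=-1)
+weights = torch.linspace(-0.5, 0.5, steps=8)  # assumed symmetric weights, not D-FINE's actual W(n)
+print(distributions_to_offsets(dists, weights).shape)  # torch.Size([2, 4])
+```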
+
+---
+
+#### **GO-LSD (Global Optimal Localization Self-Distillation)** Integrates Knowledge Distillation into FDR-Based Detectors Seamlessly
+
+Based on the above, object detectors equipped with the **FDR** framework satisfy the following two points:
+
+1. **Ability to Achieve Knowledge Transfer**: As Hinton mentioned in the paper *"Distilling the Knowledge in a Neural Network"*, probabilities are "knowledge." The network's output becomes probability distributions, and these distributions carry localization knowledge. By calculating the KLD loss, this "knowledge" can be transferred from deeper layers to shallower layers. This is something that traditional fixed box representations (Dirac δ functions) cannot achieve.
+
+2. **Consistent Optimization Objectives**: Every decoder layer in the **FDR** framework shares a common goal, reducing the residual between the initial bounding box and the ground-truth box. The precise probability distributions generated by the final layer can therefore serve as the target for each preceding layer and guide them through distillation.
+
+Thus, based on **FDR**, we propose **GO-LSD (Global Optimal Localization Self-Distillation)**. By implementing localization knowledge distillation between network layers, we further extend the capabilities of **D-FINE**. The specific process is illustrated in the figure:
+
+*(Figure from the original blog: the GO-LSD self-distillation process; image not included here.)*
+
+Similarly, for readability, we will not elaborate on the mathematical formulas and the Decoupled Distillation Focal (DDF) Loss that aids optimization here. Interested readers can refer to the original paper for derivations.
+
+This results in a synergistic win-win effect: as training progresses, the predictions of the final layer become increasingly accurate, and its generated soft labels can better help the preceding layers improve prediction accuracy. Conversely, the earlier layers learn to localize accurately more quickly, simplifying the optimization tasks of the deeper layers and further enhancing overall accuracy.
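+
+To make the self-distillation idea concrete, here is a minimal PyTorch sketch. It is not the Decoupled Distillation Focal (DDF) loss from the paper: a plain temperature-scaled KL divergence between an earlier layer's per-edge distributions and the detached final-layer distributions, with the function name, tensor shapes, and temperature all assumed for illustration.
+
+```python
+import torch
+import torch.nn.functional as F
+
+
+def localization_distillation_loss(shallow_logits: torch.Tensor,
+                                   final_logits: torch.Tensor,
+                                   temperature: float = 1.0) -> torch.Tensor:
+    """KL divergence pulling an earlier layer's per-edge distributions toward the
+    (detached) final layer's distributions. Both inputs: [num_boxes, 4, num_bins] logits.
+    """
+    teacher = F.softmax(final_logits.detach() / temperature, dim=-1)
+    student_log_probs = F.log_softmax(shallow_logits / temperature, dim=-1)
+    return F.kl_div(student_log_probs, teacher, reduction="batchmean") * temperature ** 2
+
+
+# Toy usage with made-up shapes: 2 boxes, 4 edges, 8 bins.
+print(localization_distillation_loss(torch.randn(2, 4, 8), torch.randn(2, 4, 8)))
+```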
+
+---
+
+### Visualization of D-FINE Predictions
+
+The following visualization showcases **D-FINE**'s predictions in various complex detection scenarios. These scenarios include occlusion, low-light conditions, motion blur, depth-of-field effects, and densely populated scenes. Despite these challenges, **D-FINE** still produces accurate localization results.
+
+*(Figures from the original blog: D-FINE predictions under occlusion, low light, motion blur, depth-of-field effects, and dense scenes; images not included here.)*
+
+Additionally, the visualization below shows the prediction results of the first layer and the last layer, the corresponding distributions of the four edges, and the weighted distributions. It can be seen that the localization of the predicted boxes becomes more precise as the distributions are optimized.
+
+*(Figure from the original blog: first-layer vs. final-layer predictions with the corresponding and weighted edge distributions; image not included here.)*
+
+---
+
+### Frequently Asked Questions
+
+#### Question 1: Will FDR and GO-LSD increase the inference cost?
+
+**Answer**: No. Compared with the original prediction head, FDR introduces almost no difference in speed, parameter count, or computational complexity, making it a seamless replacement.
+
+#### Question 2: Will FDR and GO-LSD increase the training cost?
+
+**Answer**: The increase in training cost mainly comes from generating the targets (labels) for the distributions. We have optimized this process, keeping the increases in training time and memory consumption to 6% and 2%, respectively, which is almost negligible.
+
+#### Question 3: Why is D-FINE faster and more lightweight than the RT-DETR series?
+
+**Answer**: Directly applying FDR and GO-LSD will significantly improve performance but will not make the network faster or lighter. Therefore, we performed a series of lightweight optimizations on RT-DETR. These adjustments led to a performance drop, but our methods compensated for these losses, achieving a perfect balance of speed, parameters, computational complexity, and performance.
diff --git a/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/dfine/blog_cn.md b/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/dfine/blog_cn.md
new file mode 100644
index 00000000..04fd5df3
--- /dev/null
+++ b/label_studio_ml/examples/d_fine/d-fine-code/src/zoo/dfine/blog_cn.md
@@ -0,0 +1,90 @@
+[English Blog](blog.md) | Chinese Blog
+
+## 🔥 Revolutionizing Real-Time Object Detection: D-FINE vs. YOLO and Other DETR Models
+
+In the fast-moving field of real-time object detection, **D-FINE** is a revolutionary approach that significantly surpasses existing models such as **YOLOv10**, **YOLO11**, and **RT-DETR v1/v2/v3**, raising the performance ceiling of real-time detection. After pretraining on the large-scale Objects365 dataset, **D-FINE** far exceeds its competitor **LW-DETR**, reaching up to **59.3%** AP on COCO while keeping frame rate, parameter count, and computational cost highly competitive. This makes **D-FINE** a front-runner in real-time object detection and lays a foundation for future research.
+
+All of D-FINE's code, weights, logs, compilation tools, and the FiftyOne visualization tool are now fully open source, thanks to the codebase provided by RT-DETR. The release also includes pretraining tutorials, custom-dataset tutorials, and more. We will keep publishing improvement insights and tuning tips; issues are welcome, and together we hope to grow the D-FINE series. Please also consider leaving a ⭐, which is the best encouragement for us.
+
+**GitHub Repo**: https://github.com/Peterande/D-FINE
+
+**ArXiv Paper**: https://arxiv.org/abs/2410.13842
+
+---
+
+### 🔍 Exploring the Key Innovations Behind D-FINE
+
+**D-FINE** redefines the regression task of DETR-based object detectors as FDR and, building on it, develops GO-LSD, a self-distillation mechanism that improves performance at no perceptible extra cost. A brief introduction to FDR and GO-LSD follows:
+
+#### FDR (Fine-grained Distribution Refinement) decomposes the bounding box generation process into:
+
+1. **Initial box prediction**: As in conventional DETR methods, the decoder of **D-FINE** turns object queries into a number of initial bounding boxes in its first layer. These boxes do not need to be especially precise; they serve only as an initialization.
+2. **Fine-grained distribution optimization**: Instead of directly decoding new bounding boxes as conventional methods do, the **D-FINE** decoder layers generate four sets of probability distributions from these initial boxes and refine them iteratively, layer by layer. These distributions act as a "fine-grained intermediate representation" of the detection boxes. Combined with the carefully designed weighting function W(n), **D-FINE** adjusts the initial boxes by fine-tuning this representation, making either subtle corrections or large shifts to the top, bottom, left, and right edges. The process is illustrated in the figure:
+
+