diff --git a/docs/source/tutorials/ptf_V2_example.ipynb b/docs/source/tutorials/ptf_V2_example.ipynb index 81313151d..1419b8360 100644 --- a/docs/source/tutorials/ptf_V2_example.ipynb +++ b/docs/source/tutorials/ptf_V2_example.ipynb @@ -1,813 +1,1775 @@ { - "cells": [ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "rzVbXsEBxnF-" + }, + "source": [ + "# A Basic Vignette for `pytorch-forecasting v2` Model Training and Inference" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yt0uZV7Px-40" + }, + "source": [ + "
\n", + ":warning: The \"Data Pipeline\" showcased here is part of an experimental rework of the `pytorch-forecasting` data layer, planned for release in v2.0.0. The API is currently unstable and subject to change without prior notice. This notebook serves as a basic demonstration of the intended workflow and is not recommended for use in production environments. Feedback and suggestions are highly encouraged β€” please share them in issue 1736.\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r15UunnLoxnK" + }, + "source": [ + "In this notebook, we demonstrate how to train and evaluate the **Temporal Fusion Transformer (TFT)** using the new `TimeSeries` and `DataModule` API from the v2 pipeline.\n", + "We can do this in 2 ways:\n", + "1. **High-level package API:**\n", + "\n", + " This approach handles data loading, dataloader creation, and model training internally. It provides a simple, `scikit-learn`-like `fit` β†’ `predict` workflow.\n", + " Users can still configure key training options (such as the `trainer`, callbacks, and training parameters) but cannot plug in fully custom `trainer` implementations or override internal pipeline logic.\n", + "\n", + "2. **Low-level 3-stage pipeline**:\n", + "This involves explicitly constructing:\n", + " * a `TimeSeries` object\n", + "\n", + " * a `DataModule`\n", + "\n", + " * the model (e.g., `TFT`)\n", + " \n", + " This workflow is ideal if you need custom setups such as custom trainers, callbacks, or advanced data preprocessing.\n", + " It requires a deeper understanding of how the three layers (TimeSeries, DataModule, and the model) interact, but offers maximum flexibility." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QyMFNk4MyY_b" + }, + "source": [ + "# Create Synthetic data\n", + "We generate a synthetic dataset using `load_toydata` that creates a `pandas` DataFrame with just numerical values as for now **the pipeline assumes the data to be numerical only**." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "RkgOT4kiy_RU" + }, + "outputs": [], + "source": [ + "from pytorch_forecasting.data.examples import load_toydata" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "WX-FRdusJSVN", + "outputId": "2ad916b8-2fd9-4318-afb1-2bda84d284d7" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "rzVbXsEBxnF-" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"data_df\",\n \"rows\": 4900,\n \"fields\": [\n {\n \"column\": \"series_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28,\n \"min\": 0,\n \"max\": 99,\n \"num_unique_values\": 100,\n \"samples\": [\n 83,\n 53,\n 70\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"time_idx\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14,\n \"min\": 0,\n \"max\": 48,\n \"num_unique_values\": 49,\n \"samples\": [\n 13,\n 45,\n 47\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"x\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6712252870750063,\n \"min\": -1.2780952045426857,\n \"max\": 1.3163602917006327,\n \"num_unique_values\": 4900,\n \"samples\": [\n 0.19335967827533446,\n 0.8492207493147326,\n -0.9687640491099185\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"y\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6753351884449413,\n \"min\": -1.2780952045426857,\n \"max\": 1.3163602917006327,\n \"num_unique_values\": 4900,\n \"samples\": [\n 0.6981263626070341,\n 0.7052787051636003,\n -0.861386757323439\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"category\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 4,\n \"num_unique_values\": 5,\n \"samples\": [\n 1,\n 4,\n 
2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"future_known_feature\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6741140972121411,\n \"min\": -0.9991351502732795,\n \"max\": 1.0,\n \"num_unique_values\": 49,\n \"samples\": [\n 0.26749882862458735,\n -0.2107957994307797,\n -0.01238866346289056\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"static_feature\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.2792423704109133,\n \"min\": 0.031153133884698536,\n \"max\": 0.9662188410416612,\n \"num_unique_values\": 100,\n \"samples\": [\n 0.24602577096925082,\n 0.8680231736929984,\n 0.6913124004679789\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"static_feature_cat\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe", + "variable_name": "data_df" }, - "source": [ - "# `pytorch-forecasting v2` Model Training and Inference - Beta API" + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
series_idtime_idxxycategoryfuture_known_featurestatic_featurestatic_feature_cat
000-0.0306430.14828001.0000000.0392130
1010.1482800.43302900.9950040.0392130
2020.4330290.74251100.9800670.0392130
3030.7425110.72927000.9553360.0392130
4040.7292700.62860400.9210610.0392130
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " series_id time_idx x y category future_known_feature \\\n", + "0 0 0 -0.030643 0.148280 0 1.000000 \n", + "1 0 1 0.148280 0.433029 0 0.995004 \n", + "2 0 2 0.433029 0.742511 0 0.980067 \n", + "3 0 3 0.742511 0.729270 0 0.955336 \n", + "4 0 4 0.729270 0.628604 0 0.921061 \n", + "\n", + " static_feature static_feature_cat \n", + "0 0.039213 0 \n", + "1 0.039213 0 \n", + "2 0.039213 0 \n", + "3 0.039213 0 \n", + "4 0.039213 0 " ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "num_series = 100 # Number of individual time series to generate\n", + "seq_length = 50 # Length of each time series\n", + "data_df = load_toydata(num_series, seq_length)\n", + "data_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_8TgLH82runO" + }, + "source": [ + "# High-level API\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A1cqKCRur4oj" + }, + "source": [ + "## Steps\n", + "* Create the `TimeSeries` object\n", + "* Create `configs` for model, `datamodule`, `trainer` etc.\n", + "* Create the `model_pkg` object\n", + "* perform `pkg.fit` and `pkg.predict`.\n", + "\n", + "## Create Dataset object\n", + "\n", + "`TimeSeries` returns the raw data in terms of tensors .\n", + "\n", + "---\n", + "\n", + "`TimeSeries` dataset's Key arguments:\n", + "- `data`: DataFrame with sequence data.\n", + "- `time`: integer typed column denoting the time index within `data`.\n", + "- `target`: Column(s) in `data` denoting the forecasting target.\n", + "- `group`: List of column names identifying a time series instance within `data`.\n", + "- `num`: List of numerical features.\n", + "- `cat`: List of categorical features.\n", + "- `known`: Features known in future\n", + "- `unknown`: Features not known in the future\n", + "- `static`: List of variables that do not change over time,\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "u8OPR0HntXqR" + }, + "outputs": [], + "source": [ + "from pytorch_forecasting.data.timeseries import TimeSeries" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "6a_oy4VjtrHQ", + "outputId": "54678fb8-864e-4f32-eeb9-83697946a3e5" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "yt0uZV7Px-40" - }, - "source": [ - "
\n", - ":warning: The vignette showcased here is part of an experimental rework of the `pytorch-forecasting` data layer, planned for release in v2.0.0. The API is currently unstable and subject to change without prior notice.\n", - "\n", - "Feedback and suggestions are highly encouraged β€” please share them in issue 1736.\n", - "
\n" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/content/pytorch-forecasting/pytorch_forecasting/data/timeseries/_timeseries_v2.py:105: UserWarning: TimeSeries is part of an experimental rework of the pytorch-forecasting data layer, scheduled for release with v2.0.0. The API is not stable and may change without prior warning. For beta testing, but not for stable production use. Feedback and suggestions are very welcome in pytorch-forecasting issue 1736, https://github.com/sktime/pytorch-forecasting/issues/1736\n", + " warn(\n" + ] + } + ], + "source": [ + "# create `TimeSeries` dataset that returns the raw data in terms of tensors\n", + "dataset = TimeSeries(\n", + " data=data_df,\n", + " time=\"time_idx\",\n", + " target=\"y\",\n", + " group=[\"series_id\"],\n", + " num=[\"x\", \"future_known_feature\", \"static_feature\"],\n", + " cat=[\"category\", \"static_feature_cat\"],\n", + " known=[\"future_known_feature\"],\n", + " unknown=[\"x\", \"category\"],\n", + " static=[\"static_feature\", \"static_feature_cat\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EoS6W9zh6wCj" + }, + "source": [ + "## Create the configs\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "MKPXPUcC5dTY" + }, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "from pytorch_forecasting.data.encoders import (\n", + " EncoderNormalizer,\n", + " NaNLabelEncoder,\n", + " TorchNormalizer,\n", + ")\n", + "from pytorch_forecasting.metrics import MAE, SMAPE" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WYl9-oZz6nk6" + }, + "source": [ + "Here we use `EncoderDecoderTimeSeriesDataModule`\n", + "\n", + "\n", + "`EncoderDecoderTimeSeriesDataModule` key arguments:\n", + "- `time_series_dataset`: `TimeSeries` dataset instance\n", + "- `max_encoder_length` : Maximum length of the encoder input sequence.\n", + "- `max_prediction_length` : Maximum length of the decoder output sequence.\n", + "- `batch_size` : Batch size for DataLoader.\n", + "- `categorical_encoders` : Dictionary of categorical encoders.\n", + "- `scalers` : Dictionary of feature scalers.\n", + "- `target_normalizer`: Normalizer for the target variable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "YGMShzfyttp_" + }, + "outputs": [], + "source": [ + "datamodule_cfg = dict(\n", + " max_encoder_length=30,\n", + " max_prediction_length=1,\n", + " batch_size=32,\n", + " categorical_encoders={\n", + " \"category\": NaNLabelEncoder(add_nan=True),\n", + " \"static_feature_cat\": NaNLabelEncoder(add_nan=True),\n", + " },\n", + " scalers={\n", + " \"x\": StandardScaler(),\n", + " \"future_known_feature\": StandardScaler(),\n", + " \"static_feature\": StandardScaler(),\n", + " },\n", + " target_normalizer=TorchNormalizer(),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pi5Qkznh6t3y" + }, + "source": [ + "We would use `TFT` model in this tutorial" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "q6Thm13ct7OV" + }, + "outputs": [], + "source": [ + "model_cfg = dict(\n", + " loss=MAE(),\n", + " logging_metrics=[MAE(), SMAPE()],\n", + " optimizer=\"adam\",\n", + " optimizer_params={\"lr\": 1e-3},\n", + " lr_scheduler=\"reduce_lr_on_plateau\",\n", + " lr_scheduler_params={\"mode\": \"min\", \"factor\": 0.1, \"patience\": 10},\n", + " hidden_size=64,\n", + " num_layers=2,\n", + " attention_head_size=4,\n", + " dropout=0.1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "Stfuc_xCuON6" + }, + "outputs": [], + "source": [ + "trainer_cfg = dict(\n", + " max_epochs=5,\n", + " accelerator=\"auto\",\n", + " devices=1,\n", + " enable_progress_bar=True,\n", + " log_every_n_steps=10,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "XS_ND8UAubdN" + }, + "outputs": [], + "source": [ + "from pytorch_forecasting.models.temporal_fusion_transformer._tft_pkg_v2 import (\n", + " TFT_pkg_v2,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6yoqI8907DG4" + }, + "source": [ + "## Create the `model_pkg` object\n", + "\n", + "This `pkg` class acts as a wrapper around the whole ML pipeline in `pytorch-forecasting` and we can simply just define the `pkg` class and then use `pkg.fit` and `pkg.predict` to perform the \"fit\", \"predict\" mechanisms." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "aOxng4Rguwj2", + "outputId": "2c50fcad-f990-4aae-f0bb-5dbdd6a87377" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "6D9ARyp05R0t" - }, - "source": [ - "In this vignette, we demonstrate how to train and evaluate the **Temporal Fusion Transformer (TFT)** using the new `TimeSeries` and `DataModule` API from the v2 pipeline.\n", - "\n", - "\n", - "## Steps\n", - "\n", - "1. **Load Data** \n", - "2. **Create Dataset & DataModule** \n", - "3. **Initialize, Train & Run Inference with the Model**\n", - "\n", - "\n", - "\n", - "### Load Data\n", - "\n", - "We generate a synthetic dataset using `load_toydata` which returns a `pandas` DataFrame with purely numerical values. 
\n", - "*(Note: The current pipeline assumes all inputs are numerical only.)*\n", - "\n", - "\n", - "\n", - "\n", - "### Create Dataset & DataModule\n", - "\n", - "- `TimeSeries` returns the raw data in terms of tensors .\n", - "- `DataModule` wraps the dataset, handles splits, preprocessing, batching, and exposes `metadata` for the model initialisation.\n", - "\n", - "\n", - "\n", - "### Initialize the Model\n", - "\n", - "We initialize the TFT model using the `metadata` provided by the `DataModule`. This metadata includes all required dimensional info for the encoder, decoder, and static inputs.\n", - "\n", - "\n", - "\n", - "### Train the Model\n", - "\n", - "We use a `Trainer` from PyTorch Lightning to train the model\n", - "\n", - "### Run Inference\n", - "\n", - "After training, we can make predictions using the trained model\n" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "{'loss': MAE(), 'logging_metrics': [MAE(), SMAPE()], 'optimizer': 'adam', 'optimizer_params': {'lr': 0.001}, 'lr_scheduler': 'reduce_lr_on_plateau', 'lr_scheduler_params': {'mode': 'min', 'factor': 0.1, 'patience': 10}, 'hidden_size': 64, 'num_layers': 2, 'attention_head_size': 4, 'dropout': 0.1}\n" + ] + } + ], + "source": [ + "model_pkg = TFT_pkg_v2(\n", + " model_cfg=model_cfg,\n", + " trainer_cfg=trainer_cfg,\n", + " datamodule_cfg=datamodule_cfg,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 976, + "referenced_widgets": [ + "4ecdea6764d145118ab53e59451d2b0c", + "e7b969aa6d8e433d9aeeac4357bc425d", + "42707b895305490b82cd644250e689fa", + "19c3c106d5a9489cae445a0b5fc88183", + "a6e5908902eb40e997e6086287f28f2a", + "f23d99cc4f01426eb4fc8d41fc8f4b16", + "df8d7458b0fa4f508d4ce357fc95c609", + "464464a47d604d708be37e18edec4810", + "c7ca9662eae04999b21de91c183a0856", + "e82943533ad54539a777d6adae271d0f", + "e0fa236745204d5ba2dbc1ff2c51f1a2", + "c52bb8ff12db4df3a05cf1da7b5470f7", + "273fb7ccddeb476f9c76bd1be44a6ae0", + "922dfb34c7494b20a874e294c07447e0", + "96eda6cd2fbc47b9a29d2cf176332058", + "d2ed70b544924436b185f79d0cd90862", + "aff8baef21494ccf99bf092fc3daaae0", + "640d3876c2ce49de9757962b5b5b0e32", + "f99327891eeb424b9df4fe04a6bedcfb", + "04804ca4c425464db2754abf3cf95568", + "386a2097c02f4af6ba239e385f4b0b47", + "7e95bdbc3a6c46bf8b1e96bdecfa7303", + "572629c64cfd46a983f1a8c6483a2cf1", + "61f356a200c5470db777e7e0f9e8c520", + "4905c6d809274aa39a984e3e458fc89a", + "b16522c88ebb435f96c05315ef91ebbb", + "65cce98698644285896363e509ae6139", + "a4a4c07f117e46989cb4877a5d2dd9e0", + "3f8cc20607db40c7acb4110feba9ab0d", + "3c5c1f55d5a64838b94fc0fbd85097c6", + "4f5cce37b6ac430e85757dd06b06953a", + "942645c506ea436fb598455d84c8a970", + "969a3ddfaed84150944d697307ababe4", + "955c5e9c139148a1a352d17202fe097f", + "f0fbcbcf02e443bc99a469cf4c7f8131", + "72fb23e179594f35a68418e2e0ee65fc", + "7808bf48e45940cfa0d4bccae784d730", + "ed0358a45ec14ce687fc02904a815e38", + "67a3d79f1b2e4e03b9a564286c04d5d4", + "27fd0da590314bb68dfac5b7c72d6584", + "15e539660a2547f49fb2cf8a6143f5fa", + "c46b831b37a347868f1d35d0dbbfd923", + "3f985da9d6a245c5b54dbb47926a4fd4", + "5ab64c01efb84e75af1a8aaf6675f5bf", + "8d0747756fd2434399ae8d233a82d607", + "f77d800d097b494ca3e945abdaedd75c", + "05a14444ea4043dea69a4e7185e66cb1", + "b775518f409449928c3211260d7223c0", + "2db9a1e74ad14139af235f1a2a146e0a", + "2bcdfdf1b12c495aa8b425c88fcfbd1b", + "d71a01b309e948239a16062097ee76a2", + "006cdf49ce55411bb072c2670d87773f", + 
"5b3be082628244948284a40bea451ff1", + "bf96e25bd5d64892a329b624961abeb8", + "b5e879fa1fee4d0ba30ac5af07d1d8c5", + "78f2d725dcb34deca5407c277c384d8d", + "e1dc997d76d54eb9a9245530a60c2cd9", + "8fa00a6415b74091a012bc5fff543f42", + "c74c472cdf174a28a4ca3fed1b312332", + "409d65e2b79f49318c580d9835ffc29c", + "568141e0b45f44ff9b497c6474d8019f", + "1a889d0b55bb4e6d80d5f170297a6262", + "679072fe36f5404588879eab670e01e2", + "ab72364dc8cf433e907c40df3e7be9e9", + "d56c627297bd435fbbc60317066084f9", + "6a62c54d7d7b4f689dc31e57aaa20411", + "3ab523d60fd249a7bcece32280872abd", + "ef8e78e4f8a248dcbbc2ea3e464c5922", + "56685ebbfd244154bd1829dec6f0db0b", + "dc5f9a923d27492cb382691ec01a1ddc", + "58995b1bd1c24433a3aca0ab53c6b8bf", + "9082a1b6eb3a4d14b4c92eedec1c2404", + "13eb0c5265ad48d08b3f8e46a55896f0", + "f91e202108684aa9af76fdf3e9d83206", + "f617590dcd184fd99f98515940ac85af", + "33a1e5f21b694e3bb62d2c0d73aa65e3", + "8c8c8832e16c4d489e0df7514dc78f6a" + ] }, + "id": "c27Qj4QAvFwx", + "outputId": "21bbd594-d92e-498b-bd02-71829295c483" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "QyMFNk4MyY_b" - }, - "source": [ - "# 1. Load Data\n", - "We generate a synthetic dataset using `load_toydata` that creates a `pandas` DataFrame with just numerical values as for now **the pipeline assumes the data to be numerical only**." - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/content/pytorch-forecasting/pytorch_forecasting/data/data_module.py:129: UserWarning: EncoderDecoderTimeSeriesDataModule is part of an experimental rework of the pytorch-forecasting data layer, scheduled for release with v2.0.0. The API is not stable and may change without prior warning. For beta testing, but not for stable production use. Feedback and suggestions are very welcome in pytorch-forecasting issue 1736, https://github.com/sktime/pytorch-forecasting/issues/1736\n", + " warn(\n", + "/content/pytorch-forecasting/pytorch_forecasting/models/base/_base_model_v2.py:64: UserWarning: The Model 'TFT' is part of an experimental reworkof the pytorch-forecasting model layer, scheduled for release with v2.0.0. The API is not stable and may change without prior warning. This class is intended for beta testing and as a basic skeleton, but not for stable production use. 
Feedback and suggestions are very welcome in pytorch-forecasting issue 1736, https://github.com/sktime/pytorch-forecasting/issues/1736\n", + " warn(\n", + "INFO: GPU available: True (cuda), used: True\n", + "INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True\n", + "INFO: TPU available: False, using: 0 TPU cores\n", + "INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores\n", + "INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", + "INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", + "INFO: \n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------------\n", + "0 | loss | MAE | 0 | train\n", + "1 | encoder_var_selection | Sequential | 709 | train\n", + "2 | decoder_var_selection | Sequential | 193 | train\n", + "3 | static_context_linear | Linear | 192 | train\n", + "4 | lstm_encoder | LSTM | 51.5 K | train\n", + "5 | lstm_decoder | LSTM | 50.4 K | train\n", + "6 | self_attention | MultiheadAttention | 16.6 K | train\n", + "7 | pre_output | Linear | 4.2 K | train\n", + "8 | output_layer | Linear | 65 | train\n", + "---------------------------------------------------------------------\n", + "123 K Trainable params\n", + "0 Non-trainable params\n", + "123 K Total params\n", + "0.495 Total estimated model params size (MB)\n", + "18 Modules in train mode\n", + "0 Modules in eval mode\n", + "INFO:lightning.pytorch.callbacks.model_summary:\n", + " | Name | Type | Params | Mode \n", + "---------------------------------------------------------------------\n", + "0 | loss | MAE | 0 | train\n", + "1 | encoder_var_selection | Sequential | 709 | train\n", + "2 | decoder_var_selection | Sequential | 193 | train\n", + "3 | static_context_linear | Linear | 192 | train\n", + "4 | lstm_encoder | LSTM | 51.5 K | train\n", + "5 | lstm_decoder | LSTM | 50.4 K | train\n", + "6 | self_attention | MultiheadAttention | 16.6 K | train\n", + "7 | pre_output | Linear | 4.2 K | train\n", + "8 | output_layer | Linear | 65 | train\n", + "---------------------------------------------------------------------\n", + "123 K Trainable params\n", + "0 Non-trainable params\n", + "123 K Total params\n", + "0.495 Total estimated model params size (MB)\n", + "18 Modules in train mode\n", + "0 Modules in eval mode\n" + ] }, { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "RkgOT4kiy_RU" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4ecdea6764d145118ab53e59451d2b0c", + "version_major": 2, + "version_minor": 0 }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/aryan/pytorch-forecasting/pytorch_forecasting/models/base/_base_model.py:28: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from tqdm.autonotebook import tqdm\n" - ] - } - ], - "source": [ - "from pytorch_forecasting.data.examples import load_toydata" + "text/plain": [ + "Sanity Checking: | | 0/? 
[00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
series_idtime_idxxycategoryfuture_known_featurestatic_featurestatic_feature_cat
0000.1677120.17215401.0000000.3005090
1010.1721540.46723300.9950040.3005090
2020.4672330.55495200.9800670.3005090
3030.5549520.74652900.9553360.3005090
4040.7465290.71174500.9210610.3005090
\n", - "" - ], - "text/plain": [ - " series_id time_idx x y category future_known_feature \\\n", - "0 0 0 0.167712 0.172154 0 1.000000 \n", - "1 0 1 0.172154 0.467233 0 0.995004 \n", - "2 0 2 0.467233 0.554952 0 0.980067 \n", - "3 0 3 0.554952 0.746529 0 0.955336 \n", - "4 0 4 0.746529 0.711745 0 0.921061 \n", - "\n", - " static_feature static_feature_cat \n", - "0 0.300509 0 \n", - "1 0.300509 0 \n", - "2 0.300509 0 \n", - "3 0.300509 0 \n", - "4 0.300509 0 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "num_series = 100 # Number of individual time series to generate\n", - "seq_length = 50 # Length of each time series\n", - "data_df = load_toydata(num_series, seq_length)\n", - "data_df.head()" + "text/plain": [ + "Training: | | 0/? [00:00 dict: + """ + Loads configuration from a dictionary, YAML file, or Pickle file. + """ + if config is None: + if ckpt_path and auto_file_name: + path = Path(ckpt_path).parent / auto_file_name + if path.exists(): + with open(path, "rb") as f: + return pickle.load(f) # noqa : S301 + return {} + + if isinstance(config, dict): + return config + + path = Path(config) + if not path.exists(): + raise FileNotFoundError(f"Configuration file not found: {path}") + + suffix = path.suffix.lower() + print(suffix) + + if suffix in [".yaml", ".yml"]: + with open(path) as f: + return yaml.safe_load(f) or {} + + else: + raise ValueError( + f"Unsupported config format: {suffix}. Use .yaml, .yml, or .pkl" + ) + + @classmethod + def get_cls(cls): + """Get the underlying model class.""" + raise NotImplementedError("Subclasses must implement `get_cls`.") + + @classmethod + def get_datamodule_cls(cls): + """Get the underlying DataModule class.""" + raise NotImplementedError("Subclasses must implement `get_datamodule_cls`.") + + @classmethod + def get_test_dataset_from(cls, **kwargs): + """ + Creates and returns D1 TimeSeries dataSet objects for testing. 
+ """ + from pytorch_forecasting.tests._data_scenarios import ( + data_with_covariates_v2, + make_datasets_v2, + ) + + raw_data = data_with_covariates_v2() + + datasets_info = make_datasets_v2(raw_data, **kwargs) + + return { + "train": datasets_info["training_dataset"], + "predict": datasets_info["validation_dataset"], + } + + def _build_model(self, metadata: dict, **kwargs): + """Instantiates the model, either from a checkpoint or from config.""" + model_cls = self.get_cls() + if self.ckpt_path: + self.model = model_cls.load_from_checkpoint( + self.ckpt_path, metadata=metadata, **kwargs + ) + elif self.model_cfg: + self.model = model_cls(**self.model_cfg, metadata=metadata) + else: + self.model = None + + def _build_datamodule(self, data: TimeSeries) -> LightningDataModule: + """Constructs a DataModule from a D1 layer object.""" + if not self.datamodule_cfg: + raise ValueError("`datamodule_cfg` must be provided to build a datamodule.") + datamodule_cls = self.get_datamodule_cls() + return datamodule_cls(data, **self.datamodule_cfg) + + def _load_dataloader( + self, data: Union[TimeSeries, LightningDataModule, DataLoader] + ) -> DataLoader: + """Converts various data input types into a DataLoader for prediction.""" + if isinstance(data, TimeSeries): # D1 Layer + dm = self._build_datamodule(data) + dm.setup(stage="predict") + return dm.predict_dataloader() + elif isinstance(data, LightningDataModule): # D2 Layer + data.setup(stage="predict") + return data.predict_dataloader() + elif isinstance(data, DataLoader): + return data + else: + raise TypeError( + f"Unsupported data type for prediction: {type(data).__name__}. " + "Expected TimeSeriesDataSet, LightningDataModule, or DataLoader." + ) + + def _save_artifact(self, output_dir: Path): + """Save all configuration artifacts.""" + output_dir.mkdir(parents=True, exist_ok=True) + + with open(output_dir / "datamodule_cfg.pkl", "wb") as f: + pickle.dump(self.datamodule_cfg, f) + + with open(output_dir / "model_cfg.pkl", "wb") as f: + pickle.dump(self.model_cfg, f) + + if self.datamodule is not None and hasattr(self.datamodule, "metadata"): + with open(output_dir / "metadata.pkl", "wb") as f: + pickle.dump(self.datamodule.metadata, f) + + def fit( + self, + data: Union[TimeSeries, LightningDataModule], + # todo: we should create a base data_module for different data_modules + save_ckpt: bool = True, + ckpt_dir: Union[str, Path] = "checkpoints", + ckpt_kwargs: Optional[dict[str, Any]] = None, + **trainer_fit_kwargs, + ): + """ + Fit the model to the training data. + + Parameters + ---------- + data : Union[TimeSeries, LightningDataModule] + The data to fit on (D1 or D2 layer). This object is responsible + for providing both training and validation data. + save_ckpt : bool, default=True + If True, save the best model checkpoint and the `datamodule_cfg`. + ckpt_dir : Union[str, Path], default="checkpoints" + Directory to save artifacts. + ckpt_kwargs : dict, optional + Keyword arguments passed to ``ModelCheckpoint``. + **trainer_fit_kwargs : + Additional keyword arguments passed to `trainer.fit()`. + + Returns + ------- + Optional[Path] + The path to the best model checkpoint if `save_ckpt=True`, else None. + """ + if isinstance(data, TimeSeries): + self.datamodule = self._build_datamodule(data) + else: + self.datamodule = data + self.datamodule.setup(stage="fit") + + if self.model is None: + if not self.model_cfg: + raise RuntimeError( + "`model_cfg` must be provided to train from scratch." 
+ ) + metadata = self.datamodule.metadata + self._build_model(metadata) + + callbacks = self.trainer_cfg.get("callbacks", []).copy() + checkpoint_cb = None + if save_ckpt: + ckpt_dir = Path(ckpt_dir) + ckpt_dir.mkdir(parents=True, exist_ok=True) + default_ckpt_kwargs = { + "dirpath": ckpt_dir, + "filename": "best-{epoch}-{step}", + "save_top_k": 1, + "monitor": "val_loss", + "mode": "min", + } + if ckpt_kwargs: + default_ckpt_kwargs.update(ckpt_kwargs) + checkpoint_cb = ModelCheckpoint(**default_ckpt_kwargs) + callbacks.append(checkpoint_cb) + trainer_init_cfg = self.trainer_cfg.copy() + trainer_init_cfg.pop("callbacks", None) + + self.trainer = Trainer(**trainer_init_cfg, callbacks=callbacks) + + self.trainer.fit(self.model, datamodule=self.datamodule, **trainer_fit_kwargs) + if save_ckpt and checkpoint_cb: + best_model_path = Path(checkpoint_cb.best_model_path) + self._save_artifact(best_model_path.parent) + print(f"Artifacts saved in: {best_model_path.parent}") + return best_model_path + return None + + def predict( + self, + data: Union[TimeSeries, LightningDataModule, DataLoader], + output_dir: Optional[Union[str, Path]] = None, + **kwargs, + ) -> Union[dict[str, torch.Tensor], None]: + """ + Generate predictions by wrapping the model's predict method. + + This method prepares the data by resolving it into a DataLoader and then + delegates the prediction task to the underlying model's ``.predict()`` method. + + Parameters + ---------- + data : Union[TimeSeries, LightningDataModule, DataLoader] + The data to predict on (D1, D2, or DataLoader). + output_dir : Union[str, Path], optional + If given, predictions are pickled to this directory instead of returned. + **kwargs : + Additional keyword arguments passed directly to the model's ``.predict()`` + method. This includes `mode`, `return_info`, `mode_kwargs`, and any + `trainer_kwargs`. + + Returns + ------- + Union[dict[str, torch.Tensor], None] + A dictionary of prediction tensors, or `None` if `output_dir` is given. + """ + if self.model is None: + raise RuntimeError( + "Model is not initialized. Provide `model_cfg` or `ckpt_path`." + ) + + dataloader = self._load_dataloader(data) + predictions = self.model.predict(dataloader, **kwargs) + + if output_dir: + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + output_file = output_path / "predictions.pkl" + with open(output_file, "wb") as f: + pickle.dump(predictions, f) + print(f"Predictions saved to {output_file}") + return None + + return predictions diff --git a/pytorch_forecasting/callbacks/__init__.py b/pytorch_forecasting/callbacks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pytorch_forecasting/callbacks/predict.py b/pytorch_forecasting/callbacks/predict.py new file mode 100644 index 000000000..0d4dab719 --- /dev/null +++ b/pytorch_forecasting/callbacks/predict.py @@ -0,0 +1,111 @@ +from typing import Any, Optional +from warnings import warn + +from lightning import Trainer +from lightning.pytorch import LightningModule +from lightning.pytorch.callbacks import BasePredictionWriter +import torch + + +class PredictCallback(BasePredictionWriter): + """ + Callback to capture predictions and related information internally. + + This callback is used by ``BaseModel.predict()`` to process raw model outputs + into the desired format (``prediction``, ``quantiles``, or ``raw``) and collect + any additional requested info (``x``, ``y``, ``index``, etc.). The results are + collated and stored in memory, accessible via the ``.result`` property.
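[Editorial note placed between hunks, outside the patch: the `Base_pkg.fit` flow above resolves the D1/D2 input, lazily builds the model from `datamodule.metadata`, and injects a `ModelCheckpoint` callback. A minimal usage sketch against this API, assuming a concrete package such as `TFT_pkg_v2` and pre-built config dicts; `my_dataset` is a `TimeSeries` instance and the paths are illustrative:]

```python
# Train from scratch; fit() returns the best checkpoint path when save_ckpt=True.
pkg = TFT_pkg_v2(model_cfg=model_cfg, trainer_cfg=trainer_cfg,
                 datamodule_cfg=datamodule_cfg)
best_ckpt = pkg.fit(my_dataset, ckpt_dir="checkpoints/tft")

# Later, reload for inference only: _load_config picks up the pickled
# datamodule_cfg next to the checkpoint via auto_file_name.
pkg_restored = TFT_pkg_v2(ckpt_path=best_ckpt,
                          datamodule_cfg="checkpoints/tft/datamodule_cfg.pkl")
preds = pkg_restored.predict(my_dataset, mode="prediction")
```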
+ + Parameters + ---------- + mode : str + The prediction mode ("prediction", "quantiles", or "raw"). + return_info : list[str], optional + Additional information to return. + mode_kwargs : dict[str, Any], optional + Additional keyword arguments for `to_prediction` or `to_quantiles`. + """ + + def __init__( + self, + mode: str = "prediction", + return_info: Optional[list[str]] = None, + mode_kwargs: dict[str, Any] = None, + ): + super().__init__(write_interval="epoch") + self.mode = mode + self.return_info = return_info or [] + self.mode_kwargs = mode_kwargs or {} + self._reset_data() + + def _reset_data(self, result: bool = True): + """Clear collected data for a new prediction run.""" + self.predictions = [] + self.info = {key: [] for key in self.return_info} + if result: + self._result = None + + def on_predict_batch_end( + self, + trainer: Trainer, + pl_module: LightningModule, + outputs: Any, + batch: Any, + batch_idx: int, + dataloader_idx: int = 0, + ): + """Process and store predictions for a single batch.""" + x, y = batch + + if self.mode == "raw": + processed_output = outputs + elif self.mode == "prediction": + processed_output = pl_module.to_prediction(outputs, **self.mode_kwargs) + elif self.mode == "quantiles": + processed_output = pl_module.to_quantiles(outputs, **self.mode_kwargs) + else: + raise ValueError(f"Invalid prediction mode: {self.mode}") + + self.predictions.append(processed_output) + + for key in self.return_info: + if key == "x": + self.info[key].append(x) + elif key == "y": + self.info[key].append(y[0]) + elif key == "index": + self.info[key].append(y[1]) + elif key == "decoder_lengths": + self.info[key].append(x["decoder_lengths"]) + else: + warn(f"Unknown return_info key: {key}") + + def on_predict_epoch_end(self, trainer: Trainer, pl_module: LightningModule): + """Collate all batch results into final tensors.""" + if self.mode == "raw" and isinstance(self.predictions[0], dict): + keys = self.predictions[0].keys() + collated_preds = { + key: torch.cat([p[key] for p in self.predictions]) for key in keys + } + else: + collated_preds = {"prediction": torch.cat(self.predictions)} + + final_result = collated_preds + + for key, data_list in self.info.items(): + if isinstance(data_list[0], dict): + collated_info = { + k: torch.cat([d[k] for d in data_list]) for k in data_list[0].keys() + } + else: + collated_info = torch.cat(data_list) + final_result[key] = collated_info + + self._result = final_result + self._reset_data(result=False) + + @property + def result(self) -> dict[str, torch.Tensor]: + if self._result is None: + raise RuntimeError("Prediction results are not yet available.") + return self._result diff --git a/pytorch_forecasting/models/base/_base_model_v2.py b/pytorch_forecasting/models/base/_base_model_v2.py index 8896a5397..e0affe943 100644 --- a/pytorch_forecasting/models/base/_base_model_v2.py +++ b/pytorch_forecasting/models/base/_base_model_v2.py @@ -5,16 +5,19 @@ ######################################################################################## -from typing import Optional, Union +from typing import Any, Optional, Union from warnings import warn +from lightning import Trainer from lightning.pytorch import LightningModule from lightning.pytorch.utilities.types import STEP_OUTPUT import torch import torch.nn as nn from torch.optim import Optimizer +from torch.utils.data import DataLoader -from pytorch_forecasting.metrics import Metric +from pytorch_forecasting.callbacks.predict import PredictCallback +from pytorch_forecasting.metrics import Metric, MultiLoss from
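[Editorial note placed between hunks, outside the patch: the callback collates per-batch outputs at epoch end, so the interesting part of its contract is which keys land in `.result`. A minimal sketch of how `BaseModel.predict` is expected to exercise it, per the docstrings above; the `model` and `dataloader` objects are assumed to come from the v2 pipeline shown earlier in this patch:]

```python
# mode="prediction" collates point forecasts under the "prediction" key;
# return_info adds the requested extras as additional keys in the result dict.
result = model.predict(
    dataloader,
    mode="prediction",
    return_info=["x", "y"],
    trainer_kwargs={"accelerator": "auto", "logger": False},
)
print(result["prediction"].shape)  # (n_samples, prediction_length, n_targets)
print(result.keys())               # dict_keys(['prediction', 'x', 'y'])
```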
pytorch_forecasting.utils._classproperty import classproperty @@ -91,6 +94,69 @@ def forward(self, x: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: """ raise NotImplementedError("Forward method must be implemented by subclass.") + def predict( + self, + dataloader: DataLoader, + mode: str = "prediction", + return_info: Optional[list[str]] = None, + mode_kwargs: dict[str, Any] = None, + trainer_kwargs: dict[str, Any] = None, + ) -> dict[str, torch.Tensor]: + """ + Generate predictions for new data using the `lightning.Trainer`. + + Parameters + ---------- + dataloader : DataLoader + The dataloader containing the data to predict on. + mode : str + The prediction mode ("prediction", "quantiles", or "raw"). + return_info : list[str], optional + A list of additional information to return. + mode_kwargs : dict[str, Any], optional + Additional arguments for `to_prediction`/`to_quantiles`. + trainer_kwargs : dict[str, Any], optional + Additional arguments for `Trainer`. + + Returns + ------- + dict[str, torch.Tensor] + A dictionary of prediction results. + """ + trainer_kwargs = trainer_kwargs or {} + predict_callback = PredictCallback( + mode=mode, return_info=return_info, mode_kwargs=mode_kwargs + ) + + callbacks = trainer_kwargs.get("callbacks", []) + if not isinstance(callbacks, list): + callbacks = [callbacks] + callbacks.append(predict_callback) + trainer_kwargs["callbacks"] = callbacks + + trainer = Trainer(**trainer_kwargs) + trainer.predict(self, dataloaders=dataloader) + + return predict_callback.result + + def to_prediction(self, out: dict[str, Any], **kwargs) -> torch.Tensor: + """Convert raw model output to point forecasts.""" + # todo: add MultiLoss support + try: + out = self.loss.to_prediction(out["prediction"], **kwargs) + except TypeError: # in case passed kwargs do not exist + out = self.loss.to_prediction(out["prediction"]) + return out + + def to_quantiles(self, out: dict[str, Any], **kwargs) -> torch.Tensor: + """Convert raw model output to quantile forecasts.""" + # todo: add MultiLoss support + try: + out = self.loss.to_quantiles(out["prediction"], **kwargs) + except TypeError: # in case passed kwargs do not exist + out = self.loss.to_quantiles(out["prediction"]) + return out + def training_step( self, batch: tuple[dict[str, torch.Tensor]], batch_idx: int ) -> STEP_OUTPUT: diff --git a/pytorch_forecasting/models/dlinear/_dlinear_pkg_v2.py b/pytorch_forecasting/models/dlinear/_dlinear_pkg_v2.py index bf4fffce5..500446d9d 100644 --- a/pytorch_forecasting/models/dlinear/_dlinear_pkg_v2.py +++ b/pytorch_forecasting/models/dlinear/_dlinear_pkg_v2.py @@ -2,10 +2,10 @@ Packages container for DLinear model.
""" -from pytorch_forecasting.models.base._base_object import _BasePtForecasterV2 +from pytorch_forecasting.base._base_pkg import Base_pkg -class DLinear_pkg_v2(_BasePtForecasterV2): +class DLinear_pkg_v2(Base_pkg): """DLinear package container.""" _tags = { @@ -26,6 +26,13 @@ def get_cls(cls): return DLinear + @classmethod + def get_datamodule_cls(cls): + """Get the underlying DataModule class.""" + from pytorch_forecasting.data._tslib_data_module import TslibDataModule + + return TslibDataModule + @classmethod def _get_test_datamodule_from(cls, trainer_kwargs): """Create test dataloaders from trainer_kwargs - following v1 pattern.""" @@ -112,7 +119,7 @@ def get_test_train_params(cls): from pytorch_forecasting.metrics import MAE, MAPE, SMAPE, QuantileLoss - return [ + params = [ {}, dict(moving_avg=25, individual=False, logging_metrics=[SMAPE()]), dict( @@ -125,3 +132,13 @@ def get_test_train_params(cls): logging_metrics=[SMAPE()], ), ] + + default_dm_cfg = {"context_length": 8, "prediction_length": 2} + + for param in params: + current_dm_cfg = param.get("datamodule_cfg", {}) + default_dm_cfg.update(current_dm_cfg) + + param["datamodule_cfg"] = default_dm_cfg + + return params diff --git a/pytorch_forecasting/models/samformer/_samformer_v2_pkg.py b/pytorch_forecasting/models/samformer/_samformer_v2_pkg.py index 36db9340a..2838fcc91 100644 --- a/pytorch_forecasting/models/samformer/_samformer_v2_pkg.py +++ b/pytorch_forecasting/models/samformer/_samformer_v2_pkg.py @@ -2,10 +2,10 @@ Samformer package container. """ -from pytorch_forecasting.models.base._base_object import _BasePtForecasterV2 +from pytorch_forecasting.base._base_pkg import Base_pkg -class Samformer_pkg_v2(_BasePtForecasterV2): +class Samformer_pkg_v2(Base_pkg): """Samformer package container.""" _tags = { @@ -21,83 +21,13 @@ def get_cls(cls): return Samformer @classmethod - def _get_test_datamodule_from(cls, trainer_kwargs): - """Create test dataloaders from trainer_kwargs - following v1 pattern.""" + def get_datamodule_cls(cls): + """Get the underlying DataModule class.""" from pytorch_forecasting.data.data_module import ( EncoderDecoderTimeSeriesDataModule, ) - from pytorch_forecasting.tests._data_scenarios import ( - data_with_covariates_v2, - make_datasets_v2, - ) - - data_with_covariates = data_with_covariates_v2() - - data_loader_default_kwargs = dict( - target="target", - group_ids=["agency_encoded", "sku_encoded"], - add_relative_time_idx=True, - ) - - data_loader_kwargs = trainer_kwargs.get("data_loader_kwargs", {}) - data_loader_default_kwargs.update(data_loader_kwargs) - datasets_info = make_datasets_v2( - data_with_covariates, **data_loader_default_kwargs - ) - - training_dataset = datasets_info["training_dataset"] - validation_dataset = datasets_info["validation_dataset"] - training_max_time_idx = datasets_info["training_max_time_idx"] - - max_encoder_length = data_loader_kwargs.get("max_encoder_length", 4) - max_prediction_length = data_loader_kwargs.get("max_prediction_length", 3) - add_relative_time_idx = data_loader_kwargs.get("add_relative_time_idx", True) - batch_size = data_loader_kwargs.get("batch_size", 2) - - train_datamodule = EncoderDecoderTimeSeriesDataModule( - time_series_dataset=training_dataset, - max_encoder_length=max_encoder_length, - max_prediction_length=max_prediction_length, - add_relative_time_idx=add_relative_time_idx, - batch_size=batch_size, - train_val_test_split=(0.8, 0.2, 0.0), - ) - - val_datamodule = EncoderDecoderTimeSeriesDataModule( - time_series_dataset=validation_dataset, 
- max_encoder_length=max_encoder_length, - max_prediction_length=max_prediction_length, - min_prediction_idx=training_max_time_idx, - add_relative_time_idx=add_relative_time_idx, - batch_size=batch_size, - train_val_test_split=(0.0, 1.0, 0.0), - ) - - test_datamodule = EncoderDecoderTimeSeriesDataModule( - time_series_dataset=validation_dataset, - max_encoder_length=max_encoder_length, - max_prediction_length=max_prediction_length, - min_prediction_idx=training_max_time_idx, - add_relative_time_idx=add_relative_time_idx, - batch_size=1, - train_val_test_split=(0.0, 0.0, 1.0), - ) - - train_datamodule.setup("fit") - val_datamodule.setup("fit") - test_datamodule.setup("test") - - train_dataloader = train_datamodule.train_dataloader() - val_dataloader = val_datamodule.val_dataloader() - test_dataloader = test_datamodule.test_dataloader() - - return { - "train": train_dataloader, - "val": val_dataloader, - "test": test_dataloader, - "data_module": train_datamodule, - } + return EncoderDecoderTimeSeriesDataModule @classmethod def get_test_train_params(cls): @@ -115,7 +45,7 @@ def get_test_train_params(cls): from pytorch_forecasting.metrics import QuantileLoss - return [ + params = [ { # "loss": nn.MSELoss(), "hidden_size": 32, @@ -134,3 +64,13 @@ def get_test_train_params(cls): "use_revin": False, }, ] + + default_dm_cfg = {"max_encoder_length": 4, "max_prediction_length": 3} + + for param in params: + current_dm_cfg = param.get("datamodule_cfg", {}) + default_dm_cfg.update(current_dm_cfg) + + param["datamodule_cfg"] = default_dm_cfg + + return params diff --git a/pytorch_forecasting/models/temporal_fusion_transformer/_tft_pkg_v2.py b/pytorch_forecasting/models/temporal_fusion_transformer/_tft_pkg_v2.py index 8c95daa6b..d121eba6e 100644 --- a/pytorch_forecasting/models/temporal_fusion_transformer/_tft_pkg_v2.py +++ b/pytorch_forecasting/models/temporal_fusion_transformer/_tft_pkg_v2.py @@ -1,9 +1,9 @@ """TFT package container.""" -from pytorch_forecasting.models.base import _BasePtForecasterV2 +from pytorch_forecasting.base._base_pkg import Base_pkg -class TFT_pkg_v2(_BasePtForecasterV2): +class TFT_pkg_v2(Base_pkg): """TFT package container.""" _tags = { @@ -23,83 +23,13 @@ def get_cls(cls): return TFT @classmethod - def _get_test_datamodule_from(cls, trainer_kwargs): - """Create test dataloaders from trainer_kwargs - following v1 pattern.""" + def get_datamodule_cls(cls): + """Get the underlying DataModule class.""" from pytorch_forecasting.data.data_module import ( EncoderDecoderTimeSeriesDataModule, ) - from pytorch_forecasting.tests._data_scenarios import ( - data_with_covariates_v2, - make_datasets_v2, - ) - - data_with_covariates = data_with_covariates_v2() - - data_loader_default_kwargs = dict( - target="target", - group_ids=["agency_encoded", "sku_encoded"], - add_relative_time_idx=True, - ) - - data_loader_kwargs = trainer_kwargs.get("data_loader_kwargs", {}) - data_loader_default_kwargs.update(data_loader_kwargs) - - datasets_info = make_datasets_v2( - data_with_covariates, **data_loader_default_kwargs - ) - - training_dataset = datasets_info["training_dataset"] - validation_dataset = datasets_info["validation_dataset"] - training_max_time_idx = datasets_info["training_max_time_idx"] - - max_encoder_length = data_loader_kwargs.get("max_encoder_length", 4) - max_prediction_length = data_loader_kwargs.get("max_prediction_length", 3) - add_relative_time_idx = data_loader_kwargs.get("add_relative_time_idx", True) - batch_size = data_loader_kwargs.get("batch_size", 2) - train_datamodule = 
EncoderDecoderTimeSeriesDataModule( - time_series_dataset=training_dataset, - max_encoder_length=max_encoder_length, - max_prediction_length=max_prediction_length, - add_relative_time_idx=add_relative_time_idx, - batch_size=batch_size, - train_val_test_split=(0.8, 0.2, 0.0), - ) - - val_datamodule = EncoderDecoderTimeSeriesDataModule( - time_series_dataset=validation_dataset, - max_encoder_length=max_encoder_length, - max_prediction_length=max_prediction_length, - min_prediction_idx=training_max_time_idx, - add_relative_time_idx=add_relative_time_idx, - batch_size=batch_size, - train_val_test_split=(0.0, 1.0, 0.0), - ) - - test_datamodule = EncoderDecoderTimeSeriesDataModule( - time_series_dataset=validation_dataset, - max_encoder_length=max_encoder_length, - max_prediction_length=max_prediction_length, - min_prediction_idx=training_max_time_idx, - add_relative_time_idx=add_relative_time_idx, - batch_size=1, - train_val_test_split=(0.0, 0.0, 1.0), - ) - - train_datamodule.setup("fit") - val_datamodule.setup("fit") - test_datamodule.setup("test") - - train_dataloader = train_datamodule.train_dataloader() - val_dataloader = val_datamodule.val_dataloader() - test_dataloader = test_datamodule.test_dataloader() - - return { - "train": train_dataloader, - "val": val_dataloader, - "test": test_dataloader, - "data_module": train_datamodule, - } + return EncoderDecoderTimeSeriesDataModule @classmethod def get_test_train_params(cls): @@ -113,19 +43,17 @@ def get_test_train_params(cls): `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. `create_test_instance` uses the first (or only) dictionary in `params` """ - return [ + params = [ {}, dict( hidden_size=25, attention_head_size=5, ), - dict( - data_loader_kwargs=dict(max_encoder_length=5, max_prediction_length=3) - ), + dict(datamodule_cfg=dict(max_encoder_length=5, max_prediction_length=3)), dict( hidden_size=24, attention_head_size=8, - data_loader_kwargs=dict( + datamodule_cfg=dict( max_encoder_length=5, max_prediction_length=3, add_relative_time_idx=False, @@ -133,7 +61,17 @@ def get_test_train_params(cls): ), dict( hidden_size=12, - data_loader_kwargs=dict(max_encoder_length=7, max_prediction_length=10), + datamodule_cfg=dict(max_encoder_length=7, max_prediction_length=10), ), dict(attention_head_size=2), ] + + default_dm_cfg = {"max_encoder_length": 4, "max_prediction_length": 3} + + for param in params: + current_dm_cfg = param.get("datamodule_cfg", {}) + default_dm_cfg.update(current_dm_cfg) + + param["datamodule_cfg"] = default_dm_cfg + + return params diff --git a/pytorch_forecasting/models/tide/_tide_dsipts/_tide_v2.py b/pytorch_forecasting/models/tide/_tide_dsipts/_tide_v2.py index 70620928e..f49caaf59 100644 --- a/pytorch_forecasting/models/tide/_tide_dsipts/_tide_v2.py +++ b/pytorch_forecasting/models/tide/_tide_dsipts/_tide_v2.py @@ -79,7 +79,7 @@ def __init__( """ super().__init__(loss=loss) - self.save_hyperparameters(logger=False) + self.save_hyperparameters(ignore=["loss", "logging_metrics", "metadata"]) self.dropout = dropout_rate self.persistence_weight = persistence_weight diff --git a/pytorch_forecasting/models/tide/_tide_dsipts/_tide_v2_pkg.py b/pytorch_forecasting/models/tide/_tide_dsipts/_tide_v2_pkg.py index d3cf70454..6b2780053 100644 --- a/pytorch_forecasting/models/tide/_tide_dsipts/_tide_v2_pkg.py +++ b/pytorch_forecasting/models/tide/_tide_dsipts/_tide_v2_pkg.py @@ -1,9 +1,9 @@ """TIDE package container.""" -from pytorch_forecasting.models.base._base_object import _BasePtForecasterV2 +from 
pytorch_forecasting.base._base_pkg import Base_pkg -class TIDE_pkg_v2(_BasePtForecasterV2): +class TIDE_pkg_v2(Base_pkg): """TIDE package container.""" _tags = { @@ -19,83 +19,13 @@ def get_cls(cls): return TIDE @classmethod - def _get_test_datamodule_from(cls, trainer_kwargs): - """Create test dataloaders from trainer_kwargs - following v1 pattern.""" + def get_datamodule_cls(cls): + """Get the underlying DataModule class.""" from pytorch_forecasting.data.data_module import ( EncoderDecoderTimeSeriesDataModule, ) - from pytorch_forecasting.tests._data_scenarios import ( - data_with_covariates_v2, - make_datasets_v2, - ) - - data_with_covariates = data_with_covariates_v2() - - data_loader_default_kwargs = dict( - target="target", - group_ids=["agency_encoded", "sku_encoded"], - add_relative_time_idx=True, - ) - - data_loader_kwargs = trainer_kwargs.get("data_loader_kwargs", {}) - data_loader_default_kwargs.update(data_loader_kwargs) - - datasets_info = make_datasets_v2( - data_with_covariates, **data_loader_default_kwargs - ) - - training_dataset = datasets_info["training_dataset"] - validation_dataset = datasets_info["validation_dataset"] - training_max_time_idx = datasets_info["training_max_time_idx"] - - max_encoder_length = data_loader_kwargs.get("max_encoder_length", 4) - max_prediction_length = data_loader_kwargs.get("max_prediction_length", 3) - add_relative_time_idx = data_loader_kwargs.get("add_relative_time_idx", True) - batch_size = data_loader_kwargs.get("batch_size", 2) - - train_datamodule = EncoderDecoderTimeSeriesDataModule( - time_series_dataset=training_dataset, - max_encoder_length=max_encoder_length, - max_prediction_length=max_prediction_length, - add_relative_time_idx=add_relative_time_idx, - batch_size=batch_size, - train_val_test_split=(0.8, 0.2, 0.0), - ) - - val_datamodule = EncoderDecoderTimeSeriesDataModule( - time_series_dataset=validation_dataset, - max_encoder_length=max_encoder_length, - max_prediction_length=max_prediction_length, - min_prediction_idx=training_max_time_idx, - add_relative_time_idx=add_relative_time_idx, - batch_size=batch_size, - train_val_test_split=(0.0, 1.0, 0.0), - ) - - test_datamodule = EncoderDecoderTimeSeriesDataModule( - time_series_dataset=validation_dataset, - max_encoder_length=max_encoder_length, - max_prediction_length=max_prediction_length, - min_prediction_idx=training_max_time_idx, - add_relative_time_idx=add_relative_time_idx, - batch_size=1, - train_val_test_split=(0.0, 0.0, 1.0), - ) - train_datamodule.setup("fit") - val_datamodule.setup("fit") - test_datamodule.setup("test") - - train_dataloader = train_datamodule.train_dataloader() - val_dataloader = val_datamodule.val_dataloader() - test_dataloader = test_datamodule.test_dataloader() - - return { - "train": train_dataloader, - "val": val_dataloader, - "test": test_dataloader, - "data_module": train_datamodule, - } + return EncoderDecoderTimeSeriesDataModule @classmethod def get_test_train_params(cls): @@ -111,7 +41,7 @@ def get_test_train_params(cls): """ from pytorch_forecasting.metrics import MAE, MAPE - return [ + params = [ dict( hidden_size=16, d_model=8, @@ -125,7 +55,7 @@ def get_test_train_params(cls): n_add_enc=2, n_add_dec=2, dropout_rate=0.2, - data_loader_kwargs=dict(max_encoder_length=5, max_prediction_length=3), + datamodule_cfg=dict(max_encoder_length=5, max_prediction_length=3), loss=MAE(), ), dict( @@ -134,7 +64,16 @@ def get_test_train_params(cls): n_add_enc=3, n_add_dec=2, dropout_rate=0.1, - data_loader_kwargs=dict(max_encoder_length=4, 
max_prediction_length=2), + datamodule_cfg=dict(max_encoder_length=4, max_prediction_length=2), loss=MAPE(), ), ] + default_dm_cfg = {"max_encoder_length": 4, "max_prediction_length": 3} + + for param in params: + current_dm_cfg = param.get("datamodule_cfg", {}) + default_dm_cfg.update(current_dm_cfg) + + param["datamodule_cfg"] = default_dm_cfg + + return params diff --git a/pytorch_forecasting/models/timexer/_timexer_pkg_v2.py b/pytorch_forecasting/models/timexer/_timexer_pkg_v2.py index 74b27227f..2bb377cc4 100644 --- a/pytorch_forecasting/models/timexer/_timexer_pkg_v2.py +++ b/pytorch_forecasting/models/timexer/_timexer_pkg_v2.py @@ -2,10 +2,10 @@ Metadata container for TimeXer v2. """ -from pytorch_forecasting.models.base._base_object import _BasePtForecasterV2 +from pytorch_forecasting.base._base_pkg import Base_pkg -class TimeXer_pkg_v2(_BasePtForecasterV2): +class TimeXer_pkg_v2(Base_pkg): """TimeXer metadata container.""" _tags = { @@ -25,77 +25,11 @@ def get_cls(cls): return TimeXer @classmethod - def _get_test_datamodule_from(cls, trainer_kwargs): - """Create test dataloaders from trainer_kwargs - following v1 pattern.""" + def get_datamodule_cls(cls): + """Get the underlying DataModule class.""" from pytorch_forecasting.data._tslib_data_module import TslibDataModule - from pytorch_forecasting.tests._data_scenarios import ( - data_with_covariates_v2, - make_datasets_v2, - ) - data_with_covariates = data_with_covariates_v2() - - data_loader_default_kwargs = dict( - target="target", - group_ids=["agency_encoded", "sku_encoded"], - add_relative_time_idx=True, - ) - - data_loader_kwargs = trainer_kwargs.get("data_loader_kwargs", {}) - data_loader_default_kwargs.update(data_loader_kwargs) - - datasets_info = make_datasets_v2( - data_with_covariates, **data_loader_default_kwargs - ) - - training_dataset = datasets_info["training_dataset"] - validation_dataset = datasets_info["validation_dataset"] - - context_length = data_loader_kwargs.get("context_length", 12) - prediction_length = data_loader_kwargs.get("prediction_length", 4) - batch_size = data_loader_kwargs.get("batch_size", 2) - - train_datamodule = TslibDataModule( - time_series_dataset=training_dataset, - context_length=context_length, - prediction_length=prediction_length, - add_relative_time_idx=data_loader_kwargs.get("add_relative_time_idx", True), - batch_size=batch_size, - train_val_test_split=(0.8, 0.2, 0.0), - ) - - val_datamodule = TslibDataModule( - time_series_dataset=validation_dataset, - context_length=context_length, - prediction_length=prediction_length, - add_relative_time_idx=data_loader_kwargs.get("add_relative_time_idx", True), - batch_size=batch_size, - train_val_test_split=(0.0, 1.0, 0.0), - ) - - test_datamodule = TslibDataModule( - time_series_dataset=validation_dataset, - context_length=context_length, - prediction_length=prediction_length, - add_relative_time_idx=data_loader_kwargs.get("add_relative_time_idx", True), - batch_size=1, - train_val_test_split=(0.0, 0.0, 1.0), - ) - - train_datamodule.setup("fit") - val_datamodule.setup("fit") - test_datamodule.setup("test") - - train_dataloader = train_datamodule.train_dataloader() - val_dataloader = val_datamodule.val_dataloader() - test_dataloader = test_datamodule.test_dataloader() - - return { - "train": train_dataloader, - "val": val_dataloader, - "test": test_dataloader, - "data_module": train_datamodule, - } + return TslibDataModule @classmethod def get_test_train_params(cls): @@ -111,17 +45,17 @@ def get_test_train_params(cls): """ from 
diff --git a/pytorch_forecasting/models/timexer/_timexer_pkg_v2.py b/pytorch_forecasting/models/timexer/_timexer_pkg_v2.py
index 74b27227f..2bb377cc4 100644
--- a/pytorch_forecasting/models/timexer/_timexer_pkg_v2.py
+++ b/pytorch_forecasting/models/timexer/_timexer_pkg_v2.py
@@ -2,10 +2,10 @@
 Metadata container for TimeXer v2.
 """

-from pytorch_forecasting.models.base._base_object import _BasePtForecasterV2
+from pytorch_forecasting.base._base_pkg import Base_pkg


-class TimeXer_pkg_v2(_BasePtForecasterV2):
+class TimeXer_pkg_v2(Base_pkg):
     """TimeXer metadata container."""

     _tags = {
@@ -25,77 +25,11 @@ def get_cls(cls):
         return TimeXer

     @classmethod
-    def _get_test_datamodule_from(cls, trainer_kwargs):
-        """Create test dataloaders from trainer_kwargs - following v1 pattern."""
+    def get_datamodule_cls(cls):
+        """Get the underlying DataModule class."""
         from pytorch_forecasting.data._tslib_data_module import TslibDataModule
-        from pytorch_forecasting.tests._data_scenarios import (
-            data_with_covariates_v2,
-            make_datasets_v2,
-        )

-        data_with_covariates = data_with_covariates_v2()
-
-        data_loader_default_kwargs = dict(
-            target="target",
-            group_ids=["agency_encoded", "sku_encoded"],
-            add_relative_time_idx=True,
-        )
-
-        data_loader_kwargs = trainer_kwargs.get("data_loader_kwargs", {})
-        data_loader_default_kwargs.update(data_loader_kwargs)
-
-        datasets_info = make_datasets_v2(
-            data_with_covariates, **data_loader_default_kwargs
-        )
-
-        training_dataset = datasets_info["training_dataset"]
-        validation_dataset = datasets_info["validation_dataset"]
-
-        context_length = data_loader_kwargs.get("context_length", 12)
-        prediction_length = data_loader_kwargs.get("prediction_length", 4)
-        batch_size = data_loader_kwargs.get("batch_size", 2)
-
-        train_datamodule = TslibDataModule(
-            time_series_dataset=training_dataset,
-            context_length=context_length,
-            prediction_length=prediction_length,
-            add_relative_time_idx=data_loader_kwargs.get("add_relative_time_idx", True),
-            batch_size=batch_size,
-            train_val_test_split=(0.8, 0.2, 0.0),
-        )
-
-        val_datamodule = TslibDataModule(
-            time_series_dataset=validation_dataset,
-            context_length=context_length,
-            prediction_length=prediction_length,
-            add_relative_time_idx=data_loader_kwargs.get("add_relative_time_idx", True),
-            batch_size=batch_size,
-            train_val_test_split=(0.0, 1.0, 0.0),
-        )
-
-        test_datamodule = TslibDataModule(
-            time_series_dataset=validation_dataset,
-            context_length=context_length,
-            prediction_length=prediction_length,
-            add_relative_time_idx=data_loader_kwargs.get("add_relative_time_idx", True),
-            batch_size=1,
-            train_val_test_split=(0.0, 0.0, 1.0),
-        )
-
-        train_datamodule.setup("fit")
-        val_datamodule.setup("fit")
-        test_datamodule.setup("test")
-
-        train_dataloader = train_datamodule.train_dataloader()
-        val_dataloader = val_datamodule.val_dataloader()
-        test_dataloader = test_datamodule.test_dataloader()
-
-        return {
-            "train": train_dataloader,
-            "val": val_dataloader,
-            "test": test_dataloader,
-            "data_module": train_datamodule,
-        }
+        return TslibDataModule

     @classmethod
     def get_test_train_params(cls):
@@ -111,17 +45,17 @@ def get_test_train_params(cls):
         """
         from pytorch_forecasting.metrics import QuantileLoss

-        return [
+        params = [
             {},
             dict(
                 hidden_size=64,
                 n_heads=4,
             ),
-            dict(data_loader_kwargs=dict(context_length=12, prediction_length=3)),
+            dict(datamodule_cfg=dict(context_length=12, prediction_length=3)),
             dict(
                 hidden_size=32,
                 n_heads=2,
-                data_loader_kwargs=dict(
+                datamodule_cfg=dict(
                     context_length=12,
                     prediction_length=3,
                     add_relative_time_idx=False,
@@ -130,7 +64,7 @@ def get_test_train_params(cls):
             dict(
                 hidden_size=128,
                 patch_length=12,
-                data_loader_kwargs=dict(context_length=16, prediction_length=4),
+                datamodule_cfg=dict(context_length=16, prediction_length=4),
             ),
             dict(
                 n_heads=2,
@@ -156,7 +90,7 @@ def get_test_train_params(cls):
                 factor=2,
                 activation="relu",
                 dropout=0.05,
-                data_loader_kwargs=dict(
+                datamodule_cfg=dict(
                     context_length=16,
                     prediction_length=4,
                 ),
@@ -172,3 +106,12 @@ def get_test_train_params(cls):
                 use_efficient_attention=True,
             ),
         ]
+        default_dm_cfg = {"context_length": 12, "prediction_length": 4}
+
+        for param in params:
+            current_dm_cfg = param.get("datamodule_cfg", {})
+            # merge into a fresh dict so parameter sets do not alias one dict
+            # and defaults are not mutated across iterations
+            param["datamodule_cfg"] = {**default_dm_cfg, **current_dm_cfg}
+
+        return params
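Both package rewrites above merge per-configuration overrides into a set of defaults. A fresh dict per entry matters here: updating the shared default in place with `dict.update` and assigning it back would leak earlier overrides into later entries and alias one dict across all parameter sets. A self-contained illustration of the merge semantics used:

    default_cfg = {"context_length": 12, "prediction_length": 4}
    overrides = [{}, {"context_length": 16}]

    # dict unpacking builds a new dict per entry; later keys win
    merged = [{**default_cfg, **o} for o in overrides]
    assert merged[0] == {"context_length": 12, "prediction_length": 4}
    assert merged[1] == {"context_length": 16, "prediction_length": 4}
    assert merged[0] is not merged[1]  # no shared state between entries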
diff --git a/pytorch_forecasting/tests/test_all_estimators_v2.py b/pytorch_forecasting/tests/test_all_estimators_v2.py
deleted file mode 100644
index 9c28c5d0a..000000000
--- a/pytorch_forecasting/tests/test_all_estimators_v2.py
+++ /dev/null
@@ -1,137 +0,0 @@
-"""Automated tests based on the skbase test suite template."""
-
-import shutil
-
-import lightning.pytorch as pl
-from lightning.pytorch.callbacks import EarlyStopping
-from lightning.pytorch.loggers import TensorBoardLogger
-import torch
-import torch.nn as nn
-
-from pytorch_forecasting.metrics import SMAPE
-from pytorch_forecasting.tests.test_all_estimators import (
-    EstimatorFixtureGenerator,
-    EstimatorPackageConfig,
-)
-
-# whether to test only estimators from modules that are changed w.r.t. main
-# default is False, can be set to True by pytest --only_changed_modules True flag
-ONLY_CHANGED_MODULES = False
-
-
-def _integration(
-    estimator_cls,
-    dataloaders,
-    tmp_path,
-    data_loader_kwargs={},
-    clip_target: bool = False,
-    trainer_kwargs=None,
-    **kwargs,
-):
-    train_dataloader = dataloaders["train"]
-    val_dataloader = dataloaders["val"]
-    test_dataloader = dataloaders["test"]
-
-    early_stop_callback = EarlyStopping(
-        monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min"
-    )
-
-    logger = TensorBoardLogger(tmp_path)
-    if trainer_kwargs is None:
-        trainer_kwargs = {}
-    trainer = pl.Trainer(
-        max_epochs=3,
-        gradient_clip_val=0.1,
-        callbacks=[early_stop_callback],
-        enable_checkpointing=True,
-        default_root_dir=tmp_path,
-        limit_train_batches=2,
-        limit_val_batches=2,
-        limit_test_batches=2,
-        logger=logger,
-        **trainer_kwargs,
-    )
-    training_data_module = dataloaders.get("data_module")
-    metadata = training_data_module.metadata
-
-    assert isinstance(
-        metadata, dict
-    ), f"Expected metadata to be dict, got {type(metadata)}"
-
-    if "loss" in kwargs:
-        loss = kwargs["loss"]
-        kwargs.pop("loss")
-    else:
-        loss = SMAPE()
-
-    net = estimator_cls(
-        metadata=metadata,
-        loss=loss,
-        **kwargs,
-    )
-
-    trainer.fit(
-        net,
-        train_dataloaders=train_dataloader,
-        val_dataloaders=val_dataloader,
-    )
-    test_outputs = trainer.test(net, dataloaders=test_dataloader)
-    assert len(test_outputs) > 0
-
-    # todo: add the predict pipeline and make this test cleaner
-    x, y = next(iter(test_dataloader))
-    net.eval()
-    with torch.no_grad():
-        output = net(x)
-    net.train()
-    prediction = output["prediction"]
-    n_dims = prediction.ndim
-    assert n_dims == 3, (
-        f"Prediction output must be 3D, but got {n_dims}D tensor "
-        f"with shape {output.shape}"
-    )
-
-    shutil.rmtree(tmp_path, ignore_errors=True)
-
-
-class TestAllPtForecastersV2(EstimatorPackageConfig, EstimatorFixtureGenerator):
-    """Generic tests for all objects in the mini package."""
-
-    object_type_filter = "forecaster_pytorch_v2"
-
-    def test_doctest_examples(self, object_class):
-        """Runs doctests for estimator class."""
-        from skbase.utils.doctest_run import run_doctest
-
-        run_doctest(object_class, name=f"class {object_class.__name__}")
-
-    def test_integration(
-        self,
-        object_pkg,
-        trainer_kwargs,
-        tmp_path,
-    ):
-        object_class = object_pkg.get_cls()
-        dataloaders = object_pkg._get_test_datamodule_from(trainer_kwargs)
-
-        _integration(object_class, dataloaders, tmp_path, **trainer_kwargs)
-
-    def test_pkg_linkage(self, object_pkg, object_class):
-        """Test that the package is linked correctly."""
-        # check name method
-        msg = (
-            f"Package {object_pkg}.name() does not match class "
-            f"name {object_class.__name__}. "
-            "The expected package name is "
-            f"{object_class.__name__}_pkg."
-        )
-        assert object_pkg.name() == object_class.__name__, msg
-
-        # check naming convention
-        msg = (
-            f"Package {object_pkg.__name__} does not match class "
-            f"name {object_class.__name__}. "
-            "The expected package name is "
-            f"{object_class.__name__}_pkg."
-        )
-        assert object_pkg.__name__ == object_class.__name__ + "_pkg_v2", msg
diff --git a/pytorch_forecasting/tests/test_all_v2/__init__.py b/pytorch_forecasting/tests/test_all_v2/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/pytorch_forecasting/tests/test_all_v2/_test_integration.py b/pytorch_forecasting/tests/test_all_v2/_test_integration.py
new file mode 100644
index 000000000..83c1bdc78
--- /dev/null
+++ b/pytorch_forecasting/tests/test_all_v2/_test_integration.py
@@ -0,0 +1,35 @@
+from typing import Any
+
+import torch
+
+from pytorch_forecasting.base._base_pkg import Base_pkg
+from pytorch_forecasting.data import TimeSeries
+
+
+def _integration(
+    pkg: Base_pkg,
+    test_data: dict[str, TimeSeries],
+    datamodule_cfg: dict[str, Any],
+    **kwargs,
+):
+    """Test integration of models with `TimeSeries` data and datamodules."""
+    pkg.fit(test_data["train"])
+
+    predictions = pkg.predict(
+        test_data["predict"],
+        mode="raw",
+    )
+    assert predictions is not None
+    assert isinstance(predictions, dict)
+    assert "prediction" in predictions
+
+    pred_tensor = predictions["prediction"]
+    assert isinstance(pred_tensor, torch.Tensor)
+    assert pred_tensor.ndim == 3, f"Prediction must be 3D, got {pred_tensor.ndim}D"
+
+    expected_pred_len = datamodule_cfg.get("prediction_length")
+    if expected_pred_len:
+        assert pred_tensor.shape[1] == expected_pred_len, (
+            f"Pred length mismatch: expected {expected_pred_len}, "
+            f"got {pred_tensor.shape[1]}"
+        )
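The new `_integration` helper pins down the v2 output contract: `predict(..., mode="raw")` returns a dict whose `"prediction"` entry is a 3D tensor of shape `(batch, prediction_length, n_outputs)`. A sketch of the same check against a stub output; the shape semantics are inferred from the assertions above, not from a documented API:

    import torch

    # stand-in raw output: 2 samples, horizon of 4 steps, 1 target dimension
    raw_output = {"prediction": torch.zeros(2, 4, 1)}

    pred = raw_output["prediction"]
    assert pred.ndim == 3      # (batch, prediction_length, n_outputs)
    assert pred.shape[1] == 4  # matches datamodule_cfg["prediction_length"]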
diff --git a/pytorch_forecasting/tests/test_all_v2/test_all_estimators_v2.py b/pytorch_forecasting/tests/test_all_v2/test_all_estimators_v2.py
new file mode 100644
index 000000000..87123028b
--- /dev/null
+++ b/pytorch_forecasting/tests/test_all_v2/test_all_estimators_v2.py
@@ -0,0 +1,134 @@
+"""Automated tests based on the skbase test suite template."""
+
+import os
+from pathlib import Path
+import shutil
+
+import torch
+
+from pytorch_forecasting.tests.test_all_estimators import (
+    EstimatorFixtureGenerator,
+    EstimatorPackageConfig,
+)
+from pytorch_forecasting.tests.test_all_v2._test_integration import _integration
+from pytorch_forecasting.tests.test_all_v2.utils import _setup_pkg_and_data
+
+# whether to test only estimators from modules that are changed w.r.t. main
+# default is False, can be set to True by pytest --only_changed_modules True flag
+ONLY_CHANGED_MODULES = False
+
+
+class TestAllPtForecastersV2(EstimatorPackageConfig, EstimatorFixtureGenerator):
+    """Generic tests for all objects in the mini package."""
+
+    object_type_filter = "forecaster_pytorch_v2"
+
+    def test_doctest_examples(self, object_class):
+        """Runs doctests for estimator class."""
+        from skbase.utils.doctest_run import run_doctest
+
+        run_doctest(object_class, name=f"class {object_class.__name__}")
+
+    def test_integration(
+        self,
+        object_pkg,
+        trainer_kwargs,
+        tmp_path,
+    ):
+        pkg, test_data, dm_cfg = _setup_pkg_and_data(
+            object_pkg, trainer_kwargs, tmp_path
+        )
+
+        _integration(pkg, test_data, dm_cfg)
+
+        shutil.rmtree(tmp_path, ignore_errors=True)
+
+    def test_checkpointing(self, object_pkg, trainer_kwargs, tmp_path):
+        """Test that the package can save a checkpoint and reload from it."""
+        pkg, test_data, _ = _setup_pkg_and_data(object_pkg, trainer_kwargs, tmp_path)
+
+        ckpt_dir = Path(tmp_path) / "checkpoints"
+        best_model_path = pkg.fit(
+            test_data["train"],
+            save_ckpt=True,
+            ckpt_dir=ckpt_dir,
+            ckpt_kwargs={"monitor": "train_loss_epoch"},
+        )
+
+        assert best_model_path is not None
+        assert os.path.exists(best_model_path)
+
+        dm_cfg_path = Path(best_model_path).parent / "model_cfg.pkl"
+        assert (
+            dm_cfg_path.exists()
+        ), "model_cfg.pkl was not saved alongside checkpoint"
+
+        pkg_loaded = object_pkg(ckpt_path=best_model_path)
+
+        predictions = pkg_loaded.predict(test_data["predict"], mode="prediction")
+
+        assert predictions is not None
+        assert "prediction" in predictions
+        shutil.rmtree(tmp_path, ignore_errors=True)
+
+    def test_predict_modes(self, object_pkg, trainer_kwargs, tmp_path):
+        """Test different prediction modes and return_info."""
+        pkg, test_data, _ = _setup_pkg_and_data(object_pkg, trainer_kwargs, tmp_path)
+
+        pkg.fit(test_data["train"], save_ckpt=False)
+        predict_data = test_data["predict"]
+
+        # mode="raw"
+        raw_out = pkg.predict(predict_data, mode="raw")
+        raw_pred_tensor = raw_out["prediction"]
+        assert any(isinstance(v, torch.Tensor) for v in raw_out.values())
+        assert (
+            raw_pred_tensor.ndim == 3
+        ), f"Prediction must be 3D, got {raw_pred_tensor.ndim}D"
+
+        # mode="quantiles"
+        quantile_out = pkg.predict(predict_data, mode="quantiles")
+        quantile_pred_tensor = quantile_out["prediction"]
+        assert isinstance(quantile_pred_tensor, torch.Tensor)
+        assert (
+            quantile_pred_tensor.ndim == 3
+        ), f"Prediction must be 3D, got {quantile_pred_tensor.ndim}D"
+
+        # mode="prediction"
+        pred_out = pkg.predict(predict_data, mode="prediction")
+        pred_tensor = pred_out["prediction"]
+        assert isinstance(pred_tensor, torch.Tensor)
+        assert pred_tensor.ndim == 2, f"Prediction must be 2D, got {pred_tensor.ndim}D"
+
+        return_info_keys = ["index", "x"]
+        info_out = pkg.predict(
+            predict_data, mode="prediction", return_info=return_info_keys
+        )
+
+        for key in return_info_keys:
+            assert key in info_out, f"Requested key '{key}' missing from output"
+
+        assert info_out["index"] is not None
+        assert isinstance(info_out["x"], dict)
+
+        shutil.rmtree(tmp_path, ignore_errors=True)
+
+    def test_pkg_linkage(self, object_pkg, object_class):
+        """Test that the package is linked correctly."""
+        # check name method
+        msg = (
+            f"Package {object_pkg}.name() does not match class "
+            f"name {object_class.__name__}. "
+            "The expected package name is "
+            f"{object_class.__name__}."
+        )
+        assert object_pkg.name() == object_class.__name__, msg
+
+        # check naming convention
+        msg = (
+            f"Package {object_pkg.__name__} does not match class "
+            f"name {object_class.__name__}. "
+            "The expected package name is "
+            f"{object_class.__name__}_pkg_v2."
+        )
+        assert object_pkg.__name__ == object_class.__name__ + "_pkg_v2", msg
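Condensed, the checkpoint round-trip that `test_checkpointing` exercises looks roughly as follows; `pkg`, `train_data`, and `new_data` are placeholders, and the `save_ckpt`/`ckpt_kwargs`/`ckpt_path` surface is taken from the test above rather than from a documented API:

    from pathlib import Path

    # fit returns the best checkpoint path when save_ckpt=True
    best_path = pkg.fit(
        train_data,
        save_ckpt=True,
        ckpt_dir=Path("checkpoints"),
        ckpt_kwargs={"monitor": "train_loss_epoch"},
    )

    # a fresh package restored from the checkpoint can predict directly
    restored = type(pkg)(ckpt_path=best_path)
    point_forecast = restored.predict(new_data, mode="prediction")["prediction"]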
diff --git a/pytorch_forecasting/tests/test_all_v2/utils.py b/pytorch_forecasting/tests/test_all_v2/utils.py
new file mode 100644
index 000000000..a8cb714dc
--- /dev/null
+++ b/pytorch_forecasting/tests/test_all_v2/utils.py
@@ -0,0 +1,61 @@
+from typing import Any
+
+from lightning.pytorch.loggers import TensorBoardLogger
+
+from pytorch_forecasting.base._base_pkg import Base_pkg
+from pytorch_forecasting.data import TimeSeries
+from pytorch_forecasting.metrics import SMAPE
+
+
+def _setup_pkg_and_data(
+    estimator_cls: type[Base_pkg],
+    trainer_kwargs: dict[str, Any],
+    tmp_path: str,
+) -> tuple[Base_pkg, dict[str, TimeSeries], dict[str, Any]]:
+    """
+    Helper to initialize the package, datasets, and configs.
+
+    Returns
+    -------
+    pkg : Base_pkg
+        The initialized model package.
+    test_data : dict
+        Dictionary containing 'train' and 'predict' TimeSeries datasets.
+    datamodule_cfg : dict
+        The final datamodule configuration used.
+    """
+    params_copy = trainer_kwargs.copy()
+    datamodule_cfg = params_copy.pop("datamodule_cfg", {})
+    model_cfg = params_copy
+
+    if "loss" not in model_cfg:
+        model_cfg["loss"] = SMAPE()
+
+    default_datamodule_cfg = {
+        "train_val_test_split": (0.8, 0.2),
+        "add_relative_time_idx": True,
+        "batch_size": 2,
+    }
+    default_datamodule_cfg.update(datamodule_cfg)
+
+    logger = TensorBoardLogger(str(tmp_path))
+    trainer_cfg = {
+        "max_epochs": 2,
+        "gradient_clip_val": 0.1,
+        "enable_checkpointing": True,
+        "default_root_dir": str(tmp_path),
+        "limit_train_batches": 2,
+        "limit_val_batches": 1,
+        "accelerator": "cpu",
+        "logger": logger,
+    }
+
+    test_data = estimator_cls.get_test_dataset_from(**default_datamodule_cfg)
+
+    pkg = estimator_cls(
+        model_cfg=model_cfg,
+        trainer_cfg=trainer_cfg,
+        datamodule_cfg=default_datamodule_cfg,
+    )
+
+    return pkg, test_data, default_datamodule_cfg
diff --git a/pytorch_forecasting/tests/test_class_register.py b/pytorch_forecasting/tests/test_class_register.py
index 2a1052125..a9699fb99 100644
--- a/pytorch_forecasting/tests/test_class_register.py
+++ b/pytorch_forecasting/tests/test_class_register.py
@@ -20,7 +20,9 @@ def get_test_class_registry():
     keys are scitypes, values are test classes TestAll[Scitype]
     """
     from pytorch_forecasting.tests.test_all_estimators import TestAllPtForecasters
-    from pytorch_forecasting.tests.test_all_estimators_v2 import TestAllPtForecastersV2
+    from pytorch_forecasting.tests.test_all_v2.test_all_estimators_v2 import (
+        TestAllPtForecastersV2,
+    )

     testclass_dict = dict()
     testclass_dict["forecaster_pytorch_v1"] = TestAllPtForecasters
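Putting the pieces of `utils.py` together, the end-to-end workflow these tests rely on reduces to roughly the following sketch; `TIDE_pkg_v2` stands in for any v2 package, its import path is assumed, and the config values mirror the test defaults above:

    from pytorch_forecasting.metrics import SMAPE
    from pytorch_forecasting.models.tide._tide_pkg_v2 import TIDE_pkg_v2  # path assumed

    datamodule_cfg = {"max_encoder_length": 4, "max_prediction_length": 3, "batch_size": 2}
    trainer_cfg = {"max_epochs": 2, "accelerator": "cpu"}
    model_cfg = {"loss": SMAPE()}

    # packages can synthesize their own train/predict TimeSeries for testing
    test_data = TIDE_pkg_v2.get_test_dataset_from(**datamodule_cfg)

    pkg = TIDE_pkg_v2(
        model_cfg=model_cfg,
        trainer_cfg=trainer_cfg,
        datamodule_cfg=datamodule_cfg,
    )
    pkg.fit(test_data["train"])
    raw = pkg.predict(test_data["predict"], mode="raw")["prediction"]  # 3D tensor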