From af94a866b7ea9604043c421a7db3edf17925e29d Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Thu, 22 May 2025 21:51:48 +0530
Subject: [PATCH 01/36] Basedeep forecaster added

---
 aeon/forecasting/deep_learning/__init__.py |   1 +
 aeon/forecasting/deep_learning/base.py     | 215 +++++++++++++++++++++
 2 files changed, 216 insertions(+)
 create mode 100644 aeon/forecasting/deep_learning/__init__.py
 create mode 100644 aeon/forecasting/deep_learning/base.py

diff --git a/aeon/forecasting/deep_learning/__init__.py b/aeon/forecasting/deep_learning/__init__.py
new file mode 100644
index 0000000000..42067031dc
--- /dev/null
+++ b/aeon/forecasting/deep_learning/__init__.py
@@ -0,0 +1 @@
+"""Initialization for aeon forecasting deep learning module."""
diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
new file mode 100644
index 0000000000..17eead9c49
--- /dev/null
+++ b/aeon/forecasting/deep_learning/base.py
@@ -0,0 +1,215 @@
+"""
+BaseDeepForecaster class.
+
+A simplified first base class for deep learning forecasting models.
+This class is a subclass of BaseForecaster and inherits its methods and attributes.
+It provides a base for deep learning models, including methods for training and
+predicting.
+
+"""
+
+from abc import abstractmethod
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+
+from aeon.forecasting.base import BaseForecaster
+
+
+class BaseDeepForecaster(BaseForecaster):
+    """Base class for deep learning forecasters in aeon.
+
+    Parameters
+    ----------
+    horizon : int, default=1
+        Forecasting horizon, the number of steps ahead to predict.
+    window : int, default=10
+        The window size for creating input sequences.
+    batch_size : int, default=32
+        Batch size for training the model.
+    epochs : int, default=100
+        Number of epochs to train the model.
+    verbose : int, default=0
+        Verbosity mode (0, 1, or 2).
+    optimizer : str or tf.keras.optimizers.Optimizer, default='adam'
+        Optimizer to use for training.
+    loss : str or tf.keras.losses.Loss, default='mse'
+        Loss function for training.
+    random_state : int, default=None
+        Seed for random number generators.
+    """
+
+    def __init__(
+        self,
+        horizon=1,
+        window=10,
+        batch_size=32,
+        epochs=100,
+        verbose=0,
+        optimizer="adam",
+        loss="mse",
+        random_state=None,
+    ):
+        self.horizon = horizon
+        self.window = window
+        self.batch_size = batch_size
+        self.epochs = epochs
+        self.verbose = verbose
+        self.optimizer = optimizer
+        self.loss = loss
+        self.random_state = random_state
+        self.model_ = None
+        super().__init__()
+
+    def _fit(self, y, X=None):
+        """Fit the forecaster to training data.
+
+        Parameters
+        ----------
+        y : np.ndarray or pd.Series
+            Target time series to which to fit the forecaster.
+        X : np.ndarray or pd.DataFrame, default=None
+            Exogenous variables.
+
+        Returns
+        -------
+        self : returns an instance of self
+        """
+        # Set random seed for reproducibility
+        if self.random_state is not None:
+            np.random.seed(self.random_state)
+            tf.random.set_seed(self.random_state)
+
+        # Convert input data to numpy array
+        y_inner = self._convert_input(y)
+
+        # Create sequences for training
+        X_train, y_train = self._create_sequences(y_inner)
+
+        # Build and compile the model
+        self.model_ = self._build_model(X_train.shape[1:])
+        self.model_.compile(optimizer=self.optimizer, loss=self.loss)
+
+        # Train the model
+        self.model_.fit(
+            X_train,
+            y_train,
+            batch_size=self.batch_size,
+            epochs=self.epochs,
+            verbose=self.verbose,
+        )
+
+        return self
+
+    def _predict(self, y=None, X=None):
+        """Make forecasts for y.
+
+        Parameters
+        ----------
+        y : np.ndarray or pd.Series, default=None
+            Series to predict from.
+        X : np.ndarray or pd.DataFrame, default=None
+            Exogenous variables.
+
+        Returns
+        -------
+        predictions : np.ndarray
+            Predicted values.
+        """
+        if y is None:
+            raise ValueError("y cannot be None for prediction")
+
+        # Convert input data to numpy array
+        y_inner = self._convert_input(y)
+
+        # Use the last window of data for prediction
+        last_window = y_inner[-self.window :].reshape(1, self.window, 1)
+
+        # Make prediction
+        prediction = self.model_.predict(last_window, verbose=0)
+
+        return prediction.flatten()
+
+    def _forecast(self, y, X=None):
+        """Forecast time series at future horizon.
+
+        Parameters
+        ----------
+        y : np.ndarray or pd.Series
+            Time series to forecast from.
+        X : np.ndarray or pd.DataFrame, default=None
+            Exogenous variables.
+
+        Returns
+        -------
+        forecasts : np.ndarray
+            Forecasted values.
+        """
+        # Fit the model
+        self._fit(y, X)
+
+        # Make prediction
+        return self._predict(y, X)
+
+    def _convert_input(self, y):
+        """Convert input data to numpy array.
+
+        Parameters
+        ----------
+        y : np.ndarray or pd.Series
+            Input time series.
+
+        Returns
+        -------
+        y_inner : np.ndarray
+            Converted numpy array.
+        """
+        if isinstance(y, pd.Series) or isinstance(y, pd.DataFrame):
+            y_inner = y.values
+        else:
+            y_inner = y
+
+        # Ensure 1D array
+        if len(y_inner.shape) > 1:
+            y_inner = y_inner.flatten()
+
+        return y_inner
+
+    def _create_sequences(self, data):
+        """Create input sequences and target values for training.
+
+        Parameters
+        ----------
+        data : np.ndarray
+            Time series data.
+
+        Returns
+        -------
+        X : np.ndarray
+            Input sequences.
+        y : np.ndarray
+            Target values.
+        """
+        X, y = [], []
+        for i in range(len(data) - self.window - self.horizon + 1):
+            X.append(data[i : (i + self.window)])
+            y.append(data[i + self.window : (i + self.window + self.horizon)])
+
+        return np.array(X).reshape(-1, self.window, 1), np.array(y)
+
+    @abstractmethod
+    def _build_model(self, input_shape):
+        """Build the deep learning model.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            Shape of input data.
+
+        Returns
+        -------
+        model : tf.keras.Model
+            Compiled Keras model.
+        """
+        pass

From d2ee9ec5acda38de318ad22c1df0563b3f9d526f Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Mon, 26 May 2025 20:30:50 +0530
Subject: [PATCH 02/36] init for basedlf added

---
 aeon/forecasting/deep_learning/__init__.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/aeon/forecasting/deep_learning/__init__.py b/aeon/forecasting/deep_learning/__init__.py
index 42067031dc..c4b7a27030 100644
--- a/aeon/forecasting/deep_learning/__init__.py
+++ b/aeon/forecasting/deep_learning/__init__.py
@@ -1 +1,7 @@
 """Initialization for aeon forecasting deep learning module."""
+
+__all__ = [
+    "BaseDeepForecaster",
+]
+
+from aeon.forecasting.deep_learning.base import BaseDeepForecaster

From ab3030c0767fb773714c523a32963aa99c18c078 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 15 Jun 2025 16:13:43 +0530
Subject: [PATCH 03/36] test file and axis added for basedeepforecaster

---
 aeon/forecasting/deep_learning/base.py        | 76 ++++++++++++++-----
 .../deep_learning/tests/__init__.py           |  1 +
 .../deep_learning/tests/test_base.py          | 62 +++++++++++++++
 3 files changed, 120 insertions(+), 19 deletions(-)
 create mode 100644 aeon/forecasting/deep_learning/tests/__init__.py
 create mode 100644 aeon/forecasting/deep_learning/tests/test_base.py

diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index 17eead9c49..ebab0116bd 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -1,12 +1,13 @@
+"""Base class module for deep learning forecasters in aeon.
+
+This module defines the `BaseDeepForecaster` class, an abstract base class for
+deep learning-based forecasting models within the aeon toolkit.
 """
-BaseDeepForecaster class.
 
-A simplified first base class for deep learning forecasting models.
-This class is a subclass of BaseForecaster and inherits its methods and attributes.
-It provides a base for deep learning models, including methods for training and
-predicting.
+from __future__ import annotations
 
-"""
+__maintainer__ = []
+__all__ = ["BaseDeepForecaster"]
 
 from abc import abstractmethod
 
@@ -20,6 +21,9 @@
 class BaseDeepForecaster(BaseForecaster):
     """Base class for deep learning forecasters in aeon.
 
+    This class provides a foundation for deep learning-based forecasting models,
+    handling data preprocessing, model training, and prediction.
+
     Parameters
     ----------
     horizon : int, default=1
@@ -38,6 +42,9 @@ class BaseDeepForecaster(BaseForecaster):
         Loss function for training.
     random_state : int, default=None
         Seed for random number generators.
+    axis : int, default=0
+        Axis along which to apply the forecaster.
+        Default is 0 for univariate time series.
     """
 
     def __init__(
@@ -50,6 +57,7 @@ def __init__(
         optimizer="adam",
         loss="mse",
         random_state=None,
+        axis=0,
     ):
         self.horizon = horizon
         self.window = window
@@ -59,8 +67,11 @@ def __init__(
         self.optimizer = optimizer
         self.loss = loss
         self.random_state = random_state
+        self.axis = axis
         self.model_ = None
-        super().__init__()
+
+        # Pass horizon and axis to BaseForecaster
+        super().__init__(horizon=horizon, axis=axis)
 
     def _fit(self, y, X=None):
         """Fit the forecaster to training data.
@@ -74,7 +85,8 @@ def _fit(self, y, X=None):
 
         Returns
         -------
-        self : returns an instance of self
+        self : BaseDeepForecaster
+            Returns an instance of self.
         """
         # Set random seed for reproducibility
         if self.random_state is not None:
@@ -83,12 +95,21 @@ def _fit(self, y, X=None):
 
         # Convert input data to numpy array
         y_inner = self._convert_input(y)
+        if y_inner.shape[0] < self.window + self.horizon:
+            raise ValueError(
+                f"Data length ({y_inner.shape[0]}) is insufficient"
+                f"({self.window}) and horizon ({self.horizon})."
+            )
 
         # Create sequences for training
         X_train, y_train = self._create_sequences(y_inner)
 
+        if X_train.shape[0] == 0:
+            raise ValueError("No training sequences could be created.")
+
         # Build and compile the model
-        self.model_ = self._build_model(X_train.shape[1:])
+        input_shape = X_train.shape[1:]
+        self.model_ = self._build_model(input_shape)
         self.model_.compile(optimizer=self.optimizer, loss=self.loss)
 
         # Train the model
@@ -115,7 +136,7 @@ def _predict(self, y=None, X=None):
         Returns
         -------
         predictions : np.ndarray
-            Predicted values.
+            Predicted values for the specified horizon.
         """
         if y is None:
             raise ValueError("y cannot be None for prediction")
@@ -123,13 +144,26 @@ def _predict(self, y=None, X=None):
         # Convert input data to numpy array
         y_inner = self._convert_input(y)
 
+        if len(y_inner) < self.window:
+            raise ValueError(
+                f"Input data length ({len(y_inner)}) is less than the window size "
+                f"({self.window})."
+            )
+
         # Use the last window of data for prediction
         last_window = y_inner[-self.window :].reshape(1, self.window, 1)
 
         # Make prediction
-        prediction = self.model_.predict(last_window, verbose=0)
+        predictions = []
+        current_window = last_window
+        for _ in range(self.horizon):
+            pred = self.model_.predict(current_window, verbose=0)
+            predictions.append(pred[0, 0])
+            # Update the window with the latest prediction (autoregressive)
+            current_window = np.roll(current_window, -1, axis=1)
+            current_window[0, -1, 0] = pred[0, 0]
 
-        return prediction.flatten()
+        return np.array(predictions)
 
     def _forecast(self, y, X=None):
         """Forecast time series at future horizon.
@@ -144,13 +178,9 @@ def _forecast(self, y, X=None):
         Returns
         -------
         forecasts : np.ndarray
-            Forecasted values.
+            Forecasted values for the specified horizon.
         """
-        # Fit the model
-        self._fit(y, X)
-
-        # Make prediction
-        return self._predict(y, X)
+        return self._fit(y, X)._predict(y, X)
 
     def _convert_input(self, y):
         """Convert input data to numpy array.
@@ -191,12 +221,20 @@ def _create_sequences(self, data):
         y : np.ndarray
             Target values.
         """
+        if len(data) < self.window + self.horizon:
+            raise ValueError(
+                f"Data length ({len(data)}) is insufficient for window "
+                f"({self.window}) and horizon ({self.horizon})."
+            )
+
         X, y = [], []
         for i in range(len(data) - self.window - self.horizon + 1):
             X.append(data[i : (i + self.window)])
             y.append(data[i + self.window : (i + self.window + self.horizon)])
 
-        return np.array(X).reshape(-1, self.window, 1), np.array(y)
+        X = np.array(X).reshape(-1, self.window, 1)
+        y = np.array(y).reshape(-1, self.horizon)
+        return X, y
 
     @abstractmethod
     def _build_model(self, input_shape):
diff --git a/aeon/forecasting/deep_learning/tests/__init__.py b/aeon/forecasting/deep_learning/tests/__init__.py
new file mode 100644
index 0000000000..3dda9d25ea
--- /dev/null
+++ b/aeon/forecasting/deep_learning/tests/__init__.py
@@ -0,0 +1 @@
+"""Deep Learning Forecasting Tests File."""
diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
new file mode 100644
index 0000000000..05536f98c5
--- /dev/null
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -0,0 +1,62 @@
+"""Test for BaseDeepForecaster class in aeon."""
+
+import numpy as np
+import pytest
+
+from aeon.forecasting.deep_learning import BaseDeepForecaster
+from aeon.utils.validation._dependencies import _check_soft_dependencies
+
+
+class SimpleDeepForecaster(BaseDeepForecaster):
+    """A simple concrete implementation of BaseDeepForecaster for testing."""
+
+    def _build_model(self, input_shape):
+        import tensorflow as tf
+
+        model = tf.keras.Sequential(
+            [
+                tf.keras.layers.Flatten(input_shape=input_shape),
+                tf.keras.layers.Dense(10, activation="relu"),
+                tf.keras.layers.Dense(self.horizon),
+            ]
+        )
+        return model
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies("tensorflow", severity="none"),
+    reason="skip test if required soft dependency not available",
+)
+def test_base_deep_forecaster_fit_predict():
+    """Test fitting and predicting with BaseDeepForecaster implementation."""
+    # Generate synthetic data
+    np.random.seed(42)
+    data = np.random.randn(50)
+
+    # Initialize forecaster
+    forecaster = SimpleDeepForecaster(horizon=2, window=5, epochs=1, verbose=0)
+
+    # Fit the model
+    forecaster.fit(data)
+
+    # Predict
+    predictions = forecaster.predict(data)
+
+    # Validate output shape
+    assert (
+        len(predictions) == 2
+    ), f"Expected predictions of length 2, got {len(predictions)}"
+    assert isinstance(predictions, np.ndarray), "Predictions should be a numpy array"
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies("tensorflow", severity="none"),
+    reason="skip test if required soft dependency not available",
+)
+def test_base_deep_forecaster_insufficient_data():
+    """Test error handling for insufficient data."""
+    data = np.random.randn(5)
+    forecaster = SimpleDeepForecaster(horizon=2, window=5, epochs=1, verbose=0)
+
+    with pytest.raises(ValueError, match="Data length.*insufficient"):
+        forecaster.fit(data)

From 1f202db1cae45834503986ffb37859599b29759a Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 15 Jun 2025 16:38:39 +0530
Subject: [PATCH 04/36] test locally

---
 .github/workflows/pr_pytest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml
index cf1baee900..9c70b16b4c 100644
--- a/.github/workflows/pr_pytest.yml
+++ b/.github/workflows/pr_pytest.yml
@@ -3,7 +3,7 @@ name: PR pytest
 on:
   push:
     branches:
-      - main
+      - basedlf
   pull_request:
     paths:
       - "aeon/**"

From 14eb41fa83a5799d0fa8608ffd516f1766da7a1c Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 15 Jun 2025 17:19:27 +0530
Subject: [PATCH 05/36] dlf corrected

---
 .github/workflows/pr_pytest.yml                   | 2 +-
 aeon/forecasting/deep_learning/tests/test_base.py | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml
index 9c70b16b4c..cf1baee900 100644
--- a/.github/workflows/pr_pytest.yml
+++ b/.github/workflows/pr_pytest.yml
@@ -3,7 +3,7 @@ name: PR pytest
 on:
   push:
     branches:
-      - basedlf
+      - main
   pull_request:
     paths:
       - "aeon/**"
diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index 05536f98c5..1eae0969e1 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -10,6 +10,9 @@
 class SimpleDeepForecaster(BaseDeepForecaster):
     """A simple concrete implementation of BaseDeepForecaster for testing."""
 
+    def __init__(self, horizon=1, window=5, epochs=1, verbose=0):
+        super().__init__(horizon=horizon, window=window, epochs=epochs, verbose=verbose)
+
     def _build_model(self, input_shape):
         import tensorflow as tf
 

From d1a2aab72097dd38658fa0a10f572005c5b70aaa Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 22 Jun 2025 12:27:15 +0530
Subject: [PATCH 06/36] tf soft dep added

---
 aeon/forecasting/deep_learning/base.py            | 3 ++-
 aeon/forecasting/deep_learning/tests/test_base.py | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index ebab0116bd..ba33331fc7 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -13,7 +13,6 @@
 
 import numpy as np
 import pandas as pd
-import tensorflow as tf
 
 from aeon.forecasting.base import BaseForecaster
 
@@ -88,6 +87,8 @@ def _fit(self, y, X=None):
         self : BaseDeepForecaster
             Returns an instance of self.
         """
+        import tensorflow as tf
+
         # Set random seed for reproducibility
         if self.random_state is not None:
             np.random.seed(self.random_state)
diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index 1eae0969e1..270a60225e 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -7,6 +7,10 @@
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
 
+@pytest.mark.skipif(
+    not _check_soft_dependencies("tensorflow", severity="none"),
+    reason="skip test if required soft dependency not available",
+)
 class SimpleDeepForecaster(BaseDeepForecaster):
     """A simple concrete implementation of BaseDeepForecaster for testing."""
 

From 5fb72c706e87b3b7bf4fcd9701298b250cc98d89 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 6 Jul 2025 16:51:57 +0530
Subject: [PATCH 07/36] tcn network added

---
 aeon/networks/__init__.py       |   2 +
 aeon/networks/_tcn.py           | 326 ++++++++++++++++++++++++++++++++
 aeon/networks/tests/test_tcn.py | 212 +++++++++++++++++++++
 3 files changed, 540 insertions(+)
 create mode 100644 aeon/networks/_tcn.py
 create mode 100644 aeon/networks/tests/test_tcn.py

diff --git a/aeon/networks/__init__.py b/aeon/networks/__init__.py
index d774abe102..61e669283c 100644
--- a/aeon/networks/__init__.py
+++ b/aeon/networks/__init__.py
@@ -19,6 +19,7 @@
     "AEBiGRUNetwork",
     "DisjointCNNNetwork",
     "RecurrentNetwork",
+    "TemporalConvolutionalNetwork",
 ]
 from aeon.networks._ae_abgru import AEAttentionBiGRUNetwork
 from aeon.networks._ae_bgru import AEBiGRUNetwork
@@ -36,4 +37,5 @@
 from aeon.networks._mlp import MLPNetwork
 from aeon.networks._resnet import ResNetNetwork
 from aeon.networks._rnn import RecurrentNetwork
+from aeon.networks._tcn import TemporalConvolutionalNetwork
 from aeon.networks.base import BaseDeepLearningNetwork
diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py
new file mode 100644
index 0000000000..08f4ff9341
--- /dev/null
+++ b/aeon/networks/_tcn.py
@@ -0,0 +1,326 @@
+"""Implementation of Temporal Convolutional Network (TCN).
+
+Based on the paper "An Empirical Evaluation of Generic Convolutional and
+Recurrent Networks for Sequence Modeling" by Bai et al. (2018).
+"""
+
+__maintainer__ = []
+
+from aeon.networks.base import BaseDeepLearningNetwork
+
+
+class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
+    """Temporal Convolutional Network (TCN) for sequence modeling.
+
+    A generic convolutional architecture for sequence modeling that combines:
+    - Dilated convolutions for exponentially large receptive fields
+    - Residual connections for training stability
+
+    The TCN can take sequences of any length and map them to output sequences
+    of the same length, making it suitable for autoregressive prediction tasks.
+
+    Parameters
+    ----------
+    num_inputs : int
+        Number of input channels/features in the input sequence.
+    num_channels : list of int
+        List specifying the number of output channels for each layer.
+        The length determines the depth of the network.
+    kernel_size : int, default=2
+        Size of the convolutional kernel. Larger kernels can capture
+        more local context but require more parameters.
+    dropout : float, default=0.2
+        Dropout rate applied after each convolutional layer for regularization.
+
+    Notes
+    -----
+    The receptive field size grows exponentially with network depth due to
+    dilated convolutions with dilation factors of 2^i for layer i.
+
+    References
+    ----------
+    Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of
+    generic convolutional and recurrent networks for sequence modeling.
+    arXiv preprint arXiv:1803.01271.
+    """
+
+    _config = {
+        "python_dependencies": ["tensorflow"],
+        "python_version": "<3.13",
+        "structure": "encoder",
+    }
+
+    def __init__(
+        self,
+        num_inputs: int,
+        num_channels: list,
+        kernel_size: int = 2,
+        dropout: float = 0.2,
+    ):
+        """Initialize the TCN architecture.
+
+        Parameters
+        ----------
+        num_inputs : int
+            Number of input channels/features.
+        num_channels : list of int
+            Number of output channels for each temporal block.
+        kernel_size : int, default=2
+            Size of convolutional kernels.
+        dropout : float, default=0.2
+            Dropout rate for regularization.
+        """
+        super().__init__()
+        self.num_inputs = num_inputs
+        self.num_channels = num_channels
+        self.kernel_size = kernel_size
+        self.dropout = dropout
+
+    def _conv1d_with_variable_padding(
+        self,
+        x,
+        filters: int,
+        kernel_size: int,
+        padding_value: int,
+        stride: int = 1,
+        dilation_rate: int = 1,
+    ):
+        """Apply 1D convolution with variable padding for causal convolutions.
+
+        Parameters
+        ----------
+        x : tf.Tensor
+            Input tensor of shape (batch_size, channels, sequence_length).
+        filters : int
+            Number of output filters.
+        kernel_size : int
+            Size of the convolutional kernel.
+        padding_value : int
+            Amount of padding to apply.
+        stride : int, default=1
+            Stride of the convolution.
+        dilation_rate : int, default=1
+            Dilation rate for dilated convolutions.
+
+        Returns
+        -------
+        tf.Tensor
+            Output tensor after convolution.
+        """
+        import tensorflow as tf
+
+        # Transpose to Keras format (batch, sequence, channels)
+        x_keras_format = tf.keras.layers.Permute((2, 1))(x)
+
+        # Apply padding in sequence dimension
+        padded_x = tf.keras.layers.ZeroPadding1D(padding=padding_value)(x_keras_format)
+
+        # Create and apply convolution layer
+        conv_layer = tf.keras.layers.Conv1D(
+            filters=filters,
+            kernel_size=kernel_size,
+            strides=stride,
+            dilation_rate=dilation_rate,
+            padding="valid",
+        )
+
+        # Apply convolution
+        out = conv_layer(padded_x)
+
+        # Transpose back to PyTorch format (batch, channels, sequence)
+        return tf.keras.layers.Permute((2, 1))(out)
+
+    def _chomp_1d(self, x, chomp_size: int):
+        """Remove padding from the end of sequences to maintain causality.
+
+        This operation ensures that the output at time t only depends on
+        inputs from times 0 to t, preventing information leakage from future.
+
+        Parameters
+        ----------
+        x : tf.Tensor
+            Input tensor of shape (batch_size, channels, sequence_length).
+        chomp_size : int
+            Number of time steps to remove from the end.
+
+        Returns
+        -------
+        tf.Tensor
+            Chomped tensor with reduced sequence length.
+        """
+        return x[:, :, :-chomp_size]
+
+    def _temporal_block(
+        self,
+        x,
+        n_inputs: int,
+        n_outputs: int,
+        kernel_size: int,
+        stride: int,
+        dilation: int,
+        padding: int,
+        dropout: float = 0.2,
+        training: bool = None,
+    ):
+        """Create a temporal block with dilated causal convolutions.
+
+        Each temporal block consists of:
+        1. Two dilated causal convolutions
+        2. ReLU activations and dropout for regularization
+        3. Residual connection with optional 1x1 convolution for dimension
+           matching
+
+        Parameters
+        ----------
+        x : tf.Tensor
+            Input tensor of shape (batch_size, channels, sequence_length).
+        n_inputs : int
+            Number of input channels.
+        n_outputs : int
+            Number of output channels.
+        kernel_size : int
+            Size of convolutional kernels.
+        stride : int
+            Stride of convolutions (typically 1).
+        dilation : int
+            Dilation factor for dilated convolutions.
+        padding : int
+            Padding size to be chomped off.
+        dropout : float, default=0.2
+            Dropout rate for regularization.
+        training : bool, optional
+            Whether the model is in training mode.
+
+        Returns
+        -------
+        tf.Tensor
+            Output tensor of shape (batch_size, n_outputs, sequence_length).
+        """
+        import tensorflow as tf
+
+        # First convolution block
+        out = self._conv1d_with_variable_padding(
+            x, n_outputs, kernel_size, padding, stride, dilation
+        )
+        out = self._chomp_1d(out, padding)
+        out = tf.keras.layers.ReLU()(out)
+        out = tf.keras.layers.Dropout(dropout)(out, training=training)
+
+        # Second convolution block
+        out = self._conv1d_with_variable_padding(
+            out, n_outputs, kernel_size, padding, stride, dilation
+        )
+        out = self._chomp_1d(out, padding)
+        out = tf.keras.layers.ReLU()(out)
+        out = tf.keras.layers.Dropout(dropout)(out, training=training)
+
+        # Residual connection with optional dimension matching
+        if n_inputs != n_outputs:
+            res = self._conv1d_with_variable_padding(x, n_outputs, 1, 0, 1, 1)
+        else:
+            res = x
+
+        # Add residual and apply final ReLU
+        result = tf.keras.layers.Add()([out, res])
+        return tf.keras.layers.ReLU()(result)
+
+    def _temporal_conv_net(
+        self,
+        x,
+        num_inputs: int,
+        num_channels: list,
+        kernel_size: int = 2,
+        dropout: float = 0.2,
+        training: bool = None,
+    ):
+        """Apply the complete Temporal Convolutional Network.
+
+        Stacks multiple temporal blocks with exponentially increasing dilation
+        factors to achieve a large receptive field efficiently.
+
+        Parameters
+        ----------
+        x : tf.Tensor
+            Input tensor of shape (batch_size, channels, sequence_length).
+        num_inputs : int
+            Number of input channels.
+        num_channels : list of int
+            Number of output channels for each temporal block.
+        kernel_size : int, default=2
+            Size of convolutional kernels.
+        dropout : float, default=0.2
+            Dropout rate for regularization.
+        training : bool, optional
+            Whether the model is in training mode.
+
+        Returns
+        -------
+        tf.Tensor
+            Output tensor after applying all temporal blocks.
+        """
+        num_levels = len(num_channels)
+        for i in range(num_levels):
+            dilation_size = 2**i
+            in_channels = num_inputs if i == 0 else num_channels[i - 1]
+            out_channels = num_channels[i]
+            padding = (kernel_size - 1) * dilation_size
+
+            x = self._temporal_block(
+                x,
+                n_inputs=in_channels,
+                n_outputs=out_channels,
+                kernel_size=kernel_size,
+                stride=1,
+                dilation=dilation_size,
+                padding=padding,
+                dropout=dropout,
+                training=training,
+            )
+
+        return x
+
+    def build_network(self, input_shape: tuple, **kwargs) -> tuple:
+        """Build the complete TCN architecture.
+
+        Constructs a series of temporal blocks with exponentially increasing
+        dilation factors to achieve a large receptive field efficiently.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            Shape of input data (sequence_length, num_features).
+        **kwargs
+            Additional keyword arguments (unused).
+
+        Returns
+        -------
+        tuple
+            A tuple containing (input_layer, output_tensor) representing
+            the complete network architecture.
+
+        Notes
+        -----
+        The dilation factor for layer i is 2^i, which ensures exponential
+        growth of the receptive field while maintaining computational
+        efficiency.
+        """
+        import tensorflow as tf
+
+        # Create input layer
+        input_layer = tf.keras.layers.Input(shape=input_shape)
+
+        # Transpose input to match the expected format (batch, channels, seq)
+        x = input_layer
+
+        # Apply TCN using the private function
+        x = self._temporal_conv_net(
+            x,
+            num_inputs=self.num_inputs,
+            num_channels=self.num_channels,
+            kernel_size=self.kernel_size,
+            dropout=self.dropout,
+        )
+
+        output = x
+
+        return input_layer, output
diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py
new file mode 100644
index 0000000000..97500f5557
--- /dev/null
+++ b/aeon/networks/tests/test_tcn.py
@@ -0,0 +1,212 @@
+"""Tests for the TemporalConvolutionalNetwork."""
+
+import pytest
+
+from aeon.networks import TemporalConvolutionalNetwork
+from aeon.utils.validation._dependencies import _check_soft_dependencies
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
+)
+def test_tcn_network_basic():
+    """Test basic TCN network creation and build_network functionality."""
+    import tensorflow as tf
+
+    input_shape = (100, 5)
+    num_inputs = 5
+    num_channels = [32, 64]
+
+    tcn_network = TemporalConvolutionalNetwork(
+        num_inputs=num_inputs, num_channels=num_channels
+    )
+    input_layer, output_layer = tcn_network.build_network(input_shape)
+
+    # Check that layers are created correctly
+    assert hasattr(input_layer, "shape"), "Input layer should have a shape attribute"
+    assert hasattr(output_layer, "shape"), "Output layer should have a shape attribute"
+    assert input_layer.dtype == tf.float32
+    assert output_layer.dtype == tf.float32
+
+    # Create a model to test the network structure
+    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
+    assert model is not None, "Model should be created successfully"
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
+)
+@pytest.mark.parametrize("num_channels", [[32], [32, 64], [16, 32, 64], [64, 32, 16]])
+def test_tcn_network_different_channels(num_channels):
+    """Test TCN network with different channel configurations."""
+    import tensorflow as tf
+
+    input_shape = (50, 3)
+    num_inputs = 3
+
+    tcn_network = TemporalConvolutionalNetwork(
+        num_inputs=num_inputs, num_channels=num_channels
+    )
+    input_layer, output_layer = tcn_network.build_network(input_shape)
+
+    # Create a model and verify it works
+    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
+    assert model is not None
+
+    # Test with dummy data
+    import numpy as np
+
+    dummy_input = np.random.random((8,) + input_shape)
+    output = model(dummy_input)
+    assert output is not None, "Model should produce output"
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
+)
+@pytest.mark.parametrize("kernel_size", [2, 3, 5])
+def test_tcn_network_kernel_sizes(kernel_size):
+    """Test TCN network with different kernel sizes."""
+    import tensorflow as tf
+
+    input_shape = (80, 4)
+    num_inputs = 4
+    num_channels = [32, 64]
+
+    tcn_network = TemporalConvolutionalNetwork(
+        num_inputs=num_inputs,
+        num_channels=num_channels,
+        kernel_size=kernel_size,
+    )
+    input_layer, output_layer = tcn_network.build_network(input_shape)
+
+    # Verify network builds successfully
+    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
+    assert model is not None
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
+)
+@pytest.mark.parametrize("dropout", [0.0, 0.1, 0.3, 0.5])
+def test_tcn_network_dropout_rates(dropout):
+    """Test TCN network with different dropout rates."""
+    import tensorflow as tf
+
+    input_shape = (60, 2)
+    num_inputs = 2
+    num_channels = [16, 32]
+
+    tcn_network = TemporalConvolutionalNetwork(
+        num_inputs=num_inputs, num_channels=num_channels, dropout=dropout
+    )
+    input_layer, output_layer = tcn_network.build_network(input_shape)
+
+    # Verify network builds successfully
+    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
+    assert model is not None
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
+)
+def test_tcn_network_output_shape():
+    """Test TCN network output shapes."""
+    import numpy as np
+    import tensorflow as tf
+
+    input_shape = (40, 6)
+    batch_size = 16
+    num_inputs = 6
+    num_channels = [32, 64]
+
+    tcn_network = TemporalConvolutionalNetwork(
+        num_inputs=num_inputs, num_channels=num_channels
+    )
+    input_layer, output_layer = tcn_network.build_network(input_shape)
+    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
+
+    # Create dummy input and test output shape
+    dummy_input = np.random.random((batch_size,) + input_shape)
+    output = model(dummy_input)
+
+    # Output should maintain sequence length and have final channel dimension
+    expected_shape = (batch_size, num_channels[-1], input_shape[1])
+    assert (
+        output.shape == expected_shape
+    ), f"Expected shape {expected_shape}, got {output.shape}"
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
+)
+def test_tcn_network_config():
+    """Test TCN network configuration attributes."""
+    tcn_network = TemporalConvolutionalNetwork(num_inputs=3, num_channels=[16, 32])
+
+    # Check _config attributes
+    assert "python_dependencies" in tcn_network._config
+    assert "tensorflow" in tcn_network._config["python_dependencies"]
+    assert "python_version" in tcn_network._config
+    assert "structure" in tcn_network._config
+    assert tcn_network._config["structure"] == "encoder"
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
+)
+def test_tcn_network_parameter_initialization():
+    """Test TCN network parameter initialization."""
+    num_inputs = 4
+    num_channels = [32, 64, 128]
+    kernel_size = 3
+    dropout = 0.2
+
+    tcn_network = TemporalConvolutionalNetwork(
+        num_inputs=num_inputs,
+        num_channels=num_channels,
+        kernel_size=kernel_size,
+        dropout=dropout,
+    )
+
+    # Check that parameters are set correctly
+    assert tcn_network.num_inputs == num_inputs
+    assert tcn_network.num_channels == num_channels
+    assert tcn_network.kernel_size == kernel_size
+    assert tcn_network.dropout == dropout
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
+)
+def test_tcn_network_single_layer():
+    """Test TCN network with single temporal block."""
+    import tensorflow as tf
+
+    input_shape = (30, 2)
+    num_inputs = 2
+    num_channels = [16]  # Single layer
+
+    tcn_network = TemporalConvolutionalNetwork(
+        num_inputs=num_inputs, num_channels=num_channels
+    )
+    input_layer, output_layer = tcn_network.build_network(input_shape)
+
+    # Verify single layer network works
+    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
+    assert model is not None
+
+    # Test with dummy data
+    import numpy as np
+
+    dummy_input = np.random.random((4,) + input_shape)
+    output = model(dummy_input)
+    assert output.shape == (4, 16, 2)

From 3434757d2403729f4b07f67a960c41cc8250a4ff Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 6 Jul 2025 16:52:47 +0530
Subject: [PATCH 08/36] tcn_net pytest added

---
 .github/workflows/pr_pytest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml
index 69323e47c5..368425d136 100644
--- a/.github/workflows/pr_pytest.yml
+++ b/.github/workflows/pr_pytest.yml
@@ -3,7 +3,7 @@ name: PR pytest
 on:
   push:
     branches:
-      - main
+      - tcn_net
   pull_request:
     paths:
       - "aeon/**"

From c602e39cb5edb82537cd697096536f9b9733fb38 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 6 Jul 2025 17:29:00 +0530
Subject: [PATCH 09/36] tcn_network updated with default params

---
 aeon/networks/_tcn.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py
index 08f4ff9341..900b9e47c1 100644
--- a/aeon/networks/_tcn.py
+++ b/aeon/networks/_tcn.py
@@ -52,8 +52,8 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
 
     def __init__(
         self,
-        num_inputs: int,
-        num_channels: list,
+        num_inputs: int = 1,
+        num_channels: list = [16] * 3,
         kernel_size: int = 2,
         dropout: float = 0.2,
     ):

From 05a0f355017c449980d7aafaebdbb3ae7d8aa7dd Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Mon, 7 Jul 2025 17:29:16 +0530
Subject: [PATCH 10/36] TCN forecaster added

---
 .github/workflows/pr_pytest.yml               |   2 +-
 aeon/forecasting/deep_learning/__init__.py    |   2 +
 aeon/forecasting/deep_learning/_tcn.py        | 140 ++++++++++++++++++
 .../deep_learning/tests/test_tcn.py           |  37 +++++
 4 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 aeon/forecasting/deep_learning/_tcn.py
 create mode 100644 aeon/forecasting/deep_learning/tests/test_tcn.py

diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml
index 5240ce85ec..82dbdc0f14 100644
--- a/.github/workflows/pr_pytest.yml
+++ b/.github/workflows/pr_pytest.yml
@@ -3,7 +3,7 @@ name: PR pytest
 on:
   push:
     branches:
-      - tcn_net
+      - tcn_fst
   pull_request:
     paths:
       - "aeon/**"
diff --git a/aeon/forecasting/deep_learning/__init__.py b/aeon/forecasting/deep_learning/__init__.py
index c4b7a27030..8e3bac6a86 100644
--- a/aeon/forecasting/deep_learning/__init__.py
+++ b/aeon/forecasting/deep_learning/__init__.py
@@ -2,6 +2,8 @@
 
 __all__ = [
     "BaseDeepForecaster",
+    "TCNForecaster",
 ]
 
+from aeon.forecasting.deep_learning._tcn import TCNForecaster
 from aeon.forecasting.deep_learning.base import BaseDeepForecaster
diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py
new file mode 100644
index 0000000000..1b591e6c74
--- /dev/null
+++ b/aeon/forecasting/deep_learning/_tcn.py
@@ -0,0 +1,140 @@
+"""TCNForecaster module for deep learning forecasting in aeon."""
+
+from __future__ import annotations
+
+__maintainer__ = []
+__all__ = ["TCNForecaster"]
+
+from aeon.forecasting.deep_learning.base import BaseDeepForecaster
+from aeon.networks._tcn import TemporalConvolutionalNetwork
+
+
+class TCNForecaster(BaseDeepForecaster):
+    """A deep learning forecaster using Temporal Convolutional Network (TCN).
+
+    It leverages the `TemporalConvolutionalNetwork` from aeon's network module
+    to build the architecture suitable for forecasting tasks.
+
+    Parameters
+    ----------
+    horizon : int, default=1
+        Forecasting horizon, the number of steps ahead to predict.
+    window : int, default=10
+        The window size for creating input sequences.
+    batch_size : int, default=32
+        Batch size for training the model.
+    epochs : int, default=100
+        Number of epochs to train the model.
+    verbose : int, default=0
+        Verbosity mode (0, 1, or 2).
+    optimizer : str or tf.keras.optimizers.Optimizer, default='adam'
+        Optimizer to use for training.
+    loss : str or tf.keras.losses.Loss, default='mse'
+        Loss function for training.
+    random_state : int, default=None
+        Seed for random number generators.
+    axis : int, default=0
+        Axis along which to apply the forecaster.
+    num_inputs : int, default=1
+        Number of input channels/features in the input sequence.
+    num_channels : list of int, default=[16, 16, 16]
+        List specifying the number of output channels for each layer of the
+        TCN. The length determines the depth of the network.
+    kernel_size : int, default=2
+        Size of the convolutional kernel in the TCN.
+    dropout : float, default=0.2
+        Dropout rate applied after each convolutional layer for
+        regularization.
+
+
+    """
+
+    def __init__(
+        self,
+        horizon=1,
+        window=10,
+        batch_size=32,
+        epochs=100,
+        verbose=0,
+        optimizer="adam",
+        loss="mse",
+        random_state=None,
+        axis=0,
+        num_inputs=1,
+        num_channels=None,
+        kernel_size=2,
+        dropout=0.2,
+    ):
+        super().__init__(
+            horizon=horizon,
+            window=window,
+            batch_size=batch_size,
+            epochs=epochs,
+            verbose=verbose,
+            optimizer=optimizer,
+            loss=loss,
+            random_state=random_state,
+            axis=axis,
+        )
+        self.num_inputs = num_inputs
+        self.num_channels = num_channels if num_channels is not None else [16, 16, 16]
+        self.kernel_size = kernel_size
+        self.dropout = dropout
+
+    def _add_linear_layer(self, x, output_units):
+        """Add a linear layer to the output of the TCN network.
+
+        Parameters
+        ----------
+        x : tf.Tensor
+            Input tensor from the TCN output, typically of shape
+            (batch_size, channels, sequence_length).
+        output_units : int
+            Number of output units for the linear layer, typically matching
+            the forecasting horizon.
+
+        Returns
+        -------
+        tf.Tensor
+            Output tensor after applying the linear layer.
+        """
+        import tensorflow as tf
+
+        # Take the last time step's output for forecasting
+        x_last = x[:, -1, :]
+        # Apply a dense layer to map to the desired output size (horizon)
+        output = tf.keras.layers.Dense(output_units)(x_last)
+        return output
+
+    def _build_model(self, input_shape):
+        """Build the TCN model for forecasting.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            Shape of input data, typically (window, num_inputs).
+
+        Returns
+        -------
+        model : tf.keras.Model
+            Compiled Keras model with TCN architecture.
+        """
+        import tensorflow as tf
+
+        # Initialize the TCN network with the updated parameters
+        network = TemporalConvolutionalNetwork(
+            num_inputs=self.num_inputs,
+            num_channels=self.num_channels,
+            kernel_size=self.kernel_size,
+            dropout=self.dropout,
+        )
+
+        # Build the network with the given input shape
+        input_layer, output = network.build_network(input_shape=input_shape)
+
+        # Adjust the output layer to match the forecasting horizon
+        output = self._add_linear_layer(output, self.horizon)
+
+        # Create the final model
+        model = tf.keras.Model(inputs=input_layer, outputs=output)
+        return model
diff --git a/aeon/forecasting/deep_learning/tests/test_tcn.py b/aeon/forecasting/deep_learning/tests/test_tcn.py
new file mode 100644
index 0000000000..2717eaf4b4
--- /dev/null
+++ b/aeon/forecasting/deep_learning/tests/test_tcn.py
@@ -0,0 +1,37 @@
+"""Test TCN."""
+
+__maintainer__ = []
+__all__ = []
+
+import pytest
+
+from aeon.datasets import load_airline
+from aeon.forecasting.deep_learning._tcn import TCNForecaster
+from aeon.utils.validation._dependencies import _check_soft_dependencies
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
+)
+@pytest.mark.parametrize("horizon,window,epochs", [(1, 10, 2), (3, 12, 3), (5, 15, 2)])
+def test_tcn_forecaster(horizon, window, epochs):
+    """Test TCNForecaster with different parameter combinations."""
+    import tensorflow as tf
+
+    # Load airline dataset
+    y = load_airline()
+
+    # Initialize TCNForecaster
+    forecaster = TCNForecaster(
+        horizon=horizon, window=window, epochs=epochs, batch_size=16, verbose=0
+    )
+
+    # Fit and predict
+    forecaster.fit(y)
+    prediction = forecaster.predict(y)
+
+    # Basic assertions
+    assert prediction is not None
+    if isinstance(prediction, tf.Tensor):
+        assert not tf.math.is_nan(prediction).numpy()

From 2f3c98b9008d8a7504f24dcd1d22760261a4e1b3 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Mon, 7 Jul 2025 23:43:22 +0530
Subject: [PATCH 11/36] tcn reshaped

---
 aeon/networks/_tcn.py           | 23 ++++++++++++++++++++---
 aeon/networks/tests/test_tcn.py |  4 ++--
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py
index 900b9e47c1..987b0f2b68 100644
--- a/aeon/networks/_tcn.py
+++ b/aeon/networks/_tcn.py
@@ -42,6 +42,21 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
     Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of
     generic convolutional and recurrent networks for sequence modeling.
     arXiv preprint arXiv:1803.01271.
+
+    Examples
+    --------
+    >>> from aeon.networks._tcn import TemporalConvolutionalNetwork
+    >>> from aeon.testing.data_generation import make_example_3d_numpy
+    >>> import tensorflow as tf
+    >>> X, y = make_example_3d_numpy(n_cases=8, n_channels=4, n_timepoints=150,
+    ...                              return_y=True, regression_target=True,
+    ...                              random_state=42)
+    >>> network = TemporalConvolutionalNetwork(num_inputs=4, num_channels=[8, 8])
+    >>> input_layer, output = network.build_network(input_shape=(4, 150))
+    >>> model = tf.keras.Model(inputs=input_layer, outputs=output)
+    >>> model.compile(optimizer="adam", loss="mse")
+    >>> model.fit(X, y, epochs=2, batch_size=2, verbose=0)  # doctest: +SKIP
+    <keras.src.callbacks.History object ...>
     """
 
     _config = {
@@ -53,7 +68,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
     def __init__(
         self,
         num_inputs: int = 1,
-        num_channels: list = [16] * 3,
+        num_channels: list = [16] * 3,  # change to n_filters
         kernel_size: int = 2,
         dropout: float = 0.2,
     ):
@@ -321,6 +336,8 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple:
             dropout=self.dropout,
         )
 
-        output = x
-
+        x = tf.keras.layers.Dense(input_shape[0])(x[:, -1, :])
+        output = tf.keras.layers.Lambda(
+            lambda x: tf.reduce_mean(x, axis=1, keepdims=True), output_shape=(1,)
+        )(x)
         return input_layer, output
diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py
index 97500f5557..b78424a73a 100644
--- a/aeon/networks/tests/test_tcn.py
+++ b/aeon/networks/tests/test_tcn.py
@@ -136,7 +136,7 @@ def test_tcn_network_output_shape():
     output = model(dummy_input)
 
     # Output should maintain sequence length and have final channel dimension
-    expected_shape = (batch_size, num_channels[-1], input_shape[1])
+    expected_shape = (batch_size, 1)
     assert (
         output.shape == expected_shape
     ), f"Expected shape {expected_shape}, got {output.shape}"
@@ -209,4 +209,4 @@ def test_tcn_network_single_layer():
 
     dummy_input = np.random.random((4,) + input_shape)
     output = model(dummy_input)
-    assert output.shape == (4, 16, 2)
+    assert output.shape == (4, 1)

From f6447b180c4b725d889dbed5b80fe948945bbd6c Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 8 Jul 2025 13:34:47 +0530
Subject: [PATCH 12/36] tcn changed

---
 aeon/networks/_tcn.py           | 22 ++++++++++-----------
 aeon/networks/tests/test_tcn.py | 34 ++++++++++++++++-----------------
 2 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py
index 987b0f2b68..62ed404f1b 100644
--- a/aeon/networks/_tcn.py
+++ b/aeon/networks/_tcn.py
@@ -68,7 +68,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
     def __init__(
         self,
         num_inputs: int = 1,
-        num_channels: list = [16] * 3,  # change to n_filters
+        n_filters: list = [16] * 3,  # changed from num_channels
         kernel_size: int = 2,
         dropout: float = 0.2,
     ):
@@ -78,7 +78,7 @@ def __init__(
         ----------
         num_inputs : int
             Number of input channels/features.
-        num_channels : list of int
+        n_filters : list of int
             Number of output channels for each temporal block.
         kernel_size : int, default=2
             Size of convolutional kernels.
@@ -87,7 +87,7 @@ def __init__(
         """
         super().__init__()
         self.num_inputs = num_inputs
-        self.num_channels = num_channels
+        self.n_filters = n_filters
         self.kernel_size = kernel_size
         self.dropout = dropout
 
@@ -243,7 +243,7 @@ def _temporal_conv_net(
         self,
         x,
         num_inputs: int,
-        num_channels: list,
+        n_filters: list,  # changed from num_channels
         kernel_size: int = 2,
         dropout: float = 0.2,
         training: bool = None,
@@ -259,7 +259,7 @@ def _temporal_conv_net(
             Input tensor of shape (batch_size, channels, sequence_length).
         num_inputs : int
             Number of input channels.
-        num_channels : list of int
+        n_filters : list of int
             Number of output channels for each temporal block.
         kernel_size : int, default=2
             Size of convolutional kernels.
@@ -273,11 +273,11 @@ def _temporal_conv_net(
         tf.Tensor
             Output tensor after applying all temporal blocks.
         """
-        num_levels = len(num_channels)
+        num_levels = len(n_filters)
         for i in range(num_levels):
             dilation_size = 2**i
-            in_channels = num_inputs if i == 0 else num_channels[i - 1]
-            out_channels = num_channels[i]
+            in_channels = num_inputs if i == 0 else n_filters[i - 1]
+            out_channels = n_filters[i]
             padding = (kernel_size - 1) * dilation_size
 
             x = self._temporal_block(
@@ -331,13 +331,11 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple:
         x = self._temporal_conv_net(
             x,
             num_inputs=self.num_inputs,
-            num_channels=self.num_channels,
+            n_filters=self.n_filters,
             kernel_size=self.kernel_size,
             dropout=self.dropout,
         )
 
         x = tf.keras.layers.Dense(input_shape[0])(x[:, -1, :])
-        output = tf.keras.layers.Lambda(
-            lambda x: tf.reduce_mean(x, axis=1, keepdims=True), output_shape=(1,)
-        )(x)
+        output = tf.keras.layers.Dense(1)(x)
         return input_layer, output
diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py
index b78424a73a..47c1d38615 100644
--- a/aeon/networks/tests/test_tcn.py
+++ b/aeon/networks/tests/test_tcn.py
@@ -16,10 +16,10 @@ def test_tcn_network_basic():
 
     input_shape = (100, 5)
     num_inputs = 5
-    num_channels = [32, 64]
+    n_filters = [32, 64]
 
     tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, num_channels=num_channels
+        num_inputs=num_inputs, n_filters=n_filters
     )
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
@@ -38,8 +38,8 @@ def test_tcn_network_basic():
     not _check_soft_dependencies(["tensorflow"], severity="none"),
     reason="Tensorflow soft dependency unavailable.",
 )
-@pytest.mark.parametrize("num_channels", [[32], [32, 64], [16, 32, 64], [64, 32, 16]])
-def test_tcn_network_different_channels(num_channels):
+@pytest.mark.parametrize("n_filters", [[32], [32, 64], [16, 32, 64], [64, 32, 16]])
+def test_tcn_network_different_channels(n_filters):
     """Test TCN network with different channel configurations."""
     import tensorflow as tf
 
@@ -47,7 +47,7 @@ def test_tcn_network_different_channels(num_channels):
     num_inputs = 3
 
     tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, num_channels=num_channels
+        num_inputs=num_inputs, n_filters=n_filters
     )
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
@@ -74,11 +74,11 @@ def test_tcn_network_kernel_sizes(kernel_size):
 
     input_shape = (80, 4)
     num_inputs = 4
-    num_channels = [32, 64]
+    n_filters = [32, 64]
 
     tcn_network = TemporalConvolutionalNetwork(
         num_inputs=num_inputs,
-        num_channels=num_channels,
+        n_filters=n_filters,
         kernel_size=kernel_size,
     )
     input_layer, output_layer = tcn_network.build_network(input_shape)
@@ -99,10 +99,10 @@ def test_tcn_network_dropout_rates(dropout):
 
     input_shape = (60, 2)
     num_inputs = 2
-    num_channels = [16, 32]
+    n_filters = [16, 32]
 
     tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, num_channels=num_channels, dropout=dropout
+        num_inputs=num_inputs, n_filters=n_filters, dropout=dropout
     )
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
@@ -123,10 +123,10 @@ def test_tcn_network_output_shape():
     input_shape = (40, 6)
     batch_size = 16
     num_inputs = 6
-    num_channels = [32, 64]
+    n_filters = [32, 64]
 
     tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, num_channels=num_channels
+        num_inputs=num_inputs, n_filters=n_filters
     )
     input_layer, output_layer = tcn_network.build_network(input_shape)
     model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
@@ -148,7 +148,7 @@ def test_tcn_network_output_shape():
 )
 def test_tcn_network_config():
     """Test TCN network configuration attributes."""
-    tcn_network = TemporalConvolutionalNetwork(num_inputs=3, num_channels=[16, 32])
+    tcn_network = TemporalConvolutionalNetwork(num_inputs=3, n_filters=[16, 32])
 
     # Check _config attributes
     assert "python_dependencies" in tcn_network._config
@@ -165,20 +165,20 @@ def test_tcn_network_config():
 def test_tcn_network_parameter_initialization():
     """Test TCN network parameter initialization."""
     num_inputs = 4
-    num_channels = [32, 64, 128]
+    n_filters = [32, 64, 128]
     kernel_size = 3
     dropout = 0.2
 
     tcn_network = TemporalConvolutionalNetwork(
         num_inputs=num_inputs,
-        num_channels=num_channels,
+        n_filters=n_filters,
         kernel_size=kernel_size,
         dropout=dropout,
     )
 
     # Check that parameters are set correctly
     assert tcn_network.num_inputs == num_inputs
-    assert tcn_network.num_channels == num_channels
+    assert tcn_network.n_filters == n_filters
     assert tcn_network.kernel_size == kernel_size
     assert tcn_network.dropout == dropout
 
@@ -193,10 +193,10 @@ def test_tcn_network_single_layer():
 
     input_shape = (30, 2)
     num_inputs = 2
-    num_channels = [16]  # Single layer
+    n_filters = [16]  # Single layer
 
     tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, num_channels=num_channels
+        num_inputs=num_inputs, n_filters=n_filters
     )
     input_layer, output_layer = tcn_network.build_network(input_shape)
 

From 30d862abc9ab04f424d643e1dcf69598d7f85eb7 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 8 Jul 2025 14:09:51 +0530
Subject: [PATCH 13/36] base fst changed

---
 aeon/forecasting/deep_learning/base.py | 54 ++++++++------------------
 1 file changed, 17 insertions(+), 37 deletions(-)

diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index ba33331fc7..40c7b3e212 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -68,6 +68,7 @@ def __init__(
         self.random_state = random_state
         self.axis = axis
         self.model_ = None
+        self.last_window_ = None
 
         # Pass horizon and axis to BaseForecaster
         super().__init__(horizon=horizon, axis=axis)
@@ -121,7 +122,7 @@ def _fit(self, y, X=None):
             epochs=self.epochs,
             verbose=self.verbose,
         )
-
+        self.last_window_ = y_inner[-self.window :]
         return self
 
     def _predict(self, y=None, X=None):
@@ -130,9 +131,9 @@ def _predict(self, y=None, X=None):
         Parameters
         ----------
         y : np.ndarray or pd.Series, default=None
-            Series to predict from.
+            Series to predict from. If None, uses last fitted window.
         X : np.ndarray or pd.DataFrame, default=None
-            Exogenous variables.
+            Exogenous variables (not supported by default).
 
         Returns
         -------
@@ -140,49 +141,28 @@ def _predict(self, y=None, X=None):
             Predicted values for the specified horizon.
         """
         if y is None:
-            raise ValueError("y cannot be None for prediction")
-
-        # Convert input data to numpy array
-        y_inner = self._convert_input(y)
-
-        if len(y_inner) < self.window:
-            raise ValueError(
-                f"Input data length ({len(y_inner)}) is less than the window size "
-                f"({self.window})."
-            )
-
-        # Use the last window of data for prediction
-        last_window = y_inner[-self.window :].reshape(1, self.window, 1)
-
-        # Make prediction
+            if not hasattr(self, "last_window_"):
+                raise ValueError("No fitted data available for prediction.")
+            y_inner = self.last_window_
+        else:
+            y_inner = self._convert_input(y)
+            if len(y_inner) < self.window:
+                raise ValueError(
+                    f"Input data length ({len(y_inner)}) is less than the window size "
+                    f"({self.window})."
+                )
+            y_inner = y_inner[-self.window :]
+
+        last_window = y_inner.reshape(1, self.window, 1)
         predictions = []
         current_window = last_window
         for _ in range(self.horizon):
             pred = self.model_.predict(current_window, verbose=0)
             predictions.append(pred[0, 0])
-            # Update the window with the latest prediction (autoregressive)
             current_window = np.roll(current_window, -1, axis=1)
             current_window[0, -1, 0] = pred[0, 0]
-
         return np.array(predictions)
 
-    def _forecast(self, y, X=None):
-        """Forecast time series at future horizon.
-
-        Parameters
-        ----------
-        y : np.ndarray or pd.Series
-            Time series to forecast from.
-        X : np.ndarray or pd.DataFrame, default=None
-            Exogenous variables.
-
-        Returns
-        -------
-        forecasts : np.ndarray
-            Forecasted values for the specified horizon.
-        """
-        return self._fit(y, X)._predict(y, X)
-
     def _convert_input(self, y):
         """Convert input data to numpy array.
 

From 9b9d26608980f0d87ab69eb890f276ab89192474 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 8 Jul 2025 14:17:21 +0530
Subject: [PATCH 14/36] TCN forecaster updated

---
 aeon/forecasting/deep_learning/_tcn.py | 44 +++++++-------------------
 1 file changed, 12 insertions(+), 32 deletions(-)

diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py
index 1b591e6c74..147dc618ec 100644
--- a/aeon/forecasting/deep_learning/_tcn.py
+++ b/aeon/forecasting/deep_learning/_tcn.py
@@ -37,7 +37,7 @@ class TCNForecaster(BaseDeepForecaster):
         Axis along which to apply the forecaster.
     num_inputs : int, default=1
         Number of input channels/features in the input sequence.
-    num_channels : list of int, default=[16, 16, 16]
+    n_filters : list of int, default=[16, 16, 16]
         List specifying the number of output channels for each layer of the
         TCN. The length determines the depth of the network.
     kernel_size : int, default=2
@@ -49,6 +49,12 @@ class TCNForecaster(BaseDeepForecaster):
 
     """
 
+    _tags = {
+        "capability:horizon": True,
+        "capability:multivariate": True,
+        "capability:exogenous": False,
+    }
+
     def __init__(
         self,
         horizon=1,
@@ -61,7 +67,7 @@ def __init__(
         random_state=None,
         axis=0,
         num_inputs=1,
-        num_channels=None,
+        n_filters=None,
         kernel_size=2,
         dropout=0.2,
     ):
@@ -77,35 +83,10 @@ def __init__(
             axis=axis,
         )
         self.num_inputs = num_inputs
-        self.num_channels = num_channels if num_channels is not None else [16, 16, 16]
+        self.n_filters = n_filters
         self.kernel_size = kernel_size
         self.dropout = dropout
 
-    def _add_linear_layer(self, x, output_units):
-        """Add a linear layer to the output of the TCN network.
-
-        Parameters
-        ----------
-        x : tf.Tensor
-            Input tensor from the TCN output, typically of shape
-            (batch_size, channels, sequence_length).
-        output_units : int
-            Number of output units for the linear layer, typically matching
-            the forecasting horizon.
-
-        Returns
-        -------
-        tf.Tensor
-            Output tensor after applying the linear layer.
-        """
-        import tensorflow as tf
-
-        # Take the last time step's output for forecasting
-        x_last = x[:, -1, :]
-        # Apply a dense layer to map to the desired output size (horizon)
-        output = tf.keras.layers.Dense(output_units)(x_last)
-        return output
-
     def _build_model(self, input_shape):
         """Build the TCN model for forecasting.
 
@@ -121,10 +102,12 @@ def _build_model(self, input_shape):
         """
         import tensorflow as tf
 
+        if self.n_filters is None:
+            self.n_filters = [16] * 3
         # Initialize the TCN network with the updated parameters
         network = TemporalConvolutionalNetwork(
             num_inputs=self.num_inputs,
-            num_channels=self.num_channels,
+            n_filters=self.n_filters,
             kernel_size=self.kernel_size,
             dropout=self.dropout,
         )
@@ -132,9 +115,6 @@ def _build_model(self, input_shape):
         # Build the network with the given input shape
         input_layer, output = network.build_network(input_shape=input_shape)
 
-        # Adjust the output layer to match the forecasting horizon
-        output = self._add_linear_layer(output, self.horizon)
-
         # Create the final model
         model = tf.keras.Model(inputs=input_layer, outputs=output)
         return model

From 78b2f3dfcc3a3eb01558d463da64c2549d0a7c5a Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 8 Jul 2025 14:18:17 +0530
Subject: [PATCH 15/36] test file corrected

---
 aeon/forecasting/deep_learning/tests/test_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index 270a60225e..21e90e4a68 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -47,7 +47,7 @@ def test_base_deep_forecaster_fit_predict():
     forecaster.fit(data)
 
     # Predict
-    predictions = forecaster.predict(data)
+    predictions = forecaster.predict()
 
     # Validate output shape
     assert (

From 49be666b27010b86ea7b85834e2b861012980bfa Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 8 Jul 2025 20:25:05 +0530
Subject: [PATCH 16/36] tcn updated

---
 aeon/networks/__init__.py       |   4 +-
 aeon/networks/_tcn.py           | 138 ++++++++++++++++----------------
 aeon/networks/tests/test_tcn.py |  62 +++++---------
 3 files changed, 92 insertions(+), 112 deletions(-)

diff --git a/aeon/networks/__init__.py b/aeon/networks/__init__.py
index 61e669283c..aed37be7e7 100644
--- a/aeon/networks/__init__.py
+++ b/aeon/networks/__init__.py
@@ -19,7 +19,7 @@
     "AEBiGRUNetwork",
     "DisjointCNNNetwork",
     "RecurrentNetwork",
-    "TemporalConvolutionalNetwork",
+    "TCNNetwork",
 ]
 from aeon.networks._ae_abgru import AEAttentionBiGRUNetwork
 from aeon.networks._ae_bgru import AEBiGRUNetwork
@@ -37,5 +37,5 @@
 from aeon.networks._mlp import MLPNetwork
 from aeon.networks._resnet import ResNetNetwork
 from aeon.networks._rnn import RecurrentNetwork
-from aeon.networks._tcn import TemporalConvolutionalNetwork
+from aeon.networks._tcn import TCNNetwork
 from aeon.networks.base import BaseDeepLearningNetwork
diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py
index 62ed404f1b..5400a557b7 100644
--- a/aeon/networks/_tcn.py
+++ b/aeon/networks/_tcn.py
@@ -1,15 +1,11 @@
-"""Implementation of Temporal Convolutional Network (TCN).
-
-Based on the paper "An Empirical Evaluation of Generic Convolutional and
-Recurrent Networks for Sequence Modeling" by Bai et al. (2018).
-"""
+"""Implementation of Temporal Convolutional Network (TCN)."""
 
 __maintainer__ = []
 
 from aeon.networks.base import BaseDeepLearningNetwork
 
 
-class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
+class TCNNetwork(BaseDeepLearningNetwork):
     """Temporal Convolutional Network (TCN) for sequence modeling.
 
     A generic convolutional architecture for sequence modeling that combines:
@@ -21,9 +17,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
 
     Parameters
     ----------
-    num_inputs : int
-        Number of input channels/features in the input sequence.
-    num_channels : list of int
+    n_blocks : list of int
         List specifying the number of output channels for each layer.
         The length determines the depth of the network.
     kernel_size : int, default=2
@@ -39,19 +33,19 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
 
     References
     ----------
-    Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of
+    .. [1]  Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of
     generic convolutional and recurrent networks for sequence modeling.
     arXiv preprint arXiv:1803.01271.
 
     Examples
     --------
-    >>> from aeon.networks._tcn import TemporalConvolutionalNetwork
+    >>> from aeon.networks._tcn import TCNNetwork
     >>> from aeon.testing.data_generation import make_example_3d_numpy
     >>> import tensorflow as tf
     >>> X, y = make_example_3d_numpy(n_cases=8, n_channels=4, n_timepoints=150,
     ...                              return_y=True, regression_target=True,
     ...                              random_state=42)
-    >>> network = TemporalConvolutionalNetwork(num_inputs=4, num_channels=[8, 8])
+    >>> network = TCNNetwork(num_channels=[8, 8])
     >>> input_layer, output = network.build_network(input_shape=(4, 150))
     >>> model = tf.keras.Model(inputs=input_layer, outputs=output)
     >>> model.compile(optimizer="adam", loss="mse")
@@ -67,8 +61,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
 
     def __init__(
         self,
-        num_inputs: int = 1,
-        n_filters: list = [16] * 3,  # changed from num_channels
+        n_blocks: list = [16] * 3,
         kernel_size: int = 2,
         dropout: float = 0.2,
     ):
@@ -78,7 +71,7 @@ def __init__(
         ----------
         num_inputs : int
             Number of input channels/features.
-        n_filters : list of int
+        n_blocks : list of int
             Number of output channels for each temporal block.
         kernel_size : int, default=2
             Size of convolutional kernels.
@@ -86,33 +79,32 @@ def __init__(
             Dropout rate for regularization.
         """
         super().__init__()
-        self.num_inputs = num_inputs
-        self.n_filters = n_filters
+        self.n_blocks = n_blocks
         self.kernel_size = kernel_size
         self.dropout = dropout
 
     def _conv1d_with_variable_padding(
         self,
-        x,
-        filters: int,
+        input_tensor,
+        n_filters: int,
         kernel_size: int,
         padding_value: int,
-        stride: int = 1,
+        strides: int = 1,
         dilation_rate: int = 1,
     ):
         """Apply 1D convolution with variable padding for causal convolutions.
 
         Parameters
         ----------
-        x : tf.Tensor
+        input_tensor : tf.Tensor
             Input tensor of shape (batch_size, channels, sequence_length).
-        filters : int
+        n_filters : int
             Number of output filters.
         kernel_size : int
             Size of the convolutional kernel.
         padding_value : int
             Amount of padding to apply.
-        stride : int, default=1
+        strides : int, default=1
             Stride of the convolution.
         dilation_rate : int, default=1
             Dilation rate for dilated convolutions.
@@ -125,16 +117,16 @@ def _conv1d_with_variable_padding(
         import tensorflow as tf
 
         # Transpose to Keras format (batch, sequence, channels)
-        x_keras_format = tf.keras.layers.Permute((2, 1))(x)
+        x_keras_format = tf.keras.layers.Permute((2, 1))(input_tensor)
 
         # Apply padding in sequence dimension
         padded_x = tf.keras.layers.ZeroPadding1D(padding=padding_value)(x_keras_format)
 
         # Create and apply convolution layer
         conv_layer = tf.keras.layers.Conv1D(
-            filters=filters,
+            filters=n_filters,
             kernel_size=kernel_size,
-            strides=stride,
+            strides=strides,
             dilation_rate=dilation_rate,
             padding="valid",
         )
@@ -145,7 +137,7 @@ def _conv1d_with_variable_padding(
         # Transpose back to PyTorch format (batch, channels, sequence)
         return tf.keras.layers.Permute((2, 1))(out)
 
-    def _chomp_1d(self, x, chomp_size: int):
+    def _chomp(self, input_tensor, chomp_size: int):
         """Remove padding from the end of sequences to maintain causality.
 
         This operation ensures that the output at time t only depends on
@@ -153,7 +145,7 @@ def _chomp_1d(self, x, chomp_size: int):
 
         Parameters
         ----------
-        x : tf.Tensor
+        input_tensor : tf.Tensor
             Input tensor of shape (batch_size, channels, sequence_length).
         chomp_size : int
             Number of time steps to remove from the end.
@@ -163,17 +155,17 @@ def _chomp_1d(self, x, chomp_size: int):
         tf.Tensor
             Chomped tensor with reduced sequence length.
         """
-        return x[:, :, :-chomp_size]
+        return input_tensor[:, :, :-chomp_size]
 
     def _temporal_block(
         self,
-        x,
+        input_tensor,
         n_inputs: int,
-        n_outputs: int,
+        n_filters: int,
         kernel_size: int,
-        stride: int,
-        dilation: int,
-        padding: int,
+        strides: int,
+        dilation_rate: int,
+        padding_value: int,
         dropout: float = 0.2,
         training: bool = None,
     ):
@@ -187,19 +179,19 @@ def _temporal_block(
 
         Parameters
         ----------
-        x : tf.Tensor
+        input_tensor : tf.Tensor
             Input tensor of shape (batch_size, channels, sequence_length).
         n_inputs : int
             Number of input channels.
-        n_outputs : int
-            Number of output channels.
+        n_filters : int
+            Number of output filters.
         kernel_size : int
             Size of convolutional kernels.
-        stride : int
+        strides : int
             Stride of convolutions (typically 1).
-        dilation : int
+        dilation_rate : int
             Dilation factor for dilated convolutions.
-        padding : int
+        padding_value : int
             Padding size to be chomped off.
         dropout : float, default=0.2
             Dropout rate for regularization.
@@ -209,31 +201,38 @@ def _temporal_block(
         Returns
         -------
         tf.Tensor
-            Output tensor of shape (batch_size, n_outputs, sequence_length).
+            Output tensor of shape (batch_size, n_filters, sequence_length).
         """
         import tensorflow as tf
 
         # First convolution block
         out = self._conv1d_with_variable_padding(
-            x, n_outputs, kernel_size, padding, stride, dilation
+            input_tensor, n_filters, kernel_size, padding_value, strides, dilation_rate
         )
-        out = self._chomp_1d(out, padding)
+        out = self._chomp(out, padding_value)
         out = tf.keras.layers.ReLU()(out)
         out = tf.keras.layers.Dropout(dropout)(out, training=training)
 
         # Second convolution block
         out = self._conv1d_with_variable_padding(
-            out, n_outputs, kernel_size, padding, stride, dilation
+            out, n_filters, kernel_size, padding_value, strides, dilation_rate
         )
-        out = self._chomp_1d(out, padding)
+        out = self._chomp(out, padding_value)
         out = tf.keras.layers.ReLU()(out)
         out = tf.keras.layers.Dropout(dropout)(out, training=training)
 
         # Residual connection with optional dimension matching
-        if n_inputs != n_outputs:
-            res = self._conv1d_with_variable_padding(x, n_outputs, 1, 0, 1, 1)
+        if n_inputs != n_filters:
+            res = self._conv1d_with_variable_padding(
+                input_tensor=input_tensor,
+                n_filters=n_filters,
+                kernel_size=1,
+                padding_value=0,
+                strides=1,
+                dilation_rate=1,
+            )
         else:
-            res = x
+            res = input_tensor
 
         # Add residual and apply final ReLU
         result = tf.keras.layers.Add()([out, res])
@@ -241,9 +240,9 @@ def _temporal_block(
 
     def _temporal_conv_net(
         self,
-        x,
-        num_inputs: int,
-        n_filters: list,  # changed from num_channels
+        input_tensor,
+        n_inputs: int,
+        n_blocks: list,
         kernel_size: int = 2,
         dropout: float = 0.2,
         training: bool = None,
@@ -255,11 +254,11 @@ def _temporal_conv_net(
 
         Parameters
         ----------
-        x : tf.Tensor
+        input_tensor : tf.Tensor
             Input tensor of shape (batch_size, channels, sequence_length).
-        num_inputs : int
+        n_inputs : int
             Number of input channels.
-        n_filters : list of int
+        n_blocks : list of int
             Number of output channels for each temporal block.
         kernel_size : int, default=2
             Size of convolutional kernels.
@@ -273,26 +272,26 @@ def _temporal_conv_net(
         tf.Tensor
             Output tensor after applying all temporal blocks.
         """
-        num_levels = len(n_filters)
+        num_levels = len(n_blocks)
         for i in range(num_levels):
-            dilation_size = 2**i
-            in_channels = num_inputs if i == 0 else n_filters[i - 1]
-            out_channels = n_filters[i]
-            padding = (kernel_size - 1) * dilation_size
+            dilation_rate = 2**i
+            in_channels = n_inputs if i == 0 else n_blocks[i - 1]
+            out_channels = n_blocks[i]
+            padding_value = (kernel_size - 1) * dilation_rate
 
-            x = self._temporal_block(
-                x,
+            input_tensor = self._temporal_block(
+                input_tensor,
                 n_inputs=in_channels,
-                n_outputs=out_channels,
+                n_filters=out_channels,
                 kernel_size=kernel_size,
-                stride=1,
-                dilation=dilation_size,
-                padding=padding,
+                strides=1,
+                dilation_rate=dilation_rate,
+                padding_value=padding_value,
                 dropout=dropout,
                 training=training,
             )
 
-        return x
+        return input_tensor
 
     def build_network(self, input_shape: tuple, **kwargs) -> tuple:
         """Build the complete TCN architecture.
@@ -303,7 +302,7 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple:
         Parameters
         ----------
         input_shape : tuple
-            Shape of input data (sequence_length, num_features).
+            Shape of input data (n_channels, n_timepoints).
         **kwargs
             Additional keyword arguments (unused).
 
@@ -326,12 +325,13 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple:
 
         # Transpose input to match the expected format (batch, channels, seq)
         x = input_layer
+        n_inputs = input_shape[0]
 
         # Apply TCN using the private function
         x = self._temporal_conv_net(
             x,
-            num_inputs=self.num_inputs,
-            n_filters=self.n_filters,
+            n_inputs=n_inputs,
+            n_blocks=self.n_blocks,
             kernel_size=self.kernel_size,
             dropout=self.dropout,
         )
diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py
index 47c1d38615..94495e3c41 100644
--- a/aeon/networks/tests/test_tcn.py
+++ b/aeon/networks/tests/test_tcn.py
@@ -1,8 +1,8 @@
-"""Tests for the TemporalConvolutionalNetwork."""
+"""Tests for the TCNNetwork."""
 
 import pytest
 
-from aeon.networks import TemporalConvolutionalNetwork
+from aeon.networks import TCNNetwork
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
 
@@ -15,12 +15,9 @@ def test_tcn_network_basic():
     import tensorflow as tf
 
     input_shape = (100, 5)
-    num_inputs = 5
-    n_filters = [32, 64]
+    n_blocks = [32, 64]
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, n_filters=n_filters
-    )
+    tcn_network = TCNNetwork(n_blocks=n_blocks)
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Check that layers are created correctly
@@ -38,17 +35,14 @@ def test_tcn_network_basic():
     not _check_soft_dependencies(["tensorflow"], severity="none"),
     reason="Tensorflow soft dependency unavailable.",
 )
-@pytest.mark.parametrize("n_filters", [[32], [32, 64], [16, 32, 64], [64, 32, 16]])
-def test_tcn_network_different_channels(n_filters):
+@pytest.mark.parametrize("n_blocks", [[32], [32, 64], [16, 32, 64], [64, 32, 16]])
+def test_tcn_network_different_channels(n_blocks):
     """Test TCN network with different channel configurations."""
     import tensorflow as tf
 
     input_shape = (50, 3)
-    num_inputs = 3
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, n_filters=n_filters
-    )
+    tcn_network = TCNNetwork(n_blocks=n_blocks)
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Create a model and verify it works
@@ -73,12 +67,10 @@ def test_tcn_network_kernel_sizes(kernel_size):
     import tensorflow as tf
 
     input_shape = (80, 4)
-    num_inputs = 4
-    n_filters = [32, 64]
+    n_blocks = [32, 64]
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs,
-        n_filters=n_filters,
+    tcn_network = TCNNetwork(
+        n_blocks=n_blocks,
         kernel_size=kernel_size,
     )
     input_layer, output_layer = tcn_network.build_network(input_shape)
@@ -98,12 +90,9 @@ def test_tcn_network_dropout_rates(dropout):
     import tensorflow as tf
 
     input_shape = (60, 2)
-    num_inputs = 2
-    n_filters = [16, 32]
+    n_blocks = [16, 32]
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, n_filters=n_filters, dropout=dropout
-    )
+    tcn_network = TCNNetwork(n_blocks=n_blocks, dropout=dropout)
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Verify network builds successfully
@@ -122,12 +111,9 @@ def test_tcn_network_output_shape():
 
     input_shape = (40, 6)
     batch_size = 16
-    num_inputs = 6
-    n_filters = [32, 64]
+    n_blocks = [32, 64]
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, n_filters=n_filters
-    )
+    tcn_network = TCNNetwork(n_blocks=n_blocks)
     input_layer, output_layer = tcn_network.build_network(input_shape)
     model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
 
@@ -148,7 +134,7 @@ def test_tcn_network_output_shape():
 )
 def test_tcn_network_config():
     """Test TCN network configuration attributes."""
-    tcn_network = TemporalConvolutionalNetwork(num_inputs=3, n_filters=[16, 32])
+    tcn_network = TCNNetwork(n_blocks=[16, 32])
 
     # Check _config attributes
     assert "python_dependencies" in tcn_network._config
@@ -164,21 +150,18 @@ def test_tcn_network_config():
 )
 def test_tcn_network_parameter_initialization():
     """Test TCN network parameter initialization."""
-    num_inputs = 4
-    n_filters = [32, 64, 128]
+    n_blocks = [32, 64, 128]
     kernel_size = 3
     dropout = 0.2
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs,
-        n_filters=n_filters,
+    tcn_network = TCNNetwork(
+        n_blocks=n_blocks,
         kernel_size=kernel_size,
         dropout=dropout,
     )
 
     # Check that parameters are set correctly
-    assert tcn_network.num_inputs == num_inputs
-    assert tcn_network.n_filters == n_filters
+    assert tcn_network.n_blocks == n_blocks
     assert tcn_network.kernel_size == kernel_size
     assert tcn_network.dropout == dropout
 
@@ -192,12 +175,9 @@ def test_tcn_network_single_layer():
     import tensorflow as tf
 
     input_shape = (30, 2)
-    num_inputs = 2
-    n_filters = [16]  # Single layer
+    n_blocks = [16]  # Single layer
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, n_filters=n_filters
-    )
+    tcn_network = TCNNetwork(n_blocks=n_blocks)
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Verify single layer network works

From 7bacdac1a9df547330c08691ab89d4b0ece3a23d Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 8 Jul 2025 20:28:53 +0530
Subject: [PATCH 17/36] tcn updated

---
 aeon/networks/__init__.py       |   4 +-
 aeon/networks/_tcn.py           | 138 ++++++++++++++++----------------
 aeon/networks/tests/test_tcn.py |  62 +++++---------
 3 files changed, 92 insertions(+), 112 deletions(-)

diff --git a/aeon/networks/__init__.py b/aeon/networks/__init__.py
index 61e669283c..aed37be7e7 100644
--- a/aeon/networks/__init__.py
+++ b/aeon/networks/__init__.py
@@ -19,7 +19,7 @@
     "AEBiGRUNetwork",
     "DisjointCNNNetwork",
     "RecurrentNetwork",
-    "TemporalConvolutionalNetwork",
+    "TCNNetwork",
 ]
 from aeon.networks._ae_abgru import AEAttentionBiGRUNetwork
 from aeon.networks._ae_bgru import AEBiGRUNetwork
@@ -37,5 +37,5 @@
 from aeon.networks._mlp import MLPNetwork
 from aeon.networks._resnet import ResNetNetwork
 from aeon.networks._rnn import RecurrentNetwork
-from aeon.networks._tcn import TemporalConvolutionalNetwork
+from aeon.networks._tcn import TCNNetwork
 from aeon.networks.base import BaseDeepLearningNetwork
diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py
index 62ed404f1b..5400a557b7 100644
--- a/aeon/networks/_tcn.py
+++ b/aeon/networks/_tcn.py
@@ -1,15 +1,11 @@
-"""Implementation of Temporal Convolutional Network (TCN).
-
-Based on the paper "An Empirical Evaluation of Generic Convolutional and
-Recurrent Networks for Sequence Modeling" by Bai et al. (2018).
-"""
+"""Implementation of Temporal Convolutional Network (TCN)."""
 
 __maintainer__ = []
 
 from aeon.networks.base import BaseDeepLearningNetwork
 
 
-class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
+class TCNNetwork(BaseDeepLearningNetwork):
     """Temporal Convolutional Network (TCN) for sequence modeling.
 
     A generic convolutional architecture for sequence modeling that combines:
@@ -21,9 +17,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
 
     Parameters
     ----------
-    num_inputs : int
-        Number of input channels/features in the input sequence.
-    num_channels : list of int
+    n_blocks : list of int
         List specifying the number of output channels for each layer.
         The length determines the depth of the network.
     kernel_size : int, default=2
@@ -39,19 +33,19 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
 
     References
     ----------
-    Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of
+    .. [1]  Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of
     generic convolutional and recurrent networks for sequence modeling.
     arXiv preprint arXiv:1803.01271.
 
     Examples
     --------
-    >>> from aeon.networks._tcn import TemporalConvolutionalNetwork
+    >>> from aeon.networks._tcn import TCNNetwork
     >>> from aeon.testing.data_generation import make_example_3d_numpy
     >>> import tensorflow as tf
     >>> X, y = make_example_3d_numpy(n_cases=8, n_channels=4, n_timepoints=150,
     ...                              return_y=True, regression_target=True,
     ...                              random_state=42)
-    >>> network = TemporalConvolutionalNetwork(num_inputs=4, num_channels=[8, 8])
+    >>> network = TCNNetwork(num_channels=[8, 8])
     >>> input_layer, output = network.build_network(input_shape=(4, 150))
     >>> model = tf.keras.Model(inputs=input_layer, outputs=output)
     >>> model.compile(optimizer="adam", loss="mse")
@@ -67,8 +61,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork):
 
     def __init__(
         self,
-        num_inputs: int = 1,
-        n_filters: list = [16] * 3,  # changed from num_channels
+        n_blocks: list = [16] * 3,
         kernel_size: int = 2,
         dropout: float = 0.2,
     ):
@@ -78,7 +71,7 @@ def __init__(
         ----------
         num_inputs : int
             Number of input channels/features.
-        n_filters : list of int
+        n_blocks : list of int
             Number of output channels for each temporal block.
         kernel_size : int, default=2
             Size of convolutional kernels.
@@ -86,33 +79,32 @@ def __init__(
             Dropout rate for regularization.
         """
         super().__init__()
-        self.num_inputs = num_inputs
-        self.n_filters = n_filters
+        self.n_blocks = n_blocks
         self.kernel_size = kernel_size
         self.dropout = dropout
 
     def _conv1d_with_variable_padding(
         self,
-        x,
-        filters: int,
+        input_tensor,
+        n_filters: int,
         kernel_size: int,
         padding_value: int,
-        stride: int = 1,
+        strides: int = 1,
         dilation_rate: int = 1,
     ):
         """Apply 1D convolution with variable padding for causal convolutions.
 
         Parameters
         ----------
-        x : tf.Tensor
+        input_tensor : tf.Tensor
             Input tensor of shape (batch_size, channels, sequence_length).
-        filters : int
+        n_filters : int
             Number of output filters.
         kernel_size : int
             Size of the convolutional kernel.
         padding_value : int
             Amount of padding to apply.
-        stride : int, default=1
+        strides : int, default=1
             Stride of the convolution.
         dilation_rate : int, default=1
             Dilation rate for dilated convolutions.
@@ -125,16 +117,16 @@ def _conv1d_with_variable_padding(
         import tensorflow as tf
 
         # Transpose to Keras format (batch, sequence, channels)
-        x_keras_format = tf.keras.layers.Permute((2, 1))(x)
+        x_keras_format = tf.keras.layers.Permute((2, 1))(input_tensor)
 
         # Apply padding in sequence dimension
         padded_x = tf.keras.layers.ZeroPadding1D(padding=padding_value)(x_keras_format)
 
         # Create and apply convolution layer
         conv_layer = tf.keras.layers.Conv1D(
-            filters=filters,
+            filters=n_filters,
             kernel_size=kernel_size,
-            strides=stride,
+            strides=strides,
             dilation_rate=dilation_rate,
             padding="valid",
         )
@@ -145,7 +137,7 @@ def _conv1d_with_variable_padding(
         # Transpose back to PyTorch format (batch, channels, sequence)
         return tf.keras.layers.Permute((2, 1))(out)
 
-    def _chomp_1d(self, x, chomp_size: int):
+    def _chomp(self, input_tensor, chomp_size: int):
         """Remove padding from the end of sequences to maintain causality.
 
         This operation ensures that the output at time t only depends on
@@ -153,7 +145,7 @@ def _chomp_1d(self, x, chomp_size: int):
 
         Parameters
         ----------
-        x : tf.Tensor
+        input_tensor : tf.Tensor
             Input tensor of shape (batch_size, channels, sequence_length).
         chomp_size : int
             Number of time steps to remove from the end.
@@ -163,17 +155,17 @@ def _chomp_1d(self, x, chomp_size: int):
         tf.Tensor
             Chomped tensor with reduced sequence length.
         """
-        return x[:, :, :-chomp_size]
+        return input_tensor[:, :, :-chomp_size]
 
     def _temporal_block(
         self,
-        x,
+        input_tensor,
         n_inputs: int,
-        n_outputs: int,
+        n_filters: int,
         kernel_size: int,
-        stride: int,
-        dilation: int,
-        padding: int,
+        strides: int,
+        dilation_rate: int,
+        padding_value: int,
         dropout: float = 0.2,
         training: bool = None,
     ):
@@ -187,19 +179,19 @@ def _temporal_block(
 
         Parameters
         ----------
-        x : tf.Tensor
+        input_tensor : tf.Tensor
             Input tensor of shape (batch_size, channels, sequence_length).
         n_inputs : int
             Number of input channels.
-        n_outputs : int
-            Number of output channels.
+        n_filters : int
+            Number of output filters.
         kernel_size : int
             Size of convolutional kernels.
-        stride : int
+        strides : int
             Stride of convolutions (typically 1).
-        dilation : int
+        dilation_rate : int
             Dilation factor for dilated convolutions.
-        padding : int
+        padding_value : int
             Padding size to be chomped off.
         dropout : float, default=0.2
             Dropout rate for regularization.
@@ -209,31 +201,38 @@ def _temporal_block(
         Returns
         -------
         tf.Tensor
-            Output tensor of shape (batch_size, n_outputs, sequence_length).
+            Output tensor of shape (batch_size, n_filters, sequence_length).
         """
         import tensorflow as tf
 
         # First convolution block
         out = self._conv1d_with_variable_padding(
-            x, n_outputs, kernel_size, padding, stride, dilation
+            input_tensor, n_filters, kernel_size, padding_value, strides, dilation_rate
         )
-        out = self._chomp_1d(out, padding)
+        out = self._chomp(out, padding_value)
         out = tf.keras.layers.ReLU()(out)
         out = tf.keras.layers.Dropout(dropout)(out, training=training)
 
         # Second convolution block
         out = self._conv1d_with_variable_padding(
-            out, n_outputs, kernel_size, padding, stride, dilation
+            out, n_filters, kernel_size, padding_value, strides, dilation_rate
         )
-        out = self._chomp_1d(out, padding)
+        out = self._chomp(out, padding_value)
         out = tf.keras.layers.ReLU()(out)
         out = tf.keras.layers.Dropout(dropout)(out, training=training)
 
         # Residual connection with optional dimension matching
-        if n_inputs != n_outputs:
-            res = self._conv1d_with_variable_padding(x, n_outputs, 1, 0, 1, 1)
+        if n_inputs != n_filters:
+            res = self._conv1d_with_variable_padding(
+                input_tensor=input_tensor,
+                n_filters=n_filters,
+                kernel_size=1,
+                padding_value=0,
+                strides=1,
+                dilation_rate=1,
+            )
         else:
-            res = x
+            res = input_tensor
 
         # Add residual and apply final ReLU
         result = tf.keras.layers.Add()([out, res])
@@ -241,9 +240,9 @@ def _temporal_block(
 
     def _temporal_conv_net(
         self,
-        x,
-        num_inputs: int,
-        n_filters: list,  # changed from num_channels
+        input_tensor,
+        n_inputs: int,
+        n_blocks: list,
         kernel_size: int = 2,
         dropout: float = 0.2,
         training: bool = None,
@@ -255,11 +254,11 @@ def _temporal_conv_net(
 
         Parameters
         ----------
-        x : tf.Tensor
+        input_tensor : tf.Tensor
             Input tensor of shape (batch_size, channels, sequence_length).
-        num_inputs : int
+        n_inputs : int
             Number of input channels.
-        n_filters : list of int
+        n_blocks : list of int
             Number of output channels for each temporal block.
         kernel_size : int, default=2
             Size of convolutional kernels.
@@ -273,26 +272,26 @@ def _temporal_conv_net(
         tf.Tensor
             Output tensor after applying all temporal blocks.
         """
-        num_levels = len(n_filters)
+        num_levels = len(n_blocks)
         for i in range(num_levels):
-            dilation_size = 2**i
-            in_channels = num_inputs if i == 0 else n_filters[i - 1]
-            out_channels = n_filters[i]
-            padding = (kernel_size - 1) * dilation_size
+            dilation_rate = 2**i
+            in_channels = n_inputs if i == 0 else n_blocks[i - 1]
+            out_channels = n_blocks[i]
+            padding_value = (kernel_size - 1) * dilation_rate
 
-            x = self._temporal_block(
-                x,
+            input_tensor = self._temporal_block(
+                input_tensor,
                 n_inputs=in_channels,
-                n_outputs=out_channels,
+                n_filters=out_channels,
                 kernel_size=kernel_size,
-                stride=1,
-                dilation=dilation_size,
-                padding=padding,
+                strides=1,
+                dilation_rate=dilation_rate,
+                padding_value=padding_value,
                 dropout=dropout,
                 training=training,
             )
 
-        return x
+        return input_tensor
 
     def build_network(self, input_shape: tuple, **kwargs) -> tuple:
         """Build the complete TCN architecture.
@@ -303,7 +302,7 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple:
         Parameters
         ----------
         input_shape : tuple
-            Shape of input data (sequence_length, num_features).
+            Shape of input data (n_channels, n_timepoints).
         **kwargs
             Additional keyword arguments (unused).
 
@@ -326,12 +325,13 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple:
 
         # Transpose input to match the expected format (batch, channels, seq)
         x = input_layer
+        n_inputs = input_shape[0]
 
         # Apply TCN using the private function
         x = self._temporal_conv_net(
             x,
-            num_inputs=self.num_inputs,
-            n_filters=self.n_filters,
+            n_inputs=n_inputs,
+            n_blocks=self.n_blocks,
             kernel_size=self.kernel_size,
             dropout=self.dropout,
         )
diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py
index 47c1d38615..94495e3c41 100644
--- a/aeon/networks/tests/test_tcn.py
+++ b/aeon/networks/tests/test_tcn.py
@@ -1,8 +1,8 @@
-"""Tests for the TemporalConvolutionalNetwork."""
+"""Tests for the TCNNetwork."""
 
 import pytest
 
-from aeon.networks import TemporalConvolutionalNetwork
+from aeon.networks import TCNNetwork
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
 
@@ -15,12 +15,9 @@ def test_tcn_network_basic():
     import tensorflow as tf
 
     input_shape = (100, 5)
-    num_inputs = 5
-    n_filters = [32, 64]
+    n_blocks = [32, 64]
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, n_filters=n_filters
-    )
+    tcn_network = TCNNetwork(n_blocks=n_blocks)
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Check that layers are created correctly
@@ -38,17 +35,14 @@ def test_tcn_network_basic():
     not _check_soft_dependencies(["tensorflow"], severity="none"),
     reason="Tensorflow soft dependency unavailable.",
 )
-@pytest.mark.parametrize("n_filters", [[32], [32, 64], [16, 32, 64], [64, 32, 16]])
-def test_tcn_network_different_channels(n_filters):
+@pytest.mark.parametrize("n_blocks", [[32], [32, 64], [16, 32, 64], [64, 32, 16]])
+def test_tcn_network_different_channels(n_blocks):
     """Test TCN network with different channel configurations."""
     import tensorflow as tf
 
     input_shape = (50, 3)
-    num_inputs = 3
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, n_filters=n_filters
-    )
+    tcn_network = TCNNetwork(n_blocks=n_blocks)
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Create a model and verify it works
@@ -73,12 +67,10 @@ def test_tcn_network_kernel_sizes(kernel_size):
     import tensorflow as tf
 
     input_shape = (80, 4)
-    num_inputs = 4
-    n_filters = [32, 64]
+    n_blocks = [32, 64]
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs,
-        n_filters=n_filters,
+    tcn_network = TCNNetwork(
+        n_blocks=n_blocks,
         kernel_size=kernel_size,
     )
     input_layer, output_layer = tcn_network.build_network(input_shape)
@@ -98,12 +90,9 @@ def test_tcn_network_dropout_rates(dropout):
     import tensorflow as tf
 
     input_shape = (60, 2)
-    num_inputs = 2
-    n_filters = [16, 32]
+    n_blocks = [16, 32]
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, n_filters=n_filters, dropout=dropout
-    )
+    tcn_network = TCNNetwork(n_blocks=n_blocks, dropout=dropout)
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Verify network builds successfully
@@ -122,12 +111,9 @@ def test_tcn_network_output_shape():
 
     input_shape = (40, 6)
     batch_size = 16
-    num_inputs = 6
-    n_filters = [32, 64]
+    n_blocks = [32, 64]
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, n_filters=n_filters
-    )
+    tcn_network = TCNNetwork(n_blocks=n_blocks)
     input_layer, output_layer = tcn_network.build_network(input_shape)
     model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
 
@@ -148,7 +134,7 @@ def test_tcn_network_output_shape():
 )
 def test_tcn_network_config():
     """Test TCN network configuration attributes."""
-    tcn_network = TemporalConvolutionalNetwork(num_inputs=3, n_filters=[16, 32])
+    tcn_network = TCNNetwork(n_blocks=[16, 32])
 
     # Check _config attributes
     assert "python_dependencies" in tcn_network._config
@@ -164,21 +150,18 @@ def test_tcn_network_config():
 )
 def test_tcn_network_parameter_initialization():
     """Test TCN network parameter initialization."""
-    num_inputs = 4
-    n_filters = [32, 64, 128]
+    n_blocks = [32, 64, 128]
     kernel_size = 3
     dropout = 0.2
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs,
-        n_filters=n_filters,
+    tcn_network = TCNNetwork(
+        n_blocks=n_blocks,
         kernel_size=kernel_size,
         dropout=dropout,
     )
 
     # Check that parameters are set correctly
-    assert tcn_network.num_inputs == num_inputs
-    assert tcn_network.n_filters == n_filters
+    assert tcn_network.n_blocks == n_blocks
     assert tcn_network.kernel_size == kernel_size
     assert tcn_network.dropout == dropout
 
@@ -192,12 +175,9 @@ def test_tcn_network_single_layer():
     import tensorflow as tf
 
     input_shape = (30, 2)
-    num_inputs = 2
-    n_filters = [16]  # Single layer
+    n_blocks = [16]  # Single layer
 
-    tcn_network = TemporalConvolutionalNetwork(
-        num_inputs=num_inputs, n_filters=n_filters
-    )
+    tcn_network = TCNNetwork(n_blocks=n_blocks)
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Verify single layer network works

From 9a1b8782fadede1aabf30c429ad9113d04dffc32 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 8 Jul 2025 20:41:34 +0530
Subject: [PATCH 18/36] tcnfst updated with net

---
 aeon/forecasting/deep_learning/_tcn.py | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py
index 147dc618ec..97d5a282ba 100644
--- a/aeon/forecasting/deep_learning/_tcn.py
+++ b/aeon/forecasting/deep_learning/_tcn.py
@@ -6,13 +6,13 @@
 __all__ = ["TCNForecaster"]
 
 from aeon.forecasting.deep_learning.base import BaseDeepForecaster
-from aeon.networks._tcn import TemporalConvolutionalNetwork
+from aeon.networks._tcn import TCNNetwork
 
 
 class TCNForecaster(BaseDeepForecaster):
     """A deep learning forecaster using Temporal Convolutional Network (TCN).
 
-    It leverages the `TemporalConvolutionalNetwork` from aeon's network module
+    It leverages the `TCNNetwork` from aeon's network module
     to build the architecture suitable for forecasting tasks.
 
     Parameters
@@ -35,9 +35,7 @@ class TCNForecaster(BaseDeepForecaster):
         Seed for random number generators.
     axis : int, default=0
         Axis along which to apply the forecaster.
-    num_inputs : int, default=1
-        Number of input channels/features in the input sequence.
-    n_filters : list of int, default=[16, 16, 16]
+    n_blocks : list of int, default=[16, 16, 16]
         List specifying the number of output channels for each layer of the
         TCN. The length determines the depth of the network.
     kernel_size : int, default=2
@@ -66,8 +64,7 @@ def __init__(
         loss="mse",
         random_state=None,
         axis=0,
-        num_inputs=1,
-        n_filters=None,
+        n_blocks=None,
         kernel_size=2,
         dropout=0.2,
     ):
@@ -78,12 +75,12 @@ def __init__(
             epochs=epochs,
             verbose=verbose,
             optimizer=optimizer,
-            loss=loss,
             random_state=random_state,
             axis=axis,
+            loss=loss,
         )
-        self.num_inputs = num_inputs
-        self.n_filters = n_filters
+
+        self.n_blocks = n_blocks
         self.kernel_size = kernel_size
         self.dropout = dropout
 
@@ -102,12 +99,11 @@ def _build_model(self, input_shape):
         """
         import tensorflow as tf
 
-        if self.n_filters is None:
-            self.n_filters = [16] * 3
+        if self.n_blocks is None:
+            self.n_blocks = [16] * 3
         # Initialize the TCN network with the updated parameters
-        network = TemporalConvolutionalNetwork(
-            num_inputs=self.num_inputs,
-            n_filters=self.n_filters,
+        network = TCNNetwork(
+            n_blocks=self.n_blocks,
             kernel_size=self.kernel_size,
             dropout=self.dropout,
         )

From 08dadeca78beba0b1c2a6685e003950366603e58 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 8 Jul 2025 20:49:27 +0530
Subject: [PATCH 19/36] doctest corrected

---
 aeon/networks/_tcn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py
index 5400a557b7..834b5865e7 100644
--- a/aeon/networks/_tcn.py
+++ b/aeon/networks/_tcn.py
@@ -45,7 +45,7 @@ class TCNNetwork(BaseDeepLearningNetwork):
     >>> X, y = make_example_3d_numpy(n_cases=8, n_channels=4, n_timepoints=150,
     ...                              return_y=True, regression_target=True,
     ...                              random_state=42)
-    >>> network = TCNNetwork(num_channels=[8, 8])
+    >>> network = TCNNetwork(n_blocks=[8, 8])
     >>> input_layer, output = network.build_network(input_shape=(4, 150))
     >>> model = tf.keras.Model(inputs=input_layer, outputs=output)
     >>> model.compile(optimizer="adam", loss="mse")

From 086c5a46701a6c068bfccba0dd71a8a445dc0d41 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 13 Jul 2025 12:57:26 +0530
Subject: [PATCH 20/36] changes made

---
 aeon/forecasting/deep_learning/_tcn.py | 46 ++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py
index 97d5a282ba..d3d06bf0fe 100644
--- a/aeon/forecasting/deep_learning/_tcn.py
+++ b/aeon/forecasting/deep_learning/_tcn.py
@@ -3,8 +3,11 @@
 from __future__ import annotations
 
 __maintainer__ = []
+
 __all__ = ["TCNForecaster"]
 
+from typing import Any
+
 from aeon.forecasting.deep_learning.base import BaseDeepForecaster
 from aeon.networks._tcn import TCNNetwork
 
@@ -43,14 +46,14 @@ class TCNForecaster(BaseDeepForecaster):
     dropout : float, default=0.2
         Dropout rate applied after each convolutional layer for
         regularization.
-
-
     """
 
     _tags = {
+        "python_dependencies": ["tensorflow"],
         "capability:horizon": True,
         "capability:multivariate": True,
         "capability:exogenous": False,
+        "capability:univariate": True,
     }
 
     def __init__(
@@ -79,7 +82,6 @@ def __init__(
             axis=axis,
             loss=loss,
         )
-
         self.n_blocks = n_blocks
         self.kernel_size = kernel_size
         self.dropout = dropout
@@ -99,11 +101,9 @@ def _build_model(self, input_shape):
         """
         import tensorflow as tf
 
-        if self.n_blocks is None:
-            self.n_blocks = [16] * 3
         # Initialize the TCN network with the updated parameters
         network = TCNNetwork(
-            n_blocks=self.n_blocks,
+            n_blocks=self.n_blocks if self.n_blocks is not None else [16, 16, 16],
             kernel_size=self.kernel_size,
             dropout=self.dropout,
         )
@@ -114,3 +114,37 @@ def _build_model(self, input_shape):
         # Create the final model
         model = tf.keras.Model(inputs=input_layer, outputs=output)
         return model
+
+    # Added to handle __name__ in tests (class-level access)
+    @classmethod
+    def _get_test_params(
+        cls, parameter_set: str = "default"
+    ) -> dict[str, Any] | list[dict[str, Any]]:
+        """
+        Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+            For forecasters, a "default" set of parameters should be provided for
+            general testing, and a "results_comparison" set for comparing against
+            previously recorded results if the general set does not produce suitable
+            probabilities to compare against.
+
+        Returns
+        -------
+        params : dict or list of dict, default={}
+            Parameters to create testing instances of the class.
+            Each dict are parameters to construct an "interesting" test instance, i.e.,
+            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+        """
+        param = {
+            "epochs": 10,
+            "batch_size": 4,
+            "n_blocks": [8, 8],
+            "kernel_size": 2,
+            "dropout": 0.1,
+        }
+        return [param]

From b6ccd079f6b7b4771fff40f1ee61ad536617f99d Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 20 Jul 2025 13:46:28 +0530
Subject: [PATCH 21/36] basedelf updated

---
 aeon/forecasting/deep_learning/base.py        | 37 +++++++++++++------
 .../deep_learning/tests/test_base.py          | 17 +++++----
 2 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index 40c7b3e212..3fa421c772 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -31,7 +31,7 @@ class BaseDeepForecaster(BaseForecaster):
         The window size for creating input sequences.
     batch_size : int, default=32
         Batch size for training the model.
-    epochs : int, default=100
+    n_epochs : int, default=100
         Number of epochs to train the model.
     verbose : int, default=0
         Verbosity mode (0, 1, or 2).
@@ -39,19 +39,33 @@ class BaseDeepForecaster(BaseForecaster):
         Optimizer to use for training.
     loss : str or tf.keras.losses.Loss, default='mse'
         Loss function for training.
-    random_state : int, default=None
-        Seed for random number generators.
+    random_state : int, RandomState instance or None, default=None
+        If int, random_state is the seed used by the random number generator;
+        If RandomState instance, random_state is the random number generator;
+        If None, the random number generator is the RandomState instance used
+        by np.random.
+        Seeded random number generation can only be guaranteed on CPU processing,
+        GPU processing will be non-deterministic.
     axis : int, default=0
         Axis along which to apply the forecaster.
         Default is 0 for univariate time series.
     """
 
+    _tags = {
+        "capability:horizon": False,
+        "capability:exogenous": False,
+        "algorithm_type": "deeplearning",
+        "non_deterministic": True,
+        "cant_pickle": True,
+        "python_dependencies": "tensorflow",
+    }
+
     def __init__(
         self,
         horizon=1,
         window=10,
         batch_size=32,
-        epochs=100,
+        n_epochs=100,
         verbose=0,
         optimizer="adam",
         loss="mse",
@@ -61,7 +75,7 @@ def __init__(
         self.horizon = horizon
         self.window = window
         self.batch_size = batch_size
-        self.epochs = epochs
+        self.n_epochs = n_epochs
         self.verbose = verbose
         self.optimizer = optimizer
         self.loss = loss
@@ -89,11 +103,12 @@ def _fit(self, y, X=None):
             Returns an instance of self.
         """
         import tensorflow as tf
+        from sklearn.utils import check_random_state
 
         # Set random seed for reproducibility
-        if self.random_state is not None:
-            np.random.seed(self.random_state)
-            tf.random.set_seed(self.random_state)
+        rng = check_random_state(self.random_state)
+        self.random_state_ = rng.randint(0, np.iinfo(np.int32).max)
+        tf.keras.utils.set_random_seed(self.random_state_)
 
         # Convert input data to numpy array
         y_inner = self._convert_input(y)
@@ -111,7 +126,7 @@ def _fit(self, y, X=None):
 
         # Build and compile the model
         input_shape = X_train.shape[1:]
-        self.model_ = self._build_model(input_shape)
+        self.model_ = self.build_model(input_shape)
         self.model_.compile(optimizer=self.optimizer, loss=self.loss)
 
         # Train the model
@@ -119,7 +134,7 @@ def _fit(self, y, X=None):
             X_train,
             y_train,
             batch_size=self.batch_size,
-            epochs=self.epochs,
+            epochs=self.n_epochs,
             verbose=self.verbose,
         )
         self.last_window_ = y_inner[-self.window :]
@@ -218,7 +233,7 @@ def _create_sequences(self, data):
         return X, y
 
     @abstractmethod
-    def _build_model(self, input_shape):
+    def build_model(self, input_shape):
         """Build the deep learning model.
 
         Parameters
diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index 21e90e4a68..5827c16a18 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -14,10 +14,13 @@
 class SimpleDeepForecaster(BaseDeepForecaster):
     """A simple concrete implementation of BaseDeepForecaster for testing."""
 
-    def __init__(self, horizon=1, window=5, epochs=1, verbose=0):
-        super().__init__(horizon=horizon, window=window, epochs=epochs, verbose=verbose)
+    def __init__(self, horizon=1, window=5, n_epochs=1, verbose=0):
+        super().__init__(
+            horizon=horizon, window=window, n_epochs=n_epochs, verbose=verbose
+        )
 
-    def _build_model(self, input_shape):
+    def build_model(self, input_shape):
+        """Build a simple Keras model for testing."""
         import tensorflow as tf
 
         model = tf.keras.Sequential(
@@ -41,7 +44,7 @@ def test_base_deep_forecaster_fit_predict():
     data = np.random.randn(50)
 
     # Initialize forecaster
-    forecaster = SimpleDeepForecaster(horizon=2, window=5, epochs=1, verbose=0)
+    forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0)
 
     # Fit the model
     forecaster.fit(data)
@@ -51,8 +54,8 @@ def test_base_deep_forecaster_fit_predict():
 
     # Validate output shape
     assert (
-        len(predictions) == 2
-    ), f"Expected predictions of length 2, got {len(predictions)}"
+        len(predictions) == 1
+    ), f"Expected predictions of length 1, got {len(predictions)}"
     assert isinstance(predictions, np.ndarray), "Predictions should be a numpy array"
 
 
@@ -63,7 +66,7 @@ def test_base_deep_forecaster_fit_predict():
 def test_base_deep_forecaster_insufficient_data():
     """Test error handling for insufficient data."""
     data = np.random.randn(5)
-    forecaster = SimpleDeepForecaster(horizon=2, window=5, epochs=1, verbose=0)
+    forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0)
 
     with pytest.raises(ValueError, match="Data length.*insufficient"):
         forecaster.fit(data)

From 405fa80712465ae5145c5f3b21643c0319914bef Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 20 Jul 2025 14:20:49 +0530
Subject: [PATCH 22/36] test base chanegd

---
 aeon/forecasting/deep_learning/tests/test_base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index 5827c16a18..5579ab9959 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -42,6 +42,7 @@ def test_base_deep_forecaster_fit_predict():
     # Generate synthetic data
     np.random.seed(42)
     data = np.random.randn(50)
+    y = np.random.randn(10)
 
     # Initialize forecaster
     forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0)
@@ -50,7 +51,7 @@ def test_base_deep_forecaster_fit_predict():
     forecaster.fit(data)
 
     # Predict
-    predictions = forecaster.predict()
+    predictions = forecaster.predict(y)
 
     # Validate output shape
     assert (

From 5cb1523cb3c0aa034351f78f63c8ffce136236b7 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Mon, 21 Jul 2025 15:44:14 +0530
Subject: [PATCH 23/36] tcn rshaped

---
 aeon/networks/_tcn.py           | 38 +++++++++++----------------------
 aeon/networks/tests/test_tcn.py | 24 +++++++++++----------
 2 files changed, 25 insertions(+), 37 deletions(-)

diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py
index 834b5865e7..f88242823c 100644
--- a/aeon/networks/_tcn.py
+++ b/aeon/networks/_tcn.py
@@ -40,17 +40,10 @@ class TCNNetwork(BaseDeepLearningNetwork):
     Examples
     --------
     >>> from aeon.networks._tcn import TCNNetwork
-    >>> from aeon.testing.data_generation import make_example_3d_numpy
-    >>> import tensorflow as tf
-    >>> X, y = make_example_3d_numpy(n_cases=8, n_channels=4, n_timepoints=150,
-    ...                              return_y=True, regression_target=True,
-    ...                              random_state=42)
     >>> network = TCNNetwork(n_blocks=[8, 8])
-    >>> input_layer, output = network.build_network(input_shape=(4, 150))
-    >>> model = tf.keras.Model(inputs=input_layer, outputs=output)
-    >>> model.compile(optimizer="adam", loss="mse")
-    >>> model.fit(X, y, epochs=2, batch_size=2, verbose=0)  # doctest: +SKIP
-    <keras.src.callbacks.History object ...>
+    >>> input_layer, output = network.build_network(input_shape=(150, 4))
+    >>> input_layer.shape, output.shape
+    ((None, 150, 4), (None, 4))
     """
 
     _config = {
@@ -69,8 +62,6 @@ def __init__(
 
         Parameters
         ----------
-        num_inputs : int
-            Number of input channels/features.
         n_blocks : list of int
             Number of output channels for each temporal block.
         kernel_size : int, default=2
@@ -97,7 +88,7 @@ def _conv1d_with_variable_padding(
         Parameters
         ----------
         input_tensor : tf.Tensor
-            Input tensor of shape (batch_size, channels, sequence_length).
+            Input tensor of shape (batch_size, n_timepoints, n_channels).
         n_filters : int
             Number of output filters.
         kernel_size : int
@@ -116,11 +107,8 @@ def _conv1d_with_variable_padding(
         """
         import tensorflow as tf
 
-        # Transpose to Keras format (batch, sequence, channels)
-        x_keras_format = tf.keras.layers.Permute((2, 1))(input_tensor)
-
         # Apply padding in sequence dimension
-        padded_x = tf.keras.layers.ZeroPadding1D(padding=padding_value)(x_keras_format)
+        padded_x = tf.keras.layers.ZeroPadding1D(padding=padding_value)(input_tensor)
 
         # Create and apply convolution layer
         conv_layer = tf.keras.layers.Conv1D(
@@ -134,8 +122,7 @@ def _conv1d_with_variable_padding(
         # Apply convolution
         out = conv_layer(padded_x)
 
-        # Transpose back to PyTorch format (batch, channels, sequence)
-        return tf.keras.layers.Permute((2, 1))(out)
+        return out
 
     def _chomp(self, input_tensor, chomp_size: int):
         """Remove padding from the end of sequences to maintain causality.
@@ -155,7 +142,7 @@ def _chomp(self, input_tensor, chomp_size: int):
         tf.Tensor
             Chomped tensor with reduced sequence length.
         """
-        return input_tensor[:, :, :-chomp_size]
+        return input_tensor[:, :-chomp_size, :]
 
     def _temporal_block(
         self,
@@ -302,7 +289,7 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple:
         Parameters
         ----------
         input_shape : tuple
-            Shape of input data (n_channels, n_timepoints).
+            Shape of input data (n_timepoints, n_channels).
         **kwargs
             Additional keyword arguments (unused).
 
@@ -323,9 +310,9 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple:
         # Create input layer
         input_layer = tf.keras.layers.Input(shape=input_shape)
 
-        # Transpose input to match the expected format (batch, channels, seq)
+        # Transpose input to match the expected format (batch, n_timepoints, n_channels)
         x = input_layer
-        n_inputs = input_shape[0]
+        n_inputs = input_shape[1]
 
         # Apply TCN using the private function
         x = self._temporal_conv_net(
@@ -335,7 +322,6 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple:
             kernel_size=self.kernel_size,
             dropout=self.dropout,
         )
-
-        x = tf.keras.layers.Dense(input_shape[0])(x[:, -1, :])
-        output = tf.keras.layers.Dense(1)(x)
+        output = tf.keras.layers.Dense(input_shape[1])(x[:, :, -1])
+        # output = tf.keras.layers.Dense(1)(x)
         return input_layer, output
diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py
index 94495e3c41..b21df0784d 100644
--- a/aeon/networks/tests/test_tcn.py
+++ b/aeon/networks/tests/test_tcn.py
@@ -14,10 +14,10 @@ def test_tcn_network_basic():
     """Test basic TCN network creation and build_network functionality."""
     import tensorflow as tf
 
-    input_shape = (100, 5)
+    input_shape = (100, 5)  # (n_timepoints, n_channels)
     n_blocks = [32, 64]
-
     tcn_network = TCNNetwork(n_blocks=n_blocks)
+
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Check that layers are created correctly
@@ -40,9 +40,9 @@ def test_tcn_network_different_channels(n_blocks):
     """Test TCN network with different channel configurations."""
     import tensorflow as tf
 
-    input_shape = (50, 3)
-
+    input_shape = (50, 3)  # (n_timepoints, n_channels)
     tcn_network = TCNNetwork(n_blocks=n_blocks)
+
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Create a model and verify it works
@@ -66,13 +66,14 @@ def test_tcn_network_kernel_sizes(kernel_size):
     """Test TCN network with different kernel sizes."""
     import tensorflow as tf
 
-    input_shape = (80, 4)
+    input_shape = (80, 4)  # (n_timepoints, n_channels)
     n_blocks = [32, 64]
 
     tcn_network = TCNNetwork(
         n_blocks=n_blocks,
         kernel_size=kernel_size,
     )
+
     input_layer, output_layer = tcn_network.build_network(input_shape)
 
     # Verify network builds successfully
@@ -89,7 +90,7 @@ def test_tcn_network_dropout_rates(dropout):
     """Test TCN network with different dropout rates."""
     import tensorflow as tf
 
-    input_shape = (60, 2)
+    input_shape = (60, 2)  # (n_timepoints, n_channels)
     n_blocks = [16, 32]
 
     tcn_network = TCNNetwork(n_blocks=n_blocks, dropout=dropout)
@@ -109,20 +110,21 @@ def test_tcn_network_output_shape():
     import numpy as np
     import tensorflow as tf
 
-    input_shape = (40, 6)
+    input_shape = (40, 6)  # (n_timepoints, n_channels)
     batch_size = 16
     n_blocks = [32, 64]
 
     tcn_network = TCNNetwork(n_blocks=n_blocks)
     input_layer, output_layer = tcn_network.build_network(input_shape)
+
     model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
 
     # Create dummy input and test output shape
     dummy_input = np.random.random((batch_size,) + input_shape)
     output = model(dummy_input)
 
-    # Output should maintain sequence length and have final channel dimension
-    expected_shape = (batch_size, 1)
+    # Output should have the same number of channels as input
+    expected_shape = (batch_size, input_shape[1])  # (batch_size, n_channels)
     assert (
         output.shape == expected_shape
     ), f"Expected shape {expected_shape}, got {output.shape}"
@@ -174,7 +176,7 @@ def test_tcn_network_single_layer():
     """Test TCN network with single temporal block."""
     import tensorflow as tf
 
-    input_shape = (30, 2)
+    input_shape = (30, 2)  # (n_timepoints, n_channels)
     n_blocks = [16]  # Single layer
 
     tcn_network = TCNNetwork(n_blocks=n_blocks)
@@ -189,4 +191,4 @@ def test_tcn_network_single_layer():
 
     dummy_input = np.random.random((4,) + input_shape)
     output = model(dummy_input)
-    assert output.shape == (4, 1)
+    assert output.shape == (4, input_shape[1])  # (batch_size, n_channels)

From 2ab68c92cc265081f9148813523919ebfef75d25 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 22 Jul 2025 15:01:25 +0530
Subject: [PATCH 24/36] tcn fst updated

---
 aeon/forecasting/deep_learning/_tcn.py           | 12 ++++++------
 aeon/forecasting/deep_learning/tests/test_tcn.py |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py
index d3d06bf0fe..fbe28d935e 100644
--- a/aeon/forecasting/deep_learning/_tcn.py
+++ b/aeon/forecasting/deep_learning/_tcn.py
@@ -61,7 +61,7 @@ def __init__(
         horizon=1,
         window=10,
         batch_size=32,
-        epochs=100,
+        n_epochs=100,
         verbose=0,
         optimizer="adam",
         loss="mse",
@@ -75,7 +75,7 @@ def __init__(
             horizon=horizon,
             window=window,
             batch_size=batch_size,
-            epochs=epochs,
+            n_epochs=n_epochs,
             verbose=verbose,
             optimizer=optimizer,
             random_state=random_state,
@@ -86,7 +86,7 @@ def __init__(
         self.kernel_size = kernel_size
         self.dropout = dropout
 
-    def _build_model(self, input_shape):
+    def build_model(self, input_shape):
         """Build the TCN model for forecasting.
 
         Parameters
@@ -107,7 +107,7 @@ def _build_model(self, input_shape):
             kernel_size=self.kernel_size,
             dropout=self.dropout,
         )
-
+        # input_shape = (input_shape[1], input_shape[0])
         # Build the network with the given input shape
         input_layer, output = network.build_network(input_shape=input_shape)
 
@@ -127,7 +127,7 @@ def _get_test_params(
         ----------
         parameter_set : str, default="default"
             Name of the set of test parameters to return, for use in tests. If no
-            special parameters are defined for a value, will return `"default"` set.
+            special parameters are defined for a value,` will return `"default"` set.
             For forecasters, a "default" set of parameters should be provided for
             general testing, and a "results_comparison" set for comparing against
             previously recorded results if the general set does not produce suitable
@@ -141,7 +141,7 @@ def _get_test_params(
             `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
         """
         param = {
-            "epochs": 10,
+            "n_epochs": 10,
             "batch_size": 4,
             "n_blocks": [8, 8],
             "kernel_size": 2,
diff --git a/aeon/forecasting/deep_learning/tests/test_tcn.py b/aeon/forecasting/deep_learning/tests/test_tcn.py
index 2717eaf4b4..851069bf73 100644
--- a/aeon/forecasting/deep_learning/tests/test_tcn.py
+++ b/aeon/forecasting/deep_learning/tests/test_tcn.py
@@ -24,7 +24,7 @@ def test_tcn_forecaster(horizon, window, epochs):
 
     # Initialize TCNForecaster
     forecaster = TCNForecaster(
-        horizon=horizon, window=window, epochs=epochs, batch_size=16, verbose=0
+        horizon=horizon, window=window, n_epochs=epochs, batch_size=16, verbose=0
     )
 
     # Fit and predict

From bc1adbafd32a74cbd9cc0efabab77a4995dd1397 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sun, 17 Aug 2025 17:50:26 +0530
Subject: [PATCH 25/36] current basedlf

---
 aeon/forecasting/deep_learning/base.py        | 233 +++++++++++++++---
 .../deep_learning/tests/test_base.py          |   2 +-
 2 files changed, 197 insertions(+), 38 deletions(-)

diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index 3fa421c772..a08690a988 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -13,6 +13,7 @@
 
 import numpy as np
 import pandas as pd
+from sklearn.utils import check_random_state
 
 from aeon.forecasting.base import BaseForecaster
 
@@ -21,7 +22,8 @@ class BaseDeepForecaster(BaseForecaster):
     """Base class for deep learning forecasters in aeon.
 
     This class provides a foundation for deep learning-based forecasting models,
-    handling data preprocessing, model training, and prediction.
+    handling data preprocessing, model training, and prediction with enhanced
+    capabilities for callbacks, model saving/loading, and efficiency.
 
     Parameters
     ----------
@@ -39,38 +41,57 @@ class BaseDeepForecaster(BaseForecaster):
         Optimizer to use for training.
     loss : str or tf.keras.losses.Loss, default='mse'
         Loss function for training.
+    callbacks : list of tf.keras.callbacks.Callback or None, default=None
+        List of Keras callbacks to be applied during training.
     random_state : int, RandomState instance or None, default=None
         If int, random_state is the seed used by the random number generator;
         If RandomState instance, random_state is the random number generator;
         If None, the random number generator is the RandomState instance used
         by np.random.
-        Seeded random number generation can only be guaranteed on CPU processing,
-        GPU processing will be non-deterministic.
     axis : int, default=0
         Axis along which to apply the forecaster.
-        Default is 0 for univariate time series.
+    last_file_name : str, default="last_model"
+        The name of the file of the last model, used for saving models.
+    save_best_model : bool, default=False
+        Whether to save the best model during training based on validation loss.
+    file_path : str, default="./"
+        Directory path where models will be saved.
+
+    Attributes
+    ----------
+    model_ : tf.keras.Model or None
+        The fitted Keras model.
+    history_ : tf.keras.callbacks.History or None
+        Training history containing loss and metrics.
+    last_window_ : np.ndarray or None
+        The last window of data used for prediction.
     """
 
     _tags = {
-        "capability:horizon": False,
+        "capability:horizon": True,
         "capability:exogenous": False,
         "algorithm_type": "deeplearning",
         "non_deterministic": True,
         "cant_pickle": True,
         "python_dependencies": "tensorflow",
+        "capability:multivariate": True,
     }
 
     def __init__(
         self,
         horizon=1,
         window=10,
-        batch_size=32,
-        n_epochs=100,
+        batch_size=32, # remove
+        n_epochs=100, # remove 
         verbose=0,
-        optimizer="adam",
-        loss="mse",
-        random_state=None,
+        optimizer="adam", # remove it 
+        loss="mse", # remove it 
+        callbacks=None,
+        random_state=None, # remove it 
         axis=0,
+        last_file_name="last_model",
+        save_best_model=False,
+        file_path="./",
     ):
         self.horizon = horizon
         self.window = window
@@ -79,15 +100,22 @@ def __init__(
         self.verbose = verbose
         self.optimizer = optimizer
         self.loss = loss
+        self.callbacks = callbacks
         self.random_state = random_state
         self.axis = axis
+        self.last_file_name = last_file_name
+        self.save_best_model = save_best_model
+        self.file_path = file_path
+        
+        # Initialize attributes
         self.model_ = None
+        self.history_ = None
         self.last_window_ = None
 
         # Pass horizon and axis to BaseForecaster
         super().__init__(horizon=horizon, axis=axis)
 
-    def _fit(self, y, X=None):
+    def _fit(self, y, X=None): # remove it 
         """Fit the forecaster to training data.
 
         Parameters
@@ -103,7 +131,6 @@ def _fit(self, y, X=None):
             Returns an instance of self.
         """
         import tensorflow as tf
-        from sklearn.utils import check_random_state
 
         # Set random seed for reproducibility
         rng = check_random_state(self.random_state)
@@ -112,9 +139,13 @@ def _fit(self, y, X=None):
 
         # Convert input data to numpy array
         y_inner = self._convert_input(y)
+
+        if y_inner.ndim == 1:
+            y_inner = y_inner.reshape(-1, 1)  # Convert univariate to (timepoints, 1)
+
         if y_inner.shape[0] < self.window + self.horizon:
             raise ValueError(
-                f"Data length ({y_inner.shape[0]}) is insufficient"
+                f"Data length ({y_inner.shape[0]}) is insufficient for window "
                 f"({self.window}) and horizon ({self.horizon})."
             )
 
@@ -129,18 +160,25 @@ def _fit(self, y, X=None):
         self.model_ = self.build_model(input_shape)
         self.model_.compile(optimizer=self.optimizer, loss=self.loss)
 
+        # Prepare callbacks
+        callbacks_list = self._prepare_callbacks()
+
         # Train the model
-        self.model_.fit(
+        self.history_ = self.model_.fit(
             X_train,
             y_train,
             batch_size=self.batch_size,
             epochs=self.n_epochs,
             verbose=self.verbose,
+            callbacks=callbacks_list,
         )
-        self.last_window_ = y_inner[-self.window :]
+
+        # Save the last window for prediction
+        self.last_window_ = y_inner[-self.window:]
+
         return self
 
-    def _predict(self, y=None, X=None):
+    def _predict(self, y=None, X=None): # remove it
         """Make forecasts for y.
 
         Parameters
@@ -153,7 +191,8 @@ def _predict(self, y=None, X=None):
         Returns
         -------
         predictions : np.ndarray
-            Predicted values for the specified horizon.
+            Predicted values for the specified horizon. Shape: (horizon, channels) for multivariate
+            data or (horizon,) for univariate data.
         """
         if y is None:
             if not hasattr(self, "last_window_"):
@@ -161,22 +200,33 @@ def _predict(self, y=None, X=None):
             y_inner = self.last_window_
         else:
             y_inner = self._convert_input(y)
-            if len(y_inner) < self.window:
+            if y_inner.ndim == 1:
+                y_inner = y_inner.reshape(-1, 1)  # Convert univariate to (timepoints, 1)
+            if y_inner.shape[0] < self.window:
                 raise ValueError(
-                    f"Input data length ({len(y_inner)}) is less than the window size "
+                    f"Input data length ({y_inner.shape[0]}) is less than the window size "
                     f"({self.window})."
                 )
-            y_inner = y_inner[-self.window :]
-
-        last_window = y_inner.reshape(1, self.window, 1)
+            y_inner = y_inner[-self.window:]
+           
+        # Get the number of channels from the input data
+        num_channels = y_inner.shape[-1]
+        last_window = y_inner.reshape(1, self.window, num_channels)
         predictions = []
         current_window = last_window
+
         for _ in range(self.horizon):
             pred = self.model_.predict(current_window, verbose=0)
-            predictions.append(pred[0, 0])
+            predictions.append(pred)  
             current_window = np.roll(current_window, -1, axis=1)
-            current_window[0, -1, 0] = pred[0, 0]
-        return np.array(predictions)
+            current_window[0, -1, :] = pred[0, :]  # Update all channels
+
+        predictions = np.array(predictions)  
+        predictions = np.squeeze(predictions, axis=1) # Shape: (horizon, channels)
+        if num_channels == 1:
+            predictions = predictions.flatten()  # Convert to (horizon,) for univariate
+        
+        return predictions
 
     def _convert_input(self, y):
         """Convert input data to numpy array.
@@ -196,10 +246,6 @@ def _convert_input(self, y):
         else:
             y_inner = y
 
-        # Ensure 1D array
-        if len(y_inner.shape) > 1:
-            y_inner = y_inner.flatten()
-
         return y_inner
 
     def _create_sequences(self, data):
@@ -208,30 +254,143 @@ def _create_sequences(self, data):
         Parameters
         ----------
         data : np.ndarray
-            Time series data.
+            Time series data. Assumes shape (timepoints, channels) for multivariate
+            data or (timepoints,) for univariate.
 
         Returns
         -------
         X : np.ndarray
-            Input sequences.
+            Input sequences. Shape: (num_sequences, window, channels) for multivariate
+            or (num_sequences, window, 1) for univariate.
         y : np.ndarray
-            Target values.
+            Target values. Shape: (num_sequences, horizon, channels) for multivariate
+            or (num_sequences, horizon) for univariate (reshaped to (num_sequences, horizon, 1) if needed).
         """
-        if len(data) < self.window + self.horizon:
+        if data.ndim == 1:
+            data = data.reshape(-1, 1)  # Convert univariate to (timepoints, 1)
+
+        num_timepoints, num_channels = data.shape
+
+        if num_timepoints < self.window + self.horizon:
             raise ValueError(
-                f"Data length ({len(data)}) is insufficient for window "
+                f"Data length ({num_timepoints}) is insufficient for window "
                 f"({self.window}) and horizon ({self.horizon})."
             )
 
         X, y = [], []
-        for i in range(len(data) - self.window - self.horizon + 1):
+        for i in range(num_timepoints - self.window - self.horizon + 1):
             X.append(data[i : (i + self.window)])
             y.append(data[i + self.window : (i + self.window + self.horizon)])
 
-        X = np.array(X).reshape(-1, self.window, 1)
-        y = np.array(y).reshape(-1, self.horizon)
+        X = np.array(X)  # Shape: (num_sequences, window, channels)
+        y = np.array(y)  # Shape: (num_sequences, horizon, channels)
+
         return X, y
 
+    def _prepare_callbacks(self):
+        """Prepare callbacks for training.
+
+        Returns
+        -------
+        callbacks_list : list
+            List of callbacks to be used during training.
+        """
+        callbacks_list = []
+
+        # Add user-provided callbacks
+        if self.callbacks is not None:
+            if isinstance(self.callbacks, list):
+                callbacks_list.extend(self.callbacks)
+            else:
+                callbacks_list.append(self.callbacks)
+
+        # Add model checkpoint callback if save_best_model is True
+        if self.save_best_model:
+            callbacks_list = self._get_model_checkpoint_callback(
+                callbacks_list, self.file_path, "best_model"
+            )
+
+        return callbacks_list
+
+    def _get_model_checkpoint_callback(self, callbacks, file_path, file_name):
+        """Add model checkpoint callback to save the best model.
+
+        Parameters
+        ----------
+        callbacks : list
+            Existing list of callbacks.
+        file_path : str
+            Directory path where the model will be saved.
+        file_name : str
+            Name of the model file.
+
+        Returns
+        -------
+        callbacks : list
+            Updated list of callbacks including ModelCheckpoint.
+        """
+        import tensorflow as tf
+
+        model_checkpoint_ = tf.keras.callbacks.ModelCheckpoint(
+            filepath=file_path + file_name + ".keras",
+            monitor="loss",
+            save_best_only=True,
+            verbose=self.verbose,
+        )
+
+        if isinstance(callbacks, list):
+            return callbacks + [model_checkpoint_]
+        else:
+            return [callbacks] + [model_checkpoint_]
+
+    def summary(self):
+        """Summary function to return the losses/metrics for model fit.
+
+        Returns
+        -------
+        history : dict or None
+            Dictionary containing model's train/validation losses and metrics.
+        """
+        return self.history_.history if self.history_ is not None else None
+
+    def save_last_model_to_file(self, file_path="./"):
+        """Save the last epoch of the trained deep learning model.
+
+        Parameters
+        ----------
+        file_path : str, default="./"
+            The directory where the model will be saved.
+
+        Returns
+        -------
+        None
+        """
+        if self.model_ is None:
+            raise ValueError("No model to save. Please fit the model first.")
+        
+        self.model_.save(file_path + self.last_file_name + ".keras")
+
+    def load_model(self, model_path):
+        """Load a pre-trained keras model instead of fitting.
+
+        When calling this function, all functionalities can be used
+        such as predict with the loaded model.
+
+        Parameters
+        ----------
+        model_path : str
+            Path to the saved model file including extension.
+            Example: model_path="path/to/file/best_model.keras"
+
+        Returns
+        -------
+        None
+        """
+        import tensorflow as tf
+
+        self.model_ = tf.keras.models.load_model(model_path)
+        self.is_fitted = True
+
     @abstractmethod
     def build_model(self, input_shape):
         """Build the deep learning model.
diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index 5579ab9959..46774b520d 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -70,4 +70,4 @@ def test_base_deep_forecaster_insufficient_data():
     forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0)
 
     with pytest.raises(ValueError, match="Data length.*insufficient"):
-        forecaster.fit(data)
+        forecaster.fit(data)
\ No newline at end of file

From 4c6b7898f504bb2ec25ea01de687927405e9a30c Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 19 Aug 2025 12:34:43 +0530
Subject: [PATCH 26/36] tcn forecaster updated with new base class

---
 aeon/forecasting/deep_learning/_tcn.py        | 145 ++++++++++--
 aeon/forecasting/deep_learning/base.py        | 218 +-----------------
 .../deep_learning/tests/test_base.py          |  93 +++-----
 .../deep_learning/tests/test_tcn.py           |  43 +++-
 4 files changed, 209 insertions(+), 290 deletions(-)

diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py
index 2fbb26ae91..95b0a91f0e 100644
--- a/aeon/forecasting/deep_learning/_tcn.py
+++ b/aeon/forecasting/deep_learning/_tcn.py
@@ -3,16 +3,19 @@
 from __future__ import annotations
 
 __maintainer__ = []
-
 __all__ = ["TCNForecaster"]
 
 from typing import Any
 
+import numpy as np
+from sklearn.utils import check_random_state
+
+from aeon.forecasting.base import DirectForecastingMixin
 from aeon.forecasting.deep_learning.base import BaseDeepForecaster
 from aeon.networks._tcn import TCNNetwork
 
 
-class TCNForecaster(BaseDeepForecaster):
+class TCNForecaster(BaseDeepForecaster, DirectForecastingMixin):
     """A deep learning forecaster using Temporal Convolutional Network (TCN).
 
     It leverages the `TCNNetwork` from aeon's network module
@@ -26,7 +29,7 @@ class TCNForecaster(BaseDeepForecaster):
         The window size for creating input sequences.
     batch_size : int, default=32
         Batch size for training the model.
-    epochs : int, default=100
+    n_epochs : int, default=100
         Number of epochs to train the model.
     verbose : int, default=0
         Verbosity mode (0, 1, or 2).
@@ -34,10 +37,18 @@ class TCNForecaster(BaseDeepForecaster):
         Optimizer to use for training.
     loss : str or tf.keras.losses.Loss, default='mse'
         Loss function for training.
+    callbacks : list of tf.keras.callbacks.Callback or None, default=None
+        List of Keras callbacks to be applied during training.
     random_state : int, default=None
         Seed for random number generators.
     axis : int, default=0
         Axis along which to apply the forecaster.
+    last_file_name : str, default="last_model"
+        The name of the file of the last model, used for saving models.
+    save_best_model : bool, default=False
+        Whether to save the best model during training based on validation loss.
+    file_path : str, default="./"
+        Directory path where models will be saved.
     n_blocks : list of int, default=[16, 16, 16]
         List specifying the number of output channels for each layer of the
         TCN. The length determines the depth of the network.
@@ -50,10 +61,13 @@ class TCNForecaster(BaseDeepForecaster):
 
     _tags = {
         "python_dependencies": ["tensorflow"],
-        "capability:horizon": False,
+        "capability:horizon": True,
         "capability:multivariate": True,
         "capability:exogenous": False,
         "capability:univariate": True,
+        "algorithm_type": "deeplearning",
+        "non_deterministic": True,
+        "cant_pickle": True,
     }
 
     def __init__(
@@ -65,8 +79,12 @@ def __init__(
         verbose=0,
         optimizer="adam",
         loss="mse",
+        callbacks=None,
         random_state=None,
         axis=0,
+        last_file_name="last_model",
+        save_best_model=False,
+        file_path="./",
         n_blocks=None,
         kernel_size=2,
         dropout=0.2,
@@ -74,17 +92,21 @@ def __init__(
         super().__init__(
             horizon=horizon,
             window=window,
-            batch_size=batch_size,
-            n_epochs=n_epochs,
             verbose=verbose,
-            optimizer=optimizer,
-            random_state=random_state,
+            callbacks=callbacks,
             axis=axis,
-            loss=loss,
+            last_file_name=last_file_name,
+            save_best_model=save_best_model,
+            file_path=file_path,
         )
         self.n_blocks = n_blocks
         self.kernel_size = kernel_size
         self.dropout = dropout
+        self.batch_size = batch_size
+        self.n_epochs = n_epochs
+        self.optimizer = optimizer
+        self.loss = loss
+        self.random_state = random_state
 
     def build_model(self, input_shape):
         """Build the TCN model for forecasting.
@@ -101,21 +123,106 @@ def build_model(self, input_shape):
         """
         import tensorflow as tf
 
-        # Initialize the TCN network with the updated parameters
         network = TCNNetwork(
             n_blocks=self.n_blocks if self.n_blocks is not None else [16, 16, 16],
             kernel_size=self.kernel_size,
             dropout=self.dropout,
         )
-        # input_shape = (input_shape[1], input_shape[0])
-        # Build the network with the given input shape
         input_layer, output = network.build_network(input_shape=input_shape)
-
-        # Create the final model
         model = tf.keras.Model(inputs=input_layer, outputs=output)
         return model
 
-    # Added to handle __name__ in tests (class-level access)
+    def _fit(self, y, exog=None):
+        """Fit the forecaster to training data.
+
+        Parameters
+        ----------
+        y : np.ndarray or pd.Series
+            Target time series to which to fit the forecaster.
+
+        Returns
+        -------
+        self : TCNForecaster
+            Returns an instance of self.
+        """
+        import tensorflow as tf
+
+        rng = check_random_state(self.random_state)
+        self.random_state_ = rng.randint(0, np.iinfo(np.int32).max)
+        tf.keras.utils.set_random_seed(self.random_state_)
+        y_inner = y
+        num_timepoints, num_channels = y_inner.shape
+        num_sequences = num_timepoints - self.window - self.horizon + 1
+        if y_inner.shape[0] < self.window + self.horizon:
+            raise ValueError(
+                f"Data length ({y_inner.shape}) is insufficient for window "
+                f"({self.window}) and horizon ({self.horizon})."
+            )
+        windows_full = np.lib.stride_tricks.sliding_window_view(
+            y_inner, window_shape=(self.window, num_channels)
+        )
+        windows_full = np.squeeze(windows_full, axis=1)
+        X_train = windows_full[:num_sequences]
+        # print(f"Shape of X_train is {X_train.shape}")
+        tail = y_inner[self.window :]
+        y_windows = np.lib.stride_tricks.sliding_window_view(
+            tail, window_shape=(self.horizon, num_channels)
+        )
+        y_windows = np.squeeze(y_windows, axis=1)
+        y_train = y_windows[:num_sequences]
+        # print(f"Shape of y_train is {y_train.shape}")
+        input_shape = X_train.shape[1:]
+        self.model_ = self.build_model(input_shape)
+        self.model_.compile(optimizer=self.optimizer, loss=self.loss)
+        callbacks_list = self._prepare_callbacks()
+        self.history_ = self.model_.fit(
+            X_train,
+            y_train,
+            batch_size=self.batch_size,
+            epochs=self.n_epochs,
+            verbose=self.verbose,
+            callbacks=callbacks_list,
+        )
+        self.last_window_ = y_inner[-self.window :]
+        return self
+
+    def _predict(self, y=None, exog=None):
+        """Make forecasts for y.
+
+        Parameters
+        ----------
+        y : np.ndarray or pd.Series, default=None
+            Series to predict from. If None, uses last fitted window.
+
+        Returns
+        -------
+        predictions : np.ndarray
+            Predicted values for the specified horizon. Since TCN has single
+            horizon capability, returns single step prediction.
+        """
+        if y is None:
+            if not hasattr(self, "last_window_"):
+                raise ValueError("No fitted data available for prediction.")
+            y_inner = self.last_window_
+        else:
+            y_inner = y
+            if y_inner.ndim == 1:
+                y_inner = y_inner.reshape(-1, 1)
+            if y_inner.shape[0] < self.window:
+                raise ValueError(
+                    f"Input data length ({y_inner.shape}) is less than the "
+                    f"window size ({self.window})."
+                )
+            y_inner = y_inner[-self.window :]
+        num_channels = y_inner.shape[-1]
+        last_window = y_inner.reshape(1, self.window, num_channels)
+        pred = self.model_.predict(last_window, verbose=0)
+        if num_channels == 1:
+            prediction = pred.flatten()[0]
+        else:
+            prediction = pred[0, :]
+        return prediction
+
     @classmethod
     def _get_test_params(
         cls, parameter_set: str = "default"
@@ -127,18 +234,12 @@ def _get_test_params(
         ----------
         parameter_set : str, default="default"
             Name of the set of test parameters to return, for use in tests. If no
-            special parameters are defined for a value,` will return `"default"` set.
-            For forecasters, a "default" set of parameters should be provided for
-            general testing, and a "results_comparison" set for comparing against
-            previously recorded results if the general set does not produce suitable
-            probabilities to compare against.
+            special parameters are defined for a value, will return "default" set.
 
         Returns
         -------
         params : dict or list of dict, default={}
             Parameters to create testing instances of the class.
-            Each dict are parameters to construct an "interesting" test instance, i.e.,
-            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
         """
         param = {
             "n_epochs": 10,
diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index a08690a988..b861305c21 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -11,10 +11,6 @@
 
 from abc import abstractmethod
 
-import numpy as np
-import pandas as pd
-from sklearn.utils import check_random_state
-
 from aeon.forecasting.base import BaseForecaster
 
 
@@ -31,23 +27,10 @@ class BaseDeepForecaster(BaseForecaster):
         Forecasting horizon, the number of steps ahead to predict.
     window : int, default=10
         The window size for creating input sequences.
-    batch_size : int, default=32
-        Batch size for training the model.
-    n_epochs : int, default=100
-        Number of epochs to train the model.
     verbose : int, default=0
         Verbosity mode (0, 1, or 2).
-    optimizer : str or tf.keras.optimizers.Optimizer, default='adam'
-        Optimizer to use for training.
-    loss : str or tf.keras.losses.Loss, default='mse'
-        Loss function for training.
     callbacks : list of tf.keras.callbacks.Callback or None, default=None
         List of Keras callbacks to be applied during training.
-    random_state : int, RandomState instance or None, default=None
-        If int, random_state is the seed used by the random number generator;
-        If RandomState instance, random_state is the random number generator;
-        If None, the random number generator is the RandomState instance used
-        by np.random.
     axis : int, default=0
         Axis along which to apply the forecaster.
     last_file_name : str, default="last_model"
@@ -81,13 +64,8 @@ def __init__(
         self,
         horizon=1,
         window=10,
-        batch_size=32, # remove
-        n_epochs=100, # remove 
         verbose=0,
-        optimizer="adam", # remove it 
-        loss="mse", # remove it 
         callbacks=None,
-        random_state=None, # remove it 
         axis=0,
         last_file_name="last_model",
         save_best_model=False,
@@ -95,197 +73,32 @@ def __init__(
     ):
         self.horizon = horizon
         self.window = window
-        self.batch_size = batch_size
-        self.n_epochs = n_epochs
         self.verbose = verbose
-        self.optimizer = optimizer
-        self.loss = loss
         self.callbacks = callbacks
-        self.random_state = random_state
         self.axis = axis
         self.last_file_name = last_file_name
         self.save_best_model = save_best_model
         self.file_path = file_path
-        
-        # Initialize attributes
+
         self.model_ = None
         self.history_ = None
         self.last_window_ = None
 
-        # Pass horizon and axis to BaseForecaster
         super().__init__(horizon=horizon, axis=axis)
 
-    def _fit(self, y, X=None): # remove it 
-        """Fit the forecaster to training data.
-
-        Parameters
-        ----------
-        y : np.ndarray or pd.Series
-            Target time series to which to fit the forecaster.
-        X : np.ndarray or pd.DataFrame, default=None
-            Exogenous variables.
-
-        Returns
-        -------
-        self : BaseDeepForecaster
-            Returns an instance of self.
-        """
-        import tensorflow as tf
-
-        # Set random seed for reproducibility
-        rng = check_random_state(self.random_state)
-        self.random_state_ = rng.randint(0, np.iinfo(np.int32).max)
-        tf.keras.utils.set_random_seed(self.random_state_)
-
-        # Convert input data to numpy array
-        y_inner = self._convert_input(y)
-
-        if y_inner.ndim == 1:
-            y_inner = y_inner.reshape(-1, 1)  # Convert univariate to (timepoints, 1)
-
-        if y_inner.shape[0] < self.window + self.horizon:
-            raise ValueError(
-                f"Data length ({y_inner.shape[0]}) is insufficient for window "
-                f"({self.window}) and horizon ({self.horizon})."
-            )
-
-        # Create sequences for training
-        X_train, y_train = self._create_sequences(y_inner)
-
-        if X_train.shape[0] == 0:
-            raise ValueError("No training sequences could be created.")
-
-        # Build and compile the model
-        input_shape = X_train.shape[1:]
-        self.model_ = self.build_model(input_shape)
-        self.model_.compile(optimizer=self.optimizer, loss=self.loss)
-
-        # Prepare callbacks
-        callbacks_list = self._prepare_callbacks()
-
-        # Train the model
-        self.history_ = self.model_.fit(
-            X_train,
-            y_train,
-            batch_size=self.batch_size,
-            epochs=self.n_epochs,
-            verbose=self.verbose,
-            callbacks=callbacks_list,
-        )
-
-        # Save the last window for prediction
-        self.last_window_ = y_inner[-self.window:]
-
-        return self
-
-    def _predict(self, y=None, X=None): # remove it
-        """Make forecasts for y.
-
-        Parameters
-        ----------
-        y : np.ndarray or pd.Series, default=None
-            Series to predict from. If None, uses last fitted window.
-        X : np.ndarray or pd.DataFrame, default=None
-            Exogenous variables (not supported by default).
-
-        Returns
-        -------
-        predictions : np.ndarray
-            Predicted values for the specified horizon. Shape: (horizon, channels) for multivariate
-            data or (horizon,) for univariate data.
-        """
-        if y is None:
-            if not hasattr(self, "last_window_"):
-                raise ValueError("No fitted data available for prediction.")
-            y_inner = self.last_window_
-        else:
-            y_inner = self._convert_input(y)
-            if y_inner.ndim == 1:
-                y_inner = y_inner.reshape(-1, 1)  # Convert univariate to (timepoints, 1)
-            if y_inner.shape[0] < self.window:
-                raise ValueError(
-                    f"Input data length ({y_inner.shape[0]}) is less than the window size "
-                    f"({self.window})."
-                )
-            y_inner = y_inner[-self.window:]
-           
-        # Get the number of channels from the input data
-        num_channels = y_inner.shape[-1]
-        last_window = y_inner.reshape(1, self.window, num_channels)
-        predictions = []
-        current_window = last_window
-
-        for _ in range(self.horizon):
-            pred = self.model_.predict(current_window, verbose=0)
-            predictions.append(pred)  
-            current_window = np.roll(current_window, -1, axis=1)
-            current_window[0, -1, :] = pred[0, :]  # Update all channels
-
-        predictions = np.array(predictions)  
-        predictions = np.squeeze(predictions, axis=1) # Shape: (horizon, channels)
-        if num_channels == 1:
-            predictions = predictions.flatten()  # Convert to (horizon,) for univariate
-        
-        return predictions
-
-    def _convert_input(self, y):
-        """Convert input data to numpy array.
-
-        Parameters
-        ----------
-        y : np.ndarray or pd.Series
-            Input time series.
-
-        Returns
-        -------
-        y_inner : np.ndarray
-            Converted numpy array.
-        """
-        if isinstance(y, pd.Series) or isinstance(y, pd.DataFrame):
-            y_inner = y.values
-        else:
-            y_inner = y
-
-        return y_inner
-
-    def _create_sequences(self, data):
-        """Create input sequences and target values for training.
-
-        Parameters
-        ----------
-        data : np.ndarray
-            Time series data. Assumes shape (timepoints, channels) for multivariate
-            data or (timepoints,) for univariate.
-
-        Returns
-        -------
-        X : np.ndarray
-            Input sequences. Shape: (num_sequences, window, channels) for multivariate
-            or (num_sequences, window, 1) for univariate.
-        y : np.ndarray
-            Target values. Shape: (num_sequences, horizon, channels) for multivariate
-            or (num_sequences, horizon) for univariate (reshaped to (num_sequences, horizon, 1) if needed).
-        """
-        if data.ndim == 1:
-            data = data.reshape(-1, 1)  # Convert univariate to (timepoints, 1)
-
-        num_timepoints, num_channels = data.shape
-
-        if num_timepoints < self.window + self.horizon:
-            raise ValueError(
-                f"Data length ({num_timepoints}) is insufficient for window "
-                f"({self.window}) and horizon ({self.horizon})."
-            )
-
-        X, y = [], []
-        for i in range(num_timepoints - self.window - self.horizon + 1):
-            X.append(data[i : (i + self.window)])
-            y.append(data[i + self.window : (i + self.window + self.horizon)])
+    def _fit(self, y, exog=None):
+        """Fit the model."""
+        pass
 
-        X = np.array(X)  # Shape: (num_sequences, window, channels)
-        y = np.array(y)  # Shape: (num_sequences, horizon, channels)
+    def _predict(self, y, exog=None):
+        """Predict using the model."""
+        pass
 
-        return X, y
+    def _forecast(self, y, exog=None):
+        """Forecast values for time series X."""
+        y = self._preprocess_series(y, 1, True)
+        self.fit(y, exog)
+        return self.predict(y, exog)
 
     def _prepare_callbacks(self):
         """Prepare callbacks for training.
@@ -296,20 +109,15 @@ def _prepare_callbacks(self):
             List of callbacks to be used during training.
         """
         callbacks_list = []
-
-        # Add user-provided callbacks
         if self.callbacks is not None:
             if isinstance(self.callbacks, list):
                 callbacks_list.extend(self.callbacks)
             else:
                 callbacks_list.append(self.callbacks)
-
-        # Add model checkpoint callback if save_best_model is True
         if self.save_best_model:
             callbacks_list = self._get_model_checkpoint_callback(
                 callbacks_list, self.file_path, "best_model"
             )
-
         return callbacks_list
 
     def _get_model_checkpoint_callback(self, callbacks, file_path, file_name):
@@ -337,7 +145,6 @@ def _get_model_checkpoint_callback(self, callbacks, file_path, file_name):
             save_best_only=True,
             verbose=self.verbose,
         )
-
         if isinstance(callbacks, list):
             return callbacks + [model_checkpoint_]
         else:
@@ -367,7 +174,6 @@ def save_last_model_to_file(self, file_path="./"):
         """
         if self.model_ is None:
             raise ValueError("No model to save. Please fit the model first.")
-        
         self.model_.save(file_path + self.last_file_name + ".keras")
 
     def load_model(self, model_path):
diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index 5579ab9959..5c0671b4d3 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -1,73 +1,44 @@
-"""Test for BaseDeepForecaster class in aeon."""
+"""Test file for BaseDeepForecaster."""
 
-import numpy as np
 import pytest
 
-from aeon.forecasting.deep_learning import BaseDeepForecaster
+from aeon.forecasting.deep_learning.base import BaseDeepForecaster
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
 
-@pytest.mark.skipif(
-    not _check_soft_dependencies("tensorflow", severity="none"),
-    reason="skip test if required soft dependency not available",
-)
-class SimpleDeepForecaster(BaseDeepForecaster):
-    """A simple concrete implementation of BaseDeepForecaster for testing."""
-
-    def __init__(self, horizon=1, window=5, n_epochs=1, verbose=0):
-        super().__init__(
-            horizon=horizon, window=window, n_epochs=n_epochs, verbose=verbose
-        )
+class DummyDeepForecaster(BaseDeepForecaster):
+    """Minimal concrete subclass to allow instantiation."""
 
     def build_model(self, input_shape):
-        """Build a simple Keras model for testing."""
-        import tensorflow as tf
-
-        model = tf.keras.Sequential(
-            [
-                tf.keras.layers.Flatten(input_shape=input_shape),
-                tf.keras.layers.Dense(10, activation="relu"),
-                tf.keras.layers.Dense(self.horizon),
-            ]
-        )
-        return model
+        """Construct and return a model based on the provided input shape."""
+        return None  # Not needed for this test
 
 
 @pytest.mark.skipif(
-    not _check_soft_dependencies("tensorflow", severity="none"),
-    reason="skip test if required soft dependency not available",
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
 )
-def test_base_deep_forecaster_fit_predict():
-    """Test fitting and predicting with BaseDeepForecaster implementation."""
-    # Generate synthetic data
-    np.random.seed(42)
-    data = np.random.randn(50)
-    y = np.random.randn(10)
-
-    # Initialize forecaster
-    forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0)
-
-    # Fit the model
-    forecaster.fit(data)
-
-    # Predict
-    predictions = forecaster.predict(y)
-
-    # Validate output shape
-    assert (
-        len(predictions) == 1
-    ), f"Expected predictions of length 1, got {len(predictions)}"
-    assert isinstance(predictions, np.ndarray), "Predictions should be a numpy array"
-
-
-@pytest.mark.skipif(
-    not _check_soft_dependencies("tensorflow", severity="none"),
-    reason="skip test if required soft dependency not available",
-)
-def test_base_deep_forecaster_insufficient_data():
-    """Test error handling for insufficient data."""
-    data = np.random.randn(5)
-    forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0)
-
-    with pytest.raises(ValueError, match="Data length.*insufficient"):
-        forecaster.fit(data)
+def test_default_init_attributes():
+    """Test that BaseDeepForecaster sets default params and attributes correctly."""
+    forecaster = DummyDeepForecaster()
+
+    # check default parameters
+    assert forecaster.horizon == 1
+    assert forecaster.window == 10
+    assert forecaster.verbose == 0
+    assert forecaster.callbacks is None
+    assert forecaster.axis == 0
+    assert forecaster.last_file_name == "last_model"
+    assert forecaster.save_best_model is False
+    assert forecaster.file_path == "./"
+
+    # check default attributes after init
+    assert forecaster.model_ is None
+    assert forecaster.history_ is None
+    assert forecaster.last_window_ is None
+
+    # check tags
+    tags = forecaster.get_tags()
+    assert tags["algorithm_type"] == "deeplearning"
+    assert tags["capability:horizon"]
+    assert tags["capability:multivariate"]
diff --git a/aeon/forecasting/deep_learning/tests/test_tcn.py b/aeon/forecasting/deep_learning/tests/test_tcn.py
index 2f5b0710b6..7ee3139321 100644
--- a/aeon/forecasting/deep_learning/tests/test_tcn.py
+++ b/aeon/forecasting/deep_learning/tests/test_tcn.py
@@ -5,7 +5,7 @@
 
 import pytest
 
-from aeon.datasets import load_airline
+from aeon.datasets import load_airline, load_longley
 from aeon.forecasting.deep_learning._tcn import TCNForecaster
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
@@ -35,3 +35,44 @@ def test_tcn_forecaster(horizon, window, epochs):
     assert prediction is not None
     if isinstance(prediction, tf.Tensor):
         assert not tf.math.is_nan(prediction).numpy()
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
+)
+@pytest.mark.parametrize(
+    "loader,is_univariate",
+    [
+        (load_airline, True),  # univariate dataset
+        (load_longley, False),  # multivariate dataset
+    ],
+)
+def test_tcn_forecaster_uni_mutli(loader, is_univariate):
+    """Test TCNForecaster on univariate (airline) and multivariate (longley) data."""
+    y = loader()
+
+    forecaster = TCNForecaster(
+        horizon=1,
+        window=10,
+        n_epochs=2,
+        batch_size=16,
+        verbose=0,
+    )
+
+    # fit
+    forecaster.fit(y)
+
+    # predict
+    prediction = forecaster.predict(y)
+    assert prediction is not None
+
+    # forecast
+    prediction = forecaster.forecast(y)
+    assert prediction is not None
+
+    # only for univariate case, test direct forecasting
+    if is_univariate:
+        prediction = forecaster.direct_forecast(y, 3)
+        assert prediction is not None
+        assert len(prediction) == 3

From 4aad1b62dfa194de40eb3ba30ce54297bb28198d Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 19 Aug 2025 12:38:51 +0530
Subject: [PATCH 27/36] workflow corrected

---
 .github/workflows/pr_pytest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml
index 401ae8f8d1..5addb89627 100644
--- a/.github/workflows/pr_pytest.yml
+++ b/.github/workflows/pr_pytest.yml
@@ -3,7 +3,7 @@ name: PR pytest
 on:
   push:
     branches:
-      - tcn_fst
+      - main
   pull_request:
     paths:
       - "aeon/**"

From 6c4dca7de9c894f538854f0a7e8a718b2ee3e36d Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 19 Aug 2025 13:22:45 +0530
Subject: [PATCH 28/36] excluded forecasting test for tcn

---
 aeon/testing/testing_config.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/aeon/testing/testing_config.py b/aeon/testing/testing_config.py
index 990befb15d..759b5f717f 100644
--- a/aeon/testing/testing_config.py
+++ b/aeon/testing/testing_config.py
@@ -46,6 +46,8 @@
         "check_persistence_via_pickle",
         "check_save_estimators_to_file",
     ],
+    # checks required for multivariate
+    "TCNForecaster": ["check_forecaster_output"],
     # needs investigation
     "SASTClassifier": ["check_fit_deterministic", "check_estimator_multithreading"],
     "RSASTClassifier": ["check_fit_deterministic", "check_estimator_multithreading"],

From b25059dc75384bc653922112167347bfe880f897 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 19 Aug 2025 14:30:44 +0530
Subject: [PATCH 29/36] base changed

---
 aeon/forecasting/deep_learning/base.py        | 218 +-----------------
 .../deep_learning/tests/test_base.py          |  93 +++-----
 2 files changed, 44 insertions(+), 267 deletions(-)

diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index a08690a988..b861305c21 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -11,10 +11,6 @@
 
 from abc import abstractmethod
 
-import numpy as np
-import pandas as pd
-from sklearn.utils import check_random_state
-
 from aeon.forecasting.base import BaseForecaster
 
 
@@ -31,23 +27,10 @@ class BaseDeepForecaster(BaseForecaster):
         Forecasting horizon, the number of steps ahead to predict.
     window : int, default=10
         The window size for creating input sequences.
-    batch_size : int, default=32
-        Batch size for training the model.
-    n_epochs : int, default=100
-        Number of epochs to train the model.
     verbose : int, default=0
         Verbosity mode (0, 1, or 2).
-    optimizer : str or tf.keras.optimizers.Optimizer, default='adam'
-        Optimizer to use for training.
-    loss : str or tf.keras.losses.Loss, default='mse'
-        Loss function for training.
     callbacks : list of tf.keras.callbacks.Callback or None, default=None
         List of Keras callbacks to be applied during training.
-    random_state : int, RandomState instance or None, default=None
-        If int, random_state is the seed used by the random number generator;
-        If RandomState instance, random_state is the random number generator;
-        If None, the random number generator is the RandomState instance used
-        by np.random.
     axis : int, default=0
         Axis along which to apply the forecaster.
     last_file_name : str, default="last_model"
@@ -81,13 +64,8 @@ def __init__(
         self,
         horizon=1,
         window=10,
-        batch_size=32, # remove
-        n_epochs=100, # remove 
         verbose=0,
-        optimizer="adam", # remove it 
-        loss="mse", # remove it 
         callbacks=None,
-        random_state=None, # remove it 
         axis=0,
         last_file_name="last_model",
         save_best_model=False,
@@ -95,197 +73,32 @@ def __init__(
     ):
         self.horizon = horizon
         self.window = window
-        self.batch_size = batch_size
-        self.n_epochs = n_epochs
         self.verbose = verbose
-        self.optimizer = optimizer
-        self.loss = loss
         self.callbacks = callbacks
-        self.random_state = random_state
         self.axis = axis
         self.last_file_name = last_file_name
         self.save_best_model = save_best_model
         self.file_path = file_path
-        
-        # Initialize attributes
+
         self.model_ = None
         self.history_ = None
         self.last_window_ = None
 
-        # Pass horizon and axis to BaseForecaster
         super().__init__(horizon=horizon, axis=axis)
 
-    def _fit(self, y, X=None): # remove it 
-        """Fit the forecaster to training data.
-
-        Parameters
-        ----------
-        y : np.ndarray or pd.Series
-            Target time series to which to fit the forecaster.
-        X : np.ndarray or pd.DataFrame, default=None
-            Exogenous variables.
-
-        Returns
-        -------
-        self : BaseDeepForecaster
-            Returns an instance of self.
-        """
-        import tensorflow as tf
-
-        # Set random seed for reproducibility
-        rng = check_random_state(self.random_state)
-        self.random_state_ = rng.randint(0, np.iinfo(np.int32).max)
-        tf.keras.utils.set_random_seed(self.random_state_)
-
-        # Convert input data to numpy array
-        y_inner = self._convert_input(y)
-
-        if y_inner.ndim == 1:
-            y_inner = y_inner.reshape(-1, 1)  # Convert univariate to (timepoints, 1)
-
-        if y_inner.shape[0] < self.window + self.horizon:
-            raise ValueError(
-                f"Data length ({y_inner.shape[0]}) is insufficient for window "
-                f"({self.window}) and horizon ({self.horizon})."
-            )
-
-        # Create sequences for training
-        X_train, y_train = self._create_sequences(y_inner)
-
-        if X_train.shape[0] == 0:
-            raise ValueError("No training sequences could be created.")
-
-        # Build and compile the model
-        input_shape = X_train.shape[1:]
-        self.model_ = self.build_model(input_shape)
-        self.model_.compile(optimizer=self.optimizer, loss=self.loss)
-
-        # Prepare callbacks
-        callbacks_list = self._prepare_callbacks()
-
-        # Train the model
-        self.history_ = self.model_.fit(
-            X_train,
-            y_train,
-            batch_size=self.batch_size,
-            epochs=self.n_epochs,
-            verbose=self.verbose,
-            callbacks=callbacks_list,
-        )
-
-        # Save the last window for prediction
-        self.last_window_ = y_inner[-self.window:]
-
-        return self
-
-    def _predict(self, y=None, X=None): # remove it
-        """Make forecasts for y.
-
-        Parameters
-        ----------
-        y : np.ndarray or pd.Series, default=None
-            Series to predict from. If None, uses last fitted window.
-        X : np.ndarray or pd.DataFrame, default=None
-            Exogenous variables (not supported by default).
-
-        Returns
-        -------
-        predictions : np.ndarray
-            Predicted values for the specified horizon. Shape: (horizon, channels) for multivariate
-            data or (horizon,) for univariate data.
-        """
-        if y is None:
-            if not hasattr(self, "last_window_"):
-                raise ValueError("No fitted data available for prediction.")
-            y_inner = self.last_window_
-        else:
-            y_inner = self._convert_input(y)
-            if y_inner.ndim == 1:
-                y_inner = y_inner.reshape(-1, 1)  # Convert univariate to (timepoints, 1)
-            if y_inner.shape[0] < self.window:
-                raise ValueError(
-                    f"Input data length ({y_inner.shape[0]}) is less than the window size "
-                    f"({self.window})."
-                )
-            y_inner = y_inner[-self.window:]
-           
-        # Get the number of channels from the input data
-        num_channels = y_inner.shape[-1]
-        last_window = y_inner.reshape(1, self.window, num_channels)
-        predictions = []
-        current_window = last_window
-
-        for _ in range(self.horizon):
-            pred = self.model_.predict(current_window, verbose=0)
-            predictions.append(pred)  
-            current_window = np.roll(current_window, -1, axis=1)
-            current_window[0, -1, :] = pred[0, :]  # Update all channels
-
-        predictions = np.array(predictions)  
-        predictions = np.squeeze(predictions, axis=1) # Shape: (horizon, channels)
-        if num_channels == 1:
-            predictions = predictions.flatten()  # Convert to (horizon,) for univariate
-        
-        return predictions
-
-    def _convert_input(self, y):
-        """Convert input data to numpy array.
-
-        Parameters
-        ----------
-        y : np.ndarray or pd.Series
-            Input time series.
-
-        Returns
-        -------
-        y_inner : np.ndarray
-            Converted numpy array.
-        """
-        if isinstance(y, pd.Series) or isinstance(y, pd.DataFrame):
-            y_inner = y.values
-        else:
-            y_inner = y
-
-        return y_inner
-
-    def _create_sequences(self, data):
-        """Create input sequences and target values for training.
-
-        Parameters
-        ----------
-        data : np.ndarray
-            Time series data. Assumes shape (timepoints, channels) for multivariate
-            data or (timepoints,) for univariate.
-
-        Returns
-        -------
-        X : np.ndarray
-            Input sequences. Shape: (num_sequences, window, channels) for multivariate
-            or (num_sequences, window, 1) for univariate.
-        y : np.ndarray
-            Target values. Shape: (num_sequences, horizon, channels) for multivariate
-            or (num_sequences, horizon) for univariate (reshaped to (num_sequences, horizon, 1) if needed).
-        """
-        if data.ndim == 1:
-            data = data.reshape(-1, 1)  # Convert univariate to (timepoints, 1)
-
-        num_timepoints, num_channels = data.shape
-
-        if num_timepoints < self.window + self.horizon:
-            raise ValueError(
-                f"Data length ({num_timepoints}) is insufficient for window "
-                f"({self.window}) and horizon ({self.horizon})."
-            )
-
-        X, y = [], []
-        for i in range(num_timepoints - self.window - self.horizon + 1):
-            X.append(data[i : (i + self.window)])
-            y.append(data[i + self.window : (i + self.window + self.horizon)])
+    def _fit(self, y, exog=None):
+        """Fit the model."""
+        pass
 
-        X = np.array(X)  # Shape: (num_sequences, window, channels)
-        y = np.array(y)  # Shape: (num_sequences, horizon, channels)
+    def _predict(self, y, exog=None):
+        """Predict using the model."""
+        pass
 
-        return X, y
+    def _forecast(self, y, exog=None):
+        """Forecast values for time series X."""
+        y = self._preprocess_series(y, 1, True)
+        self.fit(y, exog)
+        return self.predict(y, exog)
 
     def _prepare_callbacks(self):
         """Prepare callbacks for training.
@@ -296,20 +109,15 @@ def _prepare_callbacks(self):
             List of callbacks to be used during training.
         """
         callbacks_list = []
-
-        # Add user-provided callbacks
         if self.callbacks is not None:
             if isinstance(self.callbacks, list):
                 callbacks_list.extend(self.callbacks)
             else:
                 callbacks_list.append(self.callbacks)
-
-        # Add model checkpoint callback if save_best_model is True
         if self.save_best_model:
             callbacks_list = self._get_model_checkpoint_callback(
                 callbacks_list, self.file_path, "best_model"
             )
-
         return callbacks_list
 
     def _get_model_checkpoint_callback(self, callbacks, file_path, file_name):
@@ -337,7 +145,6 @@ def _get_model_checkpoint_callback(self, callbacks, file_path, file_name):
             save_best_only=True,
             verbose=self.verbose,
         )
-
         if isinstance(callbacks, list):
             return callbacks + [model_checkpoint_]
         else:
@@ -367,7 +174,6 @@ def save_last_model_to_file(self, file_path="./"):
         """
         if self.model_ is None:
             raise ValueError("No model to save. Please fit the model first.")
-        
         self.model_.save(file_path + self.last_file_name + ".keras")
 
     def load_model(self, model_path):
diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index 46774b520d..5c0671b4d3 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -1,73 +1,44 @@
-"""Test for BaseDeepForecaster class in aeon."""
+"""Test file for BaseDeepForecaster."""
 
-import numpy as np
 import pytest
 
-from aeon.forecasting.deep_learning import BaseDeepForecaster
+from aeon.forecasting.deep_learning.base import BaseDeepForecaster
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
 
-@pytest.mark.skipif(
-    not _check_soft_dependencies("tensorflow", severity="none"),
-    reason="skip test if required soft dependency not available",
-)
-class SimpleDeepForecaster(BaseDeepForecaster):
-    """A simple concrete implementation of BaseDeepForecaster for testing."""
-
-    def __init__(self, horizon=1, window=5, n_epochs=1, verbose=0):
-        super().__init__(
-            horizon=horizon, window=window, n_epochs=n_epochs, verbose=verbose
-        )
+class DummyDeepForecaster(BaseDeepForecaster):
+    """Minimal concrete subclass to allow instantiation."""
 
     def build_model(self, input_shape):
-        """Build a simple Keras model for testing."""
-        import tensorflow as tf
-
-        model = tf.keras.Sequential(
-            [
-                tf.keras.layers.Flatten(input_shape=input_shape),
-                tf.keras.layers.Dense(10, activation="relu"),
-                tf.keras.layers.Dense(self.horizon),
-            ]
-        )
-        return model
+        """Construct and return a model based on the provided input shape."""
+        return None  # Not needed for this test
 
 
 @pytest.mark.skipif(
-    not _check_soft_dependencies("tensorflow", severity="none"),
-    reason="skip test if required soft dependency not available",
+    not _check_soft_dependencies(["tensorflow"], severity="none"),
+    reason="Tensorflow soft dependency unavailable.",
 )
-def test_base_deep_forecaster_fit_predict():
-    """Test fitting and predicting with BaseDeepForecaster implementation."""
-    # Generate synthetic data
-    np.random.seed(42)
-    data = np.random.randn(50)
-    y = np.random.randn(10)
-
-    # Initialize forecaster
-    forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0)
-
-    # Fit the model
-    forecaster.fit(data)
-
-    # Predict
-    predictions = forecaster.predict(y)
-
-    # Validate output shape
-    assert (
-        len(predictions) == 1
-    ), f"Expected predictions of length 1, got {len(predictions)}"
-    assert isinstance(predictions, np.ndarray), "Predictions should be a numpy array"
-
-
-@pytest.mark.skipif(
-    not _check_soft_dependencies("tensorflow", severity="none"),
-    reason="skip test if required soft dependency not available",
-)
-def test_base_deep_forecaster_insufficient_data():
-    """Test error handling for insufficient data."""
-    data = np.random.randn(5)
-    forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0)
-
-    with pytest.raises(ValueError, match="Data length.*insufficient"):
-        forecaster.fit(data)
\ No newline at end of file
+def test_default_init_attributes():
+    """Test that BaseDeepForecaster sets default params and attributes correctly."""
+    forecaster = DummyDeepForecaster()
+
+    # check default parameters
+    assert forecaster.horizon == 1
+    assert forecaster.window == 10
+    assert forecaster.verbose == 0
+    assert forecaster.callbacks is None
+    assert forecaster.axis == 0
+    assert forecaster.last_file_name == "last_model"
+    assert forecaster.save_best_model is False
+    assert forecaster.file_path == "./"
+
+    # check default attributes after init
+    assert forecaster.model_ is None
+    assert forecaster.history_ is None
+    assert forecaster.last_window_ is None
+
+    # check tags
+    tags = forecaster.get_tags()
+    assert tags["algorithm_type"] == "deeplearning"
+    assert tags["capability:horizon"]
+    assert tags["capability:multivariate"]

From f3af43320cd64c6642627e2d467a02ef6e4c9ff0 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 19 Aug 2025 15:41:53 +0530
Subject: [PATCH 30/36] save best model changed

---
 aeon/forecasting/deep_learning/base.py           | 16 +++++++---------
 .../forecasting/deep_learning/tests/test_base.py |  1 -
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index b861305c21..28f4c51784 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -35,8 +35,6 @@ class BaseDeepForecaster(BaseForecaster):
         Axis along which to apply the forecaster.
     last_file_name : str, default="last_model"
         The name of the file of the last model, used for saving models.
-    save_best_model : bool, default=False
-        Whether to save the best model during training based on validation loss.
     file_path : str, default="./"
         Directory path where models will be saved.
 
@@ -68,7 +66,6 @@ def __init__(
         callbacks=None,
         axis=0,
         last_file_name="last_model",
-        save_best_model=False,
         file_path="./",
     ):
         self.horizon = horizon
@@ -77,7 +74,6 @@ def __init__(
         self.callbacks = callbacks
         self.axis = axis
         self.last_file_name = last_file_name
-        self.save_best_model = save_best_model
         self.file_path = file_path
 
         self.model_ = None
@@ -114,10 +110,10 @@ def _prepare_callbacks(self):
                 callbacks_list.extend(self.callbacks)
             else:
                 callbacks_list.append(self.callbacks)
-        if self.save_best_model:
-            callbacks_list = self._get_model_checkpoint_callback(
-                callbacks_list, self.file_path, "best_model"
-            )
+
+        callbacks_list = self._get_model_checkpoint_callback(
+            callbacks_list, self.file_path, "best_model"
+        )
         return callbacks_list
 
     def _get_model_checkpoint_callback(self, callbacks, file_path, file_name):
@@ -172,9 +168,11 @@ def save_last_model_to_file(self, file_path="./"):
         -------
         None
         """
+        import os
+
         if self.model_ is None:
             raise ValueError("No model to save. Please fit the model first.")
-        self.model_.save(file_path + self.last_file_name + ".keras")
+        self.model_.save(os.path.join(file_path, self.last_file_name + ".keras"))
 
     def load_model(self, model_path):
         """Load a pre-trained keras model instead of fitting.
diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index 5c0671b4d3..e65321dbfa 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -29,7 +29,6 @@ def test_default_init_attributes():
     assert forecaster.callbacks is None
     assert forecaster.axis == 0
     assert forecaster.last_file_name == "last_model"
-    assert forecaster.save_best_model is False
     assert forecaster.file_path == "./"
 
     # check default attributes after init

From 7ce3e681da5bf2b7d8bbb0563bdb5affdfea91bc Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 19 Aug 2025 18:11:55 +0530
Subject: [PATCH 31/36] conversations resolved

---
 aeon/forecasting/deep_learning/base.py        | 35 +++++++++++++++----
 .../deep_learning/tests/test_base.py          |  7 ++--
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index 28f4c51784..5e812282f4 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -10,6 +10,7 @@
 __all__ = ["BaseDeepForecaster"]
 
 from abc import abstractmethod
+from typing import Any
 
 from aeon.forecasting.base import BaseForecaster
 
@@ -23,10 +24,10 @@ class BaseDeepForecaster(BaseForecaster):
 
     Parameters
     ----------
+    window : int,
+        The window size for creating input sequences.
     horizon : int, default=1
         Forecasting horizon, the number of steps ahead to predict.
-    window : int, default=10
-        The window size for creating input sequences.
     verbose : int, default=0
         Verbosity mode (0, 1, or 2).
     callbacks : list of tf.keras.callbacks.Callback or None, default=None
@@ -55,13 +56,13 @@ class BaseDeepForecaster(BaseForecaster):
         "non_deterministic": True,
         "cant_pickle": True,
         "python_dependencies": "tensorflow",
-        "capability:multivariate": True,
+        "capability:multivariate": False,
     }
 
     def __init__(
         self,
+        window,
         horizon=1,
-        window=10,
         verbose=0,
         callbacks=None,
         axis=0,
@@ -84,11 +85,11 @@ def __init__(
 
     def _fit(self, y, exog=None):
         """Fit the model."""
-        pass
+        ...
 
     def _predict(self, y, exog=None):
         """Predict using the model."""
-        pass
+        ...
 
     def _forecast(self, y, exog=None):
         """Forecast values for time series X."""
@@ -210,3 +211,25 @@ def build_model(self, input_shape):
             Compiled Keras model.
         """
         pass
+
+    @classmethod
+    def _get_test_params(
+        cls, parameter_set: str = "default"
+    ) -> dict[str, Any] | list[dict[str, Any]]:
+        """
+        Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests.
+
+        Returns
+        -------
+        params : dict or list of dict, default={}
+            Parameters to create testing instances of the class.
+        """
+        param = {
+            "window": 10,
+        }
+        return [param]
diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index e65321dbfa..9a88503686 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -9,6 +9,9 @@
 class DummyDeepForecaster(BaseDeepForecaster):
     """Minimal concrete subclass to allow instantiation."""
 
+    def __init__(self, window):
+        super().__init__(window=window)
+
     def build_model(self, input_shape):
         """Construct and return a model based on the provided input shape."""
         return None  # Not needed for this test
@@ -20,7 +23,7 @@ def build_model(self, input_shape):
 )
 def test_default_init_attributes():
     """Test that BaseDeepForecaster sets default params and attributes correctly."""
-    forecaster = DummyDeepForecaster()
+    forecaster = DummyDeepForecaster(window=10)
 
     # check default parameters
     assert forecaster.horizon == 1
@@ -40,4 +43,4 @@ def test_default_init_attributes():
     tags = forecaster.get_tags()
     assert tags["algorithm_type"] == "deeplearning"
     assert tags["capability:horizon"]
-    assert tags["capability:multivariate"]
+    assert tags["capability:univariate"]

From 50dbec6364d221fbf4248b994d4848b2c9e51e72 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 19 Aug 2025 18:12:14 +0530
Subject: [PATCH 32/36] conversations resolved

---
 aeon/forecasting/deep_learning/base.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index 5e812282f4..cc90af48c7 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -91,12 +91,6 @@ def _predict(self, y, exog=None):
         """Predict using the model."""
         ...
 
-    def _forecast(self, y, exog=None):
-        """Forecast values for time series X."""
-        y = self._preprocess_series(y, 1, True)
-        self.fit(y, exog)
-        return self.predict(y, exog)
-
     def _prepare_callbacks(self):
         """Prepare callbacks for training.
 

From 39c20e4fd3c735238b54df54787c6cbc3e593a06 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 19 Aug 2025 20:40:36 +0530
Subject: [PATCH 33/36] TCN forecaster corrected

---
 aeon/forecasting/deep_learning/_tcn.py           | 15 +++++++--------
 aeon/forecasting/deep_learning/tests/test_tcn.py | 13 ++++++-------
 aeon/testing/testing_config.py                   |  2 --
 3 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py
index 95b0a91f0e..732ed30a75 100644
--- a/aeon/forecasting/deep_learning/_tcn.py
+++ b/aeon/forecasting/deep_learning/_tcn.py
@@ -10,12 +10,12 @@
 import numpy as np
 from sklearn.utils import check_random_state
 
-from aeon.forecasting.base import DirectForecastingMixin
+from aeon.forecasting.base import IterativeForecastingMixin
 from aeon.forecasting.deep_learning.base import BaseDeepForecaster
 from aeon.networks._tcn import TCNNetwork
 
 
-class TCNForecaster(BaseDeepForecaster, DirectForecastingMixin):
+class TCNForecaster(BaseDeepForecaster, IterativeForecastingMixin):
     """A deep learning forecaster using Temporal Convolutional Network (TCN).
 
     It leverages the `TCNNetwork` from aeon's network module
@@ -25,8 +25,6 @@ class TCNForecaster(BaseDeepForecaster, DirectForecastingMixin):
     ----------
     horizon : int, default=1
         Forecasting horizon, the number of steps ahead to predict.
-    window : int, default=10
-        The window size for creating input sequences.
     batch_size : int, default=32
         Batch size for training the model.
     n_epochs : int, default=100
@@ -62,7 +60,7 @@ class TCNForecaster(BaseDeepForecaster, DirectForecastingMixin):
     _tags = {
         "python_dependencies": ["tensorflow"],
         "capability:horizon": True,
-        "capability:multivariate": True,
+        "capability:multivariate": False,
         "capability:exogenous": False,
         "capability:univariate": True,
         "algorithm_type": "deeplearning",
@@ -72,8 +70,8 @@ class TCNForecaster(BaseDeepForecaster, DirectForecastingMixin):
 
     def __init__(
         self,
+        window,
         horizon=1,
-        window=10,
         batch_size=32,
         n_epochs=100,
         verbose=0,
@@ -96,7 +94,6 @@ def __init__(
             callbacks=callbacks,
             axis=axis,
             last_file_name=last_file_name,
-            save_best_model=save_best_model,
             file_path=file_path,
         )
         self.n_blocks = n_blocks
@@ -107,6 +104,7 @@ def __init__(
         self.optimizer = optimizer
         self.loss = loss
         self.random_state = random_state
+        self.save_best_model = save_best_model
 
     def build_model(self, input_shape):
         """Build the TCN model for forecasting.
@@ -221,7 +219,7 @@ def _predict(self, y=None, exog=None):
             prediction = pred.flatten()[0]
         else:
             prediction = pred[0, :]
-        return prediction
+        return float(prediction)
 
     @classmethod
     def _get_test_params(
@@ -242,6 +240,7 @@ def _get_test_params(
             Parameters to create testing instances of the class.
         """
         param = {
+            "window": 10,
             "n_epochs": 10,
             "batch_size": 4,
             "n_blocks": [8, 8],
diff --git a/aeon/forecasting/deep_learning/tests/test_tcn.py b/aeon/forecasting/deep_learning/tests/test_tcn.py
index 7ee3139321..afaf3bd16b 100644
--- a/aeon/forecasting/deep_learning/tests/test_tcn.py
+++ b/aeon/forecasting/deep_learning/tests/test_tcn.py
@@ -5,7 +5,7 @@
 
 import pytest
 
-from aeon.datasets import load_airline, load_longley
+from aeon.datasets import load_airline
 from aeon.forecasting.deep_learning._tcn import TCNForecaster
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
@@ -45,7 +45,7 @@ def test_tcn_forecaster(horizon, window, epochs):
     "loader,is_univariate",
     [
         (load_airline, True),  # univariate dataset
-        (load_longley, False),  # multivariate dataset
+        # (load_longley, False),  # multivariate dataset
     ],
 )
 def test_tcn_forecaster_uni_mutli(loader, is_univariate):
@@ -71,8 +71,7 @@ def test_tcn_forecaster_uni_mutli(loader, is_univariate):
     prediction = forecaster.forecast(y)
     assert prediction is not None
 
-    # only for univariate case, test direct forecasting
-    if is_univariate:
-        prediction = forecaster.direct_forecast(y, 3)
-        assert prediction is not None
-        assert len(prediction) == 3
+    # iterative forecasting
+    prediction = forecaster.iterative_forecast(y, 3)
+    assert prediction is not None
+    assert len(prediction) == 3
diff --git a/aeon/testing/testing_config.py b/aeon/testing/testing_config.py
index 759b5f717f..990befb15d 100644
--- a/aeon/testing/testing_config.py
+++ b/aeon/testing/testing_config.py
@@ -46,8 +46,6 @@
         "check_persistence_via_pickle",
         "check_save_estimators_to_file",
     ],
-    # checks required for multivariate
-    "TCNForecaster": ["check_forecaster_output"],
     # needs investigation
     "SASTClassifier": ["check_fit_deterministic", "check_estimator_multithreading"],
     "RSASTClassifier": ["check_fit_deterministic", "check_estimator_multithreading"],

From 784a26a78c3cd29c8170fc7fbfd232daf615105b Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Sat, 23 Aug 2025 12:14:53 +0530
Subject: [PATCH 34/36] Basedlf corrected

---
 aeon/forecasting/deep_learning/base.py            | 8 --------
 aeon/forecasting/deep_learning/tests/test_base.py | 3 +++
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py
index cc90af48c7..0c6843848c 100644
--- a/aeon/forecasting/deep_learning/base.py
+++ b/aeon/forecasting/deep_learning/base.py
@@ -83,14 +83,6 @@ def __init__(
 
         super().__init__(horizon=horizon, axis=axis)
 
-    def _fit(self, y, exog=None):
-        """Fit the model."""
-        ...
-
-    def _predict(self, y, exog=None):
-        """Predict using the model."""
-        ...
-
     def _prepare_callbacks(self):
         """Prepare callbacks for training.
 
diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py
index 9a88503686..7d13c3bf79 100644
--- a/aeon/forecasting/deep_learning/tests/test_base.py
+++ b/aeon/forecasting/deep_learning/tests/test_base.py
@@ -12,6 +12,9 @@ class DummyDeepForecaster(BaseDeepForecaster):
     def __init__(self, window):
         super().__init__(window=window)
 
+    def _predict(self, y, exog=None):
+        return None
+
     def build_model(self, input_shape):
         """Construct and return a model based on the provided input shape."""
         return None  # Not needed for this test

From 2de7b2df8eb77924aa387716503a0e32ab5b62f1 Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 26 Aug 2025 02:59:42 +0530
Subject: [PATCH 35/36] tcn updated with args

---
 aeon/forecasting/deep_learning/_tcn.py | 152 +++++++++++++++++++------
 1 file changed, 120 insertions(+), 32 deletions(-)

diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py
index 732ed30a75..ed4c92b4b1 100644
--- a/aeon/forecasting/deep_learning/_tcn.py
+++ b/aeon/forecasting/deep_learning/_tcn.py
@@ -3,8 +3,12 @@
 from __future__ import annotations
 
 __maintainer__ = []
+
 __all__ = ["TCNForecaster"]
 
+import os
+import time
+from copy import deepcopy
 from typing import Any
 
 import numpy as np
@@ -18,8 +22,8 @@
 class TCNForecaster(BaseDeepForecaster, IterativeForecastingMixin):
     """A deep learning forecaster using Temporal Convolutional Network (TCN).
 
-    It leverages the `TCNNetwork` from aeon's network module
-    to build the architecture suitable for forecasting tasks.
+    Leverages the `TCNNetwork` from aeon's network module to build the architecture
+    suitable for forecasting tasks.
 
     Parameters
     ----------
@@ -31,8 +35,13 @@ class TCNForecaster(BaseDeepForecaster, IterativeForecastingMixin):
         Number of epochs to train the model.
     verbose : int, default=0
         Verbosity mode (0, 1, or 2).
-    optimizer : str or tf.keras.optimizers.Optimizer, default='adam'
+    optimizer : str or tf.keras.optimizers.Optimizer, default=None
         Optimizer to use for training.
+    metrics : str or list[str|function|keras.metrics.Metric], default="accuracy"
+        The evaluation metrics to use during training. Each can be a string, function,
+        or a keras.metrics.Metric instance (see https://keras.io/api/metrics/).
+        If a single string metric is provided, it will be used as the only metric.
+        If a list of metrics are provided, all will be used for evaluation.
     loss : str or tf.keras.losses.Loss, default='mse'
         Loss function for training.
     callbacks : list of tf.keras.callbacks.Callback or None, default=None
@@ -48,13 +57,26 @@ class TCNForecaster(BaseDeepForecaster, IterativeForecastingMixin):
     file_path : str, default="./"
         Directory path where models will be saved.
     n_blocks : list of int, default=[16, 16, 16]
-        List specifying the number of output channels for each layer of the
-        TCN. The length determines the depth of the network.
+        List specifying the number of output channels for each layer of the TCN.
+        The length determines the depth of the network.
     kernel_size : int, default=2
         Size of the convolutional kernel in the TCN.
     dropout : float, default=0.2
-        Dropout rate applied after each convolutional layer for
-        regularization.
+        Dropout rate applied after each convolutional layer for regularization.
+    save_last_model : bool, default=False
+        Whether or not to save the last model, last epoch trained.
+    save_init_model : bool, default=False
+        Whether to save the initialization of the model.
+    best_file_name : str, default="best_model"
+        The name of the file of the best model.
+    init_file_name : str, default="init_model"
+        The name of the file of the init model.
+
+    References
+    ----------
+    .. [1] Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of
+       generic convolutional and recurrent networks for sequence modeling.
+       arXiv preprint arXiv:1803.01271.
     """
 
     _tags = {
@@ -75,7 +97,8 @@ def __init__(
         batch_size=32,
         n_epochs=100,
         verbose=0,
-        optimizer="adam",
+        optimizer=None,
+        metrics="accuracy",
         loss="mse",
         callbacks=None,
         random_state=None,
@@ -86,25 +109,42 @@ def __init__(
         n_blocks=None,
         kernel_size=2,
         dropout=0.2,
+        save_last_model=False,
+        save_init_model=False,
+        best_file_name="best_model",
+        init_file_name="init_model",
     ):
-        super().__init__(
-            horizon=horizon,
-            window=window,
-            verbose=verbose,
-            callbacks=callbacks,
-            axis=axis,
-            last_file_name=last_file_name,
-            file_path=file_path,
-        )
-        self.n_blocks = n_blocks
-        self.kernel_size = kernel_size
-        self.dropout = dropout
+        self.window = window
+        self.horizon = horizon
         self.batch_size = batch_size
         self.n_epochs = n_epochs
+        self.verbose = verbose
         self.optimizer = optimizer
+        self.metrics = metrics
         self.loss = loss
+        self.callbacks = callbacks
         self.random_state = random_state
+        self.axis = axis
+        self.last_file_name = last_file_name
         self.save_best_model = save_best_model
+        self.file_path = file_path
+        self.n_blocks = n_blocks
+        self.kernel_size = kernel_size
+        self.dropout = dropout
+        self.save_last_model = save_last_model
+        self.save_init_model = save_init_model
+        self.best_file_name = best_file_name
+        self.init_file_name = init_file_name
+
+        super().__init__(
+            horizon=self.horizon,
+            window=self.window,
+            verbose=self.verbose,
+            callbacks=self.callbacks,
+            axis=self.axis,
+            last_file_name=self.last_file_name,
+            file_path=self.file_path,
+        )
 
     def build_model(self, input_shape):
         """Build the TCN model for forecasting.
@@ -121,17 +161,35 @@ def build_model(self, input_shape):
         """
         import tensorflow as tf
 
+        rng = check_random_state(self.random_state)
+        self.random_state_ = rng.randint(0, np.iinfo(np.int32).max)
+
+        tf.keras.utils.set_random_seed(self.random_state_)
         network = TCNNetwork(
             n_blocks=self.n_blocks if self.n_blocks is not None else [16, 16, 16],
             kernel_size=self.kernel_size,
             dropout=self.dropout,
         )
+
         input_layer, output = network.build_network(input_shape=input_shape)
+
         model = tf.keras.Model(inputs=input_layer, outputs=output)
+
+        self.optimizer_ = (
+            tf.keras.optimizers.Adam() if self.optimizer is None else self.optimizer
+        )
+
+        model.compile(
+            loss=self.loss,
+            optimizer=self.optimizer_,
+            metrics=self._metrics,
+        )
+
         return model
 
     def _fit(self, y, exog=None):
-        """Fit the forecaster to training data.
+        """
+        Fit the TCN forecaster model to the training data.
 
         Parameters
         ----------
@@ -140,40 +198,56 @@ def _fit(self, y, exog=None):
 
         Returns
         -------
-        self : TCNForecaster
-            Returns an instance of self.
+        self : object
         """
         import tensorflow as tf
 
-        rng = check_random_state(self.random_state)
-        self.random_state_ = rng.randint(0, np.iinfo(np.int32).max)
-        tf.keras.utils.set_random_seed(self.random_state_)
         y_inner = y
         num_timepoints, num_channels = y_inner.shape
         num_sequences = num_timepoints - self.window - self.horizon + 1
+
         if y_inner.shape[0] < self.window + self.horizon:
             raise ValueError(
                 f"Data length ({y_inner.shape}) is insufficient for window "
                 f"({self.window}) and horizon ({self.horizon})."
             )
+
+        if isinstance(self.metrics, list):
+            self._metrics = self.metrics
+        elif isinstance(self.metrics, str):
+            self._metrics = [self.metrics]
+
         windows_full = np.lib.stride_tricks.sliding_window_view(
             y_inner, window_shape=(self.window, num_channels)
         )
         windows_full = np.squeeze(windows_full, axis=1)
         X_train = windows_full[:num_sequences]
-        # print(f"Shape of X_train is {X_train.shape}")
+
         tail = y_inner[self.window :]
         y_windows = np.lib.stride_tricks.sliding_window_view(
             tail, window_shape=(self.horizon, num_channels)
         )
         y_windows = np.squeeze(y_windows, axis=1)
         y_train = y_windows[:num_sequences]
-        # print(f"Shape of y_train is {y_train.shape}")
+
         input_shape = X_train.shape[1:]
-        self.model_ = self.build_model(input_shape)
-        self.model_.compile(optimizer=self.optimizer, loss=self.loss)
+        self.training_model_ = self.build_model(input_shape)
+
+        if self.save_init_model:
+            self.training_model_.save(self.file_path + self.init_file_name + ".keras")
+
+        self.file_name_ = (
+            self.best_file_name if self.save_best_model else str(time.time_ns())
+        )
+
         callbacks_list = self._prepare_callbacks()
-        self.history_ = self.model_.fit(
+        callbacks_list.append(
+            tf.keras.callbacks.ReduceLROnPlateau(
+                monitor="loss", factor=0.5, patience=50, min_lr=0.0001
+            )
+        )
+
+        self.history = self.training_model_.fit(
             X_train,
             y_train,
             batch_size=self.batch_size,
@@ -181,7 +255,21 @@ def _fit(self, y, exog=None):
             verbose=self.verbose,
             callbacks=callbacks_list,
         )
+
+        try:
+            self.model_ = tf.keras.models.load_model(
+                self.file_path + self.file_name_ + ".keras", compile=False
+            )
+            if not self.save_best_model:
+                os.remove(self.file_path + self.file_name_ + ".keras")
+        except ValueError:
+            self.model_ = deepcopy(self.training_model_)
+
+        if self.save_last_model:
+            self.save_last_model_to_file(file_path=self.file_path)
+
         self.last_window_ = y_inner[-self.window :]
+
         return self
 
     def _predict(self, y=None, exog=None):
@@ -211,7 +299,7 @@ def _predict(self, y=None, exog=None):
                     f"Input data length ({y_inner.shape}) is less than the "
                     f"window size ({self.window})."
                 )
-            y_inner = y_inner[-self.window :]
+        y_inner = y_inner[-self.window :]
         num_channels = y_inner.shape[-1]
         last_window = y_inner.reshape(1, self.window, num_channels)
         pred = self.model_.predict(last_window, verbose=0)

From 9b8d2d3f7c8d25315320d00aa6860bc65f0aff1a Mon Sep 17 00:00:00 2001
From: lucifer4073 <lucifer4073@gmail.com>
Date: Tue, 26 Aug 2025 14:46:10 +0530
Subject: [PATCH 36/36] conversations resolved

---
 aeon/forecasting/deep_learning/_tcn.py           | 4 ++--
 aeon/forecasting/deep_learning/tests/test_tcn.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py
index ed4c92b4b1..1221352a01 100644
--- a/aeon/forecasting/deep_learning/_tcn.py
+++ b/aeon/forecasting/deep_learning/_tcn.py
@@ -22,8 +22,8 @@
 class TCNForecaster(BaseDeepForecaster, IterativeForecastingMixin):
     """A deep learning forecaster using Temporal Convolutional Network (TCN).
 
-    Leverages the `TCNNetwork` from aeon's network module to build the architecture
-    suitable for forecasting tasks.
+    Adapted from the implementation used in [1]_. Leverages the `TCNNetwork` from
+    aeon's network module to build the architecture suitable for forecasting tasks.
 
     Parameters
     ----------
diff --git a/aeon/forecasting/deep_learning/tests/test_tcn.py b/aeon/forecasting/deep_learning/tests/test_tcn.py
index afaf3bd16b..2755201672 100644
--- a/aeon/forecasting/deep_learning/tests/test_tcn.py
+++ b/aeon/forecasting/deep_learning/tests/test_tcn.py
@@ -32,7 +32,7 @@ def test_tcn_forecaster(horizon, window, epochs):
     prediction = forecaster.predict(y)
 
     # Basic assertions
-    assert prediction is not None
+    assert isinstance(prediction, float)
     if isinstance(prediction, tf.Tensor):
         assert not tf.math.is_nan(prediction).numpy()
 
@@ -65,11 +65,11 @@ def test_tcn_forecaster_uni_mutli(loader, is_univariate):
 
     # predict
     prediction = forecaster.predict(y)
-    assert prediction is not None
+    assert isinstance(prediction, float)
 
     # forecast
     prediction = forecaster.forecast(y)
-    assert prediction is not None
+    assert isinstance(prediction, float)
 
     # iterative forecasting
     prediction = forecaster.iterative_forecast(y, 3)