From af94a866b7ea9604043c421a7db3edf17925e29d Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Thu, 22 May 2025 21:51:48 +0530 Subject: [PATCH 01/36] Basedeep forecaster added --- aeon/forecasting/deep_learning/__init__.py | 1 + aeon/forecasting/deep_learning/base.py | 215 +++++++++++++++++++++ 2 files changed, 216 insertions(+) create mode 100644 aeon/forecasting/deep_learning/__init__.py create mode 100644 aeon/forecasting/deep_learning/base.py diff --git a/aeon/forecasting/deep_learning/__init__.py b/aeon/forecasting/deep_learning/__init__.py new file mode 100644 index 0000000000..42067031dc --- /dev/null +++ b/aeon/forecasting/deep_learning/__init__.py @@ -0,0 +1 @@ +"""Initialization for aeon forecasting deep learning module.""" diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py new file mode 100644 index 0000000000..17eead9c49 --- /dev/null +++ b/aeon/forecasting/deep_learning/base.py @@ -0,0 +1,215 @@ +""" +BaseDeepForecaster class. + +A simplified first base class for deep learning forecasting models. +This class is a subclass of BaseForecaster and inherits its methods and attributes. +It provides a base for deep learning models, including methods for training and +predicting. + +""" + +from abc import abstractmethod + +import numpy as np +import pandas as pd +import tensorflow as tf + +from aeon.forecasting.base import BaseForecaster + + +class BaseDeepForecaster(BaseForecaster): + """Base class for deep learning forecasters in aeon. + + Parameters + ---------- + horizon : int, default=1 + Forecasting horizon, the number of steps ahead to predict. + window : int, default=10 + The window size for creating input sequences. + batch_size : int, default=32 + Batch size for training the model. + epochs : int, default=100 + Number of epochs to train the model. + verbose : int, default=0 + Verbosity mode (0, 1, or 2). + optimizer : str or tf.keras.optimizers.Optimizer, default='adam' + Optimizer to use for training. + loss : str or tf.keras.losses.Loss, default='mse' + Loss function for training. + random_state : int, default=None + Seed for random number generators. + """ + + def __init__( + self, + horizon=1, + window=10, + batch_size=32, + epochs=100, + verbose=0, + optimizer="adam", + loss="mse", + random_state=None, + ): + self.horizon = horizon + self.window = window + self.batch_size = batch_size + self.epochs = epochs + self.verbose = verbose + self.optimizer = optimizer + self.loss = loss + self.random_state = random_state + self.model_ = None + super().__init__() + + def _fit(self, y, X=None): + """Fit the forecaster to training data. + + Parameters + ---------- + y : np.ndarray or pd.Series + Target time series to which to fit the forecaster. + X : np.ndarray or pd.DataFrame, default=None + Exogenous variables. + + Returns + ------- + self : returns an instance of self + """ + # Set random seed for reproducibility + if self.random_state is not None: + np.random.seed(self.random_state) + tf.random.set_seed(self.random_state) + + # Convert input data to numpy array + y_inner = self._convert_input(y) + + # Create sequences for training + X_train, y_train = self._create_sequences(y_inner) + + # Build and compile the model + self.model_ = self._build_model(X_train.shape[1:]) + self.model_.compile(optimizer=self.optimizer, loss=self.loss) + + # Train the model + self.model_.fit( + X_train, + y_train, + batch_size=self.batch_size, + epochs=self.epochs, + verbose=self.verbose, + ) + + return self + + def _predict(self, y=None, X=None): + """Make forecasts for y. + + Parameters + ---------- + y : np.ndarray or pd.Series, default=None + Series to predict from. + X : np.ndarray or pd.DataFrame, default=None + Exogenous variables. + + Returns + ------- + predictions : np.ndarray + Predicted values. + """ + if y is None: + raise ValueError("y cannot be None for prediction") + + # Convert input data to numpy array + y_inner = self._convert_input(y) + + # Use the last window of data for prediction + last_window = y_inner[-self.window :].reshape(1, self.window, 1) + + # Make prediction + prediction = self.model_.predict(last_window, verbose=0) + + return prediction.flatten() + + def _forecast(self, y, X=None): + """Forecast time series at future horizon. + + Parameters + ---------- + y : np.ndarray or pd.Series + Time series to forecast from. + X : np.ndarray or pd.DataFrame, default=None + Exogenous variables. + + Returns + ------- + forecasts : np.ndarray + Forecasted values. + """ + # Fit the model + self._fit(y, X) + + # Make prediction + return self._predict(y, X) + + def _convert_input(self, y): + """Convert input data to numpy array. + + Parameters + ---------- + y : np.ndarray or pd.Series + Input time series. + + Returns + ------- + y_inner : np.ndarray + Converted numpy array. + """ + if isinstance(y, pd.Series) or isinstance(y, pd.DataFrame): + y_inner = y.values + else: + y_inner = y + + # Ensure 1D array + if len(y_inner.shape) > 1: + y_inner = y_inner.flatten() + + return y_inner + + def _create_sequences(self, data): + """Create input sequences and target values for training. + + Parameters + ---------- + data : np.ndarray + Time series data. + + Returns + ------- + X : np.ndarray + Input sequences. + y : np.ndarray + Target values. + """ + X, y = [], [] + for i in range(len(data) - self.window - self.horizon + 1): + X.append(data[i : (i + self.window)]) + y.append(data[i + self.window : (i + self.window + self.horizon)]) + + return np.array(X).reshape(-1, self.window, 1), np.array(y) + + @abstractmethod + def _build_model(self, input_shape): + """Build the deep learning model. + + Parameters + ---------- + input_shape : tuple + Shape of input data. + + Returns + ------- + model : tf.keras.Model + Compiled Keras model. + """ + pass From d2ee9ec5acda38de318ad22c1df0563b3f9d526f Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Mon, 26 May 2025 20:30:50 +0530 Subject: [PATCH 02/36] init for basedlf added --- aeon/forecasting/deep_learning/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/aeon/forecasting/deep_learning/__init__.py b/aeon/forecasting/deep_learning/__init__.py index 42067031dc..c4b7a27030 100644 --- a/aeon/forecasting/deep_learning/__init__.py +++ b/aeon/forecasting/deep_learning/__init__.py @@ -1 +1,7 @@ """Initialization for aeon forecasting deep learning module.""" + +__all__ = [ + "BaseDeepForecaster", +] + +from aeon.forecasting.deep_learning.base import BaseDeepForecaster From ab3030c0767fb773714c523a32963aa99c18c078 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 15 Jun 2025 16:13:43 +0530 Subject: [PATCH 03/36] test file and axis added for basedeepforecaster --- aeon/forecasting/deep_learning/base.py | 76 ++++++++++++++----- .../deep_learning/tests/__init__.py | 1 + .../deep_learning/tests/test_base.py | 62 +++++++++++++++ 3 files changed, 120 insertions(+), 19 deletions(-) create mode 100644 aeon/forecasting/deep_learning/tests/__init__.py create mode 100644 aeon/forecasting/deep_learning/tests/test_base.py diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index 17eead9c49..ebab0116bd 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -1,12 +1,13 @@ +"""Base class module for deep learning forecasters in aeon. + +This module defines the `BaseDeepForecaster` class, an abstract base class for +deep learning-based forecasting models within the aeon toolkit. """ -BaseDeepForecaster class. -A simplified first base class for deep learning forecasting models. -This class is a subclass of BaseForecaster and inherits its methods and attributes. -It provides a base for deep learning models, including methods for training and -predicting. +from __future__ import annotations -""" +__maintainer__ = [] +__all__ = ["BaseDeepForecaster"] from abc import abstractmethod @@ -20,6 +21,9 @@ class BaseDeepForecaster(BaseForecaster): """Base class for deep learning forecasters in aeon. + This class provides a foundation for deep learning-based forecasting models, + handling data preprocessing, model training, and prediction. + Parameters ---------- horizon : int, default=1 @@ -38,6 +42,9 @@ class BaseDeepForecaster(BaseForecaster): Loss function for training. random_state : int, default=None Seed for random number generators. + axis : int, default=0 + Axis along which to apply the forecaster. + Default is 0 for univariate time series. """ def __init__( @@ -50,6 +57,7 @@ def __init__( optimizer="adam", loss="mse", random_state=None, + axis=0, ): self.horizon = horizon self.window = window @@ -59,8 +67,11 @@ def __init__( self.optimizer = optimizer self.loss = loss self.random_state = random_state + self.axis = axis self.model_ = None - super().__init__() + + # Pass horizon and axis to BaseForecaster + super().__init__(horizon=horizon, axis=axis) def _fit(self, y, X=None): """Fit the forecaster to training data. @@ -74,7 +85,8 @@ def _fit(self, y, X=None): Returns ------- - self : returns an instance of self + self : BaseDeepForecaster + Returns an instance of self. """ # Set random seed for reproducibility if self.random_state is not None: @@ -83,12 +95,21 @@ def _fit(self, y, X=None): # Convert input data to numpy array y_inner = self._convert_input(y) + if y_inner.shape[0] < self.window + self.horizon: + raise ValueError( + f"Data length ({y_inner.shape[0]}) is insufficient" + f"({self.window}) and horizon ({self.horizon})." + ) # Create sequences for training X_train, y_train = self._create_sequences(y_inner) + if X_train.shape[0] == 0: + raise ValueError("No training sequences could be created.") + # Build and compile the model - self.model_ = self._build_model(X_train.shape[1:]) + input_shape = X_train.shape[1:] + self.model_ = self._build_model(input_shape) self.model_.compile(optimizer=self.optimizer, loss=self.loss) # Train the model @@ -115,7 +136,7 @@ def _predict(self, y=None, X=None): Returns ------- predictions : np.ndarray - Predicted values. + Predicted values for the specified horizon. """ if y is None: raise ValueError("y cannot be None for prediction") @@ -123,13 +144,26 @@ def _predict(self, y=None, X=None): # Convert input data to numpy array y_inner = self._convert_input(y) + if len(y_inner) < self.window: + raise ValueError( + f"Input data length ({len(y_inner)}) is less than the window size " + f"({self.window})." + ) + # Use the last window of data for prediction last_window = y_inner[-self.window :].reshape(1, self.window, 1) # Make prediction - prediction = self.model_.predict(last_window, verbose=0) + predictions = [] + current_window = last_window + for _ in range(self.horizon): + pred = self.model_.predict(current_window, verbose=0) + predictions.append(pred[0, 0]) + # Update the window with the latest prediction (autoregressive) + current_window = np.roll(current_window, -1, axis=1) + current_window[0, -1, 0] = pred[0, 0] - return prediction.flatten() + return np.array(predictions) def _forecast(self, y, X=None): """Forecast time series at future horizon. @@ -144,13 +178,9 @@ def _forecast(self, y, X=None): Returns ------- forecasts : np.ndarray - Forecasted values. + Forecasted values for the specified horizon. """ - # Fit the model - self._fit(y, X) - - # Make prediction - return self._predict(y, X) + return self._fit(y, X)._predict(y, X) def _convert_input(self, y): """Convert input data to numpy array. @@ -191,12 +221,20 @@ def _create_sequences(self, data): y : np.ndarray Target values. """ + if len(data) < self.window + self.horizon: + raise ValueError( + f"Data length ({len(data)}) is insufficient for window " + f"({self.window}) and horizon ({self.horizon})." + ) + X, y = [], [] for i in range(len(data) - self.window - self.horizon + 1): X.append(data[i : (i + self.window)]) y.append(data[i + self.window : (i + self.window + self.horizon)]) - return np.array(X).reshape(-1, self.window, 1), np.array(y) + X = np.array(X).reshape(-1, self.window, 1) + y = np.array(y).reshape(-1, self.horizon) + return X, y @abstractmethod def _build_model(self, input_shape): diff --git a/aeon/forecasting/deep_learning/tests/__init__.py b/aeon/forecasting/deep_learning/tests/__init__.py new file mode 100644 index 0000000000..3dda9d25ea --- /dev/null +++ b/aeon/forecasting/deep_learning/tests/__init__.py @@ -0,0 +1 @@ +"""Deep Learning Forecasting Tests File.""" diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py new file mode 100644 index 0000000000..05536f98c5 --- /dev/null +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -0,0 +1,62 @@ +"""Test for BaseDeepForecaster class in aeon.""" + +import numpy as np +import pytest + +from aeon.forecasting.deep_learning import BaseDeepForecaster +from aeon.utils.validation._dependencies import _check_soft_dependencies + + +class SimpleDeepForecaster(BaseDeepForecaster): + """A simple concrete implementation of BaseDeepForecaster for testing.""" + + def _build_model(self, input_shape): + import tensorflow as tf + + model = tf.keras.Sequential( + [ + tf.keras.layers.Flatten(input_shape=input_shape), + tf.keras.layers.Dense(10, activation="relu"), + tf.keras.layers.Dense(self.horizon), + ] + ) + return model + + +@pytest.mark.skipif( + not _check_soft_dependencies("tensorflow", severity="none"), + reason="skip test if required soft dependency not available", +) +def test_base_deep_forecaster_fit_predict(): + """Test fitting and predicting with BaseDeepForecaster implementation.""" + # Generate synthetic data + np.random.seed(42) + data = np.random.randn(50) + + # Initialize forecaster + forecaster = SimpleDeepForecaster(horizon=2, window=5, epochs=1, verbose=0) + + # Fit the model + forecaster.fit(data) + + # Predict + predictions = forecaster.predict(data) + + # Validate output shape + assert ( + len(predictions) == 2 + ), f"Expected predictions of length 2, got {len(predictions)}" + assert isinstance(predictions, np.ndarray), "Predictions should be a numpy array" + + +@pytest.mark.skipif( + not _check_soft_dependencies("tensorflow", severity="none"), + reason="skip test if required soft dependency not available", +) +def test_base_deep_forecaster_insufficient_data(): + """Test error handling for insufficient data.""" + data = np.random.randn(5) + forecaster = SimpleDeepForecaster(horizon=2, window=5, epochs=1, verbose=0) + + with pytest.raises(ValueError, match="Data length.*insufficient"): + forecaster.fit(data) From 1f202db1cae45834503986ffb37859599b29759a Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 15 Jun 2025 16:38:39 +0530 Subject: [PATCH 04/36] test locally --- .github/workflows/pr_pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml index cf1baee900..9c70b16b4c 100644 --- a/.github/workflows/pr_pytest.yml +++ b/.github/workflows/pr_pytest.yml @@ -3,7 +3,7 @@ name: PR pytest on: push: branches: - - main + - basedlf pull_request: paths: - "aeon/**" From 14eb41fa83a5799d0fa8608ffd516f1766da7a1c Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 15 Jun 2025 17:19:27 +0530 Subject: [PATCH 05/36] dlf corrected --- .github/workflows/pr_pytest.yml | 2 +- aeon/forecasting/deep_learning/tests/test_base.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml index 9c70b16b4c..cf1baee900 100644 --- a/.github/workflows/pr_pytest.yml +++ b/.github/workflows/pr_pytest.yml @@ -3,7 +3,7 @@ name: PR pytest on: push: branches: - - basedlf + - main pull_request: paths: - "aeon/**" diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index 05536f98c5..1eae0969e1 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -10,6 +10,9 @@ class SimpleDeepForecaster(BaseDeepForecaster): """A simple concrete implementation of BaseDeepForecaster for testing.""" + def __init__(self, horizon=1, window=5, epochs=1, verbose=0): + super().__init__(horizon=horizon, window=window, epochs=epochs, verbose=verbose) + def _build_model(self, input_shape): import tensorflow as tf From d1a2aab72097dd38658fa0a10f572005c5b70aaa Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 22 Jun 2025 12:27:15 +0530 Subject: [PATCH 06/36] tf soft dep added --- aeon/forecasting/deep_learning/base.py | 3 ++- aeon/forecasting/deep_learning/tests/test_base.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index ebab0116bd..ba33331fc7 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -13,7 +13,6 @@ import numpy as np import pandas as pd -import tensorflow as tf from aeon.forecasting.base import BaseForecaster @@ -88,6 +87,8 @@ def _fit(self, y, X=None): self : BaseDeepForecaster Returns an instance of self. """ + import tensorflow as tf + # Set random seed for reproducibility if self.random_state is not None: np.random.seed(self.random_state) diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index 1eae0969e1..270a60225e 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -7,6 +7,10 @@ from aeon.utils.validation._dependencies import _check_soft_dependencies +@pytest.mark.skipif( + not _check_soft_dependencies("tensorflow", severity="none"), + reason="skip test if required soft dependency not available", +) class SimpleDeepForecaster(BaseDeepForecaster): """A simple concrete implementation of BaseDeepForecaster for testing.""" From 5fb72c706e87b3b7bf4fcd9701298b250cc98d89 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 6 Jul 2025 16:51:57 +0530 Subject: [PATCH 07/36] tcn network added --- aeon/networks/__init__.py | 2 + aeon/networks/_tcn.py | 326 ++++++++++++++++++++++++++++++++ aeon/networks/tests/test_tcn.py | 212 +++++++++++++++++++++ 3 files changed, 540 insertions(+) create mode 100644 aeon/networks/_tcn.py create mode 100644 aeon/networks/tests/test_tcn.py diff --git a/aeon/networks/__init__.py b/aeon/networks/__init__.py index d774abe102..61e669283c 100644 --- a/aeon/networks/__init__.py +++ b/aeon/networks/__init__.py @@ -19,6 +19,7 @@ "AEBiGRUNetwork", "DisjointCNNNetwork", "RecurrentNetwork", + "TemporalConvolutionalNetwork", ] from aeon.networks._ae_abgru import AEAttentionBiGRUNetwork from aeon.networks._ae_bgru import AEBiGRUNetwork @@ -36,4 +37,5 @@ from aeon.networks._mlp import MLPNetwork from aeon.networks._resnet import ResNetNetwork from aeon.networks._rnn import RecurrentNetwork +from aeon.networks._tcn import TemporalConvolutionalNetwork from aeon.networks.base import BaseDeepLearningNetwork diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py new file mode 100644 index 0000000000..08f4ff9341 --- /dev/null +++ b/aeon/networks/_tcn.py @@ -0,0 +1,326 @@ +"""Implementation of Temporal Convolutional Network (TCN). + +Based on the paper "An Empirical Evaluation of Generic Convolutional and +Recurrent Networks for Sequence Modeling" by Bai et al. (2018). +""" + +__maintainer__ = [] + +from aeon.networks.base import BaseDeepLearningNetwork + + +class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): + """Temporal Convolutional Network (TCN) for sequence modeling. + + A generic convolutional architecture for sequence modeling that combines: + - Dilated convolutions for exponentially large receptive fields + - Residual connections for training stability + + The TCN can take sequences of any length and map them to output sequences + of the same length, making it suitable for autoregressive prediction tasks. + + Parameters + ---------- + num_inputs : int + Number of input channels/features in the input sequence. + num_channels : list of int + List specifying the number of output channels for each layer. + The length determines the depth of the network. + kernel_size : int, default=2 + Size of the convolutional kernel. Larger kernels can capture + more local context but require more parameters. + dropout : float, default=0.2 + Dropout rate applied after each convolutional layer for regularization. + + Notes + ----- + The receptive field size grows exponentially with network depth due to + dilated convolutions with dilation factors of 2^i for layer i. + + References + ---------- + Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of + generic convolutional and recurrent networks for sequence modeling. + arXiv preprint arXiv:1803.01271. + """ + + _config = { + "python_dependencies": ["tensorflow"], + "python_version": "<3.13", + "structure": "encoder", + } + + def __init__( + self, + num_inputs: int, + num_channels: list, + kernel_size: int = 2, + dropout: float = 0.2, + ): + """Initialize the TCN architecture. + + Parameters + ---------- + num_inputs : int + Number of input channels/features. + num_channels : list of int + Number of output channels for each temporal block. + kernel_size : int, default=2 + Size of convolutional kernels. + dropout : float, default=0.2 + Dropout rate for regularization. + """ + super().__init__() + self.num_inputs = num_inputs + self.num_channels = num_channels + self.kernel_size = kernel_size + self.dropout = dropout + + def _conv1d_with_variable_padding( + self, + x, + filters: int, + kernel_size: int, + padding_value: int, + stride: int = 1, + dilation_rate: int = 1, + ): + """Apply 1D convolution with variable padding for causal convolutions. + + Parameters + ---------- + x : tf.Tensor + Input tensor of shape (batch_size, channels, sequence_length). + filters : int + Number of output filters. + kernel_size : int + Size of the convolutional kernel. + padding_value : int + Amount of padding to apply. + stride : int, default=1 + Stride of the convolution. + dilation_rate : int, default=1 + Dilation rate for dilated convolutions. + + Returns + ------- + tf.Tensor + Output tensor after convolution. + """ + import tensorflow as tf + + # Transpose to Keras format (batch, sequence, channels) + x_keras_format = tf.keras.layers.Permute((2, 1))(x) + + # Apply padding in sequence dimension + padded_x = tf.keras.layers.ZeroPadding1D(padding=padding_value)(x_keras_format) + + # Create and apply convolution layer + conv_layer = tf.keras.layers.Conv1D( + filters=filters, + kernel_size=kernel_size, + strides=stride, + dilation_rate=dilation_rate, + padding="valid", + ) + + # Apply convolution + out = conv_layer(padded_x) + + # Transpose back to PyTorch format (batch, channels, sequence) + return tf.keras.layers.Permute((2, 1))(out) + + def _chomp_1d(self, x, chomp_size: int): + """Remove padding from the end of sequences to maintain causality. + + This operation ensures that the output at time t only depends on + inputs from times 0 to t, preventing information leakage from future. + + Parameters + ---------- + x : tf.Tensor + Input tensor of shape (batch_size, channels, sequence_length). + chomp_size : int + Number of time steps to remove from the end. + + Returns + ------- + tf.Tensor + Chomped tensor with reduced sequence length. + """ + return x[:, :, :-chomp_size] + + def _temporal_block( + self, + x, + n_inputs: int, + n_outputs: int, + kernel_size: int, + stride: int, + dilation: int, + padding: int, + dropout: float = 0.2, + training: bool = None, + ): + """Create a temporal block with dilated causal convolutions. + + Each temporal block consists of: + 1. Two dilated causal convolutions + 2. ReLU activations and dropout for regularization + 3. Residual connection with optional 1x1 convolution for dimension + matching + + Parameters + ---------- + x : tf.Tensor + Input tensor of shape (batch_size, channels, sequence_length). + n_inputs : int + Number of input channels. + n_outputs : int + Number of output channels. + kernel_size : int + Size of convolutional kernels. + stride : int + Stride of convolutions (typically 1). + dilation : int + Dilation factor for dilated convolutions. + padding : int + Padding size to be chomped off. + dropout : float, default=0.2 + Dropout rate for regularization. + training : bool, optional + Whether the model is in training mode. + + Returns + ------- + tf.Tensor + Output tensor of shape (batch_size, n_outputs, sequence_length). + """ + import tensorflow as tf + + # First convolution block + out = self._conv1d_with_variable_padding( + x, n_outputs, kernel_size, padding, stride, dilation + ) + out = self._chomp_1d(out, padding) + out = tf.keras.layers.ReLU()(out) + out = tf.keras.layers.Dropout(dropout)(out, training=training) + + # Second convolution block + out = self._conv1d_with_variable_padding( + out, n_outputs, kernel_size, padding, stride, dilation + ) + out = self._chomp_1d(out, padding) + out = tf.keras.layers.ReLU()(out) + out = tf.keras.layers.Dropout(dropout)(out, training=training) + + # Residual connection with optional dimension matching + if n_inputs != n_outputs: + res = self._conv1d_with_variable_padding(x, n_outputs, 1, 0, 1, 1) + else: + res = x + + # Add residual and apply final ReLU + result = tf.keras.layers.Add()([out, res]) + return tf.keras.layers.ReLU()(result) + + def _temporal_conv_net( + self, + x, + num_inputs: int, + num_channels: list, + kernel_size: int = 2, + dropout: float = 0.2, + training: bool = None, + ): + """Apply the complete Temporal Convolutional Network. + + Stacks multiple temporal blocks with exponentially increasing dilation + factors to achieve a large receptive field efficiently. + + Parameters + ---------- + x : tf.Tensor + Input tensor of shape (batch_size, channels, sequence_length). + num_inputs : int + Number of input channels. + num_channels : list of int + Number of output channels for each temporal block. + kernel_size : int, default=2 + Size of convolutional kernels. + dropout : float, default=0.2 + Dropout rate for regularization. + training : bool, optional + Whether the model is in training mode. + + Returns + ------- + tf.Tensor + Output tensor after applying all temporal blocks. + """ + num_levels = len(num_channels) + for i in range(num_levels): + dilation_size = 2**i + in_channels = num_inputs if i == 0 else num_channels[i - 1] + out_channels = num_channels[i] + padding = (kernel_size - 1) * dilation_size + + x = self._temporal_block( + x, + n_inputs=in_channels, + n_outputs=out_channels, + kernel_size=kernel_size, + stride=1, + dilation=dilation_size, + padding=padding, + dropout=dropout, + training=training, + ) + + return x + + def build_network(self, input_shape: tuple, **kwargs) -> tuple: + """Build the complete TCN architecture. + + Constructs a series of temporal blocks with exponentially increasing + dilation factors to achieve a large receptive field efficiently. + + Parameters + ---------- + input_shape : tuple + Shape of input data (sequence_length, num_features). + **kwargs + Additional keyword arguments (unused). + + Returns + ------- + tuple + A tuple containing (input_layer, output_tensor) representing + the complete network architecture. + + Notes + ----- + The dilation factor for layer i is 2^i, which ensures exponential + growth of the receptive field while maintaining computational + efficiency. + """ + import tensorflow as tf + + # Create input layer + input_layer = tf.keras.layers.Input(shape=input_shape) + + # Transpose input to match the expected format (batch, channels, seq) + x = input_layer + + # Apply TCN using the private function + x = self._temporal_conv_net( + x, + num_inputs=self.num_inputs, + num_channels=self.num_channels, + kernel_size=self.kernel_size, + dropout=self.dropout, + ) + + output = x + + return input_layer, output diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py new file mode 100644 index 0000000000..97500f5557 --- /dev/null +++ b/aeon/networks/tests/test_tcn.py @@ -0,0 +1,212 @@ +"""Tests for the TemporalConvolutionalNetwork.""" + +import pytest + +from aeon.networks import TemporalConvolutionalNetwork +from aeon.utils.validation._dependencies import _check_soft_dependencies + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +def test_tcn_network_basic(): + """Test basic TCN network creation and build_network functionality.""" + import tensorflow as tf + + input_shape = (100, 5) + num_inputs = 5 + num_channels = [32, 64] + + tcn_network = TemporalConvolutionalNetwork( + num_inputs=num_inputs, num_channels=num_channels + ) + input_layer, output_layer = tcn_network.build_network(input_shape) + + # Check that layers are created correctly + assert hasattr(input_layer, "shape"), "Input layer should have a shape attribute" + assert hasattr(output_layer, "shape"), "Output layer should have a shape attribute" + assert input_layer.dtype == tf.float32 + assert output_layer.dtype == tf.float32 + + # Create a model to test the network structure + model = tf.keras.Model(inputs=input_layer, outputs=output_layer) + assert model is not None, "Model should be created successfully" + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +@pytest.mark.parametrize("num_channels", [[32], [32, 64], [16, 32, 64], [64, 32, 16]]) +def test_tcn_network_different_channels(num_channels): + """Test TCN network with different channel configurations.""" + import tensorflow as tf + + input_shape = (50, 3) + num_inputs = 3 + + tcn_network = TemporalConvolutionalNetwork( + num_inputs=num_inputs, num_channels=num_channels + ) + input_layer, output_layer = tcn_network.build_network(input_shape) + + # Create a model and verify it works + model = tf.keras.Model(inputs=input_layer, outputs=output_layer) + assert model is not None + + # Test with dummy data + import numpy as np + + dummy_input = np.random.random((8,) + input_shape) + output = model(dummy_input) + assert output is not None, "Model should produce output" + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +@pytest.mark.parametrize("kernel_size", [2, 3, 5]) +def test_tcn_network_kernel_sizes(kernel_size): + """Test TCN network with different kernel sizes.""" + import tensorflow as tf + + input_shape = (80, 4) + num_inputs = 4 + num_channels = [32, 64] + + tcn_network = TemporalConvolutionalNetwork( + num_inputs=num_inputs, + num_channels=num_channels, + kernel_size=kernel_size, + ) + input_layer, output_layer = tcn_network.build_network(input_shape) + + # Verify network builds successfully + model = tf.keras.Model(inputs=input_layer, outputs=output_layer) + assert model is not None + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +@pytest.mark.parametrize("dropout", [0.0, 0.1, 0.3, 0.5]) +def test_tcn_network_dropout_rates(dropout): + """Test TCN network with different dropout rates.""" + import tensorflow as tf + + input_shape = (60, 2) + num_inputs = 2 + num_channels = [16, 32] + + tcn_network = TemporalConvolutionalNetwork( + num_inputs=num_inputs, num_channels=num_channels, dropout=dropout + ) + input_layer, output_layer = tcn_network.build_network(input_shape) + + # Verify network builds successfully + model = tf.keras.Model(inputs=input_layer, outputs=output_layer) + assert model is not None + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +def test_tcn_network_output_shape(): + """Test TCN network output shapes.""" + import numpy as np + import tensorflow as tf + + input_shape = (40, 6) + batch_size = 16 + num_inputs = 6 + num_channels = [32, 64] + + tcn_network = TemporalConvolutionalNetwork( + num_inputs=num_inputs, num_channels=num_channels + ) + input_layer, output_layer = tcn_network.build_network(input_shape) + model = tf.keras.Model(inputs=input_layer, outputs=output_layer) + + # Create dummy input and test output shape + dummy_input = np.random.random((batch_size,) + input_shape) + output = model(dummy_input) + + # Output should maintain sequence length and have final channel dimension + expected_shape = (batch_size, num_channels[-1], input_shape[1]) + assert ( + output.shape == expected_shape + ), f"Expected shape {expected_shape}, got {output.shape}" + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +def test_tcn_network_config(): + """Test TCN network configuration attributes.""" + tcn_network = TemporalConvolutionalNetwork(num_inputs=3, num_channels=[16, 32]) + + # Check _config attributes + assert "python_dependencies" in tcn_network._config + assert "tensorflow" in tcn_network._config["python_dependencies"] + assert "python_version" in tcn_network._config + assert "structure" in tcn_network._config + assert tcn_network._config["structure"] == "encoder" + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +def test_tcn_network_parameter_initialization(): + """Test TCN network parameter initialization.""" + num_inputs = 4 + num_channels = [32, 64, 128] + kernel_size = 3 + dropout = 0.2 + + tcn_network = TemporalConvolutionalNetwork( + num_inputs=num_inputs, + num_channels=num_channels, + kernel_size=kernel_size, + dropout=dropout, + ) + + # Check that parameters are set correctly + assert tcn_network.num_inputs == num_inputs + assert tcn_network.num_channels == num_channels + assert tcn_network.kernel_size == kernel_size + assert tcn_network.dropout == dropout + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +def test_tcn_network_single_layer(): + """Test TCN network with single temporal block.""" + import tensorflow as tf + + input_shape = (30, 2) + num_inputs = 2 + num_channels = [16] # Single layer + + tcn_network = TemporalConvolutionalNetwork( + num_inputs=num_inputs, num_channels=num_channels + ) + input_layer, output_layer = tcn_network.build_network(input_shape) + + # Verify single layer network works + model = tf.keras.Model(inputs=input_layer, outputs=output_layer) + assert model is not None + + # Test with dummy data + import numpy as np + + dummy_input = np.random.random((4,) + input_shape) + output = model(dummy_input) + assert output.shape == (4, 16, 2) From 3434757d2403729f4b07f67a960c41cc8250a4ff Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 6 Jul 2025 16:52:47 +0530 Subject: [PATCH 08/36] tcn_net pytest added --- .github/workflows/pr_pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml index 69323e47c5..368425d136 100644 --- a/.github/workflows/pr_pytest.yml +++ b/.github/workflows/pr_pytest.yml @@ -3,7 +3,7 @@ name: PR pytest on: push: branches: - - main + - tcn_net pull_request: paths: - "aeon/**" From c602e39cb5edb82537cd697096536f9b9733fb38 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 6 Jul 2025 17:29:00 +0530 Subject: [PATCH 09/36] tcn_network updated with default params --- aeon/networks/_tcn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py index 08f4ff9341..900b9e47c1 100644 --- a/aeon/networks/_tcn.py +++ b/aeon/networks/_tcn.py @@ -52,8 +52,8 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): def __init__( self, - num_inputs: int, - num_channels: list, + num_inputs: int = 1, + num_channels: list = [16] * 3, kernel_size: int = 2, dropout: float = 0.2, ): From 05a0f355017c449980d7aafaebdbb3ae7d8aa7dd Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Mon, 7 Jul 2025 17:29:16 +0530 Subject: [PATCH 10/36] TCN forecaster added --- .github/workflows/pr_pytest.yml | 2 +- aeon/forecasting/deep_learning/__init__.py | 2 + aeon/forecasting/deep_learning/_tcn.py | 140 ++++++++++++++++++ .../deep_learning/tests/test_tcn.py | 37 +++++ 4 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 aeon/forecasting/deep_learning/_tcn.py create mode 100644 aeon/forecasting/deep_learning/tests/test_tcn.py diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml index 5240ce85ec..82dbdc0f14 100644 --- a/.github/workflows/pr_pytest.yml +++ b/.github/workflows/pr_pytest.yml @@ -3,7 +3,7 @@ name: PR pytest on: push: branches: - - tcn_net + - tcn_fst pull_request: paths: - "aeon/**" diff --git a/aeon/forecasting/deep_learning/__init__.py b/aeon/forecasting/deep_learning/__init__.py index c4b7a27030..8e3bac6a86 100644 --- a/aeon/forecasting/deep_learning/__init__.py +++ b/aeon/forecasting/deep_learning/__init__.py @@ -2,6 +2,8 @@ __all__ = [ "BaseDeepForecaster", + "TCNForecaster", ] +from aeon.forecasting.deep_learning._tcn import TCNForecaster from aeon.forecasting.deep_learning.base import BaseDeepForecaster diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py new file mode 100644 index 0000000000..1b591e6c74 --- /dev/null +++ b/aeon/forecasting/deep_learning/_tcn.py @@ -0,0 +1,140 @@ +"""TCNForecaster module for deep learning forecasting in aeon.""" + +from __future__ import annotations + +__maintainer__ = [] +__all__ = ["TCNForecaster"] + +from aeon.forecasting.deep_learning.base import BaseDeepForecaster +from aeon.networks._tcn import TemporalConvolutionalNetwork + + +class TCNForecaster(BaseDeepForecaster): + """A deep learning forecaster using Temporal Convolutional Network (TCN). + + It leverages the `TemporalConvolutionalNetwork` from aeon's network module + to build the architecture suitable for forecasting tasks. + + Parameters + ---------- + horizon : int, default=1 + Forecasting horizon, the number of steps ahead to predict. + window : int, default=10 + The window size for creating input sequences. + batch_size : int, default=32 + Batch size for training the model. + epochs : int, default=100 + Number of epochs to train the model. + verbose : int, default=0 + Verbosity mode (0, 1, or 2). + optimizer : str or tf.keras.optimizers.Optimizer, default='adam' + Optimizer to use for training. + loss : str or tf.keras.losses.Loss, default='mse' + Loss function for training. + random_state : int, default=None + Seed for random number generators. + axis : int, default=0 + Axis along which to apply the forecaster. + num_inputs : int, default=1 + Number of input channels/features in the input sequence. + num_channels : list of int, default=[16, 16, 16] + List specifying the number of output channels for each layer of the + TCN. The length determines the depth of the network. + kernel_size : int, default=2 + Size of the convolutional kernel in the TCN. + dropout : float, default=0.2 + Dropout rate applied after each convolutional layer for + regularization. + + + """ + + def __init__( + self, + horizon=1, + window=10, + batch_size=32, + epochs=100, + verbose=0, + optimizer="adam", + loss="mse", + random_state=None, + axis=0, + num_inputs=1, + num_channels=None, + kernel_size=2, + dropout=0.2, + ): + super().__init__( + horizon=horizon, + window=window, + batch_size=batch_size, + epochs=epochs, + verbose=verbose, + optimizer=optimizer, + loss=loss, + random_state=random_state, + axis=axis, + ) + self.num_inputs = num_inputs + self.num_channels = num_channels if num_channels is not None else [16, 16, 16] + self.kernel_size = kernel_size + self.dropout = dropout + + def _add_linear_layer(self, x, output_units): + """Add a linear layer to the output of the TCN network. + + Parameters + ---------- + x : tf.Tensor + Input tensor from the TCN output, typically of shape + (batch_size, channels, sequence_length). + output_units : int + Number of output units for the linear layer, typically matching + the forecasting horizon. + + Returns + ------- + tf.Tensor + Output tensor after applying the linear layer. + """ + import tensorflow as tf + + # Take the last time step's output for forecasting + x_last = x[:, -1, :] + # Apply a dense layer to map to the desired output size (horizon) + output = tf.keras.layers.Dense(output_units)(x_last) + return output + + def _build_model(self, input_shape): + """Build the TCN model for forecasting. + + Parameters + ---------- + input_shape : tuple + Shape of input data, typically (window, num_inputs). + + Returns + ------- + model : tf.keras.Model + Compiled Keras model with TCN architecture. + """ + import tensorflow as tf + + # Initialize the TCN network with the updated parameters + network = TemporalConvolutionalNetwork( + num_inputs=self.num_inputs, + num_channels=self.num_channels, + kernel_size=self.kernel_size, + dropout=self.dropout, + ) + + # Build the network with the given input shape + input_layer, output = network.build_network(input_shape=input_shape) + + # Adjust the output layer to match the forecasting horizon + output = self._add_linear_layer(output, self.horizon) + + # Create the final model + model = tf.keras.Model(inputs=input_layer, outputs=output) + return model diff --git a/aeon/forecasting/deep_learning/tests/test_tcn.py b/aeon/forecasting/deep_learning/tests/test_tcn.py new file mode 100644 index 0000000000..2717eaf4b4 --- /dev/null +++ b/aeon/forecasting/deep_learning/tests/test_tcn.py @@ -0,0 +1,37 @@ +"""Test TCN.""" + +__maintainer__ = [] +__all__ = [] + +import pytest + +from aeon.datasets import load_airline +from aeon.forecasting.deep_learning._tcn import TCNForecaster +from aeon.utils.validation._dependencies import _check_soft_dependencies + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +@pytest.mark.parametrize("horizon,window,epochs", [(1, 10, 2), (3, 12, 3), (5, 15, 2)]) +def test_tcn_forecaster(horizon, window, epochs): + """Test TCNForecaster with different parameter combinations.""" + import tensorflow as tf + + # Load airline dataset + y = load_airline() + + # Initialize TCNForecaster + forecaster = TCNForecaster( + horizon=horizon, window=window, epochs=epochs, batch_size=16, verbose=0 + ) + + # Fit and predict + forecaster.fit(y) + prediction = forecaster.predict(y) + + # Basic assertions + assert prediction is not None + if isinstance(prediction, tf.Tensor): + assert not tf.math.is_nan(prediction).numpy() From 2f3c98b9008d8a7504f24dcd1d22760261a4e1b3 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Mon, 7 Jul 2025 23:43:22 +0530 Subject: [PATCH 11/36] tcn reshaped --- aeon/networks/_tcn.py | 23 ++++++++++++++++++++--- aeon/networks/tests/test_tcn.py | 4 ++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py index 900b9e47c1..987b0f2b68 100644 --- a/aeon/networks/_tcn.py +++ b/aeon/networks/_tcn.py @@ -42,6 +42,21 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271. + + Examples + -------- + >>> from aeon.networks._tcn import TemporalConvolutionalNetwork + >>> from aeon.testing.data_generation import make_example_3d_numpy + >>> import tensorflow as tf + >>> X, y = make_example_3d_numpy(n_cases=8, n_channels=4, n_timepoints=150, + ... return_y=True, regression_target=True, + ... random_state=42) + >>> network = TemporalConvolutionalNetwork(num_inputs=4, num_channels=[8, 8]) + >>> input_layer, output = network.build_network(input_shape=(4, 150)) + >>> model = tf.keras.Model(inputs=input_layer, outputs=output) + >>> model.compile(optimizer="adam", loss="mse") + >>> model.fit(X, y, epochs=2, batch_size=2, verbose=0) # doctest: +SKIP + """ _config = { @@ -53,7 +68,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): def __init__( self, num_inputs: int = 1, - num_channels: list = [16] * 3, + num_channels: list = [16] * 3, # change to n_filters kernel_size: int = 2, dropout: float = 0.2, ): @@ -321,6 +336,8 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple: dropout=self.dropout, ) - output = x - + x = tf.keras.layers.Dense(input_shape[0])(x[:, -1, :]) + output = tf.keras.layers.Lambda( + lambda x: tf.reduce_mean(x, axis=1, keepdims=True), output_shape=(1,) + )(x) return input_layer, output diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py index 97500f5557..b78424a73a 100644 --- a/aeon/networks/tests/test_tcn.py +++ b/aeon/networks/tests/test_tcn.py @@ -136,7 +136,7 @@ def test_tcn_network_output_shape(): output = model(dummy_input) # Output should maintain sequence length and have final channel dimension - expected_shape = (batch_size, num_channels[-1], input_shape[1]) + expected_shape = (batch_size, 1) assert ( output.shape == expected_shape ), f"Expected shape {expected_shape}, got {output.shape}" @@ -209,4 +209,4 @@ def test_tcn_network_single_layer(): dummy_input = np.random.random((4,) + input_shape) output = model(dummy_input) - assert output.shape == (4, 16, 2) + assert output.shape == (4, 1) From f6447b180c4b725d889dbed5b80fe948945bbd6c Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 8 Jul 2025 13:34:47 +0530 Subject: [PATCH 12/36] tcn changed --- aeon/networks/_tcn.py | 22 ++++++++++----------- aeon/networks/tests/test_tcn.py | 34 ++++++++++++++++----------------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py index 987b0f2b68..62ed404f1b 100644 --- a/aeon/networks/_tcn.py +++ b/aeon/networks/_tcn.py @@ -68,7 +68,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): def __init__( self, num_inputs: int = 1, - num_channels: list = [16] * 3, # change to n_filters + n_filters: list = [16] * 3, # changed from num_channels kernel_size: int = 2, dropout: float = 0.2, ): @@ -78,7 +78,7 @@ def __init__( ---------- num_inputs : int Number of input channels/features. - num_channels : list of int + n_filters : list of int Number of output channels for each temporal block. kernel_size : int, default=2 Size of convolutional kernels. @@ -87,7 +87,7 @@ def __init__( """ super().__init__() self.num_inputs = num_inputs - self.num_channels = num_channels + self.n_filters = n_filters self.kernel_size = kernel_size self.dropout = dropout @@ -243,7 +243,7 @@ def _temporal_conv_net( self, x, num_inputs: int, - num_channels: list, + n_filters: list, # changed from num_channels kernel_size: int = 2, dropout: float = 0.2, training: bool = None, @@ -259,7 +259,7 @@ def _temporal_conv_net( Input tensor of shape (batch_size, channels, sequence_length). num_inputs : int Number of input channels. - num_channels : list of int + n_filters : list of int Number of output channels for each temporal block. kernel_size : int, default=2 Size of convolutional kernels. @@ -273,11 +273,11 @@ def _temporal_conv_net( tf.Tensor Output tensor after applying all temporal blocks. """ - num_levels = len(num_channels) + num_levels = len(n_filters) for i in range(num_levels): dilation_size = 2**i - in_channels = num_inputs if i == 0 else num_channels[i - 1] - out_channels = num_channels[i] + in_channels = num_inputs if i == 0 else n_filters[i - 1] + out_channels = n_filters[i] padding = (kernel_size - 1) * dilation_size x = self._temporal_block( @@ -331,13 +331,11 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple: x = self._temporal_conv_net( x, num_inputs=self.num_inputs, - num_channels=self.num_channels, + n_filters=self.n_filters, kernel_size=self.kernel_size, dropout=self.dropout, ) x = tf.keras.layers.Dense(input_shape[0])(x[:, -1, :]) - output = tf.keras.layers.Lambda( - lambda x: tf.reduce_mean(x, axis=1, keepdims=True), output_shape=(1,) - )(x) + output = tf.keras.layers.Dense(1)(x) return input_layer, output diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py index b78424a73a..47c1d38615 100644 --- a/aeon/networks/tests/test_tcn.py +++ b/aeon/networks/tests/test_tcn.py @@ -16,10 +16,10 @@ def test_tcn_network_basic(): input_shape = (100, 5) num_inputs = 5 - num_channels = [32, 64] + n_filters = [32, 64] tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, num_channels=num_channels + num_inputs=num_inputs, n_filters=n_filters ) input_layer, output_layer = tcn_network.build_network(input_shape) @@ -38,8 +38,8 @@ def test_tcn_network_basic(): not _check_soft_dependencies(["tensorflow"], severity="none"), reason="Tensorflow soft dependency unavailable.", ) -@pytest.mark.parametrize("num_channels", [[32], [32, 64], [16, 32, 64], [64, 32, 16]]) -def test_tcn_network_different_channels(num_channels): +@pytest.mark.parametrize("n_filters", [[32], [32, 64], [16, 32, 64], [64, 32, 16]]) +def test_tcn_network_different_channels(n_filters): """Test TCN network with different channel configurations.""" import tensorflow as tf @@ -47,7 +47,7 @@ def test_tcn_network_different_channels(num_channels): num_inputs = 3 tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, num_channels=num_channels + num_inputs=num_inputs, n_filters=n_filters ) input_layer, output_layer = tcn_network.build_network(input_shape) @@ -74,11 +74,11 @@ def test_tcn_network_kernel_sizes(kernel_size): input_shape = (80, 4) num_inputs = 4 - num_channels = [32, 64] + n_filters = [32, 64] tcn_network = TemporalConvolutionalNetwork( num_inputs=num_inputs, - num_channels=num_channels, + n_filters=n_filters, kernel_size=kernel_size, ) input_layer, output_layer = tcn_network.build_network(input_shape) @@ -99,10 +99,10 @@ def test_tcn_network_dropout_rates(dropout): input_shape = (60, 2) num_inputs = 2 - num_channels = [16, 32] + n_filters = [16, 32] tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, num_channels=num_channels, dropout=dropout + num_inputs=num_inputs, n_filters=n_filters, dropout=dropout ) input_layer, output_layer = tcn_network.build_network(input_shape) @@ -123,10 +123,10 @@ def test_tcn_network_output_shape(): input_shape = (40, 6) batch_size = 16 num_inputs = 6 - num_channels = [32, 64] + n_filters = [32, 64] tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, num_channels=num_channels + num_inputs=num_inputs, n_filters=n_filters ) input_layer, output_layer = tcn_network.build_network(input_shape) model = tf.keras.Model(inputs=input_layer, outputs=output_layer) @@ -148,7 +148,7 @@ def test_tcn_network_output_shape(): ) def test_tcn_network_config(): """Test TCN network configuration attributes.""" - tcn_network = TemporalConvolutionalNetwork(num_inputs=3, num_channels=[16, 32]) + tcn_network = TemporalConvolutionalNetwork(num_inputs=3, n_filters=[16, 32]) # Check _config attributes assert "python_dependencies" in tcn_network._config @@ -165,20 +165,20 @@ def test_tcn_network_config(): def test_tcn_network_parameter_initialization(): """Test TCN network parameter initialization.""" num_inputs = 4 - num_channels = [32, 64, 128] + n_filters = [32, 64, 128] kernel_size = 3 dropout = 0.2 tcn_network = TemporalConvolutionalNetwork( num_inputs=num_inputs, - num_channels=num_channels, + n_filters=n_filters, kernel_size=kernel_size, dropout=dropout, ) # Check that parameters are set correctly assert tcn_network.num_inputs == num_inputs - assert tcn_network.num_channels == num_channels + assert tcn_network.n_filters == n_filters assert tcn_network.kernel_size == kernel_size assert tcn_network.dropout == dropout @@ -193,10 +193,10 @@ def test_tcn_network_single_layer(): input_shape = (30, 2) num_inputs = 2 - num_channels = [16] # Single layer + n_filters = [16] # Single layer tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, num_channels=num_channels + num_inputs=num_inputs, n_filters=n_filters ) input_layer, output_layer = tcn_network.build_network(input_shape) From 30d862abc9ab04f424d643e1dcf69598d7f85eb7 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 8 Jul 2025 14:09:51 +0530 Subject: [PATCH 13/36] base fst changed --- aeon/forecasting/deep_learning/base.py | 54 ++++++++------------------ 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index ba33331fc7..40c7b3e212 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -68,6 +68,7 @@ def __init__( self.random_state = random_state self.axis = axis self.model_ = None + self.last_window_ = None # Pass horizon and axis to BaseForecaster super().__init__(horizon=horizon, axis=axis) @@ -121,7 +122,7 @@ def _fit(self, y, X=None): epochs=self.epochs, verbose=self.verbose, ) - + self.last_window_ = y_inner[-self.window :] return self def _predict(self, y=None, X=None): @@ -130,9 +131,9 @@ def _predict(self, y=None, X=None): Parameters ---------- y : np.ndarray or pd.Series, default=None - Series to predict from. + Series to predict from. If None, uses last fitted window. X : np.ndarray or pd.DataFrame, default=None - Exogenous variables. + Exogenous variables (not supported by default). Returns ------- @@ -140,49 +141,28 @@ def _predict(self, y=None, X=None): Predicted values for the specified horizon. """ if y is None: - raise ValueError("y cannot be None for prediction") - - # Convert input data to numpy array - y_inner = self._convert_input(y) - - if len(y_inner) < self.window: - raise ValueError( - f"Input data length ({len(y_inner)}) is less than the window size " - f"({self.window})." - ) - - # Use the last window of data for prediction - last_window = y_inner[-self.window :].reshape(1, self.window, 1) - - # Make prediction + if not hasattr(self, "last_window_"): + raise ValueError("No fitted data available for prediction.") + y_inner = self.last_window_ + else: + y_inner = self._convert_input(y) + if len(y_inner) < self.window: + raise ValueError( + f"Input data length ({len(y_inner)}) is less than the window size " + f"({self.window})." + ) + y_inner = y_inner[-self.window :] + + last_window = y_inner.reshape(1, self.window, 1) predictions = [] current_window = last_window for _ in range(self.horizon): pred = self.model_.predict(current_window, verbose=0) predictions.append(pred[0, 0]) - # Update the window with the latest prediction (autoregressive) current_window = np.roll(current_window, -1, axis=1) current_window[0, -1, 0] = pred[0, 0] - return np.array(predictions) - def _forecast(self, y, X=None): - """Forecast time series at future horizon. - - Parameters - ---------- - y : np.ndarray or pd.Series - Time series to forecast from. - X : np.ndarray or pd.DataFrame, default=None - Exogenous variables. - - Returns - ------- - forecasts : np.ndarray - Forecasted values for the specified horizon. - """ - return self._fit(y, X)._predict(y, X) - def _convert_input(self, y): """Convert input data to numpy array. From 9b9d26608980f0d87ab69eb890f276ab89192474 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 8 Jul 2025 14:17:21 +0530 Subject: [PATCH 14/36] TCN forecaster updated --- aeon/forecasting/deep_learning/_tcn.py | 44 +++++++------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py index 1b591e6c74..147dc618ec 100644 --- a/aeon/forecasting/deep_learning/_tcn.py +++ b/aeon/forecasting/deep_learning/_tcn.py @@ -37,7 +37,7 @@ class TCNForecaster(BaseDeepForecaster): Axis along which to apply the forecaster. num_inputs : int, default=1 Number of input channels/features in the input sequence. - num_channels : list of int, default=[16, 16, 16] + n_filters : list of int, default=[16, 16, 16] List specifying the number of output channels for each layer of the TCN. The length determines the depth of the network. kernel_size : int, default=2 @@ -49,6 +49,12 @@ class TCNForecaster(BaseDeepForecaster): """ + _tags = { + "capability:horizon": True, + "capability:multivariate": True, + "capability:exogenous": False, + } + def __init__( self, horizon=1, @@ -61,7 +67,7 @@ def __init__( random_state=None, axis=0, num_inputs=1, - num_channels=None, + n_filters=None, kernel_size=2, dropout=0.2, ): @@ -77,35 +83,10 @@ def __init__( axis=axis, ) self.num_inputs = num_inputs - self.num_channels = num_channels if num_channels is not None else [16, 16, 16] + self.n_filters = n_filters self.kernel_size = kernel_size self.dropout = dropout - def _add_linear_layer(self, x, output_units): - """Add a linear layer to the output of the TCN network. - - Parameters - ---------- - x : tf.Tensor - Input tensor from the TCN output, typically of shape - (batch_size, channels, sequence_length). - output_units : int - Number of output units for the linear layer, typically matching - the forecasting horizon. - - Returns - ------- - tf.Tensor - Output tensor after applying the linear layer. - """ - import tensorflow as tf - - # Take the last time step's output for forecasting - x_last = x[:, -1, :] - # Apply a dense layer to map to the desired output size (horizon) - output = tf.keras.layers.Dense(output_units)(x_last) - return output - def _build_model(self, input_shape): """Build the TCN model for forecasting. @@ -121,10 +102,12 @@ def _build_model(self, input_shape): """ import tensorflow as tf + if self.n_filters is None: + self.n_filters = [16] * 3 # Initialize the TCN network with the updated parameters network = TemporalConvolutionalNetwork( num_inputs=self.num_inputs, - num_channels=self.num_channels, + n_filters=self.n_filters, kernel_size=self.kernel_size, dropout=self.dropout, ) @@ -132,9 +115,6 @@ def _build_model(self, input_shape): # Build the network with the given input shape input_layer, output = network.build_network(input_shape=input_shape) - # Adjust the output layer to match the forecasting horizon - output = self._add_linear_layer(output, self.horizon) - # Create the final model model = tf.keras.Model(inputs=input_layer, outputs=output) return model From 78b2f3dfcc3a3eb01558d463da64c2549d0a7c5a Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 8 Jul 2025 14:18:17 +0530 Subject: [PATCH 15/36] test file corrected --- aeon/forecasting/deep_learning/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index 270a60225e..21e90e4a68 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -47,7 +47,7 @@ def test_base_deep_forecaster_fit_predict(): forecaster.fit(data) # Predict - predictions = forecaster.predict(data) + predictions = forecaster.predict() # Validate output shape assert ( From 49be666b27010b86ea7b85834e2b861012980bfa Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 8 Jul 2025 20:25:05 +0530 Subject: [PATCH 16/36] tcn updated --- aeon/networks/__init__.py | 4 +- aeon/networks/_tcn.py | 138 ++++++++++++++++---------------- aeon/networks/tests/test_tcn.py | 62 +++++--------- 3 files changed, 92 insertions(+), 112 deletions(-) diff --git a/aeon/networks/__init__.py b/aeon/networks/__init__.py index 61e669283c..aed37be7e7 100644 --- a/aeon/networks/__init__.py +++ b/aeon/networks/__init__.py @@ -19,7 +19,7 @@ "AEBiGRUNetwork", "DisjointCNNNetwork", "RecurrentNetwork", - "TemporalConvolutionalNetwork", + "TCNNetwork", ] from aeon.networks._ae_abgru import AEAttentionBiGRUNetwork from aeon.networks._ae_bgru import AEBiGRUNetwork @@ -37,5 +37,5 @@ from aeon.networks._mlp import MLPNetwork from aeon.networks._resnet import ResNetNetwork from aeon.networks._rnn import RecurrentNetwork -from aeon.networks._tcn import TemporalConvolutionalNetwork +from aeon.networks._tcn import TCNNetwork from aeon.networks.base import BaseDeepLearningNetwork diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py index 62ed404f1b..5400a557b7 100644 --- a/aeon/networks/_tcn.py +++ b/aeon/networks/_tcn.py @@ -1,15 +1,11 @@ -"""Implementation of Temporal Convolutional Network (TCN). - -Based on the paper "An Empirical Evaluation of Generic Convolutional and -Recurrent Networks for Sequence Modeling" by Bai et al. (2018). -""" +"""Implementation of Temporal Convolutional Network (TCN).""" __maintainer__ = [] from aeon.networks.base import BaseDeepLearningNetwork -class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): +class TCNNetwork(BaseDeepLearningNetwork): """Temporal Convolutional Network (TCN) for sequence modeling. A generic convolutional architecture for sequence modeling that combines: @@ -21,9 +17,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): Parameters ---------- - num_inputs : int - Number of input channels/features in the input sequence. - num_channels : list of int + n_blocks : list of int List specifying the number of output channels for each layer. The length determines the depth of the network. kernel_size : int, default=2 @@ -39,19 +33,19 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): References ---------- - Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of + .. [1] Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271. Examples -------- - >>> from aeon.networks._tcn import TemporalConvolutionalNetwork + >>> from aeon.networks._tcn import TCNNetwork >>> from aeon.testing.data_generation import make_example_3d_numpy >>> import tensorflow as tf >>> X, y = make_example_3d_numpy(n_cases=8, n_channels=4, n_timepoints=150, ... return_y=True, regression_target=True, ... random_state=42) - >>> network = TemporalConvolutionalNetwork(num_inputs=4, num_channels=[8, 8]) + >>> network = TCNNetwork(num_channels=[8, 8]) >>> input_layer, output = network.build_network(input_shape=(4, 150)) >>> model = tf.keras.Model(inputs=input_layer, outputs=output) >>> model.compile(optimizer="adam", loss="mse") @@ -67,8 +61,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): def __init__( self, - num_inputs: int = 1, - n_filters: list = [16] * 3, # changed from num_channels + n_blocks: list = [16] * 3, kernel_size: int = 2, dropout: float = 0.2, ): @@ -78,7 +71,7 @@ def __init__( ---------- num_inputs : int Number of input channels/features. - n_filters : list of int + n_blocks : list of int Number of output channels for each temporal block. kernel_size : int, default=2 Size of convolutional kernels. @@ -86,33 +79,32 @@ def __init__( Dropout rate for regularization. """ super().__init__() - self.num_inputs = num_inputs - self.n_filters = n_filters + self.n_blocks = n_blocks self.kernel_size = kernel_size self.dropout = dropout def _conv1d_with_variable_padding( self, - x, - filters: int, + input_tensor, + n_filters: int, kernel_size: int, padding_value: int, - stride: int = 1, + strides: int = 1, dilation_rate: int = 1, ): """Apply 1D convolution with variable padding for causal convolutions. Parameters ---------- - x : tf.Tensor + input_tensor : tf.Tensor Input tensor of shape (batch_size, channels, sequence_length). - filters : int + n_filters : int Number of output filters. kernel_size : int Size of the convolutional kernel. padding_value : int Amount of padding to apply. - stride : int, default=1 + strides : int, default=1 Stride of the convolution. dilation_rate : int, default=1 Dilation rate for dilated convolutions. @@ -125,16 +117,16 @@ def _conv1d_with_variable_padding( import tensorflow as tf # Transpose to Keras format (batch, sequence, channels) - x_keras_format = tf.keras.layers.Permute((2, 1))(x) + x_keras_format = tf.keras.layers.Permute((2, 1))(input_tensor) # Apply padding in sequence dimension padded_x = tf.keras.layers.ZeroPadding1D(padding=padding_value)(x_keras_format) # Create and apply convolution layer conv_layer = tf.keras.layers.Conv1D( - filters=filters, + filters=n_filters, kernel_size=kernel_size, - strides=stride, + strides=strides, dilation_rate=dilation_rate, padding="valid", ) @@ -145,7 +137,7 @@ def _conv1d_with_variable_padding( # Transpose back to PyTorch format (batch, channels, sequence) return tf.keras.layers.Permute((2, 1))(out) - def _chomp_1d(self, x, chomp_size: int): + def _chomp(self, input_tensor, chomp_size: int): """Remove padding from the end of sequences to maintain causality. This operation ensures that the output at time t only depends on @@ -153,7 +145,7 @@ def _chomp_1d(self, x, chomp_size: int): Parameters ---------- - x : tf.Tensor + input_tensor : tf.Tensor Input tensor of shape (batch_size, channels, sequence_length). chomp_size : int Number of time steps to remove from the end. @@ -163,17 +155,17 @@ def _chomp_1d(self, x, chomp_size: int): tf.Tensor Chomped tensor with reduced sequence length. """ - return x[:, :, :-chomp_size] + return input_tensor[:, :, :-chomp_size] def _temporal_block( self, - x, + input_tensor, n_inputs: int, - n_outputs: int, + n_filters: int, kernel_size: int, - stride: int, - dilation: int, - padding: int, + strides: int, + dilation_rate: int, + padding_value: int, dropout: float = 0.2, training: bool = None, ): @@ -187,19 +179,19 @@ def _temporal_block( Parameters ---------- - x : tf.Tensor + input_tensor : tf.Tensor Input tensor of shape (batch_size, channels, sequence_length). n_inputs : int Number of input channels. - n_outputs : int - Number of output channels. + n_filters : int + Number of output filters. kernel_size : int Size of convolutional kernels. - stride : int + strides : int Stride of convolutions (typically 1). - dilation : int + dilation_rate : int Dilation factor for dilated convolutions. - padding : int + padding_value : int Padding size to be chomped off. dropout : float, default=0.2 Dropout rate for regularization. @@ -209,31 +201,38 @@ def _temporal_block( Returns ------- tf.Tensor - Output tensor of shape (batch_size, n_outputs, sequence_length). + Output tensor of shape (batch_size, n_filters, sequence_length). """ import tensorflow as tf # First convolution block out = self._conv1d_with_variable_padding( - x, n_outputs, kernel_size, padding, stride, dilation + input_tensor, n_filters, kernel_size, padding_value, strides, dilation_rate ) - out = self._chomp_1d(out, padding) + out = self._chomp(out, padding_value) out = tf.keras.layers.ReLU()(out) out = tf.keras.layers.Dropout(dropout)(out, training=training) # Second convolution block out = self._conv1d_with_variable_padding( - out, n_outputs, kernel_size, padding, stride, dilation + out, n_filters, kernel_size, padding_value, strides, dilation_rate ) - out = self._chomp_1d(out, padding) + out = self._chomp(out, padding_value) out = tf.keras.layers.ReLU()(out) out = tf.keras.layers.Dropout(dropout)(out, training=training) # Residual connection with optional dimension matching - if n_inputs != n_outputs: - res = self._conv1d_with_variable_padding(x, n_outputs, 1, 0, 1, 1) + if n_inputs != n_filters: + res = self._conv1d_with_variable_padding( + input_tensor=input_tensor, + n_filters=n_filters, + kernel_size=1, + padding_value=0, + strides=1, + dilation_rate=1, + ) else: - res = x + res = input_tensor # Add residual and apply final ReLU result = tf.keras.layers.Add()([out, res]) @@ -241,9 +240,9 @@ def _temporal_block( def _temporal_conv_net( self, - x, - num_inputs: int, - n_filters: list, # changed from num_channels + input_tensor, + n_inputs: int, + n_blocks: list, kernel_size: int = 2, dropout: float = 0.2, training: bool = None, @@ -255,11 +254,11 @@ def _temporal_conv_net( Parameters ---------- - x : tf.Tensor + input_tensor : tf.Tensor Input tensor of shape (batch_size, channels, sequence_length). - num_inputs : int + n_inputs : int Number of input channels. - n_filters : list of int + n_blocks : list of int Number of output channels for each temporal block. kernel_size : int, default=2 Size of convolutional kernels. @@ -273,26 +272,26 @@ def _temporal_conv_net( tf.Tensor Output tensor after applying all temporal blocks. """ - num_levels = len(n_filters) + num_levels = len(n_blocks) for i in range(num_levels): - dilation_size = 2**i - in_channels = num_inputs if i == 0 else n_filters[i - 1] - out_channels = n_filters[i] - padding = (kernel_size - 1) * dilation_size + dilation_rate = 2**i + in_channels = n_inputs if i == 0 else n_blocks[i - 1] + out_channels = n_blocks[i] + padding_value = (kernel_size - 1) * dilation_rate - x = self._temporal_block( - x, + input_tensor = self._temporal_block( + input_tensor, n_inputs=in_channels, - n_outputs=out_channels, + n_filters=out_channels, kernel_size=kernel_size, - stride=1, - dilation=dilation_size, - padding=padding, + strides=1, + dilation_rate=dilation_rate, + padding_value=padding_value, dropout=dropout, training=training, ) - return x + return input_tensor def build_network(self, input_shape: tuple, **kwargs) -> tuple: """Build the complete TCN architecture. @@ -303,7 +302,7 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple: Parameters ---------- input_shape : tuple - Shape of input data (sequence_length, num_features). + Shape of input data (n_channels, n_timepoints). **kwargs Additional keyword arguments (unused). @@ -326,12 +325,13 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple: # Transpose input to match the expected format (batch, channels, seq) x = input_layer + n_inputs = input_shape[0] # Apply TCN using the private function x = self._temporal_conv_net( x, - num_inputs=self.num_inputs, - n_filters=self.n_filters, + n_inputs=n_inputs, + n_blocks=self.n_blocks, kernel_size=self.kernel_size, dropout=self.dropout, ) diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py index 47c1d38615..94495e3c41 100644 --- a/aeon/networks/tests/test_tcn.py +++ b/aeon/networks/tests/test_tcn.py @@ -1,8 +1,8 @@ -"""Tests for the TemporalConvolutionalNetwork.""" +"""Tests for the TCNNetwork.""" import pytest -from aeon.networks import TemporalConvolutionalNetwork +from aeon.networks import TCNNetwork from aeon.utils.validation._dependencies import _check_soft_dependencies @@ -15,12 +15,9 @@ def test_tcn_network_basic(): import tensorflow as tf input_shape = (100, 5) - num_inputs = 5 - n_filters = [32, 64] + n_blocks = [32, 64] - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, n_filters=n_filters - ) + tcn_network = TCNNetwork(n_blocks=n_blocks) input_layer, output_layer = tcn_network.build_network(input_shape) # Check that layers are created correctly @@ -38,17 +35,14 @@ def test_tcn_network_basic(): not _check_soft_dependencies(["tensorflow"], severity="none"), reason="Tensorflow soft dependency unavailable.", ) -@pytest.mark.parametrize("n_filters", [[32], [32, 64], [16, 32, 64], [64, 32, 16]]) -def test_tcn_network_different_channels(n_filters): +@pytest.mark.parametrize("n_blocks", [[32], [32, 64], [16, 32, 64], [64, 32, 16]]) +def test_tcn_network_different_channels(n_blocks): """Test TCN network with different channel configurations.""" import tensorflow as tf input_shape = (50, 3) - num_inputs = 3 - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, n_filters=n_filters - ) + tcn_network = TCNNetwork(n_blocks=n_blocks) input_layer, output_layer = tcn_network.build_network(input_shape) # Create a model and verify it works @@ -73,12 +67,10 @@ def test_tcn_network_kernel_sizes(kernel_size): import tensorflow as tf input_shape = (80, 4) - num_inputs = 4 - n_filters = [32, 64] + n_blocks = [32, 64] - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, - n_filters=n_filters, + tcn_network = TCNNetwork( + n_blocks=n_blocks, kernel_size=kernel_size, ) input_layer, output_layer = tcn_network.build_network(input_shape) @@ -98,12 +90,9 @@ def test_tcn_network_dropout_rates(dropout): import tensorflow as tf input_shape = (60, 2) - num_inputs = 2 - n_filters = [16, 32] + n_blocks = [16, 32] - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, n_filters=n_filters, dropout=dropout - ) + tcn_network = TCNNetwork(n_blocks=n_blocks, dropout=dropout) input_layer, output_layer = tcn_network.build_network(input_shape) # Verify network builds successfully @@ -122,12 +111,9 @@ def test_tcn_network_output_shape(): input_shape = (40, 6) batch_size = 16 - num_inputs = 6 - n_filters = [32, 64] + n_blocks = [32, 64] - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, n_filters=n_filters - ) + tcn_network = TCNNetwork(n_blocks=n_blocks) input_layer, output_layer = tcn_network.build_network(input_shape) model = tf.keras.Model(inputs=input_layer, outputs=output_layer) @@ -148,7 +134,7 @@ def test_tcn_network_output_shape(): ) def test_tcn_network_config(): """Test TCN network configuration attributes.""" - tcn_network = TemporalConvolutionalNetwork(num_inputs=3, n_filters=[16, 32]) + tcn_network = TCNNetwork(n_blocks=[16, 32]) # Check _config attributes assert "python_dependencies" in tcn_network._config @@ -164,21 +150,18 @@ def test_tcn_network_config(): ) def test_tcn_network_parameter_initialization(): """Test TCN network parameter initialization.""" - num_inputs = 4 - n_filters = [32, 64, 128] + n_blocks = [32, 64, 128] kernel_size = 3 dropout = 0.2 - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, - n_filters=n_filters, + tcn_network = TCNNetwork( + n_blocks=n_blocks, kernel_size=kernel_size, dropout=dropout, ) # Check that parameters are set correctly - assert tcn_network.num_inputs == num_inputs - assert tcn_network.n_filters == n_filters + assert tcn_network.n_blocks == n_blocks assert tcn_network.kernel_size == kernel_size assert tcn_network.dropout == dropout @@ -192,12 +175,9 @@ def test_tcn_network_single_layer(): import tensorflow as tf input_shape = (30, 2) - num_inputs = 2 - n_filters = [16] # Single layer + n_blocks = [16] # Single layer - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, n_filters=n_filters - ) + tcn_network = TCNNetwork(n_blocks=n_blocks) input_layer, output_layer = tcn_network.build_network(input_shape) # Verify single layer network works From 7bacdac1a9df547330c08691ab89d4b0ece3a23d Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 8 Jul 2025 20:28:53 +0530 Subject: [PATCH 17/36] tcn updated --- aeon/networks/__init__.py | 4 +- aeon/networks/_tcn.py | 138 ++++++++++++++++---------------- aeon/networks/tests/test_tcn.py | 62 +++++--------- 3 files changed, 92 insertions(+), 112 deletions(-) diff --git a/aeon/networks/__init__.py b/aeon/networks/__init__.py index 61e669283c..aed37be7e7 100644 --- a/aeon/networks/__init__.py +++ b/aeon/networks/__init__.py @@ -19,7 +19,7 @@ "AEBiGRUNetwork", "DisjointCNNNetwork", "RecurrentNetwork", - "TemporalConvolutionalNetwork", + "TCNNetwork", ] from aeon.networks._ae_abgru import AEAttentionBiGRUNetwork from aeon.networks._ae_bgru import AEBiGRUNetwork @@ -37,5 +37,5 @@ from aeon.networks._mlp import MLPNetwork from aeon.networks._resnet import ResNetNetwork from aeon.networks._rnn import RecurrentNetwork -from aeon.networks._tcn import TemporalConvolutionalNetwork +from aeon.networks._tcn import TCNNetwork from aeon.networks.base import BaseDeepLearningNetwork diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py index 62ed404f1b..5400a557b7 100644 --- a/aeon/networks/_tcn.py +++ b/aeon/networks/_tcn.py @@ -1,15 +1,11 @@ -"""Implementation of Temporal Convolutional Network (TCN). - -Based on the paper "An Empirical Evaluation of Generic Convolutional and -Recurrent Networks for Sequence Modeling" by Bai et al. (2018). -""" +"""Implementation of Temporal Convolutional Network (TCN).""" __maintainer__ = [] from aeon.networks.base import BaseDeepLearningNetwork -class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): +class TCNNetwork(BaseDeepLearningNetwork): """Temporal Convolutional Network (TCN) for sequence modeling. A generic convolutional architecture for sequence modeling that combines: @@ -21,9 +17,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): Parameters ---------- - num_inputs : int - Number of input channels/features in the input sequence. - num_channels : list of int + n_blocks : list of int List specifying the number of output channels for each layer. The length determines the depth of the network. kernel_size : int, default=2 @@ -39,19 +33,19 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): References ---------- - Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of + .. [1] Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271. Examples -------- - >>> from aeon.networks._tcn import TemporalConvolutionalNetwork + >>> from aeon.networks._tcn import TCNNetwork >>> from aeon.testing.data_generation import make_example_3d_numpy >>> import tensorflow as tf >>> X, y = make_example_3d_numpy(n_cases=8, n_channels=4, n_timepoints=150, ... return_y=True, regression_target=True, ... random_state=42) - >>> network = TemporalConvolutionalNetwork(num_inputs=4, num_channels=[8, 8]) + >>> network = TCNNetwork(num_channels=[8, 8]) >>> input_layer, output = network.build_network(input_shape=(4, 150)) >>> model = tf.keras.Model(inputs=input_layer, outputs=output) >>> model.compile(optimizer="adam", loss="mse") @@ -67,8 +61,7 @@ class TemporalConvolutionalNetwork(BaseDeepLearningNetwork): def __init__( self, - num_inputs: int = 1, - n_filters: list = [16] * 3, # changed from num_channels + n_blocks: list = [16] * 3, kernel_size: int = 2, dropout: float = 0.2, ): @@ -78,7 +71,7 @@ def __init__( ---------- num_inputs : int Number of input channels/features. - n_filters : list of int + n_blocks : list of int Number of output channels for each temporal block. kernel_size : int, default=2 Size of convolutional kernels. @@ -86,33 +79,32 @@ def __init__( Dropout rate for regularization. """ super().__init__() - self.num_inputs = num_inputs - self.n_filters = n_filters + self.n_blocks = n_blocks self.kernel_size = kernel_size self.dropout = dropout def _conv1d_with_variable_padding( self, - x, - filters: int, + input_tensor, + n_filters: int, kernel_size: int, padding_value: int, - stride: int = 1, + strides: int = 1, dilation_rate: int = 1, ): """Apply 1D convolution with variable padding for causal convolutions. Parameters ---------- - x : tf.Tensor + input_tensor : tf.Tensor Input tensor of shape (batch_size, channels, sequence_length). - filters : int + n_filters : int Number of output filters. kernel_size : int Size of the convolutional kernel. padding_value : int Amount of padding to apply. - stride : int, default=1 + strides : int, default=1 Stride of the convolution. dilation_rate : int, default=1 Dilation rate for dilated convolutions. @@ -125,16 +117,16 @@ def _conv1d_with_variable_padding( import tensorflow as tf # Transpose to Keras format (batch, sequence, channels) - x_keras_format = tf.keras.layers.Permute((2, 1))(x) + x_keras_format = tf.keras.layers.Permute((2, 1))(input_tensor) # Apply padding in sequence dimension padded_x = tf.keras.layers.ZeroPadding1D(padding=padding_value)(x_keras_format) # Create and apply convolution layer conv_layer = tf.keras.layers.Conv1D( - filters=filters, + filters=n_filters, kernel_size=kernel_size, - strides=stride, + strides=strides, dilation_rate=dilation_rate, padding="valid", ) @@ -145,7 +137,7 @@ def _conv1d_with_variable_padding( # Transpose back to PyTorch format (batch, channels, sequence) return tf.keras.layers.Permute((2, 1))(out) - def _chomp_1d(self, x, chomp_size: int): + def _chomp(self, input_tensor, chomp_size: int): """Remove padding from the end of sequences to maintain causality. This operation ensures that the output at time t only depends on @@ -153,7 +145,7 @@ def _chomp_1d(self, x, chomp_size: int): Parameters ---------- - x : tf.Tensor + input_tensor : tf.Tensor Input tensor of shape (batch_size, channels, sequence_length). chomp_size : int Number of time steps to remove from the end. @@ -163,17 +155,17 @@ def _chomp_1d(self, x, chomp_size: int): tf.Tensor Chomped tensor with reduced sequence length. """ - return x[:, :, :-chomp_size] + return input_tensor[:, :, :-chomp_size] def _temporal_block( self, - x, + input_tensor, n_inputs: int, - n_outputs: int, + n_filters: int, kernel_size: int, - stride: int, - dilation: int, - padding: int, + strides: int, + dilation_rate: int, + padding_value: int, dropout: float = 0.2, training: bool = None, ): @@ -187,19 +179,19 @@ def _temporal_block( Parameters ---------- - x : tf.Tensor + input_tensor : tf.Tensor Input tensor of shape (batch_size, channels, sequence_length). n_inputs : int Number of input channels. - n_outputs : int - Number of output channels. + n_filters : int + Number of output filters. kernel_size : int Size of convolutional kernels. - stride : int + strides : int Stride of convolutions (typically 1). - dilation : int + dilation_rate : int Dilation factor for dilated convolutions. - padding : int + padding_value : int Padding size to be chomped off. dropout : float, default=0.2 Dropout rate for regularization. @@ -209,31 +201,38 @@ def _temporal_block( Returns ------- tf.Tensor - Output tensor of shape (batch_size, n_outputs, sequence_length). + Output tensor of shape (batch_size, n_filters, sequence_length). """ import tensorflow as tf # First convolution block out = self._conv1d_with_variable_padding( - x, n_outputs, kernel_size, padding, stride, dilation + input_tensor, n_filters, kernel_size, padding_value, strides, dilation_rate ) - out = self._chomp_1d(out, padding) + out = self._chomp(out, padding_value) out = tf.keras.layers.ReLU()(out) out = tf.keras.layers.Dropout(dropout)(out, training=training) # Second convolution block out = self._conv1d_with_variable_padding( - out, n_outputs, kernel_size, padding, stride, dilation + out, n_filters, kernel_size, padding_value, strides, dilation_rate ) - out = self._chomp_1d(out, padding) + out = self._chomp(out, padding_value) out = tf.keras.layers.ReLU()(out) out = tf.keras.layers.Dropout(dropout)(out, training=training) # Residual connection with optional dimension matching - if n_inputs != n_outputs: - res = self._conv1d_with_variable_padding(x, n_outputs, 1, 0, 1, 1) + if n_inputs != n_filters: + res = self._conv1d_with_variable_padding( + input_tensor=input_tensor, + n_filters=n_filters, + kernel_size=1, + padding_value=0, + strides=1, + dilation_rate=1, + ) else: - res = x + res = input_tensor # Add residual and apply final ReLU result = tf.keras.layers.Add()([out, res]) @@ -241,9 +240,9 @@ def _temporal_block( def _temporal_conv_net( self, - x, - num_inputs: int, - n_filters: list, # changed from num_channels + input_tensor, + n_inputs: int, + n_blocks: list, kernel_size: int = 2, dropout: float = 0.2, training: bool = None, @@ -255,11 +254,11 @@ def _temporal_conv_net( Parameters ---------- - x : tf.Tensor + input_tensor : tf.Tensor Input tensor of shape (batch_size, channels, sequence_length). - num_inputs : int + n_inputs : int Number of input channels. - n_filters : list of int + n_blocks : list of int Number of output channels for each temporal block. kernel_size : int, default=2 Size of convolutional kernels. @@ -273,26 +272,26 @@ def _temporal_conv_net( tf.Tensor Output tensor after applying all temporal blocks. """ - num_levels = len(n_filters) + num_levels = len(n_blocks) for i in range(num_levels): - dilation_size = 2**i - in_channels = num_inputs if i == 0 else n_filters[i - 1] - out_channels = n_filters[i] - padding = (kernel_size - 1) * dilation_size + dilation_rate = 2**i + in_channels = n_inputs if i == 0 else n_blocks[i - 1] + out_channels = n_blocks[i] + padding_value = (kernel_size - 1) * dilation_rate - x = self._temporal_block( - x, + input_tensor = self._temporal_block( + input_tensor, n_inputs=in_channels, - n_outputs=out_channels, + n_filters=out_channels, kernel_size=kernel_size, - stride=1, - dilation=dilation_size, - padding=padding, + strides=1, + dilation_rate=dilation_rate, + padding_value=padding_value, dropout=dropout, training=training, ) - return x + return input_tensor def build_network(self, input_shape: tuple, **kwargs) -> tuple: """Build the complete TCN architecture. @@ -303,7 +302,7 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple: Parameters ---------- input_shape : tuple - Shape of input data (sequence_length, num_features). + Shape of input data (n_channels, n_timepoints). **kwargs Additional keyword arguments (unused). @@ -326,12 +325,13 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple: # Transpose input to match the expected format (batch, channels, seq) x = input_layer + n_inputs = input_shape[0] # Apply TCN using the private function x = self._temporal_conv_net( x, - num_inputs=self.num_inputs, - n_filters=self.n_filters, + n_inputs=n_inputs, + n_blocks=self.n_blocks, kernel_size=self.kernel_size, dropout=self.dropout, ) diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py index 47c1d38615..94495e3c41 100644 --- a/aeon/networks/tests/test_tcn.py +++ b/aeon/networks/tests/test_tcn.py @@ -1,8 +1,8 @@ -"""Tests for the TemporalConvolutionalNetwork.""" +"""Tests for the TCNNetwork.""" import pytest -from aeon.networks import TemporalConvolutionalNetwork +from aeon.networks import TCNNetwork from aeon.utils.validation._dependencies import _check_soft_dependencies @@ -15,12 +15,9 @@ def test_tcn_network_basic(): import tensorflow as tf input_shape = (100, 5) - num_inputs = 5 - n_filters = [32, 64] + n_blocks = [32, 64] - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, n_filters=n_filters - ) + tcn_network = TCNNetwork(n_blocks=n_blocks) input_layer, output_layer = tcn_network.build_network(input_shape) # Check that layers are created correctly @@ -38,17 +35,14 @@ def test_tcn_network_basic(): not _check_soft_dependencies(["tensorflow"], severity="none"), reason="Tensorflow soft dependency unavailable.", ) -@pytest.mark.parametrize("n_filters", [[32], [32, 64], [16, 32, 64], [64, 32, 16]]) -def test_tcn_network_different_channels(n_filters): +@pytest.mark.parametrize("n_blocks", [[32], [32, 64], [16, 32, 64], [64, 32, 16]]) +def test_tcn_network_different_channels(n_blocks): """Test TCN network with different channel configurations.""" import tensorflow as tf input_shape = (50, 3) - num_inputs = 3 - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, n_filters=n_filters - ) + tcn_network = TCNNetwork(n_blocks=n_blocks) input_layer, output_layer = tcn_network.build_network(input_shape) # Create a model and verify it works @@ -73,12 +67,10 @@ def test_tcn_network_kernel_sizes(kernel_size): import tensorflow as tf input_shape = (80, 4) - num_inputs = 4 - n_filters = [32, 64] + n_blocks = [32, 64] - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, - n_filters=n_filters, + tcn_network = TCNNetwork( + n_blocks=n_blocks, kernel_size=kernel_size, ) input_layer, output_layer = tcn_network.build_network(input_shape) @@ -98,12 +90,9 @@ def test_tcn_network_dropout_rates(dropout): import tensorflow as tf input_shape = (60, 2) - num_inputs = 2 - n_filters = [16, 32] + n_blocks = [16, 32] - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, n_filters=n_filters, dropout=dropout - ) + tcn_network = TCNNetwork(n_blocks=n_blocks, dropout=dropout) input_layer, output_layer = tcn_network.build_network(input_shape) # Verify network builds successfully @@ -122,12 +111,9 @@ def test_tcn_network_output_shape(): input_shape = (40, 6) batch_size = 16 - num_inputs = 6 - n_filters = [32, 64] + n_blocks = [32, 64] - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, n_filters=n_filters - ) + tcn_network = TCNNetwork(n_blocks=n_blocks) input_layer, output_layer = tcn_network.build_network(input_shape) model = tf.keras.Model(inputs=input_layer, outputs=output_layer) @@ -148,7 +134,7 @@ def test_tcn_network_output_shape(): ) def test_tcn_network_config(): """Test TCN network configuration attributes.""" - tcn_network = TemporalConvolutionalNetwork(num_inputs=3, n_filters=[16, 32]) + tcn_network = TCNNetwork(n_blocks=[16, 32]) # Check _config attributes assert "python_dependencies" in tcn_network._config @@ -164,21 +150,18 @@ def test_tcn_network_config(): ) def test_tcn_network_parameter_initialization(): """Test TCN network parameter initialization.""" - num_inputs = 4 - n_filters = [32, 64, 128] + n_blocks = [32, 64, 128] kernel_size = 3 dropout = 0.2 - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, - n_filters=n_filters, + tcn_network = TCNNetwork( + n_blocks=n_blocks, kernel_size=kernel_size, dropout=dropout, ) # Check that parameters are set correctly - assert tcn_network.num_inputs == num_inputs - assert tcn_network.n_filters == n_filters + assert tcn_network.n_blocks == n_blocks assert tcn_network.kernel_size == kernel_size assert tcn_network.dropout == dropout @@ -192,12 +175,9 @@ def test_tcn_network_single_layer(): import tensorflow as tf input_shape = (30, 2) - num_inputs = 2 - n_filters = [16] # Single layer + n_blocks = [16] # Single layer - tcn_network = TemporalConvolutionalNetwork( - num_inputs=num_inputs, n_filters=n_filters - ) + tcn_network = TCNNetwork(n_blocks=n_blocks) input_layer, output_layer = tcn_network.build_network(input_shape) # Verify single layer network works From 9a1b8782fadede1aabf30c429ad9113d04dffc32 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 8 Jul 2025 20:41:34 +0530 Subject: [PATCH 18/36] tcnfst updated with net --- aeon/forecasting/deep_learning/_tcn.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py index 147dc618ec..97d5a282ba 100644 --- a/aeon/forecasting/deep_learning/_tcn.py +++ b/aeon/forecasting/deep_learning/_tcn.py @@ -6,13 +6,13 @@ __all__ = ["TCNForecaster"] from aeon.forecasting.deep_learning.base import BaseDeepForecaster -from aeon.networks._tcn import TemporalConvolutionalNetwork +from aeon.networks._tcn import TCNNetwork class TCNForecaster(BaseDeepForecaster): """A deep learning forecaster using Temporal Convolutional Network (TCN). - It leverages the `TemporalConvolutionalNetwork` from aeon's network module + It leverages the `TCNNetwork` from aeon's network module to build the architecture suitable for forecasting tasks. Parameters @@ -35,9 +35,7 @@ class TCNForecaster(BaseDeepForecaster): Seed for random number generators. axis : int, default=0 Axis along which to apply the forecaster. - num_inputs : int, default=1 - Number of input channels/features in the input sequence. - n_filters : list of int, default=[16, 16, 16] + n_blocks : list of int, default=[16, 16, 16] List specifying the number of output channels for each layer of the TCN. The length determines the depth of the network. kernel_size : int, default=2 @@ -66,8 +64,7 @@ def __init__( loss="mse", random_state=None, axis=0, - num_inputs=1, - n_filters=None, + n_blocks=None, kernel_size=2, dropout=0.2, ): @@ -78,12 +75,12 @@ def __init__( epochs=epochs, verbose=verbose, optimizer=optimizer, - loss=loss, random_state=random_state, axis=axis, + loss=loss, ) - self.num_inputs = num_inputs - self.n_filters = n_filters + + self.n_blocks = n_blocks self.kernel_size = kernel_size self.dropout = dropout @@ -102,12 +99,11 @@ def _build_model(self, input_shape): """ import tensorflow as tf - if self.n_filters is None: - self.n_filters = [16] * 3 + if self.n_blocks is None: + self.n_blocks = [16] * 3 # Initialize the TCN network with the updated parameters - network = TemporalConvolutionalNetwork( - num_inputs=self.num_inputs, - n_filters=self.n_filters, + network = TCNNetwork( + n_blocks=self.n_blocks, kernel_size=self.kernel_size, dropout=self.dropout, ) From 08dadeca78beba0b1c2a6685e003950366603e58 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 8 Jul 2025 20:49:27 +0530 Subject: [PATCH 19/36] doctest corrected --- aeon/networks/_tcn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py index 5400a557b7..834b5865e7 100644 --- a/aeon/networks/_tcn.py +++ b/aeon/networks/_tcn.py @@ -45,7 +45,7 @@ class TCNNetwork(BaseDeepLearningNetwork): >>> X, y = make_example_3d_numpy(n_cases=8, n_channels=4, n_timepoints=150, ... return_y=True, regression_target=True, ... random_state=42) - >>> network = TCNNetwork(num_channels=[8, 8]) + >>> network = TCNNetwork(n_blocks=[8, 8]) >>> input_layer, output = network.build_network(input_shape=(4, 150)) >>> model = tf.keras.Model(inputs=input_layer, outputs=output) >>> model.compile(optimizer="adam", loss="mse") From 086c5a46701a6c068bfccba0dd71a8a445dc0d41 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 13 Jul 2025 12:57:26 +0530 Subject: [PATCH 20/36] changes made --- aeon/forecasting/deep_learning/_tcn.py | 46 ++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py index 97d5a282ba..d3d06bf0fe 100644 --- a/aeon/forecasting/deep_learning/_tcn.py +++ b/aeon/forecasting/deep_learning/_tcn.py @@ -3,8 +3,11 @@ from __future__ import annotations __maintainer__ = [] + __all__ = ["TCNForecaster"] +from typing import Any + from aeon.forecasting.deep_learning.base import BaseDeepForecaster from aeon.networks._tcn import TCNNetwork @@ -43,14 +46,14 @@ class TCNForecaster(BaseDeepForecaster): dropout : float, default=0.2 Dropout rate applied after each convolutional layer for regularization. - - """ _tags = { + "python_dependencies": ["tensorflow"], "capability:horizon": True, "capability:multivariate": True, "capability:exogenous": False, + "capability:univariate": True, } def __init__( @@ -79,7 +82,6 @@ def __init__( axis=axis, loss=loss, ) - self.n_blocks = n_blocks self.kernel_size = kernel_size self.dropout = dropout @@ -99,11 +101,9 @@ def _build_model(self, input_shape): """ import tensorflow as tf - if self.n_blocks is None: - self.n_blocks = [16] * 3 # Initialize the TCN network with the updated parameters network = TCNNetwork( - n_blocks=self.n_blocks, + n_blocks=self.n_blocks if self.n_blocks is not None else [16, 16, 16], kernel_size=self.kernel_size, dropout=self.dropout, ) @@ -114,3 +114,37 @@ def _build_model(self, input_shape): # Create the final model model = tf.keras.Model(inputs=input_layer, outputs=output) return model + + # Added to handle __name__ in tests (class-level access) + @classmethod + def _get_test_params( + cls, parameter_set: str = "default" + ) -> dict[str, Any] | list[dict[str, Any]]: + """ + Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + For forecasters, a "default" set of parameters should be provided for + general testing, and a "results_comparison" set for comparing against + previously recorded results if the general set does not produce suitable + probabilities to compare against. + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class. + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + """ + param = { + "epochs": 10, + "batch_size": 4, + "n_blocks": [8, 8], + "kernel_size": 2, + "dropout": 0.1, + } + return [param] From b6ccd079f6b7b4771fff40f1ee61ad536617f99d Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 20 Jul 2025 13:46:28 +0530 Subject: [PATCH 21/36] basedelf updated --- aeon/forecasting/deep_learning/base.py | 37 +++++++++++++------ .../deep_learning/tests/test_base.py | 17 +++++---- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index 40c7b3e212..3fa421c772 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -31,7 +31,7 @@ class BaseDeepForecaster(BaseForecaster): The window size for creating input sequences. batch_size : int, default=32 Batch size for training the model. - epochs : int, default=100 + n_epochs : int, default=100 Number of epochs to train the model. verbose : int, default=0 Verbosity mode (0, 1, or 2). @@ -39,19 +39,33 @@ class BaseDeepForecaster(BaseForecaster): Optimizer to use for training. loss : str or tf.keras.losses.Loss, default='mse' Loss function for training. - random_state : int, default=None - Seed for random number generators. + random_state : int, RandomState instance or None, default=None + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by np.random. + Seeded random number generation can only be guaranteed on CPU processing, + GPU processing will be non-deterministic. axis : int, default=0 Axis along which to apply the forecaster. Default is 0 for univariate time series. """ + _tags = { + "capability:horizon": False, + "capability:exogenous": False, + "algorithm_type": "deeplearning", + "non_deterministic": True, + "cant_pickle": True, + "python_dependencies": "tensorflow", + } + def __init__( self, horizon=1, window=10, batch_size=32, - epochs=100, + n_epochs=100, verbose=0, optimizer="adam", loss="mse", @@ -61,7 +75,7 @@ def __init__( self.horizon = horizon self.window = window self.batch_size = batch_size - self.epochs = epochs + self.n_epochs = n_epochs self.verbose = verbose self.optimizer = optimizer self.loss = loss @@ -89,11 +103,12 @@ def _fit(self, y, X=None): Returns an instance of self. """ import tensorflow as tf + from sklearn.utils import check_random_state # Set random seed for reproducibility - if self.random_state is not None: - np.random.seed(self.random_state) - tf.random.set_seed(self.random_state) + rng = check_random_state(self.random_state) + self.random_state_ = rng.randint(0, np.iinfo(np.int32).max) + tf.keras.utils.set_random_seed(self.random_state_) # Convert input data to numpy array y_inner = self._convert_input(y) @@ -111,7 +126,7 @@ def _fit(self, y, X=None): # Build and compile the model input_shape = X_train.shape[1:] - self.model_ = self._build_model(input_shape) + self.model_ = self.build_model(input_shape) self.model_.compile(optimizer=self.optimizer, loss=self.loss) # Train the model @@ -119,7 +134,7 @@ def _fit(self, y, X=None): X_train, y_train, batch_size=self.batch_size, - epochs=self.epochs, + epochs=self.n_epochs, verbose=self.verbose, ) self.last_window_ = y_inner[-self.window :] @@ -218,7 +233,7 @@ def _create_sequences(self, data): return X, y @abstractmethod - def _build_model(self, input_shape): + def build_model(self, input_shape): """Build the deep learning model. Parameters diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index 21e90e4a68..5827c16a18 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -14,10 +14,13 @@ class SimpleDeepForecaster(BaseDeepForecaster): """A simple concrete implementation of BaseDeepForecaster for testing.""" - def __init__(self, horizon=1, window=5, epochs=1, verbose=0): - super().__init__(horizon=horizon, window=window, epochs=epochs, verbose=verbose) + def __init__(self, horizon=1, window=5, n_epochs=1, verbose=0): + super().__init__( + horizon=horizon, window=window, n_epochs=n_epochs, verbose=verbose + ) - def _build_model(self, input_shape): + def build_model(self, input_shape): + """Build a simple Keras model for testing.""" import tensorflow as tf model = tf.keras.Sequential( @@ -41,7 +44,7 @@ def test_base_deep_forecaster_fit_predict(): data = np.random.randn(50) # Initialize forecaster - forecaster = SimpleDeepForecaster(horizon=2, window=5, epochs=1, verbose=0) + forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0) # Fit the model forecaster.fit(data) @@ -51,8 +54,8 @@ def test_base_deep_forecaster_fit_predict(): # Validate output shape assert ( - len(predictions) == 2 - ), f"Expected predictions of length 2, got {len(predictions)}" + len(predictions) == 1 + ), f"Expected predictions of length 1, got {len(predictions)}" assert isinstance(predictions, np.ndarray), "Predictions should be a numpy array" @@ -63,7 +66,7 @@ def test_base_deep_forecaster_fit_predict(): def test_base_deep_forecaster_insufficient_data(): """Test error handling for insufficient data.""" data = np.random.randn(5) - forecaster = SimpleDeepForecaster(horizon=2, window=5, epochs=1, verbose=0) + forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0) with pytest.raises(ValueError, match="Data length.*insufficient"): forecaster.fit(data) From 405fa80712465ae5145c5f3b21643c0319914bef Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 20 Jul 2025 14:20:49 +0530 Subject: [PATCH 22/36] test base chanegd --- aeon/forecasting/deep_learning/tests/test_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index 5827c16a18..5579ab9959 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -42,6 +42,7 @@ def test_base_deep_forecaster_fit_predict(): # Generate synthetic data np.random.seed(42) data = np.random.randn(50) + y = np.random.randn(10) # Initialize forecaster forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0) @@ -50,7 +51,7 @@ def test_base_deep_forecaster_fit_predict(): forecaster.fit(data) # Predict - predictions = forecaster.predict() + predictions = forecaster.predict(y) # Validate output shape assert ( From 5cb1523cb3c0aa034351f78f63c8ffce136236b7 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Mon, 21 Jul 2025 15:44:14 +0530 Subject: [PATCH 23/36] tcn rshaped --- aeon/networks/_tcn.py | 38 +++++++++++---------------------- aeon/networks/tests/test_tcn.py | 24 +++++++++++---------- 2 files changed, 25 insertions(+), 37 deletions(-) diff --git a/aeon/networks/_tcn.py b/aeon/networks/_tcn.py index 834b5865e7..f88242823c 100644 --- a/aeon/networks/_tcn.py +++ b/aeon/networks/_tcn.py @@ -40,17 +40,10 @@ class TCNNetwork(BaseDeepLearningNetwork): Examples -------- >>> from aeon.networks._tcn import TCNNetwork - >>> from aeon.testing.data_generation import make_example_3d_numpy - >>> import tensorflow as tf - >>> X, y = make_example_3d_numpy(n_cases=8, n_channels=4, n_timepoints=150, - ... return_y=True, regression_target=True, - ... random_state=42) >>> network = TCNNetwork(n_blocks=[8, 8]) - >>> input_layer, output = network.build_network(input_shape=(4, 150)) - >>> model = tf.keras.Model(inputs=input_layer, outputs=output) - >>> model.compile(optimizer="adam", loss="mse") - >>> model.fit(X, y, epochs=2, batch_size=2, verbose=0) # doctest: +SKIP - + >>> input_layer, output = network.build_network(input_shape=(150, 4)) + >>> input_layer.shape, output.shape + ((None, 150, 4), (None, 4)) """ _config = { @@ -69,8 +62,6 @@ def __init__( Parameters ---------- - num_inputs : int - Number of input channels/features. n_blocks : list of int Number of output channels for each temporal block. kernel_size : int, default=2 @@ -97,7 +88,7 @@ def _conv1d_with_variable_padding( Parameters ---------- input_tensor : tf.Tensor - Input tensor of shape (batch_size, channels, sequence_length). + Input tensor of shape (batch_size, n_timepoints, n_channels). n_filters : int Number of output filters. kernel_size : int @@ -116,11 +107,8 @@ def _conv1d_with_variable_padding( """ import tensorflow as tf - # Transpose to Keras format (batch, sequence, channels) - x_keras_format = tf.keras.layers.Permute((2, 1))(input_tensor) - # Apply padding in sequence dimension - padded_x = tf.keras.layers.ZeroPadding1D(padding=padding_value)(x_keras_format) + padded_x = tf.keras.layers.ZeroPadding1D(padding=padding_value)(input_tensor) # Create and apply convolution layer conv_layer = tf.keras.layers.Conv1D( @@ -134,8 +122,7 @@ def _conv1d_with_variable_padding( # Apply convolution out = conv_layer(padded_x) - # Transpose back to PyTorch format (batch, channels, sequence) - return tf.keras.layers.Permute((2, 1))(out) + return out def _chomp(self, input_tensor, chomp_size: int): """Remove padding from the end of sequences to maintain causality. @@ -155,7 +142,7 @@ def _chomp(self, input_tensor, chomp_size: int): tf.Tensor Chomped tensor with reduced sequence length. """ - return input_tensor[:, :, :-chomp_size] + return input_tensor[:, :-chomp_size, :] def _temporal_block( self, @@ -302,7 +289,7 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple: Parameters ---------- input_shape : tuple - Shape of input data (n_channels, n_timepoints). + Shape of input data (n_timepoints, n_channels). **kwargs Additional keyword arguments (unused). @@ -323,9 +310,9 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple: # Create input layer input_layer = tf.keras.layers.Input(shape=input_shape) - # Transpose input to match the expected format (batch, channels, seq) + # Transpose input to match the expected format (batch, n_timepoints, n_channels) x = input_layer - n_inputs = input_shape[0] + n_inputs = input_shape[1] # Apply TCN using the private function x = self._temporal_conv_net( @@ -335,7 +322,6 @@ def build_network(self, input_shape: tuple, **kwargs) -> tuple: kernel_size=self.kernel_size, dropout=self.dropout, ) - - x = tf.keras.layers.Dense(input_shape[0])(x[:, -1, :]) - output = tf.keras.layers.Dense(1)(x) + output = tf.keras.layers.Dense(input_shape[1])(x[:, :, -1]) + # output = tf.keras.layers.Dense(1)(x) return input_layer, output diff --git a/aeon/networks/tests/test_tcn.py b/aeon/networks/tests/test_tcn.py index 94495e3c41..b21df0784d 100644 --- a/aeon/networks/tests/test_tcn.py +++ b/aeon/networks/tests/test_tcn.py @@ -14,10 +14,10 @@ def test_tcn_network_basic(): """Test basic TCN network creation and build_network functionality.""" import tensorflow as tf - input_shape = (100, 5) + input_shape = (100, 5) # (n_timepoints, n_channels) n_blocks = [32, 64] - tcn_network = TCNNetwork(n_blocks=n_blocks) + input_layer, output_layer = tcn_network.build_network(input_shape) # Check that layers are created correctly @@ -40,9 +40,9 @@ def test_tcn_network_different_channels(n_blocks): """Test TCN network with different channel configurations.""" import tensorflow as tf - input_shape = (50, 3) - + input_shape = (50, 3) # (n_timepoints, n_channels) tcn_network = TCNNetwork(n_blocks=n_blocks) + input_layer, output_layer = tcn_network.build_network(input_shape) # Create a model and verify it works @@ -66,13 +66,14 @@ def test_tcn_network_kernel_sizes(kernel_size): """Test TCN network with different kernel sizes.""" import tensorflow as tf - input_shape = (80, 4) + input_shape = (80, 4) # (n_timepoints, n_channels) n_blocks = [32, 64] tcn_network = TCNNetwork( n_blocks=n_blocks, kernel_size=kernel_size, ) + input_layer, output_layer = tcn_network.build_network(input_shape) # Verify network builds successfully @@ -89,7 +90,7 @@ def test_tcn_network_dropout_rates(dropout): """Test TCN network with different dropout rates.""" import tensorflow as tf - input_shape = (60, 2) + input_shape = (60, 2) # (n_timepoints, n_channels) n_blocks = [16, 32] tcn_network = TCNNetwork(n_blocks=n_blocks, dropout=dropout) @@ -109,20 +110,21 @@ def test_tcn_network_output_shape(): import numpy as np import tensorflow as tf - input_shape = (40, 6) + input_shape = (40, 6) # (n_timepoints, n_channels) batch_size = 16 n_blocks = [32, 64] tcn_network = TCNNetwork(n_blocks=n_blocks) input_layer, output_layer = tcn_network.build_network(input_shape) + model = tf.keras.Model(inputs=input_layer, outputs=output_layer) # Create dummy input and test output shape dummy_input = np.random.random((batch_size,) + input_shape) output = model(dummy_input) - # Output should maintain sequence length and have final channel dimension - expected_shape = (batch_size, 1) + # Output should have the same number of channels as input + expected_shape = (batch_size, input_shape[1]) # (batch_size, n_channels) assert ( output.shape == expected_shape ), f"Expected shape {expected_shape}, got {output.shape}" @@ -174,7 +176,7 @@ def test_tcn_network_single_layer(): """Test TCN network with single temporal block.""" import tensorflow as tf - input_shape = (30, 2) + input_shape = (30, 2) # (n_timepoints, n_channels) n_blocks = [16] # Single layer tcn_network = TCNNetwork(n_blocks=n_blocks) @@ -189,4 +191,4 @@ def test_tcn_network_single_layer(): dummy_input = np.random.random((4,) + input_shape) output = model(dummy_input) - assert output.shape == (4, 1) + assert output.shape == (4, input_shape[1]) # (batch_size, n_channels) From 2ab68c92cc265081f9148813523919ebfef75d25 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 22 Jul 2025 15:01:25 +0530 Subject: [PATCH 24/36] tcn fst updated --- aeon/forecasting/deep_learning/_tcn.py | 12 ++++++------ aeon/forecasting/deep_learning/tests/test_tcn.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py index d3d06bf0fe..fbe28d935e 100644 --- a/aeon/forecasting/deep_learning/_tcn.py +++ b/aeon/forecasting/deep_learning/_tcn.py @@ -61,7 +61,7 @@ def __init__( horizon=1, window=10, batch_size=32, - epochs=100, + n_epochs=100, verbose=0, optimizer="adam", loss="mse", @@ -75,7 +75,7 @@ def __init__( horizon=horizon, window=window, batch_size=batch_size, - epochs=epochs, + n_epochs=n_epochs, verbose=verbose, optimizer=optimizer, random_state=random_state, @@ -86,7 +86,7 @@ def __init__( self.kernel_size = kernel_size self.dropout = dropout - def _build_model(self, input_shape): + def build_model(self, input_shape): """Build the TCN model for forecasting. Parameters @@ -107,7 +107,7 @@ def _build_model(self, input_shape): kernel_size=self.kernel_size, dropout=self.dropout, ) - + # input_shape = (input_shape[1], input_shape[0]) # Build the network with the given input shape input_layer, output = network.build_network(input_shape=input_shape) @@ -127,7 +127,7 @@ def _get_test_params( ---------- parameter_set : str, default="default" Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. + special parameters are defined for a value,` will return `"default"` set. For forecasters, a "default" set of parameters should be provided for general testing, and a "results_comparison" set for comparing against previously recorded results if the general set does not produce suitable @@ -141,7 +141,7 @@ def _get_test_params( `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. """ param = { - "epochs": 10, + "n_epochs": 10, "batch_size": 4, "n_blocks": [8, 8], "kernel_size": 2, diff --git a/aeon/forecasting/deep_learning/tests/test_tcn.py b/aeon/forecasting/deep_learning/tests/test_tcn.py index 2717eaf4b4..851069bf73 100644 --- a/aeon/forecasting/deep_learning/tests/test_tcn.py +++ b/aeon/forecasting/deep_learning/tests/test_tcn.py @@ -24,7 +24,7 @@ def test_tcn_forecaster(horizon, window, epochs): # Initialize TCNForecaster forecaster = TCNForecaster( - horizon=horizon, window=window, epochs=epochs, batch_size=16, verbose=0 + horizon=horizon, window=window, n_epochs=epochs, batch_size=16, verbose=0 ) # Fit and predict From bc1adbafd32a74cbd9cc0efabab77a4995dd1397 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sun, 17 Aug 2025 17:50:26 +0530 Subject: [PATCH 25/36] current basedlf --- aeon/forecasting/deep_learning/base.py | 233 +++++++++++++++--- .../deep_learning/tests/test_base.py | 2 +- 2 files changed, 197 insertions(+), 38 deletions(-) diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index 3fa421c772..a08690a988 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -13,6 +13,7 @@ import numpy as np import pandas as pd +from sklearn.utils import check_random_state from aeon.forecasting.base import BaseForecaster @@ -21,7 +22,8 @@ class BaseDeepForecaster(BaseForecaster): """Base class for deep learning forecasters in aeon. This class provides a foundation for deep learning-based forecasting models, - handling data preprocessing, model training, and prediction. + handling data preprocessing, model training, and prediction with enhanced + capabilities for callbacks, model saving/loading, and efficiency. Parameters ---------- @@ -39,38 +41,57 @@ class BaseDeepForecaster(BaseForecaster): Optimizer to use for training. loss : str or tf.keras.losses.Loss, default='mse' Loss function for training. + callbacks : list of tf.keras.callbacks.Callback or None, default=None + List of Keras callbacks to be applied during training. random_state : int, RandomState instance or None, default=None If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random. - Seeded random number generation can only be guaranteed on CPU processing, - GPU processing will be non-deterministic. axis : int, default=0 Axis along which to apply the forecaster. - Default is 0 for univariate time series. + last_file_name : str, default="last_model" + The name of the file of the last model, used for saving models. + save_best_model : bool, default=False + Whether to save the best model during training based on validation loss. + file_path : str, default="./" + Directory path where models will be saved. + + Attributes + ---------- + model_ : tf.keras.Model or None + The fitted Keras model. + history_ : tf.keras.callbacks.History or None + Training history containing loss and metrics. + last_window_ : np.ndarray or None + The last window of data used for prediction. """ _tags = { - "capability:horizon": False, + "capability:horizon": True, "capability:exogenous": False, "algorithm_type": "deeplearning", "non_deterministic": True, "cant_pickle": True, "python_dependencies": "tensorflow", + "capability:multivariate": True, } def __init__( self, horizon=1, window=10, - batch_size=32, - n_epochs=100, + batch_size=32, # remove + n_epochs=100, # remove verbose=0, - optimizer="adam", - loss="mse", - random_state=None, + optimizer="adam", # remove it + loss="mse", # remove it + callbacks=None, + random_state=None, # remove it axis=0, + last_file_name="last_model", + save_best_model=False, + file_path="./", ): self.horizon = horizon self.window = window @@ -79,15 +100,22 @@ def __init__( self.verbose = verbose self.optimizer = optimizer self.loss = loss + self.callbacks = callbacks self.random_state = random_state self.axis = axis + self.last_file_name = last_file_name + self.save_best_model = save_best_model + self.file_path = file_path + + # Initialize attributes self.model_ = None + self.history_ = None self.last_window_ = None # Pass horizon and axis to BaseForecaster super().__init__(horizon=horizon, axis=axis) - def _fit(self, y, X=None): + def _fit(self, y, X=None): # remove it """Fit the forecaster to training data. Parameters @@ -103,7 +131,6 @@ def _fit(self, y, X=None): Returns an instance of self. """ import tensorflow as tf - from sklearn.utils import check_random_state # Set random seed for reproducibility rng = check_random_state(self.random_state) @@ -112,9 +139,13 @@ def _fit(self, y, X=None): # Convert input data to numpy array y_inner = self._convert_input(y) + + if y_inner.ndim == 1: + y_inner = y_inner.reshape(-1, 1) # Convert univariate to (timepoints, 1) + if y_inner.shape[0] < self.window + self.horizon: raise ValueError( - f"Data length ({y_inner.shape[0]}) is insufficient" + f"Data length ({y_inner.shape[0]}) is insufficient for window " f"({self.window}) and horizon ({self.horizon})." ) @@ -129,18 +160,25 @@ def _fit(self, y, X=None): self.model_ = self.build_model(input_shape) self.model_.compile(optimizer=self.optimizer, loss=self.loss) + # Prepare callbacks + callbacks_list = self._prepare_callbacks() + # Train the model - self.model_.fit( + self.history_ = self.model_.fit( X_train, y_train, batch_size=self.batch_size, epochs=self.n_epochs, verbose=self.verbose, + callbacks=callbacks_list, ) - self.last_window_ = y_inner[-self.window :] + + # Save the last window for prediction + self.last_window_ = y_inner[-self.window:] + return self - def _predict(self, y=None, X=None): + def _predict(self, y=None, X=None): # remove it """Make forecasts for y. Parameters @@ -153,7 +191,8 @@ def _predict(self, y=None, X=None): Returns ------- predictions : np.ndarray - Predicted values for the specified horizon. + Predicted values for the specified horizon. Shape: (horizon, channels) for multivariate + data or (horizon,) for univariate data. """ if y is None: if not hasattr(self, "last_window_"): @@ -161,22 +200,33 @@ def _predict(self, y=None, X=None): y_inner = self.last_window_ else: y_inner = self._convert_input(y) - if len(y_inner) < self.window: + if y_inner.ndim == 1: + y_inner = y_inner.reshape(-1, 1) # Convert univariate to (timepoints, 1) + if y_inner.shape[0] < self.window: raise ValueError( - f"Input data length ({len(y_inner)}) is less than the window size " + f"Input data length ({y_inner.shape[0]}) is less than the window size " f"({self.window})." ) - y_inner = y_inner[-self.window :] - - last_window = y_inner.reshape(1, self.window, 1) + y_inner = y_inner[-self.window:] + + # Get the number of channels from the input data + num_channels = y_inner.shape[-1] + last_window = y_inner.reshape(1, self.window, num_channels) predictions = [] current_window = last_window + for _ in range(self.horizon): pred = self.model_.predict(current_window, verbose=0) - predictions.append(pred[0, 0]) + predictions.append(pred) current_window = np.roll(current_window, -1, axis=1) - current_window[0, -1, 0] = pred[0, 0] - return np.array(predictions) + current_window[0, -1, :] = pred[0, :] # Update all channels + + predictions = np.array(predictions) + predictions = np.squeeze(predictions, axis=1) # Shape: (horizon, channels) + if num_channels == 1: + predictions = predictions.flatten() # Convert to (horizon,) for univariate + + return predictions def _convert_input(self, y): """Convert input data to numpy array. @@ -196,10 +246,6 @@ def _convert_input(self, y): else: y_inner = y - # Ensure 1D array - if len(y_inner.shape) > 1: - y_inner = y_inner.flatten() - return y_inner def _create_sequences(self, data): @@ -208,30 +254,143 @@ def _create_sequences(self, data): Parameters ---------- data : np.ndarray - Time series data. + Time series data. Assumes shape (timepoints, channels) for multivariate + data or (timepoints,) for univariate. Returns ------- X : np.ndarray - Input sequences. + Input sequences. Shape: (num_sequences, window, channels) for multivariate + or (num_sequences, window, 1) for univariate. y : np.ndarray - Target values. + Target values. Shape: (num_sequences, horizon, channels) for multivariate + or (num_sequences, horizon) for univariate (reshaped to (num_sequences, horizon, 1) if needed). """ - if len(data) < self.window + self.horizon: + if data.ndim == 1: + data = data.reshape(-1, 1) # Convert univariate to (timepoints, 1) + + num_timepoints, num_channels = data.shape + + if num_timepoints < self.window + self.horizon: raise ValueError( - f"Data length ({len(data)}) is insufficient for window " + f"Data length ({num_timepoints}) is insufficient for window " f"({self.window}) and horizon ({self.horizon})." ) X, y = [], [] - for i in range(len(data) - self.window - self.horizon + 1): + for i in range(num_timepoints - self.window - self.horizon + 1): X.append(data[i : (i + self.window)]) y.append(data[i + self.window : (i + self.window + self.horizon)]) - X = np.array(X).reshape(-1, self.window, 1) - y = np.array(y).reshape(-1, self.horizon) + X = np.array(X) # Shape: (num_sequences, window, channels) + y = np.array(y) # Shape: (num_sequences, horizon, channels) + return X, y + def _prepare_callbacks(self): + """Prepare callbacks for training. + + Returns + ------- + callbacks_list : list + List of callbacks to be used during training. + """ + callbacks_list = [] + + # Add user-provided callbacks + if self.callbacks is not None: + if isinstance(self.callbacks, list): + callbacks_list.extend(self.callbacks) + else: + callbacks_list.append(self.callbacks) + + # Add model checkpoint callback if save_best_model is True + if self.save_best_model: + callbacks_list = self._get_model_checkpoint_callback( + callbacks_list, self.file_path, "best_model" + ) + + return callbacks_list + + def _get_model_checkpoint_callback(self, callbacks, file_path, file_name): + """Add model checkpoint callback to save the best model. + + Parameters + ---------- + callbacks : list + Existing list of callbacks. + file_path : str + Directory path where the model will be saved. + file_name : str + Name of the model file. + + Returns + ------- + callbacks : list + Updated list of callbacks including ModelCheckpoint. + """ + import tensorflow as tf + + model_checkpoint_ = tf.keras.callbacks.ModelCheckpoint( + filepath=file_path + file_name + ".keras", + monitor="loss", + save_best_only=True, + verbose=self.verbose, + ) + + if isinstance(callbacks, list): + return callbacks + [model_checkpoint_] + else: + return [callbacks] + [model_checkpoint_] + + def summary(self): + """Summary function to return the losses/metrics for model fit. + + Returns + ------- + history : dict or None + Dictionary containing model's train/validation losses and metrics. + """ + return self.history_.history if self.history_ is not None else None + + def save_last_model_to_file(self, file_path="./"): + """Save the last epoch of the trained deep learning model. + + Parameters + ---------- + file_path : str, default="./" + The directory where the model will be saved. + + Returns + ------- + None + """ + if self.model_ is None: + raise ValueError("No model to save. Please fit the model first.") + + self.model_.save(file_path + self.last_file_name + ".keras") + + def load_model(self, model_path): + """Load a pre-trained keras model instead of fitting. + + When calling this function, all functionalities can be used + such as predict with the loaded model. + + Parameters + ---------- + model_path : str + Path to the saved model file including extension. + Example: model_path="path/to/file/best_model.keras" + + Returns + ------- + None + """ + import tensorflow as tf + + self.model_ = tf.keras.models.load_model(model_path) + self.is_fitted = True + @abstractmethod def build_model(self, input_shape): """Build the deep learning model. diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index 5579ab9959..46774b520d 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -70,4 +70,4 @@ def test_base_deep_forecaster_insufficient_data(): forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0) with pytest.raises(ValueError, match="Data length.*insufficient"): - forecaster.fit(data) + forecaster.fit(data) \ No newline at end of file From 4c6b7898f504bb2ec25ea01de687927405e9a30c Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 19 Aug 2025 12:34:43 +0530 Subject: [PATCH 26/36] tcn forecaster updated with new base class --- aeon/forecasting/deep_learning/_tcn.py | 145 ++++++++++-- aeon/forecasting/deep_learning/base.py | 218 +----------------- .../deep_learning/tests/test_base.py | 93 +++----- .../deep_learning/tests/test_tcn.py | 43 +++- 4 files changed, 209 insertions(+), 290 deletions(-) diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py index 2fbb26ae91..95b0a91f0e 100644 --- a/aeon/forecasting/deep_learning/_tcn.py +++ b/aeon/forecasting/deep_learning/_tcn.py @@ -3,16 +3,19 @@ from __future__ import annotations __maintainer__ = [] - __all__ = ["TCNForecaster"] from typing import Any +import numpy as np +from sklearn.utils import check_random_state + +from aeon.forecasting.base import DirectForecastingMixin from aeon.forecasting.deep_learning.base import BaseDeepForecaster from aeon.networks._tcn import TCNNetwork -class TCNForecaster(BaseDeepForecaster): +class TCNForecaster(BaseDeepForecaster, DirectForecastingMixin): """A deep learning forecaster using Temporal Convolutional Network (TCN). It leverages the `TCNNetwork` from aeon's network module @@ -26,7 +29,7 @@ class TCNForecaster(BaseDeepForecaster): The window size for creating input sequences. batch_size : int, default=32 Batch size for training the model. - epochs : int, default=100 + n_epochs : int, default=100 Number of epochs to train the model. verbose : int, default=0 Verbosity mode (0, 1, or 2). @@ -34,10 +37,18 @@ class TCNForecaster(BaseDeepForecaster): Optimizer to use for training. loss : str or tf.keras.losses.Loss, default='mse' Loss function for training. + callbacks : list of tf.keras.callbacks.Callback or None, default=None + List of Keras callbacks to be applied during training. random_state : int, default=None Seed for random number generators. axis : int, default=0 Axis along which to apply the forecaster. + last_file_name : str, default="last_model" + The name of the file of the last model, used for saving models. + save_best_model : bool, default=False + Whether to save the best model during training based on validation loss. + file_path : str, default="./" + Directory path where models will be saved. n_blocks : list of int, default=[16, 16, 16] List specifying the number of output channels for each layer of the TCN. The length determines the depth of the network. @@ -50,10 +61,13 @@ class TCNForecaster(BaseDeepForecaster): _tags = { "python_dependencies": ["tensorflow"], - "capability:horizon": False, + "capability:horizon": True, "capability:multivariate": True, "capability:exogenous": False, "capability:univariate": True, + "algorithm_type": "deeplearning", + "non_deterministic": True, + "cant_pickle": True, } def __init__( @@ -65,8 +79,12 @@ def __init__( verbose=0, optimizer="adam", loss="mse", + callbacks=None, random_state=None, axis=0, + last_file_name="last_model", + save_best_model=False, + file_path="./", n_blocks=None, kernel_size=2, dropout=0.2, @@ -74,17 +92,21 @@ def __init__( super().__init__( horizon=horizon, window=window, - batch_size=batch_size, - n_epochs=n_epochs, verbose=verbose, - optimizer=optimizer, - random_state=random_state, + callbacks=callbacks, axis=axis, - loss=loss, + last_file_name=last_file_name, + save_best_model=save_best_model, + file_path=file_path, ) self.n_blocks = n_blocks self.kernel_size = kernel_size self.dropout = dropout + self.batch_size = batch_size + self.n_epochs = n_epochs + self.optimizer = optimizer + self.loss = loss + self.random_state = random_state def build_model(self, input_shape): """Build the TCN model for forecasting. @@ -101,21 +123,106 @@ def build_model(self, input_shape): """ import tensorflow as tf - # Initialize the TCN network with the updated parameters network = TCNNetwork( n_blocks=self.n_blocks if self.n_blocks is not None else [16, 16, 16], kernel_size=self.kernel_size, dropout=self.dropout, ) - # input_shape = (input_shape[1], input_shape[0]) - # Build the network with the given input shape input_layer, output = network.build_network(input_shape=input_shape) - - # Create the final model model = tf.keras.Model(inputs=input_layer, outputs=output) return model - # Added to handle __name__ in tests (class-level access) + def _fit(self, y, exog=None): + """Fit the forecaster to training data. + + Parameters + ---------- + y : np.ndarray or pd.Series + Target time series to which to fit the forecaster. + + Returns + ------- + self : TCNForecaster + Returns an instance of self. + """ + import tensorflow as tf + + rng = check_random_state(self.random_state) + self.random_state_ = rng.randint(0, np.iinfo(np.int32).max) + tf.keras.utils.set_random_seed(self.random_state_) + y_inner = y + num_timepoints, num_channels = y_inner.shape + num_sequences = num_timepoints - self.window - self.horizon + 1 + if y_inner.shape[0] < self.window + self.horizon: + raise ValueError( + f"Data length ({y_inner.shape}) is insufficient for window " + f"({self.window}) and horizon ({self.horizon})." + ) + windows_full = np.lib.stride_tricks.sliding_window_view( + y_inner, window_shape=(self.window, num_channels) + ) + windows_full = np.squeeze(windows_full, axis=1) + X_train = windows_full[:num_sequences] + # print(f"Shape of X_train is {X_train.shape}") + tail = y_inner[self.window :] + y_windows = np.lib.stride_tricks.sliding_window_view( + tail, window_shape=(self.horizon, num_channels) + ) + y_windows = np.squeeze(y_windows, axis=1) + y_train = y_windows[:num_sequences] + # print(f"Shape of y_train is {y_train.shape}") + input_shape = X_train.shape[1:] + self.model_ = self.build_model(input_shape) + self.model_.compile(optimizer=self.optimizer, loss=self.loss) + callbacks_list = self._prepare_callbacks() + self.history_ = self.model_.fit( + X_train, + y_train, + batch_size=self.batch_size, + epochs=self.n_epochs, + verbose=self.verbose, + callbacks=callbacks_list, + ) + self.last_window_ = y_inner[-self.window :] + return self + + def _predict(self, y=None, exog=None): + """Make forecasts for y. + + Parameters + ---------- + y : np.ndarray or pd.Series, default=None + Series to predict from. If None, uses last fitted window. + + Returns + ------- + predictions : np.ndarray + Predicted values for the specified horizon. Since TCN has single + horizon capability, returns single step prediction. + """ + if y is None: + if not hasattr(self, "last_window_"): + raise ValueError("No fitted data available for prediction.") + y_inner = self.last_window_ + else: + y_inner = y + if y_inner.ndim == 1: + y_inner = y_inner.reshape(-1, 1) + if y_inner.shape[0] < self.window: + raise ValueError( + f"Input data length ({y_inner.shape}) is less than the " + f"window size ({self.window})." + ) + y_inner = y_inner[-self.window :] + num_channels = y_inner.shape[-1] + last_window = y_inner.reshape(1, self.window, num_channels) + pred = self.model_.predict(last_window, verbose=0) + if num_channels == 1: + prediction = pred.flatten()[0] + else: + prediction = pred[0, :] + return prediction + @classmethod def _get_test_params( cls, parameter_set: str = "default" @@ -127,18 +234,12 @@ def _get_test_params( ---------- parameter_set : str, default="default" Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value,` will return `"default"` set. - For forecasters, a "default" set of parameters should be provided for - general testing, and a "results_comparison" set for comparing against - previously recorded results if the general set does not produce suitable - probabilities to compare against. + special parameters are defined for a value, will return "default" set. Returns ------- params : dict or list of dict, default={} Parameters to create testing instances of the class. - Each dict are parameters to construct an "interesting" test instance, i.e., - `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. """ param = { "n_epochs": 10, diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index a08690a988..b861305c21 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -11,10 +11,6 @@ from abc import abstractmethod -import numpy as np -import pandas as pd -from sklearn.utils import check_random_state - from aeon.forecasting.base import BaseForecaster @@ -31,23 +27,10 @@ class BaseDeepForecaster(BaseForecaster): Forecasting horizon, the number of steps ahead to predict. window : int, default=10 The window size for creating input sequences. - batch_size : int, default=32 - Batch size for training the model. - n_epochs : int, default=100 - Number of epochs to train the model. verbose : int, default=0 Verbosity mode (0, 1, or 2). - optimizer : str or tf.keras.optimizers.Optimizer, default='adam' - Optimizer to use for training. - loss : str or tf.keras.losses.Loss, default='mse' - Loss function for training. callbacks : list of tf.keras.callbacks.Callback or None, default=None List of Keras callbacks to be applied during training. - random_state : int, RandomState instance or None, default=None - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by np.random. axis : int, default=0 Axis along which to apply the forecaster. last_file_name : str, default="last_model" @@ -81,13 +64,8 @@ def __init__( self, horizon=1, window=10, - batch_size=32, # remove - n_epochs=100, # remove verbose=0, - optimizer="adam", # remove it - loss="mse", # remove it callbacks=None, - random_state=None, # remove it axis=0, last_file_name="last_model", save_best_model=False, @@ -95,197 +73,32 @@ def __init__( ): self.horizon = horizon self.window = window - self.batch_size = batch_size - self.n_epochs = n_epochs self.verbose = verbose - self.optimizer = optimizer - self.loss = loss self.callbacks = callbacks - self.random_state = random_state self.axis = axis self.last_file_name = last_file_name self.save_best_model = save_best_model self.file_path = file_path - - # Initialize attributes + self.model_ = None self.history_ = None self.last_window_ = None - # Pass horizon and axis to BaseForecaster super().__init__(horizon=horizon, axis=axis) - def _fit(self, y, X=None): # remove it - """Fit the forecaster to training data. - - Parameters - ---------- - y : np.ndarray or pd.Series - Target time series to which to fit the forecaster. - X : np.ndarray or pd.DataFrame, default=None - Exogenous variables. - - Returns - ------- - self : BaseDeepForecaster - Returns an instance of self. - """ - import tensorflow as tf - - # Set random seed for reproducibility - rng = check_random_state(self.random_state) - self.random_state_ = rng.randint(0, np.iinfo(np.int32).max) - tf.keras.utils.set_random_seed(self.random_state_) - - # Convert input data to numpy array - y_inner = self._convert_input(y) - - if y_inner.ndim == 1: - y_inner = y_inner.reshape(-1, 1) # Convert univariate to (timepoints, 1) - - if y_inner.shape[0] < self.window + self.horizon: - raise ValueError( - f"Data length ({y_inner.shape[0]}) is insufficient for window " - f"({self.window}) and horizon ({self.horizon})." - ) - - # Create sequences for training - X_train, y_train = self._create_sequences(y_inner) - - if X_train.shape[0] == 0: - raise ValueError("No training sequences could be created.") - - # Build and compile the model - input_shape = X_train.shape[1:] - self.model_ = self.build_model(input_shape) - self.model_.compile(optimizer=self.optimizer, loss=self.loss) - - # Prepare callbacks - callbacks_list = self._prepare_callbacks() - - # Train the model - self.history_ = self.model_.fit( - X_train, - y_train, - batch_size=self.batch_size, - epochs=self.n_epochs, - verbose=self.verbose, - callbacks=callbacks_list, - ) - - # Save the last window for prediction - self.last_window_ = y_inner[-self.window:] - - return self - - def _predict(self, y=None, X=None): # remove it - """Make forecasts for y. - - Parameters - ---------- - y : np.ndarray or pd.Series, default=None - Series to predict from. If None, uses last fitted window. - X : np.ndarray or pd.DataFrame, default=None - Exogenous variables (not supported by default). - - Returns - ------- - predictions : np.ndarray - Predicted values for the specified horizon. Shape: (horizon, channels) for multivariate - data or (horizon,) for univariate data. - """ - if y is None: - if not hasattr(self, "last_window_"): - raise ValueError("No fitted data available for prediction.") - y_inner = self.last_window_ - else: - y_inner = self._convert_input(y) - if y_inner.ndim == 1: - y_inner = y_inner.reshape(-1, 1) # Convert univariate to (timepoints, 1) - if y_inner.shape[0] < self.window: - raise ValueError( - f"Input data length ({y_inner.shape[0]}) is less than the window size " - f"({self.window})." - ) - y_inner = y_inner[-self.window:] - - # Get the number of channels from the input data - num_channels = y_inner.shape[-1] - last_window = y_inner.reshape(1, self.window, num_channels) - predictions = [] - current_window = last_window - - for _ in range(self.horizon): - pred = self.model_.predict(current_window, verbose=0) - predictions.append(pred) - current_window = np.roll(current_window, -1, axis=1) - current_window[0, -1, :] = pred[0, :] # Update all channels - - predictions = np.array(predictions) - predictions = np.squeeze(predictions, axis=1) # Shape: (horizon, channels) - if num_channels == 1: - predictions = predictions.flatten() # Convert to (horizon,) for univariate - - return predictions - - def _convert_input(self, y): - """Convert input data to numpy array. - - Parameters - ---------- - y : np.ndarray or pd.Series - Input time series. - - Returns - ------- - y_inner : np.ndarray - Converted numpy array. - """ - if isinstance(y, pd.Series) or isinstance(y, pd.DataFrame): - y_inner = y.values - else: - y_inner = y - - return y_inner - - def _create_sequences(self, data): - """Create input sequences and target values for training. - - Parameters - ---------- - data : np.ndarray - Time series data. Assumes shape (timepoints, channels) for multivariate - data or (timepoints,) for univariate. - - Returns - ------- - X : np.ndarray - Input sequences. Shape: (num_sequences, window, channels) for multivariate - or (num_sequences, window, 1) for univariate. - y : np.ndarray - Target values. Shape: (num_sequences, horizon, channels) for multivariate - or (num_sequences, horizon) for univariate (reshaped to (num_sequences, horizon, 1) if needed). - """ - if data.ndim == 1: - data = data.reshape(-1, 1) # Convert univariate to (timepoints, 1) - - num_timepoints, num_channels = data.shape - - if num_timepoints < self.window + self.horizon: - raise ValueError( - f"Data length ({num_timepoints}) is insufficient for window " - f"({self.window}) and horizon ({self.horizon})." - ) - - X, y = [], [] - for i in range(num_timepoints - self.window - self.horizon + 1): - X.append(data[i : (i + self.window)]) - y.append(data[i + self.window : (i + self.window + self.horizon)]) + def _fit(self, y, exog=None): + """Fit the model.""" + pass - X = np.array(X) # Shape: (num_sequences, window, channels) - y = np.array(y) # Shape: (num_sequences, horizon, channels) + def _predict(self, y, exog=None): + """Predict using the model.""" + pass - return X, y + def _forecast(self, y, exog=None): + """Forecast values for time series X.""" + y = self._preprocess_series(y, 1, True) + self.fit(y, exog) + return self.predict(y, exog) def _prepare_callbacks(self): """Prepare callbacks for training. @@ -296,20 +109,15 @@ def _prepare_callbacks(self): List of callbacks to be used during training. """ callbacks_list = [] - - # Add user-provided callbacks if self.callbacks is not None: if isinstance(self.callbacks, list): callbacks_list.extend(self.callbacks) else: callbacks_list.append(self.callbacks) - - # Add model checkpoint callback if save_best_model is True if self.save_best_model: callbacks_list = self._get_model_checkpoint_callback( callbacks_list, self.file_path, "best_model" ) - return callbacks_list def _get_model_checkpoint_callback(self, callbacks, file_path, file_name): @@ -337,7 +145,6 @@ def _get_model_checkpoint_callback(self, callbacks, file_path, file_name): save_best_only=True, verbose=self.verbose, ) - if isinstance(callbacks, list): return callbacks + [model_checkpoint_] else: @@ -367,7 +174,6 @@ def save_last_model_to_file(self, file_path="./"): """ if self.model_ is None: raise ValueError("No model to save. Please fit the model first.") - self.model_.save(file_path + self.last_file_name + ".keras") def load_model(self, model_path): diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index 5579ab9959..5c0671b4d3 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -1,73 +1,44 @@ -"""Test for BaseDeepForecaster class in aeon.""" +"""Test file for BaseDeepForecaster.""" -import numpy as np import pytest -from aeon.forecasting.deep_learning import BaseDeepForecaster +from aeon.forecasting.deep_learning.base import BaseDeepForecaster from aeon.utils.validation._dependencies import _check_soft_dependencies -@pytest.mark.skipif( - not _check_soft_dependencies("tensorflow", severity="none"), - reason="skip test if required soft dependency not available", -) -class SimpleDeepForecaster(BaseDeepForecaster): - """A simple concrete implementation of BaseDeepForecaster for testing.""" - - def __init__(self, horizon=1, window=5, n_epochs=1, verbose=0): - super().__init__( - horizon=horizon, window=window, n_epochs=n_epochs, verbose=verbose - ) +class DummyDeepForecaster(BaseDeepForecaster): + """Minimal concrete subclass to allow instantiation.""" def build_model(self, input_shape): - """Build a simple Keras model for testing.""" - import tensorflow as tf - - model = tf.keras.Sequential( - [ - tf.keras.layers.Flatten(input_shape=input_shape), - tf.keras.layers.Dense(10, activation="relu"), - tf.keras.layers.Dense(self.horizon), - ] - ) - return model + """Construct and return a model based on the provided input shape.""" + return None # Not needed for this test @pytest.mark.skipif( - not _check_soft_dependencies("tensorflow", severity="none"), - reason="skip test if required soft dependency not available", + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", ) -def test_base_deep_forecaster_fit_predict(): - """Test fitting and predicting with BaseDeepForecaster implementation.""" - # Generate synthetic data - np.random.seed(42) - data = np.random.randn(50) - y = np.random.randn(10) - - # Initialize forecaster - forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0) - - # Fit the model - forecaster.fit(data) - - # Predict - predictions = forecaster.predict(y) - - # Validate output shape - assert ( - len(predictions) == 1 - ), f"Expected predictions of length 1, got {len(predictions)}" - assert isinstance(predictions, np.ndarray), "Predictions should be a numpy array" - - -@pytest.mark.skipif( - not _check_soft_dependencies("tensorflow", severity="none"), - reason="skip test if required soft dependency not available", -) -def test_base_deep_forecaster_insufficient_data(): - """Test error handling for insufficient data.""" - data = np.random.randn(5) - forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0) - - with pytest.raises(ValueError, match="Data length.*insufficient"): - forecaster.fit(data) +def test_default_init_attributes(): + """Test that BaseDeepForecaster sets default params and attributes correctly.""" + forecaster = DummyDeepForecaster() + + # check default parameters + assert forecaster.horizon == 1 + assert forecaster.window == 10 + assert forecaster.verbose == 0 + assert forecaster.callbacks is None + assert forecaster.axis == 0 + assert forecaster.last_file_name == "last_model" + assert forecaster.save_best_model is False + assert forecaster.file_path == "./" + + # check default attributes after init + assert forecaster.model_ is None + assert forecaster.history_ is None + assert forecaster.last_window_ is None + + # check tags + tags = forecaster.get_tags() + assert tags["algorithm_type"] == "deeplearning" + assert tags["capability:horizon"] + assert tags["capability:multivariate"] diff --git a/aeon/forecasting/deep_learning/tests/test_tcn.py b/aeon/forecasting/deep_learning/tests/test_tcn.py index 2f5b0710b6..7ee3139321 100644 --- a/aeon/forecasting/deep_learning/tests/test_tcn.py +++ b/aeon/forecasting/deep_learning/tests/test_tcn.py @@ -5,7 +5,7 @@ import pytest -from aeon.datasets import load_airline +from aeon.datasets import load_airline, load_longley from aeon.forecasting.deep_learning._tcn import TCNForecaster from aeon.utils.validation._dependencies import _check_soft_dependencies @@ -35,3 +35,44 @@ def test_tcn_forecaster(horizon, window, epochs): assert prediction is not None if isinstance(prediction, tf.Tensor): assert not tf.math.is_nan(prediction).numpy() + + +@pytest.mark.skipif( + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", +) +@pytest.mark.parametrize( + "loader,is_univariate", + [ + (load_airline, True), # univariate dataset + (load_longley, False), # multivariate dataset + ], +) +def test_tcn_forecaster_uni_mutli(loader, is_univariate): + """Test TCNForecaster on univariate (airline) and multivariate (longley) data.""" + y = loader() + + forecaster = TCNForecaster( + horizon=1, + window=10, + n_epochs=2, + batch_size=16, + verbose=0, + ) + + # fit + forecaster.fit(y) + + # predict + prediction = forecaster.predict(y) + assert prediction is not None + + # forecast + prediction = forecaster.forecast(y) + assert prediction is not None + + # only for univariate case, test direct forecasting + if is_univariate: + prediction = forecaster.direct_forecast(y, 3) + assert prediction is not None + assert len(prediction) == 3 From 4aad1b62dfa194de40eb3ba30ce54297bb28198d Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 19 Aug 2025 12:38:51 +0530 Subject: [PATCH 27/36] workflow corrected --- .github/workflows/pr_pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml index 401ae8f8d1..5addb89627 100644 --- a/.github/workflows/pr_pytest.yml +++ b/.github/workflows/pr_pytest.yml @@ -3,7 +3,7 @@ name: PR pytest on: push: branches: - - tcn_fst + - main pull_request: paths: - "aeon/**" From 6c4dca7de9c894f538854f0a7e8a718b2ee3e36d Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 19 Aug 2025 13:22:45 +0530 Subject: [PATCH 28/36] excluded forecasting test for tcn --- aeon/testing/testing_config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aeon/testing/testing_config.py b/aeon/testing/testing_config.py index 990befb15d..759b5f717f 100644 --- a/aeon/testing/testing_config.py +++ b/aeon/testing/testing_config.py @@ -46,6 +46,8 @@ "check_persistence_via_pickle", "check_save_estimators_to_file", ], + # checks required for multivariate + "TCNForecaster": ["check_forecaster_output"], # needs investigation "SASTClassifier": ["check_fit_deterministic", "check_estimator_multithreading"], "RSASTClassifier": ["check_fit_deterministic", "check_estimator_multithreading"], From b25059dc75384bc653922112167347bfe880f897 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 19 Aug 2025 14:30:44 +0530 Subject: [PATCH 29/36] base changed --- aeon/forecasting/deep_learning/base.py | 218 +----------------- .../deep_learning/tests/test_base.py | 93 +++----- 2 files changed, 44 insertions(+), 267 deletions(-) diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index a08690a988..b861305c21 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -11,10 +11,6 @@ from abc import abstractmethod -import numpy as np -import pandas as pd -from sklearn.utils import check_random_state - from aeon.forecasting.base import BaseForecaster @@ -31,23 +27,10 @@ class BaseDeepForecaster(BaseForecaster): Forecasting horizon, the number of steps ahead to predict. window : int, default=10 The window size for creating input sequences. - batch_size : int, default=32 - Batch size for training the model. - n_epochs : int, default=100 - Number of epochs to train the model. verbose : int, default=0 Verbosity mode (0, 1, or 2). - optimizer : str or tf.keras.optimizers.Optimizer, default='adam' - Optimizer to use for training. - loss : str or tf.keras.losses.Loss, default='mse' - Loss function for training. callbacks : list of tf.keras.callbacks.Callback or None, default=None List of Keras callbacks to be applied during training. - random_state : int, RandomState instance or None, default=None - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by np.random. axis : int, default=0 Axis along which to apply the forecaster. last_file_name : str, default="last_model" @@ -81,13 +64,8 @@ def __init__( self, horizon=1, window=10, - batch_size=32, # remove - n_epochs=100, # remove verbose=0, - optimizer="adam", # remove it - loss="mse", # remove it callbacks=None, - random_state=None, # remove it axis=0, last_file_name="last_model", save_best_model=False, @@ -95,197 +73,32 @@ def __init__( ): self.horizon = horizon self.window = window - self.batch_size = batch_size - self.n_epochs = n_epochs self.verbose = verbose - self.optimizer = optimizer - self.loss = loss self.callbacks = callbacks - self.random_state = random_state self.axis = axis self.last_file_name = last_file_name self.save_best_model = save_best_model self.file_path = file_path - - # Initialize attributes + self.model_ = None self.history_ = None self.last_window_ = None - # Pass horizon and axis to BaseForecaster super().__init__(horizon=horizon, axis=axis) - def _fit(self, y, X=None): # remove it - """Fit the forecaster to training data. - - Parameters - ---------- - y : np.ndarray or pd.Series - Target time series to which to fit the forecaster. - X : np.ndarray or pd.DataFrame, default=None - Exogenous variables. - - Returns - ------- - self : BaseDeepForecaster - Returns an instance of self. - """ - import tensorflow as tf - - # Set random seed for reproducibility - rng = check_random_state(self.random_state) - self.random_state_ = rng.randint(0, np.iinfo(np.int32).max) - tf.keras.utils.set_random_seed(self.random_state_) - - # Convert input data to numpy array - y_inner = self._convert_input(y) - - if y_inner.ndim == 1: - y_inner = y_inner.reshape(-1, 1) # Convert univariate to (timepoints, 1) - - if y_inner.shape[0] < self.window + self.horizon: - raise ValueError( - f"Data length ({y_inner.shape[0]}) is insufficient for window " - f"({self.window}) and horizon ({self.horizon})." - ) - - # Create sequences for training - X_train, y_train = self._create_sequences(y_inner) - - if X_train.shape[0] == 0: - raise ValueError("No training sequences could be created.") - - # Build and compile the model - input_shape = X_train.shape[1:] - self.model_ = self.build_model(input_shape) - self.model_.compile(optimizer=self.optimizer, loss=self.loss) - - # Prepare callbacks - callbacks_list = self._prepare_callbacks() - - # Train the model - self.history_ = self.model_.fit( - X_train, - y_train, - batch_size=self.batch_size, - epochs=self.n_epochs, - verbose=self.verbose, - callbacks=callbacks_list, - ) - - # Save the last window for prediction - self.last_window_ = y_inner[-self.window:] - - return self - - def _predict(self, y=None, X=None): # remove it - """Make forecasts for y. - - Parameters - ---------- - y : np.ndarray or pd.Series, default=None - Series to predict from. If None, uses last fitted window. - X : np.ndarray or pd.DataFrame, default=None - Exogenous variables (not supported by default). - - Returns - ------- - predictions : np.ndarray - Predicted values for the specified horizon. Shape: (horizon, channels) for multivariate - data or (horizon,) for univariate data. - """ - if y is None: - if not hasattr(self, "last_window_"): - raise ValueError("No fitted data available for prediction.") - y_inner = self.last_window_ - else: - y_inner = self._convert_input(y) - if y_inner.ndim == 1: - y_inner = y_inner.reshape(-1, 1) # Convert univariate to (timepoints, 1) - if y_inner.shape[0] < self.window: - raise ValueError( - f"Input data length ({y_inner.shape[0]}) is less than the window size " - f"({self.window})." - ) - y_inner = y_inner[-self.window:] - - # Get the number of channels from the input data - num_channels = y_inner.shape[-1] - last_window = y_inner.reshape(1, self.window, num_channels) - predictions = [] - current_window = last_window - - for _ in range(self.horizon): - pred = self.model_.predict(current_window, verbose=0) - predictions.append(pred) - current_window = np.roll(current_window, -1, axis=1) - current_window[0, -1, :] = pred[0, :] # Update all channels - - predictions = np.array(predictions) - predictions = np.squeeze(predictions, axis=1) # Shape: (horizon, channels) - if num_channels == 1: - predictions = predictions.flatten() # Convert to (horizon,) for univariate - - return predictions - - def _convert_input(self, y): - """Convert input data to numpy array. - - Parameters - ---------- - y : np.ndarray or pd.Series - Input time series. - - Returns - ------- - y_inner : np.ndarray - Converted numpy array. - """ - if isinstance(y, pd.Series) or isinstance(y, pd.DataFrame): - y_inner = y.values - else: - y_inner = y - - return y_inner - - def _create_sequences(self, data): - """Create input sequences and target values for training. - - Parameters - ---------- - data : np.ndarray - Time series data. Assumes shape (timepoints, channels) for multivariate - data or (timepoints,) for univariate. - - Returns - ------- - X : np.ndarray - Input sequences. Shape: (num_sequences, window, channels) for multivariate - or (num_sequences, window, 1) for univariate. - y : np.ndarray - Target values. Shape: (num_sequences, horizon, channels) for multivariate - or (num_sequences, horizon) for univariate (reshaped to (num_sequences, horizon, 1) if needed). - """ - if data.ndim == 1: - data = data.reshape(-1, 1) # Convert univariate to (timepoints, 1) - - num_timepoints, num_channels = data.shape - - if num_timepoints < self.window + self.horizon: - raise ValueError( - f"Data length ({num_timepoints}) is insufficient for window " - f"({self.window}) and horizon ({self.horizon})." - ) - - X, y = [], [] - for i in range(num_timepoints - self.window - self.horizon + 1): - X.append(data[i : (i + self.window)]) - y.append(data[i + self.window : (i + self.window + self.horizon)]) + def _fit(self, y, exog=None): + """Fit the model.""" + pass - X = np.array(X) # Shape: (num_sequences, window, channels) - y = np.array(y) # Shape: (num_sequences, horizon, channels) + def _predict(self, y, exog=None): + """Predict using the model.""" + pass - return X, y + def _forecast(self, y, exog=None): + """Forecast values for time series X.""" + y = self._preprocess_series(y, 1, True) + self.fit(y, exog) + return self.predict(y, exog) def _prepare_callbacks(self): """Prepare callbacks for training. @@ -296,20 +109,15 @@ def _prepare_callbacks(self): List of callbacks to be used during training. """ callbacks_list = [] - - # Add user-provided callbacks if self.callbacks is not None: if isinstance(self.callbacks, list): callbacks_list.extend(self.callbacks) else: callbacks_list.append(self.callbacks) - - # Add model checkpoint callback if save_best_model is True if self.save_best_model: callbacks_list = self._get_model_checkpoint_callback( callbacks_list, self.file_path, "best_model" ) - return callbacks_list def _get_model_checkpoint_callback(self, callbacks, file_path, file_name): @@ -337,7 +145,6 @@ def _get_model_checkpoint_callback(self, callbacks, file_path, file_name): save_best_only=True, verbose=self.verbose, ) - if isinstance(callbacks, list): return callbacks + [model_checkpoint_] else: @@ -367,7 +174,6 @@ def save_last_model_to_file(self, file_path="./"): """ if self.model_ is None: raise ValueError("No model to save. Please fit the model first.") - self.model_.save(file_path + self.last_file_name + ".keras") def load_model(self, model_path): diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index 46774b520d..5c0671b4d3 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -1,73 +1,44 @@ -"""Test for BaseDeepForecaster class in aeon.""" +"""Test file for BaseDeepForecaster.""" -import numpy as np import pytest -from aeon.forecasting.deep_learning import BaseDeepForecaster +from aeon.forecasting.deep_learning.base import BaseDeepForecaster from aeon.utils.validation._dependencies import _check_soft_dependencies -@pytest.mark.skipif( - not _check_soft_dependencies("tensorflow", severity="none"), - reason="skip test if required soft dependency not available", -) -class SimpleDeepForecaster(BaseDeepForecaster): - """A simple concrete implementation of BaseDeepForecaster for testing.""" - - def __init__(self, horizon=1, window=5, n_epochs=1, verbose=0): - super().__init__( - horizon=horizon, window=window, n_epochs=n_epochs, verbose=verbose - ) +class DummyDeepForecaster(BaseDeepForecaster): + """Minimal concrete subclass to allow instantiation.""" def build_model(self, input_shape): - """Build a simple Keras model for testing.""" - import tensorflow as tf - - model = tf.keras.Sequential( - [ - tf.keras.layers.Flatten(input_shape=input_shape), - tf.keras.layers.Dense(10, activation="relu"), - tf.keras.layers.Dense(self.horizon), - ] - ) - return model + """Construct and return a model based on the provided input shape.""" + return None # Not needed for this test @pytest.mark.skipif( - not _check_soft_dependencies("tensorflow", severity="none"), - reason="skip test if required soft dependency not available", + not _check_soft_dependencies(["tensorflow"], severity="none"), + reason="Tensorflow soft dependency unavailable.", ) -def test_base_deep_forecaster_fit_predict(): - """Test fitting and predicting with BaseDeepForecaster implementation.""" - # Generate synthetic data - np.random.seed(42) - data = np.random.randn(50) - y = np.random.randn(10) - - # Initialize forecaster - forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0) - - # Fit the model - forecaster.fit(data) - - # Predict - predictions = forecaster.predict(y) - - # Validate output shape - assert ( - len(predictions) == 1 - ), f"Expected predictions of length 1, got {len(predictions)}" - assert isinstance(predictions, np.ndarray), "Predictions should be a numpy array" - - -@pytest.mark.skipif( - not _check_soft_dependencies("tensorflow", severity="none"), - reason="skip test if required soft dependency not available", -) -def test_base_deep_forecaster_insufficient_data(): - """Test error handling for insufficient data.""" - data = np.random.randn(5) - forecaster = SimpleDeepForecaster(horizon=1, window=5, n_epochs=1, verbose=0) - - with pytest.raises(ValueError, match="Data length.*insufficient"): - forecaster.fit(data) \ No newline at end of file +def test_default_init_attributes(): + """Test that BaseDeepForecaster sets default params and attributes correctly.""" + forecaster = DummyDeepForecaster() + + # check default parameters + assert forecaster.horizon == 1 + assert forecaster.window == 10 + assert forecaster.verbose == 0 + assert forecaster.callbacks is None + assert forecaster.axis == 0 + assert forecaster.last_file_name == "last_model" + assert forecaster.save_best_model is False + assert forecaster.file_path == "./" + + # check default attributes after init + assert forecaster.model_ is None + assert forecaster.history_ is None + assert forecaster.last_window_ is None + + # check tags + tags = forecaster.get_tags() + assert tags["algorithm_type"] == "deeplearning" + assert tags["capability:horizon"] + assert tags["capability:multivariate"] From f3af43320cd64c6642627e2d467a02ef6e4c9ff0 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 19 Aug 2025 15:41:53 +0530 Subject: [PATCH 30/36] save best model changed --- aeon/forecasting/deep_learning/base.py | 16 +++++++--------- .../forecasting/deep_learning/tests/test_base.py | 1 - 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index b861305c21..28f4c51784 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -35,8 +35,6 @@ class BaseDeepForecaster(BaseForecaster): Axis along which to apply the forecaster. last_file_name : str, default="last_model" The name of the file of the last model, used for saving models. - save_best_model : bool, default=False - Whether to save the best model during training based on validation loss. file_path : str, default="./" Directory path where models will be saved. @@ -68,7 +66,6 @@ def __init__( callbacks=None, axis=0, last_file_name="last_model", - save_best_model=False, file_path="./", ): self.horizon = horizon @@ -77,7 +74,6 @@ def __init__( self.callbacks = callbacks self.axis = axis self.last_file_name = last_file_name - self.save_best_model = save_best_model self.file_path = file_path self.model_ = None @@ -114,10 +110,10 @@ def _prepare_callbacks(self): callbacks_list.extend(self.callbacks) else: callbacks_list.append(self.callbacks) - if self.save_best_model: - callbacks_list = self._get_model_checkpoint_callback( - callbacks_list, self.file_path, "best_model" - ) + + callbacks_list = self._get_model_checkpoint_callback( + callbacks_list, self.file_path, "best_model" + ) return callbacks_list def _get_model_checkpoint_callback(self, callbacks, file_path, file_name): @@ -172,9 +168,11 @@ def save_last_model_to_file(self, file_path="./"): ------- None """ + import os + if self.model_ is None: raise ValueError("No model to save. Please fit the model first.") - self.model_.save(file_path + self.last_file_name + ".keras") + self.model_.save(os.path.join(file_path, self.last_file_name + ".keras")) def load_model(self, model_path): """Load a pre-trained keras model instead of fitting. diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index 5c0671b4d3..e65321dbfa 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -29,7 +29,6 @@ def test_default_init_attributes(): assert forecaster.callbacks is None assert forecaster.axis == 0 assert forecaster.last_file_name == "last_model" - assert forecaster.save_best_model is False assert forecaster.file_path == "./" # check default attributes after init From 7ce3e681da5bf2b7d8bbb0563bdb5affdfea91bc Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 19 Aug 2025 18:11:55 +0530 Subject: [PATCH 31/36] conversations resolved --- aeon/forecasting/deep_learning/base.py | 35 +++++++++++++++---- .../deep_learning/tests/test_base.py | 7 ++-- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index 28f4c51784..5e812282f4 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -10,6 +10,7 @@ __all__ = ["BaseDeepForecaster"] from abc import abstractmethod +from typing import Any from aeon.forecasting.base import BaseForecaster @@ -23,10 +24,10 @@ class BaseDeepForecaster(BaseForecaster): Parameters ---------- + window : int, + The window size for creating input sequences. horizon : int, default=1 Forecasting horizon, the number of steps ahead to predict. - window : int, default=10 - The window size for creating input sequences. verbose : int, default=0 Verbosity mode (0, 1, or 2). callbacks : list of tf.keras.callbacks.Callback or None, default=None @@ -55,13 +56,13 @@ class BaseDeepForecaster(BaseForecaster): "non_deterministic": True, "cant_pickle": True, "python_dependencies": "tensorflow", - "capability:multivariate": True, + "capability:multivariate": False, } def __init__( self, + window, horizon=1, - window=10, verbose=0, callbacks=None, axis=0, @@ -84,11 +85,11 @@ def __init__( def _fit(self, y, exog=None): """Fit the model.""" - pass + ... def _predict(self, y, exog=None): """Predict using the model.""" - pass + ... def _forecast(self, y, exog=None): """Forecast values for time series X.""" @@ -210,3 +211,25 @@ def build_model(self, input_shape): Compiled Keras model. """ pass + + @classmethod + def _get_test_params( + cls, parameter_set: str = "default" + ) -> dict[str, Any] | list[dict[str, Any]]: + """ + Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class. + """ + param = { + "window": 10, + } + return [param] diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index e65321dbfa..9a88503686 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -9,6 +9,9 @@ class DummyDeepForecaster(BaseDeepForecaster): """Minimal concrete subclass to allow instantiation.""" + def __init__(self, window): + super().__init__(window=window) + def build_model(self, input_shape): """Construct and return a model based on the provided input shape.""" return None # Not needed for this test @@ -20,7 +23,7 @@ def build_model(self, input_shape): ) def test_default_init_attributes(): """Test that BaseDeepForecaster sets default params and attributes correctly.""" - forecaster = DummyDeepForecaster() + forecaster = DummyDeepForecaster(window=10) # check default parameters assert forecaster.horizon == 1 @@ -40,4 +43,4 @@ def test_default_init_attributes(): tags = forecaster.get_tags() assert tags["algorithm_type"] == "deeplearning" assert tags["capability:horizon"] - assert tags["capability:multivariate"] + assert tags["capability:univariate"] From 50dbec6364d221fbf4248b994d4848b2c9e51e72 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 19 Aug 2025 18:12:14 +0530 Subject: [PATCH 32/36] conversations resolved --- aeon/forecasting/deep_learning/base.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index 5e812282f4..cc90af48c7 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -91,12 +91,6 @@ def _predict(self, y, exog=None): """Predict using the model.""" ... - def _forecast(self, y, exog=None): - """Forecast values for time series X.""" - y = self._preprocess_series(y, 1, True) - self.fit(y, exog) - return self.predict(y, exog) - def _prepare_callbacks(self): """Prepare callbacks for training. From 39c20e4fd3c735238b54df54787c6cbc3e593a06 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 19 Aug 2025 20:40:36 +0530 Subject: [PATCH 33/36] TCN forecaster corrected --- aeon/forecasting/deep_learning/_tcn.py | 15 +++++++-------- aeon/forecasting/deep_learning/tests/test_tcn.py | 13 ++++++------- aeon/testing/testing_config.py | 2 -- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py index 95b0a91f0e..732ed30a75 100644 --- a/aeon/forecasting/deep_learning/_tcn.py +++ b/aeon/forecasting/deep_learning/_tcn.py @@ -10,12 +10,12 @@ import numpy as np from sklearn.utils import check_random_state -from aeon.forecasting.base import DirectForecastingMixin +from aeon.forecasting.base import IterativeForecastingMixin from aeon.forecasting.deep_learning.base import BaseDeepForecaster from aeon.networks._tcn import TCNNetwork -class TCNForecaster(BaseDeepForecaster, DirectForecastingMixin): +class TCNForecaster(BaseDeepForecaster, IterativeForecastingMixin): """A deep learning forecaster using Temporal Convolutional Network (TCN). It leverages the `TCNNetwork` from aeon's network module @@ -25,8 +25,6 @@ class TCNForecaster(BaseDeepForecaster, DirectForecastingMixin): ---------- horizon : int, default=1 Forecasting horizon, the number of steps ahead to predict. - window : int, default=10 - The window size for creating input sequences. batch_size : int, default=32 Batch size for training the model. n_epochs : int, default=100 @@ -62,7 +60,7 @@ class TCNForecaster(BaseDeepForecaster, DirectForecastingMixin): _tags = { "python_dependencies": ["tensorflow"], "capability:horizon": True, - "capability:multivariate": True, + "capability:multivariate": False, "capability:exogenous": False, "capability:univariate": True, "algorithm_type": "deeplearning", @@ -72,8 +70,8 @@ class TCNForecaster(BaseDeepForecaster, DirectForecastingMixin): def __init__( self, + window, horizon=1, - window=10, batch_size=32, n_epochs=100, verbose=0, @@ -96,7 +94,6 @@ def __init__( callbacks=callbacks, axis=axis, last_file_name=last_file_name, - save_best_model=save_best_model, file_path=file_path, ) self.n_blocks = n_blocks @@ -107,6 +104,7 @@ def __init__( self.optimizer = optimizer self.loss = loss self.random_state = random_state + self.save_best_model = save_best_model def build_model(self, input_shape): """Build the TCN model for forecasting. @@ -221,7 +219,7 @@ def _predict(self, y=None, exog=None): prediction = pred.flatten()[0] else: prediction = pred[0, :] - return prediction + return float(prediction) @classmethod def _get_test_params( @@ -242,6 +240,7 @@ def _get_test_params( Parameters to create testing instances of the class. """ param = { + "window": 10, "n_epochs": 10, "batch_size": 4, "n_blocks": [8, 8], diff --git a/aeon/forecasting/deep_learning/tests/test_tcn.py b/aeon/forecasting/deep_learning/tests/test_tcn.py index 7ee3139321..afaf3bd16b 100644 --- a/aeon/forecasting/deep_learning/tests/test_tcn.py +++ b/aeon/forecasting/deep_learning/tests/test_tcn.py @@ -5,7 +5,7 @@ import pytest -from aeon.datasets import load_airline, load_longley +from aeon.datasets import load_airline from aeon.forecasting.deep_learning._tcn import TCNForecaster from aeon.utils.validation._dependencies import _check_soft_dependencies @@ -45,7 +45,7 @@ def test_tcn_forecaster(horizon, window, epochs): "loader,is_univariate", [ (load_airline, True), # univariate dataset - (load_longley, False), # multivariate dataset + # (load_longley, False), # multivariate dataset ], ) def test_tcn_forecaster_uni_mutli(loader, is_univariate): @@ -71,8 +71,7 @@ def test_tcn_forecaster_uni_mutli(loader, is_univariate): prediction = forecaster.forecast(y) assert prediction is not None - # only for univariate case, test direct forecasting - if is_univariate: - prediction = forecaster.direct_forecast(y, 3) - assert prediction is not None - assert len(prediction) == 3 + # iterative forecasting + prediction = forecaster.iterative_forecast(y, 3) + assert prediction is not None + assert len(prediction) == 3 diff --git a/aeon/testing/testing_config.py b/aeon/testing/testing_config.py index 759b5f717f..990befb15d 100644 --- a/aeon/testing/testing_config.py +++ b/aeon/testing/testing_config.py @@ -46,8 +46,6 @@ "check_persistence_via_pickle", "check_save_estimators_to_file", ], - # checks required for multivariate - "TCNForecaster": ["check_forecaster_output"], # needs investigation "SASTClassifier": ["check_fit_deterministic", "check_estimator_multithreading"], "RSASTClassifier": ["check_fit_deterministic", "check_estimator_multithreading"], From 784a26a78c3cd29c8170fc7fbfd232daf615105b Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Sat, 23 Aug 2025 12:14:53 +0530 Subject: [PATCH 34/36] Basedlf corrected --- aeon/forecasting/deep_learning/base.py | 8 -------- aeon/forecasting/deep_learning/tests/test_base.py | 3 +++ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/aeon/forecasting/deep_learning/base.py b/aeon/forecasting/deep_learning/base.py index cc90af48c7..0c6843848c 100644 --- a/aeon/forecasting/deep_learning/base.py +++ b/aeon/forecasting/deep_learning/base.py @@ -83,14 +83,6 @@ def __init__( super().__init__(horizon=horizon, axis=axis) - def _fit(self, y, exog=None): - """Fit the model.""" - ... - - def _predict(self, y, exog=None): - """Predict using the model.""" - ... - def _prepare_callbacks(self): """Prepare callbacks for training. diff --git a/aeon/forecasting/deep_learning/tests/test_base.py b/aeon/forecasting/deep_learning/tests/test_base.py index 9a88503686..7d13c3bf79 100644 --- a/aeon/forecasting/deep_learning/tests/test_base.py +++ b/aeon/forecasting/deep_learning/tests/test_base.py @@ -12,6 +12,9 @@ class DummyDeepForecaster(BaseDeepForecaster): def __init__(self, window): super().__init__(window=window) + def _predict(self, y, exog=None): + return None + def build_model(self, input_shape): """Construct and return a model based on the provided input shape.""" return None # Not needed for this test From 2de7b2df8eb77924aa387716503a0e32ab5b62f1 Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 26 Aug 2025 02:59:42 +0530 Subject: [PATCH 35/36] tcn updated with args --- aeon/forecasting/deep_learning/_tcn.py | 152 +++++++++++++++++++------ 1 file changed, 120 insertions(+), 32 deletions(-) diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py index 732ed30a75..ed4c92b4b1 100644 --- a/aeon/forecasting/deep_learning/_tcn.py +++ b/aeon/forecasting/deep_learning/_tcn.py @@ -3,8 +3,12 @@ from __future__ import annotations __maintainer__ = [] + __all__ = ["TCNForecaster"] +import os +import time +from copy import deepcopy from typing import Any import numpy as np @@ -18,8 +22,8 @@ class TCNForecaster(BaseDeepForecaster, IterativeForecastingMixin): """A deep learning forecaster using Temporal Convolutional Network (TCN). - It leverages the `TCNNetwork` from aeon's network module - to build the architecture suitable for forecasting tasks. + Leverages the `TCNNetwork` from aeon's network module to build the architecture + suitable for forecasting tasks. Parameters ---------- @@ -31,8 +35,13 @@ class TCNForecaster(BaseDeepForecaster, IterativeForecastingMixin): Number of epochs to train the model. verbose : int, default=0 Verbosity mode (0, 1, or 2). - optimizer : str or tf.keras.optimizers.Optimizer, default='adam' + optimizer : str or tf.keras.optimizers.Optimizer, default=None Optimizer to use for training. + metrics : str or list[str|function|keras.metrics.Metric], default="accuracy" + The evaluation metrics to use during training. Each can be a string, function, + or a keras.metrics.Metric instance (see https://keras.io/api/metrics/). + If a single string metric is provided, it will be used as the only metric. + If a list of metrics are provided, all will be used for evaluation. loss : str or tf.keras.losses.Loss, default='mse' Loss function for training. callbacks : list of tf.keras.callbacks.Callback or None, default=None @@ -48,13 +57,26 @@ class TCNForecaster(BaseDeepForecaster, IterativeForecastingMixin): file_path : str, default="./" Directory path where models will be saved. n_blocks : list of int, default=[16, 16, 16] - List specifying the number of output channels for each layer of the - TCN. The length determines the depth of the network. + List specifying the number of output channels for each layer of the TCN. + The length determines the depth of the network. kernel_size : int, default=2 Size of the convolutional kernel in the TCN. dropout : float, default=0.2 - Dropout rate applied after each convolutional layer for - regularization. + Dropout rate applied after each convolutional layer for regularization. + save_last_model : bool, default=False + Whether or not to save the last model, last epoch trained. + save_init_model : bool, default=False + Whether to save the initialization of the model. + best_file_name : str, default="best_model" + The name of the file of the best model. + init_file_name : str, default="init_model" + The name of the file of the init model. + + References + ---------- + .. [1] Bai, S., Kolter, J. Z., & Koltun, V. (2018). An empirical evaluation of + generic convolutional and recurrent networks for sequence modeling. + arXiv preprint arXiv:1803.01271. """ _tags = { @@ -75,7 +97,8 @@ def __init__( batch_size=32, n_epochs=100, verbose=0, - optimizer="adam", + optimizer=None, + metrics="accuracy", loss="mse", callbacks=None, random_state=None, @@ -86,25 +109,42 @@ def __init__( n_blocks=None, kernel_size=2, dropout=0.2, + save_last_model=False, + save_init_model=False, + best_file_name="best_model", + init_file_name="init_model", ): - super().__init__( - horizon=horizon, - window=window, - verbose=verbose, - callbacks=callbacks, - axis=axis, - last_file_name=last_file_name, - file_path=file_path, - ) - self.n_blocks = n_blocks - self.kernel_size = kernel_size - self.dropout = dropout + self.window = window + self.horizon = horizon self.batch_size = batch_size self.n_epochs = n_epochs + self.verbose = verbose self.optimizer = optimizer + self.metrics = metrics self.loss = loss + self.callbacks = callbacks self.random_state = random_state + self.axis = axis + self.last_file_name = last_file_name self.save_best_model = save_best_model + self.file_path = file_path + self.n_blocks = n_blocks + self.kernel_size = kernel_size + self.dropout = dropout + self.save_last_model = save_last_model + self.save_init_model = save_init_model + self.best_file_name = best_file_name + self.init_file_name = init_file_name + + super().__init__( + horizon=self.horizon, + window=self.window, + verbose=self.verbose, + callbacks=self.callbacks, + axis=self.axis, + last_file_name=self.last_file_name, + file_path=self.file_path, + ) def build_model(self, input_shape): """Build the TCN model for forecasting. @@ -121,17 +161,35 @@ def build_model(self, input_shape): """ import tensorflow as tf + rng = check_random_state(self.random_state) + self.random_state_ = rng.randint(0, np.iinfo(np.int32).max) + + tf.keras.utils.set_random_seed(self.random_state_) network = TCNNetwork( n_blocks=self.n_blocks if self.n_blocks is not None else [16, 16, 16], kernel_size=self.kernel_size, dropout=self.dropout, ) + input_layer, output = network.build_network(input_shape=input_shape) + model = tf.keras.Model(inputs=input_layer, outputs=output) + + self.optimizer_ = ( + tf.keras.optimizers.Adam() if self.optimizer is None else self.optimizer + ) + + model.compile( + loss=self.loss, + optimizer=self.optimizer_, + metrics=self._metrics, + ) + return model def _fit(self, y, exog=None): - """Fit the forecaster to training data. + """ + Fit the TCN forecaster model to the training data. Parameters ---------- @@ -140,40 +198,56 @@ def _fit(self, y, exog=None): Returns ------- - self : TCNForecaster - Returns an instance of self. + self : object """ import tensorflow as tf - rng = check_random_state(self.random_state) - self.random_state_ = rng.randint(0, np.iinfo(np.int32).max) - tf.keras.utils.set_random_seed(self.random_state_) y_inner = y num_timepoints, num_channels = y_inner.shape num_sequences = num_timepoints - self.window - self.horizon + 1 + if y_inner.shape[0] < self.window + self.horizon: raise ValueError( f"Data length ({y_inner.shape}) is insufficient for window " f"({self.window}) and horizon ({self.horizon})." ) + + if isinstance(self.metrics, list): + self._metrics = self.metrics + elif isinstance(self.metrics, str): + self._metrics = [self.metrics] + windows_full = np.lib.stride_tricks.sliding_window_view( y_inner, window_shape=(self.window, num_channels) ) windows_full = np.squeeze(windows_full, axis=1) X_train = windows_full[:num_sequences] - # print(f"Shape of X_train is {X_train.shape}") + tail = y_inner[self.window :] y_windows = np.lib.stride_tricks.sliding_window_view( tail, window_shape=(self.horizon, num_channels) ) y_windows = np.squeeze(y_windows, axis=1) y_train = y_windows[:num_sequences] - # print(f"Shape of y_train is {y_train.shape}") + input_shape = X_train.shape[1:] - self.model_ = self.build_model(input_shape) - self.model_.compile(optimizer=self.optimizer, loss=self.loss) + self.training_model_ = self.build_model(input_shape) + + if self.save_init_model: + self.training_model_.save(self.file_path + self.init_file_name + ".keras") + + self.file_name_ = ( + self.best_file_name if self.save_best_model else str(time.time_ns()) + ) + callbacks_list = self._prepare_callbacks() - self.history_ = self.model_.fit( + callbacks_list.append( + tf.keras.callbacks.ReduceLROnPlateau( + monitor="loss", factor=0.5, patience=50, min_lr=0.0001 + ) + ) + + self.history = self.training_model_.fit( X_train, y_train, batch_size=self.batch_size, @@ -181,7 +255,21 @@ def _fit(self, y, exog=None): verbose=self.verbose, callbacks=callbacks_list, ) + + try: + self.model_ = tf.keras.models.load_model( + self.file_path + self.file_name_ + ".keras", compile=False + ) + if not self.save_best_model: + os.remove(self.file_path + self.file_name_ + ".keras") + except ValueError: + self.model_ = deepcopy(self.training_model_) + + if self.save_last_model: + self.save_last_model_to_file(file_path=self.file_path) + self.last_window_ = y_inner[-self.window :] + return self def _predict(self, y=None, exog=None): @@ -211,7 +299,7 @@ def _predict(self, y=None, exog=None): f"Input data length ({y_inner.shape}) is less than the " f"window size ({self.window})." ) - y_inner = y_inner[-self.window :] + y_inner = y_inner[-self.window :] num_channels = y_inner.shape[-1] last_window = y_inner.reshape(1, self.window, num_channels) pred = self.model_.predict(last_window, verbose=0) From 9b8d2d3f7c8d25315320d00aa6860bc65f0aff1a Mon Sep 17 00:00:00 2001 From: lucifer4073 Date: Tue, 26 Aug 2025 14:46:10 +0530 Subject: [PATCH 36/36] conversations resolved --- aeon/forecasting/deep_learning/_tcn.py | 4 ++-- aeon/forecasting/deep_learning/tests/test_tcn.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aeon/forecasting/deep_learning/_tcn.py b/aeon/forecasting/deep_learning/_tcn.py index ed4c92b4b1..1221352a01 100644 --- a/aeon/forecasting/deep_learning/_tcn.py +++ b/aeon/forecasting/deep_learning/_tcn.py @@ -22,8 +22,8 @@ class TCNForecaster(BaseDeepForecaster, IterativeForecastingMixin): """A deep learning forecaster using Temporal Convolutional Network (TCN). - Leverages the `TCNNetwork` from aeon's network module to build the architecture - suitable for forecasting tasks. + Adapted from the implementation used in [1]_. Leverages the `TCNNetwork` from + aeon's network module to build the architecture suitable for forecasting tasks. Parameters ---------- diff --git a/aeon/forecasting/deep_learning/tests/test_tcn.py b/aeon/forecasting/deep_learning/tests/test_tcn.py index afaf3bd16b..2755201672 100644 --- a/aeon/forecasting/deep_learning/tests/test_tcn.py +++ b/aeon/forecasting/deep_learning/tests/test_tcn.py @@ -32,7 +32,7 @@ def test_tcn_forecaster(horizon, window, epochs): prediction = forecaster.predict(y) # Basic assertions - assert prediction is not None + assert isinstance(prediction, float) if isinstance(prediction, tf.Tensor): assert not tf.math.is_nan(prediction).numpy() @@ -65,11 +65,11 @@ def test_tcn_forecaster_uni_mutli(loader, is_univariate): # predict prediction = forecaster.predict(y) - assert prediction is not None + assert isinstance(prediction, float) # forecast prediction = forecaster.forecast(y) - assert prediction is not None + assert isinstance(prediction, float) # iterative forecasting prediction = forecaster.iterative_forecast(y, 3)