diff --git a/docs/source/tutorials/ar.ipynb b/docs/source/tutorials/ar.ipynb
index 7cbf0fbc6..32a311de7 100644
--- a/docs/source/tutorials/ar.ipynb
+++ b/docs/source/tutorials/ar.ipynb
@@ -187,10 +187,16 @@
     "    max_prediction_length=prediction_length,\n",
     ")\n",
     "\n",
-    "validation = TimeSeriesDataSet.from_dataset(training, data, min_prediction_idx=training_cutoff + 1)\n",
+    "validation = TimeSeriesDataSet.from_dataset(\n",
+    "    training, data, min_prediction_idx=training_cutoff + 1\n",
+    ")\n",
     "batch_size = 128\n",
-    "train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)\n",
-    "val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)"
+    "train_dataloader = training.to_dataloader(\n",
+    "    train=True, batch_size=batch_size, num_workers=0\n",
+    ")\n",
+    "val_dataloader = validation.to_dataloader(\n",
+    "    train=False, batch_size=batch_size, num_workers=0\n",
+    ")"
    ]
   },
   {
@@ -251,7 +257,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -269,12 +275,18 @@
    "source": [
     "pl.seed_everything(42)\n",
     "trainer = pl.Trainer(accelerator=\"auto\", gradient_clip_val=0.1)\n",
-    "net = NBeats.from_dataset(training, learning_rate=3e-2, weight_decay=1e-2, widths=[32, 512], backcast_loss_ratio=0.1)"
+    "net = NBeats.from_dataset(\n",
+    "    training,\n",
+    "    learning_rate=3e-2,\n",
+    "    weight_decay=1e-2,\n",
+    "    widths=[32, 512],\n",
+    "    backcast_loss_ratio=0.1,\n",
+    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -321,9 +333,18 @@
    ],
    "source": [
     "# find optimal learning rate\n",
-    "from lightning.pytorch.tuner import Tuner\n",
+    "# from lightning.pytorch.tuner import Tuner\n",
+    "# TODO: switch back once lightning.pytorch.tuner supports the weights_only param\n",
+    "from pytorch_forecasting.models.temporal_fusion_transformer.tuning import (\n",
+    "    _NewTuner as Tuner,\n",
+    ")\n",
     "\n",
-    "res = Tuner(trainer).lr_find(net, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader, min_lr=1e-5)\n",
+    "res = Tuner(trainer).lr_find(\n",
+    "    net,\n",
+    "    train_dataloaders=train_dataloader,\n",
+    "    val_dataloaders=val_dataloader,\n",
+    "    min_lr=1e-5,\n",
+    ")\n",
     "print(f\"suggested learning rate: {res.suggestion()}\")\n",
     "fig = res.plot(show=True, suggest=True)\n",
     "fig.show()\n",
@@ -340,7 +361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -443,7 +464,9 @@
     }
    ],
    "source": [
-    "early_stop_callback = EarlyStopping(monitor=\"val_loss\", min_delta=1e-4, patience=10, verbose=False, mode=\"min\")\n",
+    "early_stop_callback = EarlyStopping(\n",
+    "    monitor=\"val_loss\", min_delta=1e-4, patience=10, verbose=False, mode=\"min\"\n",
+    ")\n",
     "trainer = pl.Trainer(\n",
     "    max_epochs=3,\n",
     "    accelerator=\"auto\",\n",
@@ -468,6 +491,7 @@
     "    net,\n",
     "    train_dataloaders=train_dataloader,\n",
     "    val_dataloaders=val_dataloader,\n",
+    "    weights_only=False,\n",
     ")"
    ]
   },
@@ -481,12 +505,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "best_model_path = trainer.checkpoint_callback.best_model_path\n",
-    "best_model = NBeats.load_from_checkpoint(best_model_path)"
+    "best_model = NBeats.load_from_checkpoint(best_model_path, weights_only=False)"
    ]
   },
   {
@@ -645,7 +669,9 @@
    ],
    "source": [
     "for idx in range(10):  # plot 10 examples\n",
-    "    best_model.plot_prediction(raw_predictions.x, raw_predictions.output, idx=idx, add_loss_to_title=True)"
+    "    best_model.plot_prediction(\n",
+    "        raw_predictions.x, raw_predictions.output, idx=idx, add_loss_to_title=True\n",
+    "    )"
    ]
   },
   {
diff --git a/docs/source/tutorials/deepar.ipynb b/docs/source/tutorials/deepar.ipynb
index 753a06d93..a44e722cc 100644
--- a/docs/source/tutorials/deepar.ipynb
+++ b/docs/source/tutorials/deepar.ipynb
@@ -301,7 +301,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -348,7 +348,11 @@
    ],
    "source": [
     "# find optimal learning rate\n",
-    "from lightning.pytorch.tuner import Tuner\n",
+    "# from lightning.pytorch.tuner import Tuner\n",
+    "# TODO: switch back once lightning.pytorch.tuner supports the weights_only param\n",
+    "from pytorch_forecasting.models.temporal_fusion_transformer.tuning import (\n",
+    "    _NewTuner as Tuner,\n",
+    ")\n",
     "\n",
     "res = Tuner(trainer).lr_find(\n",
     "    net,\n",
@@ -883,12 +887,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "best_model_path = trainer.checkpoint_callback.best_model_path\n",
-    "best_model = DeepAR.load_from_checkpoint(best_model_path)"
+    "best_model = DeepAR.load_from_checkpoint(best_model_path, weights_only=False)"
    ]
   },
   {
@@ -1268,7 +1272,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": ".venv (3.12.3)",
    "language": "python",
    "name": "python3"
   },
@@ -1282,7 +1286,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.12.3"
   }
  },
 "nbformat": 4,
diff --git a/docs/source/tutorials/nhits.ipynb b/docs/source/tutorials/nhits.ipynb
index 5970c892d..ffa1fc886 100644
--- a/docs/source/tutorials/nhits.ipynb
+++ b/docs/source/tutorials/nhits.ipynb
@@ -306,7 +306,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -353,7 +353,11 @@
    ],
    "source": [
     "# find optimal learning rate\n",
-    "from lightning.pytorch.tuner import Tuner\n",
+    "# from lightning.pytorch.tuner import Tuner\n",
+    "# TODO: switch back once lightning.pytorch.tuner supports the weights_only param\n",
+    "from pytorch_forecasting.models.temporal_fusion_transformer.tuning import (\n",
+    "    _NewTuner as Tuner,\n",
+    ")\n",
     "\n",
     "res = Tuner(trainer).lr_find(\n",
     "    net,\n",
@@ -553,12 +557,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "best_model_path = trainer.checkpoint_callback.best_model_path\n",
-    "best_model = NHiTS.load_from_checkpoint(best_model_path)"
+    "best_model = NHiTS.load_from_checkpoint(best_model_path, weights_only=False)"
    ]
   },
   {
diff --git a/docs/source/tutorials/stallion.ipynb b/docs/source/tutorials/stallion.ipynb
index ebd8afe9c..f777fa2d6 100644
--- a/docs/source/tutorials/stallion.ipynb
+++ b/docs/source/tutorials/stallion.ipynb
@@ -1012,7 +1012,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {
     "collapsed": false,
     "jupyter": {
@@ -1067,7 +1067,11 @@
    ],
    "source": [
     "# find optimal learning rate\n",
-    "from lightning.pytorch.tuner import Tuner\n",
+    "# from lightning.pytorch.tuner import Tuner\n",
+    "# TODO: switch back once lightning.pytorch.tuner supports the weights_only param\n",
+    "from pytorch_forecasting.models.temporal_fusion_transformer.tuning import (\n",
+    "    _NewTuner as Tuner,\n",
+    ")\n",
     "\n",
     "res = Tuner(trainer).lr_find(\n",
     "    tft,\n",
@@ -2051,14 +2055,16 @@
   },
   {
    "cell_type": "code",
- "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# load the best model according to the validation loss\n", "# (given that we use early stopping, this is not necessarily the last epoch)\n", "best_model_path = trainer.checkpoint_callback.best_model_path\n", - "best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)" + "best_tft = TemporalFusionTransformer.load_from_checkpoint(\n", + " best_model_path, weights_only=False\n", + ")" ] }, { diff --git a/pyproject.toml b/pyproject.toml index ca7ef1240..cc966a0ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ description = "Forecasting timeseries with PyTorch - dataloaders, normalizers, m dependencies = [ "numpy<=3.0.0", "torch >=2.0.0,!=2.0.1,<3.0.0", - "lightning >=2.0.0,<2.6.0", + "lightning >=2.0.0,<3.0.0", "scipy >=1.8,<2.0", "pandas >=1.3.0,<3.0.0", "scikit-learn >=1.2,<2.0", diff --git a/pytorch_forecasting/models/temporal_fusion_transformer/tuning.py b/pytorch_forecasting/models/temporal_fusion_transformer/tuning.py index 6acb76203..19da91e56 100644 --- a/pytorch_forecasting/models/temporal_fusion_transformer/tuning.py +++ b/pytorch_forecasting/models/temporal_fusion_transformer/tuning.py @@ -3,6 +3,7 @@ """ import copy +import functools import logging import os from typing import Any, Union @@ -10,9 +11,12 @@ import lightning.pytorch as pl from lightning.pytorch.callbacks import LearningRateMonitor, ModelCheckpoint from lightning.pytorch.loggers import TensorBoardLogger +from lightning.pytorch.trainer import Trainer from lightning.pytorch.tuner import Tuner import numpy as np import scipy._lib._util +from skbase.utils.dependencies import _check_soft_dependencies +import torch from torch.utils.data import DataLoader from pytorch_forecasting import TemporalFusionTransformer @@ -23,6 +27,26 @@ optuna_logger = logging.getLogger("optuna") +# todo: Remove this class once lightning allows the pass of weights_only to tuner +class _NewTuner(Tuner): + def lr_find(self, *args, **kwargs): + strategy = self._trainer.strategy + original_load_checkpoint = strategy.load_checkpoint + + @functools.wraps(original_load_checkpoint) + def new_load_checkpoint(*ckpt_args, **ckpt_kwargs): + ckpt_kwargs["weights_only"] = False + return original_load_checkpoint(*ckpt_args, **ckpt_kwargs) + + if not _check_soft_dependencies("lightning<2.6", severity="none"): + strategy.load_checkpoint = new_load_checkpoint + + try: + return super().lr_find(*args, **kwargs) + finally: + strategy.load_checkpoint = original_load_checkpoint + + # ToDo: remove this once statsmodels release a version compatible with latest # scipy version def _lazywhere(cond, arrays, f, fillvalue=np.nan, f2=None): @@ -209,7 +233,7 @@ def objective(trial: optuna.Trial) -> float: enable_progress_bar=False, enable_model_summary=False, ) - tuner = Tuner(lr_trainer) + tuner = _NewTuner(lr_trainer) res = tuner.lr_find( model, train_dataloaders=train_dataloaders, diff --git a/pytorch_forecasting/tests/test_all_estimators.py b/pytorch_forecasting/tests/test_all_estimators.py index e3f9980ea..c9aee5222 100644 --- a/pytorch_forecasting/tests/test_all_estimators.py +++ b/pytorch_forecasting/tests/test_all_estimators.py @@ -318,7 +318,7 @@ def _integration( assert len(test_outputs) > 0 # check loading net = estimator_cls.load_from_checkpoint( - trainer.checkpoint_callback.best_model_path + trainer.checkpoint_callback.best_model_path, weights_only=False ) # check prediction diff --git a/tests/test_models/test_deepar.py 
index 6b3e1f0cc..118852079 100644
--- a/tests/test_models/test_deepar.py
+++ b/tests/test_models/test_deepar.py
@@ -90,7 +90,9 @@ def _integration(
     test_outputs = trainer.test(net, dataloaders=test_dataloader)
     assert len(test_outputs) > 0
     # check loading
-    net = DeepAR.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
+    net = DeepAR.load_from_checkpoint(
+        trainer.checkpoint_callback.best_model_path, weights_only=False
+    )
 
     # check prediction
     net.predict(
diff --git a/tests/test_models/test_mlp.py b/tests/test_models/test_mlp.py
index 54e4a6191..55f4a2dda 100644
--- a/tests/test_models/test_mlp.py
+++ b/tests/test_models/test_mlp.py
@@ -71,7 +71,7 @@ def _integration(
     )
     # check loading
     net = DecoderMLP.load_from_checkpoint(
-        trainer.checkpoint_callback.best_model_path
+        trainer.checkpoint_callback.best_model_path, weights_only=False
     )
 
     # check prediction
diff --git a/tests/test_models/test_nbeats.py b/tests/test_models/test_nbeats.py
index c3379fbf1..b242e9a6b 100644
--- a/tests/test_models/test_nbeats.py
+++ b/tests/test_models/test_nbeats.py
@@ -50,7 +50,9 @@ def test_integration(dataloaders_fixed_window_without_covariates, tmp_path):
     test_outputs = trainer.test(net, dataloaders=test_dataloader)
     assert len(test_outputs) > 0
     # check loading
-    net = NBeats.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
+    net = NBeats.load_from_checkpoint(
+        trainer.checkpoint_callback.best_model_path, weights_only=False
+    )
 
     # check prediction
     net.predict(
diff --git a/tests/test_models/test_nhits.py b/tests/test_models/test_nhits.py
index a79e7a93f..1b8bf0e5e 100644
--- a/tests/test_models/test_nhits.py
+++ b/tests/test_models/test_nhits.py
@@ -66,7 +66,9 @@ def _integration(dataloader, tmp_path, trainer_kwargs=None, **kwargs):
     test_outputs = trainer.test(net, dataloaders=test_dataloader)
     assert len(test_outputs) > 0
     # check loading
-    net = NHiTS.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
+    net = NHiTS.load_from_checkpoint(
+        trainer.checkpoint_callback.best_model_path, weights_only=False
+    )
 
     # check prediction
     net.predict(
diff --git a/tests/test_models/test_rnn_model.py b/tests/test_models/test_rnn_model.py
index 69a9b558e..5f1b98215 100644
--- a/tests/test_models/test_rnn_model.py
+++ b/tests/test_models/test_rnn_model.py
@@ -76,7 +76,7 @@ def _integration(
     assert len(test_outputs) > 0
     # check loading
     net = RecurrentNetwork.load_from_checkpoint(
-        trainer.checkpoint_callback.best_model_path
+        trainer.checkpoint_callback.best_model_path, weights_only=False
     )
 
     # check prediction
diff --git a/tests/test_models/test_temporal_fusion_transformer.py b/tests/test_models/test_temporal_fusion_transformer.py
index 3e09b8b7e..491ca865c 100644
--- a/tests/test_models/test_temporal_fusion_transformer.py
+++ b/tests/test_models/test_temporal_fusion_transformer.py
@@ -185,7 +185,7 @@ def _integration(dataloader, tmp_path, loss=None, trainer_kwargs=None, **kwargs)
 
     # check loading
     net = TemporalFusionTransformer.load_from_checkpoint(
-        trainer.checkpoint_callback.best_model_path
+        trainer.checkpoint_callback.best_model_path, weights_only=False
     )
 
     # check prediction
@@ -505,7 +505,9 @@ def test_no_exogenous_variable():
         val_dataloaders=validation_data_loader,
     )
     best_model_path = trainer.checkpoint_callback.best_model_path
-    best_model = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
+    best_model = TemporalFusionTransformer.load_from_checkpoint(
+        best_model_path, weights_only=False
+    )
     best_model.predict(
        validation_data_loader,
        return_x=True,
diff --git a/tests/test_models/test_tide.py b/tests/test_models/test_tide.py
index 3b73ba380..48b75b1c4 100644
--- a/tests/test_models/test_tide.py
+++ b/tests/test_models/test_tide.py
@@ -84,7 +84,7 @@ def _integration(
     assert len(test_outputs) > 0
     # check loading
     net = estimator_cls.load_from_checkpoint(
-        trainer.checkpoint_callback.best_model_path
+        trainer.checkpoint_callback.best_model_path, weights_only=False
     )
 
     # check prediction
@@ -261,7 +261,7 @@ def test_no_exogenous_variable():
         val_dataloaders=validation_data_loader,
     )
     best_model_path = trainer.checkpoint_callback.best_model_path
-    best_model = TiDEModel.load_from_checkpoint(best_model_path)
+    best_model = TiDEModel.load_from_checkpoint(best_model_path, weights_only=False)
     best_model.predict(
         validation_data_loader,
         fast_dev_run=True,
diff --git a/tests/test_models/test_timexer.py b/tests/test_models/test_timexer.py
index 4ce98e6d6..510578582 100644
--- a/tests/test_models/test_timexer.py
+++ b/tests/test_models/test_timexer.py
@@ -115,7 +115,7 @@ def _integration(dataloader, tmp_path, loss=None, trainer_kwargs=None, **kwargs)
 
     # test the checkpointing feature
     net = TimeXer.load_from_checkpoint(
-        trainer.checkpoint_callback.best_model_path,
+        trainer.checkpoint_callback.best_model_path, weights_only=False
     )
     predictions = net.predict(
         val_dataloader,
diff --git a/tests/test_models/test_x_lstm.py b/tests/test_models/test_x_lstm.py
index a527957cf..5f1c7e7bf 100644
--- a/tests/test_models/test_x_lstm.py
+++ b/tests/test_models/test_x_lstm.py
@@ -57,7 +57,7 @@ def _integration(
     assert len(test_outputs) > 0
 
     net = xLSTMTime.load_from_checkpoint(
-        trainer.checkpoint_callback.best_model_path
+        trainer.checkpoint_callback.best_model_path, weights_only=False
     )
 
     net.predict(