diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index 086633cf043..d9e7b338cfd 100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -32,7 +32,7 @@ bs4 awscliv2==2.1.1 flask spacy==3.4.1 -ray[tune]==2.7.2 +ray[tune]==2.52.1 tensorboard jinja2==3.1.3 pytorch-lightning diff --git a/.gitignore b/.gitignore index 3f1f927ee33..ea478ca180d 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,8 @@ beginner_source/hymenoptera_data/ intermediate_source/data/ *.zip MNIST/ +data/cifar-10-batches-py/* +*.tar.gz #builds _build/ @@ -132,3 +134,4 @@ dictionary.dic # linters /.lintbin + diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index dd3fe65699e..c1798cca8ac 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -1,44 +1,42 @@ -# -*- coding: utf-8 -*- """ Hyperparameter tuning with Ray Tune =================================== -Hyperparameter tuning can make the difference between an average model and a highly -accurate one. Often simple things like choosing a different learning rate or changing -a network layer size can have a dramatic impact on your model performance. +**Author:** `Ricardo Decal `_ -Fortunately, there are tools that help with finding the best combination of parameters. -`Ray Tune `_ is an industry standard tool for -distributed hyperparameter tuning. Ray Tune includes the latest hyperparameter search -algorithms, integrates with various analysis libraries, and natively -supports distributed training through `Ray's distributed machine learning engine -`_. +This tutorial shows how to integrate Ray Tune into your PyTorch training +workflow to perform scalable and efficient hyperparameter tuning. -In this tutorial, we will show you how to integrate Ray Tune into your PyTorch -training workflow. We will extend `this tutorial from the PyTorch documentation -`_ for training -a CIFAR10 image classifier. +`Ray `__, a project of the +PyTorch Foundation, is an open-source unified framework for scaling AI +and Python applications. It helps run distributed workloads by handling +the complexity of distributed computing. `Ray +Tune `__ is a library +built on Ray for hyperparameter tuning that enables you to scale a +hyperparameter sweep from your machine to a large cluster with no code +changes. -As you will see, we only need to add some slight modifications. In particular, we -need to +This tutorial extends the PyTorch tutorial for training a CIFAR10 image +classifier in the `CIFAR10 tutorial (PyTorch +documentation) `__. +Only minor modifications are needed to adapt the PyTorch tutorial for +Ray Tune. Specifically, this tutorial wraps the data loading and +training in functions, makes some network parameters configurable, adds +optional checkpointing, and defines the search space for model tuning. -1. wrap data loading and training in functions, -2. make some network parameters configurable, -3. add checkpointing (optional), -4. 
and define the search space for the model tuning +Setup +----- -| +To run this tutorial, install the dependencies: -To run this tutorial, please make sure the following packages are -installed: +""" -- ``ray[tune]``: Distributed hyperparameter tuning library -- ``torchvision``: For the data transformers +# %%bash +# pip install "ray[tune]" torchvision + +###################################################################### +# Then start with the imports: -Setup / Imports ---------------- -Let's start with the imports: -""" from functools import partial import os import tempfile @@ -50,33 +48,22 @@ from torch.utils.data import random_split import torchvision import torchvision.transforms as transforms -# sphinx_gallery_start_ignore -# Fixes ``AttributeError: '_LoggingTee' object has no attribute 'fileno'``. -# This is only needed to run with sphinx-build. -import sys -if not hasattr(sys.stdout, "encoding"): - sys.stdout.encoding = "latin1" - sys.stdout.fileno = lambda: 0 -# sphinx_gallery_end_ignore +# New: imports for Ray Tune +import ray from ray import tune -from ray import train -from ray.train import Checkpoint, get_checkpoint +from ray.tune import Checkpoint from ray.tune.schedulers import ASHAScheduler -import ray.cloudpickle as pickle ###################################################################### -# Most of the imports are needed for building the PyTorch model. Only the last -# imports are for Ray Tune. +# How to use PyTorch data loaders with Ray Tune +# --------------------------------------------- # -# Data loaders -# ------------ -# We wrap the data loaders in their own function and pass a global data directory. -# This way we can share a data directory between different trials. - +# Wrap the data loaders in a constructor function. Pass a global data +# directory here to reuse the dataset across different trials. def load_data(data_dir="./data"): transform = transforms.Compose( - [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] + [transforms.ToTensor(), transforms.Normalize((0.4914, 0.48216, 0.44653), (0.2022, 0.19932, 0.20086))] ) trainset = torchvision.datasets.CIFAR10( @@ -89,18 +76,16 @@ def load_data(data_dir="./data"): return trainset, testset - ###################################################################### -# Configurable neural network -# --------------------------- -# We can only tune those parameters that are configurable. -# In this example, we can specify -# the layer sizes of the fully connected layers: - +# Configure the hyperparameters +# ----------------------------- +# +# In this example, we specify the layer sizes of the fully connected +# layers. class Net(nn.Module): def __init__(self, l1=120, l2=84): - super(Net, self).__init__() + super().__init__() self.conv1 = nn.Conv2d(3, 6, 5) self.pool = nn.MaxPool2d(2, 2) self.conv2 = nn.Conv2d(6, 16, 5) @@ -117,119 +102,133 @@ def forward(self, x): x = self.fc3(x) return x - ###################################################################### -# The train function -# ------------------ -# Now it gets interesting, because we introduce some changes to the example `from the PyTorch -# documentation `_. -# -# We wrap the training script in a function ``train_cifar(config, data_dir=None)``. -# The ``config`` parameter will receive the hyperparameters we would like to -# train with. The ``data_dir`` specifies the directory where we load and store the data, -# so that multiple runs can share the same data source. 
-# We also load the model and optimizer state at the start of the run, if a checkpoint -# is provided. Further down in this tutorial you will find information on how -# to save the checkpoint and what it is used for. +# Use a train function with Ray Tune +# ---------------------------------- +# +# Now it gets interesting, because we introduce some changes to the +# example `from the PyTorch +# documentation `__. +# +# We wrap the training script in a function +# ``train_cifar(config, data_dir=None)``. The ``config`` parameter +# receives the hyperparameters we want to train with. The ``data_dir`` +# specifies the directory where we load and store the data, allowing +# multiple runs to share the same data source. This is especially useful +# in cluster environments where you can mount shared storage (for example +# NFS) to prevent the data from being downloaded to each node separately. +# We also load the model and optimizer state at the start of the run if a +# checkpoint is provided. Further down in this tutorial, you will find +# information on how to save the checkpoint and how it is used. # # .. code-block:: python # -# net = Net(config["l1"], config["l2"]) +# net = Net(config["l1"], config["l2"]) # -# checkpoint = get_checkpoint() -# if checkpoint: -# with checkpoint.as_directory() as checkpoint_dir: -# data_path = Path(checkpoint_dir) / "data.pkl" -# with open(data_path, "rb") as fp: -# checkpoint_state = pickle.load(fp) -# start_epoch = checkpoint_state["epoch"] -# net.load_state_dict(checkpoint_state["net_state_dict"]) -# optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) -# else: -# start_epoch = 0 +# checkpoint = tune.get_checkpoint() +# if checkpoint: +# with checkpoint.as_directory() as checkpoint_dir: +# checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" +# checkpoint_state = torch.load(checkpoint_path) +# start_epoch = checkpoint_state["epoch"] +# net.load_state_dict(checkpoint_state["net_state_dict"]) +# optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) +# else: +# start_epoch = 0 # # The learning rate of the optimizer is made configurable, too: # # .. code-block:: python # -# optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) +# optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) # -# We also split the training data into a training and validation subset. We thus train on -# 80% of the data and calculate the validation loss on the remaining 20%. The batch sizes -# with which we iterate through the training and test sets are configurable as well. +# We also split the training data into a training and validation subset. +# We thus train on 80% of the data and calculate the validation loss on +# the remaining 20%. The batch sizes with which we iterate through the +# training and test sets are configurable as well. # -# Adding (multi) GPU support with DataParallel -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Image classification benefits largely from GPUs. Luckily, we can continue to use -# PyTorch's abstractions in Ray Tune. Thus, we can wrap our model in ``nn.DataParallel`` -# to support data parallel training on multiple GPUs: +# Add multi-GPU support with DataParallel +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# Image classification benefits largely from GPUs. Luckily, you can +# continue to use PyTorch tools in Ray Tune. Thus, you can wrap the model +# in ``nn.DataParallel`` to support data-parallel training on multiple +# GPUs: # # .. 
code-block:: python # -# device = "cpu" -# if torch.cuda.is_available(): -# device = "cuda:0" -# if torch.cuda.device_count() > 1: -# net = nn.DataParallel(net) -# net.to(device) +# device = "cpu" +# if torch.cuda.is_available(): +# device = "cuda:0" +# if torch.cuda.device_count() > 1: +# net = nn.DataParallel(net) +# net.to(device) # -# By using a ``device`` variable we make sure that training also works when we have -# no GPUs available. PyTorch requires us to send our data to the GPU memory explicitly, -# like this: +# By using a ``device`` variable, we ensure that training works even +# without a GPU. PyTorch requires us to send our data to the GPU memory +# explicitly: # # .. code-block:: python # -# for i, data in enumerate(trainloader, 0): -# inputs, labels = data -# inputs, labels = inputs.to(device), labels.to(device) +# for i, data in enumerate(trainloader, 0): +# inputs, labels = data +# inputs, labels = inputs.to(device), labels.to(device) # -# The code now supports training on CPUs, on a single GPU, and on multiple GPUs. Notably, Ray -# also supports `fractional GPUs `_ -# so we can share GPUs among trials, as long as the model still fits on the GPU memory. We'll come back -# to that later. +# The code now supports training on CPUs, on a single GPU, and on multiple +# GPUs. Notably, Ray also supports `fractional +# GPUs `__ +# so we can share GPUs among trials, as long as the model still fits on +# the GPU memory. We will return to that later. # # Communicating with Ray Tune # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# The most interesting part is the communication with Ray Tune: +# The most interesting part is the communication with Ray Tune. As you’ll +# see, integrating Ray Tune into your training code requires only a few +# additional lines: # # .. code-block:: python # -# checkpoint_data = { -# "epoch": epoch, -# "net_state_dict": net.state_dict(), -# "optimizer_state_dict": optimizer.state_dict(), -# } -# with tempfile.TemporaryDirectory() as checkpoint_dir: -# data_path = Path(checkpoint_dir) / "data.pkl" -# with open(data_path, "wb") as fp: -# pickle.dump(checkpoint_data, fp) -# -# checkpoint = Checkpoint.from_directory(checkpoint_dir) -# train.report( -# {"loss": val_loss / val_steps, "accuracy": correct / total}, -# checkpoint=checkpoint, -# ) -# -# Here we first save a checkpoint and then report some metrics back to Ray Tune. Specifically, -# we send the validation loss and accuracy back to Ray Tune. Ray Tune can then use these metrics -# to decide which hyperparameter configuration lead to the best results. These metrics -# can also be used to stop bad performing trials early in order to avoid wasting -# resources on those trials. -# -# The checkpoint saving is optional, however, it is necessary if we wanted to use advanced -# schedulers like -# `Population Based Training `_. -# Also, by saving the checkpoint we can later load the trained models and validate them -# on a test set. Lastly, saving checkpoints is useful for fault tolerance, and it allows -# us to interrupt training and continue training later. 
+# checkpoint_data = { +# "epoch": epoch, +# "net_state_dict": net.state_dict(), +# "optimizer_state_dict": optimizer.state_dict(), +# } +# with tempfile.TemporaryDirectory() as checkpoint_dir: +# checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" +# torch.save(checkpoint_data, checkpoint_path) +# +# checkpoint = Checkpoint.from_directory(checkpoint_dir) +# tune.report( +# {"loss": val_loss / val_steps, "accuracy": correct / total}, +# checkpoint=checkpoint, +# ) +# +# Here we first save a checkpoint and then report some metrics back to Ray +# Tune. Specifically, we send the validation loss and accuracy back to Ray +# Tune. Ray Tune uses these metrics to determine the best hyperparameter +# configuration and to stop underperforming trials early, saving +# resources. +# +# The checkpoint saving is optional. However, it is necessary if we wanted +# to use advanced schedulers like `Population Based +# Training `__. +# Saving the checkpoint also allows us to later load the trained models +# for validation on a test set. Lastly, it provides fault tolerance, +# enabling us to pause and resume training. +# +# To summarize, integrating Ray Tune into your PyTorch training requires +# just a few key additions: use ``tune.report()`` to report metrics (and +# optionally checkpoints) to Ray Tune, ``tune.get_checkpoint()`` to load a +# model from a checkpoint, and ``Checkpoint.from_directory()`` to create a +# checkpoint object from saved state. The rest of your training code +# remains standard PyTorch. # # Full training function # ~~~~~~~~~~~~~~~~~~~~~~ # # The full code example looks like this: - def train_cifar(config, data_dir=None): net = Net(config["l1"], config["l2"]) @@ -243,12 +242,12 @@ def train_cifar(config, data_dir=None): criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) - checkpoint = get_checkpoint() + # Load checkpoint if resuming training + checkpoint = tune.get_checkpoint() if checkpoint: with checkpoint.as_directory() as checkpoint_dir: - data_path = Path(checkpoint_dir) / "data.pkl" - with open(data_path, "rb") as fp: - checkpoint_state = pickle.load(fp) + checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" + checkpoint_state = torch.load(checkpoint_path) start_epoch = checkpoint_state["epoch"] net.load_state_dict(checkpoint_state["net_state_dict"]) optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) @@ -263,10 +262,10 @@ def train_cifar(config, data_dir=None): ) trainloader = torch.utils.data.DataLoader( - train_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=8 + train_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=2 ) valloader = torch.utils.data.DataLoader( - val_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=8 + val_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=2 ) for epoch in range(start_epoch, 10): # loop over the dataset multiple times @@ -315,34 +314,34 @@ def train_cifar(config, data_dir=None): val_loss += loss.cpu().numpy() val_steps += 1 + # Save checkpoint and report metrics checkpoint_data = { "epoch": epoch, "net_state_dict": net.state_dict(), "optimizer_state_dict": optimizer.state_dict(), } with tempfile.TemporaryDirectory() as checkpoint_dir: - data_path = Path(checkpoint_dir) / "data.pkl" - with open(data_path, "wb") as fp: - pickle.dump(checkpoint_data, fp) + checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" + torch.save(checkpoint_data, checkpoint_path) checkpoint = 
Checkpoint.from_directory(checkpoint_dir) - train.report( + tune.report( {"loss": val_loss / val_steps, "accuracy": correct / total}, checkpoint=checkpoint, ) - - print("Finished Training") + print("Finished Training") ###################################################################### -# As you can see, most of the code is adapted directly from the original example. +# As you can see, most of the code is adapted directly from the original +# example. # -# Test set accuracy -# ----------------- -# Commonly the performance of a machine learning model is tested on a hold-out test -# set with data that has not been used for training the model. We also wrap this in a -# function: - +# Compute test set accuracy +# ------------------------- +# +# Commonly the performance of a machine learning model is tested on a +# held-out test set with data that has not been used for training the +# model. We also wrap this in a function: def test_accuracy(net, device="cpu"): trainset, testset = load_data() @@ -364,69 +363,115 @@ def test_accuracy(net, device="cpu"): return correct / total - ###################################################################### -# The function also expects a ``device`` parameter, so we can do the -# test set validation on a GPU. +# The function also expects a ``device`` parameter so you can run the test +# set validation on a GPU. # -# Configuring the search space -# ---------------------------- -# Lastly, we need to define Ray Tune's search space. Here is an example: +# Configure the search space +# -------------------------- # -# .. code-block:: python +# Lastly, we need to define Ray Tune’s search space. Ray Tune offers a +# variety of `search space +# distributions `__ +# to suit different parameter types: ``loguniform``, ``uniform``, +# ``choice``, ``randint``, ``grid``, and more. It also lets you express +# complex dependencies between parameters with `conditional search +# spaces `__. # -# config = { -# "l1": tune.choice([2 ** i for i in range(9)]), -# "l2": tune.choice([2 ** i for i in range(9)]), -# "lr": tune.loguniform(1e-4, 1e-1), -# "batch_size": tune.choice([2, 4, 8, 16]) -# } +# Here is an example: # -# The ``tune.choice()`` accepts a list of values that are uniformly sampled from. -# In this example, the ``l1`` and ``l2`` parameters -# should be powers of 2 between 4 and 256, so either 4, 8, 16, 32, 64, 128, or 256. -# The ``lr`` (learning rate) should be uniformly sampled between 0.0001 and 0.1. Lastly, -# the batch size is a choice between 2, 4, 8, and 16. -# -# At each trial, Ray Tune will now randomly sample a combination of parameters from these -# search spaces. It will then train a number of models in parallel and find the best -# performing one among these. We also use the ``ASHAScheduler`` which will terminate bad -# performing trials early. +# .. code-block:: python # -# We wrap the ``train_cifar`` function with ``functools.partial`` to set the constant -# ``data_dir`` parameter. We can also tell Ray Tune what resources should be -# available for each trial: +# config = { +# "l1": tune.choice([2**i for i in range(9)]), +# "l2": tune.choice([2**i for i in range(9)]), +# "lr": tune.loguniform(1e-4, 1e-1), +# "batch_size": tune.choice([2, 4, 8, 16]), +# } +# +# The ``tune.choice()`` accepts a list of values that are uniformly +# sampled from. In this example, the ``l1`` and ``l2`` parameter values +# will be powers of 2 between 1 and 256. The learning rate is sampled on a +# log scale between 0.0001 and 0.1. 
Sampling on a log scale ensures that +# the search space is explored efficiently across different magnitudes. +# +# Smarter sampling and scheduling +# ------------------------------- +# +# To make the hyperparameter search process efficient, Ray Tune provides +# two main controls: +# +# 1. It can intelligently pick the next set of hyperparameters to test +# based on previous results using `advanced search +# algorithms `__ +# such as +# `Optuna `__ +# or +# ```bayesopt`` `__, +# instead of relying only on random or grid search. +# 2. It can detect underperforming trials and stop them early using +# `schedulers `__, +# enabling you to explore the parameter space more on the same compute +# budget. +# +# In this tutorial, we use the ``ASHAScheduler``, which aggressively +# terminates low-performing trials to save computational resources. +# +# Configure the resources +# ----------------------- +# +# Tell Ray Tune what resources should be available for each trial using +# ``tune.with_resources``: # # .. code-block:: python # -# gpus_per_trial = 2 -# # ... -# result = tune.run( -# partial(train_cifar, data_dir=data_dir), -# resources_per_trial={"cpu": 8, "gpu": gpus_per_trial}, -# config=config, -# num_samples=num_samples, -# scheduler=scheduler, -# checkpoint_at_end=True) +# tune.with_resources( +# partial(train_cifar, data_dir=data_dir), +# resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial} +# ) # -# You can specify the number of CPUs, which are then available e.g. -# to increase the ``num_workers`` of the PyTorch ``DataLoader`` instances. The selected -# number of GPUs are made visible to PyTorch in each trial. Trials do not have access to -# GPUs that haven't been requested for them - so you don't have to care about two trials -# using the same set of resources. +# This tells Ray Tune to allocate ``cpus_per_trial`` CPUs and +# ``gpus_per_trial`` GPUs for each trial. Ray Tune automatically manages +# the placement of these trials and ensures they are isolated, so you +# don’t need to manually assign GPUs to processes. # -# Here we can also specify fractional GPUs, so something like ``gpus_per_trial=0.5`` is -# completely valid. The trials will then share GPUs among each other. -# You just have to make sure that the models still fit in the GPU memory. +# For example, if you are running this experiment on a cluster of 20 +# machines, each with 8 GPUs, you can set ``gpus_per_trial = 0.5`` to +# schedule 2 concurrent trials per GPU. This configuration runs 320 trials +# in parallel across the cluster. # -# After training the models, we will find the best performing one and load the trained -# network from the checkpoint file. We then obtain the test set accuracy and report -# everything by printing. +# Putting it together +# ------------------- # -# The full main function looks like this: - - -def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): +# The Ray Tune API is designed to be modular and composable: you pass your +# configurations to the ``tune.Tuner`` class to create a tuner object, +# then execute ``tuner.fit()`` to start training: +# +# .. 
code-block:: python +# +# tuner = tune.Tuner( +# tune.with_resources( +# partial(train_cifar, data_dir=data_dir), +# resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial} +# ), +# tune_config=tune.TuneConfig( +# metric="loss", +# mode="min", +# scheduler=scheduler, +# num_samples=num_samples, +# ), +# param_space=config, +# ) +# results = tuner.fit() +# +# After training the models, we will find the best performing one and load +# the trained network from the checkpoint file. We then obtain the test +# set accuracy and report the results. + +def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): + print("Starting hyperparameter tuning.") + ray.init() + data_dir = os.path.abspath("./data") load_data(data_dir) config = { @@ -436,26 +481,32 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): "batch_size": tune.choice([2, 4, 8, 16]), } scheduler = ASHAScheduler( - metric="loss", - mode="min", max_t=max_num_epochs, grace_period=1, reduction_factor=2, ) - result = tune.run( - partial(train_cifar, data_dir=data_dir), - resources_per_trial={"cpu": 2, "gpu": gpus_per_trial}, - config=config, - num_samples=num_samples, - scheduler=scheduler, + + tuner = tune.Tuner( + tune.with_resources( + partial(train_cifar, data_dir=data_dir), + resources={"cpu": 2, "gpu": gpus_per_trial} + ), + tune_config=tune.TuneConfig( + metric="loss", + mode="min", + scheduler=scheduler, + num_samples=num_trials, + ), + param_space=config, ) + results = tuner.fit() - best_trial = result.get_best_trial("loss", "min", "last") - print(f"Best trial config: {best_trial.config}") - print(f"Best trial final validation loss: {best_trial.last_result['loss']}") - print(f"Best trial final validation accuracy: {best_trial.last_result['accuracy']}") + best_result = results.get_best_result("loss", "min") + print(f"Best trial config: {best_result.config}") + print(f"Best trial final validation loss: {best_result.metrics['loss']}") + print(f"Best trial final validation accuracy: {best_result.metrics['accuracy']}") - best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"]) + best_trained_model = Net(best_result.config["l1"], best_result.config["l2"]) device = "cpu" if torch.cuda.is_available(): device = "cuda:0" @@ -463,11 +514,10 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): best_trained_model = nn.DataParallel(best_trained_model) best_trained_model.to(device) - best_checkpoint = result.get_best_checkpoint(trial=best_trial, metric="accuracy", mode="max") + best_checkpoint = best_result.checkpoint with best_checkpoint.as_directory() as checkpoint_dir: - data_path = Path(checkpoint_dir) / "data.pkl" - with open(data_path, "rb") as fp: - best_checkpoint_data = pickle.load(fp) + checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" + best_checkpoint_data = torch.load(checkpoint_path) best_trained_model.load_state_dict(best_checkpoint_data["net_state_dict"]) test_acc = test_accuracy(best_trained_model, device) @@ -475,38 +525,74 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): if __name__ == "__main__": - # You can change the number of GPUs per trial here: - main(num_samples=10, max_num_epochs=10, gpus_per_trial=0) - + # Set the number of trials, epochs, and GPUs per trial here: + # The following configuration is for a quick run (1 trial, 1 epoch, CPU only) for demonstration purposes. 
+ main(num_trials=1, max_num_epochs=1, gpus_per_trial=0) ###################################################################### -# If you run the code, an example output could look like this: -# -# .. code-block:: sh -# -# Number of trials: 10/10 (10 TERMINATED) -# +-----+--------------+------+------+-------------+--------+---------+------------+ -# | ... | batch_size | l1 | l2 | lr | iter | loss | accuracy | -# |-----+--------------+------+------+-------------+--------+---------+------------| -# | ... | 2 | 1 | 256 | 0.000668163 | 1 | 2.31479 | 0.0977 | -# | ... | 4 | 64 | 8 | 0.0331514 | 1 | 2.31605 | 0.0983 | -# | ... | 4 | 2 | 1 | 0.000150295 | 1 | 2.30755 | 0.1023 | -# | ... | 16 | 32 | 32 | 0.0128248 | 10 | 1.66912 | 0.4391 | -# | ... | 4 | 8 | 128 | 0.00464561 | 2 | 1.7316 | 0.3463 | -# | ... | 8 | 256 | 8 | 0.00031556 | 1 | 2.19409 | 0.1736 | -# | ... | 4 | 16 | 256 | 0.00574329 | 2 | 1.85679 | 0.3368 | -# | ... | 8 | 2 | 2 | 0.00325652 | 1 | 2.30272 | 0.0984 | -# | ... | 2 | 2 | 2 | 0.000342987 | 2 | 1.76044 | 0.292 | -# | ... | 4 | 64 | 32 | 0.003734 | 8 | 1.53101 | 0.4761 | -# +-----+--------------+------+------+-------------+--------+---------+------------+ -# -# Best trial config: {'l1': 64, 'l2': 32, 'lr': 0.0037339984519545164, 'batch_size': 4} -# Best trial final validation loss: 1.5310075663924216 -# Best trial final validation accuracy: 0.4761 -# Best trial test set accuracy: 0.4737 -# -# Most trials have been stopped early in order to avoid wasting resources. -# The best performing trial achieved a validation accuracy of about 47%, which could -# be confirmed on the test set. -# -# So that's it! You can now tune the parameters of your PyTorch models. +# Your Ray Tune trial summary output will look something like this: +# +# .. code-block:: bash +# +# Number of trials: 10/10 (10 TERMINATED) +# +-----+--------------+------+------+-------------+--------+---------+------------+ +# | ... | batch_size | l1 | l2 | lr | iter | loss | accuracy | +# |-----+--------------+------+------+-------------+--------+---------+------------| +# | ... | 2 | 1 | 256 | 0.000668163 | 1 | 2.31479 | 0.0977 | +# | ... | 4 | 64 | 8 | 0.0331514 | 1 | 2.31605 | 0.0983 | +# | ... | 4 | 2 | 1 | 0.000150295 | 1 | 2.30755 | 0.1023 | +# | ... | 16 | 32 | 32 | 0.0128248 | 10 | 1.66912 | 0.4391 | +# | ... | 4 | 8 | 128 | 0.00464561 | 2 | 1.7316 | 0.3463 | +# | ... | 8 | 256 | 8 | 0.00031556 | 1 | 2.19409 | 0.1736 | +# | ... | 4 | 16 | 256 | 0.00574329 | 2 | 1.85679 | 0.3368 | +# | ... | 8 | 2 | 2 | 0.00325652 | 1 | 2.30272 | 0.0984 | +# | ... | 2 | 2 | 2 | 0.000342987 | 2 | 1.76044 | 0.292 | +# | ... | 4 | 64 | 32 | 0.003734 | 8 | 1.53101 | 0.4761 | +# +-----+--------------+------+------+-------------+--------+---------+------------+ +# +# Best trial config: {'l1': 64, 'l2': 32, 'lr': 0.0037339984519545164, 'batch_size': 4} +# Best trial final validation loss: 1.5310075663924216 +# Best trial final validation accuracy: 0.4761 +# Best trial test set accuracy: 0.4737 +# +# Most trials were stopped early to conserve resources. The best +# performing trial achieved a validation accuracy of approximately 47%, +# which could be confirmed on the test set. +# +# You can now tune the parameters of your PyTorch models. +# +# Observability +# ------------- +# +# When running large-scale experiments, monitoring is crucial. Ray +# provides a +# `Dashboard `__ +# that lets you view the status of your trials, check cluster resource +# utilization, and inspect logs in real-time. 
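+#
+# For example, the dashboard address is printed to the console when Ray
+# starts, and you can also read it from the context object returned by
+# ``ray.init()``. The following is a minimal sketch (assuming a default
+# local Ray setup, where the dashboard serves on ``127.0.0.1:8265``):
+#
+# .. code-block:: python
+#
+#     context = ray.init()
+#     print(f"Ray dashboard: http://{context.dashboard_url}")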
+# +# For debugging, Ray also offers `Distributed +# Debugging `__ +# tools that let you attach a debugger to running trials across the +# cluster. +# +# Conclusion +# ---------- +# +# In this tutorial, you learned how to tune the hyperparameters of a +# PyTorch model using Ray Tune. You saw how to integrate Ray Tune into +# your PyTorch training loop, define a search space for your +# hyperparameters, use an efficient scheduler like ASHA to terminate bad +# trials early, save checkpoints and report metrics to Ray Tune, and run +# the hyperparameter search and analyze the results. +# +# Ray Tune makes it easy to scale your experiments from a single machine +# to a large cluster, helping you find the best model configuration +# efficiently. +# +# Further reading +# --------------- +# +# - `Ray Tune +# documentation `__ +# - `Ray Tune +# examples `__ diff --git a/ecosystem.rst b/ecosystem.rst index da2a926851a..ddd6c505561 100644 --- a/ecosystem.rst +++ b/ecosystem.rst @@ -33,7 +33,7 @@ to production deployment. :card_description: Learn how to use Ray Tune to find the best performing set of hyperparameters for your model. :image: _static/img/ray-tune.png :link: beginner/hyperparameter_tuning_tutorial.html - :tags: Model-Optimization,Best-Practice,Ecosystem + :tags: Model-Optimization,Best-Practice,Ecosystem,Ray-Distributed,Parallel-and-Distributed-Training .. customcarditem:: :header: Multi-Objective Neural Architecture Search with Ax diff --git a/index.rst b/index.rst index 5a5e80abfbb..f9a76296750 100644 --- a/index.rst +++ b/index.rst @@ -493,7 +493,7 @@ Welcome to PyTorch Tutorials :card_description: Learn how to use Ray Tune to find the best performing set of hyperparameters for your model. :image: _static/img/ray-tune.png :link: beginner/hyperparameter_tuning_tutorial.html - :tags: Model-Optimization,Best-Practice + :tags: Model-Optimization,Best-Practice,Ray-Distributed,Parallel-and-Distributed-Training .. customcarditem:: :header: Parametrizations Tutorial