
Commit fc329c8

Hyperparameter tuning with Hyperopt.jl (#109)
* hyperparameter tuning, passing hyperparameters as kwargs
* md and script with hyperparameter tuning
* function to extract and pass best hyperparameters to another train (via tune)
* re-added option to pass data as in a tuple
1 parent 827834b commit fc329c8

File tree

10 files changed: +394, -15 lines changed


Project.toml

Lines changed: 2 additions & 0 deletions

@@ -14,6 +14,7 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
@@ -47,6 +48,7 @@ DataFrames = "1"
 Downloads = "1.6.0"
 Flux = "0.16"
 ForwardDiff = "1.0.1"
+Hyperopt = "0.5.6"
 JLD2 = "0.5.13, 0.6"
 Lux = "1.12.4"
 LuxCore = "1.2.4"

docs/Project.toml

Lines changed: 1 addition & 0 deletions

@@ -4,6 +4,7 @@ Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365"
 EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3"
+Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"

 [sources]
 EasyHybrid = {path = ".."}

docs/make.jl

Lines changed: 1 addition & 0 deletions

@@ -15,6 +15,7 @@ makedocs(;
         "Get Started" => "get_started.md",
         "Tutorial" => [
             "Exponential Response" => "tutorials/exponential_res.md",
+            "Hyperparameter Tuning" => "tutorials/hyperparameter_tuning.md"
         ],
         "Research" => [
             "Overview" => "research/overview.md"

docs/src/tutorials/hyperparameter_tuning.md

Lines changed: 220 additions & 0 deletions
@@ -0,0 +1,220 @@

```@raw html
---
authors:
  - name: Bernhard Ahrens
    avatar: https://raw.githubusercontent.com/EarthyScience/EasyHybrid.jl/72c2fa9df829d46d25df15352a4b728d2dbe94ed/docs/src/assets/Bernhard_Ahrens.png
    link: https://www.bgc-jena.mpg.de/en/bgi/miss
  - name: Lazaro Alonso
    avatar: https://avatars.githubusercontent.com/u/19525261?v=4
    platform: github
    link: https://lazarusa.github.io
---

<Authors />
```

# Hyperparameter Tuning

### 1. Setup and Data Loading

Load package and synthetic dataset

```@example hyperparameter_tuning
using EasyHybrid
using CairoMakie
using Hyperopt
```

```@example hyperparameter_tuning
ds = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc")
ds = ds[1:20000, :] # Use subset for faster execution
first(ds, 5)
```

### 2. Define the Process-based Model

RbQ10 model: Respiration model with Q10 temperature sensitivity

```@example hyperparameter_tuning
function RbQ10(; ta, Q10, rb, tref = 15.0f0)
    reco = rb .* Q10 .^ (0.1f0 .* (ta .- tref))
    return (; reco, Q10, rb)
end
```
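
In equation form, the code above implements the temperature response

```math
R_\mathrm{eco} = R_b \cdot Q_{10}^{\,(T_a - T_\mathrm{ref})/10}, \qquad T_\mathrm{ref} = 15\,^\circ\mathrm{C}
```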

### 3. Configure Model Parameters

Parameter specification: (default, lower_bound, upper_bound)

```@example hyperparameter_tuning
parameters = (
    rb  = (3.0f0, 0.0f0, 13.0f0), # Basal respiration [μmol/m²/s]
    Q10 = (2.0f0, 1.0f0, 4.0f0),  # Temperature sensitivity - factor by which respiration increases for a 10 K rise in temperature [-]
)
```

### 4. Construct the Hybrid Model

Define input variables

```@example hyperparameter_tuning
forcing = [:ta]                  # Forcing variables (temperature)
predictors = [:sw_pot, :dsw_pot] # Predictor variables (solar radiation)
target = [:reco]                 # Target variable (respiration)
```

Classify parameters as global, neural, or fixed: global parameters take a single value shared across all samples, whereas neural parameters are predicted by the neural network from the predictors.

```@example hyperparameter_tuning
global_param_names = [:Q10] # Global parameters (same for all samples)
neural_param_names = [:rb]  # Neural network predicted parameters
```

Construct hybrid model

```@example hyperparameter_tuning
hybrid_model = constructHybridModel(
    predictors,               # Input features
    forcing,                  # Forcing variables
    target,                   # Target variables
    RbQ10,                    # Process-based model function
    parameters,               # Parameter definitions
    neural_param_names,       # NN-predicted parameters
    global_param_names,       # Global parameters
    hidden_layers = [16, 16], # Neural network architecture
    activation = relu,        # Activation function
    scale_nn_outputs = true,  # Scale neural network outputs
    input_batchnorm = false   # Apply batch normalization to inputs
)
```

### 5. Train the Model

```@example hyperparameter_tuning
out = train(
    hybrid_model,
    ds,
    ();
    nepochs = 100,               # Number of training epochs
    batchsize = 512,             # Batch size for training
    opt = AdamW(0.001),          # Optimizer and learning rate
    monitor_names = [:rb, :Q10], # Parameters to monitor during training
    yscale = identity,           # Scaling for outputs
    patience = 30,               # Early stopping patience
    show_progress = false,
    hybrid_name = "before"
)
```

```@raw html
<video src="../training_history_before.mp4" controls="controls" autoplay="autoplay"></video>
```

### 6. Check Results

Evolution of train and validation loss

```@example hyperparameter_tuning
EasyHybrid.plot_loss(out, yscale = identity)
```

Check the learned Q10 - what do you think: is it the true Q10 used to generate the synthetic dataset?

```@example hyperparameter_tuning
out.train_diffs.Q10
```

Quick scatterplot - `poplot` dispatches on the output of `train`

```@example hyperparameter_tuning
EasyHybrid.poplot(out)
```

## Hyperparameter Tuning

EasyHybrid provides built-in hyperparameter tuning capabilities to optimize your model configuration. This is especially useful for finding the best neural network architecture, optimizer settings, and other hyperparameters.

### Basic Hyperparameter Tuning

You can use the `tune` function to automatically search for optimal hyperparameters:

```@example hyperparameter_tuning
# Create empty model specification for tuning
mspempty = ModelSpec()

# Define hyperparameter search space
nhyper = 4
ho = @thyperopt for i = nhyper,
        opt = [AdamW(0.01), AdamW(0.1), RMSProp(0.001), RMSProp(0.01)],
        input_batchnorm = [true, false]

    hyper_parameters = (; opt, input_batchnorm)
    println("Hyperparameter run: ", i, " of ", nhyper, " with hyperparameters: ", hyper_parameters)

    # Run tuning with current hyperparameters
    out = EasyHybrid.tune(
        hybrid_model,
        ds,
        mspempty;
        hyper_parameters...,
        nepochs = 10,
        plotting = false,
        show_progress = false,
        file_name = "test$i.jld2"
    )

    out.best_loss
end

# Get the best hyperparameters
ho.minimizer
printmin(ho)

# Extract the best hyperparameters to retrain with them below
best_hyperp = best_hyperparams(ho)
```

### Train the Model with the Best Hyperparameters

```@example hyperparameter_tuning
# Run tuning with the best hyperparameters found above
out_tuned = EasyHybrid.tune(
    hybrid_model,
    ds,
    mspempty;
    best_hyperp...,
    nepochs = 100,
    monitor_names = [:rb, :Q10],
    hybrid_name = "after"
)

# Check the tuned model performance
out_tuned.best_loss
```

```@raw html
<video src="../training_history_after.mp4" controls="controls" autoplay="autoplay"></video>
```

### Key Hyperparameters to Tune

When tuning your hybrid model, consider these important hyperparameters (a sketch of an extended search space follows this list):

- **Optimizer and Learning Rate**: Try different optimizers (AdamW, RMSProp, Adam) with various learning rates
- **Neural Network Architecture**: Experiment with different `hidden_layers` configurations
- **Activation Functions**: Test different activation functions (relu, sigmoid, tanh)
- **Batch Normalization**: Enable/disable `input_batchnorm` and other normalization options
- **Batch Size**: Adjust `batchsize` for optimal training performance
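
The sketch below extends the `@thyperopt` search from the basic example to cover these settings. It is not executed here, and it assumes that `tune` forwards `hidden_layers`, `activation`, and `batchsize` as keyword arguments in the same way it forwards `opt` and `input_batchnorm`; the trial file names are made up for illustration.

```julia
# Sketch of a broader search space (not run in this tutorial).
# Assumption: `tune` accepts `hidden_layers`, `activation`, and `batchsize`
# as keyword arguments, just like `opt` and `input_batchnorm` above.
nhyper_ext = 16
ho_ext = @thyperopt for i = nhyper_ext,
        opt = [AdamW(0.001), AdamW(0.01), RMSProp(0.001)],
        hidden_layers = [[16, 16], [32, 32], [64, 32]],
        activation = [relu, sigmoid, tanh],
        input_batchnorm = [true, false],
        batchsize = [256, 512]

    hyper_parameters = (; opt, hidden_layers, activation, input_batchnorm, batchsize)
    out = EasyHybrid.tune(
        hybrid_model,
        ds,
        ModelSpec();
        hyper_parameters...,
        nepochs = 10,
        plotting = false,
        show_progress = false,
        file_name = "extended_test$i.jld2"  # hypothetical file name
    )
    out.best_loss  # value Hyperopt minimizes
end

printmin(ho_ext)  # report the best combination found
```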

### Tips for Hyperparameter Tuning

- **Start with a small search space** to get a baseline understanding
- **Monitor for overfitting** by tracking validation loss
- **Consider computational cost** - more hyperparameters and epochs increase training time

## More Examples

Check out the `projects/` directory for additional examples and use cases. Each project demonstrates different aspects of hybrid modeling with EasyHybrid.

projects/book_chapter/Project.toml

Lines changed: 2 additions & 0 deletions

@@ -1,4 +1,6 @@
 [deps]
 EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3"
+Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
 NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
+Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
 WGLMakie = "276b4fcb-3e11-5398-bf8b-a0c2d153d008"

projects/book_chapter/example_synthetic.jl

Lines changed: 86 additions & 6 deletions

@@ -88,9 +88,9 @@ hybrid_model = constructHybridModel(
     neural_param_names,       # NN-predicted parameters
     global_param_names,       # Global parameters
     hidden_layers = [16, 16], # Neural network architecture
-    activation = swish,       # Activation function
+    activation = sigmoid,     # Activation function
     scale_nn_outputs = true,  # Scale neural network outputs
-    input_batchnorm = true    # Apply batch normalization to inputs
+    input_batchnorm = false   # Apply batch normalization to inputs
 )

 # =============================================================================
@@ -105,15 +105,95 @@ out = train(
     ();
     nepochs = 100,                # Number of training epochs
     batchsize = 512,              # Batch size for training
-    opt = RMSProp(0.001),         # Optimizer and learning rate
+    opt = AdamW(0.1),             # Optimizer and learning rate
     monitor_names = [:rb, :Q10],  # Parameters to monitor during training
-    yscale = identity,            # Scaling for outputs
-    patience = 30                 # Early stopping patience
+    yscale = identity             # Scaling for outputs
 )

 # =============================================================================
 # Results Analysis
 # =============================================================================
 # Check the training differences for Q10 parameter
 # This shows how close the model learned the true Q10 value
-out.train_diffs.Q10
+out.train_diffs.Q10
+
+using Hyperopt
+using Distributed
+using WGLMakie
+
+mspempty = ModelSpec()
+
+nhyper = 4
+ho = @thyperopt for i = nhyper,
+        opt = [AdamW(0.01), AdamW(0.1), RMSProp(0.001), RMSProp(0.01)],
+        input_batchnorm = [true, false]
+    hyper_parameters = (; opt, input_batchnorm)
+    println("Hyperparameter run: \n", i, " of ", nhyper, "\t with hyperparameters \t", hyper_parameters, "\t")
+    out = EasyHybrid.tune(hybrid_model, ds, mspempty; hyper_parameters..., nepochs = 10, plotting = false, show_progress = false, file_name = "test$i.jld2")
+    # return a rich record for this trial (stored in ho.results[i]);
+    # the first element is named so that `getfield.(ho.results, :best_loss)` below works
+    (best_loss = out.best_loss,
+        hyperps = hyper_parameters,
+        ps_st = (ps = out.ps, st = out.st),
+        file = "test$i.jld2",
+        i = i)
+end
+
+losses = getfield.(ho.results, :best_loss)
+hyperps = getfield.(ho.results, :hyperps)
+
+# Helper function to make optimizer names short and readable
+function short_opt_name(opt)
+    if opt isa AdamW
+        return "AdamW(η=$(opt.eta))"
+    elseif opt isa RMSProp
+        return "RMSProp(η=$(opt.eta))"
+    else
+        return string(typeof(opt))
+    end
+end
+
+# Sort losses and associated data by increasing loss
+idx = sortperm(losses)
+sorted_losses = losses[idx]
+sorted_hyperps = hyperps[idx]
+
+fig = Figure(figure_padding = 50)
+# Prepare tick labels with hyperparameter info for each trial (sorted)
+sorted_ticklabels = [
+    join([
+        k == :opt ? "opt=$(short_opt_name(v))" : "$k=$(repr(v))"
+        for (k, v) in pairs(hp)
+    ], "\n")
+    for hp in sorted_hyperps
+]
+ax = Makie.Axis(
+    fig[1, 1];
+    xlabel = "Trial",
+    ylabel = "Loss",
+    title = "Hyperparameter Tuning Results",
+    xgridvisible = false,
+    ygridvisible = false,
+    xticks = (1:length(sorted_losses), sorted_ticklabels),
+    xticklabelrotation = 45
+)
+scatter!(ax, 1:length(sorted_losses), sorted_losses; markersize = 15, color = :dodgerblue)
+
+best_idx = argmin(losses)
+best_trial = ho.results[best_idx]
+
+best_params = best_trial.ps_st  # (ps, st) of the best trial
+
+# Print the best hyperparameters
+printmin(ho)
+
+# Plot the results
+import Plots
+using Unitful
+Plots.plot(ho, xrotation = 25, left_margin = [100mm 0mm], bottom_margin = 60mm, ylab = "loss", size = (900, 900))
+
+# Train the model with the best hyperparameters
+best_hyperp = best_hyperparams(ho)
+out = EasyHybrid.tune(hybrid_model, ds, mspempty; best_hyperp..., nepochs = 100)

src/EasyHybrid.jl

Lines changed: 2 additions & 0 deletions

@@ -25,6 +25,7 @@ using JLD2
 using StyledStrings
 using Printf
 using Reexport: @reexport
+using Hyperopt

 @reexport begin
     import LuxCore
@@ -53,5 +54,6 @@ include("utils/show_train.jl")
 include("utils/helpers_for_HybridModel.jl")
 include("plotrecipes.jl")
 include("utils/helpers_data_loading.jl")
+include("tune.jl")

 end
