Merged
3 changes: 3 additions & 0 deletions .gitignore
@@ -15,3 +15,6 @@ docs/src/**/*.html
docs/src/**/*.ipynb
docs/src/**/*Manifest.toml
docs/src_stash/*.ipynb
docs/src/tutorials/test.*
docs/src/tutorials/tmp*

14 changes: 13 additions & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "HybridVariationalInference"
uuid = "a108c475-a4e2-4021-9a84-cfa7df242f64"
authors = ["Thomas Wutzler <[email protected]> and contributors"]
version = "1.0.0-DEV"
version = "0.2"

[deps]
Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
@@ -10,22 +10,28 @@ ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
DistributionFits = "45214091-1ed4-4409-9bcf-fdb48a05e921"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
NaNMath = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[weakdeps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
@@ -47,17 +53,22 @@ ChainRulesCore = "1.25"
Combinatorics = "1.0.2"
CommonSolve = "0.2.4"
ComponentArrays = "0.15.19"
DifferentiationInterface = "0.6.54, 0.7"
DistributionFits = "0.3.9"
Distributions = "0.25.117"
FillArrays = "1.13.0"
Flux = "0.14, 0.15, 0.16"
Functors = "0.4, 0.5"
GPUArraysCore = "0.1, 0.2"
KernelAbstractions = "0.9.34"
LinearAlgebra = "1.10"
LogExpFunctions = "0.3.29"
Lux = "1.4.2"
MLDataDevices = "1.5, 1.6"
MLUtils = "0.4.5"
Missings = "1.2.0"
NaNMath = "1.1.3"
Optimisers = "0.4.6"
Optimization = "3.19.3, 4"
Random = "1.10.0"
SimpleChains = "0.4"
@@ -66,6 +77,7 @@ StaticArrays = "1.9.13"
StatsBase = "0.34.4"
StatsFuns = "1.3.2"
Test = "1.10"
Zygote = "0.7.10"
julia = "1.10"

[workspace]
7 changes: 4 additions & 3 deletions dev/doubleMM.jl
@@ -42,7 +42,7 @@ train_dataloader = MLUtils.DataLoader(
(xM, xP, y_o, y_unc, 1:n_site);
batchsize = n_batch, partial = false)
σ_o = exp.(y_unc[:, 1] / 2)
# assign the train_loader, otherwise it eatch time creates another version of synthetic data
# assign the train_loader, otherwise it each time creates another version of synthetic data
prob0 = HybridProblem(prob0_; train_dataloader)
#tmp = HVI.get_hybridproblem_ϕunc(prob0; scenario)
#prob0.covar
@@ -248,7 +248,7 @@ end
(y2_K1global, θsP2_K1global, θsMs2_K1global) = (y, θsP, θsMs);
end

() -> begin # otpimize using LUX
() -> begin # optimize using LUX
#using Lux
g_lux = Lux.Chain(
# dense layer with bias that maps to 8 outputs and applies `tanh` activation
@@ -560,7 +560,8 @@ end
end

#ζi = first(eachrow(Array(chain)))
f_allsites = get_hybridproblem_PBmodel(prob0; scenario, use_all_sites = true)
f = get_hybridproblem_PBmodel(probc; scenario)
f_allsites = create_nsite_applicator(f, n_site)
#ζs = mapreduce(ζi -> transposeMs(ζi, intm_PMs_gen, true), hcat, eachrow(Array(chain)));
ζsP = Array(chain)[:,1:n_θP]'
ζsMst = reshape(Array(chain)[:,(n_θP+1) : end], n_sample_NUTS, n_site, n_θM)
4 changes: 4 additions & 0 deletions docs/_quarto.yml
@@ -3,6 +3,10 @@ project:
render:
- src/tutorials/basic_cpu.qmd
- src/tutorials/*.qmd
# julia:
# # workaround for quarto not picking up the correct environment variable
# # https://github.com/quarto-dev/quarto-cli/issues/13416#issuecomment-3333700286
# env: ["JULIA_DEPOT_PATH=/User/homes/twutz/scratch/twutz/julia_gpu_depots"]



4 changes: 2 additions & 2 deletions docs/src/explanation/theory_hvi.md
@@ -28,7 +28,7 @@ In order to learn $\phi_g$, the user needs to provide a batch of $i \in \{1 \ldo
## Estimation using the ELBO

In order to find the parameters of the approximation of the posterior, HVI
minizes the KL divergence between the approximation and the true posterior.
minimizes the KL divergence between the approximation and the true posterior.
This is achieved by maximizing the evidence lower bound (ELBO).

$$\mathcal{L}(\phi) = \mathbb{E}_{q(\theta)} \left[\log p(y,\theta) \right] - \mathbb{E}_{q(\theta)} \left[\log q(\theta) \right]$$
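The connection to the KL divergence can be made explicit via the standard decomposition of the evidence (restated here for orientation; the notation follows the equations above):

$$\log p(y) = \mathcal{L}(\phi) + D_{KL}\big(q(\theta)\,\|\,p(\theta|y)\big)$$

Since the KL divergence is non-negative and $\log p(y)$ does not depend on $\phi$, maximizing $\mathcal{L}(\phi)$ is equivalent to minimizing the divergence between $q(\theta)$ and the true posterior.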
@@ -128,7 +128,7 @@ $\phi = (\phi_P, \phi_g, \phi_u)$, comprises
- $\phi_P = \mu_{\zeta_P}$: the means of the distributions of the transformed global
parameters,
- $\phi_g$: the parameters of the machine learning model, and
- $\phi_u$: paramerization of $\Sigma_\zeta$ that is additional to the means.
- $\phi_u$: parameterization of $\Sigma_\zeta$ that is additional to the means.

### Details
Specifically, $\phi_u= (log\sigma^2_P, log\sigma^2_{M0}, log\sigma^2_{M\eta}, a_P, a_M)$,
20 changes: 12 additions & 8 deletions docs/src/tutorials/basic_cpu.md
@@ -27,7 +27,8 @@ The example process based model (PBM) predicts a double-monod constrained rate
for different substrate concentrations, `S1`, and `S2`.

$$
y = r_0+ r_1 \frac{S_1}{K_1 + S_1} \frac{S_2}{K_2 + S_2}$$
y = r_0+ r_1 \frac{S_1}{K_1 + S_1} \frac{S_2}{K_2 + S_2}
$$

``` julia
function f_doubleMM(θc::CA.ComponentVector{ET}, x) where ET
Expand All @@ -49,7 +50,7 @@ access the components by its symbolic names in the provided `ComponentArray`.
HVI requires the evaluation of the likelihood of the predictions.
It corresponds to the cost of predictions given some observations.

The user specifies a function of the negative log-Likehood
The user specifies a function of the negative log-Likelihood
`neg_logden(obs, pred, uncertainty_parameters)`,
where all of the parameters are arrays with columns for sites.
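A minimal sketch of such a function, assuming independent Gaussian observation errors and that the uncertainty parameters hold log-variances (the function name and this parameterization are illustrative assumptions, not the package API):

```julia
# Illustrative negative log-likelihood for independent Gaussian errors.
# obs, pred, and y_unc are arrays of matching shape with one column per site;
# y_unc is assumed to hold log-variances of the observation error.
function neg_logden_gauss(obs, pred, y_unc)
    σ2 = exp.(y_unc)  # variances from log-variances
    # Gaussian density, up to the additive constant (n/2)·log(2π)
    0.5 * sum(log.(σ2) .+ abs2.(obs .- pred) ./ σ2)
end
```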

@@ -229,10 +230,10 @@ given a vector of global parameters, and a matrix of site parameters to
invocation of the process based model (PBM), defined at the beginning.

``` julia
f_batch = f_allsites = PBMSiteApplicator(f_doubleMM; θP, θM, θFix, xPvec=xP[:,1])
f_batch = PBMSiteApplicator(f_doubleMM; θP, θM, θFix, xPvec=xP[:,1])

prob = HybridProblem(θP, θM, g_chain_scaled, ϕg0,
f_batch, f_allsites, priors_dict, py,
f_batch, priors_dict, py,
transM, transP, train_dataloader, n_covar, n_site, n_batch)
```

@@ -241,6 +242,11 @@ prob = HybridProblem(θP, θM, g_chain_scaled, ϕg0,
Finally, having assembled all the moving parts of the HVI, we can perform
the inversion.

``` julia
# silence warning of no GPU backend found (because we did not import CUDA here)
ENV["MLDATADEVICES_SILENCE_WARN_NO_GPU"] = 1
```

``` julia
using OptimizationOptimisers
import Zygote
@@ -313,8 +319,7 @@ The HVI Problem needs to be updated with this new applicator.

``` julia
f_batch = PBMPopulationApplicator(f_doubleMM_sites, n_batch; θP, θM, θFix, xPvec=xP[:,1])
f_allsites = PBMPopulationApplicator(f_doubleMM_sites, n_site; θP, θM, θFix, xPvec=xP[:,1])
probo_sites = HybridProblem(probo; f_batch, f_allsites)
probo_sites = HybridProblem(probo; f_batch)
```

For numerical efficiency, the number of sites within one batch is part of the
Expand Down Expand Up @@ -345,8 +350,7 @@ module `Main` to allow for easier reloading with JLD2.

``` julia
f_batch = PBMPopulationApplicator(DoubleMM.f_doubleMM_sites, n_batch; θP, θM, θFix, xPvec=xP[:,1])
f_allsites = PBMPopulationApplicator(DoubleMM.f_doubleMM_sites, n_site; θP, θM, θFix, xPvec=xP[:,1])
probo2 = HybridProblem(probo; f_batch, f_allsites)
probo2 = HybridProblem(probo; f_batch)
```

``` julia
24 changes: 14 additions & 10 deletions docs/src/tutorials/basic_cpu.qmd
@@ -36,7 +36,8 @@ The example process based model (PBM) predicts a double-monod constrained rate
for different substrate concentrations, `S1`, and `S2`.

$$
y = r_0+ r_1 \frac{S_1}{K_1 + S_1} \frac{S_2}{K_2 + S_2}$$
y = r_0+ r_1 \frac{S_1}{K_1 + S_1} \frac{S_2}{K_2 + S_2}
$$

```{julia}
function f_doubleMM(θc::CA.ComponentVector{ET}, x) where ET
Expand All @@ -58,7 +59,7 @@ access the components by its symbolic names in the provided `ComponentArray`.
HVI requires the evaluation of the likelihood of the predictions.
It corresponds to the cost of predictions given some observations.

The user specifies a function of the negative log-Likehood
The user specifies a function of the negative log-Likelihood
`neg_logden(obs, pred, uncertainty_parameters)`,
where all of the parameters are arrays with columns for sites.

@@ -239,10 +240,10 @@ given a vector of global parameters, and a matrix of site parameters to
invocation of the process based model (PBM), defined at the beginning.

```{julia}
f_batch = f_allsites = PBMSiteApplicator(f_doubleMM; θP, θM, θFix, xPvec=xP[:,1])
f_batch = PBMSiteApplicator(f_doubleMM; θP, θM, θFix, xPvec=xP[:,1])

prob = HybridProblem(θP, θM, g_chain_scaled, ϕg0,
f_batch, f_allsites, priors_dict, py,
f_batch, priors_dict, py,
transM, transP, train_dataloader, n_covar, n_site, n_batch)
```

@@ -265,7 +266,7 @@ y1 = f_batch(CA.getdata(θP), CA.getdata(θMs), CA.getdata(x_batch))[2]
#using Cthulhu
#@descend_code_warntype f_batch(CA.getdata(θP), CA.getdata(θMs), CA.getdata(x_batch))
prob0 = HVI.DoubleMM.DoubleMMCase()
f_batch0 = get_hybridproblem_PBmodel(prob0; use_all_sites = false)
f_batch0 = get_hybridproblem_PBmodel(prob0)
y1f = f_batch0(θP, θMs, x_batch)[2]
y1 .- y1f # equal
end
@@ -276,6 +277,11 @@ end
Finally, having assembled all the moving parts of the HVI, we can perform
the inversion.

```{julia}
# silence warning of no GPU backend found (because we did not import CUDA here)
ENV["MLDATADEVICES_SILENCE_WARN_NO_GPU"] = 1
```

```{julia}
using OptimizationOptimisers
import Zygote
Expand Down Expand Up @@ -349,8 +355,7 @@ The HVI Problem needs to be updated with this new applicatior.

```{julia}
f_batch = PBMPopulationApplicator(f_doubleMM_sites, n_batch; θP, θM, θFix, xPvec=xP[:,1])
f_allsites = PBMPopulationApplicator(f_doubleMM_sites, n_site; θP, θM, θFix, xPvec=xP[:,1])
probo_sites = HybridProblem(probo; f_batch, f_allsites)
probo_sites = HybridProblem(probo; f_batch)
```

For numerical efficiency, the number of sites within one batch is part of the
Expand Down Expand Up @@ -380,8 +385,7 @@ module `Main` to allow for easier reloading with JLD2.

```{julia}
f_batch = PBMPopulationApplicator(DoubleMM.f_doubleMM_sites, n_batch; θP, θM, θFix, xPvec=xP[:,1])
f_allsites = PBMPopulationApplicator(DoubleMM.f_doubleMM_sites, n_site; θP, θM, θFix, xPvec=xP[:,1])
probo2 = HybridProblem(probo; f_batch, f_allsites)
probo2 = HybridProblem(probo; f_batch)
```

```{julia}
Expand All @@ -397,4 +401,4 @@ end
#| eval: false
#| echo: false
probo = load(fname, "probo"; iotype = IOStream);
```
```
2 changes: 1 addition & 1 deletion docs/src/tutorials/blocks_corr.md
@@ -1,4 +1,4 @@
# How to model indenpendent parameter-blocks in the posterior
# How to model independent parameter-blocks in the posterior


``` @meta
2 changes: 1 addition & 1 deletion docs/src/tutorials/blocks_corr.qmd
@@ -1,5 +1,5 @@
---
title: "How to model indenpendent parameter-blocks in the posterior"
title: "How to model independent parameter-blocks in the posterior"
engine: julia
execute:
echo: true
2 changes: 1 addition & 1 deletion docs/src/tutorials/inspect_results.md
@@ -39,7 +39,7 @@ using function [`sample_posterior`](@ref).
using StableRNGs
rng = StableRNG(112)
n_sample_pred = 400
(; θsP, θsMs) = sample_posterior(rng, probo; n_sample_pred)
(; θsP, θsMs) = sample_posterior(rng, probo; n_sample_pred, is_testmode = true)
```

Let's look at the results.
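For instance, the draws of the global parameters can be summarized as follows (a sketch; it assumes `θsP` stores the draws with one column per sample — check the orientation in your version):

```julia
using Statistics
# Posterior mean and 95% credible interval per global parameter,
# assuming θsP is parameter × sample.
θP_mean = mean(θsP; dims = 2)
θP_ci = mapslices(v -> quantile(vec(v), [0.025, 0.975]), θsP; dims = 2)
```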
6 changes: 2 additions & 4 deletions docs/src/tutorials/inspect_results.qmd
@@ -53,9 +53,7 @@ probo = load(fname, "probo");
_xP_batch = first(probo.train_dataloader)[2]
f_batch = PBMPopulationApplicator(
f_doubleMM_sites, probo.n_batch; probo.θP, probo.θM, θFix, xPvec=_xP_batch[:,1])
f_allsites = PBMPopulationApplicator(
f_doubleMM_sites, probo.n_site; probo.θP, probo.θM, θFix, xPvec=_xP_batch[:,1])
probo = HybridProblem(probo; f_batch, f_allsites)
probo = HybridProblem(probo; f_batch)
```
## Sample the posterior

Expand All @@ -66,7 +64,7 @@ using function [`sample_posterior`](@ref).
using StableRNGs
rng = StableRNG(112)
n_sample_pred = 400
(; θsP, θsMs) = sample_posterior(rng, probo; n_sample_pred)
(; θsP, θsMs) = sample_posterior(rng, probo; n_sample_pred, is_testmode = true)
```

Let's look at the results.
Binary file modified docs/src/tutorials/intermediate/basic_cpu_results.jld2
2 changes: 1 addition & 1 deletion docs/src/tutorials/logden_user.md
@@ -5,7 +5,7 @@
CurrentModule = HybridVariationalInference
```

This guide shows how the user can specify a customized log-density function.
This guide shows how the user can specify a customized log-Likelihood function.

## Motivation
