
Commit c52221d

vpratz, han-ol, and daniel-habermann authored
Release 2.0.5 (#526)
* fix trainable parameters in distributions (#520)
  * Improve numerical precision in MVNScore.log_prob
* add log_gamma diagnostic (#522)
  * add log_gamma diagnostic
  * add missing export for log_gamma
  * add missing export for gamma_null_distribution, gamma_discrepancy
  * fix broken unit tests
  * rename log_gamma module to sbc
  * add test_log_gamma unit test
  * add return information to log_gamma doc string
  * fix typo in docstring, use fixed-length np array to collect log_gammas instead of appending to an empty list
* Breaking changes: Fix bugs regarding counts in standardization layer (#525)
  * standardization: add test for multi-input values (failing)
    This test reveals two bugs in the standardization layer:
    - count is updated multiple times
    - batch_count is too small, as the sizes from reduce_axes have to be multiplied
  * breaking: fix bugs regarding count in standardization layer
    Fixes #524. This fixes the two bugs described in c4cc133:
    - count was accidentally updated, leading to wrong values
    - count was calculated wrongly, as only the batch size was used; correct is the product of all reduce dimensions. This led to wrong standard deviations.
    While the batch dimension is the same for all inputs, the size of the second dimension might vary. For this reason, we need to introduce an input-specific `count` variable. This breaks serialization.
  * fix assert statement in test
* bump version to 2.0.5, adjust deprecation warnings
* rename log_gamma to calibration_log_gamma (#527)

---------

Co-authored-by: han-ol <[email protected]>
Co-authored-by: Daniel Habermann <[email protected]>
1 parent 8990c88 commit c52221d

14 files changed: +294 −38 lines


bayesflow/approximators/continuous_approximator.py

Lines changed: 2 additions & 2 deletions
@@ -578,9 +578,9 @@ def summarize(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
     def summaries(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
         """
         .. deprecated:: 2.0.4
-           `summaries` will be removed in version 2.0.5, it was renamed to `summarize` which should be used instead.
+           `summaries` will be removed in version 2.0.6, it was renamed to `summarize` which should be used instead.
         """
-        warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.5.", FutureWarning)
+        warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.6.", FutureWarning)
         return self.summarize(data=data, **kwargs)

     def log_prob(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:

bayesflow/approximators/model_comparison_approximator.py

Lines changed: 2 additions & 2 deletions
@@ -442,9 +442,9 @@ def summarize(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
     def summaries(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
         """
         .. deprecated:: 2.0.4
-           `summaries` will be removed in version 2.0.5, it was renamed to `summarize` which should be used instead.
+           `summaries` will be removed in version 2.0.6, it was renamed to `summarize` which should be used instead.
         """
-        warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.5.", FutureWarning)
+        warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.6.", FutureWarning)
         return self.summarize(data=data, **kwargs)

     def _compute_logits(self, classifier_conditions: Tensor) -> Tensor:

bayesflow/diagnostics/metrics/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -4,3 +4,4 @@
 from .expected_calibration_error import expected_calibration_error
 from .classifier_two_sample_test import classifier_two_sample_test
 from .model_misspecification import bootstrap_comparison, summary_space_comparison
+from .calibration_log_gamma import calibration_log_gamma, gamma_null_distribution, gamma_discrepancy
bayesflow/diagnostics/metrics/calibration_log_gamma.py

Lines changed: 163 additions & 0 deletions
@@ -0,0 +1,163 @@
+from collections.abc import Mapping, Sequence
+
+import numpy as np
+from scipy.stats import binom
+
+from ...utils.dict_utils import dicts_to_arrays
+
+
+def calibration_log_gamma(
+    estimates: Mapping[str, np.ndarray] | np.ndarray,
+    targets: Mapping[str, np.ndarray] | np.ndarray,
+    variable_keys: Sequence[str] = None,
+    variable_names: Sequence[str] = None,
+    num_null_draws: int = 1000,
+    quantile: float = 0.05,
+):
+    """
+    Compute the log gamma discrepancy statistic to test posterior calibration;
+    see [1] for additional information.
+    Log gamma is log(gamma / gamma_null), where gamma_null is the 5th percentile of the
+    null distribution under uniformity of ranks.
+    That is, if adopting a hypothesis testing framework, then log_gamma < 0 implies
+    a rejection of the hypothesis of uniform ranks at the 5% level.
+    This diagnostic is typically more sensitive than the Kolmogorov-Smirnov test or
+    the chi-squared test.
+
+    [1] Martin Modrák, Angie H. Moon, Shinyoung Kim, Paul Bürkner, Niko Huurre,
+    Kateřina Faltejsková, Andrew Gelman, Aki Vehtari.
+    "Simulation-Based Calibration Checking for Bayesian Computation:
+    The Choice of Test Quantities Shapes Sensitivity."
+    Bayesian Anal. 20 (2) 461-488, June 2025. https://doi.org/10.1214/23-BA1404
+
+    Parameters
+    ----------
+    estimates : np.ndarray of shape (num_datasets, num_draws, num_variables)
+        The random draws from the approximate posteriors over ``num_datasets``.
+    targets : np.ndarray of shape (num_datasets, num_variables)
+        The corresponding ground-truth values sampled from the prior.
+    variable_keys : Sequence[str], optional (default = None)
+        Select keys from the dictionaries provided in estimates and targets.
+        By default, select all keys.
+    variable_names : Sequence[str], optional (default = None)
+        Optional variable names to show in the output.
+    num_null_draws : int, optional, default 1000
+        Number of draws used to estimate the null distribution of gamma.
+    quantile : float in (0, 1), optional, default 0.05
+        The quantile from the null distribution to be used as a threshold.
+        A lower quantile increases sensitivity to deviations from uniformity.
+
+    Returns
+    -------
+    result : dict
+        Dictionary containing:
+
+        - "values" : float or np.ndarray
+            The log gamma values per variable.
+        - "metric_name" : str
+            The name of the metric ("Log Gamma").
+        - "variable_names" : str
+            The (inferred) variable names.
+    """
+    samples = dicts_to_arrays(
+        estimates=estimates,
+        targets=targets,
+        variable_keys=variable_keys,
+        variable_names=variable_names,
+    )
+
+    num_ranks = samples["estimates"].shape[0]
+    num_post_draws = samples["estimates"].shape[1]
+
+    # rank statistics
+    ranks = np.sum(samples["estimates"] < samples["targets"][:, None], axis=1)
+
+    # null distribution and threshold
+    null_distribution = gamma_null_distribution(num_ranks, num_post_draws, num_null_draws)
+    null_quantile = np.quantile(null_distribution, quantile)
+
+    # compute log gamma for each parameter
+    log_gammas = np.empty(ranks.shape[-1])
+
+    for i in range(ranks.shape[-1]):
+        gamma = gamma_discrepancy(ranks[:, i], num_post_draws=num_post_draws)
+        log_gammas[i] = np.log(gamma / null_quantile)
+
+    output = {
+        "values": log_gammas,
+        "metric_name": "Log Gamma",
+        "variable_names": samples["estimates"].variable_names,
+    }
+
+    return output
+
+
+def gamma_null_distribution(num_ranks: int, num_post_draws: int = 1000, num_null_draws: int = 1000) -> np.ndarray:
+    """
+    Computes the distribution of expected gamma values under uniformity of ranks.
+
+    Parameters
+    ----------
+    num_ranks : int
+        Number of ranks to use for each gamma.
+    num_post_draws : int, optional, default 1000
+        Number of posterior draws that were used to calculate the rank distribution.
+    num_null_draws : int, optional, default 1000
+        Number of returned gamma values under uniformity of ranks.
+
+    Returns
+    -------
+    result : np.ndarray
+        Array of shape (num_null_draws,) containing gamma values under uniformity of ranks.
+    """
+    z_i = np.arange(1, num_post_draws + 2) / (num_post_draws + 1)
+    gamma = np.empty(num_null_draws)
+
+    # loop non-vectorized to reduce memory footprint
+    for i in range(num_null_draws):
+        u = np.random.uniform(size=num_ranks)
+        F_z = np.mean(u[:, None] < z_i, axis=0)
+        bin_1 = binom.cdf(num_ranks * F_z, num_ranks, z_i)
+        bin_2 = 1 - binom.cdf(num_ranks * F_z - 1, num_ranks, z_i)
+
+        gamma[i] = 2 * np.min(np.minimum(bin_1, bin_2))
+
+    return gamma
+
+
+def gamma_discrepancy(ranks: np.ndarray, num_post_draws: int = 100) -> float:
+    """
+    Quantifies deviation from uniformity by the likelihood of observing the
+    most extreme point on the empirical CDF of the given rank distribution
+    according to [1] (equation 7).
+
+    [1] Martin Modrák, Angie H. Moon, Shinyoung Kim, Paul Bürkner, Niko Huurre,
+    Kateřina Faltejsková, Andrew Gelman, Aki Vehtari.
+    "Simulation-Based Calibration Checking for Bayesian Computation:
+    The Choice of Test Quantities Shapes Sensitivity."
+    Bayesian Anal. 20 (2) 461-488, June 2025. https://doi.org/10.1214/23-BA1404
+
+    Parameters
+    ----------
+    ranks : np.ndarray of shape (num_ranks,)
+        Empirical rank distribution.
+    num_post_draws : int, optional, default 100
+        Number of posterior draws used to generate the ranks.
+
+    Returns
+    -------
+    result : float
+        Gamma discrepancy value for the given rank distribution.
+    """
+    num_ranks = len(ranks)
+
+    # observed count of ranks smaller than i
+    R_i = np.array([sum(ranks < i) for i in range(1, num_post_draws + 2)])
+
+    # expected proportion of ranks smaller than i
+    z_i = np.arange(1, num_post_draws + 2) / (num_post_draws + 1)
+
+    bin_1 = binom.cdf(R_i, num_ranks, z_i)
+    bin_2 = 1 - binom.cdf(R_i - 1, num_ranks, z_i)
+
+    # likelihood of obtaining the most extreme point on the empirical CDF
+    # if the rank distribution was indeed uniform
+    return float(2 * np.min(np.minimum(bin_1, bin_2)))
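
For orientation, a minimal usage sketch of the new diagnostic (not part of the commit). The data are synthetic: 200 simulated datasets, 100 posterior draws each, 3 parameters. Drawing estimates and targets independently from the same prior mimics the well-calibrated case, so the ranks are uniform and log gamma should typically stay above zero.

    import numpy as np
    from bayesflow.diagnostics.metrics import calibration_log_gamma

    rng = np.random.default_rng(1)

    # ground truths sampled from the prior, shape (num_datasets, num_variables)
    targets = rng.normal(size=(200, 3))
    # posterior draws, shape (num_datasets, num_draws, num_variables); independent draws
    # from the same prior correspond to a calibrated posterior with uninformative data
    estimates = rng.normal(size=(200, 100, 3))

    result = calibration_log_gamma(estimates, targets)
    print(result["metric_name"], result["values"])  # log gamma < 0 would flag miscalibration at the 5% level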

bayesflow/distributions/diagonal_normal.py

Lines changed: 7 additions & 6 deletions
@@ -58,7 +58,6 @@ def __init__(
         self.seed_generator = seed_generator or keras.random.SeedGenerator()

         self.dim = None
-        self.log_normalization_constant = None
         self._mean = None
         self._std = None

@@ -71,17 +70,18 @@ def build(self, input_shape: Shape) -> None:
         self.mean = ops.cast(ops.broadcast_to(self.mean, (self.dim,)), "float32")
         self.std = ops.cast(ops.broadcast_to(self.std, (self.dim,)), "float32")

-        self.log_normalization_constant = -0.5 * self.dim * math.log(2.0 * math.pi) - ops.sum(ops.log(self.std))
-
         if self.trainable_parameters:
             self._mean = self.add_weight(
                 shape=ops.shape(self.mean),
-                initializer=keras.initializers.get(self.mean),
+                initializer=keras.initializers.get(keras.ops.copy(self.mean)),
                 dtype="float32",
                 trainable=True,
             )
             self._std = self.add_weight(
-                shape=ops.shape(self.std), initializer=keras.initializers.get(self.std), dtype="float32", trainable=True
+                shape=ops.shape(self.std),
+                initializer=keras.initializers.get(keras.ops.copy(self.std)),
+                dtype="float32",
+                trainable=True,
             )
         else:
             self._mean = self.mean
@@ -91,7 +91,8 @@ def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
         result = -0.5 * ops.sum((samples - self._mean) ** 2 / self._std**2, axis=-1)

         if normalize:
-            result += self.log_normalization_constant
+            log_normalization_constant = -0.5 * self.dim * math.log(2.0 * math.pi) - ops.sum(ops.log(self._std))
+            result += log_normalization_constant

         return result
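
For reference (not part of the diff), the log density evaluated by DiagonalNormal.log_prob is

    \log p(x) = -\frac{1}{2}\sum_{i=1}^{d}\frac{(x_i - \mu_i)^2}{\sigma_i^2} - \frac{d}{2}\log(2\pi) - \sum_{i=1}^{d}\log\sigma_i

The last two terms form the log-normalization constant. Recomputing them inside log_prob makes them track the current value of the trainable `_std`, whereas the constant previously cached in `build` stayed frozen at the initial `std`.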

bayesflow/distributions/diagonal_student_t.py

Lines changed: 13 additions & 12 deletions
@@ -63,7 +63,6 @@ def __init__(

         self.seed_generator = seed_generator or keras.random.SeedGenerator()

-        self.log_normalization_constant = None
         self.dim = None
         self._loc = None
         self._scale = None
@@ -78,21 +77,16 @@ def build(self, input_shape: Shape) -> None:
         self.loc = ops.cast(ops.broadcast_to(self.loc, (self.dim,)), "float32")
         self.scale = ops.cast(ops.broadcast_to(self.scale, (self.dim,)), "float32")

-        self.log_normalization_constant = (
-            -0.5 * self.dim * math.log(self.df)
-            - 0.5 * self.dim * math.log(math.pi)
-            - math.lgamma(0.5 * self.df)
-            + math.lgamma(0.5 * (self.df + self.dim))
-            - ops.sum(keras.ops.log(self.scale))
-        )
-
         if self.trainable_parameters:
             self._loc = self.add_weight(
-                shape=ops.shape(self.loc), initializer=keras.initializers.get(self.loc), dtype="float32", trainable=True
+                shape=ops.shape(self.loc),
+                initializer=keras.initializers.get(keras.ops.copy(self.loc)),
+                dtype="float32",
+                trainable=True,
             )
             self._scale = self.add_weight(
                 shape=ops.shape(self.scale),
-                initializer=keras.initializers.get(self.scale),
+                initializer=keras.initializers.get(keras.ops.copy(self.scale)),
                 dtype="float32",
                 trainable=True,
             )
@@ -105,7 +99,14 @@ def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
         result = -0.5 * (self.df + self.dim) * ops.log1p(mahalanobis_term / self.df)

         if normalize:
-            result += self.log_normalization_constant
+            log_normalization_constant = (
+                -0.5 * self.dim * math.log(self.df)
+                - 0.5 * self.dim * math.log(math.pi)
+                - math.lgamma(0.5 * self.df)
+                + math.lgamma(0.5 * (self.df + self.dim))
+                - ops.sum(keras.ops.log(self._scale))
+            )
+            result += log_normalization_constant

         return result
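
Analogously (not part of the diff), the diagonal Student-t log density with \nu = df is

    \log p(x) = -\frac{\nu + d}{2}\log\!\left(1 + \frac{1}{\nu}\sum_{i=1}^{d}\frac{(x_i - \mu_i)^2}{\sigma_i^2}\right) + \log\Gamma\!\left(\frac{\nu + d}{2}\right) - \log\Gamma\!\left(\frac{\nu}{2}\right) - \frac{d}{2}\log(\nu\pi) - \sum_{i=1}^{d}\log\sigma_i

Only the \sum_i \log\sigma_i term depends on the scale, which is why the normalization constant is now rebuilt from the trainable `_scale` at call time instead of being cached in `build`.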

bayesflow/distributions/mixture.py

Lines changed: 1 addition & 1 deletion
@@ -144,7 +144,7 @@ def build(self, input_shape: Shape) -> None:

         self._mixture_logits = self.add_weight(
             shape=(len(self.distributions),),
-            initializer=keras.initializers.get(self.mixture_logits),
+            initializer=keras.initializers.get(keras.ops.copy(self.mixture_logits)),
             dtype="float32",
             trainable=self.trainable_mixture,
         )

bayesflow/networks/standardization/standardization.py

Lines changed: 5 additions & 5 deletions
@@ -40,7 +40,7 @@ def moving_std(self, index: int) -> Tensor:
         """
         return keras.ops.where(
             self.moving_m2[index] > 0,
-            keras.ops.sqrt(self.moving_m2[index] / self.count),
+            keras.ops.sqrt(self.moving_m2[index] / self.count[index]),
             1.0,
         )

@@ -53,7 +53,7 @@ def build(self, input_shape: Shape):
         self.moving_m2 = [
             self.add_weight(shape=(shape[-1],), initializer="zeros", trainable=False) for shape in flattened_shapes
         ]
-        self.count = self.add_weight(shape=(), initializer="zeros", trainable=False)
+        self.count = [self.add_weight(shape=(), initializer="zeros", trainable=False) for _ in flattened_shapes]

     def call(
         self,
@@ -150,7 +150,7 @@ def _update_moments(self, x: Tensor, index: int):
         """

         reduce_axes = tuple(range(x.ndim - 1))
-        batch_count = keras.ops.cast(keras.ops.shape(x)[0], self.count.dtype)
+        batch_count = keras.ops.cast(keras.ops.prod(keras.ops.shape(x)[:-1]), self.count[index].dtype)

         # Compute batch mean and M2 per feature
         batch_mean = keras.ops.mean(x, axis=reduce_axes)
@@ -159,7 +159,7 @@ def _update_moments(self, x: Tensor, index: int):
         # Read current totals
         mean = self.moving_mean[index]
         m2 = self.moving_m2[index]
-        count = self.count
+        count = self.count[index]

         total_count = count + batch_count
         delta = batch_mean - mean
@@ -169,4 +169,4 @@ def _update_moments(self, x: Tensor, index: int):

         self.moving_mean[index].assign(new_mean)
         self.moving_m2[index].assign(new_m2)
-        self.count.assign(total_count)
+        self.count[index].assign(total_count)
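
The fix reduces to two bookkeeping rules in the Welford/Chan-style moment update: the running count must grow by the number of pooled elements (the product of all reduced axes, not just the batch size), and each input needs its own count because the non-batch sizes can differ between inputs. A standalone numpy sketch of that update rule, for illustration only:

    import numpy as np

    def update_moments(mean, m2, count, x):
        # pooled update of per-feature mean and M2 for a batch x of shape (..., features)
        reduce_axes = tuple(range(x.ndim - 1))
        batch_count = np.prod(x.shape[:-1])        # product of all reduced dims, not just x.shape[0]
        batch_mean = x.mean(axis=reduce_axes)
        batch_m2 = ((x - batch_mean) ** 2).sum(axis=reduce_axes)

        total_count = count + batch_count
        delta = batch_mean - mean
        new_mean = mean + delta * batch_count / total_count
        new_m2 = m2 + batch_m2 + delta**2 * count * batch_count / total_count
        return new_mean, new_m2, total_count

    # one (mean, m2, count) triple per input, since set sizes may differ between inputs
    mean, m2, count = np.zeros(4), np.zeros(4), 0.0
    x = np.random.default_rng(0).normal(size=(32, 7, 4))   # batch of 32 sets of size 7, 4 features
    mean, m2, count = update_moments(mean, m2, count, x)
    std = np.sqrt(m2 / count)                              # agrees with np.std(x, axis=(0, 1))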

bayesflow/scores/multivariate_normal_score.py

Lines changed: 8 additions & 6 deletions
@@ -82,13 +82,15 @@ def log_prob(self, x: Tensor, mean: Tensor, cov_chol: Tensor) -> Tensor:
         """
         diff = x - mean

-        # Calculate covariance from Cholesky factors
-        covariance = keras.ops.matmul(
-            cov_chol,
-            keras.ops.swapaxes(cov_chol, -2, -1),
+        # Calculate precision from Cholesky factors of covariance matrix
+        cov_chol_inv = keras.ops.inv(cov_chol)
+        precision = keras.ops.matmul(
+            keras.ops.swapaxes(cov_chol_inv, -2, -1),
+            cov_chol_inv,
         )
-        precision = keras.ops.inv(covariance)
-        log_det_covariance = keras.ops.slogdet(covariance)[1]  # Only take the log of the determinant part
+
+        # Compute log determinant, exploiting Cholesky factors
+        log_det_covariance = keras.ops.log(keras.ops.prod(keras.ops.diagonal(cov_chol, axis1=1, axis2=2), axis=1)) * 2

         # Compute the quadratic term in the exponential of the multivariate Gaussian
         quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, precision, diff)
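
The rewritten log_prob leans on two standard Cholesky identities: with \Sigma = L L^\top, the precision is \Sigma^{-1} = L^{-\top} L^{-1} and \log\det\Sigma = 2\sum_i \log L_{ii} (the diff computes the equivalent log of the product of the diagonal). A small numpy check of both identities, illustrative only:

    import numpy as np

    rng = np.random.default_rng(0)
    A = rng.normal(size=(3, 3))
    cov = A @ A.T + 3 * np.eye(3)        # a well-conditioned covariance matrix
    L = np.linalg.cholesky(cov)          # lower-triangular factor, cov = L @ L.T

    # precision from the factor: inv(cov) = inv(L).T @ inv(L)
    L_inv = np.linalg.inv(L)
    precision = L_inv.T @ L_inv
    assert np.allclose(precision, np.linalg.inv(cov))

    # log-determinant from the factor's diagonal: log det(cov) = 2 * sum(log(diag(L)))
    log_det = 2 * np.sum(np.log(np.diag(L)))
    assert np.isclose(log_det, np.linalg.slogdet(cov)[1])

Working with the triangular factor avoids forming and inverting the full covariance matrix, which is where the numerical precision gain comes from.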

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "bayesflow"
-version = "2.0.4"
+version = "2.0.5"
 authors = [{ name = "The BayesFlow Team" }]
 classifiers = [
     "Development Status :: 5 - Production/Stable",
