diff --git a/edward2/tensorflow/metrics.py b/edward2/tensorflow/metrics.py
index dcd8a2be..287af7bb 100644
--- a/edward2/tensorflow/metrics.py
+++ b/edward2/tensorflow/metrics.py
@@ -171,12 +171,13 @@ def logit_kl_divergence(logits_1, logits_2):
   return tf.reduce_mean(vals)
 
 
-def kl_divergence(p, q):
+def kl_divergence(p, q, clip=True):
   """Generalized KL divergence [1] for unnormalized distributions.
 
   Args:
     p: tf.Tensor.
-    q: tf.Tensor
+    q: tf.Tensor.
+    clip: bool.
 
   Returns:
     tf.Tensor of the Kullback-Leibler divergences per example.
@@ -187,7 +188,10 @@ def kl_divergence(p, q):
     matrix factorization." Advances in neural information processing
     systems. 2001.
   """
-  return tf.reduce_sum(p * tf.math.log(p / q) - p + q, axis=-1)
+  if clip:
+    p = tf.clip_by_value(p, tf.keras.backend.epsilon(), 1)
+    q = tf.clip_by_value(q, tf.keras.backend.epsilon(), 1)
+  return tf.reduce_sum(p * tf.math.log(p / q), axis=-1)
 
 
 def lp_distance(x, y, p=1):
@@ -229,7 +233,7 @@ def average_pairwise_diversity(probs, num_models, error=None):
   # TODO(ghassen): we could also return max and min pairwise metrics.
   average_disagreement = tf.reduce_mean(tf.stack(pairwise_disagreement))
   if error is not None:
-    average_disagreement /= (error + tf.keras.backend.epsilon())
+    average_disagreement /= (1 - error + tf.keras.backend.epsilon())
   average_kl_divergence = tf.reduce_mean(tf.stack(pairwise_kl_divergence))
   average_cosine_distance = tf.reduce_mean(tf.stack(pairwise_cosine_distance))
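
A minimal standalone sketch of the kl_divergence change above, assuming TensorFlow 2.x eager execution; the probability vectors are made-up example values, not anything from the repository's tests. It shows why the new clip branch matters when a predicted probability is exactly zero.

# Minimal sketch (not part of the patch), assuming TensorFlow 2.x eager mode.
# It mirrors the new clip branch of kl_divergence; the tensors below are
# made-up example values.
import tensorflow as tf


def kl_divergence(p, q, clip=True):
  """Per-example KL divergence; clips p and q to [epsilon, 1] when clip=True."""
  if clip:
    p = tf.clip_by_value(p, tf.keras.backend.epsilon(), 1)
    q = tf.clip_by_value(q, tf.keras.backend.epsilon(), 1)
  return tf.reduce_sum(p * tf.math.log(p / q), axis=-1)


p = tf.constant([[0.7, 0.3, 0.0]])  # hypothetical softmax output with a zero entry
q = tf.constant([[0.6, 0.4, 0.0]])  # second output, also zero in that class

print(kl_divergence(p, q, clip=False).numpy())  # [nan], from 0.0 * log(0.0 / 0.0)
print(kl_divergence(p, q, clip=True).numpy())   # [~0.0216], finite after clipping

The last hunk only changes the normalizer: when error is given, average disagreement is now divided by (1 - error + epsilon) rather than (error + epsilon), presumably normalizing by the ensemble's accuracy instead of its error rate.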