Add group pairwise AUC metric for a list of predictions.

lingvo-bot · copybara-github · commit f4192cb4cf6e · 2022-10-10T14:37:29.000-07:00
PiperOrigin-RevId: 480180039
diff --git a/lingvo/core/metrics.py b/lingvo/core/metrics.py
@@ -728,3 +728,62 @@ def Summary(self, name):
   def _CreateSummary(self, name):
     """Returns a tf.Summary for this metric."""
     raise NotImplementedError()
+
+
+class GroupPairAUCMetric(AUCMetric):
+  """Compute the AUC score for all pairs extracted from each group of items.
+
+  For each group of items, the metric extracts all pairs with different
+  target values. For each pair (i, j), the metric computes the binary
+  classification AUC where the `label = 1 if target[i] > target[j] else 0` and
+  `prob = sigmoid(logits[i] - logits[j])`.
+
+  To prevent generating pairs across groups, an additional arg `group_ids` is
+  required, which is a list of ints that specifies the group_id of each item.
+
+  In addition, in order to achieve streaming computation, items from the same
+  group need to form contiguous chunks,
+  e.g. group_ids = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2].
+
+  In the case of [0, 0, 1, 1, 1, 0, 0, 2, 2, 2, 2], the second chunk of 0s will
+  be treated as a separate 3rd group rather than part of the 1st group.
+  """
+
+  def UpdateRaw(self, group_ids, target, logits, weight=None):
+    """Appends one (label, prob, weight) entry per in-group pair.
+
+    Args:
+      group_ids: An array to specify the group identity of each item.
+      target: An array to specify the groundtruth float values.
+      logits: An array to specify the raw prediction logits.
+      weight: Optional array of per-item weights; a pair is weighted by
+        min(1.0, weight[i] + weight[j]), or 1.0 if weight is None or empty.
+    """
+
+    # This method requires `self._samples` to be non-positive.
+    assert self._samples <= 0
+    # `if weight:` is ambiguous for numpy arrays, so test None/length instead.
+    has_weight = weight is not None and len(weight) > 0
+    sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
+
+    def _ProcessChunk(s, e):
+      """Emits every pair (i, j), s <= i < j < e, whose targets differ."""
+      for i in range(s, e):
+        for j in range(i + 1, e):
+          if target[i] == target[j]:
+            continue  # Only pairs with different targets are used.
+          self._label.append(1 if target[i] > target[j] else 0)
+          self._prob.append(sigmoid(logits[i] - logits[j]))
+          if has_weight:
+            self._weight.append(min(1.0, weight[i] + weight[j]))
+          else:
+            self._weight.append(1.0)
+
+    s, e = 0, 1
+    while e <= len(target):
+      # A chunk ends at the end of the input or where the group id changes.
+      if e == len(target) or group_ids[e] != group_ids[s]:
+        # Process the current chunk [s:e], then start a new chunk at `e`.
+        _ProcessChunk(s, e)
+        s = e
+      e += 1
diff --git a/lingvo/core/metrics_test.py b/lingvo/core/metrics_test.py
@@ -246,6 +246,32 @@ def testMultiClassAUCMetric(self):
     # Verify average AUC value.
     self.assertAllClose(0.722222222, m.value)
 
+  def testGroupPairAUCMetric(self):
+    """Checks GroupPairAUCMetric against AUCMetric fed the same pairs."""
+    if not metrics.HAS_SKLEARN:
+      self.skipTest('sklearn is not installed.')
+    pair_m = metrics.AUCMetric()
+    group_m = metrics.GroupPairAUCMetric()
+    group_ids = [0, 0, 0, 1, 1, 1, 2, 2]
+    # Fixed inputs: reproducible, and both pair labels (0 and 1) occur.
+    target = [0.9, 0.1, 0.5, 0.2, 0.8, 0.4, 0.3, 0.7]
+    logits = [0.6, 0.3, 0.1, 0.7, 0.2, 0.9, 0.5, 0.4]
+    weight = [0.2, 0.3, 0.9, 0.4, 0.8, 0.1, 0.6, 0.5]
+    group_m.UpdateRaw(group_ids, target, logits, weight)
+    sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
+    left, right = 0, 0
+    while right < len(group_ids):
+      while right < len(group_ids) and group_ids[right] == group_ids[left]:
+        right += 1
+      for i in range(left, right):
+        for j in range(i + 1, right):
+          if target[i] != target[j]:
+            pair_m.Update(
+                label=[1 if target[i] > target[j] else 0],
+                prob=[sigmoid(logits[i] - logits[j])],
+                weight=[min(1.0, weight[i] + weight[j])])
+      left = right
+    self.assertEqual(pair_m.value, group_m.value)
 
 if __name__ == '__main__':
   test_utils.main()