ArneBinder · ArneBinder · Mar 4, 2022 · Mar 5, 2022 · Mar 5, 2022 · Mar 5, 2022
diff --git a/examples/train/set_prediction.py b/examples/train/set_prediction.py
@@ -0,0 +1,83 @@
+import pytorch_lightning as pl
+from pytorch_lightning.callbacks import ModelCheckpoint
+from torch.utils.data import DataLoader
+
+from pytorch_ie.data.datasets.conll2003 import load_conll2003
+from pytorch_ie.models.transformer_set_prediction import TransformerSetPredictionModel
+from pytorch_ie.taskmodules.transformer_set_prediction import TransformerSetPredictionTaskModule
+
+
+def main():
+    """Train a TransformerSetPredictionModel on CoNLL-2003 (train split, validation split for val)."""
+    pl.seed_everything(42)
+    model_output_path = "./model_output/"  # only referenced by the commented-out save code below
+    model_name = "bert-base-cased"
+    num_epochs = 10
+    batch_size = 16  # tested on a single GeForce RTX 2080 Ti (11016 MB)
+
+    train_docs, val_docs = load_conll2003(split="train"), load_conll2003(split="validation")
+
+    print("train docs: ", len(train_docs))
+    print("val docs: ", len(val_docs))
+
+    task_module = TransformerSetPredictionTaskModule(
+        tokenizer_name_or_path=model_name,
+        max_length=128,
+    )
+
+    task_module.prepare(train_docs)  # presumably populates task_module.label_to_id — confirm
+
+    train_dataset = task_module.encode(train_docs, encode_target=True)
+    val_dataset = task_module.encode(val_docs, encode_target=True)
+
+    train_dataloader = DataLoader(
+        train_dataset,
+        batch_size=batch_size,
+        shuffle=True,
+        collate_fn=task_module.collate,
+    )
+
+    val_dataloader = DataLoader(
+        val_dataset,
+        batch_size=batch_size,
+        shuffle=False,
+        collate_fn=task_module.collate,
+    )
+
+    model = TransformerSetPredictionModel(
+        model_name_or_path=model_name,
+        num_classes=len(task_module.label_to_id),
+        t_total=len(train_dataloader) * num_epochs,  # batches per epoch * number of epochs
+        none_coef=1.0,
+        learning_rate=1e-4,
+    )
+
+    # checkpoint_callback = ModelCheckpoint(
+    #     monitor="val/f1",
+    #     dirpath=model_output_path,
+    #     filename="zs-ner-{epoch:02d}-val_f1-{val/f1:.2f}",
+    #     save_top_k=1,
+    #     mode="max",
+    #     auto_insert_metric_name=False,
+    #     save_weights_only=True,
+    # )
+
+    trainer = pl.Trainer(
+        fast_dev_run=False,
+        max_epochs=num_epochs,
+        gpus=1,
+        checkpoint_callback=False,  # checkpointing is off; the ModelCheckpoint above is disabled
+        # callbacks=[checkpoint_callback],
+        precision=32,
+    )
+    trainer.fit(model, train_dataloader, val_dataloader)
+
+    # task_module.save_pretrained(model_output_path)
+
+    # trainer.save_checkpoint(model_output_path + "model.ckpt")
+    # or
+    # model.save_pretrained(model_output_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
@@ -37,6 +37,7 @@ transformers = "^4.15.0"
 huggingface-hub = "^0.2.1"
 torchmetrics = "^0.6.2"
 setuptools = "59.5.0"
+scipy = "^1.8.0"
 
 [tool.poetry.dev-dependencies]
 Pygments = ">=2.10.0"

diff --git a/src/pytorch_ie/data/datasets/conll2003.py b/src/pytorch_ie/data/datasets/conll2003.py
@@ -38,6 +38,7 @@ def load_conll2003(
         spans = bio_tags_to_spans(tag_sequence)
 
         document = Document(text)
+        document.annotations.spans.create_layer("entities")
 
         for label, (start, end) in spans:
             start_offset = token_offsets[start][0]

diff --git a/src/pytorch_ie/metrics/__init__.py b/src/pytorch_ie/metrics/__init__.py
diff --git a/src/pytorch_ie/metrics/set_fbeta.py b/src/pytorch_ie/metrics/set_fbeta.py
@@ -0,0 +1,68 @@
+from typing import Dict, List, Set, Tuple
+
+import torch
+import torch.nn.functional as F
+from torchmetrics.metric import Metric
+
+
+class SetFbetaScore(Metric):
+    """F-beta score over sets of predicted vs. gold ``(label, start, end)`` triples.
+
+    True/false positive and false negative counts accumulate across ``update`` calls.
+
+    Args:
+        none_index: label id treated as "no prediction"; predictions with this label are dropped.
+        beta: weight of recall relative to precision (1.0 yields F1).
+        dist_sync_on_step: forwarded to ``torchmetrics.Metric``.
+    """
+
+    def __init__(self, none_index: int, beta: float = 1.0, dist_sync_on_step=False):
+        super().__init__(dist_sync_on_step=dist_sync_on_step)
+
+        self.none_index = none_index
+        self.beta = beta
+
+        self.add_state("true_positives", default=torch.tensor(0), dist_reduce_fx="sum")
+        self.add_state("false_positives", default=torch.tensor(0), dist_reduce_fx="sum")
+        self.add_state("false_negatives", default=torch.tensor(0), dist_reduce_fx="sum")
+
+    def update(self, predictions: Dict[str, torch.Tensor], targets: Dict[str, List[torch.Tensor]]):
+        """Accumulate true/false positive and false negative counts for one batch.
+
+        ``predictions`` maps "label_ids"/"start_index"/"end_index" to score tensors that are
+        argmax-ed over the last dim; ``targets`` maps the same keys to one tensor per example.
+        """
+        # Highest-scoring label / start / end index for every prediction slot.
+        label_ids_pred_full = F.softmax(predictions["label_ids"], dim=-1).argmax(dim=-1)
+        start_index_pred_full = F.softmax(predictions["start_index"], dim=-1).argmax(dim=-1)
+        end_index_pred_full = F.softmax(predictions["end_index"], dim=-1).argmax(dim=-1)
+
+        for batch_index, (label_ids, start_index, end_index) in enumerate(
+            zip(targets["label_ids"], targets["start_index"], targets["end_index"])
+        ):
+            keep = label_ids_pred_full[batch_index] != self.none_index
+            pred_label_set: Set[Tuple[int, int, int]] = set(
+                zip(
+                    label_ids_pred_full[batch_index][keep].tolist(),
+                    start_index_pred_full[batch_index][keep].tolist(),
+                    end_index_pred_full[batch_index][keep].tolist(),
+                )
+            )
+            true_label_set: Set[Tuple[int, int, int]] = set(
+                zip(label_ids.tolist(), start_index.tolist(), end_index.tolist())
+            )
+
+            # A prediction is correct only if label, start and end all match a gold triple.
+            num_matched = len(pred_label_set & true_label_set)
+            self.true_positives += num_matched
+            self.false_positives += len(pred_label_set) - num_matched
+            self.false_negatives += len(true_label_set) - num_matched
+
+    def compute(self):
+        """Return F-beta = (1 + beta^2) * P * R / (beta^2 * P + R) from the accumulated counts."""
+        beta_sq = self.beta**2
+        # The 1e-10 terms keep the divisions finite when a count is zero.
+        precision = self.true_positives / (self.true_positives + self.false_positives + 1e-10)
+        recall = self.true_positives / (self.true_positives + self.false_negatives + 1e-10)
+        f_beta = (1 + beta_sq) * precision * recall / (beta_sq * precision + recall + 1e-10)
+        return f_beta
diff --git a/src/pytorch_ie/models/set_prediction/__init__.py b/src/pytorch_ie/models/set_prediction/__init__.py
diff --git a/src/pytorch_ie/models/set_prediction/loss/__init__.py b/src/pytorch_ie/models/set_prediction/loss/__init__.py
diff --git a/src/pytorch_ie/models/set_prediction/loss/loss_functions.py b/src/pytorch_ie/models/set_prediction/loss/loss_functions.py
@@ -0,0 +1,209 @@
+from typing import Any, Dict, List, Optional, Tuple
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+def get_src_permutation_idx(indices):
+    """Flatten per-example (src, tgt) index pairs into (batch_idx, src_idx) for fancy indexing."""
+    sources = [pair[0] for pair in indices]
+    batch_idx = torch.cat([torch.full_like(s, pos) for pos, s in enumerate(sources)])
+    return batch_idx, torch.cat(sources)
+
+
+def get_tgt_permutation_idx(indices):
+    """Flatten per-example (src, tgt) index pairs into (batch_idx, tgt_idx) for fancy indexing."""
+    destinations = [pair[1] for pair in indices]
+    batch_idx = torch.cat([torch.full_like(t, pos) for pos, t in enumerate(destinations)])
+    return batch_idx, torch.cat(destinations)
+
+
+class LossFunction(nn.Module):
+    """Interface for per-target losses computed on matched (prediction, target) pairs."""
+
+    def forward(
+        self,
+        name: str,
+        output: Dict[str, torch.Tensor],
+        targets: Dict[str, List[torch.Tensor]],
+        permutation_indices: Tuple[torch.Tensor, torch.Tensor],
+        prev_permutation_indices: Tuple[torch.Tensor, torch.Tensor],
+    ) -> Optional[torch.Tensor]:
+        """Return the loss for entry ``name``; every subclass must override this method."""
+        raise NotImplementedError()
+
+
+class CrossEntropyLossFunction(LossFunction):
+    """Cross-entropy over per-query class logits; unmatched queries get ``none_index`` as target."""
+
+    def __init__(
+        self, num_classes: Optional[int] = None, none_weight: float = 1.0, none_index: int = 0
+    ):
+        super().__init__()
+        self.none_index = none_index
+        if num_classes is None:
+            self.empty_weight = None
+            return
+        # Per-class weights: 1.0 everywhere except ``none_weight`` at ``none_index``.
+        class_weights = torch.ones(num_classes + 1)
+        class_weights[none_index] = none_weight
+        self.register_buffer("empty_weight", class_weights)
+
+    def forward(
+        self,
+        name: str,
+        output: Dict[str, torch.Tensor],
+        targets: Dict[str, List[torch.Tensor]],
+        permutation_indices: Tuple[torch.Tensor, torch.Tensor],
+        prev_permutation_indices: Tuple[torch.Tensor, torch.Tensor],
+    ) -> Optional[torch.Tensor]:
+        """Return the weighted cross-entropy of ``output[name]`` against the matched targets."""
+        logits = output[name]  # [batch_size, num_queries, num_targets]
+        device = logits.device
+
+        batch_idx, query_idx = get_src_permutation_idx(permutation_indices)
+        matched = [target[i] for target, (_, i) in zip(targets[name], permutation_indices)]
+        matched_classes = torch.cat(matched, dim=0).long().to(device)
+
+        # Every query defaults to the "none" class; matched queries get their gold class.
+        target_classes = torch.full(
+            logits.shape[:2], self.none_index, dtype=torch.int64, device=device
+        )
+        target_classes[batch_idx.to(device), query_idx.to(device)] = matched_classes
+
+        # F.cross_entropy expects the class dimension at index 1.
+        return F.cross_entropy(logits.transpose(1, 2), target_classes, self.empty_weight)
+
+
+class BinaryCrossEntropyLossFunction(LossFunction):
+    """Masked binary cross-entropy between matched per-token logits and their token targets."""
+
+    def __init__(self, ignore_index: int = -100):
+        super().__init__()
+        self.ignore_index = ignore_index  # target value marking positions excluded from the loss
+
+    def forward(
+        self,
+        name: str,
+        output: Dict[str, torch.Tensor],
+        targets: Dict[str, List[torch.Tensor]],
+        permutation_indices: Tuple[torch.Tensor, torch.Tensor],
+        prev_permutation_indices: Tuple[torch.Tensor, torch.Tensor],
+    ) -> Optional[torch.Tensor]:
+        """Return the summed per-row mean BCE over positions whose target != ``ignore_index``."""
+        device = output[name].device  # output[name]: [batch_size, num_queries, seq_len]
+
+        batch_idx, query_idx = get_src_permutation_idx(permutation_indices)
+        src_token_mask = output[name][batch_idx.to(device), query_idx.to(device)]
+
+        target_token_mask = torch.cat(
+            [target[i] for target, (_, i) in zip(targets[name], permutation_indices)], dim=0
+        ).float()
+        target_token_mask = target_token_mask.to(device)
+
+        # Positions whose target equals the configured ignore_index contribute no loss.
+        mask = target_token_mask != self.ignore_index
+        loss_span = F.binary_cross_entropy_with_logits(
+            src_token_mask, target_token_mask, reduction="none"
+        )
+        # clamp(min=1) avoids 0/0 = NaN for rows in which every position is ignored.
+        return ((loss_span * mask) / mask.sum(dim=-1, keepdim=True).clamp(min=1)).sum()
+
+
+class SpanPositionLossFunction(LossFunction):
+    """L1 loss between matched predicted span positions and their target positions."""
+
+    def forward(
+        self,
+        name: str,
+        output: Dict[str, torch.Tensor],
+        targets: Dict[str, List[torch.Tensor]],
+        permutation_indices: Tuple[torch.Tensor, torch.Tensor],
+        prev_permutation_indices: Tuple[torch.Tensor, torch.Tensor],
+    ) -> Optional[torch.Tensor]:
+        """Return the mean L1 error over matched spans, or None if ``targets[name]`` is empty."""
+        if len(targets[name]) <= 0:
+            return None
+        device = output[name].device
+        batch_idx, query_idx = get_src_permutation_idx(permutation_indices)
+        src_span_positions = output[name][batch_idx.to(device), query_idx.to(device)]
+        target_span_positions = torch.cat(
+            [target[i] for target, (_, i) in zip(targets[name], permutation_indices)], dim=0
+        ).to(device)  # targets may live on another device than the predictions
+        return F.l1_loss(src_span_positions, target_span_positions)
+
+
+class EdgeLossFunction(CrossEntropyLossFunction):
+    """Cross-entropy over pairwise edge logits between the current and previous query sets.
+
+    Cells of the [num_targets, num_prev_targets] grid without a gold edge get ``none_index``.
+    """
+
+    def forward(
+        self,
+        name: str,
+        output: Dict[str, torch.Tensor],
+        targets: Dict[str, List[torch.Tensor]],
+        permutation_indices: Tuple[torch.Tensor, torch.Tensor],
+        prev_permutation_indices: Tuple[torch.Tensor, torch.Tensor],
+    ) -> Optional[torch.Tensor]:
+        """Return the edge loss summed over batch elements, or None if none has gold edges.
+
+        Reads ``output[name]`` and ``targets["edge_ids" | "row_ids" | "col_ids"]``.
+        """
+        batch_edges = output[name]  # [batch_size, num_queries, num_prev_queries, num_classes]
+
+        # Order query indices by the target index they were matched to, so that position k
+        # along each axis of ``valid_edges`` below is the k-th matched target in index order.
+        sorted_prev_query_indices = [
+            query_indices[torch.argsort(target_indices)]
+            for query_indices, target_indices in prev_permutation_indices
+        ]
+
+        sorted_query_indices = [
+            query_indices[torch.argsort(target_indices)]
+            for query_indices, target_indices in permutation_indices
+        ]
+
+        total_loss: Optional[torch.Tensor] = None
+        for edges, prev_query_indices, query_indices, edge_ids, row_ids, col_ids in zip(
+            batch_edges,
+            sorted_prev_query_indices,
+            sorted_query_indices,
+            targets["edge_ids"],
+            targets["row_ids"],
+            targets["col_ids"],
+        ):
+            if len(edge_ids) <= 0:
+                continue
+
+            valid_edges = edges[:, prev_query_indices, :][
+                query_indices, :, :
+            ]  # [num_targets, num_prev_targets, num_classes]
+
+            row_indices = torch.cat(
+                [row_i.long() for row_i in row_ids], dim=0
+            )  # [num_edge_targets]
+            col_indices = torch.cat(
+                [col_i.long() for col_i in col_ids], dim=0
+            )  # [num_edge_targets]
+            edge_target_indices = torch.cat(
+                [edge_i.long() for edge_i in edge_ids], dim=0
+            )  # [num_edge_targets]
+
+            # Every grid cell defaults to the none class; gold edges overwrite their cell.
+            target_classes = torch.full(
+                valid_edges.shape[:2],
+                self.none_index,
+                dtype=torch.int64,
+                device=valid_edges.device,
+            )
+            target_classes[row_indices, col_indices] = edge_target_indices.to(target_classes.device)
+
+            loss = F.cross_entropy(valid_edges.transpose(1, 2), target_classes, self.empty_weight)
+
+            # Out-of-place add: ``+=`` would mutate the first element's loss tensor in place.
+            total_loss = loss if total_loss is None else total_loss + loss
+
+        return total_loss