8 changes: 8 additions & 0 deletions changelog.md
@@ -1,5 +1,12 @@
# Changelog

## Unreleased

### Added

- New `attention` pooling mode in `eds.span_pooler`
- New `word_pooling_mode=False` in `eds.transformer` to return the wordpiece embeddings directly, instead of the mean-pooled word embeddings. At the moment, this only works with `eds.span_pooler`, which can pool over wordpieces or words seamlessly (see the usage sketch below).

## v0.18.0 (2025-09-02)

📢 EDS-NLP will drop support for Python 3.7, 3.8 and 3.9 in the next major release (v0.19.0), in October 2025. Please upgrade to Python 3.10 or later.
@@ -13,6 +20,7 @@
- New `eds.explode` pipe that splits one document into multiple documents, one per span yielded by its `span_getter` parameter, each new document containing exactly that single span.
- New `Training a span classifier` tutorial, and reorganized deep-learning docs
- `ScheduledOptimizer` now warns when a parameter selector does not match any parameter.
- New `attention` pooling mode in `eds.span_pooler`

### Fixed

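A minimal usage sketch of the two new options above, assuming `pooling_mode` is the `eds.span_pooler` parameter that accepts the new `attention` mode (the model name and window sizes are placeholders):

```python
import edsnlp.pipes as eds

# Hypothetical wiring: pool span embeddings with attention directly over
# wordpieces, skipping the intermediate mean-pooled word embeddings.
embedding = eds.span_pooler(
    pooling_mode="attention",  # assumed parameter name for the new mode
    embedding=eds.transformer(
        model="prajjwal1/bert-tiny",  # placeholder model
        window=128,
        stride=96,
        word_pooling_mode=False,  # return wordpiece embeddings directly
    ),
)
```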
6 changes: 3 additions & 3 deletions docs/tutorials/index.md
@@ -4,6 +4,7 @@ We provide step-by-step guides to get you started. We cover the following use-cases:

### Base tutorials

<!-- --8<-- [start:tutorials] -->
<!-- --8<-- [start:classic-tutorials] -->

=== card {: href=/tutorials/spacy101 }
@@ -85,6 +86,8 @@ We provide step-by-step guides to get you started. We cover the following use-cases:
---
Quickly visualize the results of your pipeline as annotations or tables.

<!-- --8<-- [end:classic-tutorials] -->

### Deep learning tutorials

We also provide tutorials on how to train deep-learning models with EDS-NLP. These tutorials cover the training API, hyperparameter tuning, and more.
@@ -123,8 +126,5 @@ We also provide tutorials on how to train deep-learning models with EDS-NLP. These tutorials cover the training API, hyperparameter tuning, and more.
---
Learn how to tune hyperparameters of a model with `edsnlp.tune`.


<!-- --8<-- [end:deep-learning-tutorials] -->


<!-- --8<-- [end:tutorials] -->
40 changes: 34 additions & 6 deletions edsnlp/core/torch_component.py
@@ -339,7 +339,14 @@ def compute_training_metrics(
This is useful to compute averages when doing multi-gpu training or mini-batch
accumulation since full denominators are not known during the forward pass.
"""
return batch_output
return (
{
**batch_output,
"loss": batch_output["loss"] / count,
}
if "loss" in batch_output
else batch_output
)

def module_forward(self, *args, **kwargs): # pragma: no cover
"""
@@ -348,6 +355,31 @@ def module_forward(self, *args, **kwargs): # pragma: no cover
"""
return torch.nn.Module.__call__(self, *args, **kwargs)

def preprocess_batch(self, docs: Sequence[Doc], supervision=False, **kwargs):
"""
Convenience method to preprocess a batch of documents.
Features corresponding to the same path are grouped together in a list,
under the same key.

Parameters
----------
docs: Sequence[Doc]
Batch of documents
supervision: bool
Whether to extract supervision features or not

Returns
-------
Dict[str, Sequence[Any]]
The batch of features
"""
batch = [
(self.preprocess_supervised(d) if supervision else self.preprocess(d))
for d in docs
]
batch = decompress_dict(list(batch_compress_dict(batch)))
return batch

def prepare_batch(
self,
docs: Sequence[Doc],
@@ -372,11 +404,7 @@ def prepare_batch(
-------
Dict[str, Sequence[Any]]
"""
batch = [
(self.preprocess_supervised(doc) if supervision else self.preprocess(doc))
for doc in docs
]
batch = decompress_dict(list(batch_compress_dict(batch)))
batch = self.preprocess_batch(docs, supervision=supervision)
batch = self.collate(batch)
batch = self.batch_to_device(batch, device=device)
return batch
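The `count` division in `compute_training_metrics` above is what makes mini-batch accumulation come out right: each forward pass only sees its own micro-batch, so only the caller knows the full denominator. A small numeric sketch with hypothetical values:

```python
# Two accumulated micro-batches with *summed* losses 6.0 and 2.0,
# 4 samples in total across both:
summed_losses = [6.0, 2.0]
count = 4

# Dividing each summed loss by the full count before adding them
# recovers the true mean over the whole accumulated batch:
mean_loss = sum(loss / count for loss in summed_losses)
assert mean_loss == (6.0 + 2.0) / 4 == 2.0
```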
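And a sketch of the batching pipeline that `prepare_batch` now delegates to, using a transformer component as the example (the model name is a placeholder, and loading it requires network access):

```python
import torch
import edsnlp
import edsnlp.pipes as eds

nlp = edsnlp.blank("eds")
pipe = eds.transformer(model="prajjwal1/bert-tiny", window=128, stride=96)
docs = [nlp("Patient admitted for chest pain."), nlp("No complaints.")]

batch = pipe.preprocess_batch(docs)  # nested dict of per-doc feature lists
batch = pipe.collate(batch)          # lists collated into (padded) tensors
batch = pipe.batch_to_device(batch, device=torch.device("cpu"))
```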
160 changes: 160 additions & 0 deletions edsnlp/metrics/doc_classif.py
@@ -0,0 +1,160 @@
from typing import Any, Dict, Iterable, Optional, Tuple, Union

from spacy.tokens import Doc
from spacy.training import Example

from edsnlp import registry
from edsnlp.metrics import make_examples


def doc_classification_metric(
examples: Union[Tuple[Iterable[Doc], Iterable[Doc]], Iterable[Example]],
label_attr: str = "label",
micro_key: str = "micro",
macro_key: str = "macro",
filter_expr: Optional[str] = None,
) -> Dict[str, Any]:
"""
Scores document-level classification (accuracy, precision, recall, F1).

Parameters
----------
examples: Examples
The examples to score, either a tuple of (golds, preds) or a list of
spacy.training.Example objects
label_attr: str
The Doc._ attribute containing the label
micro_key: str
The key to use to store the micro-averaged results
macro_key: str
The key to use to store the macro-averaged results
filter_expr: str
The filter expression to use to filter the documents

Returns
-------
Dict[str, Any]
"""
examples = make_examples(examples)
if filter_expr is not None:
filter_fn = eval(f"lambda doc: {filter_expr}")
examples = [eg for eg in examples if filter_fn(eg.reference)]

pred_labels = []
gold_labels = []
for eg in examples:
pred = getattr(eg.predicted._, label_attr, None)
gold = getattr(eg.reference._, label_attr, None)
pred_labels.append(pred)
gold_labels.append(gold)

labels = set(gold_labels) | set(pred_labels)
labels = {label for label in labels if label is not None}
results = {}

for label in labels:
tp = sum(
1 for p, g in zip(pred_labels, gold_labels) if p == label and g == label
)
fp = sum(
1 for p, g in zip(pred_labels, gold_labels) if p == label and g != label
)
fn = sum(
1 for p, g in zip(pred_labels, gold_labels) if g == label and p != label
)

precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
f1 = (
(2 * precision * recall) / (precision + recall)
if (precision + recall) > 0
else 0.0
)

results[label] = {
"f": f1,
"p": precision,
"r": recall,
"tp": tp,
"fp": fp,
"fn": fn,
"support": tp + fn,
"positives": tp + fp,
}

# In single-label classification, every wrong prediction is simultaneously a
# false positive for the predicted class and a false negative for the gold
# class, so total_fp == total_fn and micro precision = recall = F1 = accuracy.
total_tp = sum(1 for p, g in zip(pred_labels, gold_labels) if p == g)
total_fp = sum(1 for p, g in zip(pred_labels, gold_labels) if p != g)
total_fn = total_fp

micro_precision = (
total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0.0
)
micro_recall = (
total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0
)
micro_f1 = (
(2 * micro_precision * micro_recall) / (micro_precision + micro_recall)
if (micro_precision + micro_recall) > 0
else 0.0
)
accuracy = total_tp / len(pred_labels) if len(pred_labels) > 0 else 0.0

results[micro_key] = {
"accuracy": accuracy,
"f": micro_f1,
"p": micro_precision,
"r": micro_recall,
"tp": total_tp,
"fp": total_fp,
"fn": total_fn,
"support": len(gold_labels),
"positives": len(pred_labels),
}

per_class_precisions = [results[label]["p"] for label in labels]
per_class_recalls = [results[label]["r"] for label in labels]
per_class_f1s = [results[label]["f"] for label in labels]

macro_precision = (
sum(per_class_precisions) / len(per_class_precisions)
if per_class_precisions
else 0.0
)
macro_recall = (
sum(per_class_recalls) / len(per_class_recalls) if per_class_recalls else 0.0
)
macro_f1 = sum(per_class_f1s) / len(per_class_f1s) if per_class_f1s else 0.0

results[macro_key] = {
"f": macro_f1,
"p": macro_precision,
"r": macro_recall,
"support": len(labels),
"classes": len(labels),
}
return results


@registry.metrics.register("eds.doc_classification")
class DocClassificationMetric:
def __init__(
self,
label_attr: str = "label",
micro_key: str = "micro",
macro_key: str = "macro",
filter_expr: Optional[str] = None,
):
self.label_attr = label_attr
self.micro_key = micro_key
self.macro_key = macro_key
self.filter_expr = filter_expr

def __call__(self, *examples):
return doc_classification_metric(
examples,
label_attr=self.label_attr,
micro_key=self.micro_key,
macro_key=self.macro_key,
filter_expr=self.filter_expr,
)


__all__ = [
"doc_classification_metric",
"DocClassificationMetric",
]
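A hypothetical end-to-end sketch of the new metric; the `Doc._.label` extension is registered here for the example, whereas in a real pipeline the classifier component would provide it:

```python
import edsnlp
from spacy.tokens import Doc

from edsnlp.metrics.doc_classif import doc_classification_metric

if not Doc.has_extension("label"):
    Doc.set_extension("label", default=None)

nlp = edsnlp.blank("eds")
gold = [nlp("Admission note."), nlp("Discharge note.")]
pred = [nlp("Admission note."), nlp("Discharge note.")]
gold[0]._.label, gold[1]._.label = "admission", "discharge"
pred[0]._.label, pred[1]._.label = "admission", "admission"

scores = doc_classification_metric((gold, pred))
print(scores["micro"]["accuracy"])  # 0.5 (one of two docs correct)
print(scores["admission"]["p"])     # 0.5 (1 TP, 1 FP for "admission")
```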
2 changes: 2 additions & 0 deletions edsnlp/pipes/__init__.py
@@ -82,5 +82,7 @@
from .trainable.embeddings.span_pooler.factory import create_component as span_pooler
from .trainable.embeddings.transformer.factory import create_component as transformer
from .trainable.embeddings.text_cnn.factory import create_component as text_cnn
from .trainable.embeddings.doc_pooler.factory import create_component as doc_pooler
from .trainable.doc_classifier.factory import create_component as doc_classifier
from .misc.split import Split as split
from .misc.explode import Explode as explode
1 change: 1 addition & 0 deletions edsnlp/pipes/trainable/doc_classifier/__init__.py
@@ -0,0 +1 @@
from .factory import create_component