Merge branch 'main' into issue-1927-type-hints

ojeda-e · web-flow · commit dc83c021164a · 2025-10-21T11:16:53.000-06:00
diff --git a/src/llmcompressor/modifiers/pruning/sparsegpt/sgpt_base.py b/src/llmcompressor/modifiers/pruning/sparsegpt/sgpt_base.py
@@ -2,7 +2,7 @@
 from abc import abstractmethod
 from collections import defaultdict
 from functools import partial
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any
 
 import numpy
 import torch
@@ -27,24 +27,24 @@ class SparsityModifierBase(Modifier):
     """
 
     # modifier arguments
-    sparsity: Optional[Union[float, List[float]]]
-    sparsity_profile: Optional[str] = None
+    sparsity: float | list[float] | None
+    sparsity_profile: str | None = None
     mask_structure: str = "0:0"
-    owl_m: Optional[int] = None
-    owl_lmbda: Optional[float] = None
+    owl_m: int | None = None
+    owl_lmbda: float | None = None
 
     # data pipeline arguments
-    sequential_update: Optional[bool] = False  # deprecated
-    sequential_targets: Union[str, List[str], None] = None
-    targets: Union[str, List[str]] = ["Linear"]
-    ignore: List[str] = Field(default_factory=list)
+    sequential_update: bool | None = False  # deprecated
+    sequential_targets: str | list[str] | None = None
+    targets: str | list[str] = ["Linear"]
+    ignore: list[str] = Field(default_factory=list)
 
     # private variables
-    _prune_n: Optional[int] = PrivateAttr(default=None)
-    _prune_m: Optional[int] = PrivateAttr(default=None)
-    _module_names: Dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict)
-    _target_layers: Dict[str, torch.nn.Module] = PrivateAttr(default_factory=dict)
-    _module_sparsities: Dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict)
+    _prune_n: int | None = PrivateAttr(default=None)
+    _prune_m: int | None = PrivateAttr(default=None)
+    _module_names: dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict)
+    _target_layers: dict[str, torch.nn.Module] = PrivateAttr(default_factory=dict)
+    _module_sparsities: dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict)
 
     @field_validator("sequential_update", mode="before")
     def validate_sequential_update(cls, value: bool) -> bool:
@@ -58,7 +58,7 @@ def validate_sequential_update(cls, value: bool) -> bool:
         return True
 
     @field_validator("sparsity_profile", mode="before")
-    def validate_sparsity_profile(cls, value: Optional[str]) -> bool:
+    def validate_sparsity_profile(cls, value: str | None) -> bool:
         if value is None:
             return value
 
@@ -94,7 +94,7 @@ def validate_model_after(model: "SparsityModifierBase") -> "SparsityModifierBase
     def calibrate_module(
         self,
         module: torch.nn.Module,
-        args: Tuple[torch.Tensor, ...],
+        args: tuple[torch.Tensor, ...],
         _output: torch.Tensor,
     ):
         raise NotImplementedError()
@@ -143,12 +143,13 @@ def on_start(self, state: State, event: Event, **kwargs):
 
         # register hooks
         for index, (layer_name, layer) in enumerate(self._target_layers.items()):
-            if isinstance(self.sparsity, dict):
-                layer_sparsity = self.sparsity[layer_name]
-            elif isinstance(self.sparsity, list):
-                layer_sparsity = self.sparsity[index]
-            else:
-                layer_sparsity = self.sparsity
+            match self.sparsity:
+                case dict():
+                    layer_sparsity = self.sparsity[layer_name]
+                case list():
+                    layer_sparsity = self.sparsity[index]
+                case _:
+                    layer_sparsity = self.sparsity
 
             for name, module in get_prunable_layers(layer).items():
                 name = f"{layer_name}.{name}"
@@ -191,21 +192,21 @@ def on_end(self, state: State, event: Event, **kwargs):
         self.ended_ = True
         self.remove_hooks()
 
-    def _infer_sequential_targets(
-        self, model: torch.nn.Module
-    ) -> Union[str, List[str]]:
-        if self.sequential_targets is None:
-            return get_no_split_params(model)
-        if isinstance(self.sequential_targets, str):
-            return [self.sequential_targets]
-        return self.sequential_targets
+    def _infer_sequential_targets(self, model: torch.nn.Module) -> str | list[str]:
+        match self.sequential_targets:
+            case None:
+                return get_no_split_params(model)
+            case str():
+                return [self.sequential_targets]
+            case _:
+                return self.sequential_targets
 
     def _infer_owl_layer_sparsity(
         self,
         model: torch.nn.Module,
-        layers: Dict[str, torch.nn.Module],
+        layers: dict[str, torch.nn.Module],
         dataloader: torch.utils.data.DataLoader,
-    ) -> Dict[str, float]:
+    ) -> dict[str, float]:
         activations = self._get_activations(model, dataloader)
 
         groups = {}
@@ -248,12 +249,12 @@ def _infer_owl_layer_sparsity(
             logger.info(f"Sparsity for {k}: {sparsities[k]}")
         return sparsities
 
-    def _get_activations(self, model, dataloader, nsamples=128) -> Dict[str, int]:
+    def _get_activations(self, model, dataloader, nsamples=128) -> dict[str, int]:
         from llmcompressor.pipelines.basic import run_calibration
 
         acts = defaultdict(int)
 
-        def save_acts(_module, input: Union[Tuple[Any, ...], torch.Tensor], name: str):
+        def save_acts(_module, input: tuple[Any, ...] | torch.Tensor, name: str):
             nonlocal acts
             if isinstance(input, tuple):
                 input = input[0]
@@ -270,6 +271,6 @@ def save_acts(_module, input: Union[Tuple[Any, ...], torch.Tensor], name: str):
 
         return acts
 
-    def _split_mask_structure(self, mask_structure: str) -> Tuple[int, int]:
+    def _split_mask_structure(self, mask_structure: str) -> tuple[int, int]:
         n, m = mask_structure.split(":")
         return int(n), int(m)
diff --git a/src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py b/src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py
@@ -1,6 +1,5 @@
 import math
 from copy import copy
-from typing import Dict, Optional, Tuple, Union
 
 import torch
 import transformers
@@ -23,7 +22,7 @@
 
 
 def make_empty_hessian(
-    module: torch.nn.Module, device: Optional[torch.device] = None
+    module: torch.nn.Module, device: torch.device | None = None
 ) -> torch.Tensor:
     weight = module.weight
     num_columns = weight.shape[1]
@@ -34,30 +33,30 @@ def make_empty_hessian(
 def accumulate_hessian(
     inp: torch.Tensor,
     module: torch.nn.Module,
-    H: Optional[torch.Tensor],
+    H: torch.Tensor | None,
     num_samples: int,
-) -> Tuple[torch.Tensor, int]:
+) -> tuple[torch.Tensor, int]:
     inp = inp.to(device=H.device)
     if len(inp.shape) == 2:
         inp = inp.unsqueeze(0)
 
     num_added = inp.shape[0]
 
-    if isinstance(module, (torch.nn.Linear, transformers.Conv1D)):
-        if len(inp.shape) == 3:
-            inp = inp.reshape((-1, inp.shape[-1]))
-        inp = inp.t()
-
-    if isinstance(module, torch.nn.Conv2d):
-        unfold = torch.nn.Unfold(
-            module.kernel_size,
-            dilation=module.dilation,
-            padding=module.padding,
-            stride=module.stride,
-        )
-        inp = unfold(inp)
-        inp = inp.permute([1, 0, 2])
-        inp = inp.flatten(1)
+    match module:
+        case torch.nn.Linear() | transformers.Conv1D():
+            if len(inp.shape) == 3:
+                inp = inp.reshape((-1, inp.shape[-1]))
+            inp = inp.t()
+        case torch.nn.Conv2d():
+            unfold = torch.nn.Unfold(
+                module.kernel_size,
+                dilation=module.dilation,
+                padding=module.padding,
+                stride=module.stride,
+            )
+            inp = unfold(inp)
+            inp = inp.permute([1, 0, 2])
+            inp = inp.flatten(1)
 
     H *= num_samples / (num_samples + num_added)
     num_samples += num_added
@@ -72,10 +71,10 @@ def accumulate_hessian(
 def quantize_weight(
     module: torch.nn.Module,
     quant_args: QuantizationArgs,
-    hessians_dict: Dict[torch.nn.Module, torch.Tensor],
+    hessians_dict: dict[torch.nn.Module, torch.Tensor],
     blocksize: int = 128,
     percdamp: float = 0.01,
-) -> Tuple[float, torch.Tensor, torch.Tensor, Union[torch.Tensor, None], torch.Tensor]:
+) -> tuple[float, torch.Tensor, torch.Tensor, torch.Tensor | None, torch.Tensor]:
     """
     Quantize a module weight according to the GPTQ algorithm
 
@@ -103,10 +102,11 @@ def quantize_weight(
     )
 
     # standardize shape and dtype
-    if isinstance(module, torch.nn.Conv2d):
-        W = W.flatten(1)
-    elif isinstance(module, transformers.Conv1D):
-        W.transpose_(0, 1)
+    match module:
+        case torch.nn.Conv2d():
+            W = W.flatten(1)
+        case transformers.Conv1D():
+            W.transpose_(0, 1)
     W = W.to(dtype=GPTQ_PRECISION)
     num_rows = W.shape[0]
     num_columns = W.shape[1]
@@ -284,7 +284,7 @@ def quantize_weight(
 
 def _apply_activation_ordering(
     W: torch.Tensor, H: torch.Tensor
-) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
     """
-    Permute weight and hessian in order of greatest outupt activations
+    Permute weight and hessian in order of greatest output activations
 
diff --git a/src/llmcompressor/pipelines/cache.py b/src/llmcompressor/pipelines/cache.py