From 9a2f4e7676b719f9d905a87e53705fad4378e98d Mon Sep 17 00:00:00 2001 From: Jon Clarke <5629061+JonAnCla@users.noreply.github.com> Date: Wed, 15 Oct 2025 21:17:19 +0100 Subject: [PATCH 1/2] - WIP --- ibis/common/annotations.py | 57 ++++++++++++++++++++++++++++++++++---- ibis/common/grounds.py | 11 ++++++-- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/ibis/common/annotations.py b/ibis/common/annotations.py index 297e57fd8cca..ad80dc2584f5 100644 --- a/ibis/common/annotations.py +++ b/ibis/common/annotations.py @@ -1,5 +1,6 @@ from __future__ import annotations +import dataclasses import functools import inspect import types @@ -301,7 +302,11 @@ class Signature(inspect.Signature): Primarily used in the implementation of `ibis.common.grounds.Annotable`. """ - __slots__ = () + __slots__ = ('_cached_params') + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._cached_params = dict(self.parameters) # avoids access via MappingProxyType which is slow compared to dict @classmethod def merge(cls, *signatures, **annotations): @@ -509,13 +514,25 @@ def validate(self, func, args, kwargs): return this + def validate_nobind_using_dataclass(self, cls, *args, **kwargs): + try: + instance = cls.__dataclass__(*args, **kwargs) + except TypeError as err: + raise SignatureValidationError( + "{call} {cause}\n\nExpected signature: {sig}", + sig=self, + func=cls, + args=args, + kwargs=kwargs, + ) from err + + return self.validate_nobind(cls, instance.__dict__) + def validate_nobind(self, func, kwargs): """Validate the arguments against the signature without binding.""" this, errors = {}, [] - for name, param in self.parameters.items(): - value = kwargs.get(name, param.default) - if value is EMPTY: - raise TypeError(f"missing required argument `{name!r}`") + for name, param in self._cached_params.items(): + value = kwargs[name] pattern = param.annotation.pattern result = pattern.match(value, this) @@ -564,6 +581,36 @@ def validate_return(self, func, value): ) return result + + def to_dataclass(self, cls_name: str) -> type: + """Create a dataclass from this signature. + + Later, instantiating a dataclass from arg+kwargs and accessing the resulting __dict__ + is much faster (~100x) than using Signature.bind + """ + fields = [] + for k, v in self.parameters.items(): + if v.default is inspect.Parameter.empty: + fields.append((k, v.annotation)) + elif v.annotation.__hash__ is None: + # unhashable types (e.g. list) cannot be used as default values + # in dataclasses, so we use a default factory instead + fields.append((k, v.annotation, dataclasses.field(default_factory=DefaultFactory(v.default)))) + else: + fields.append((k, v.annotation, dataclasses.field(default=v.default))) + return dataclasses.make_dataclass(cls_name, fields) + + +class DefaultFactory: + """Helper to create default factories for dataclass fields.""" + + __slots__ = ("value",) + + def __init__(self, value): + self.value = value + + def __call__(self): + return self.value def annotated(_1=None, _2=None, _3=None, **kwargs): diff --git a/ibis/common/grounds.py b/ibis/common/grounds.py index bb4d3e3b4cf2..801b002cbe0a 100644 --- a/ibis/common/grounds.py +++ b/ibis/common/grounds.py @@ -81,6 +81,9 @@ def __new__(metacls, clsname, bases, dct, **kwargs): signature = Signature.merge(*signatures, **arguments) argnames = tuple(signature.parameters.keys()) + # convert the signature to a dataclass so it can be used later instead of Signature.bind + data_cls = signature.to_dataclass(f"{clsname}DataClass") + namespace.update( __module__=module, __qualname__=qualname, @@ -88,6 +91,7 @@ def __new__(metacls, clsname, bases, dct, **kwargs): __attributes__=attributes, __match_args__=argnames, __signature__=signature, + __dataclass__=data_cls, __slots__=tuple(slots), ) return super().__new__(metacls, clsname, bases, namespace, **kwargs) @@ -103,6 +107,9 @@ class Annotable(Abstract, metaclass=AnnotableMeta): __signature__: ClassVar[Signature] """Signature of the class, containing the Argument annotations.""" + + __dataclass__: ClassVar[type] + """Dataclass with identical signature to this class. Used as a faster alternative to Signature.bind""" __attributes__: ClassVar[FrozenDict[str, Annotation]] """Mapping of the Attribute annotations.""" @@ -116,8 +123,8 @@ class Annotable(Abstract, metaclass=AnnotableMeta): @classmethod def __create__(cls, *args: Any, **kwargs: Any) -> Self: # construct the instance by passing only validated keyword arguments - kwargs = cls.__signature__.validate(cls, args, kwargs) - return super().__create__(**kwargs) + validated_kwargs = cls.__signature__.validate_nobind_using_dataclass(cls, *args, **kwargs) + return super().__create__(**validated_kwargs) @classmethod def __recreate__(cls, kwargs: Any) -> Self: From 8e580905eb28e72cedec31389a7df1056df2ea34 Mon Sep 17 00:00:00 2001 From: Jon Clarke <5629061+JonAnCla@users.noreply.github.com> Date: Sat, 18 Oct 2025 10:54:36 +0100 Subject: [PATCH 2/2] - wip cleanup after removal of __call__ --- ibis/common/annotations.py | 24 +++++++++++++----------- ibis/common/grounds.py | 29 +++++++++++++---------------- ibis/expr/operations/generic.py | 2 +- ibis/expr/operations/relations.py | 8 ++++---- ibis/expr/types/generic.py | 4 ++++ 5 files changed, 35 insertions(+), 32 deletions(-) diff --git a/ibis/common/annotations.py b/ibis/common/annotations.py index ad80dc2584f5..0d4103384a06 100644 --- a/ibis/common/annotations.py +++ b/ibis/common/annotations.py @@ -302,11 +302,13 @@ class Signature(inspect.Signature): Primarily used in the implementation of `ibis.common.grounds.Annotable`. """ - __slots__ = ('_cached_params') + __slots__ = ('_patterns', '_dataclass') def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._cached_params = dict(self.parameters) # avoids access via MappingProxyType which is slow compared to dict + # prebuild dict of patterns to avoid slow retrieval via property&MappingProxyType + self._patterns = {k: param.annotation.pattern for k, param in self.parameters.items() if hasattr(param.annotation, 'pattern')} + self._dataclass = self.to_dataclass() @classmethod def merge(cls, *signatures, **annotations): @@ -514,27 +516,27 @@ def validate(self, func, args, kwargs): return this - def validate_nobind_using_dataclass(self, cls, *args, **kwargs): + def validate_fast(self, func, args, kwargs): + """Faster validation using internal dataclass to bind args/kwargs to names instead of Signature.bind.""" try: - instance = cls.__dataclass__(*args, **kwargs) + instance = self._dataclass(*args, **kwargs) except TypeError as err: raise SignatureValidationError( "{call} {cause}\n\nExpected signature: {sig}", sig=self, - func=cls, + func=func, args=args, kwargs=kwargs, ) from err - return self.validate_nobind(cls, instance.__dict__) + return self.validate_nobind(func, instance.__dict__) def validate_nobind(self, func, kwargs): """Validate the arguments against the signature without binding.""" this, errors = {}, [] - for name, param in self._cached_params.items(): + for name, pattern in self._patterns.items(): value = kwargs[name] - pattern = param.annotation.pattern result = pattern.match(value, this) if result is NoMatch: errors.append((name, value, pattern)) @@ -581,12 +583,12 @@ def validate_return(self, func, value): ) return result - - def to_dataclass(self, cls_name: str) -> type: + + def to_dataclass(self, cls_name: str = 'SignatureDataclass') -> type: """Create a dataclass from this signature. Later, instantiating a dataclass from arg+kwargs and accessing the resulting __dict__ - is much faster (~100x) than using Signature.bind + is much faster (~10-20x) than using Signature.bind """ fields = [] for k, v in self.parameters.items(): diff --git a/ibis/common/grounds.py b/ibis/common/grounds.py index 801b002cbe0a..b529f9e1a337 100644 --- a/ibis/common/grounds.py +++ b/ibis/common/grounds.py @@ -81,9 +81,6 @@ def __new__(metacls, clsname, bases, dct, **kwargs): signature = Signature.merge(*signatures, **arguments) argnames = tuple(signature.parameters.keys()) - # convert the signature to a dataclass so it can be used later instead of Signature.bind - data_cls = signature.to_dataclass(f"{clsname}DataClass") - namespace.update( __module__=module, __qualname__=qualname, @@ -91,7 +88,6 @@ def __new__(metacls, clsname, bases, dct, **kwargs): __attributes__=attributes, __match_args__=argnames, __signature__=signature, - __dataclass__=data_cls, __slots__=tuple(slots), ) return super().__new__(metacls, clsname, bases, namespace, **kwargs) @@ -99,6 +95,8 @@ def __new__(metacls, clsname, bases, dct, **kwargs): def __or__(self, other): # required to support `dt.Numeric | dt.Floating` annotation for python<3.10 return Union[self, other] + + __call__ = type.__call__ @dataclass_transform() @@ -107,9 +105,6 @@ class Annotable(Abstract, metaclass=AnnotableMeta): __signature__: ClassVar[Signature] """Signature of the class, containing the Argument annotations.""" - - __dataclass__: ClassVar[type] - """Dataclass with identical signature to this class. Used as a faster alternative to Signature.bind""" __attributes__: ClassVar[FrozenDict[str, Annotation]] """Mapping of the Attribute annotations.""" @@ -120,11 +115,11 @@ class Annotable(Abstract, metaclass=AnnotableMeta): __match_args__: ClassVar[tuple[str, ...]] """Names of the arguments to be used for pattern matching.""" - @classmethod - def __create__(cls, *args: Any, **kwargs: Any) -> Self: - # construct the instance by passing only validated keyword arguments - validated_kwargs = cls.__signature__.validate_nobind_using_dataclass(cls, *args, **kwargs) - return super().__create__(**validated_kwargs) + #@classmethod + #def __create__(cls, *args: Any, **kwargs: Any) -> Self: + # # construct the instance by passing only validated keyword arguments + # validated_kwargs = cls.__signature__.validate_fast(cls, args, kwargs) + # return super().__create__(**validated_kwargs) @classmethod def __recreate__(cls, kwargs: Any) -> Self: @@ -132,9 +127,10 @@ def __recreate__(cls, kwargs: Any) -> Self: kwargs = cls.__signature__.validate_nobind(cls, kwargs) return super().__create__(**kwargs) - def __init__(self, **kwargs: Any) -> None: + def __init__(self, *args, **kwargs: Any) -> None: + validated_kwargs = self.__signature__.validate_fast(self.__class__, args, kwargs) # set the already validated arguments - for name, value in kwargs.items(): + for name, value in validated_kwargs.items(): object.__setattr__(self, name, value) # initialize the remaining attributes for name, field in self.__attributes__.items(): @@ -199,11 +195,12 @@ class Concrete(Immutable, Comparable, Annotable): __slots__ = ("__args__", "__precomputed_hash__") - def __init__(self, **kwargs: Any) -> None: + def __init__(self, *args, **kwargs: Any) -> None: + validated_kwargs = self.__signature__.validate_fast(self.__class__, args, kwargs) # collect and set the arguments in a single pass args = [] for name in self.__argnames__: - value = kwargs[name] + value = validated_kwargs[name] args.append(value) object.__setattr__(self, name, value) diff --git a/ibis/expr/operations/generic.py b/ibis/expr/operations/generic.py index 46e3dda002f8..ca322e4d9768 100644 --- a/ibis/expr/operations/generic.py +++ b/ibis/expr/operations/generic.py @@ -166,7 +166,7 @@ class ScalarParameter(Scalar): shape = ds.scalar - def __init__(self, dtype, counter): + def __init__(self, dtype, counter=None): if counter is None: counter = next(self._counter) super().__init__(dtype=dtype, counter=counter) diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 90dc400a8ded..803fa71358f9 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -91,13 +91,13 @@ class Field(Value): shape = ds.columnar def __init__(self, rel, name): - if name not in rel.schema: - columns_formatted = ", ".join(map(repr, rel.schema.names)) + super().__init__(rel=rel, name=name) + if self.name not in self.rel.schema: + columns_formatted = ", ".join(map(repr, self.rel.schema.names)) raise IbisTypeError( - f"Column {name!r} is not found in table. " + f"Column {self.name!r} is not found in table. " f"Existing columns: {columns_formatted}." ) - super().__init__(rel=rel, name=name) @attribute def dtype(self): diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index 63b1c264623d..13e6f2353495 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -120,6 +120,10 @@ def type(self) -> dt.DataType: """ return self.op().dtype + @property + def dtype(self) -> dt.DataType: + return self.type() + def hash(self) -> ir.IntegerValue: """Compute an integer hash value.