Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/user_guide/entry_points.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
TODO
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ Other enhancements
- Improved deprecation message for offset aliases (:issue:`60820`)
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
- Support :class:`DataFrame` plugin accessors registered by third-party packages via entry points (:issue:`29076`)
- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
Expand Down
5 changes: 5 additions & 0 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,3 +346,8 @@
"unique",
"wide_to_long",
]

# The loader is needed for exactly one call while the package is being
# imported, so it is imported here and removed from this namespace again below.
from pandas.core.accessor import DataFrameAccessorLoader

# Register every DataFrame accessor advertised through the loader's
# entry point group.
DataFrameAccessorLoader.load()
# Drop the name so the loader is not left behind as an attribute of this module.
del DataFrameAccessorLoader
39 changes: 39 additions & 0 deletions pandas/core/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import functools
from typing import (
TYPE_CHECKING,
Any,
final,
)
import warnings
Expand All @@ -25,6 +26,8 @@
from pandas import Index
from pandas.core.generic import NDFrame

from importlib.metadata import entry_points


class DirNamesMixin:
_accessors: set[str] = set()
Expand Down Expand Up @@ -393,3 +396,39 @@ def register_index_accessor(name: str) -> Callable[[TypeT], TypeT]:
from pandas import Index

return _register_accessor(name, Index)


class DataFrameAccessorLoader:
"""Loader class for registering DataFrame accessors via entry points."""

ENTRY_POINT_GROUP: str = "pandas_dataframe_accessor"

@classmethod
def load(cls) -> None:
"""loads and registers accessors defined by 'pandas_dataframe_accessor'."""
eps = entry_points(group=cls.ENTRY_POINT_GROUP)
names: set[str] = set()

for ep in eps:
name: str = ep.name

if name in names: # Verifies duplicated package names
warnings.warn(
f"Warning: you have two packages with the same name: '{name}'. "
"Uninstall the package you don't want to use "
"in order to remove this warning.\n",
UserWarning,
stacklevel=2,
)

else:
names.add(name)

def make_property(ep):
def accessor(self) -> Any:
cls_ = ep.load()
return cls_(self)

return accessor

register_dataframe_accessor(name)(make_property(ep))
227 changes: 227 additions & 0 deletions pandas/tests/test_plugis_entrypoint_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import pandas as pd
import pandas._testing as tm
from pandas.core.accessor import DataFrameAccessorLoader


def test_no_accessors(monkeypatch):
    # GH29076
    # With no entry points available the loader has nothing to register;
    # the call only has to complete without raising.
    monkeypatch.setattr(
        "pandas.core.accessor.entry_points", lambda *, group: []
    )

    DataFrameAccessorLoader.load()


def test_load_dataframe_accessors(monkeypatch):
    # GH29076
    # Mocked EntryPoint to simulate a plugin
    class MockEntryPoint:
        name = "test_accessor"

        def load(self):
            class TestAccessor:
                def __init__(self, df):
                    self._df = df

                def test_method(self):
                    return "success"

            return TestAccessor

    # Mock entry_points: only the loader's own group exposes the plugin
    def mock_entry_points(*, group):
        if group == DataFrameAccessorLoader.ENTRY_POINT_GROUP:
            return [MockEntryPoint()]
        return []

    # Patch entry_points in the module where the loader looks it up
    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)

    try:
        DataFrameAccessorLoader.load()

        # Create DataFrame and verify that the accessor was registered
        df = pd.DataFrame({"a": [1, 2, 3]})
        assert hasattr(df, "test_accessor")
        assert df.test_accessor.test_method() == "success"
    finally:
        # Registration modifies the DataFrame class itself, which monkeypatch
        # does not undo; remove the accessor so other tests are unaffected.
        if "test_accessor" in vars(pd.DataFrame):
            delattr(pd.DataFrame, "test_accessor")
        pd.DataFrame._accessors.discard("test_accessor")


def test_duplicate_accessor_names(monkeypatch):
    # GH29076
    def make_entry_point(ep_name, label):
        # Build a fake entry point whose accessor reports ``label``
        class Accessor:
            def __init__(self, df):
                self._df = df

            def which(self):
                return label

        class MockEntryPoint:
            name = ep_name

            def load(self):
                return Accessor

        return MockEntryPoint()

    # Two plugins that claim the same accessor name
    plugins = [
        make_entry_point("duplicate_accessor", "Accessor1"),
        make_entry_point("duplicate_accessor", "Accessor2"),
    ]

    def mock_entry_points(*, group):
        if group == DataFrameAccessorLoader.ENTRY_POINT_GROUP:
            return plugins
        return []

    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)

    try:
        # Check that the UserWarning is raised
        with tm.assert_produces_warning(
            UserWarning, match="duplicate_accessor"
        ) as record:
            DataFrameAccessorLoader.load()

        messages = [str(w.message) for w in record]
        assert any("two packages with the same name" in msg for msg in messages)

        df = pd.DataFrame({"x": [1, 2, 3]})
        assert hasattr(df, "duplicate_accessor")
        assert df.duplicate_accessor.which() in {"Accessor1", "Accessor2"}
    finally:
        # Registration modifies the DataFrame class itself, which monkeypatch
        # does not undo; remove the accessor so other tests are unaffected.
        if "duplicate_accessor" in vars(pd.DataFrame):
            delattr(pd.DataFrame, "duplicate_accessor")
        pd.DataFrame._accessors.discard("duplicate_accessor")


def test_unique_accessor_names(monkeypatch):
    # GH29076
    def make_entry_point(ep_name, label):
        # Build a fake entry point whose accessor reports ``label``
        class Accessor:
            def __init__(self, df):
                self._df = df

            def which(self):
                return label

        class MockEntryPoint:
            name = ep_name

            def load(self):
                return Accessor

        return MockEntryPoint()

    # Two plugins with distinct accessor names
    plugins = [
        make_entry_point("accessor1", "Accessor1"),
        make_entry_point("accessor2", "Accessor2"),
    ]

    def mock_entry_points(*, group):
        if group == DataFrameAccessorLoader.ENTRY_POINT_GROUP:
            return plugins
        return []

    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)

    try:
        # Check that no UserWarning is raised
        with tm.assert_produces_warning(None, check_stacklevel=False):
            DataFrameAccessorLoader.load()

        df = pd.DataFrame({"x": [1, 2, 3]})
        assert hasattr(df, "accessor1"), "Accessor1 not registered"
        assert hasattr(df, "accessor2"), "Accessor2 not registered"
        assert df.accessor1.which() == "Accessor1", "Accessor1 method incorrect"
        assert df.accessor2.which() == "Accessor2", "Accessor2 method incorrect"
    finally:
        # Registration modifies the DataFrame class itself, which monkeypatch
        # does not undo. Without this cleanup the names would still be present
        # on a later run, and this test expects a warning-free registration.
        for attr in ("accessor1", "accessor2"):
            if attr in vars(pd.DataFrame):
                delattr(pd.DataFrame, attr)
            pd.DataFrame._accessors.discard(attr)


def test_duplicate_and_unique_accessor_names(monkeypatch):
    # GH29076
    def make_entry_point(ep_name, label):
        # Build a fake entry point whose accessor reports ``label``
        class Accessor:
            def __init__(self, df):
                self._df = df

            def which(self):
                return label

        class MockEntryPoint:
            name = ep_name

            def load(self):
                return Accessor

        return MockEntryPoint()

    # Two plugins sharing one name plus a third with a name of its own
    plugins = [
        make_entry_point("duplicate_accessor", "Accessor1"),
        make_entry_point("duplicate_accessor", "Accessor2"),
        make_entry_point("unique_accessor", "Accessor3"),
    ]

    def mock_entry_points(*, group):
        if group == DataFrameAccessorLoader.ENTRY_POINT_GROUP:
            return plugins
        return []

    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)

    try:
        # Capture warnings
        with tm.assert_produces_warning(
            UserWarning, match="duplicate_accessor"
        ) as record:
            DataFrameAccessorLoader.load()

        messages = [str(w.message) for w in record]

        # Filter warnings for the specific message about duplicate packages
        duplicate_package_warnings = [
            msg
            for msg in messages
            if "you have two packages with the same name: 'duplicate_accessor'" in msg
        ]

        # Assert one warning about duplicate packages
        assert len(duplicate_package_warnings) == 1, (
            f"Expected exactly one warning about duplicate packages, "
            f"got {len(duplicate_package_warnings)}: {duplicate_package_warnings}"
        )

        df = pd.DataFrame({"x": [1, 2, 3]})
        assert hasattr(df, "duplicate_accessor"), "duplicate_accessor not registered"

        assert hasattr(df, "unique_accessor"), "unique_accessor not registered"

        assert df.duplicate_accessor.which() in {"Accessor1", "Accessor2"}, (
            "duplicate_accessor method incorrect"
        )
        assert df.unique_accessor.which() == "Accessor3", (
            "unique_accessor method incorrect"
        )
    finally:
        # Registration modifies the DataFrame class itself, which monkeypatch
        # does not undo; remove the accessors so other tests are unaffected.
        for attr in ("duplicate_accessor", "unique_accessor"):
            if attr in vars(pd.DataFrame):
                delattr(pd.DataFrame, attr)
            pd.DataFrame._accessors.discard(attr)
Loading