Skip to content

Commit daa3ff9

Browse files
committed
create consume_modules() to properly load annotations in get_overrides()
1 parent 10dff5b commit daa3ff9

File tree

6 files changed

+131
-7
lines changed

6 files changed

+131
-7
lines changed

docs/intro/overrides.rst

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,31 @@ to see the other functionalities.
161161
using only the ``default_registry``. There's no need to declare multiple
162162
:class:`~.PageObjectRegistry` instances and use multiple annotations.
163163

164+
.. warning::
165+
166+
:meth:`~.PageObjectRegistry.get_overrides` relies on the fact that all essential
167+
packages/modules which contain the :meth:`~.PageObjectRegistry.handle_urls`
168+
annotations are properly loaded.
169+
170+
Thus, for cases like importing Page Objects from another external package, you'd
171+
need to properly load all :meth:`~.PageObjectRegistry.handle_urls` annotations
172+
from the external module. This ensures that the external Page Objects have
173+
their annotations properly loaded.
174+
175+
This can be done via the function named :func:`~.web_poet.overrides.consume_modules`.
176+
Here's an example:
177+
178+
.. code-block:: python
179+
180+
from web_poet import default_registry, consume_modules
181+
182+
consume_modules("external_package_A.po", "another_ext_package.lib")
183+
rules = default_registry.get_overrides()
184+
185+
**NOTE**: :func:`~.web_poet.overrides.consume_modules` must be called before
186+
:meth:`~.PageObjectRegistry.get_overrides` for the imports to properly load.
187+
188+
164189
A handy CLI tool is also available at your disposal to quickly see the available
165190
Override rules in a given module in your project. For example, invoking something
166191
like ``web_poet my_project.page_objects`` would produce the following:
@@ -226,7 +251,7 @@ Then we could easily retrieve all Page Objects per subpackage or module like thi
226251

227252
.. code-block:: python
228253
229-
from web_poet import default_registry
254+
from web_poet import default_registry, consume_modules
230255
231256
# We can do it per website.
232257
rules = default_registry.get_overrides_from("my_page_obj_project.cool_gadget_site")
@@ -236,11 +261,16 @@ Then we could easily retrieve all Page Objects per subpackage or module like thi
236261
rules = default_registry.get_overrides_from("my_page_obj_project.cool_gadget_site.us")
237262
rules = default_registry.get_overrides_from("my_page_obj_project.cool_gadget_site.fr")
238263
239-
# or even drill down further to the specific module.
264+
# Or even drill down further to the specific module.
240265
rules = default_registry.get_overrides_from("my_page_obj_project.cool_gadget_site.us.products")
241266
rules = default_registry.get_overrides_from("my_page_obj_project.cool_gadget_site.us.product_listings")
242267
243-
# Or simply all of Override rules ever declared.
268+
# Or simply all of the Override rules ever declared.
269+
rules = default_registry.get_overrides()
270+
271+
# Lastly, you'd need to properly load external packages/modules for the
272+
# @handle_urls annotation to be correctly read.
273+
consume_modules("external_package_A.po", "another_ext_package.lib")
244274
rules = default_registry.get_overrides()
245275
246276
Multiple Registry Approach

tests/test_overrides.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
PONestedModule,
1010
PONestedModuleOverridenSecondary,
1111
)
12+
from web_poet import consume_modules
1213
from web_poet.overrides import PageObjectRegistry, default_registry
1314

1415

@@ -17,9 +18,19 @@
1718

1819
def test_list_page_objects_all():
1920
rules = default_registry.get_overrides()
20-
2121
page_objects = {po.use for po in rules}
2222

23+
# Note that the 'tests_extra.po_lib_sub_not_imported.POLibSubNotImported'
24+
# Page Object is not included here since it was never imported anywhere in
25+
# our test package. It would only be included if we run any of the following
26+
# below. (Note that they should run before `get_overrides` is called.)
27+
# - from tests_extra import po_lib_sub_not_imported
28+
# - import tests_extra.po_lib_sub_not_imported
29+
# - web_poet.consume_modules("tests_extra")
30+
# Merely having `import tests_extra` won't work since the subpackages and
31+
# modules need to be traversed and imported as well.
32+
assert all(["po_lib_sub_not_imported" not in po.__module__ for po in page_objects])
33+
2334
# Ensure that ALL Override Rules are returned as long as the given
2435
# registry's @handle_urls annotation was used.
2536
assert page_objects == POS.union({POLibSub})
@@ -29,6 +40,16 @@ def test_list_page_objects_all():
2940
assert rule.meta == rule.use.expected_meta, rule.use
3041

3142

43+
def test_list_page_objects_all_consume_modules():
44+
"""A test similar to the one above but calls ``consume_modules()`` to properly
45+
load the @handle_urls annotations from other modules/packages.
46+
"""
47+
consume_modules("tests_extra")
48+
rules = default_registry.get_overrides()
49+
page_objects = {po.use for po in rules}
50+
assert any(["po_lib_sub_not_imported" in po.__module__ for po in page_objects])
51+
52+
3253
def test_list_page_objects_from_pkg():
3354
"""Tests that metadata is extracted properly from the po_lib package"""
3455
rules = default_registry.get_overrides_from("tests.po_lib")

tests_extra/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""
2+
This test package was created separately to see the behavior of retrieving the
3+
Override rules declared on a registry where @handle_urls is defined on another
4+
package.
5+
"""
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""
2+
This package is quite similar to tests/po_lib_sub in terms of code contents.
3+
4+
What we're ultimately trying to test here is to see if the `default_registry`
5+
captures the rules annotated in this module if it was not imported.
6+
"""
7+
from typing import Dict, Any, Callable
8+
9+
from url_matcher import Patterns
10+
11+
from web_poet import handle_urls
12+
13+
14+
class POBase:
15+
expected_overrides: Callable
16+
expected_patterns: Patterns
17+
expected_meta: Dict[str, Any]
18+
19+
20+
class POLibSubOverridenNotImported:
21+
...
22+
23+
24+
@handle_urls("sub_example_not_imported.com", POLibSubOverridenNotImported)
25+
class POLibSubNotImported(POBase):
26+
expected_overrides = POLibSubOverridenNotImported
27+
expected_patterns = Patterns(["sub_example_not_imported.com"])
28+
expected_meta = {} # type: ignore

web_poet/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from .pages import WebPage, ItemPage, ItemWebPage, Injectable
22
from .page_inputs import ResponseData
3-
from .overrides import handle_urls, PageObjectRegistry, default_registry
3+
from .overrides import handle_urls, PageObjectRegistry, default_registry, consume_modules

web_poet/overrides.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
import importlib.util
33
import warnings
44
import pkgutil
5+
from collections import deque
56
from dataclasses import dataclass, field
7+
from types import ModuleType
68
from typing import Iterable, Union, List, Callable, Dict, Any
79

810
from url_matcher import Patterns
@@ -164,7 +166,7 @@ def get_overrides_from(self, module: str) -> List[OverrideRule]:
164166
"""
165167
rules: Dict[Callable, OverrideRule] = {}
166168

167-
for mod in walk_modules(module):
169+
for mod in walk_module(module):
168170
# Dict ensures that no duplicates are collected and returned.
169171
rules.update(self._filter_from_module(mod.__name__))
170172

@@ -191,7 +193,7 @@ def _filter_from_module(self, module: str) -> Dict[Callable, OverrideRule]:
191193
handle_urls = default_registry.handle_urls
192194

193195

194-
def walk_modules(module: str) -> Iterable:
196+
def walk_module(module: str) -> Iterable:
195197
"""Return all modules from a module recursively.
196198
197199
Note that this will import all the modules and submodules. It returns the
@@ -212,3 +214,41 @@ def onerror(err):
212214
):
213215
mod = importlib.import_module(info.name)
214216
yield mod
217+
218+
219+
def consume_modules(*modules: str) -> None:
220+
"""A quick wrapper for :func:`~.walk_module` to efficiently consume the
221+
generator and recursively load all packages/modules.
222+
223+
This function must be run before calling :meth:`~.PageObjectRegistry.get_overrides`
224+
from the :class:`~.PageObjectRegistry`. It essentially ensures that the
225+
``@handle_urls`` annotations are properly acknowledged for modules/packages that are not
226+
imported.
227+
228+
Let's take a look at an example:
229+
230+
.. code-block:: python
231+
232+
# my_page_obj_project/load_rules.py
233+
234+
from web_poet import default_registry, consume_modules
235+
236+
consume_modules("other_external_pkg.po", "another_pkg.lib")
237+
rules = default_registry.get_overrides()
238+
239+
For this case, the Override rules are coming from:
240+
241+
- ``my_page_obj_project`` `(since it's the same module as the file above)`
242+
- ``other_external_pkg.po``
243+
- ``another_pkg.lib``
244+
245+
So if the ``default_registry`` had other ``@handle_urls`` annotations outside
246+
of the packages/modules list above, then the Override rules won't be returned.
247+
"""
248+
249+
for module in modules:
250+
gen = walk_module(module)
251+
252+
# Inspired by itertools recipe: https://docs.python.org/3/library/itertools.html
253+
# Using a deque() results in a tiny performance improvement over list().
254+
deque(gen, maxlen=0)

0 commit comments

Comments
 (0)