Skip to content

Commit de5563a

Browse files
committed
introduce concept of 'registry_pool' to access all PageObjectRegistry instances
1 parent 0cbeb0b commit de5563a

File tree

6 files changed

+212
-45
lines changed

6 files changed

+212
-45
lines changed

docs/intro/overrides.rst

Lines changed: 66 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@ code example below:
105105
from web_poet.pages import ItemWebPage
106106
from web_poet import PageObjectRegistry
107107
108-
primary_registry = PageObjectRegistry()
109-
secondary_registry = PageObjectRegistry()
108+
primary_registry = PageObjectRegistry(name="primary")
109+
secondary_registry = PageObjectRegistry(name="secondary")
110110
111111
class GenericProductPage(ItemWebPage):
112112
def to_item(self):
@@ -196,11 +196,22 @@ like ``web_poet my_project.page_objects`` would produce the following:
196196

197197
.. code-block::
198198
199-
Use this instead of for the URL patterns except for the patterns with priority meta
200-
---------------------------------------------------- ------------------------------------------ -------------------------------------- ------------------------- --------------- ------
201-
my_project.page_objects.ExampleProductPage my_project.page_objects.GenericProductPage ['example.com'] [] 500 {}
202-
my_project.page_objects.AnotherExampleProductPage my_project.page_objects.GenericProductPage ['anotherexample.com'] ['/digital-goods/'] 500 {}
203-
my_project.page_objects.DualExampleProductPage my_project.page_objects.GenericProductPage ['dualexample.com/shop/?product=*', 'dualexample.net/store/?pid=*'] [] 500 {}
199+
Registry Use this instead of for the URL patterns except for the patterns with priority meta
200+
--------- ---------------------------------------------------- ------------------------------------------ ------------------------------------------------------------------- ------------------------- --------------- ------
201+
default my_project.page_objects.ExampleProductPage my_project.page_objects.GenericProductPage ['example.com'] [] 500 {}
202+
default my_project.page_objects.AnotherExampleProductPage my_project.page_objects.GenericProductPage ['anotherexample.com'] ['/digital-goods/'] 500 {}
203+
default my_project.page_objects.DualExampleProductPage my_project.page_objects.GenericProductPage ['dualexample.com/shop/?product=*', 'dualexample.net/store/?pid=*'] [] 500 {}
204+
205+
You can also filter them via the **name** of :class:`~.PageObjectRegistry`. For example,
206+
invoking ``web_poet my_project.page_objects --registry_name=custom`` would produce
207+
something like:
208+
209+
.. code-block::
210+
211+
Registry Use this instead of for the URL patterns except for the patterns with priority meta
212+
---------- ---------------------------------------------------- ------------------------------------------ ---------------------- ------------------------- --------------- ------
213+
custom my_project.page_objects.CustomProductPage my_project.page_objects.GenericProductPage ['example.com'] [] 500 {}
214+
custom my_project.page_objects.AnotherCustomProductPage my_project.page_objects.GenericProductPage ['anotherexample.com'] ['/digital-goods/'] 500 {}
204215
205216
Organizing Page Object Overrides
206217
--------------------------------
@@ -320,10 +331,52 @@ organize them using our own instances of the :class:`~.PageObjectRegistry` inste
320331
321332
from web_poet import PageObjectRegistry
322333
323-
cool_gadget_registry = PageObjectRegistry()
324-
cool_gadget_us_registry = PageObjectRegistry()
325-
cool_gadget_fr_registry = PageObjectRegistry()
326-
furniture_shop_registry = PageObjectRegistry()
334+
cool_gadget_registry = PageObjectRegistry(name="cool_gadget")
335+
cool_gadget_us_registry = PageObjectRegistry(name="cool_gadget_us")
336+
cool_gadget_fr_registry = PageObjectRegistry(name="cool_gadget_fr")
337+
furniture_shop_registry = PageObjectRegistry(name="furniture_shop")
338+
339+
Note that you can access all of the :class:`~.PageObjectRegistry` instances that were
340+
ever instantiated via ``web_poet.registry_pool`` which is simply a mapping
341+
structured as ``Dict[str, PageObjectRegistry]``:
342+
343+
.. code-block:: python
344+
345+
from web_poet import registry_pool
346+
347+
print(registry_pool)
348+
# {
349+
# 'default': <web_poet.overrides.PageObjectRegistry object at 0x7f47d654d8b0>,
350+
# 'cool_gadget': <my_page_obj_project.PageObjectRegistry object at 0x7f47d654382a>,
351+
# 'cool_gadget_us': <my_page_obj_project.PageObjectRegistry object at 0xb247d65433c3>,
352+
# 'cool_gadget_fr': <my_page_obj_project.PageObjectRegistry object at 0xd93746549dea>,
353+
# 'furniture_shop': <my_page_obj_project.PageObjectRegistry object at 0x82n78654441b>
354+
# }
355+
356+
.. warning::
357+
358+
Please be aware that there might be some :class:`~.PageObjectRegistry` instances
359+
that are not available, most especially if you're using them from external
360+
packages.
361+
362+
Thus, it's imperative to use :func:`~.web_poet.overrides.consume_modules`
363+
beforehand:
364+
365+
.. code-block:: python
366+
367+
from web_poet import registry_pool, consume_modules
368+
369+
consume_modules("external_pkg")
370+
371+
print(registry_pool)
372+
# {
373+
# 'default': <web_poet.overrides.PageObjectRegistry object at 0x7f47d654d8b0>,
374+
# 'cool_gadget': <my_page_obj_project.PageObjectRegistry object at 0x7f47d654382a>,
375+
# 'cool_gadget_us': <my_page_obj_project.PageObjectRegistry object at 0xb247d65433c3>,
376+
# 'cool_gadget_fr': <my_page_obj_project.PageObjectRegistry object at 0xd93746549dea>,
377+
# 'furniture_shop': <my_page_obj_project.PageObjectRegistry object at 0x82n78654441b>,
378+
# 'ecommerce': <external_pkg.PageObjectRegistry object at 0xbc45d8328420>
379+
# }
327380
328381
After declaring the :class:`~.PageObjectRegistry` instances, they can be used
329382
in each of the Page Object packages like so:
@@ -412,7 +465,7 @@ our Override Rules.
412465
413466
from web_poet import PageObjectRegistry
414467
415-
product_listings_registry = PageObjectRegistry()
468+
product_listings_registry = PageObjectRegistry(name="product_listings")
416469
417470
Using the additional registry instance above, we'll use it to provide another
418471
annotation for the Page Objects in each of the ``product_listings.py`` module.
@@ -477,7 +530,7 @@ packages** in your project, you can do it like:
477530
# attribute of the registry even before calling `PageObjectRegistry.get_overrides()`
478531
consume_modules("ecommerce_page_objects", "gadget_sites_page_objects")
479532
480-
combined_registry = PageObjectRegistry()
533+
combined_registry = PageObjectRegistry(name="combined")
481534
combined_registry.data = {
482535
# Since ecommerce_page_objects is using web_poet.default_registry, then
483536
# it functions like a global registry which we can access as:

tests/po_lib/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class POTopLevelOverriden2:
2323
...
2424

2525

26-
secondary_registry = PageObjectRegistry()
26+
secondary_registry = PageObjectRegistry(name="secondary")
2727

2828

2929
# This first annotation is ignored. A single annotation per registry is allowed

tests/test_overrides.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import argparse
2+
13
import pytest
24
from url_matcher import Patterns
35

@@ -14,7 +16,7 @@
1416
PONestedModule,
1517
PONestedModuleOverridenSecondary,
1618
)
17-
from web_poet.overrides import PageObjectRegistry, default_registry
19+
from web_poet import PageObjectRegistry, default_registry, registry_pool
1820

1921

2022
POS = {POTopLevel1, POTopLevel2, POModule, PONestedPkg, PONestedModule}
@@ -134,7 +136,32 @@ def test_registry_data_from():
134136
assert PONestedPkg in data
135137

136138

137-
def test_cmd():
139+
def test_registry_name_conflict():
140+
"""Registries can only have a unique name."""
141+
142+
PageObjectRegistry("main")
143+
144+
assert "main" in registry_pool
145+
146+
with pytest.raises(ValueError):
147+
PageObjectRegistry("main")
148+
149+
150+
def test_cli_tool():
151+
"""Ensure that CLI parameters returns the expected results.
152+
153+
There's no need to check each specific OverrideRule below as we already have
154+
extensive tests for those above. We can simply count how many rules there are
155+
for a given registry.
156+
"""
157+
138158
from web_poet.__main__ import main
139159

140-
assert main(["tests.po_lib"]) is None
160+
results = main(["tests"])
161+
assert len(results) == 6
162+
163+
results = main(["tests", "--registry_name=secondary"])
164+
assert len(results) == 2
165+
166+
results = main(["tests", "--registry_name=not_exist"])
167+
assert not results

web_poet/__init__.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
from typing import Dict
2+
13
from .pages import WebPage, ItemPage, ItemWebPage, Injectable
24
from .page_inputs import ResponseData
3-
from .overrides import handle_urls, PageObjectRegistry, default_registry, consume_modules
5+
from .overrides import (
6+
PageObjectRegistry,
7+
consume_modules,
8+
registry_pool,
9+
)
10+
11+
12+
# For ease of use, we'll create a default registry so that users can simply
13+
# use its `handle_urls()` method directly by `from web_poet import handle_urls`
14+
default_registry = PageObjectRegistry(name="default")
15+
handle_urls = default_registry.handle_urls

web_poet/__main__.py

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,19 @@
11
"""Returns all Override Rules from the default registry."""
22

33
import argparse
4-
from typing import Callable
4+
from typing import Callable, Optional, List
55

66
import tabulate
77

8-
from web_poet import default_registry
8+
from web_poet import registry_pool, consume_modules, PageObjectRegistry
9+
from web_poet.overrides import OverrideRule
910

1011

1112
def qualified_name(cls: Callable) -> str:
1213
return f"{cls.__module__}.{cls.__name__}"
1314

1415

15-
def main(args=None):
16+
def parse_args(raw_args: Optional[List[str]] = None) -> argparse.Namespace:
1617
parser = argparse.ArgumentParser(
1718
description="Tool that list the Page Object overrides from a package or module recursively"
1819
)
@@ -22,29 +23,56 @@ def main(args=None):
2223
type=str,
2324
help="A package or module to list overrides from",
2425
)
25-
args = parser.parse_args(args)
26-
table = [
27-
(
28-
"Use this",
29-
"instead of",
30-
"for the URL patterns",
31-
"except for the patterns",
32-
"with priority",
33-
"meta",
34-
)
26+
parser.add_argument(
27+
"--registry_name",
28+
default="default",
29+
type=str,
30+
help="Name of the registry to retrieve the rules from.",
31+
)
32+
return parser.parse_args(args=raw_args)
33+
34+
35+
def load_registry(args: argparse.Namespace) -> Optional[PageObjectRegistry]:
36+
consume_modules(args.module)
37+
registry = registry_pool.get(args.registry_name)
38+
return registry
39+
40+
41+
def display_table(registry_name: str, rules: List[OverrideRule]) -> None:
42+
headers = [
43+
"Registry",
44+
"Use this",
45+
"instead of",
46+
"for the URL patterns",
47+
"except for the patterns",
48+
"with priority",
49+
"meta",
3550
]
36-
table += [
51+
52+
table = [
3753
(
54+
registry_name,
3855
qualified_name(rule.use),
3956
qualified_name(rule.instead_of),
4057
rule.for_patterns.include,
4158
rule.for_patterns.exclude,
4259
rule.for_patterns.priority,
4360
rule.meta,
4461
)
45-
for rule in default_registry.get_overrides(filters=args.module)
62+
for rule in rules
4663
]
47-
print(tabulate.tabulate(table, headers="firstrow"))
64+
print(tabulate.tabulate(table, headers=headers))
65+
66+
67+
def main(raw_args: Optional[List[str]] = None) -> Optional[List[OverrideRule]]:
68+
args = parse_args(raw_args) # pragma: no cover
69+
registry = load_registry(args)
70+
if not registry:
71+
print(f"No registry named {args.registry_name} found.")
72+
return None
73+
rules = registry.get_overrides(filters=args.module)
74+
display_table(registry.name, rules)
75+
return rules # for ease of testing
4876

4977

5078
if __name__ == "__main__":

0 commit comments

Comments
 (0)