
Commit 8b15232
Merge remote-tracking branch 'origin/main' into close-inactive-contexts
2 parents: 891c95b + b387b42

20 files changed: +534 -129 lines

.bumpversion.cfg
Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.39
+current_version = 0.0.44
 commit = True
 tag = True

.github/workflows/checks.yml
Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 name: Checks
-on: [push, pull_request]
+on: [push, pull_request, workflow_dispatch]
 
 jobs:
   checks:

.github/workflows/tests.yml
Lines changed: 5 additions & 5 deletions

@@ -1,5 +1,5 @@
 name: Tests
-on: [push, pull_request]
+on: [push, pull_request, workflow_dispatch]
 
 jobs:
   tests:
@@ -9,12 +9,12 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        os: [ubuntu-22.04]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
         include:
-          - os: macos-latest
+          - os: macos-14
             python-version: "3.12"
-          - os: windows-latest
+          - os: windows-2022
             python-version: "3.12"
 
     steps:

.gitignore
Lines changed: 2 additions & 0 deletions

@@ -22,3 +22,5 @@ coverage-twisted/
 node_modules/
 package-lock.json
 package.json
+
+.idea

README.md
Lines changed: 78 additions & 16 deletions

@@ -22,7 +22,7 @@ to integrate `asyncio`-based projects such as `Playwright`.
 
 ### Minimum required versions
 
-* Python >= 3.8
+* Python >= 3.9
 * Scrapy >= 2.0 (!= 2.4.0)
 * Playwright >= 1.15
 
@@ -76,15 +76,14 @@ requests will be processed by the regular Scrapy download handler.
 
 ### Twisted reactor
 
-When running on GNU/Linux or macOS you'll need to
-[install the `asyncio`-based Twisted reactor](https://docs.scrapy.org/en/latest/topics/asyncio.html#installing-the-asyncio-reactor):
+[Install the `asyncio`-based Twisted reactor](https://docs.scrapy.org/en/latest/topics/asyncio.html#installing-the-asyncio-reactor):
 
 ```python
 # settings.py
 TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
 ```
 
-This is not a requirement on Windows (see [Windows support](#windows-support))
+This is the default in new projects since [Scrapy 2.7](https://github.com/scrapy/scrapy/releases/tag/2.7.0).
 
 
 ## Basic usage
@@ -282,21 +281,24 @@ PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT = 10 * 1000  # 10 seconds
 ### `PLAYWRIGHT_PROCESS_REQUEST_HEADERS`
 Type `Optional[Union[Callable, str]]`, default `scrapy_playwright.headers.use_scrapy_headers`
 
-A function (or the path to a function) that processes headers for a given request
-and returns a dictionary with the headers to be used (note that, depending on the browser,
-additional default headers could be sent as well). Coroutine functions (`async def`) are
-supported.
+A function (or the path to a function) that processes a Playwright request and returns a
+dictionary with headers to be overridden (note that, depending on the browser, additional
+default headers could be sent as well). Coroutine functions (`async def`) are supported.
 
-This will be called at least once for each Scrapy request (receiving said request and the
-corresponding Playwright request), but it could be called additional times if the given
-resource generates more requests (e.g. to retrieve assets like images or scripts).
+This will be called at least once for each Scrapy request, but it could be called additional times
+if Playwright generates more requests (e.g. to retrieve assets like images or scripts).
 
-The function must return a `dict` object, and receives the following positional arguments:
+The function must return a `Dict[str, str]` object, and receives the following three **keyword** arguments:
 
 ```python
-- browser_type: str
+- browser_type_name: str
 - playwright_request: playwright.async_api.Request
-- scrapy_headers: scrapy.http.headers.Headers
+- scrapy_request_data: dict
+    * method: str
+    * url: str
+    * headers: scrapy.http.headers.Headers
+    * body: Optional[bytes]
+    * encoding: str
 ```
 
 The default function (`scrapy_playwright.headers.use_scrapy_headers`) tries to
@@ -311,6 +313,38 @@ set by Playwright will be sent. Keep in mind that in this case, headers passed
 via the `Request.headers` attribute or set by Scrapy components are ignored
 (including cookies set via the `Request.cookies` attribute).
 
+Example:
+```python
+async def custom_headers(
+    *,
+    browser_type_name: str,
+    playwright_request: playwright.async_api.Request,
+    scrapy_request_data: dict,
+) -> Dict[str, str]:
+    headers = await playwright_request.all_headers()
+    scrapy_headers = scrapy_request_data["headers"].to_unicode_dict()
+    headers["Cookie"] = scrapy_headers.get("Cookie")
+    return headers
+
+PLAYWRIGHT_PROCESS_REQUEST_HEADERS = custom_headers
+```
+
+#### Deprecated argument handling
+
+In version 0.0.40 and earlier, arguments were passed to the function positionally,
+and only the Scrapy headers were passed instead of a dictionary with data about the
+Scrapy request.
+This is deprecated since version 0.0.41, and support for this way of handling arguments
+will eventually be removed in accordance with the [Deprecation policy](#deprecation-policy).
+
+Passed arguments:
+```python
+- browser_type: str
+- playwright_request: playwright.async_api.Request
+- scrapy_headers: scrapy.http.headers.Headers
+```
+
+Example:
 ```python
 def custom_headers(
     browser_type: str,
@@ -827,10 +861,12 @@ down or clicking links) and you want to handle only the final result in your cal
 
 ### `PageMethod` class
 
-#### `scrapy_playwright.page.PageMethod(method: str, *args, **kwargs)`:
+#### `scrapy_playwright.page.PageMethod(method: str | callable, *args, **kwargs)`:
 
 Represents a method to be called (and awaited if necessary) on a
 `playwright.page.Page` object (e.g. "click", "screenshot", "evaluate", etc).
+It's also possible to pass callable objects that will be invoked as callbacks
+and receive Playwright Page as argument.
 `method` is the name of the method, `*args` and `**kwargs`
 are passed when calling such method. The return value
 will be stored in the `PageMethod.result` attribute.
@@ -868,8 +904,34 @@ async def parse(self, response, **kwargs):
     await page.close()
 ```
 
+### Passing callable objects
+
+If a `PageMethod` receives a callable object as its first argument, it will be
+called with the page as its first argument. Any additional arguments are passed
+to the callable after the page.
+
+```python
+async def scroll_page(page: Page) -> str:
+    await page.wait_for_selector(selector="div.quote")
+    await page.evaluate("window.scrollBy(0, document.body.scrollHeight)")
+    await page.wait_for_selector(selector="div.quote:nth-child(11)")
+    return page.url
+
+
+class MySpyder(scrapy.Spider):
+    name = "scroll"
+
+    def start_requests(self):
+        yield Request(
+            url="https://quotes.toscrape.com/scroll",
+            meta={
+                "playwright": True,
+                "playwright_page_methods": [PageMethod(scroll_page)],
+            },
+        )
+```
 
-### Supported methods
+### Supported Playwright methods
 
 Refer to the [upstream docs for the `Page` class](https://playwright.dev/python/docs/api/class-page)
 to see available methods.
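
Note: the new README text above also says that any additional arguments given to `PageMethod` are forwarded to the callable after the page. A minimal sketch of that form (not part of this commit; `click_and_count` and the selector are hypothetical):

```python
from playwright.async_api import Page
from scrapy_playwright.page import PageMethod

async def click_and_count(page: Page, selector: str) -> int:
    """Click every element matching `selector` and return how many were clicked."""
    elements = await page.query_selector_all(selector)
    for element in elements:
        await element.click()
    return len(elements)

# Equivalent to awaiting click_and_count(page, "a.pager-next"); the return
# value is stored in PageMethod.result after the page methods are applied.
pm = PageMethod(click_and_count, "a.pager-next")
```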

docs/changelog.md
Lines changed: 32 additions & 0 deletions

@@ -1,5 +1,37 @@
 # scrapy-playwright changelog
 
+
+### [v0.0.44](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.44) (2025-08-13)
+
+* Fix crawl getting stuck on Windows with Scrapy>=2.13 (#351)
+
+
+### [v0.0.43](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.43) (2025-02-22)
+
+* Only register request and response loggers when needed (#336)
+
+
+### [v0.0.42](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.42) (2024-11-06)
+
+* Allow custom PageMethod callbacks (#318)
+* Fix download errors caused by Content-Encoding header (#322)
+
+
+### [v0.0.41](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.41) (2024-08-13)
+
+* Keyword arguments for PLAYWRIGHT_PROCESS_REQUEST_HEADERS, pass additional Request data (#303).
+  Deprecated positional argument handling for the function passed to the PLAYWRIGHT_PROCESS_REQUEST_HEADERS
+  setting, arguments should now be handled by keyword.
+* Retry to create page on browser crash (#305)
+* Fix typo in log message (#312)
+
+
+### [v0.0.40](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.40) (2024-07-16)
+
+* Enforce asyncio reactor in all platforms (#298)
+* Allow multiple handlers in separate thread (#299)
+
+
 ### [v0.0.39](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.39) (2024-07-11)
 
 * Return proper status and headers for downloads (#293)
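
Note: the v0.0.41 entry is the changelog counterpart of the README's "Deprecated argument handling" section above. A hedged before/after sketch of adapting a user-defined function to the keyword style (function names are hypothetical; the argument names come from the README diff):

```python
# Positional style, deprecated since v0.0.41:
def process_headers_old(browser_type, playwright_request, scrapy_headers):
    return dict(scrapy_headers.to_unicode_dict())

# Keyword style, v0.0.41 and later: scrapy_request_data carries the Scrapy
# request's method, url, headers, body and encoding instead of bare headers.
def process_headers_new(*, browser_type_name, playwright_request, scrapy_request_data):
    return dict(scrapy_request_data["headers"].to_unicode_dict())
```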

pylintrc
Lines changed: 3 additions & 0 deletions

@@ -20,3 +20,6 @@ disable=
 [FORMAT]
 expected-line-ending-format=LF
 max-line-length=99
+
+[MASTER]
+ignored-classes=twisted.internet.reactor
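
Note: the new `[MASTER]` entry is presumably tied to the `from twisted.internet import reactor` import added in `scrapy_playwright/_utils.py` below; the reactor is installed dynamically at runtime, so pylint cannot statically infer members such as `callFromThread`, and without the exemption calls like the following sketch would be flagged as `no-member`:

```python
# Illustrative only (not from this commit): pylint cannot see members of the
# dynamically-installed reactor object, hence ignored-classes above.
from twisted.internet import reactor

def greet() -> None:
    print("called on the reactor thread")
    reactor.stop()

reactor.callWhenRunning(greet)
reactor.run()  # blocks until greet() stops the reactor
```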

scrapy_playwright/__init__.py
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-__version__ = "0.0.39"
+__version__ = "0.0.44"

scrapy_playwright/_utils.py
Lines changed: 50 additions & 56 deletions

@@ -2,14 +2,15 @@
 import logging
 import platform
 import threading
-from typing import Awaitable, Iterator, Optional, Tuple, Union
+from typing import Awaitable, Dict, Iterator, Optional, Tuple, Union
 
 import scrapy
 from playwright.async_api import Error, Page, Request, Response
 from scrapy.http.headers import Headers
 from scrapy.settings import Settings
 from scrapy.utils.python import to_unicode
 from twisted.internet.defer import Deferred
+from twisted.python import failure
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 
 
@@ -103,68 +104,61 @@ async def _get_header_value(
         return None
 
 
-if platform.system() == "Windows":
-
-    class _ThreadedLoopAdapter:
-        """Utility class to start an asyncio event loop in a new thread and redirect coroutines.
-        This allows to run Playwright in a different loop than the Scrapy crawler, allowing to
-        use ProactorEventLoop which is supported by Playwright on Windows.
-        """
-
-        _loop: asyncio.AbstractEventLoop
-        _thread: threading.Thread
-        _coro_queue: asyncio.Queue = asyncio.Queue()
-        _stop_event: asyncio.Event = asyncio.Event()
-
-        @classmethod
-        async def _handle_coro(cls, coro, future) -> None:
-            try:
-                future.set_result(await coro)
-            except Exception as exc:
-                future.set_exception(exc)
-
-        @classmethod
-        async def _process_queue(cls) -> None:
-            while not cls._stop_event.is_set():
-                coro, future = await cls._coro_queue.get()
-                asyncio.create_task(cls._handle_coro(coro, future))
-                cls._coro_queue.task_done()
-
-        @classmethod
-        def _deferred_from_coro(cls, coro) -> Deferred:
-            future: asyncio.Future = asyncio.Future()
-            asyncio.run_coroutine_threadsafe(cls._coro_queue.put((coro, future)), cls._loop)
-            return scrapy.utils.defer.deferred_from_coro(future)
-
-        @classmethod
-        def start(cls) -> None:
-            policy = asyncio.WindowsProactorEventLoopPolicy()  # type: ignore[attr-defined]
+class _ThreadedLoopAdapter:
+    """Utility class to start an asyncio event loop in a new thread and redirect coroutines.
+    This allows to run Playwright in a different loop than the Scrapy crawler, allowing to
+    use ProactorEventLoop which is supported by Playwright on Windows.
+    """
+
+    _loop: asyncio.AbstractEventLoop
+    _thread: threading.Thread
+    _coro_queue: asyncio.Queue = asyncio.Queue()
+    _stop_events: Dict[int, asyncio.Event] = {}
+
+    @classmethod
+    async def _handle_coro(cls, coro: Awaitable, dfd: Deferred) -> None:
+        from twisted.internet import reactor
+
+        try:
+            result = await coro
+        except Exception as exc:
+            reactor.callFromThread(dfd.errback, failure.Failure(exc))
+        else:
+            reactor.callFromThread(dfd.callback, result)
+
+    @classmethod
+    async def _process_queue(cls) -> None:
+        while any(not ev.is_set() for ev in cls._stop_events.values()):
+            coro, dfd = await cls._coro_queue.get()
+            asyncio.create_task(cls._handle_coro(coro, dfd))
+            cls._coro_queue.task_done()
+
+    @classmethod
+    def _deferred_from_coro(cls, coro) -> Deferred:
+        dfd: Deferred = Deferred()
+        asyncio.run_coroutine_threadsafe(cls._coro_queue.put((coro, dfd)), cls._loop)
+        return dfd
+
+    @classmethod
+    def start(cls, caller_id: int) -> None:
+        cls._stop_events[caller_id] = asyncio.Event()
+        if not getattr(cls, "_loop", None):
+            policy = asyncio.DefaultEventLoopPolicy()
+            if platform.system() == "Windows":
+                policy = asyncio.WindowsProactorEventLoopPolicy()  # type: ignore[attr-defined]
             cls._loop = policy.new_event_loop()
-            asyncio.set_event_loop(cls._loop)
 
+        if not getattr(cls, "_thread", None):
             cls._thread = threading.Thread(target=cls._loop.run_forever, daemon=True)
             cls._thread.start()
             logger.info("Started loop on separate thread: %s", cls._loop)
-
             asyncio.run_coroutine_threadsafe(cls._process_queue(), cls._loop)
 
-    @classmethod
-    def stop(cls) -> None:
-        cls._stop_event.set()
+    @classmethod
+    def stop(cls, caller_id: int) -> None:
+        """Wait until all handlers are closed to stop the event loop and join the thread."""
+        cls._stop_events[caller_id].set()
+        if all(ev.is_set() for ev in cls._stop_events.values()):
            asyncio.run_coroutine_threadsafe(cls._coro_queue.join(), cls._loop)
            cls._loop.call_soon_threadsafe(cls._loop.stop)
            cls._thread.join()
-
-    _deferred_from_coro = _ThreadedLoopAdapter._deferred_from_coro
-else:
-
-    class _ThreadedLoopAdapter:  # type: ignore[no-redef]
-        @classmethod
-        def start(cls) -> None:
-            pass
-
-        @classmethod
-        def stop(cls) -> None:
-            pass
-
-    _deferred_from_coro = scrapy.utils.defer.deferred_from_coro