9
9
from typing import TYPE_CHECKING , Callable , Coroutine , Iterator , Type
10
10
11
11
from aiostream import stream
12
+ from aiostream .core import StreamEmpty
12
13
from ranges import Range , RangeSet
13
14
14
15
MYPY = False # when using mypy will be overridden as True
@@ -34,15 +35,32 @@ def __init__(
34
35
show_progress_bar : bool = True ,
35
36
timeout_s : float = 5.0 ,
36
37
client = None ,
38
+ close_client : bool = False ,
37
39
** kwargs ,
38
40
):
39
41
"""
40
42
Any kwargs are passed through to the stream class constructor.
41
43
42
44
Args:
43
- callback : A function to be passed 3 values: the AsyncFetcher which is calling
44
- it, the awaited RangeStream, and its source URL (a ``httpx.URL``,
45
- which can be coerced to a string).
45
+ stream_cls : The :class:`~range_streams.stream.RangeStream` class or a
46
+ subclass (i.e. one of its codecs or a custom subclass)
47
+ to instantiate for each of the URLs. Note: these classes
48
+ also have a helper method
49
+ :meth:`~range_streams.stream.RangeStream.make_async_fetcher`
50
+ urls : The list of URLs to fetch until completion
51
+ callback : A function to be passed 3 values: the AsyncFetcher which
52
+ is calling it, the awaited RangeStream, and its source URL
53
+ (a ``httpx.URL``, which can be coerced to a string).
54
+ verbose : Whether to log to console
55
+ show_progress_bar : Whether to show a tqdm progress bar (async-compatible)
56
+ timeout_s : The timeout to set on the client (converted into
57
+ ``httpx.Timeout`` configuration on instantiation)
58
+ client : The client to pass, if any, or one will be instantiated
59
+ and closed on each usage (note: not each instantiation!)
60
+ close_client : Whether to close the client upon completion (only if
61
+ provided: if no client is provided, one will be created
62
+ and closed by the standard `async with httpx.AsyncClient`
63
+ contextmanager block).
46
64
"""
47
65
if urls == []:
48
66
raise ValueError ("The list of URLs to fetch cannot be empty" )
@@ -59,6 +77,7 @@ def __init__(
59
77
self .verbose = verbose
60
78
self .show_progress_bar = show_progress_bar and not self .verbose
61
79
self .client = client
80
+ self .close_client_on_completion = close_client
62
81
self .timeout = httpx .Timeout (timeout = timeout_s )
63
82
self .completed = RangeSet ()
64
83
set_up_logging (quiet = not verbose )
@@ -71,14 +90,22 @@ def make_calls(self):
71
90
urlset = (u for u in self .filtered_url_list ) # single use URL generator
72
91
if self .show_progress_bar :
73
92
self .set_up_progress_bar ()
74
- self .fetch_things (urls = urlset )
93
+ try :
94
+ self .fetch_things (urls = urlset )
95
+ except StreamEmpty as exc :
96
+ # Treat this like a StopIteration (was called despite completed URLs)
97
+ if self .close_client_on_completion :
98
+ asyncio .run (self .client .aclose ())
99
+ else :
100
+ raise
101
+ # Note: to avoid raising this exception, check `total_complete` before calling
75
102
if self .show_progress_bar :
76
103
self .pbar .close ()
77
104
78
105
async def process_stream (self , range_stream : RangeStreamOrSubclass ):
79
106
"""
80
107
Process an awaited RangeStream within an async fetch loop, calling the callback
81
- set on the `~range_streams.async_utils.AsyncFetcher.callback` attribute.
108
+ set on the :attr:`~range_streams.async_utils.AsyncFetcher.callback` attribute.
82
109
83
110
Args:
84
111
range_stream : The awaited RangeStream (or one of its subclasses)
@@ -94,8 +121,12 @@ async def process_stream(self, range_stream: RangeStreamOrSubclass):
94
121
log .debug (f"Processed URL in async callback: { source_url } " )
95
122
if self .show_progress_bar :
96
123
self .pbar .update ()
97
- self .complete_row (row_index = i )
124
+ if i not in self .completed :
125
+ # Don't bother adding it if it has already been marked as complete in the callback
126
+ self .complete_row (row_index = i )
98
127
await resp .aclose ()
128
+ if self .total_complete == self .n and self .close_client_on_completion :
129
+ await self .client .aclose ()
99
130
100
131
@property
101
132
def total_complete (self ) -> int :
@@ -174,6 +205,9 @@ async def async_fetch_urlset(
174
205
in a contextmanager block (i.e. close it immediately after use), otherwise use
175
206
the one provided, not in a contextmanager block (i.e. leave it up to the user to
176
207
close the client).
208
+
209
+ Args:
210
+ urls : The URLs to fetch, as an exhaustible iterator (not a Sequence)
177
211
"""
178
212
await self .set_async_signal_handlers ()
179
213
if self .client is None :
0 commit comments