Skip to content

Commit 8bb99f9

Browse files
committed
Further docs improvements; closes #13
1 parent 2a108fd commit 8bb99f9

File tree

7 files changed

+147
-31
lines changed

7 files changed

+147
-31
lines changed

src/range_streams/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
r"""
22
:mod:`range_streams` provides file-like object handling through
33
an API familiar to users of the standard library
4-
:mod:`io` module. It uses :class:`ranges.Range`, :class:`ranges.RangeSet`,
5-
and :class:`ranges.RangeDict` classes (from the externally maintained
4+
:mod:`io` module. It uses :class:`~ranges.Range`, :class:`~ranges.RangeSet`,
5+
and :class:`~ranges.RangeDict` classes (from the externally maintained
66
`python-ranges <https://python-ranges.readthedocs.io/en/latest/>`_ library)
77
to represent and look up range operations in an efficient linked
88
list data structure.
@@ -50,7 +50,7 @@
5050
or removed due to overlap with another range. See the docs for further details.
5151
5252
Further ranges are requested by simply calling the :meth:`~range_streams.range_stream.RangeStream.add`
53-
method with another :class:`ranges.Range` object. To create this implicitly, you can
53+
method with another :class:`~ranges.Range` object. To create this implicitly, you can
5454
simply provide a byte range to the `add` method as a tuple of two integers,
5555
which will be interpreted per the usual convention for ranges in Python,
5656
as an ``[a,b)`` half-open interval.

src/range_streams/codecs/png/stream.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,30 @@ def __init__(
4242
self.scan_ihdr()
4343

4444
def populate_chunks(self):
45+
"""
46+
Call :meth:`~range_streams.codecs.png.PngStream.enumerate_chunks`
47+
and store in the internal
48+
:attr:`~range_streams.codecs.png.PngStream._chunks` attribute,
49+
accessible through the :attr:`~range_streams.codecs.png.PngStream.chunks`
50+
property.
51+
52+
If the :attr:`~range_streams.codecs.png.PngStream.chunks` property is
53+
called 'prematurely', to avoid an access error it will 'proactively'
54+
call this method before returning the gated internal attribute.
55+
"""
4556
self._chunks: dict[str, list[PngChunkInfo]] = self.enumerate_chunks()
4657

4758
@property
4859
def chunks(self):
60+
"""
61+
'Gate' to the internal :attr:`~range_streams.codecs.png.PngStream._chunks`
62+
attribute.
63+
64+
If this property is called before the internal attribute is set
65+
('prematurely'), to avoid an access error it will 'proactively'
66+
call :meth:`~range_streams.codecs.png.PngStream.populate_chunks`
67+
before returning the gated internal attribute.
68+
"""
4969
if not hasattr(self, "_chunks"):
5070
self.populate_chunks()
5171
return self._chunks
@@ -77,6 +97,13 @@ def enumerate_chunks(self):
7797
possible). Build a dictionary of all chunks with keys of the chunk type (four
7898
letter strings) and values of lists (since some chunks e.g. IDAT can appear
7999
multiple times in the PNG).
100+
101+
See `the official specification
102+
<http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html>`_ for full details
103+
(or `Wikipedia
104+
<https://en.wikipedia.org/wiki/
105+
Portable_Network_Graphics#%22Chunks%22_within_the_file>`_,
106+
or `the W3C <https://www.w3.org/TR/PNG/#5Chunk-layout>`_).
80107
"""
81108
png_signature = 8 # PNG files start with an 8-byte signature
82109
chunk_preamble_size = 8 # 4-byte length chunk + 4-byte type chunk

src/range_streams/overlaps.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# This could be written more clearly by using a range_utils helper function shared with
1414
# most_recent_range
1515
def get_range_containing(rng_dict: RangeDict, position: int) -> Range:
16-
"""Get a :class:`ranges.Range` from ``rng_dict`` by looking up the ``position`` it
16+
"""Get a :class:`~ranges.Range` from ``rng_dict`` by looking up the ``position`` it
1717
contains, where ``rng_dict`` is either the internal
1818
:obj:`RangeStream._ranges` attribute
1919
or the external :obj:`~range_streams.range_stream.RangeStream.ranges` property.
@@ -41,11 +41,12 @@ def overlap_whence(
4141
) -> int | None:
4242
"""
4343
Determine if any overlap exists, whence (i.e. from where) on the pre-existing
44-
range it overlapped. 0 if the new range overlapped at the start ('head') of
45-
the existing range, 1 if fully contained (in the 'body'), 2 if at the end
46-
('tail'), or None if the range is non-overlapping with any pre-existing range.
44+
range it overlapped. ``0`` if the new range overlapped at the start ('head') of
45+
the existing range, ``1`` if fully contained (in the 'body'), ``2`` if at the end
46+
('tail'), or ``None`` if the range is non-overlapping with any pre-existing range.
4747
48-
Note: same convention as Python io module's SEEK_SET, SEEK_CUR, and SEEK_END.
48+
Note: same convention as Python io module's
49+
:obj:`~io.SEEK_SET`, :obj:`~io.SEEK_CUR`, and :obj:`~io.SEEK_END`.
4950
"""
5051
if rng in rng_dict:
5152
# Full overlap (i.e. in middle of pre-existing range)

src/range_streams/range_request.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,13 @@
1616
class RangeRequest:
1717
"""
1818
Store a GET request and the response stream while keeping a reference to
19-
the client that spawned it, providing an overridable `_iterator` attribute
20-
[by default giving access to `iter_raw()`] on the underlying response,
21-
suitable for `RangeResponse` to wrap in a `io.BytesIO` buffered stream.
19+
the client that spawned it, providing an overridable
20+
:attr:`~range_streams.range_response.RangeResponse._iterator` attribute
21+
[by default giving access to
22+
:meth:`~range_streams.range_response.RangeResponse.iter_raw`] on the
23+
underlying ``httpx.Response``, suitable for
24+
:class:`~range_streams.range_response.RangeResponse`
25+
to wrap in a :class:`io.BytesIO` buffered stream.
2226
"""
2327

2428
def __init__(self, byte_range: Range, url: str, client):
@@ -36,7 +40,7 @@ def range_header(self):
3640

3741
def setup_stream(self) -> None:
3842
"""
39-
`client.stream("GET", url)` but leave the stream to be manually closed
43+
``client.stream("GET", url)`` but leave the stream to be manually closed
4044
rather than using a context manager
4145
"""
4246
self.request = self.client.build_request(
@@ -57,24 +61,42 @@ def raise_for_non_partial_content(self):
5761

5862
def content_range_header(self) -> str:
5963
"""
60-
Validate request was range request by presence of `content-range` header
64+
Validate request was range request by presence of ``content-range`` header
6165
"""
6266
return detect_header_value(headers=self.response.headers, key="content-range")
6367

6468
@property
6569
def total_content_length(self) -> int:
70+
"""
71+
Obtain the total content length from the ``content-range`` header of a
72+
partial content HTTP GET request. This method is not used for the HTTP HEAD
73+
request sent when a :class:`~range_streams.range_stream.RangeStream` is
74+
initialised with an empty :class:`~ranges.Range` (since that is not a partial
75+
content request it returns a ``content-length`` header which can be read
76+
as an integer directly).
77+
"""
6678
return int(self.content_range.split("/")[-1])
6779

6880
def iter_raw(self) -> Iterator[bytes]:
81+
"""
82+
Wrap the ``iter_raw`` method of the underlying :class:`httpx.Response`
83+
object held by this :class:`~range_streams.range_request.RangeRequest` in
84+
:attr:`~range_streams.range_request.RangeRequest.response`.
85+
"""
6986
return self.response.iter_raw()
7087

7188
def close(self) -> None:
89+
"""
90+
Close the :attr:`~range_streams.range_request.RangeRequest.response`
90+
(the underlying ``httpx.Response``) if it is not already closed.
92+
"""
7293
if not self.response.is_closed:
7394
self.response.close()
7495

7596
def check_client(self):
7697
"""
77-
Typing workaround (Sphinx type hint extension does not like httpx)
98+
Type checking workaround (Sphinx type hint extension does not like httpx
99+
so check the type manually with a method called at initialisation).
78100
"""
79101
if not isinstance(self.client, httpx.Client): # pragma: no cover
80102
raise NotImplementedError("Only HTTPX clients currently supported")

src/range_streams/range_response.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,25 @@
1313

1414

1515
class RangeResponse:
16-
tail_mark = 0
16+
"""
17+
Adapted from `obskyr's ResponseStream demo code
18+
<https://gist.github.com/obskyr/b9d4b4223e7eaf4eedcd9defabb34f13>`_,
19+
this class handles the streamed partial request as a file-like object.
20+
"""
21+
22+
tail_mark: int = 0
23+
"""
24+
The amount by which to shorten the 'tail' (i.e. the upper end) of the
25+
range when deciding if it is 'consumed'. Incremented within the
26+
:meth:`~range_streams.range_stream.RangeStream.handle_overlap` method
27+
when the ``pruning_level`` is set to ``1`` (indicating a "replant" policy).
28+
29+
Under a 'replant' policy, when a new range is to be added and would overlap
30+
at the tail of an existing range, the pre-existing range should be effectively
31+
truncated by 'marking their tails'
32+
(where `an existing range` is assumed here to only be considered a range
33+
if it is not 'consumed' yet).
34+
"""
1735

1836
def __init__(
1937
self,
@@ -43,10 +61,18 @@ def client(self):
4361

4462
@property
4563
def url(self) -> str:
64+
"""
65+
A wrapper to access the :attr:`~range_streams.range_stream.RangeStream.url`
66+
of the 'parent' :class:`~range_streams.range_stream.RangeStream`.
67+
"""
4668
return self.parent_stream.url
4769

4870
@property
4971
def name(self) -> str:
72+
"""
73+
A wrapper to access the :attr:`~range_streams.range_stream.RangeStream.name`
74+
of the 'parent' :class:`~range_streams.range_stream.RangeStream`.
75+
"""
5076
return self.parent_stream.name
5177

5278
def _load_all(self):
@@ -63,9 +89,15 @@ def _load_until(self, goal_position):
6389
break
6490

6591
def tell(self):
92+
"""
93+
File-like tell (position indicator) within the range request stream.
94+
"""
6695
return self._bytes.tell()
6796

6897
def read(self, size=None):
98+
"""
99+
File-like reading within the range request stream.
100+
"""
69101
left_off_at = self._bytes.tell()
70102
if size is None:
71103
self._load_all()
@@ -77,11 +109,39 @@ def read(self, size=None):
77109
return self._bytes.read(size)
78110

79111
def seek(self, position, whence=SEEK_SET):
112+
"""
113+
File-like seeking within the range request stream.
114+
"""
80115
if whence == SEEK_END:
81116
self._load_all()
82117
self._bytes.seek(position, whence)
83118

84119
def is_consumed(self) -> bool:
120+
"""
121+
Whether the stream should be considered consumed, judged from the
122+
:meth:`~range_streams.range_response.RangeResponse.tell`
123+
position (indicating 'consumed' or 'read so far') together with the
124+
:attr:`~range_streams.range_response.RangeResponse.tail_mark`.
125+
126+
The :attr:`~range_streams.range_response.RangeResponse.tail_mark`
127+
is part of a mechanism to 'shorten' ranges when an overlap is detected,
128+
to preserve the one-to-one integrity of the :class:`~ranges.RangeDict`
129+
(see notes on the "replant" policy of
130+
:meth:`~range_streams.range_stream.RangeStream.handle_overlap`, set
131+
by the ``pruning_level`` passed into
132+
:class:`~range_streams.range_stream.RangeStream` on initialisation).
133+
134+
Note that there is (absolutely!) nothing stopping a stream from being
135+
re-consumed, but this library works on the assumption that all streams
136+
will be handled in an efficient manner (with any data read out from them
137+
either used once only or else will be reused from the first output rather
138+
than re-accessed directly from the stream itself).
139+
140+
To this end, :class:`~range_streams.range_stream.RangeStream` has measures
141+
in place to "decommission" ranges once they are consumed (see in particular
142+
:meth:`~range_streams.range_stream.RangeStream.burn_range` and
143+
:meth:`~range_streams.range_stream.RangeStream.handle_overlap`).
144+
"""
85145
read_so_far = self.tell()
86146
len_to_read = range_len(self.request.range) - self.tail_mark
87147
return read_so_far - len_to_read > 0

src/range_streams/range_stream.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
r""":mod:`range_streams.range_stream` exposes a class
22
:py:func:`RangeStream`, whose key property (once initialised) is
33
:attr:`~range_streams.range_stream.RangeStream.ranges`,
4-
which provides a :class:`ranges.RangeDict` comprising the ranges of
4+
which provides a :class:`~ranges.RangeDict` comprising the ranges of
55
the file being streamed.
66
77
The method :py:func:`RangeStream.add` will request further ranges,
@@ -66,7 +66,7 @@ class RangeStream:
6666
"""
6767
`'Internal'` ranges attribute. Start position is not affected by
6868
reading in bytes from the :class:`RangeResponse` (unlike the
69-
'externa' :attr:`ranges` property)
69+
'external' :attr:`~range_streams.range_stream.RangeStream.ranges` property)
7070
"""
7171

7272
def __init__(
@@ -199,18 +199,22 @@ def register_range(
199199

200200
def set_active_range(self, rng: Range):
201201
"""
202-
Setter for the active range (through which active_range_response is also set).
202+
Setter for the active range (through which
203+
:attr:`~range_streams.range_stream.RangeStream.active_range_response`
204+
is also set).
203205
"""
204206
if self._active_range != rng:
205207
self._active_range = rng
206208

207209
@property
208210
def active_range_response(self) -> RangeResponse:
209-
"""Look up the :class:`RangeResponse` object associated with the
210-
currently active range by using
211+
"""
212+
Look up the :class:`~range_streams.range_response.RangeResponse`
213+
object associated with the currently active range by using
211214
:attr:`~range_streams.range_stream.RangeStream._active_range` as the
212-
:class:`Range` key for the internal
213-
:attr:`~range_streams.range_stream.RangeStream._ranges` :class:`RangeDict`.
215+
:class:`~ranges.Range` key for the internal
216+
:attr:`~range_streams.range_stream.RangeStream._ranges`
217+
:class:`RangeDict`.
214218
"""
215219
try:
216220
return self._ranges[self._active_range]
@@ -221,7 +225,8 @@ def active_range_response(self) -> RangeResponse:
221225
raise ValueError(f"{e_pre}({self._active_range=}")
222226

223227
def ext2int(self, ext_rng: Range) -> RangeResponse:
224-
"""Given the external range `ext_rng` and the :class:`RangeStream`
228+
"""
229+
Given the external range `ext_rng` and the :class:`RangeStream`
225230
on which it is 'stored' (or rather, computed, in the
226231
:attr:`~range_streams.range_stream.RangeStream.ranges` property),
227232
return the internal :class:`~ranges.Range` stored on the
@@ -230,7 +235,7 @@ def ext2int(self, ext_rng: Range) -> RangeResponse:
230235
shared :class:`~range_streams.range_response.RangeResponse` value.
231236
232237
Args:
233-
ext_rng : A :class:`ranges.Range` from the 'external'
238+
ext_rng : A :class:`~ranges.Range` from the 'external'
234239
:attr:`~range_streams.range_stream.RangeStream.ranges`
235240
with which to cross-reference in
236241
:attr:`~range_streams.range_stream.RangeStream._ranges`

src/range_streams/range_utils.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def ranges_in_reg_order(ranges: RangeDict) -> list[Range]:
3636

3737
def response_ranges_in_reg_order(ranges: RangeDict) -> list[Range]:
3838
"""For all of the :class:`~range_streams.range_response.RangeResponse`
39-
values in the :class:`ranges.RangeDict`, list the ranges from their
39+
values in the :class:`~ranges.RangeDict`, list the ranges from their
4040
original :attr:`~range_streams.range_response.RangeResponse.request`
4141
in order of registration.
4242
@@ -50,14 +50,15 @@ def response_ranges_in_reg_order(ranges: RangeDict) -> list[Range]:
5050
def most_recent_range(
5151
stream: range_streams.range_stream.RangeStream, internal: bool = True
5252
) -> Range | None:
53-
"""For all of the :class:`~range_streams.range_response.RangeResponse`
54-
values in the :class:`ranges.RangeDict`, list the ranges from their
53+
"""
54+
For all of the :class:`~range_streams.range_response.RangeResponse`
55+
values in the :class:`~ranges.RangeDict`, list the ranges from their
5556
original :attr:`~range_streams.range_response.RangeResponse.request`
5657
in order of registration.
5758
5859
If ``internal`` is ``True``, use
5960
:attr:`~range_streams.range_stream.RangeStream._ranges` as the
60-
:class:`ranges.RangeDict`, else use the 'external' (computed) property
61+
:class:`~ranges.RangeDict`, else use the 'external' (computed) property
6162
:attr:`~range_streams.range_stream.RangeStream.ranges`. The external
6263
ones take into account the position the file has been read/seeked to.
6364
@@ -76,7 +77,7 @@ def most_recent_range(
7677

7778
def range_termini(rng: Range) -> tuple[int, int]:
7879
"""Get the inclusive start and end positions ``[start,end]``
79-
from a :class`ranges.Range`. These are referred to as the
80+
from a :class:`~ranges.Range`. These are referred to as the
8081
'termini'. Ranges are always ascending.
8182
8283
Args:
@@ -130,7 +131,7 @@ def validate_range(
130131
byte_range: Range | tuple[int, int], allow_empty: bool = True
131132
) -> Range:
132133
"""Validate ``byte_range`` and convert to a half-closed (i.e.
133-
not inclusive of the end position) ``[start,end)`` :class:`ranges.Range`
134+
not inclusive of the end position) ``[start,end)`` :class:`~ranges.Range`
134135
if given as integer tuple.
135136
136137
Args:
@@ -163,7 +164,7 @@ def range_span(ranges: list[Range]) -> Range:
163164
(i.e. the range spanned from their minimum to maximum). This span
164165
may of course not be completely 'covered' by the ranges in the list.
165166
166-
Assumes input list of :class:`RangeSets` are in ascending order,
167+
Assumes the input list of :class:`~ranges.Range` is in ascending order,
167168
switches if not.
168169
169170
Args:

0 commit comments

Comments
 (0)