2
2
import time
3
3
from typing import TYPE_CHECKING , Any , Iterable , Optional
4
4
5
- from quixstreams .context import message_context
6
5
from quixstreams .state import WindowedPartitionTransaction , WindowedState
7
6
8
7
from .base import (
9
8
MultiAggregationWindowMixin ,
10
9
SingleAggregationWindowMixin ,
11
- Window ,
12
10
WindowKeyResult ,
13
11
WindowOnLateCallback ,
14
12
)
15
- from .time_based import ClosingStrategy , ClosingStrategyValues
13
+ from .time_based import ClosingStrategy , TimeWindow
16
14
17
15
if TYPE_CHECKING :
18
16
from quixstreams .dataframe .dataframe import StreamingDataFrame
19
17
20
18
logger = logging .getLogger (__name__ )
21
19
22
20
23
- class SessionWindow (Window ):
21
+ class SessionWindow (TimeWindow ):
24
22
"""
25
23
Session window groups events that occur within a specified timeout period.
26
24
@@ -40,77 +38,10 @@ def __init__(
40
38
dataframe : "StreamingDataFrame" ,
41
39
on_late : Optional [WindowOnLateCallback ] = None ,
42
40
):
43
- super ().__init__ (
44
- name = name ,
45
- dataframe = dataframe ,
46
- )
41
+ super ().__init__ (name = name , dataframe = dataframe , on_late = on_late )
47
42
48
43
self ._timeout_ms = timeout_ms
49
44
self ._grace_ms = grace_ms
50
- self ._on_late = on_late
51
- self ._closing_strategy = ClosingStrategy .KEY
52
-
53
- def final (
54
- self , closing_strategy : ClosingStrategyValues = "key"
55
- ) -> "StreamingDataFrame" :
56
- """
57
- Apply the session window aggregation and return results only when the sessions
58
- are closed.
59
-
60
- The format of returned sessions:
61
- ```python
62
- {
63
- "start": <session start time in milliseconds>,
64
- "end": <session end time in milliseconds>,
65
- "value: <aggregated session value>,
66
- }
67
- ```
68
-
69
- The individual session is closed when the event time
70
- (the maximum observed timestamp across the partition) passes
71
- the last event timestamp + timeout + grace period.
72
- The closed sessions cannot receive updates anymore and are considered final.
73
-
74
- :param closing_strategy: the strategy to use when closing sessions.
75
- Possible values:
76
- - `"key"` - messages advance time and close sessions with the same key.
77
- If some message keys appear irregularly in the stream, the latest sessions can remain unprocessed until a message with the same key is received.
78
- - `"partition"` - messages advance time and close sessions for the whole partition to which this message key belongs.
79
- If timestamps between keys are not ordered, it may increase the number of discarded late messages.
80
- Default - `"key"`.
81
- """
82
- self ._closing_strategy = ClosingStrategy .new (closing_strategy )
83
- return super ().final ()
84
-
85
- def current (
86
- self , closing_strategy : ClosingStrategyValues = "key"
87
- ) -> "StreamingDataFrame" :
88
- """
89
- Apply the session window transformation to the StreamingDataFrame to return results
90
- for each updated session.
91
-
92
- The format of returned sessions:
93
- ```python
94
- {
95
- "start": <session start time in milliseconds>,
96
- "end": <session end time in milliseconds>,
97
- "value: <aggregated session value>,
98
- }
99
- ```
100
-
101
- This method processes streaming data and returns results as they come,
102
- regardless of whether the session is closed or not.
103
-
104
- :param closing_strategy: the strategy to use when closing sessions.
105
- Possible values:
106
- - `"key"` - messages advance time and close sessions with the same key.
107
- If some message keys appear irregularly in the stream, the latest sessions can remain unprocessed until a message with the same key is received.
108
- - `"partition"` - messages advance time and close sessions for the whole partition to which this message key belongs.
109
- If timestamps between keys are not ordered, it may increase the number of discarded late messages.
110
- Default - `"key"`.
111
- """
112
- self ._closing_strategy = ClosingStrategy .new (closing_strategy )
113
- return super ().current ()
114
45
115
46
def process_window (
116
47
self ,
@@ -140,7 +71,7 @@ def process_window(
140
71
# Check if the event is too late
141
72
if timestamp_ms < session_expiry_threshold :
142
73
late_by_ms = session_expiry_threshold - timestamp_ms
143
- self ._on_expired_session (
74
+ self ._on_expired_window (
144
75
value = value ,
145
76
key = key ,
146
77
start = timestamp_ms ,
@@ -216,17 +147,17 @@ def process_window(
216
147
217
148
# Expire old sessions
218
149
if self ._closing_strategy == ClosingStrategy .PARTITION :
219
- expired_windows = self .expire_sessions_by_partition (
150
+ expired_windows = self .expire_by_partition (
220
151
transaction , session_expiry_threshold , collect
221
152
)
222
153
else :
223
- expired_windows = self .expire_sessions_by_key (
154
+ expired_windows = self .expire_by_key (
224
155
key , state , session_expiry_threshold , collect
225
156
)
226
157
227
158
return updated_windows , expired_windows
228
159
229
- def expire_sessions_by_partition (
160
+ def expire_by_partition (
230
161
self ,
231
162
transaction : WindowedPartitionTransaction ,
232
163
expiry_threshold : int ,
@@ -257,7 +188,7 @@ def expire_sessions_by_partition(
257
188
for prefix in seen_prefixes :
258
189
state = transaction .as_state (prefix = prefix )
259
190
prefix_expired = list (
260
- self .expire_sessions_by_key (prefix , state , expiry_threshold , collect )
191
+ self .expire_by_key (prefix , state , expiry_threshold , collect )
261
192
)
262
193
expired_results .extend (prefix_expired )
263
194
count += len (prefix_expired )
@@ -271,7 +202,7 @@ def expire_sessions_by_partition(
271
202
272
203
return expired_results
273
204
274
- def expire_sessions_by_key (
205
+ def expire_by_key (
275
206
self ,
276
207
key : Any ,
277
208
state : WindowedState ,
@@ -318,43 +249,6 @@ def expire_sessions_by_key(
318
249
round (time .monotonic () - start , 2 ),
319
250
)
320
251
321
- def _on_expired_session (
322
- self ,
323
- value : Any ,
324
- key : Any ,
325
- start : int ,
326
- end : int ,
327
- timestamp_ms : int ,
328
- late_by_ms : int ,
329
- ) -> None :
330
- ctx = message_context ()
331
- to_log = True
332
-
333
- # Trigger the "on_late" callback if provided
334
- if self ._on_late :
335
- to_log = self ._on_late (
336
- value ,
337
- key ,
338
- timestamp_ms ,
339
- late_by_ms ,
340
- start ,
341
- end ,
342
- self ._name ,
343
- ctx .topic ,
344
- ctx .partition ,
345
- ctx .offset ,
346
- )
347
- if to_log :
348
- logger .warning (
349
- "Skipping session processing for the closed session "
350
- f"timestamp_ms={ timestamp_ms } "
351
- f"session={ (start , end )} "
352
- f"late_by_ms={ late_by_ms } "
353
- f"store_name={ self ._name } "
354
- f"partition={ ctx .topic } [{ ctx .partition } ] "
355
- f"offset={ ctx .offset } "
356
- )
357
-
358
252
359
253
class SessionWindowSingleAggregation (SingleAggregationWindowMixin , SessionWindow ):
360
254
pass
0 commit comments