Commit 50773ec

DOC-5225 testable probabilistic dt examples (#3691)
1 parent a00d182 commit 50773ec

doctests/home_prob_dts.py

Lines changed: 232 additions & 0 deletions
# EXAMPLE: home_prob_dts
"""
Probabilistic data type examples:
https://redis.io/docs/latest/develop/connect/clients/python/redis-py/prob
"""

# HIDE_START
import redis
r = redis.Redis(decode_responses=True)
# HIDE_END
# REMOVE_START
r.delete(
    "recorded_users", "other_users",
    "group:1", "group:2", "both_groups",
    "items_sold",
    "male_heights", "female_heights", "all_heights",
    "top_3_songs"
)
# REMOVE_END

# STEP_START bloom
res1 = r.bf().madd("recorded_users", "andy", "cameron", "david", "michelle")
print(res1) # >>> [1, 1, 1, 1]

res2 = r.bf().exists("recorded_users", "cameron")
print(res2) # >>> 1

res3 = r.bf().exists("recorded_users", "kaitlyn")
print(res3) # >>> 0
# STEP_END
# REMOVE_START
assert res1 == [1, 1, 1, 1]
assert res2 == 1
assert res3 == 0
# REMOVE_END
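
# A further sketch, not part of the tested example above: `mexists()` (the
# redis-py wrapper for BF.MEXISTS) checks several items in one round trip.
# The output assumes the items added above; Bloom filters can occasionally
# report false positives, so treat it as illustrative.
sketch1 = r.bf().mexists("recorded_users", "andy", "kaitlyn")
print(sketch1) # >>> [1, 0] (illustrative)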

# STEP_START cuckoo
res4 = r.cf().add("other_users", "paolo")
print(res4) # >>> 1

res5 = r.cf().add("other_users", "kaitlyn")
print(res5) # >>> 1

res6 = r.cf().add("other_users", "rachel")
print(res6) # >>> 1

res7 = r.cf().mexists("other_users", "paolo", "rachel", "andy")
print(res7) # >>> [1, 1, 0]

res8 = r.cf().delete("other_users", "paolo")
print(res8) # >>> 1

res9 = r.cf().exists("other_users", "paolo")
print(res9) # >>> 0
# STEP_END
# REMOVE_START
assert res4 == 1
assert res5 == 1
assert res6 == 1
assert res7 == [1, 1, 0]
assert res8 == 1
assert res9 == 0
# REMOVE_END
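
# A further illustrative sketch, not asserted above: unlike `add()`, which can
# insert the same item more than once, `addnx()` (CF.ADDNX) only inserts an
# item when it is not already present, so it returns 0 for "kaitlyn" here.
sketch2 = r.cf().addnx("other_users", "kaitlyn")
print(sketch2) # >>> 0 (illustrative)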

# STEP_START hyperloglog
res10 = r.pfadd("group:1", "andy", "cameron", "david")
print(res10) # >>> 1

res11 = r.pfcount("group:1")
print(res11) # >>> 3

res12 = r.pfadd("group:2", "kaitlyn", "michelle", "paolo", "rachel")
print(res12) # >>> 1

res13 = r.pfcount("group:2")
print(res13) # >>> 4

res14 = r.pfmerge("both_groups", "group:1", "group:2")
print(res14) # >>> True

res15 = r.pfcount("both_groups")
print(res15) # >>> 7
# STEP_END
# REMOVE_START
assert res10 == 1
assert res11 == 3
assert res12 == 1
assert res13 == 4
assert res14
assert res15 == 7
# REMOVE_END
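
# A further sketch, not part of the tested example: PFCOUNT also accepts
# several keys and returns the estimated cardinality of their union, so you
# can get the combined count without creating a merged key first.
sketch3 = r.pfcount("group:1", "group:2")
print(sketch3) # >>> 7 (estimate)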

# STEP_START cms
# Specify that you want to keep the counts within 0.01
# (1%) of the true value with a 0.005 (0.5%) chance
# of going outside this limit.
res16 = r.cms().initbyprob("items_sold", 0.01, 0.005)
print(res16) # >>> True

# The parameters for `incrby()` are two lists. The count
# for each item in the first list is incremented by the
# value at the same index in the second list.
res17 = r.cms().incrby(
    "items_sold",
    ["bread", "tea", "coffee", "beer"], # Items sold
    [300, 200, 200, 100]
)
print(res17) # >>> [300, 200, 200, 100]

res18 = r.cms().incrby(
    "items_sold",
    ["bread", "coffee"],
    [100, 150]
)
print(res18) # >>> [400, 350]

res19 = r.cms().query("items_sold", "bread", "tea", "coffee", "beer")
print(res19) # >>> [400, 200, 350, 100]
# STEP_END
# REMOVE_START
assert res16
assert res17 == [300, 200, 200, 100]
assert res18 == [400, 350]
assert res19 == [400, 200, 350, 100]
# REMOVE_END
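
# A further sketch, not part of the tested example: CMS.INFO reports the
# dimensions `initbyprob()` chose for the requested error bounds, plus the
# total of all counts. The attribute names below assume the CMSInfo helper
# returned by current redis-py versions; the exact values depend on the
# Redis Stack release.
sketch4 = r.cms().info("items_sold")
print(sketch4.width, sketch4.depth, sketch4.count) # >>> width, depth, total count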

# STEP_START tdigest
res20 = r.tdigest().create("male_heights")
print(res20) # >>> True

res21 = r.tdigest().add(
    "male_heights",
    [175.5, 181, 160.8, 152, 177, 196, 164]
)
print(res21) # >>> OK

res22 = r.tdigest().min("male_heights")
print(res22) # >>> 152.0

res23 = r.tdigest().max("male_heights")
print(res23) # >>> 196.0

res24 = r.tdigest().quantile("male_heights", 0.75)
print(res24) # >>> [181]

# Note that the CDF value for 181 is not exactly
# 0.75. Both values are estimates.
res25 = r.tdigest().cdf("male_heights", 181)
print(res25) # >>> [0.7857142857142857]

res26 = r.tdigest().create("female_heights")
print(res26) # >>> True

res27 = r.tdigest().add(
    "female_heights",
    [155.5, 161, 168.5, 170, 157.5, 163, 171]
)
print(res27) # >>> OK

res28 = r.tdigest().quantile("female_heights", 0.75)
print(res28) # >>> [170]

res29 = r.tdigest().merge(
    "all_heights", 2, "male_heights", "female_heights"
)
print(res29) # >>> OK

res30 = r.tdigest().quantile("all_heights", 0.75)
print(res30) # >>> [175.5]
# STEP_END
# REMOVE_START
assert res20
assert res21 == "OK"
assert res22 == 152.0
assert res23 == 196.0
assert res24 == [181]
assert res25 == [0.7857142857142857]
assert res26
assert res27 == "OK"
assert res28 == [170]
assert res29 == "OK"
assert res30 == [175.5]
# REMOVE_END
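
# A further sketch, not part of the tested example: `quantile()` accepts
# several quantiles at once, so you can read the quartiles of the merged
# digest in a single call. The returned values are estimates.
sketch5 = r.tdigest().quantile("all_heights", 0.25, 0.5, 0.75)
print(sketch5) # >>> three estimated heights, from the 25th to the 75th percentile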

# STEP_START topk
# The `reserve()` method creates the Top-K object with
# the given key. The parameters are the number of items
# in the ranking and values for `width`, `depth`, and
# `decay`, described in the Top-K reference page.
res31 = r.topk().reserve("top_3_songs", 3, 7, 8, 0.9)
print(res31) # >>> True

# The parameters for `incrby()` are two lists. The count
# for each item in the first list is incremented by the
# value at the same index in the second list.
res32 = r.topk().incrby(
    "top_3_songs",
    [
        "Starfish Trooper",
        "Only one more time",
        "Rock me, Handel",
        "How will anyone know?",
        "Average lover",
        "Road to everywhere"
    ],
    [
        3000,
        1850,
        1325,
        3890,
        4098,
        770
    ]
)
print(res32)
# >>> [None, None, None, 'Rock me, Handel', 'Only one more time', None]

res33 = r.topk().list("top_3_songs")
print(res33)
# >>> ['Average lover', 'How will anyone know?', 'Starfish Trooper']

res34 = r.topk().query(
    "top_3_songs", "Starfish Trooper", "Road to everywhere"
)
print(res34) # >>> [1, 0]
# STEP_END
# REMOVE_START
assert res31
assert res32 == [None, None, None, 'Rock me, Handel', 'Only one more time', None]
assert res33 == ['Average lover', 'How will anyone know?', 'Starfish Trooper']
assert res34 == [1, 0]
# REMOVE_END
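
# A further sketch, not part of the tested example: TOPK.INFO returns the
# parameters the structure was reserved with. The attribute names below assume
# the TopKInfo helper returned by current redis-py versions; the values match
# the `reserve()` call above.
sketch6 = r.topk().info("top_3_songs")
print(sketch6.k, sketch6.width, sketch6.depth, sketch6.decay) # >>> 3 7 8 0.9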
