Skip to content

Commit 57492fa

Browse files
committed
router: update known bucket count when rs removed
When a replicaset was removed, the router didn't update the router.known_bucket_count value. That value is used by discovery to decide when to turn on aggressive discovery, and by router.info() to provide alerts and change the status. As a result, after a replicaset removal, known_bucket_count was left bigger than it actually was, and it could be fixed only by a restart. Discovery could become non-aggressive too early, and router.info() could show weird alerts about bucket_count being configured differently on the storages and the router. Besides, router.info().bucket was showing unrealistic numbers.
1 parent 7f34d05 commit 57492fa

File tree

3 files changed

+88
-0
lines changed

3 files changed

+88
-0
lines changed

test/router/router2.result

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,67 @@ vshard.router.static.discovery_fiber
250250
| - null
251251
| ...
252252

253+
--
254+
-- Known bucket count should be updated properly when replicaset
255+
-- is removed from the config.
256+
--
257+
vshard.router.info().bucket
258+
| ---
259+
| - unreachable: 0
260+
| available_ro: 0
261+
| unknown: 0
262+
| available_rw: 3000
263+
| ...
264+
rs1_uuid = util.replicasets[1]
265+
| ---
266+
| ...
267+
rs1 = cfg.sharding[rs1_uuid]
268+
| ---
269+
| ...
270+
cfg.sharding[rs1_uuid] = nil
271+
| ---
272+
| ...
273+
vshard.router.cfg(cfg)
274+
| ---
275+
| ...
276+
vshard.router.info().bucket
277+
| ---
278+
| - unreachable: 0
279+
| available_ro: 0
280+
| unknown: 1500
281+
| available_rw: 1500
282+
| ...
283+
cfg.sharding[rs1_uuid] = rs1
284+
| ---
285+
| ...
286+
vshard.router.cfg(cfg)
287+
| ---
288+
| ...
289+
vshard.router.discovery_set('on')
290+
| ---
291+
| ...
292+
function wait_all_rw() \
293+
local total = vshard.router.bucket_count() \
294+
local res = vshard.router.info().bucket.available_rw == total \
295+
if not res then \
296+
vshard.router.discovery_wakeup() \
297+
end \
298+
return res \
299+
end
300+
| ---
301+
| ...
302+
test_run:wait_cond(wait_all_rw)
303+
| ---
304+
| - true
305+
| ...
306+
vshard.router.info().bucket
307+
| ---
308+
| - unreachable: 0
309+
| available_ro: 0
310+
| unknown: 0
311+
| available_rw: 3000
312+
| ...
313+
253314
_ = test_run:switch("default")
254315
| ---
255316
| ...

test/router/router2.test.lua

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,30 @@ vshard.router.static.discovery_fiber
9595
vshard.router.discovery_set('once')
9696
vshard.router.static.discovery_fiber
9797

98+
--
99+
-- Known bucket count should be updated properly when replicaset
100+
-- is removed from the config.
101+
--
102+
vshard.router.info().bucket
103+
rs1_uuid = util.replicasets[1]
104+
rs1 = cfg.sharding[rs1_uuid]
105+
cfg.sharding[rs1_uuid] = nil
106+
vshard.router.cfg(cfg)
107+
vshard.router.info().bucket
108+
cfg.sharding[rs1_uuid] = rs1
109+
vshard.router.cfg(cfg)
110+
vshard.router.discovery_set('on')
111+
function wait_all_rw() \
112+
local total = vshard.router.bucket_count() \
113+
local res = vshard.router.info().bucket.available_rw == total \
114+
if not res then \
115+
vshard.router.discovery_wakeup() \
116+
end \
117+
return res \
118+
end
119+
test_run:wait_cond(wait_all_rw)
120+
vshard.router.info().bucket
121+
98122
_ = test_run:switch("default")
99123
_ = test_run:cmd("stop server router_1")
100124
_ = test_run:cmd("cleanup server router_1")

vshard/router/init.lua

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,14 +908,17 @@ local function router_cfg(router, cfg, is_reload)
908908
router.failover_ping_timeout = vshard_cfg.failover_ping_timeout
909909
router.sync_timeout = vshard_cfg.sync_timeout
910910
local old_route_map = router.route_map
911+
local known_bucket_count = 0
911912
router.route_map = table_new(router.total_bucket_count, 0)
912913
for bucket, rs in pairs(old_route_map) do
913914
local new_rs = router.replicasets[rs.uuid]
914915
if new_rs then
915916
router.route_map[bucket] = new_rs
916917
new_rs.bucket_count = new_rs.bucket_count + 1
918+
known_bucket_count = known_bucket_count + 1
917919
end
918920
end
921+
router.known_bucket_count = known_bucket_count
919922
if router.failover_fiber == nil then
920923
router.failover_fiber = util.reloadable_fiber_create(
921924
'vshard.failover.' .. router.name, M, 'failover_f', router)

0 commit comments

Comments
 (0)