Skip to content

Commit 06c15b0

Browse files
committed
rebalancer: add logging of routes
This patch adds logging of rebalancer routes. The log now includes the source storage, the number of buckets, and the destination storage to which the buckets will be moved. Because the rebalancer service's log message for sent routes has changed, the `rebalancer/rebalancer.test.lua` and `rebalancer/stress_add_remove_several_rs.test.lua` tests are updated to match. Part of #212. NO_DOC=bugfix
1 parent 2691757 commit 06c15b0

File tree

6 files changed

+31
-10
lines changed

6 files changed

+31
-10
lines changed

test/rebalancer/rebalancer.result

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ test_run:switch('box_1_a')
149149
vshard.storage.rebalancer_enable()
150150
---
151151
...
152-
wait_rebalancer_state("Rebalance routes are sent", test_run)
152+
wait_rebalancer_state("The following rebalancer routes were sent", test_run)
153153
---
154154
...
155155
wait_rebalancer_state('The cluster is balanced ok', test_run)
@@ -239,7 +239,7 @@ cfg.rebalancer_disbalance_threshold = 0.01
239239
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
240240
---
241241
...
242-
wait_rebalancer_state('Rebalance routes are sent', test_run)
242+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
243243
---
244244
...
245245
wait_rebalancer_state('The cluster is balanced ok', test_run)

test/rebalancer/rebalancer.test.lua

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ util.map_bucket_protection(test_run, {REPLICASET_1}, true)
7878

7979
test_run:switch('box_1_a')
8080
vshard.storage.rebalancer_enable()
81-
wait_rebalancer_state("Rebalance routes are sent", test_run)
81+
wait_rebalancer_state("The following rebalancer routes were sent", test_run)
8282

8383
wait_rebalancer_state('The cluster is balanced ok', test_run)
8484
_bucket.index.status:count({vshard.consts.BUCKET.ACTIVE})
@@ -118,7 +118,7 @@ _bucket.index.status:count({vshard.consts.BUCKET.ACTIVE})
118118
-- Return 1%.
119119
cfg.rebalancer_disbalance_threshold = 0.01
120120
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
121-
wait_rebalancer_state('Rebalance routes are sent', test_run)
121+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
122122
wait_rebalancer_state('The cluster is balanced ok', test_run)
123123
_bucket.index.status:count({vshard.consts.BUCKET.ACTIVE})
124124
_bucket.index.status:min({vshard.consts.BUCKET.ACTIVE})

test/rebalancer/stress_add_remove_several_rs.result

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ add_replicaset()
175175
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
176176
---
177177
...
178-
wait_rebalancer_state('Rebalance routes are sent', test_run)
178+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
179179
---
180180
...
181181
-- Now, add a second replicaset.
@@ -422,7 +422,7 @@ remove_second_replicaset_first_stage()
422422
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
423423
---
424424
...
425-
wait_rebalancer_state('Rebalance routes are sent', test_run)
425+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
426426
---
427427
...
428428
-- Rebalancing has been started - now remove second replicaset.

test/rebalancer/stress_add_remove_several_rs.test.lua

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ fiber.sleep(0.5)
7171
test_run:switch('box_1_a')
7272
add_replicaset()
7373
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
74-
wait_rebalancer_state('Rebalance routes are sent', test_run)
74+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
7575

7676
-- Now, add a second replicaset.
7777

@@ -153,7 +153,7 @@ fiber.sleep(0.5)
153153
test_run:switch('box_1_a')
154154
remove_second_replicaset_first_stage()
155155
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
156-
wait_rebalancer_state('Rebalance routes are sent', test_run)
156+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
157157
-- Rebalancing has been started - now remove second replicaset.
158158
remove_replicaset_first_stage()
159159
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)

test/storage-luatest/storage_1_1_1_test.lua

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,3 +200,23 @@ rebalancer_recovery_group.test_no_logs_while_unsuccess_recovery = function(g)
200200
wait_for_bucket_is_transferred(g.replica_2_a, g.replica_1_a,
201201
hanged_bucket_id_2)
202202
end
203+
204+
--
-- Check that the rebalancer logs the routes it has sent. The first bucket of
-- replicaset 2 and the first bucket of replicaset 3 are passed to
-- start_bucket_move() with replicaset 1 as the destination (presumably this
-- leaves replicaset 1 with two extra buckets - confirm against the helper).
-- The expected log entry is then a JSON route map keyed by replicaset 1's
-- UUID with one bucket for each of replicasets 3 and 2.
--
rebalancer_recovery_group.test_rebalancer_routes_logging = function(g)
    local moved_bucket_from_2 = vtest.storage_first_bucket(g.replica_2_a)
    start_bucket_move(g.replica_2_a, g.replica_1_a, moved_bucket_from_2)
    local moved_bucket_from_3 = vtest.storage_first_bucket(g.replica_3_a)
    start_bucket_move(g.replica_3_a, g.replica_1_a, moved_bucket_from_3)
    -- Keep waking the rebalancer up until it reports that routes are being
    -- applied.
    t.helpers.retrying({timeout = 60}, function()
        g.replica_1_a:exec(function() ivshard.storage.rebalancer_wakeup() end)
        t.assert(g.replica_1_a:grep_log('Apply rebalancer routes with 1 ' ..
                                        'workers'))
    end)
    local rebalancer_routes_msg = string.format(
        "{\"%s\":{\"%s\":1,\"%s\":1}}", g.replica_1_a:replicaset_uuid(),
        g.replica_3_a:replicaset_uuid(), g.replica_2_a:replicaset_uuid())
    t.assert(g.replica_1_a:grep_log(rebalancer_routes_msg))
    -- The grep result has to be asserted: retrying() repeats the callback
    -- only while it raises, so an unchecked grep_log() would return after the
    -- first iteration and the test would finish without ever waiting for the
    -- cluster to become balanced again.
    t.helpers.retrying({}, function()
        g.replica_1_a:exec(function() ivshard.storage.rebalancer_wakeup() end)
        t.assert(g.replica_1_a:grep_log('The cluster is balanced ok.'))
    end)
end

vshard/storage/init.lua

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2909,8 +2909,9 @@ local function rebalancer_service_f(service, limiter)
29092909
goto continue
29102910
end
29112911
end
2912-
log.info('Rebalance routes are sent. Schedule next wakeup after '..
2913-
'%f seconds', consts.REBALANCER_WORK_INTERVAL)
2912+
log.info('The following rebalancer routes were sent: %s. ' ..
2913+
'Schedule next wakeup after %f seconds', json_encode(routes),
2914+
consts.REBALANCER_WORK_INTERVAL)
29142915
service:set_activity('idling')
29152916
lfiber.testcancel()
29162917
lfiber.sleep(consts.REBALANCER_WORK_INTERVAL)

0 commit comments

Comments
 (0)