Skip to content

Commit 6364056

Browse files
SerpentianGerold103
authored and committed
replicaset: introduce name validation
In case of name identification, no UUID may be passed at all, so we cannot rely on verifying only the UUID when connecting to a storage. It seems impossible to extend the current net.box greeting by exposing net_box.conn.name in it, as the iproto greeting doesn't have enough free space to store a 64 bit instance name. So, we should deal with name validation on the vshard side. For this, conn.vconnect is introduced. It is an asynchronous vshard greeting, saved inside the netbox connection. It stores a future object and additional info needed for its work. The future is initialized when the connection is established (inside netbox_on_connect). The connection cannot be considered "connected" until vconnect is properly validated. Currently only instance_name is validated inside conn.vconnect. Closes #426 NO_DOC=internal
1 parent 9d52e1e commit 6364056

File tree

7 files changed

+381
-9
lines changed

7 files changed

+381
-9
lines changed

test/replicaset-luatest/replicaset_3_test.lua

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ local t = require('luatest')
33
local vreplicaset = require('vshard.replicaset')
44
local vtest = require('test.luatest_helpers.vtest')
55
local verror = require('vshard.error')
6+
local vutil = require('vshard.util')
67

78
local small_timeout_opts = {timeout = 0.05}
89
local timeout_opts = {timeout = vtest.wait_timeout}
@@ -247,6 +248,7 @@ test_group.test_locate_master_when_no_conn_object = function(g)
247248
end
248249

249250
test_group.test_named_replicaset = function(g)
251+
t.run_only_if(vutil.feature.persistent_names)
250252
local new_cfg_template = table.deepcopy(cfg_template)
251253
new_cfg_template.identification_mode = 'name_as_key'
252254
new_cfg_template.sharding['replicaset'] = new_cfg_template.sharding[1]
@@ -267,9 +269,15 @@ test_group.test_named_replicaset = function(g)
267269
t.assert_equals(rs.id, rs.name)
268270
t.assert_equals(replica_1_a.id, replica_1_a.name)
269271

270-
-- Just to be sure, that it works.
272+
-- Name is not set, name mismatch error.
273+
local ret, err = rs:callrw('get_uuid', {}, {timeout = 5})
274+
t.assert_equals(err.name, 'INSTANCE_NAME_MISMATCH')
275+
t.assert_equals(ret, nil)
276+
277+
-- Set name, everything works from now on.
278+
g.replica_1_a:exec(function() box.cfg{instance_name = 'replica_1_a'} end)
271279
local uuid_a = g.replica_1_a:instance_uuid()
272-
local ret, err = rs:callrw('get_uuid', {}, timeout_opts)
280+
ret, err = rs:callrw('get_uuid', {}, timeout_opts)
273281
t.assert_equals(err, nil)
274282
t.assert_equals(ret, uuid_a)
275283

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
local fiber = require('fiber')
2+
local t = require('luatest')
3+
local vreplicaset = require('vshard.replicaset')
4+
local vtest = require('test.luatest_helpers.vtest')
5+
local vutil = require('vshard.util')
6+
local verror = require('vshard.error')
7+
8+
-- Deliberately tiny timeout; used by the test which expects the call to
-- fail with a timeout error.
local small_timeout_opts = {timeout = 0.01}
9+
-- Default call options: the wait timeout provided by the test helpers.
local timeout_opts = {timeout = vtest.wait_timeout}
10+
11+
local test_group = t.group('vconnect')
12+
13+
-- Cluster template: one replicaset keyed 'replicaset' with a single
-- master instance keyed 'replica'.
-- NOTE(review): identification_mode = 'name_as_key' presumably makes these
-- keys the replicaset/instance names - confirm against the vshard config.
local cfg_template = {
14+
sharding = {
15+
replicaset = {
16+
replicas = {
17+
replica = {
18+
master = true,
19+
},
20+
},
21+
},
22+
},
23+
bucket_count = 20,
24+
test_user_grant_range = 'super',
25+
replication_timeout = 0.1,
26+
identification_mode = 'name_as_key',
27+
}
28+
-- Config built from cfg_template; assigned in before_all and read by
-- every test of the group.
local global_cfg
29+
30+
-- Group setup: run the group only when the persistent_names feature is
-- available, then build the config from cfg_template and create and
-- bootstrap the cluster from it.
test_group.before_all(function(g)
31+
t.run_only_if(vutil.feature.persistent_names)
32+
global_cfg = vtest.config_new(cfg_template)
33+
vtest.cluster_new(g, global_cfg)
34+
vtest.cluster_bootstrap(g, global_cfg)
35+
-- Presumably waits until all instances have the same vclock - TODO confirm.
vtest.cluster_wait_vclock_all(g)
36+
end)
37+
38+
-- Group teardown: stop the test cluster stored in g.cluster (presumably
-- the one created by vtest.cluster_new in before_all - confirm).
test_group.after_all(function(g)
39+
g.cluster:stop()
40+
end)
41+
42+
--
43+
-- Test that conn_vconnect_wait fails when it cannot get a correct
44+
-- result: the storage's `_call` is removed, so the request fails with an
-- "is not defined" error. The connection should be closed afterwards.
45+
--
46+
test_group.test_vconnect_no_result = function(g)
47+
local _, rs = next(vreplicaset.buildall(global_cfg))
-- Remove `_call` on the storage, stashing the original in _G so that it
-- can be restored at the end of the test.
48+
g.replica:exec(function()
49+
rawset(_G, '_call', ivshard.storage._call)
50+
ivshard.storage._call = nil
51+
end)
52+
53+
-- Drop the connection in order to make the replicaset recreate it.
54+
rs.master.conn = nil
55+
local ret, err = rs:callrw('get_uuid', {}, timeout_opts)
56+
t.assert_str_contains(err.message, "_call' is not defined")
57+
t.assert_equals(ret, nil)
58+
-- Critical error, the connection should be closed.
59+
t.assert_equals(rs.master.conn.state, 'closed')
60+
-- Restore the original `_call` on the storage.
-- NOTE(review): this restore is skipped if an assertion above fails.
61+
g.replica:exec(function()
62+
ivshard.storage._call = _G._call
63+
end)
64+
end
65+
66+
--
67+
-- Test that conn_vconnect_wait fails when the future is nil: the
-- storage's `_call` is replaced with a function which sleeps until it is
-- released, and the request is made with a tiny timeout, so it is
-- expected to fail with a timeout error while the connection stays open.
68+
--
69+
test_group.test_vconnect_no_future = function(g)
70+
local _, rs = next(vreplicaset.buildall(global_cfg))
71+
g.replica:exec(function()
72+
rawset(_G, '_call', ivshard.storage._call)
73+
rawset(_G, 'do_sleep', true)
74+
-- Future should not appear at all: `_call` sleeps in a loop until
-- do_sleep is reset at the end of the test.
75+
ivshard.storage._call = function()
76+
while _G.do_sleep do
77+
ifiber.sleep(0.1)
78+
end
79+
end
80+
end)
81+
82+
-- Drop the connection object before making the call.
rs.master.conn = nil
83+
local ret, err = rs:callrw('get_uuid', {}, small_timeout_opts)
84+
t.assert(verror.is_timeout(err))
85+
t.assert_equals(ret, nil)
86+
-- Unlike a critical error, a timeout must not close the connection.
t.assert_not_equals(rs.master.conn.state, 'closed')
87+
-- Release the sleeping `_call` and restore the original one.
88+
g.replica:exec(function()
89+
_G.do_sleep = false
90+
ivshard.storage._call = _G._call
91+
end)
92+
end
93+
94+
--
95+
-- Test that conn_vconnect_check fails when the future's result is nil:
-- the storage's `_call` is removed and the request is made with
-- is_async = true. The call is retried until it reports the actual error
-- and the connection is closed.
96+
--
97+
test_group.test_vconnect_check_no_future = function(g)
98+
local _, rs = next(vreplicaset.buildall(global_cfg))
-- Remove `_call` on the storage, keeping the original in _G for restore.
99+
g.replica:exec(function()
100+
rawset(_G, '_call', ivshard.storage._call)
101+
ivshard.storage._call = nil
102+
end)
103+
104+
rs.master.conn = nil
105+
-- Copy the shared options so that is_async does not leak to other tests.
local opts = table.deepcopy(timeout_opts)
106+
opts.is_async = true
107+
t.helpers.retrying({}, function()
108+
-- It may be a VHANDSHAKE_NOT_COMPLETE error while the future
109+
-- is not ready. But in the end it must be the actual error.
110+
local ret, err = rs:callrw('get_uuid', {}, opts)
111+
t.assert_str_contains(err.message, "_call' is not defined")
112+
t.assert_equals(ret, nil)
113+
t.assert_equals(rs.master.conn.state, 'closed')
114+
end)
115+
-- Restore the original `_call` on the storage.
116+
g.replica:exec(function()
117+
ivshard.storage._call = _G._call
118+
end)
119+
end
120+
121+
--
122+
-- 1. Change the instance name and stop the replica.
123+
-- 2. Wait until the connection gets into the error_reconnect state.
124+
-- 3. Assert that the name change is noticed on reconnect.
125+
--
126+
test_group.test_vconnect_on_reconnect = function(g)
127+
local _, rs = next(vreplicaset.buildall(global_cfg))
128+
t.assert_not_equals(rs:connect_master(), nil)
129+
-- Configuration to use after the restart: the same instance, but keyed
-- 'bad' instead of 'replica'.
130+
local new_cfg = table.deepcopy(global_cfg)
131+
local cfg_rs = new_cfg.sharding.replicaset
132+
cfg_rs.replicas.bad = cfg_rs.replicas.replica
133+
cfg_rs.replicas.replica = nil
134+
135+
g.replica:exec(function()
136+
box.cfg{instance_name = 'bad', force_recovery = true}
137+
end)
138+
g.replica:stop()
139+
t.helpers.retrying({}, function()
140+
t.assert_equals(rs.master.conn.state, 'error_reconnect')
141+
end)
142+
143+
-- The replica cannot be started with an incorrect name, change box.cfg.
144+
g.replica.box_cfg.instance_name = 'bad'
145+
g.replica:start()
146+
vtest.cluster_cfg(g, new_cfg)
-- `rs` was built from global_cfg, where the master is keyed 'replica',
-- while the instance is now named 'bad', hence the mismatch error.
147+
local ret, err = rs:callrw('get_uuid', {}, timeout_opts)
148+
t.assert_equals(err.name, 'INSTANCE_NAME_MISMATCH')
149+
t.assert_equals(ret, nil)
150+
t.assert_equals(rs.master.conn.state, 'closed')
151+
-- Rename the instance back and reapply the original configuration.
-- NOTE(review): g.replica.box_cfg.instance_name is left as 'bad' here -
-- confirm that no later restart of the replica depends on it.
152+
g.replica:exec(function()
153+
box.cfg{instance_name = 'replica', force_recovery = true}
154+
end)
155+
vtest.cluster_cfg(g, global_cfg)
156+
end
157+
158+
--
159+
-- Test that an async call doesn't yield and fails immediately with
-- VHANDSHAKE_NOT_COMPLETE while the storage's `_call` is blocked. The
-- connection must not be closed by that error.
160+
--
161+
test_group.test_async_no_yield = function(g)
162+
local _, rs = next(vreplicaset.buildall(global_cfg))
163+
g.replica:exec(function()
164+
rawset(_G, '_call', ivshard.storage._call)
165+
rawset(_G, 'do_sleep', true)
166+
-- Future should not appear at all: `_call` sleeps in a loop until
-- do_sleep is reset at the end of the test.
167+
ivshard.storage._call = function()
168+
while _G.do_sleep do
169+
ifiber.sleep(0.1)
170+
end
171+
end
172+
end)
173+
174+
local opts = table.deepcopy(timeout_opts)
175+
opts.is_async = true
-- Compare the csw() counter of the current fiber before and after the
-- call to see how many times the call yielded.
176+
local csw1 = fiber.self():csw()
177+
local ret, err = rs:callrw('get_uuid', {}, opts)
178+
local csw2 = fiber.self():csw()
179+
-- Waiting for #456 to be fixed. For now exactly one extra context
-- switch is asserted, although the test title says there is no yield.
-- NOTE(review): presumably the +1 goes away with #456 - confirm.
180+
t.assert_equals(csw2, csw1 + 1)
181+
t.assert_str_contains(err.name, 'VHANDSHAKE_NOT_COMPLETE')
182+
t.assert_equals(ret, nil)
183+
t.assert_not_equals(rs.master.conn.state, 'closed')
184+
-- Release the sleeping `_call` and restore the original one.
185+
g.replica:exec(function()
186+
_G.do_sleep = false
187+
ivshard.storage._call = _G._call
188+
end)
189+
end

test/storage/storage.result

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,16 +1014,16 @@ vshard.storage.internal.errinj.ERRINJ_RECOVERY_PAUSE = false
10141014
--
10151015
-- Internal info function.
10161016
--
1017-
vshard.storage._call('info')
1017+
vshard.storage._call('info').is_master
10181018
---
1019-
- is_master: true
1019+
- true
10201020
...
10211021
_ = test_run:switch('storage_1_b')
10221022
---
10231023
...
1024-
vshard.storage._call('info')
1024+
vshard.storage._call('info').is_master
10251025
---
1026-
- is_master: false
1026+
- false
10271027
...
10281028
--
10291029
-- gh-123, gh-298: storage auto-enable/disable depending on instance state.

test/storage/storage.test.lua

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,9 +327,9 @@ vshard.storage.internal.errinj.ERRINJ_RECOVERY_PAUSE = false
327327
--
328328
-- Internal info function.
329329
--
330-
vshard.storage._call('info')
330+
vshard.storage._call('info').is_master
331331
_ = test_run:switch('storage_1_b')
332-
vshard.storage._call('info')
332+
vshard.storage._call('info').is_master
333333

334334
--
335335
-- gh-123, gh-298: storage auto-enable/disable depending on instance state.

vshard/error.lua

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,16 @@ local error_message_template = {
202202
msg = 'Bucket %s update is invalid: %s',
203203
args = {'bucket_id', 'reason'},
204204
},
205+
[40] = {
206+
name = 'VHANDSHAKE_NOT_COMPLETE',
207+
msg = 'Handshake with %s have not been completed yet',
208+
args = {'replica'},
209+
},
210+
[41] = {
211+
name = 'INSTANCE_NAME_MISMATCH',
212+
msg = 'Mismatch server name: expected "%s", but got "%s"',
213+
args = {'expected_name', 'actual_name'},
214+
},
205215
}
206216

207217
--

0 commit comments

Comments
 (0)