Skip to content

Commit 1de9c68

Browse files
authored
Use reassign node api in state storage self heal (#21557)
1 parent 1c54822 commit 1de9c68

File tree

2 files changed

+123
-9
lines changed

2 files changed

+123
-9
lines changed

ydb/core/blobstorage/nodewarden/distconf_invoke_state_storage.cpp

Lines changed: 114 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,13 @@ namespace NKikimr::NStorage {
112112
Self->Send(new IEventHandle(TEvents::TSystem::Poison, 0, Self->StateStorageSelfHealActor.value(), Self->SelfId(), nullptr, 0));
113113
Self->StateStorageSelfHealActor.reset();
114114
}
115-
116-
auto needReconfig = [&](auto name, auto clearFunc, auto ssMutableFunc, auto buildFunc) {
115+
enum ReconfigType {
116+
NONE,
117+
ONE_NODE,
118+
FULL
119+
};
120+
std::unordered_map<ui32, ui32> nodesToReplace;
121+
auto needReconfig = [&](auto clearFunc, auto ssMutableFunc, auto buildFunc) {
117122
auto copyCurrentConfig = currentConfig;
118123
auto ss = *(copyCurrentConfig.*ssMutableFunc)();
119124
if (ss.RingGroupsSize() == 0) {
@@ -127,26 +132,127 @@ namespace NKikimr::NStorage {
127132
TIntrusivePtr<TStateStorageInfo> oldSSInfo;
128133
oldSSInfo = (*buildFunc)(ss);
129134
newSSInfo = (*buildFunc)(*(targetConfig.*ssMutableFunc)());
130-
STLOG(PRI_DEBUG, BS_NODE, NW52, "Need to reconfig " << name << " " << (oldSSInfo->RingGroups != newSSInfo->RingGroups));
131135
if (oldSSInfo->RingGroups == newSSInfo->RingGroups) {
132136
(targetConfig.*clearFunc)();
133-
return false;
137+
return ReconfigType::NONE;
134138
}
135139

136-
return true;
140+
if (oldSSInfo->RingGroups.size() != newSSInfo->RingGroups.size()) {
141+
return ReconfigType::FULL;
142+
}
143+
144+
bool hasBadNodes = false;
145+
for (ui32 ringGroupIdx : xrange(oldSSInfo->RingGroups.size())) {
146+
auto& oldRg = oldSSInfo->RingGroups[ringGroupIdx];
147+
auto& newRg = newSSInfo->RingGroups[ringGroupIdx];
148+
if (oldRg.NToSelect != newRg.NToSelect || oldRg.Rings.size() != newRg.Rings.size()) {
149+
return ReconfigType::FULL;
150+
}
151+
for (ui32 j : xrange(oldRg.Rings.size())) {
152+
auto& oldRing = oldRg.Rings[j];
153+
auto& newRing = newRg.Rings[j];
154+
if (oldRing.IsDisabled != newRing.IsDisabled
155+
|| oldRing.UseRingSpecificNodeSelection != newRing.UseRingSpecificNodeSelection
156+
|| oldRing.Replicas.size() != newRing.Replicas.size()) {
157+
return ReconfigType::FULL;
158+
}
159+
for (auto& actorId : oldRing.Replicas) {
160+
if (!Self->SelfHealNodesState.contains(actorId.NodeId()) || Self->SelfHealNodesState.at(actorId.NodeId()) > 0) {
161+
hasBadNodes = true;
162+
}
163+
}
164+
}
165+
}
166+
if (!hasBadNodes) {
167+
return ReconfigType::NONE; // Current config is optimal and all nodes are good
168+
}
169+
170+
// Check whether node replacement can be applied
171+
for (ui32 ringGroupIdx : xrange(oldSSInfo->RingGroups.size())) {
172+
auto& oldRg = oldSSInfo->RingGroups[ringGroupIdx];
173+
auto& newRg = newSSInfo->RingGroups[ringGroupIdx];
174+
175+
// Find unchanged rings and put them back at their previous positions
176+
auto equalRingsByNodes = [](auto& ring1, auto& ring2) {
177+
if (ring1.Replicas.size() != ring2.Replicas.size()) {
178+
return false;
179+
}
180+
for(ui32 replicaPos : xrange(ring1.Replicas.size())) {
181+
if (ring1.Replicas[replicaPos].NodeId() != ring2.Replicas[replicaPos].NodeId()) {
182+
return false;
183+
}
184+
}
185+
return true;
186+
};
187+
for (ui32 oldRingIdx : xrange(oldRg.Rings.size())) {
188+
for (ui32 newRingIdx : xrange(newRg.Rings.size())) {
189+
if (newRingIdx != oldRingIdx && equalRingsByNodes(oldRg.Rings[oldRingIdx], newRg.Rings[newRingIdx])) {
190+
std::swap(newRg.Rings[newRingIdx], newRg.Rings[oldRingIdx]);
191+
break;
192+
}
193+
}
194+
}
195+
for (ui32 j : xrange(oldRg.Rings.size())) {
196+
auto& oldRing = oldRg.Rings[j];
197+
auto& newRing = newRg.Rings[j];
198+
if (oldRing == newRing) {
199+
continue;
200+
}
201+
// Put the replicas within the ring back at their previous positions
202+
for (ui32 oldReplicaPos : xrange(oldRing.Replicas.size())) {
203+
for (ui32 newReplicaPos : xrange(newRing.Replicas.size())) {
204+
if (newReplicaPos != oldReplicaPos && oldRing.Replicas[oldReplicaPos].NodeId() == newRing.Replicas[newReplicaPos].NodeId()) {
205+
std::swap(newRing.Replicas[newReplicaPos], newRing.Replicas[oldReplicaPos]);
206+
break;
207+
}
208+
}
209+
}
210+
211+
for (ui32 k : xrange(oldRing.Replicas.size())) {
212+
auto oldRep = oldRing.Replicas[k].NodeId();
213+
auto newRep = newRing.Replicas[k].NodeId();
214+
if (oldRep == newRep) {
215+
continue;
216+
}
217+
if (auto it = nodesToReplace.find(oldRep); it != nodesToReplace.end() && it->second != newRep) {
218+
return ReconfigType::FULL;
219+
}
220+
nodesToReplace[oldRep] = newRep;
221+
}
222+
}
223+
}
224+
if (nodesToReplace.size() == 1) {
225+
return ReconfigType::ONE_NODE;
226+
}
227+
return nodesToReplace.empty() ? ReconfigType::NONE : ReconfigType::FULL;
137228
};
138-
#define NEED_RECONFIG(NAME) needReconfig(#NAME, &NKikimrBlobStorage::TStateStorageConfig::Clear##NAME##Config, &NKikimrBlobStorage::TStateStorageConfig::Mutable##NAME##Config, &NKikimr::Build##NAME##Info)
229+
#define NEED_RECONFIG(NAME) needReconfig(&NKikimrBlobStorage::TStateStorageConfig::Clear##NAME##Config, &NKikimrBlobStorage::TStateStorageConfig::Mutable##NAME##Config, &NKikimr::Build##NAME##Info)
139230
auto needReconfigSS = NEED_RECONFIG(StateStorage);
140231
auto needReconfigSSB = NEED_RECONFIG(StateStorageBoard);
141232
auto needReconfigSB = NEED_RECONFIG(SchemeBoard);
233+
#undef NEED_RECONFIG
142234

143-
if (!needReconfigSS && !needReconfigSSB && !needReconfigSB) {
235+
if (needReconfigSS == ReconfigType::NONE && needReconfigSSB == ReconfigType::NONE && needReconfigSB == ReconfigType::NONE) {
144236
throw TExError() << "Current configuration is recommended. Nothing to self-heal.";
145237
}
146-
#undef NEED_RECONFIG
238+
if (nodesToReplace.size() == 1 && needReconfigSS != ReconfigType::FULL && needReconfigSSB != ReconfigType::FULL && needReconfigSB != ReconfigType::FULL) {
239+
STLOG(PRI_DEBUG, BS_NODE, NW52, "Need to reconfig one node " << nodesToReplace.begin()->first << " to " << nodesToReplace.begin()->second
240+
, (CurrentConfig, currentConfig), (TargetConfig, targetConfig));
241+
242+
TQuery::TReassignStateStorageNode cmd;
243+
cmd.SetFrom(nodesToReplace.begin()->first);
244+
cmd.SetTo(nodesToReplace.begin()->second);
245+
cmd.SetStateStorage(needReconfigSS == ReconfigType::ONE_NODE);
246+
cmd.SetStateStorageBoard(needReconfigSSB == ReconfigType::ONE_NODE);
247+
cmd.SetSchemeBoard(needReconfigSB == ReconfigType::ONE_NODE);
248+
ReassignStateStorageNode(cmd);
249+
return;
250+
}
147251

148252
AdjustRingGroupActorIdOffsetInRecommendedStateStorageConfig(&targetConfig);
149253

254+
STLOG(PRI_DEBUG, BS_NODE, NW52, "Need to reconfig, starting StateStorageSelfHealActor", (CurrentConfig, currentConfig), (TargetConfig, targetConfig));
255+
150256
Self->StateStorageSelfHealActor = Register(new TStateStorageSelfhealActor(Sender, Cookie,
151257
TDuration::Seconds(waitForConfigStep), std::move(currentConfig), std::move(targetConfig)));
152258
auto ev = PrepareResult(TResult::OK, std::nullopt);

ydb/tests/functional/config/test_distconf_sentinel_node_status.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,12 +143,20 @@ def do_test(self, configName):
143143
assert_eq(len(rg["Ring"]), 9)
144144
self.validate_contains_nodes(rg, [3])
145145
self.cluster.nodes[3].stop()
146-
time.sleep(25)
146+
for i in range(15):
147+
time.sleep(2)
148+
cfg = self.do_request_config()[f"{configName}Config"]
149+
assert_eq(len(cfg["RingGroups"]), 1)
150+
147151
rg2 = get_ring_group(self.do_request_config(), configName)
148152
assert_eq(rg["NToSelect"], 9)
149153
assert_eq(len(rg["Ring"]), 9)
150154
self.validate_not_contains_nodes(rg2, [3])
151155
assert_that(rg != rg2)
156+
self.cluster.nodes[3].start()
157+
time.sleep(25)
158+
rg3 = get_ring_group(self.do_request_config(), configName)
159+
assert_that(rg3 == rg2) # Current config has no bad nodes and should not run self-heal
152160

153161

154162
class TestKiKiMRDistConfSelfHeal2NodesDisconnected(KiKiMRDistConfNodeStatusTest):

0 commit comments

Comments
 (0)