@@ -53,7 +53,7 @@ pub struct TqState {
53
53
pub member_universe : Vec < PlatformId > ,
54
54
55
55
/// The set of nodes that are currently crashed in our test
56
- pub faults : Faults ,
56
+ pub crashed_nodes : BTreeSet < PlatformId > ,
57
57
58
58
/// All configurations ever generated by a coordinator.
59
59
///
@@ -79,7 +79,7 @@ impl TqState {
79
79
underlay_network : Vec :: new ( ) ,
80
80
nexus : NexusState :: new ( ) ,
81
81
member_universe,
82
- faults : Faults :: default ( ) ,
82
+ crashed_nodes : BTreeSet :: new ( ) ,
83
83
all_coordinated_configs : IdOrdMap :: new ( ) ,
84
84
expunged : BTreeSet :: new ( ) ,
85
85
}
@@ -91,7 +91,7 @@ impl TqState {
91
91
pub fn send_reconfigure_msg ( & mut self ) {
92
92
let ( coordinator, msg) = self . nexus . reconfigure_msg_for_latest_config ( ) ;
93
93
let epoch_to_config = msg. epoch ;
94
- if !self . faults . crashed_nodes . contains ( coordinator) {
94
+ if !self . crashed_nodes . contains ( coordinator) {
95
95
let ( node, ctx) = self
96
96
. sut
97
97
. nodes
@@ -122,7 +122,7 @@ impl TqState {
122
122
let ( coordinator, msg) = self . nexus . reconfigure_msg_for_latest_config ( ) ;
123
123
124
124
// The coordinator should have received the `ReconfigureMsg` from Nexus
125
- if !self . faults . crashed_nodes . contains ( coordinator) {
125
+ if !self . crashed_nodes . contains ( coordinator) {
126
126
let ( node, ctx) = self
127
127
. sut
128
128
. nodes
@@ -131,22 +131,24 @@ impl TqState {
131
131
let mut connected_members = 0 ;
132
132
// The coordinator should start preparing by sending a `PrepareMsg` to all
133
133
// connected nodes in the membership set.
134
- for member in
135
- msg. members . iter ( ) . filter ( |& id| id != coordinator) . cloned ( )
134
+ for member in msg
135
+ . members
136
+ . iter ( )
137
+ . filter ( |& id| {
138
+ !self . crashed_nodes . contains ( id) && id != ctx. platform_id ( )
139
+ } )
140
+ . cloned ( )
136
141
{
137
- if self . faults . is_connected ( coordinator. clone ( ) , member. clone ( ) )
138
- {
139
- connected_members += 1 ;
140
- let msg_found = ctx. envelopes ( ) . any ( |envelope| {
141
- envelope. to == member
142
- && envelope. from == * coordinator
143
- && matches ! (
144
- envelope. msg. kind,
145
- PeerMsgKind :: Prepare { .. }
146
- )
147
- } ) ;
148
- assert ! ( msg_found) ;
149
- }
142
+ connected_members += 1 ;
143
+ let msg_found = ctx. envelopes ( ) . any ( |envelope| {
144
+ envelope. to == member
145
+ && envelope. from == * coordinator
146
+ && matches ! (
147
+ envelope. msg. kind,
148
+ PeerMsgKind :: Prepare { .. }
149
+ )
150
+ } ) ;
151
+ assert ! ( msg_found) ;
150
152
}
151
153
assert_eq ! ( connected_members, ctx. envelopes( ) . count( ) ) ;
152
154
@@ -176,7 +178,7 @@ impl TqState {
176
178
// Only send envelopes to alive nodes
177
179
for envelope in ctx
178
180
. drain_envelopes ( )
179
- . filter ( |e| !self . faults . crashed_nodes . contains ( & e. to ) )
181
+ . filter ( |e| !self . crashed_nodes . contains ( & e. to ) )
180
182
{
181
183
let msgs =
182
184
self . bootstrap_network . entry ( envelope. to . clone ( ) ) . or_default ( ) ;
@@ -241,7 +243,7 @@ impl TqState {
241
243
// Create the SUT nodes
242
244
self . sut = Sut :: new ( & self . log , self . member_universe . clone ( ) ) ;
243
245
244
- self . faults . crashed_nodes = crashed_nodes;
246
+ self . crashed_nodes = crashed_nodes;
245
247
246
248
// Inform nexus about the initial configuration
247
249
self . nexus . configs . insert_unique ( config) . expect ( "new config" ) ;
@@ -251,11 +253,13 @@ impl TqState {
251
253
. sut
252
254
. nodes
253
255
. iter_mut ( )
254
- . filter ( |( id, _) | !self . faults . crashed_nodes . contains ( id) )
256
+ . filter ( |( id, _) | !self . crashed_nodes . contains ( id) )
255
257
{
256
- for to in self . member_universe . iter ( ) . filter ( |id| {
257
- !self . faults . crashed_nodes . contains ( id) && from != * id
258
- } ) {
258
+ for to in self
259
+ . member_universe
260
+ . iter ( )
261
+ . filter ( |id| !self . crashed_nodes . contains ( id) && from != * id)
262
+ {
259
263
node. on_connect ( ctx, to. clone ( ) ) ;
260
264
}
261
265
}
@@ -339,7 +343,7 @@ impl TqState {
339
343
self . bootstrap_network . remove ( & id) ;
340
344
341
345
// Keep track of the crashed node
342
- self . faults . crashed_nodes . insert ( id. clone ( ) ) ;
346
+ self . crashed_nodes . insert ( id. clone ( ) ) ;
343
347
344
348
// We get to define the semantics of the network with regards to an
345
349
// inflight message sourced from a crashed node. We have two choices:
@@ -355,7 +359,7 @@ impl TqState {
355
359
. sut
356
360
. nodes
357
361
. iter_mut ( )
358
- . filter ( |( id, _) | !self . faults . crashed_nodes . contains ( id) )
362
+ . filter ( |( id, _) | !self . crashed_nodes . contains ( id) )
359
363
{
360
364
node. on_disconnect ( ctx, id. clone ( ) ) ;
361
365
}
@@ -367,7 +371,7 @@ impl TqState {
367
371
connection_order : Vec < PlatformId > ,
368
372
) {
369
373
// The node is no longer crashed.
370
- self . faults . crashed_nodes . remove ( & id) ;
374
+ self . crashed_nodes . remove ( & id) ;
371
375
372
376
// We need to clear the mutable state of the `Node`. We do this by
373
377
// creating a new `Node` and passing in the existing context which
@@ -390,14 +394,18 @@ impl TqState {
390
394
send_envelopes (
391
395
peer_ctx,
392
396
& mut self . bootstrap_network ,
393
- & mut self . faults ,
397
+ & self . crashed_nodes ,
394
398
) ;
395
399
396
400
let ( node, ctx) = self . sut . nodes . get_mut ( & id) . expect ( "node exists" ) ;
397
401
// Inform the restarted node of the connection
398
402
node. on_connect ( ctx, peer) ;
399
403
// Send any messages output as a result of the connection
400
- send_envelopes ( ctx, & mut self . bootstrap_network , & self . faults ) ;
404
+ send_envelopes (
405
+ ctx,
406
+ & mut self . bootstrap_network ,
407
+ & self . crashed_nodes ,
408
+ ) ;
401
409
}
402
410
}
403
411
@@ -478,7 +486,7 @@ impl TqState {
478
486
}
479
487
480
488
// Send any messages as a result of handling this message
481
- send_envelopes ( ctx, & mut self . bootstrap_network , & self . faults ) ;
489
+ send_envelopes ( ctx, & mut self . bootstrap_network , & self . crashed_nodes ) ;
482
490
483
491
// Remove any destinations with zero messages in-flight
484
492
self . bootstrap_network . retain ( |_, msgs| !msgs. is_empty ( ) ) ;
@@ -536,10 +544,10 @@ impl TqState {
536
544
fn send_envelopes (
537
545
ctx : & mut NodeCtx ,
538
546
bootstrap_network : & mut BTreeMap < PlatformId , Vec < Envelope > > ,
539
- faults : & Faults ,
547
+ crashed_nodes : & BTreeSet < PlatformId > ,
540
548
) {
541
549
for envelope in
542
- ctx. drain_envelopes ( ) . filter ( |e| !faults . crashed_nodes . contains ( & e. to ) )
550
+ ctx. drain_envelopes ( ) . filter ( |e| !crashed_nodes. contains ( & e. to ) )
543
551
{
544
552
let envelopes =
545
553
bootstrap_network. entry ( envelope. to . clone ( ) ) . or_default ( ) ;
@@ -574,55 +582,6 @@ impl Sut {
574
582
}
575
583
}
576
584
577
- /// Faults in our system. It's useful to keep these self contained and not
578
- /// in separate fields in `TestState` so that we can access them all at once
579
- /// independently of other `TestState` fields.
580
- #[ derive( Default , Debug , Clone , Diffable ) ]
581
- pub struct Faults {
582
- // We allow nodes to crash and restart and therefore track crashed nodes here.
583
- //
584
- // A crashed node is implicitly disconnected from every other node. We don't
585
- // bother storing the pairs in `disconnected_nodes`, but instead check both
586
- // fields when necessary.
587
- pub crashed_nodes : BTreeSet < PlatformId > ,
588
-
589
- /// The set of disconnected nodes
590
- pub disconnected_nodes : DisconnectedNodes ,
591
- }
592
-
593
- impl Faults {
594
- pub fn is_connected ( & self , node1 : PlatformId , node2 : PlatformId ) -> bool {
595
- !self . crashed_nodes . contains ( & node1)
596
- && !self . crashed_nodes . contains ( & node2)
597
- && !self . disconnected_nodes . contains ( node1, node2)
598
- }
599
- }
600
-
601
- /// For cardinality purposes, we assume all nodes are connected and explicitly
602
- /// disconnect some of them. This allows us to track and compare much less data.
603
- #[ derive( Default , Debug , Clone , Diffable ) ]
604
- pub struct DisconnectedNodes {
605
- // We sort each pair on insert for quick lookups
606
- pairs : BTreeSet < ( PlatformId , PlatformId ) > ,
607
- }
608
-
609
- impl DisconnectedNodes {
610
- // Return true if the pair is newly inserted
611
- pub fn insert ( & mut self , node1 : PlatformId , node2 : PlatformId ) -> bool {
612
- assert_ne ! ( node1, node2) ;
613
-
614
- let pair = if node1 < node2 { ( node1, node2) } else { ( node2, node1) } ;
615
- self . pairs . insert ( pair)
616
- }
617
-
618
- // Return true if the pair of nodes is disconnected, false otherwise
619
- pub fn contains ( & self , node1 : PlatformId , node2 : PlatformId ) -> bool {
620
- assert_ne ! ( node1, node2) ;
621
- let pair = if node1 < node2 { ( node1, node2) } else { ( node2, node1) } ;
622
- self . pairs . contains ( & pair)
623
- }
624
- }
625
-
626
585
/*****************************************************************************
627
586
*
628
587
* Diff related display code
@@ -657,7 +616,7 @@ impl Display for TqStateDiff<'_> {
657
616
display_bootstrap_network_diff ( & self . bootstrap_network , f) ?;
658
617
display_underlay_network_diff ( & self . underlay_network , f) ?;
659
618
display_nexus_state_diff ( & self . nexus , f) ?;
660
- display_faults_diff ( & self . faults , f) ?;
619
+ display_faults_diff ( & self . crashed_nodes , f) ?;
661
620
display_expunged_diff ( & self . expunged , f) ?;
662
621
663
622
Ok ( ( ) )
@@ -678,34 +637,22 @@ fn display_expunged_diff(
678
637
}
679
638
680
639
/// Writes a human-readable summary of the changes to the crashed-node set.
///
/// Every id in `crashed_nodes.added` is listed under a "Nodes crashed:"
/// heading, and every id in `crashed_nodes.removed` under a "nodes started:"
/// heading. A heading is emitted only when its list is non-empty, so an
/// unchanged set produces no output at all.
///
/// Any error returned by writing to `f` is propagated to the caller via `?`;
/// otherwise the function returns `Ok(())`.
// NOTE(review): the two headings differ in capitalization ("Nodes crashed:"
// vs "nodes started:") — confirm whether that is intentional.
fn display_faults_diff (
681
- diff : & FaultsDiff < ' _ > ,
640
+ crashed_nodes : & BTreeSetDiff < ' _ , PlatformId > ,
682
641
f : & mut std:: fmt:: Formatter < ' _ > ,
683
642
) -> std:: fmt:: Result {
684
- if !diff . crashed_nodes . added . is_empty ( ) {
643
+ if !crashed_nodes. added . is_empty ( ) {
685
644
writeln ! ( f, " Nodes crashed:" ) ?;
686
- for id in & diff . crashed_nodes . added {
645
+ for id in & crashed_nodes. added {
687
646
writeln ! ( f, " {id}" ) ?;
688
647
}
689
648
}
690
- if !diff . crashed_nodes . removed . is_empty ( ) {
649
+ if !crashed_nodes. removed . is_empty ( ) {
691
650
writeln ! ( f, " nodes started:" ) ?;
692
- for id in & diff . crashed_nodes . removed {
651
+ for id in & crashed_nodes. removed {
693
652
writeln ! ( f, " {id}" ) ?;
694
653
}
695
654
}
696
655
697
- if !diff. disconnected_nodes . pairs . added . is_empty ( ) {
698
- writeln ! ( f, " nodes disconnected from each other:" ) ?;
699
- for pair in & diff. disconnected_nodes . pairs . added {
700
- writeln ! ( f, " {}, {}" , pair. 0 , pair. 1 ) ?;
701
- }
702
- }
703
- if !diff. disconnected_nodes . pairs . removed . is_empty ( ) {
704
- writeln ! ( f, " nodes connected to each other:" ) ?;
705
- for pair in & diff. disconnected_nodes . pairs . removed {
706
- writeln ! ( f, " {}, {}" , pair. 0 , pair. 1 ) ?;
707
- }
708
- }
709
656
Ok ( ( ) )
710
657
}
711
658
0 commit comments