Skip to content

Commit 795401b

Browse files
committed
refactor(reasm): key reasm FECs by merkle root and rework tile root init
fd_reasm (formerly fd_fec_chainer) now keys all FEC sets by merkle root. Each FEC set's parent is determined from the chained merkle root carried by that FEC set. Tiles now rely on the manifest sent by snap_out to initialize their root directly instead of relying on the fseq set by replay.
1 parent 2f4ff84 commit 795401b

24 files changed

+1453
-1546
lines changed

CODEOWNERS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
/src/choreo @lidatong @emwang-jump
77
/src/disco/pack @ptaffet-jump @mmcgee-jump
88
/src/disco/shred @ptaffet-jump @mmcgee-jump
9+
/src/disco/reasm @lidatong @emwang-jump
10+
/src/disco/store @lidatong @emwang-jump
911
/src/discof/repair @lidatong @emwang-jump
1012
/src/waltz/quic @nbridge-jump @ripatel-fd @akhinvasara-jumptrading
1113
/src/waltz/tls @ripatel-fd @mmcgee-jump

src/app/firedancer-dev/commands/backtest.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
#include "../../../disco/metrics/fd_metrics.h"
2424
#include "../../../util/pod/fd_pod_format.h"
2525
#include "../../../discof/replay/fd_replay_notif.h"
26-
#include "../../../discof/repair/fd_fec_chainer.h"
26+
#include "../../../discof/repair/fd_reasm.h"
2727
#include "../../../flamenco/runtime/fd_runtime_public.h" /* FD_RUNTIME_PUBLIC_ACCOUNT_UPDATE_MSG_MTU */
2828
#include "../main.h"
2929

@@ -109,7 +109,7 @@ backtest_topo( config_t * config ) {
109109
batches from the CLI-specified source (eg. RocksDB). */
110110

111111
fd_topob_wksp( topo, "repair_repla" );
112-
fd_topob_link( topo, "repair_repla", "repair_repla", 65536UL, sizeof(fd_fec_out_t), 1UL );
112+
fd_topob_link( topo, "repair_repla", "repair_repla", 65536UL, sizeof(fd_reasm_fec_t), 1UL );
113113
fd_topob_tile_in( topo, "replay", 0UL, "metric_in", "repair_repla", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
114114
fd_topob_tile_out( topo, "back", 0UL, "repair_repla", 0UL );
115115

@@ -155,10 +155,10 @@ backtest_topo( config_t * config ) {
155155
This allows the replay tile to advance its watermark, and publish
156156
various data structures. This is an oversimplified barebones mock
157157
of the tower tile. */
158-
fd_topob_wksp( topo, "tower_replay" );
159-
fd_topob_link( topo, "tower_replay", "tower_replay", 128UL, 0UL, 1UL );
160-
fd_topob_tile_in( topo, "replay", 0UL, "metric_in", "tower_replay", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
161-
fd_topob_tile_out( topo, "back", 0UL, "tower_replay", 0UL );
158+
fd_topob_wksp( topo, "root_out" );
159+
fd_topob_link( topo, "root_out", "root_out", 128UL, 0UL, 1UL );
160+
fd_topob_tile_in( topo, "replay", 0UL, "metric_in", "root_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
161+
fd_topob_tile_out( topo, "back", 0UL, "root_out", 0UL );
162162

163163
/**********************************************************************/
164164
/* Setup replay->stake/send/poh links in topo w/o consumers */

src/app/firedancer-dev/commands/repair.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ repair_topo( config_t * config ) {
102102
fd_topob_wksp( topo, "gossip" );
103103
fd_topob_wksp( topo, "metric" );
104104
fd_topob_wksp( topo, "fec_sets" );
105+
fd_topob_wksp( topo, "snap_out" );
105106

106107
fd_topob_wksp( topo, "slot_fseqs" ); /* fseqs for marked slots eg. turbine slot */
107108

@@ -132,14 +133,17 @@ repair_topo( config_t * config ) {
132133

133134
/**/ fd_topob_link( topo, "repair_net", "net_repair", config->net.ingress_buffer_size, FD_NET_MTU, 1UL );
134135
/**/ fd_topob_link( topo, "repair_sign", "repair_sign", 128UL, 2048UL, 1UL );
135-
FOR(shred_tile_cnt) fd_topob_link( topo, "shred_repair", "shred_repair", pending_fec_shreds_depth, FD_SHRED_REPAIR_MTU, 2UL /* at most 2 msgs per after_frag */ );
136+
FOR(shred_tile_cnt) fd_topob_link( topo, "shred_repair", "shred_repair", pending_fec_shreds_depth, FD_SHRED_REPAIR_MTU, 2UL );
136137

137138
FOR(shred_tile_cnt) fd_topob_link( topo, "repair_shred", "shred_repair", pending_fec_shreds_depth, sizeof(fd_ed25519_sig_t), 1UL );
138139
/**/ fd_topob_link( topo, "sign_repair", "sign_repair", 128UL, 64UL, 1UL );
139-
/**/ fd_topob_link( topo, "repair_repla", "repair_repla", 65536UL, sizeof(fd_fec_out_t), 1UL );
140+
/**/ fd_topob_link( topo, "repair_repla", "repair_repla", 65536UL, sizeof(fd_reasm_fec_t), 1UL );
140141
/**/ fd_topob_link( topo, "poh_shred", "poh_shred", 16384UL, USHORT_MAX, 1UL );
141142

142-
/**/ fd_topob_link( topo, "send_txns", "send_txns", 128UL, FD_TXN_MTU, 1UL );
143+
/**/ fd_topob_link( topo, "send_txns", "send_txns", 128UL, FD_TXN_MTU, 1UL );
144+
145+
FD_TEST( sizeof(fd_snapshot_manifest_t)<=(5UL*(1UL<<30UL)) );
146+
/**/ fd_topob_link( topo, "snap_out", "snap_out", 2UL, 5UL*(1UL<<30UL), 1UL );
143147

144148
ushort parsed_tile_to_cpu[ FD_TILE_MAX ];
145149
/* Unassigned tiles will be floating, unless auto topology is enabled. */
@@ -296,6 +300,7 @@ repair_topo( config_t * config ) {
296300
FOR(net_tile_cnt) fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "net_repair", i, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
297301
/**/ fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "gossip_repai", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
298302
/**/ fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "stake_out", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
303+
fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "snap_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
299304
FOR(shred_tile_cnt) fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "shred_repair", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
300305
301306
/**/ fd_topob_tile_in( topo, "sign", 0UL, "metric_in", "repair_sign", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
@@ -330,7 +335,8 @@ repair_topo( config_t * config ) {
330335
fd_topob_tile_in( topo, "scap", 0UL, "metric_in", "replay_scap", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
331336

332337
fd_topob_tile_uses( topo, scap_tile, root_slot_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
333-
fd_topob_tile_out( topo, "scap", 0UL, "stake_out", 0UL );
338+
fd_topob_tile_out( topo, "scap", 0UL, "stake_out", 0UL );
339+
fd_topob_tile_out( topo, "scap", 0UL, "snap_out", 0UL );
334340
}
335341

336342
FD_TEST( link_permit_no_producers( topo, "quic_net" ) == quic_tile_cnt );

src/app/firedancer/topology.c

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "topology.h"
22

3-
#include "../../discof/repair/fd_fec_chainer.h"
3+
#include "../../choreo/fd_choreo_base.h"
4+
#include "../../discof/repair/fd_reasm.h"
45
#include "../../discof/replay/fd_replay_notif.h"
56
#include "../../disco/net/fd_net_tile.h"
67
#include "../../disco/quic/fd_tpu.h"
@@ -244,7 +245,8 @@ fd_topo_initialize( config_t * config ) {
244245
fd_topob_wksp( topo, "gossip_repai" );
245246
fd_topob_wksp( topo, "gossip_verif" );
246247
fd_topob_wksp( topo, "gossip_tower" );
247-
fd_topob_wksp( topo, "replay_tower" );
248+
fd_topob_wksp( topo, "replay_out" );
249+
fd_topob_wksp( topo, "root_out" );
248250

249251
fd_topob_wksp( topo, "repair_sign" );
250252
fd_topob_wksp( topo, "sign_repair" );
@@ -337,8 +339,8 @@ fd_topo_initialize( config_t * config ) {
337339
338340
/**/ fd_topob_link( topo, "gossip_verif", "gossip_verif", config->tiles.verify.receive_buffer_size, FD_TPU_RAW_MTU, 1UL );
339341
/**/ fd_topob_link( topo, "gossip_tower", "gossip_tower", 128UL, FD_TPU_MTU, 1UL );
340-
/**/ fd_topob_link( topo, "replay_tower", "replay_tower", 128UL, 65536UL, 1UL );
341-
/**/ fd_topob_link( topo, "tower_replay", "replay_tower", 128UL, 0, 1UL );
342+
/**/ fd_topob_link( topo, "replay_out", "replay_out", 128UL, sizeof(fd_replay_out_t), 1UL );
343+
/**/ fd_topob_link( topo, "root_out", "root_out", 128UL, sizeof(fd_block_id_t), 1UL );
342344
343345
/**/ fd_topob_link( topo, "crds_shred", "crds_shred", 128UL, 8UL + 40200UL * 38UL, 1UL );
344346
/**/ fd_topob_link( topo, "gossip_repai", "gossip_repai", 128UL, 40200UL * 38UL, 1UL );
@@ -353,7 +355,7 @@ fd_topo_initialize( config_t * config ) {
353355

354356
FOR(shred_tile_cnt) fd_topob_link( topo, "repair_shred", "shred_repair", pending_fec_shreds_depth, sizeof(fd_ed25519_sig_t), 1UL );
355357
/**/ fd_topob_link( topo, "sign_repair", "sign_repair", 128UL, 64UL, 1UL );
356-
/**/ fd_topob_link( topo, "repair_repla", "repair_repla", 65536UL, sizeof(fd_fec_out_t), 1UL );
358+
/**/ fd_topob_link( topo, "repair_repla", "repair_repla", 65536UL, sizeof(fd_reasm_fec_t), 1UL );
357359
/**/ fd_topob_link( topo, "poh_shred", "poh_shred", 16384UL, USHORT_MAX, 1UL );
358360
/**/ fd_topob_link( topo, "poh_pack", "replay_poh", 128UL, sizeof(fd_became_leader_t) , 1UL );
359361
FOR(bank_tile_cnt) fd_topob_link( topo, "replay_poh", "replay_poh", 128UL, (4096UL*sizeof(fd_txn_p_t))+sizeof(fd_microblock_trailer_t), 1UL );
@@ -525,7 +527,6 @@ fd_topo_initialize( config_t * config ) {
525527

526528
for( ulong i=0UL; i<exec_tile_cnt; i++ ) {
527529
fd_topo_obj_t * exec_fseq_obj = fd_topob_obj( topo, "fseq", "exec_fseq" );
528-
fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "exec", i ) ], exec_fseq_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
529530
fd_topob_tile_uses( topo, replay_tile, exec_fseq_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
530531
FD_TEST( fd_pod_insertf_ulong( topo->props, exec_fseq_obj->id, "exec_fseq.%lu", i ) );
531532
}
@@ -552,7 +553,6 @@ fd_topo_initialize( config_t * config ) {
552553

553554
fd_topo_obj_t * root_slot_obj = fd_topob_obj( topo, "fseq", "slot_fseqs" );
554555
fd_topob_tile_uses( topo, replay_tile, root_slot_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
555-
fd_topob_tile_uses( topo, repair_tile, root_slot_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
556556
FD_TEST( fd_pod_insertf_ulong( topo->props, root_slot_obj->id, "root_slot" ) );
557557

558558
/* turbine_slot0 is an fseq marking the slot number of the first shred
@@ -637,10 +637,10 @@ fd_topo_initialize( config_t * config ) {
637637

638638
/**/ fd_topob_tile_out( topo, "repair", 0UL, "repair_net", 0UL );
639639

640-
/**/ fd_topob_tile_in( topo, "tower", 0UL, "metric_in", "gossip_tower", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
641-
/**/ fd_topob_tile_in( topo, "tower", 0UL, "metric_in", "replay_tower", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
642-
643-
/**/ fd_topob_tile_out( topo, "tower", 0UL, "tower_replay", 0UL );
640+
/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "gossip_tower", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
641+
/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "replay_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
642+
/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "snap_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
643+
/**/ fd_topob_tile_out( topo, "tower", 0UL, "root_out", 0UL );
644644
/**/ fd_topob_tile_out( topo, "tower", 0UL, "tower_send", 0UL );
645645

646646
/* Sign links don't need to be reliable because they are synchronous,
@@ -669,12 +669,13 @@ fd_topo_initialize( config_t * config ) {
669669
FOR(net_tile_cnt) fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "net_repair", i, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
670670
/**/ fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "gossip_repai", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
671671
/**/ fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "stake_out", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
672+
fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "snap_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
672673
FOR(shred_tile_cnt) fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "shred_repair", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
673674
674-
/**/ fd_topob_tile_in( topo, "replay", 0UL, "metric_in", "repair_repla", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
675+
/**/ fd_topob_tile_in ( topo, "replay", 0UL, "metric_in", "repair_repla", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
675676
/**/ fd_topob_tile_out( topo, "replay", 0UL, "stake_out", 0UL );
676-
/**/ fd_topob_tile_in( topo, "replay", 0UL, "metric_in", "tower_replay", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
677-
/**/ fd_topob_tile_out( topo, "replay", 0UL, "replay_tower", 0UL );
677+
/**/ fd_topob_tile_in ( topo, "replay", 0UL, "metric_in", "root_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
678+
/**/ fd_topob_tile_out( topo, "replay", 0UL, "replay_out", 0UL );
678679
FOR(bank_tile_cnt) fd_topob_tile_out( topo, "replay", 0UL, "replay_poh", i );
679680
FOR(exec_tile_cnt) fd_topob_tile_out( topo, "replay", 0UL, "replay_exec", i ); /* TODO check order in fd_replay.c macros*/
680681

@@ -704,9 +705,9 @@ fd_topo_initialize( config_t * config ) {
704705

705706
fd_topob_tile_out( topo, "poh", 0UL, "poh_pack", 0UL );
706707

707-
/**/ fd_topob_tile_in( topo, "sign", 0UL, "metric_in", "repair_sign", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
708+
/**/ fd_topob_tile_in ( topo, "sign", 0UL, "metric_in", "repair_sign", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
708709
/**/ fd_topob_tile_out( topo, "repair", 0UL, "repair_sign", 0UL );
709-
/**/ fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "sign_repair", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_UNPOLLED );
710+
/**/ fd_topob_tile_in ( topo, "repair", 0UL, "metric_in", "sign_repair", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_UNPOLLED );
710711
/**/ fd_topob_tile_out( topo, "repair", 0UL, "repair_repla", 0UL );
711712
FOR(shred_tile_cnt) fd_topob_tile_out( topo, "repair", 0UL, "repair_shred", i );
712713
/**/ fd_topob_tile_out( topo, "sign", 0UL, "sign_repair", 0UL );

src/choreo/fd_choreo_base.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,17 @@
3232
#define FD_SLOT_PUBKEY_HASH(key,seed) FD_SLOT_HASH_HASH(key,seed)
3333
/* clang-format on */
3434

35+
/* The block_id is the merkle root of the last FEC set for a slot. This
36+
is guaranteed to be unique (practically speaking, the probability of
37+
collision before the sun burns out is negligible).
38+
39+
This is used as the identifier for a block (hence "block_id") because,
40+
unlike the slot number, it stays unique even if a leader equivocates
41+
(ie. produces multiple blocks for the same slot): each of those
42+
blocks shares the slot number but has a distinct block_id. */
43+
44+
typedef uchar fd_block_id_t[ 32UL ];
45+
3546
typedef fd_slot_hash_t fd_slot_pubkey_t;
3647

3748
#endif /* HEADER_fd_src_choreo_fd_choreo_base_h */

0 commit comments

Comments
 (0)