Skip to content

Commit 4220d89

Browse files
committed
Async Sign Updt: map, flow control
1 parent db4018e commit 4220d89

File tree

9 files changed

+426
-254
lines changed

9 files changed

+426
-254
lines changed

src/app/firedancer/config/default.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -688,10 +688,10 @@ user = ""
688688
# very high TPS rates because the cluster size will be very small.
689689
shred_tile_count = 1
690690

691-
# How many sign tiles to run. Should be set to 2. This is
692-
# configurable and designed to scale for the repair tile.
693-
# The 0th tile gets used by other tiles for signing messages. While
694-
# the remaining tiles distribute the workload of signing repair
691+
# How many sign tiles to run. Should be set >= 2. This is
692+
# configurable and horizontally scales repair request signing.
693+
# One tile is reserved for synchronous signing across all tiles.
694+
# The remaining tiles distribute the workload of signing repair
695695
# requests.
696696
sign_tile_count = 2
697697

src/app/firedancer/topology.c

Lines changed: 42 additions & 41 deletions
Large diffs are not rendered by default.

src/app/shared/fd_config.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,13 @@ fd_config_validate( fd_config_t const * config ) {
490490
CFG_HAS_NON_ZERO ( layout.verify_tile_count );
491491
CFG_HAS_NON_ZERO ( layout.bank_tile_count );
492492
CFG_HAS_NON_ZERO ( layout.shred_tile_count );
493-
CFG_HAS_NON_ZERO ( firedancer.layout.sign_tile_count );
493+
494+
if( FD_UNLIKELY( config->is_firedancer ) ) {
495+
CFG_HAS_NON_ZERO( firedancer.layout.sign_tile_count );
496+
if( FD_UNLIKELY( config->firedancer.layout.sign_tile_count < 2 ) ) {
497+
FD_LOG_ERR(( "firedancer.layout.sign_tile_count must be >= 2" ));
498+
}
499+
}
494500

495501
if( 0U!=config->firedancer.layout.writer_tile_count ) {
496502
if( FD_UNLIKELY( config->firedancer.layout.writer_tile_count>config->firedancer.layout.exec_tile_count ) ) {

src/disco/sign/fd_sign_tile.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,7 @@ during_frag_sensitive( void * _ctx,
129129
ulong mtu = ctx->in[ in_idx ].mtu;
130130

131131
if( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>mtu ) {
132-
FD_LOG_EMERG(( "oversz or out of bounds signing request (role=%d chunk=%lu sz=%lu mtu=%lu, chunk0=%lu, wmark=%lu)",
133-
role, chunk, sz, mtu, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
132+
FD_LOG_EMERG(( "oversz or out of bounds signing request (role=%d chunk=%lu sz=%lu mtu=%lu, chunk0=%lu, wmark=%lu)", role, chunk, sz, mtu, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
134133
}
135134

136135
void * src = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
@@ -172,6 +171,7 @@ after_frag_sensitive( void * _ctx,
172171
FD_TEST( in_idx<MAX_IN );
173172

174173
int role = ctx->in[ in_idx ].role;
174+
175175
fd_keyguard_authority_t authority = {0};
176176
memcpy( authority.identity_pubkey, ctx->public_key, 32 );
177177

src/discof/repair/fd_repair_tile.c

Lines changed: 230 additions & 198 deletions
Large diffs are not rendered by default.

src/discof/send/fd_send_tile.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#include "fd_send_tile.h"
22
#include "../../disco/topo/fd_topo.h"
33
#include "../../disco/keyguard/fd_keyload.h"
4-
#include "../../disco/keyguard/fd_keyguard.h"
54
#include "../../disco/fd_txn_m_t.h"
5+
#include "../../disco/keyguard/fd_keyguard.h"
66
#include "generated/fd_send_tile_seccomp.h"
77

88

src/flamenco/repair/Local.mk

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,7 @@ $(call add-hdrs,fd_repair.h)
33
$(call add-objs,fd_repair,fd_flamenco)
44
ifdef FD_HAS_HOSTED
55
#$(call make-bin,fd_repair_tool,fd_repair_tool,fd_flamenco fd_ballet fd_util)
6+
$(call make-unit-test,test_repair,test_repair,fd_flamenco fd_ballet fd_util)
7+
$(call run-unit-test,test_repair)
68
endif
79
endif

src/flamenco/repair/fd_repair.c

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ fd_repair_new ( void * shmem, ulong seed ) {
4040
glob->peer_idx = 0;
4141
glob->actives_random_seed = 0;
4242

43+
/* Initialize pending sign request pool and map */
44+
shm = FD_SCRATCH_ALLOC_APPEND( l, fd_repair_pending_sign_req_pool_align(), fd_repair_pending_sign_req_pool_footprint( FD_REPAIR_PENDING_SIGN_REQ_MAX ) );
45+
glob->pending_sign_req_pool = fd_repair_pending_sign_req_pool_join( fd_repair_pending_sign_req_pool_new( shm, FD_REPAIR_PENDING_SIGN_REQ_MAX ) );
46+
shm = FD_SCRATCH_ALLOC_APPEND( l, fd_repair_pending_sign_req_map_align(), fd_repair_pending_sign_req_map_footprint( FD_REPAIR_PENDING_SIGN_REQ_MAX ) );
47+
glob->pending_sign_req_map = fd_repair_pending_sign_req_map_join( fd_repair_pending_sign_req_map_new( shm, FD_REPAIR_PENDING_SIGN_REQ_MAX, seed ) );
48+
4349
ulong scratch_top = FD_SCRATCH_ALLOC_FINI(l, 1UL);
4450
if ( scratch_top > (ulong)shmem + fd_repair_footprint() ) {
4551
FD_LOG_ERR(("Enough space not allocated for repair"));
@@ -60,6 +66,8 @@ fd_repair_delete ( void * shmap ) {
6066
fd_active_table_delete( fd_active_table_leave( glob->actives ) );
6167
fd_inflight_table_delete( fd_inflight_table_leave( glob->dupdetect ) );
6268
fd_pinged_table_delete( fd_pinged_table_leave( glob->pinged ) );
69+
fd_repair_pending_sign_req_pool_delete( fd_repair_pending_sign_req_pool_leave( glob->pending_sign_req_pool ) );
70+
fd_repair_pending_sign_req_map_delete( fd_repair_pending_sign_req_map_leave( glob->pending_sign_req_map ) );
6371
return glob;
6472
}
6573

@@ -366,7 +374,7 @@ fd_repair_create_inflight_request( fd_repair_t * glob, int type, ulong slot, uin
366374
dupelem->last_send_time = 0L;
367375
}
368376

369-
if( FD_LIKELY( dupelem->last_send_time+(long)40e6 < now ) ) { /* 40ms */
377+
if( FD_LIKELY( dupelem->last_send_time+(long)80e6 < now ) ) { /* 80ms */
370378
dupelem->last_send_time = now;
371379
dupelem->req_cnt = FD_REPAIR_NUM_NEEDED_PEERS;
372380
return 1;
@@ -525,3 +533,75 @@ fd_repair_metrics_t *
525533
fd_repair_get_metrics( fd_repair_t * repair ) {
526534
return &repair->metrics;
527535
}
536+
537+
/* Pending Sign Request API
538+
539+
These functions manage the pool and map of pending sign requests in
540+
the repair module. Each request is identified by a unique nonce,
541+
allowing for nonce to be used as a key in the map.
542+
543+
fd_repair_pending_sign_req_t * fd_repair_acquire_pending_request(...);
544+
Acquires an empty pending sign request from the pool. Returns
545+
pointer or NULL if pool is full. Caller is responsible for setting
546+
all fields before adding to map.
547+
548+
int fd_repair_add_pending_to_map(...);
549+
Adds a pending sign request to the map. Returns 0 on success, -1 on
550+
failure. The pending request must be previously acquired from the
551+
pool.
552+
553+
fd_repair_pending_sign_req_t * fd_repair_find_pending_request(...);
554+
Finds a pending sign request by nonce. Returns pointer or NULL.
555+
556+
int fd_repair_remove_pending_request(...);
557+
Removes a pending sign request by nonce. Returns 0 on success, -1
558+
if not found.
559+
560+
All functions assume the repair context is valid and not used concurrently.
561+
*/
562+
563+
int
564+
fd_repair_pending_request_pool_free( fd_repair_t * repair ) {
565+
return (int)fd_repair_pending_sign_req_pool_free( repair->pending_sign_req_pool );
566+
}
567+
568+
fd_repair_pending_sign_req_t *
569+
fd_repair_acquire_pending_request( fd_repair_t * repair ) {
570+
/* Check if there is any space for a new pending sign request */
571+
if( FD_UNLIKELY( fd_repair_pending_request_pool_free( repair ) == 0 ) ) {
572+
return NULL;
573+
}
574+
575+
fd_repair_pending_sign_req_t * pending = fd_repair_pending_sign_req_pool_ele_acquire( repair->pending_sign_req_pool );
576+
return pending;
577+
}
578+
579+
int
580+
fd_repair_add_pending_to_map( fd_repair_t * repair,
581+
fd_repair_pending_sign_req_t * pending ) {
582+
if( FD_UNLIKELY( !pending ) ) {
583+
return -1;
584+
}
585+
586+
fd_repair_pending_sign_req_map_ele_insert( repair->pending_sign_req_map, pending, repair->pending_sign_req_pool );
587+
return 0;
588+
}
589+
590+
fd_repair_pending_sign_req_t *
591+
fd_repair_find_pending_request( fd_repair_t * repair,
592+
ulong nonce ) {
593+
return fd_repair_pending_sign_req_map_ele_query( repair->pending_sign_req_map, &nonce, NULL, repair->pending_sign_req_pool );
594+
}
595+
596+
int
597+
fd_repair_remove_pending_request( fd_repair_t * repair,
598+
ulong nonce ) {
599+
fd_repair_pending_sign_req_t * pending = fd_repair_pending_sign_req_map_ele_query( repair->pending_sign_req_map, &nonce, NULL, repair->pending_sign_req_pool );
600+
if( FD_UNLIKELY( !pending ) ) {
601+
return -1;
602+
}
603+
604+
fd_repair_pending_sign_req_map_ele_remove( repair->pending_sign_req_map, &nonce, NULL, repair->pending_sign_req_pool );
605+
fd_repair_pending_sign_req_pool_ele_release( repair->pending_sign_req_pool, pending );
606+
return 0;
607+
}

src/flamenco/repair/fd_repair.h

Lines changed: 57 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
#define FD_PING_PRE_IMAGE_SZ (48UL)
3535
/* Number of peers to send requests to. */
3636
#define FD_REPAIR_NUM_NEEDED_PEERS (1)
37+
/* Max number of pending sign requests */
38+
#define FD_REPAIR_PENDING_SIGN_REQ_MAX (1<<10)
39+
/* Maximum size for sign buffer, typically <= 160 bytes (e.g., pings, repairs) */
40+
#define FD_REPAIR_MAX_SIGN_BUF_SIZE (256UL)
3741

3842
typedef fd_gossip_peer_addr_t fd_repair_peer_addr_t;
3943

@@ -165,6 +169,28 @@ typedef struct fd_pinged_elem fd_pinged_elem_t;
165169
#define MAP_T fd_pinged_elem_t
166170
#include "../../util/tmpl/fd_map_giant.c"
167171

172+
/* Pending sign request structure for async request handling */
173+
struct fd_repair_pending_sign_req {
174+
ulong nonce; /* map key, unique nonce */
175+
ulong next; /* used internally by fd_map_chain */
176+
uchar buf[FD_REPAIR_MAX_SIGN_BUF_SIZE];
177+
ulong buflen;
178+
ulong sig_offset;
179+
uint dst_ip_addr;
180+
ushort dst_port;
181+
fd_pubkey_t recipient;
182+
};
183+
typedef struct fd_repair_pending_sign_req fd_repair_pending_sign_req_t;
184+
185+
#define POOL_NAME fd_repair_pending_sign_req_pool
186+
#define POOL_T fd_repair_pending_sign_req_t
187+
#include "../../util/tmpl/fd_pool.c"
188+
189+
#define MAP_NAME fd_repair_pending_sign_req_map
190+
#define MAP_KEY nonce
191+
#define MAP_ELE_T fd_repair_pending_sign_req_t
192+
#include "../../util/tmpl/fd_map_chain.c"
193+
168194
struct fd_peer {
169195
fd_pubkey_t key;
170196
fd_ip4_port_t ip4;
@@ -238,6 +264,9 @@ struct fd_repair {
238264
fd_vote_stake_weight_t * stake_weights_temp;
239265
/* Path to the file where we write the cache of known good repair peers, to make cold booting faster */
240266
int good_peer_cache_file_fd;
267+
/* Pending sign requests for async operations */
268+
fd_repair_pending_sign_req_t * pending_sign_req_pool;
269+
fd_repair_pending_sign_req_map_t * pending_sign_req_map;
241270
/* Metrics */
242271
fd_repair_metrics_t metrics;
243272
};
@@ -249,13 +278,16 @@ fd_repair_align ( void ) { return 128UL; }
249278
FD_FN_CONST static inline ulong
250279
fd_repair_footprint( void ) {
251280
ulong l = FD_LAYOUT_INIT;
252-
l = FD_LAYOUT_APPEND( l, alignof(fd_repair_t), sizeof(fd_repair_t) );
253-
l = FD_LAYOUT_APPEND( l, fd_active_table_align(), fd_active_table_footprint(FD_ACTIVE_KEY_MAX) );
254-
l = FD_LAYOUT_APPEND( l, fd_inflight_table_align(), fd_inflight_table_footprint(FD_NEEDED_KEY_MAX) );
255-
l = FD_LAYOUT_APPEND( l, fd_pinged_table_align(), fd_pinged_table_footprint(FD_REPAIR_PINGED_MAX) );
281+
l = FD_LAYOUT_APPEND( l, alignof(fd_repair_t), sizeof(fd_repair_t) );
282+
l = FD_LAYOUT_APPEND( l, fd_active_table_align(), fd_active_table_footprint(FD_ACTIVE_KEY_MAX) );
283+
l = FD_LAYOUT_APPEND( l, fd_inflight_table_align(), fd_inflight_table_footprint(FD_NEEDED_KEY_MAX) );
284+
l = FD_LAYOUT_APPEND( l, fd_pinged_table_align(), fd_pinged_table_footprint(FD_REPAIR_PINGED_MAX) );
256285
/* regular and temp stake weights */
257-
l = FD_LAYOUT_APPEND( l, alignof(fd_vote_stake_weight_t), FD_STAKE_WEIGHTS_MAX * sizeof(fd_vote_stake_weight_t) );
258-
l = FD_LAYOUT_APPEND( l, alignof(fd_vote_stake_weight_t), FD_STAKE_WEIGHTS_MAX * sizeof(fd_vote_stake_weight_t) );
286+
l = FD_LAYOUT_APPEND( l, alignof(fd_vote_stake_weight_t), FD_STAKE_WEIGHTS_MAX * sizeof(fd_vote_stake_weight_t) );
287+
l = FD_LAYOUT_APPEND( l, alignof(fd_vote_stake_weight_t), FD_STAKE_WEIGHTS_MAX * sizeof(fd_vote_stake_weight_t) );
288+
/* pending sign request structures */
289+
l = FD_LAYOUT_APPEND( l, fd_repair_pending_sign_req_pool_align(), fd_repair_pending_sign_req_pool_footprint( FD_REPAIR_PENDING_SIGN_REQ_MAX ) );
290+
l = FD_LAYOUT_APPEND( l, fd_repair_pending_sign_req_map_align(), fd_repair_pending_sign_req_map_footprint( FD_REPAIR_PENDING_SIGN_REQ_MAX ) );
259291
return FD_LAYOUT_FINI(l, fd_repair_align() );
260292
}
261293

@@ -330,5 +362,24 @@ void fd_repair_set_stake_weights_fini( fd_repair_t * repair );
330362
fd_repair_metrics_t *
331363
fd_repair_get_metrics( fd_repair_t * repair );
332364

365+
/* Pending sign request operations */
366+
fd_repair_pending_sign_req_t *
367+
fd_repair_acquire_pending_request( fd_repair_t * repair );
368+
369+
int
370+
fd_repair_add_pending_to_map( fd_repair_t * repair,
371+
fd_repair_pending_sign_req_t * pending );
372+
373+
fd_repair_pending_sign_req_t *
374+
fd_repair_find_pending_request( fd_repair_t * repair,
375+
ulong nonce );
376+
377+
int
378+
fd_repair_remove_pending_request( fd_repair_t * repair,
379+
ulong nonce );
380+
381+
int
382+
fd_repair_pending_request_pool_free( fd_repair_t * repair );
383+
333384

334385
#endif /* HEADER_fd_src_flamenco_repair_fd_repair_h */

0 commit comments

Comments
 (0)