Skip to content

Commit fc840b4

Browse files
jherrera-jump authored and mmcgee-jump committed
bundle, gui: add tile card + primary metric
1 parent 95f4c8f commit fc840b4

File tree

12 files changed

+173
-35
lines changed

12 files changed

+173
-35
lines changed

book/api/metrics-generated.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@
194194
| <span class="metrics-name">bundle_&#8203;rtt_&#8203;sample</span> | gauge | Latest RTT sample at scrape time (nanoseconds) |
195195
| <span class="metrics-name">bundle_&#8203;rtt_&#8203;smoothed</span> | gauge | RTT moving average (nanoseconds) |
196196
| <span class="metrics-name">bundle_&#8203;rtt_&#8203;var</span> | gauge | RTT variance (nanoseconds) |
197-
| <span class="metrics-name">bundle_&#8203;message_&#8203;rx_&#8203;delay</span> | histogram | Message receive delay in seconds from bundle server to bundle client |
197+
| <span class="metrics-name">bundle_&#8203;message_&#8203;rx_&#8203;delay_&#8203;nanos</span> | histogram | Message receive delay in nanoseconds from bundle server to bundle client |
198198

199199
</div>
200200

book/api/websocket.md

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,8 @@ potential underflow.
643643
"next_leader_slot": 285228774,
644644
"tile_primary_metric": {
645645
"quic": 3,
646+
"bundle_rtt_smoothed_millis": 30,
647+
"bundle_rx_delay_millis_p90": 101,
646648
"net_in": 37803082,
647649
"net_out": 4982399,
648650
"verify": 0,
@@ -666,15 +668,17 @@ potential underflow.
666668
| tile_primary_metric | `TilePrimaryMetric` | Per-tile-type primary metrics. Some of these are point-in-time values (P), and some are 1-second moving window averages (W) |
667669

668670
**`TilePrimaryMetric`**
669-
| Field | Type | Description |
670-
|---------|----------|-------------|
671-
| net_in | `number` | Network ingress bytes per second (W) |
672-
| quic | `number` | Active QUIC connections (P) |
673-
| verify | `number` | Fraction of transactions that failed sigverify (W) |
674-
| dedup | `number` | Fraction of transactions deduplicated (W) |
675-
| pack | `number` | Fraction of pack buffer filled (P) |
676-
| bank | `number` | Execution TPS (W) |
677-
| net_out | `number` | Network egress bytes per second (W) |
671+
| Field | Type | Description |
672+
|----------------------------|----------|-------------|
673+
| net_in | `number` | Network ingress bytes per second (W) |
674+
| quic | `number` | Active QUIC connections (P) |
675+
| bundle_rtt_smoothed_millis | `number` | The round-trip time, in milliseconds, for gRPC messages sent to the bundle server. These are mostly ping messages when the validator is not leader. The signal is smoothed with an exponential moving average ( avg = 1/8 val + 7/8 avg ) |
676+
| bundle_rx_delay_millis_p90 | `number` | An estimate, in milliseconds, of the 90th percentile of the one-way delay of a bundle dispatched from the bundle server. Only samples taken since the start of this validator's most recent leader rotation are used to compute the percentile |
677+
| verify | `number` | Fraction of transactions that failed sigverify (W) |
678+
| dedup | `number` | Fraction of transactions deduplicated (W) |
679+
| pack | `number` | Fraction of pack buffer filled (P) |
680+
| bank | `number` | Execution TPS (W) |
681+
| net_out | `number` | Network egress bytes per second (W) |
678682

679683

680684
#### `summary.live_tile_timers`

src/disco/bundle/fd_bundle_tile.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ metrics_write( fd_bundle_tile_t * ctx ) {
5959
FD_MGAUGE_SET( BUNDLE, RTT_SMOOTHED, (ulong)ctx->rtt->smoothed_rtt );
6060
FD_MGAUGE_SET( BUNDLE, RTT_VAR, (ulong)ctx->rtt->var_rtt );
6161

62-
FD_MHIST_COPY( BUNDLE, MESSAGE_RX_DELAY, ctx->metrics.msg_rx_delay );
62+
FD_MHIST_COPY( BUNDLE, MESSAGE_RX_DELAY_NANOS, ctx->metrics.msg_rx_delay );
6363

6464
fd_wksp_t * wksp = fd_wksp_containing( ctx );
6565
fd_wksp_usage_t usage[1];
@@ -565,8 +565,8 @@ unprivileged_init( fd_topo_t * topo,
565565
fd_grpc_client_set_authority( ctx->grpc_client, ctx->server_sni, ctx->server_sni_len, ctx->server_tcp_port );
566566

567567
fd_histf_new( ctx->metrics.msg_rx_delay,
568-
FD_MHIST_SECONDS_MIN( BUNDLE, MESSAGE_RX_DELAY ),
569-
FD_MHIST_SECONDS_MAX( BUNDLE, MESSAGE_RX_DELAY ) );
568+
FD_MHIST_MIN( BUNDLE, MESSAGE_RX_DELAY_NANOS ),
569+
FD_MHIST_MAX( BUNDLE, MESSAGE_RX_DELAY_NANOS ) );
570570
}
571571

572572
static ulong

src/disco/gui/fd_gui.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ fd_gui_new( void * shmem,
132132
for( ulong i=0UL; i<FD_GUI_SLOTS_CNT; i++ ) gui->slots[ i ]->slot = ULONG_MAX;
133133
gui->pack_txn_idx = 0UL;
134134

135+
fd_histf_new( gui->bundle_rx_delay_hist_current, FD_MHIST_MIN( BUNDLE, MESSAGE_RX_DELAY_NANOS ), FD_MHIST_MAX( BUNDLE, MESSAGE_RX_DELAY_NANOS ) );
136+
fd_histf_new( gui->bundle_rx_delay_hist_reference, FD_MHIST_MIN( BUNDLE, MESSAGE_RX_DELAY_NANOS ), FD_MHIST_MAX( BUNDLE, MESSAGE_RX_DELAY_NANOS ) );
137+
135138
return gui;
136139
}
137140

@@ -499,6 +502,13 @@ fd_gui_tile_stats_snap( fd_gui_t * gui,
499502
stats->quic_conn_cnt += quic_metrics[ MIDX( GAUGE, QUIC, CONNECTIONS_ACTIVE ) ];
500503
}
501504

505+
fd_topo_tile_t const * bundle = &topo->tiles[ fd_topo_find_tile( topo, "bundle", 0UL ) ];
506+
volatile ulong * bundle_metrics = fd_metrics_tile( bundle->metrics );
507+
stats->bundle_rtt_smoothed_nanos = bundle_metrics[ MIDX( GAUGE, BUNDLE, RTT_SMOOTHED ) ];
508+
509+
gui->bundle_rx_delay_hist_current->sum = bundle_metrics[ MIDX( HISTOGRAM, BUNDLE, MESSAGE_RX_DELAY_NANOS ) + FD_HISTF_BUCKET_CNT ];
510+
for( ulong b=0; b<FD_HISTF_BUCKET_CNT; b++ ) gui->bundle_rx_delay_hist_current->counts[ b ] = bundle_metrics[ MIDX( HISTOGRAM, BUNDLE, MESSAGE_RX_DELAY_NANOS ) + b ];
511+
502512
stats->verify_drop_cnt = waterfall->out.verify_duplicate +
503513
waterfall->out.verify_parse +
504514
waterfall->out.verify_failed;
@@ -1731,6 +1741,19 @@ fd_gui_became_leader( fd_gui_t * gui,
17311741
slot->txs.leader_start_time = start_time_nanos;
17321742
slot->txs.leader_end_time = end_time_nanos;
17331743
if( FD_LIKELY( slot->txs.microblocks_upper_bound==USHORT_MAX ) ) slot->txs.microblocks_upper_bound = (ushort)max_microblocks;
1744+
1745+
// snapshot of bundle rx histogram at leader rotation start
1746+
if( FD_UNLIKELY( _slot % 4 == 0 ) ) {
1747+
fd_topo_tile_t const * bundle = &gui->topo->tiles[ fd_topo_find_tile( gui->topo, "bundle", 0UL ) ];
1748+
volatile ulong * bundle_metrics = fd_metrics_tile( bundle->metrics );
1749+
(void)bundle_metrics;
1750+
1751+
gui->bundle_rx_delay_hist_current->sum = bundle_metrics[ MIDX( HISTOGRAM, BUNDLE, MESSAGE_RX_DELAY_NANOS ) + FD_HISTF_BUCKET_CNT ];
1752+
for( ulong b=0; b<FD_HISTF_BUCKET_CNT; b++ ) gui->bundle_rx_delay_hist_current->counts[ b ] = bundle_metrics[ MIDX( HISTOGRAM, BUNDLE, MESSAGE_RX_DELAY_NANOS ) + b ];
1753+
1754+
gui->bundle_rx_delay_hist_reference->sum = bundle_metrics[ MIDX( HISTOGRAM, BUNDLE, MESSAGE_RX_DELAY_NANOS ) + FD_HISTF_BUCKET_CNT ];
1755+
for( ulong b=0; b<FD_HISTF_BUCKET_CNT; b++ ) gui->bundle_rx_delay_hist_reference->counts[ b ] = bundle_metrics[ MIDX( HISTOGRAM, BUNDLE, MESSAGE_RX_DELAY_NANOS ) + b ];
1756+
}
17341757
}
17351758

17361759
void

src/disco/gui/fd_gui.h

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "../pack/fd_microblock.h"
77
#include "../../waltz/http/fd_http_server.h"
88
#include "../../flamenco/leaders/fd_leaders.h"
9+
#include "../../util/hist/fd_histf.h"
910

1011
#include "../topo/fd_topo.h"
1112

@@ -161,16 +162,17 @@ typedef struct fd_gui_tile_timers fd_gui_tile_timers_t;
161162
struct fd_gui_tile_stats {
162163
long sample_time_nanos;
163164

164-
ulong net_in_rx_bytes; /* Number of bytes received by the net or sock tile*/
165-
ulong quic_conn_cnt; /* Number of active QUIC connections */
166-
ulong verify_drop_cnt; /* Number of transactions dropped by verify tiles */
167-
ulong verify_total_cnt; /* Number of transactions received by verify tiles */
168-
ulong dedup_drop_cnt; /* Number of transactions dropped by dedup tile */
169-
ulong dedup_total_cnt; /* Number of transactions received by dedup tile */
170-
ulong pack_buffer_cnt; /* Number of buffered transactions in the pack tile */
171-
ulong pack_buffer_capacity; /* Total size of the pack transaction buffer */
172-
ulong bank_txn_exec_cnt; /* Number of transactions processed by the bank tile */
173-
ulong net_out_tx_bytes; /* Number of bytes sent by the net or sock tile */
165+
ulong net_in_rx_bytes; /* Number of bytes received by the net or sock tile */
166+
ulong quic_conn_cnt; /* Number of active QUIC connections */
167+
ulong bundle_rtt_smoothed_nanos; /* RTT (nanoseconds) moving average */
168+
ulong verify_drop_cnt; /* Number of transactions dropped by verify tiles */
169+
ulong verify_total_cnt; /* Number of transactions received by verify tiles */
170+
ulong dedup_drop_cnt; /* Number of transactions dropped by dedup tile */
171+
ulong dedup_total_cnt; /* Number of transactions received by dedup tile */
172+
ulong pack_buffer_cnt; /* Number of buffered transactions in the pack tile */
173+
ulong pack_buffer_capacity; /* Total size of the pack transaction buffer */
174+
ulong bank_txn_exec_cnt; /* Number of transactions processed by the bank tile */
175+
ulong net_out_tx_bytes; /* Number of bytes sent by the net or sock tile */
174176
};
175177

176178
typedef struct fd_gui_tile_stats fd_gui_tile_stats_t;
@@ -360,6 +362,9 @@ struct fd_gui {
360362
ulong tile_timers_leader_history_slot[ FD_GUI_TILE_TIMER_LEADER_CNT ];
361363
} summary;
362364

365+
fd_histf_t bundle_rx_delay_hist_reference[ 1 ]; /* histogram snapshot taken at the start of every leader rotation for this validator */
366+
fd_histf_t bundle_rx_delay_hist_current[ 1 ]; /* latest histogram snapshot captured from metrics workspace */
367+
363368
fd_gui_slot_t slots[ FD_GUI_SLOTS_CNT ][ 1 ];
364369

365370
ulong pack_txn_idx; /* The pack index of the most recently received transaction */

src/disco/gui/fd_gui_printf.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,13 @@ fd_gui_printf_tile_stats( fd_gui_t * gui,
611611
fd_gui_tile_stats_t const * cur ) {
612612
jsonp_open_object( gui, "tile_primary_metric" );
613613
jsonp_ulong( gui, "quic", cur->quic_conn_cnt );
614+
jsonp_double( gui, "bundle_rtt_smoothed_millis", (double)(cur->bundle_rtt_smoothed_nanos) / 1000000.0 );
615+
616+
fd_histf_t bundle_rx_delay_hist_delta[ 1 ];
617+
fd_histf_subtract( gui->bundle_rx_delay_hist_current, gui->bundle_rx_delay_hist_reference, bundle_rx_delay_hist_delta );
618+
ulong bundle_rx_delay_nanos_p90 = fd_histf_percentile( bundle_rx_delay_hist_delta, 90U, ULONG_MAX );
619+
jsonp_double( gui, "bundle_rx_delay_millis_p90", fd_double_if(bundle_rx_delay_nanos_p90==ULONG_MAX, 0.0, (double)(bundle_rx_delay_nanos_p90) / 1000000.0 ));
620+
614621
if( FD_LIKELY( cur->sample_time_nanos>prev->sample_time_nanos ) ) {
615622
jsonp_ulong( gui, "net_in", (ulong)((double)(cur->net_in_rx_bytes - prev->net_in_rx_bytes) * 1000000000.0 / (double)(cur->sample_time_nanos - prev->sample_time_nanos) ));
616623
jsonp_ulong( gui, "net_out", (ulong)((double)(cur->net_out_tx_bytes - prev->net_out_tx_bytes) * 1000000000.0 / (double)(cur->sample_time_nanos - prev->sample_time_nanos) ));

src/disco/metrics/fd_prometheus.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ render_histogram( fd_prom_render_t * r,
7171
for( ulong k=0; k<FD_HISTF_BUCKET_CNT; k++ ) {
7272
value += *(fd_metrics_tile( tile->metrics ) + metric->offset + k);
7373

74-
char * le;
74+
char * le; /* le here means "less than or equal" not "left edge" */
7575
char le_str[ 64 ];
7676
if( FD_UNLIKELY( k==FD_HISTF_BUCKET_CNT-1UL ) ) le = "+Inf";
7777
else {

src/disco/metrics/generated/fd_metrics_bundle.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,5 @@ const fd_metrics_meta_t FD_METRICS_BUNDLE[FD_METRICS_BUNDLE_TOTAL] = {
1818
DECLARE_METRIC( BUNDLE_RTT_SAMPLE, GAUGE ),
1919
DECLARE_METRIC( BUNDLE_RTT_SMOOTHED, GAUGE ),
2020
DECLARE_METRIC( BUNDLE_RTT_VAR, GAUGE ),
21-
DECLARE_METRIC_HISTOGRAM_SECONDS( BUNDLE_MESSAGE_RX_DELAY ),
21+
DECLARE_METRIC_HISTOGRAM_NONE( BUNDLE_MESSAGE_RX_DELAY_NANOS ),
2222
};

src/disco/metrics/generated/fd_metrics_bundle.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,13 @@
8282
#define FD_METRICS_GAUGE_BUNDLE_RTT_VAR_DESC "RTT variance (nanoseconds)"
8383
#define FD_METRICS_GAUGE_BUNDLE_RTT_VAR_CVT (FD_METRICS_CONVERTER_NONE)
8484

85-
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_OFF (32UL)
86-
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_NAME "bundle_message_rx_delay"
87-
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_TYPE (FD_METRICS_TYPE_HISTOGRAM)
88-
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_DESC "Message receive delay in seconds from bundle server to bundle client"
89-
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_CVT (FD_METRICS_CONVERTER_SECONDS)
90-
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_MIN (0.0001)
91-
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_MAX (1.0)
85+
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_NANOS_OFF (32UL)
86+
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_NANOS_NAME "bundle_message_rx_delay_nanos"
87+
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_NANOS_TYPE (FD_METRICS_TYPE_HISTOGRAM)
88+
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_NANOS_DESC "Message receive delay in nanoseconds from bundle server to bundle client"
89+
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_NANOS_CVT (FD_METRICS_CONVERTER_NONE)
90+
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_NANOS_MIN (100000UL)
91+
#define FD_METRICS_HISTOGRAM_BUNDLE_MESSAGE_RX_DELAY_NANOS_MAX (1000000000UL)
9292

9393
#define FD_METRICS_BUNDLE_TOTAL (17UL)
9494
extern const fd_metrics_meta_t FD_METRICS_BUNDLE[FD_METRICS_BUNDLE_TOTAL];

src/disco/metrics/metrics.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,8 @@ metric introduced.
342342
<gauge name="RttSample" summary="Latest RTT sample at scrape time (nanoseconds)" />
343343
<gauge name="RttSmoothed" summary="RTT moving average (nanoseconds)" />
344344
<gauge name="RttVar" summary="RTT variance (nanoseconds)" />
345-
<histogram name="MessageRxDelay" min="0.0001" max="1.0" converter="seconds">
346-
<summary>Message receive delay in seconds from bundle server to bundle client</summary>
345+
<histogram name="MessageRxDelayNanos" min="100000" max="1000000000">
346+
<summary>Message receive delay in nanoseconds from bundle server to bundle client</summary>
347347
</histogram>
348348
</tile>
349349

0 commit comments

Comments
 (0)