Skip to content

Commit 08138ed

Browse files
MasterPtato authored and NathanFlurry committed
fix(guard): add more tokio runtime metrics, remove labels from metrics
1 parent d958baa commit 08138ed

File tree

4 files changed

+46
-34
lines changed

4 files changed

+46
-34
lines changed

packages/common/runtime/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,12 +67,17 @@ fn build_tokio_runtime_builder() -> tokio::runtime::Builder {
6767
metrics::TOKIO_THREAD_COUNT.dec();
6868
});
6969

70+
rt_builder.on_task_spawn(move |_| {
71+
metrics::TOKIO_TASK_TOTAL.inc();
72+
});
73+
7074
if env::var("TOKIO_RUNTIME_METRICS").is_ok() {
7175
rt_builder.on_before_task_poll(|_| {
7276
let metrics = tokio::runtime::Handle::current().metrics();
7377
let buckets = metrics.poll_time_histogram_num_buckets();
7478

7579
metrics::TOKIO_GLOBAL_QUEUE_DEPTH.set(metrics.global_queue_depth() as i64);
80+
metrics::TOKIO_ACTIVE_TASK_COUNT.set(metrics.num_alive_tasks() as i64);
7681

7782
for worker in 0..metrics.num_workers() {
7883
metrics::TOKIO_WORKER_OVERFLOW_COUNT

packages/common/runtime/src/metrics.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,18 @@ lazy_static::lazy_static! {
1414
"Number of pending tasks in the global queue.",
1515
*REGISTRY
1616
).unwrap();
17+
pub static ref TOKIO_TASK_TOTAL: IntCounter =
18+
register_int_counter_with_registry!(
19+
"tokio_task_total",
20+
"Total number of spawned tasks.",
21+
*REGISTRY
22+
).unwrap();
23+
pub static ref TOKIO_ACTIVE_TASK_COUNT: IntGauge =
24+
register_int_gauge_with_registry!(
25+
"tokio_active_task_count",
26+
"Total number of active (running or sleeping) tasks.",
27+
*REGISTRY
28+
).unwrap();
1729
pub static ref TOKIO_WORKER_OVERFLOW_COUNT: IntGaugeVec = register_int_gauge_vec_with_registry!(
1830
"tokio_worker_overflow_count",
1931
"Number of times the given worker thread saturated its local queue.",

packages/edge/infra/guard/core/src/metrics.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use lazy_static::lazy_static;
2-
use rivet_metrics::{prometheus::*, REGISTRY};
2+
use rivet_metrics::{prometheus::*, REGISTRY, BUCKETS};
33

44
lazy_static! {
55
// MARK: Internal
@@ -36,6 +36,7 @@ lazy_static! {
3636
pub static ref TCP_CONNECTION_DURATION: Histogram = register_histogram_with_registry!(
3737
"guard_tcp_connection_duration",
3838
"TCP connection duration in seconds",
39+
BUCKETS.to_vec(),
3940
*REGISTRY,
4041
)
4142
.unwrap();
@@ -44,36 +45,36 @@ lazy_static! {
4445
pub static ref RESOLVE_ROUTE_DURATION: Histogram = register_histogram_with_registry!(
4546
"guard_resolve_route_duration",
4647
"Time to resolve request route in seconds",
48+
BUCKETS.to_vec(),
4749
*REGISTRY,
4850
)
4951
.unwrap();
5052

5153
// MARK: Proxy requests
52-
pub static ref PROXY_REQUEST_TOTAL: IntCounterVec = register_int_counter_vec_with_registry!(
54+
pub static ref PROXY_REQUEST_TOTAL: IntCounter = register_int_counter_with_registry!(
5355
"guard_proxy_request_total",
5456
"Total number of requests to actor",
55-
&["actor_id", "server_id", "method", "path"],
5657
*REGISTRY,
5758
)
5859
.unwrap();
59-
pub static ref PROXY_REQUEST_PENDING: IntGaugeVec = register_int_gauge_vec_with_registry!(
60+
pub static ref PROXY_REQUEST_PENDING: IntGauge = register_int_gauge_with_registry!(
6061
"guard_proxy_request_pending",
6162
"Number of pending requests to actor",
62-
&["actor_id", "server_id", "method", "path"],
6363
*REGISTRY,
6464
)
6565
.unwrap();
6666
pub static ref PROXY_REQUEST_DURATION: HistogramVec = register_histogram_vec_with_registry!(
6767
"guard_proxy_request_duration",
6868
"Request duration in seconds",
69-
&["actor_id", "server_id", "status"],
69+
&["status"],
70+
BUCKETS.to_vec(),
7071
*REGISTRY,
7172
)
7273
.unwrap();
7374
pub static ref PROXY_REQUEST_ERROR: IntCounterVec = register_int_counter_vec_with_registry!(
7475
"guard_proxy_request_errors_total",
7576
"Total number of errors when proxying requests to actor",
76-
&["actor_id", "server_id", "error_type"],
77+
&["error_type"],
7778
*REGISTRY,
7879
)
7980
.unwrap();

packages/edge/infra/guard/core/src/proxy_service.rs

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -613,8 +613,6 @@ impl ProxyService {
613613
.path_and_query()
614614
.map(|x| x.to_string())
615615
.unwrap_or_else(|| req.uri().path().to_string());
616-
let method = req.method().clone();
617-
let method_str = method.as_str();
618616

619617
let start_time = Instant::now();
620618

@@ -642,11 +640,6 @@ impl ProxyService {
642640
};
643641

644642
let actor_id = target.actor_id;
645-
let server_id = target.server_id;
646-
647-
// Convert UUIDs to strings for metrics, handling Optional fields
648-
let actor_id_str = actor_id.map_or_else(|| "none".to_string(), |id| id.to_string());
649-
let server_id_str = server_id.map_or_else(|| "none".to_string(), |id| id.to_string());
650643

651644
// Extract IP address from remote_addr
652645
let client_ip = self.remote_addr.ip();
@@ -666,13 +659,8 @@ impl ProxyService {
666659
.map_err(Into::into)
667660
} else {
668661
// Increment metrics
669-
metrics::PROXY_REQUEST_PENDING
670-
.with_label_values(&[&actor_id_str, &server_id_str, method_str, &path])
671-
.inc();
672-
673-
metrics::PROXY_REQUEST_TOTAL
674-
.with_label_values(&[&actor_id_str, &server_id_str, method_str, &path])
675-
.inc();
662+
metrics::PROXY_REQUEST_PENDING.inc();
663+
metrics::PROXY_REQUEST_TOTAL.inc();
676664

677665
// Prepare to release in-flight counter when done
678666
let state_clone = self.state.clone();
@@ -684,29 +672,35 @@ impl ProxyService {
684672

685673
// Branch for WebSocket vs HTTP handling
686674
// Both paths will handle their own metrics and error handling
687-
if hyper_tungstenite::is_upgrade_request(&req) {
675+
let res = if hyper_tungstenite::is_upgrade_request(&req) {
688676
// WebSocket upgrade
689677
self.handle_websocket_upgrade(req, target).await
690678
} else {
691679
// Regular HTTP request
692680
self.handle_http_request(req, target).await
693-
}
681+
};
682+
683+
// Record metrics
684+
let duration_secs = start_time.elapsed().as_secs_f64();
685+
metrics::PROXY_REQUEST_DURATION
686+
.with_label_values(&[&status])
687+
.observe(duration_secs);
688+
689+
metrics::PROXY_REQUEST_PENDING.dec();
690+
691+
res
694692
};
695693

696694
let status = match &res {
697695
Ok(resp) => resp.status().as_u16().to_string(),
698-
Err(_) => "error".to_string(),
699-
};
700-
701-
// Record metrics
702-
let duration = start_time.elapsed();
703-
metrics::PROXY_REQUEST_DURATION
704-
.with_label_values(&[&actor_id_str, &server_id_str, &status])
705-
.observe(duration.as_secs_f64());
696+
Err(err) => {
697+
metrics::PROXY_REQUEST_ERROR
698+
.with_label_values(&[&err.to_string()])
699+
.inc();
706700

707-
metrics::PROXY_REQUEST_PENDING
708-
.with_label_values(&[&actor_id_str, &server_id_str, method_str, &path])
709-
.dec();
701+
"error".to_string()
702+
}
703+
};
710704

711705
res
712706
}

0 commit comments

Comments
 (0)