@@ -22,8 +22,8 @@ use crate::discovery::ModelEntry;
22
22
use crate :: local_model:: runtime_config:: ModelRuntimeConfig ;
23
23
use crate :: model_card:: { ModelDeploymentCard , ROOT_PATH as MDC_ROOT_PATH } ;
24
24
use dynamo_runtime:: metrics:: prometheus_names:: clamp_u64_to_i64;
25
- use dynamo_runtime:: storage:: key_value_store:: { EtcdStorage , KeyValueStore , KeyValueStoreManager } ;
26
25
use dynamo_runtime:: slug:: Slug ;
26
+ use dynamo_runtime:: storage:: key_value_store:: { EtcdStorage , KeyValueStore , KeyValueStoreManager } ;
27
27
28
28
pub use prometheus:: Registry ;
29
29
@@ -49,7 +49,6 @@ pub struct Metrics {
49
49
model_context_length : IntGaugeVec ,
50
50
model_kv_cache_block_size : IntGaugeVec ,
51
51
model_migration_limit : IntGaugeVec ,
52
- model_workers : IntGaugeVec , // this is an actual gauge, not a counter
53
52
}
54
53
55
54
// Inflight tracks requests from HTTP handler start until the complete response is finished.
@@ -153,12 +152,11 @@ impl Metrics {
153
152
/// - `{prefix}_model_context_length` - IntGaugeVec for maximum context length for a worker serving the model
154
153
/// - `{prefix}_model_kv_cache_block_size` - IntGaugeVec for KV cache block size for a worker serving the model
155
154
/// - `{prefix}_model_migration_limit` - IntGaugeVec for request migration limit for a worker serving the model
156
- /// - `{prefix}_model_workers` - IntGaugeVec for number of worker instances serving each model
157
155
///
158
156
/// ## Runtime Config Polling Configuration
159
157
///
160
158
/// The polling behavior can be configured via environment variables:
161
- /// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, defaults to 8)
159
+ /// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, supports fractional seconds, defaults to 8)
162
160
///
163
161
/// Metrics are never removed, so that historical data is preserved. Runtime config and MDC
164
162
/// metrics are updated when models are discovered and their configurations are available.
@@ -329,15 +327,6 @@ impl Metrics {
329
327
)
330
328
. unwrap ( ) ;
331
329
332
- let model_workers = IntGaugeVec :: new (
333
- Opts :: new (
334
- frontend_metric_name ( frontend_service:: MODEL_WORKERS ) ,
335
- "Number of worker instances currently serving the model" ,
336
- ) ,
337
- & [ "model" ] ,
338
- )
339
- . unwrap ( ) ;
340
-
341
330
Metrics {
342
331
request_counter,
343
332
inflight_gauge,
@@ -354,7 +343,6 @@ impl Metrics {
354
343
model_context_length,
355
344
model_kv_cache_block_size,
356
345
model_migration_limit,
357
- model_workers,
358
346
}
359
347
}
360
348
@@ -451,7 +439,6 @@ impl Metrics {
451
439
registry. register ( Box :: new ( self . model_context_length . clone ( ) ) ) ?;
452
440
registry. register ( Box :: new ( self . model_kv_cache_block_size . clone ( ) ) ) ?;
453
441
registry. register ( Box :: new ( self . model_migration_limit . clone ( ) ) ) ?;
454
- registry. register ( Box :: new ( self . model_workers . clone ( ) ) ) ?;
455
442
456
443
Ok ( ( ) )
457
444
}
@@ -504,7 +491,6 @@ impl Metrics {
504
491
. set ( migration_limit as i64 ) ;
505
492
}
506
493
507
-
508
494
/// Update metrics from a ModelEntry
509
495
/// This is a convenience method that extracts runtime config from a ModelEntry
510
496
/// and updates the appropriate metrics
@@ -531,7 +517,10 @@ impl Metrics {
531
517
let store: Box < dyn KeyValueStore > = Box :: new ( EtcdStorage :: new ( etcd_client. clone ( ) ) ) ;
532
518
let card_store = Arc :: new ( KeyValueStoreManager :: new ( store) ) ;
533
519
534
- match card_store. load :: < ModelDeploymentCard > ( MDC_ROOT_PATH , & model_slug) . await {
520
+ match card_store
521
+ . load :: < ModelDeploymentCard > ( MDC_ROOT_PATH , & model_slug)
522
+ . await
523
+ {
535
524
Ok ( Some ( mdc) ) => {
536
525
self . update_mdc_metrics (
537
526
& model_entry. name ,
@@ -563,11 +552,27 @@ impl Metrics {
563
552
}
564
553
565
554
/// Start a background task that periodically updates runtime config metrics
566
- /// This polls the ModelManager for current models and updates metrics accordingly
567
- /// Models are never removed - only marked as healthy/unhealthy to preserve historical data
568
555
///
569
- /// Note: If multiple model instances have the same name, only the first instance's metrics are used.
570
- /// Subsequent instances with duplicate names will be skipped.
556
+ /// ## Why Polling is Required
557
+ ///
558
+ /// Polling is necessary because new models may come online at any time through the distributed
559
+ /// discovery system. The ModelManager is continuously updated as workers register/deregister
560
+ /// with etcd, and we need to periodically check for these changes to expose their metrics.
561
+ ///
562
+ /// ## Behavior
563
+ ///
564
+ /// - Polls the ModelManager for current models and updates metrics accordingly
565
+ /// - Models are never removed from metrics to preserve historical data
566
+ /// - If multiple model instances have the same name, only the first instance's metrics are used
567
+ /// - Subsequent instances with duplicate names will be skipped
568
+ ///
569
+ /// ## MDC (Model Deployment Card) Behavior
570
+ ///
571
+ /// Currently, we don't overwrite an MDC. The first worker to start wins, and we assume
572
+ /// that all other workers claiming to serve that model really are using the same configuration.
573
+ /// Later, every worker will have its own MDC, and the frontend will validate that they
574
+ /// checksum the same. For now, you can assume they have the same MDC, because
575
+ /// they aren't allowed to change it.
571
576
///
572
577
/// The task will run until the provided cancellation token is cancelled.
573
578
pub fn start_runtime_config_polling_task (
@@ -600,31 +605,12 @@ impl Metrics {
600
605
// Get current model entries from the manager
601
606
let current_entries = manager. get_model_entries ( ) ;
602
607
let mut current_models = std:: collections:: HashSet :: new ( ) ;
603
- let mut model_worker_counts = std:: collections:: HashMap :: new ( ) ;
604
-
605
- // Count worker instances per model
606
- for entry in & current_entries {
607
- * model_worker_counts. entry ( entry. name . clone ( ) ) . or_insert ( 0 ) += 1 ;
608
- }
609
-
610
- // Update worker count metrics for all models
611
- for ( model_name, count) in & model_worker_counts {
612
- metrics. model_workers
613
- . with_label_values ( & [ model_name] )
614
- . set ( * count) ;
615
- }
616
-
617
- // Reset worker count to 0 for models that no longer have any workers
618
- let current_models_with_workers: std:: collections:: HashSet < String > =
619
- model_worker_counts. keys ( ) . cloned ( ) . collect ( ) ;
620
- for model_name in known_models. difference ( & current_models_with_workers) {
621
- metrics. model_workers
622
- . with_label_values ( & [ model_name] )
623
- . set ( 0 ) ;
624
- }
625
608
626
609
// Note: If multiple model instances have the same name, only the first instance's config metrics are recorded.
627
610
// Subsequent instances with duplicate names will be skipped for config updates.
611
+ // This is based on the assumption that all workers serving the same model have identical
612
+ // configuration values (MDC content, runtime config, etc.). This assumption holds because
613
+ // workers are not allowed to change their configuration after registration.
628
614
629
615
// Update configuration metrics for current models
630
616
for entry in current_entries {
@@ -657,14 +643,6 @@ impl Metrics {
657
643
}
658
644
}
659
645
660
- // Log models that are no longer active (worker count reset to 0, other metrics preserved)
661
- for model_name in known_models. difference ( & current_models_with_workers) {
662
- tracing:: debug!(
663
- model = %model_name,
664
- "Model no longer active (worker count reset to 0, other metrics preserved)"
665
- ) ;
666
- }
667
-
668
646
// Update our known models set
669
647
known_models. extend ( current_models. iter ( ) . cloned ( ) ) ;
670
648
0 commit comments