@@ -22,8 +22,8 @@ use crate::discovery::ModelEntry;
22
22
use crate :: local_model:: runtime_config:: ModelRuntimeConfig ;
23
23
use crate :: model_card:: { ModelDeploymentCard , ROOT_PATH as MDC_ROOT_PATH } ;
24
24
use dynamo_runtime:: metrics:: prometheus_names:: clamp_u64_to_i64;
25
- use dynamo_runtime:: storage:: key_value_store:: { EtcdStorage , KeyValueStore , KeyValueStoreManager } ;
26
25
use dynamo_runtime:: slug:: Slug ;
26
+ use dynamo_runtime:: storage:: key_value_store:: { EtcdStorage , KeyValueStore , KeyValueStoreManager } ;
27
27
28
28
pub use prometheus:: Registry ;
29
29
@@ -49,7 +49,6 @@ pub struct Metrics {
49
49
model_context_length : IntGaugeVec ,
50
50
model_kv_cache_block_size : IntGaugeVec ,
51
51
model_migration_limit : IntGaugeVec ,
52
- model_workers : IntGaugeVec , // this is an actual gauge, not a counter
53
52
}
54
53
55
54
// Inflight tracks requests from HTTP handler start until complete response is finished.
@@ -156,12 +155,11 @@ impl Metrics {
156
155
/// - `{prefix}_model_context_length` - IntGaugeVec for maximum context length for a worker serving the model
157
156
/// - `{prefix}_model_kv_cache_block_size` - IntGaugeVec for KV cache block size for a worker serving the model
158
157
/// - `{prefix}_model_migration_limit` - IntGaugeVec for request migration limit for a worker serving the model
159
- /// - `{prefix}_model_workers` - IntGaugeVec for number of worker instances serving each model
160
158
///
161
159
/// ## Runtime Config Polling Configuration
162
160
///
163
161
/// The polling behavior can be configured via environment variables:
164
- /// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, defaults to 8)
162
+ /// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, supports fractional seconds, defaults to 8)
165
163
///
166
164
/// Metrics are never removed to preserve historical data. Runtime config and MDC
167
165
/// metrics are updated when models are discovered and their configurations are available.
@@ -332,15 +330,6 @@ impl Metrics {
332
330
)
333
331
. unwrap ( ) ;
334
332
335
- let model_workers = IntGaugeVec :: new (
336
- Opts :: new (
337
- frontend_metric_name ( frontend_service:: MODEL_WORKERS ) ,
338
- "Number of worker instances currently serving the model" ,
339
- ) ,
340
- & [ "model" ] ,
341
- )
342
- . unwrap ( ) ;
343
-
344
333
Metrics {
345
334
request_counter,
346
335
inflight_gauge,
@@ -357,7 +346,6 @@ impl Metrics {
357
346
model_context_length,
358
347
model_kv_cache_block_size,
359
348
model_migration_limit,
360
- model_workers,
361
349
}
362
350
}
363
351
@@ -454,7 +442,6 @@ impl Metrics {
454
442
registry. register ( Box :: new ( self . model_context_length . clone ( ) ) ) ?;
455
443
registry. register ( Box :: new ( self . model_kv_cache_block_size . clone ( ) ) ) ?;
456
444
registry. register ( Box :: new ( self . model_migration_limit . clone ( ) ) ) ?;
457
- registry. register ( Box :: new ( self . model_workers . clone ( ) ) ) ?;
458
445
459
446
Ok ( ( ) )
460
447
}
@@ -507,7 +494,6 @@ impl Metrics {
507
494
. set ( migration_limit as i64 ) ;
508
495
}
509
496
510
-
511
497
/// Update metrics from a ModelEntry
512
498
/// This is a convenience method that extracts runtime config from a ModelEntry
513
499
/// and updates the appropriate metrics
@@ -534,7 +520,10 @@ impl Metrics {
534
520
let store: Box < dyn KeyValueStore > = Box :: new ( EtcdStorage :: new ( etcd_client. clone ( ) ) ) ;
535
521
let card_store = Arc :: new ( KeyValueStoreManager :: new ( store) ) ;
536
522
537
- match card_store. load :: < ModelDeploymentCard > ( MDC_ROOT_PATH , & model_slug) . await {
523
+ match card_store
524
+ . load :: < ModelDeploymentCard > ( MDC_ROOT_PATH , & model_slug)
525
+ . await
526
+ {
538
527
Ok ( Some ( mdc) ) => {
539
528
self . update_mdc_metrics (
540
529
& model_entry. name ,
@@ -566,11 +555,27 @@ impl Metrics {
566
555
}
567
556
568
557
/// Start a background task that periodically updates runtime config metrics
569
- /// This polls the ModelManager for current models and updates metrics accordingly
570
- /// Models are never removed - only marked as healthy/unhealthy to preserve historical data
571
558
///
572
- /// Note: If multiple model instances have the same name, only the first instance's metrics are used.
573
- /// Subsequent instances with duplicate names will be skipped.
559
+ /// ## Why Polling is Required
560
+ ///
561
+ /// Polling is necessary because new models may come online at any time through the distributed
562
+ /// discovery system. The ModelManager is continuously updated as workers register/deregister
563
+ /// with etcd, and we need to periodically check for these changes to expose the new models' metrics.
564
+ ///
565
+ /// ## Behavior
566
+ ///
567
+ /// - Polls the ModelManager for current models and updates metrics accordingly
568
+ /// - Models are never removed from metrics to preserve historical data
569
+ /// - If multiple model instances have the same name, only the first instance's metrics are used
570
+ /// - Subsequent instances with duplicate names will be skipped
571
+ ///
572
+ /// ## MDC (Model Deployment Card) Behavior
573
+ ///
574
+ /// Currently, we don't overwrite an MDC. The first worker to start wins, and we assume
575
+ /// that all other workers claiming to serve that model really are using the same configuration.
576
+ /// Later, every worker will have its own MDC, and the frontend will validate that their
577
+ /// checksums match. For now, you can assume they have the same MDC, because
578
+ /// workers aren't allowed to change it.
574
579
///
575
580
/// The task will run until the provided cancellation token is cancelled.
576
581
pub fn start_runtime_config_polling_task (
@@ -603,31 +608,12 @@ impl Metrics {
603
608
// Get current model entries from the manager
604
609
let current_entries = manager. get_model_entries ( ) ;
605
610
let mut current_models = std:: collections:: HashSet :: new ( ) ;
606
- let mut model_worker_counts = std:: collections:: HashMap :: new ( ) ;
607
-
608
- // Count worker instances per model
609
- for entry in & current_entries {
610
- * model_worker_counts. entry ( entry. name . clone ( ) ) . or_insert ( 0 ) += 1 ;
611
- }
612
-
613
- // Update worker count metrics for all models
614
- for ( model_name, count) in & model_worker_counts {
615
- metrics. model_workers
616
- . with_label_values ( & [ model_name] )
617
- . set ( * count) ;
618
- }
619
-
620
- // Reset worker count to 0 for models that no longer have any workers
621
- let current_models_with_workers: std:: collections:: HashSet < String > =
622
- model_worker_counts. keys ( ) . cloned ( ) . collect ( ) ;
623
- for model_name in known_models. difference ( & current_models_with_workers) {
624
- metrics. model_workers
625
- . with_label_values ( & [ model_name] )
626
- . set ( 0 ) ;
627
- }
628
611
629
612
// Note: If multiple model instances have the same name, only the first instance's config metrics are recorded.
630
613
// Subsequent instances with duplicate names will be skipped for config updates.
614
+ // This is based on the assumption that all workers serving the same model have identical
615
+ // configuration values (MDC content, runtime config, etc.). This assumption holds because
616
+ // workers are not allowed to change their configuration after registration.
631
617
632
618
// Update configuration metrics for current models
633
619
for entry in current_entries {
@@ -660,14 +646,6 @@ impl Metrics {
660
646
}
661
647
}
662
648
663
- // Log models that are no longer active (worker count reset to 0, other metrics preserved)
664
- for model_name in known_models. difference ( & current_models_with_workers) {
665
- tracing:: debug!(
666
- model = %model_name,
667
- "Model no longer active (worker count reset to 0, other metrics preserved)"
668
- ) ;
669
- }
670
-
671
649
// Update our known models set
672
650
known_models. extend ( current_models. iter ( ) . cloned ( ) ) ;
673
651
0 commit comments