Skip to content

Commit 8ca98e5

Browse files
committed
test: add MDC registration integration test and fix fractional polling intervals
Signed-off-by: Keiven Chang <[email protected]>
1 parent 57e8844 commit 8ca98e5

File tree

4 files changed

+476
-62
lines changed

4 files changed

+476
-62
lines changed

lib/llm/src/http/service/metrics.rs

Lines changed: 29 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ use crate::discovery::ModelEntry;
2222
use crate::local_model::runtime_config::ModelRuntimeConfig;
2323
use crate::model_card::{ModelDeploymentCard, ROOT_PATH as MDC_ROOT_PATH};
2424
use dynamo_runtime::metrics::prometheus_names::clamp_u64_to_i64;
25-
use dynamo_runtime::storage::key_value_store::{EtcdStorage, KeyValueStore, KeyValueStoreManager};
2625
use dynamo_runtime::slug::Slug;
26+
use dynamo_runtime::storage::key_value_store::{EtcdStorage, KeyValueStore, KeyValueStoreManager};
2727

2828
pub use prometheus::Registry;
2929

@@ -49,7 +49,6 @@ pub struct Metrics {
4949
model_context_length: IntGaugeVec,
5050
model_kv_cache_block_size: IntGaugeVec,
5151
model_migration_limit: IntGaugeVec,
52-
model_workers: IntGaugeVec, // this is an actual gauge, not a counter
5352
}
5453

5554
// Inflight tracks requests from HTTP handler start until complete response is finished.
@@ -153,12 +152,11 @@ impl Metrics {
153152
/// - `{prefix}_model_context_length` - IntGaugeVec for maximum context length for a worker serving the model
154153
/// - `{prefix}_model_kv_cache_block_size` - IntGaugeVec for KV cache block size for a worker serving the model
155154
/// - `{prefix}_model_migration_limit` - IntGaugeVec for request migration limit for a worker serving the model
156-
/// - `{prefix}_model_workers` - IntGaugeVec for number of worker instances serving each model
157155
///
158156
/// ## Runtime Config Polling Configuration
159157
///
160158
/// The polling behavior can be configured via environment variables:
161-
/// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, defaults to 8)
159+
/// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, supports fractional seconds, defaults to 8)
162160
///
163161
/// Metrics are never removed to preserve historical data. Runtime config and MDC
164162
/// metrics are updated when models are discovered and their configurations are available.
@@ -329,15 +327,6 @@ impl Metrics {
329327
)
330328
.unwrap();
331329

332-
let model_workers = IntGaugeVec::new(
333-
Opts::new(
334-
frontend_metric_name(frontend_service::MODEL_WORKERS),
335-
"Number of worker instances currently serving the model",
336-
),
337-
&["model"],
338-
)
339-
.unwrap();
340-
341330
Metrics {
342331
request_counter,
343332
inflight_gauge,
@@ -354,7 +343,6 @@ impl Metrics {
354343
model_context_length,
355344
model_kv_cache_block_size,
356345
model_migration_limit,
357-
model_workers,
358346
}
359347
}
360348

@@ -451,7 +439,6 @@ impl Metrics {
451439
registry.register(Box::new(self.model_context_length.clone()))?;
452440
registry.register(Box::new(self.model_kv_cache_block_size.clone()))?;
453441
registry.register(Box::new(self.model_migration_limit.clone()))?;
454-
registry.register(Box::new(self.model_workers.clone()))?;
455442

456443
Ok(())
457444
}
@@ -504,7 +491,6 @@ impl Metrics {
504491
.set(migration_limit as i64);
505492
}
506493

507-
508494
/// Update metrics from a ModelEntry
509495
/// This is a convenience method that extracts runtime config from a ModelEntry
510496
/// and updates the appropriate metrics
@@ -531,7 +517,10 @@ impl Metrics {
531517
let store: Box<dyn KeyValueStore> = Box::new(EtcdStorage::new(etcd_client.clone()));
532518
let card_store = Arc::new(KeyValueStoreManager::new(store));
533519

534-
match card_store.load::<ModelDeploymentCard>(MDC_ROOT_PATH, &model_slug).await {
520+
match card_store
521+
.load::<ModelDeploymentCard>(MDC_ROOT_PATH, &model_slug)
522+
.await
523+
{
535524
Ok(Some(mdc)) => {
536525
self.update_mdc_metrics(
537526
&model_entry.name,
@@ -563,11 +552,27 @@ impl Metrics {
563552
}
564553

565554
/// Start a background task that periodically updates runtime config metrics
566-
/// This polls the ModelManager for current models and updates metrics accordingly
567-
/// Models are never removed - only marked as healthy/unhealthy to preserve historical data
568555
///
569-
/// Note: If multiple model instances have the same name, only the first instance's metrics are used.
570-
/// Subsequent instances with duplicate names will be skipped.
556+
/// ## Why Polling is Required
557+
///
558+
/// Polling is necessary because new models may come online at any time through the distributed
559+
/// discovery system. The ModelManager is continuously updated as workers register/deregister
560+
/// with etcd, and we need to periodically check for these changes to expose their metrics.
561+
///
562+
/// ## Behavior
563+
///
564+
/// - Polls the ModelManager for current models and updates metrics accordingly
565+
/// - Models are never removed from metrics to preserve historical data
566+
/// - If multiple model instances have the same name, only the first instance's metrics are used
567+
/// - Subsequent instances with duplicate names will be skipped
568+
///
569+
/// ## MDC (Model Deployment Card) Behavior
570+
///
571+
/// Currently, we don't overwrite an MDC. The first worker to start wins, and we assume
572+
/// that all other workers claiming to serve that model really are using the same configuration.
573+
/// Later, every worker will have its own MDC, and the frontend will validate that they
574+
/// checksum the same. For right now, you can assume they have the same MDC, because
575+
/// they aren't allowed to change it.
571576
///
572577
/// The task will run until the provided cancellation token is cancelled.
573578
pub fn start_runtime_config_polling_task(
@@ -600,31 +605,12 @@ impl Metrics {
600605
// Get current model entries from the manager
601606
let current_entries = manager.get_model_entries();
602607
let mut current_models = std::collections::HashSet::new();
603-
let mut model_worker_counts = std::collections::HashMap::new();
604-
605-
// Count worker instances per model
606-
for entry in &current_entries {
607-
*model_worker_counts.entry(entry.name.clone()).or_insert(0) += 1;
608-
}
609-
610-
// Update worker count metrics for all models
611-
for (model_name, count) in &model_worker_counts {
612-
metrics.model_workers
613-
.with_label_values(&[model_name])
614-
.set(*count);
615-
}
616-
617-
// Reset worker count to 0 for models that no longer have any workers
618-
let current_models_with_workers: std::collections::HashSet<String> =
619-
model_worker_counts.keys().cloned().collect();
620-
for model_name in known_models.difference(&current_models_with_workers) {
621-
metrics.model_workers
622-
.with_label_values(&[model_name])
623-
.set(0);
624-
}
625608

626609
// Note: If multiple model instances have the same name, only the first instance's config metrics are recorded.
627610
// Subsequent instances with duplicate names will be skipped for config updates.
611+
// This is based on the assumption that all workers serving the same model have identical
612+
// configuration values (MDC content, runtime config, etc.). This assumption holds because
613+
// workers are not allowed to change their configuration after registration.
628614

629615
// Update configuration metrics for current models
630616
for entry in current_entries {
@@ -657,14 +643,6 @@ impl Metrics {
657643
}
658644
}
659645

660-
// Log models that are no longer active (worker count reset to 0, other metrics preserved)
661-
for model_name in known_models.difference(&current_models_with_workers) {
662-
tracing::debug!(
663-
model = %model_name,
664-
"Model no longer active (worker count reset to 0, other metrics preserved)"
665-
);
666-
}
667-
668646
// Update our known models set
669647
known_models.extend(current_models.iter().cloned());
670648

lib/llm/src/http/service/service_v2.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,10 @@ impl HttpService {
207207
// Start background task to poll runtime config metrics with proper cancellation
208208
let poll_interval_secs = std::env::var("DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS")
209209
.ok()
210-
.and_then(|s| s.parse::<u64>().ok())
211-
.filter(|&secs| secs > 0) // Guard against zero or negative values
212-
.unwrap_or(8);
213-
let poll_interval = Duration::from_secs(poll_interval_secs);
210+
.and_then(|s| s.parse::<f64>().ok())
211+
.filter(|&secs| secs > 0.0) // Guard against zero or negative values
212+
.unwrap_or(8.0);
213+
let poll_interval = Duration::from_secs_f64(poll_interval_secs);
214214

215215
let _polling_task = super::metrics::Metrics::start_runtime_config_polling_task(
216216
self.state.metrics_clone(),

0 commit comments

Comments
 (0)