Skip to content

Commit 3151cb1

Browse files
committed
test: add MDC registration integration test and fix fractional polling intervals
Signed-off-by: Keiven Chang <[email protected]>
1 parent 205b397 commit 3151cb1

File tree

4 files changed

+476
-62
lines changed

4 files changed

+476
-62
lines changed

lib/llm/src/http/service/metrics.rs

Lines changed: 29 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ use crate::discovery::ModelEntry;
2222
use crate::local_model::runtime_config::ModelRuntimeConfig;
2323
use crate::model_card::{ModelDeploymentCard, ROOT_PATH as MDC_ROOT_PATH};
2424
use dynamo_runtime::metrics::prometheus_names::clamp_u64_to_i64;
25-
use dynamo_runtime::storage::key_value_store::{EtcdStorage, KeyValueStore, KeyValueStoreManager};
2625
use dynamo_runtime::slug::Slug;
26+
use dynamo_runtime::storage::key_value_store::{EtcdStorage, KeyValueStore, KeyValueStoreManager};
2727

2828
pub use prometheus::Registry;
2929

@@ -49,7 +49,6 @@ pub struct Metrics {
4949
model_context_length: IntGaugeVec,
5050
model_kv_cache_block_size: IntGaugeVec,
5151
model_migration_limit: IntGaugeVec,
52-
model_workers: IntGaugeVec, // this is an actual gauge, not a counter
5352
}
5453

5554
// Inflight tracks requests from HTTP handler start until complete response is finished.
@@ -156,12 +155,11 @@ impl Metrics {
156155
/// - `{prefix}_model_context_length` - IntGaugeVec for maximum context length for a worker serving the model
157156
/// - `{prefix}_model_kv_cache_block_size` - IntGaugeVec for KV cache block size for a worker serving the model
158157
/// - `{prefix}_model_migration_limit` - IntGaugeVec for request migration limit for a worker serving the model
159-
/// - `{prefix}_model_workers` - IntGaugeVec for number of worker instances serving each model
160158
///
161159
/// ## Runtime Config Polling Configuration
162160
///
163161
/// The polling behavior can be configured via environment variables:
164-
/// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, defaults to 8)
162+
/// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, supports fractional seconds, defaults to 8)
165163
///
166164
/// Metrics are never removed to preserve historical data. Runtime config and MDC
167165
/// metrics are updated when models are discovered and their configurations are available.
@@ -332,15 +330,6 @@ impl Metrics {
332330
)
333331
.unwrap();
334332

335-
let model_workers = IntGaugeVec::new(
336-
Opts::new(
337-
frontend_metric_name(frontend_service::MODEL_WORKERS),
338-
"Number of worker instances currently serving the model",
339-
),
340-
&["model"],
341-
)
342-
.unwrap();
343-
344333
Metrics {
345334
request_counter,
346335
inflight_gauge,
@@ -357,7 +346,6 @@ impl Metrics {
357346
model_context_length,
358347
model_kv_cache_block_size,
359348
model_migration_limit,
360-
model_workers,
361349
}
362350
}
363351

@@ -454,7 +442,6 @@ impl Metrics {
454442
registry.register(Box::new(self.model_context_length.clone()))?;
455443
registry.register(Box::new(self.model_kv_cache_block_size.clone()))?;
456444
registry.register(Box::new(self.model_migration_limit.clone()))?;
457-
registry.register(Box::new(self.model_workers.clone()))?;
458445

459446
Ok(())
460447
}
@@ -507,7 +494,6 @@ impl Metrics {
507494
.set(migration_limit as i64);
508495
}
509496

510-
511497
/// Update metrics from a ModelEntry
512498
/// This is a convenience method that extracts runtime config from a ModelEntry
513499
/// and updates the appropriate metrics
@@ -534,7 +520,10 @@ impl Metrics {
534520
let store: Box<dyn KeyValueStore> = Box::new(EtcdStorage::new(etcd_client.clone()));
535521
let card_store = Arc::new(KeyValueStoreManager::new(store));
536522

537-
match card_store.load::<ModelDeploymentCard>(MDC_ROOT_PATH, &model_slug).await {
523+
match card_store
524+
.load::<ModelDeploymentCard>(MDC_ROOT_PATH, &model_slug)
525+
.await
526+
{
538527
Ok(Some(mdc)) => {
539528
self.update_mdc_metrics(
540529
&model_entry.name,
@@ -566,11 +555,27 @@ impl Metrics {
566555
}
567556

568557
/// Start a background task that periodically updates runtime config metrics
569-
/// This polls the ModelManager for current models and updates metrics accordingly
570-
/// Models are never removed - only marked as healthy/unhealthy to preserve historical data
571558
///
572-
/// Note: If multiple model instances have the same name, only the first instance's metrics are used.
573-
/// Subsequent instances with duplicate names will be skipped.
559+
/// ## Why Polling is Required
560+
///
561+
/// Polling is necessary because new models may come online at any time through the distributed
562+
/// discovery system. The ModelManager is continuously updated as workers register/deregister
563+
/// with etcd, and we need to periodically check for these changes to expose their metrics.
564+
///
565+
/// ## Behavior
566+
///
567+
/// - Polls the ModelManager for current models and updates metrics accordingly
568+
/// - Models are never removed from metrics to preserve historical data
569+
/// - If multiple model instances have the same name, only the first instance's metrics are used
570+
/// - Subsequent instances with duplicate names will be skipped
571+
///
572+
/// ## MDC (Model Deployment Card) Behavior
573+
///
574+
/// Currently, we don't overwrite an MDC. The first worker to start wins, and we assume
575+
/// that all other workers claiming to serve that model really are using the same configuration.
576+
/// Later, every worker will have its own MDC, and the frontend will validate that they
577+
/// checksum the same. For right now, you can assume they have the same MDC, because
578+
/// they aren't allowed to change it.
574579
///
575580
/// The task will run until the provided cancellation token is cancelled.
576581
pub fn start_runtime_config_polling_task(
@@ -603,31 +608,12 @@ impl Metrics {
603608
// Get current model entries from the manager
604609
let current_entries = manager.get_model_entries();
605610
let mut current_models = std::collections::HashSet::new();
606-
let mut model_worker_counts = std::collections::HashMap::new();
607-
608-
// Count worker instances per model
609-
for entry in &current_entries {
610-
*model_worker_counts.entry(entry.name.clone()).or_insert(0) += 1;
611-
}
612-
613-
// Update worker count metrics for all models
614-
for (model_name, count) in &model_worker_counts {
615-
metrics.model_workers
616-
.with_label_values(&[model_name])
617-
.set(*count);
618-
}
619-
620-
// Reset worker count to 0 for models that no longer have any workers
621-
let current_models_with_workers: std::collections::HashSet<String> =
622-
model_worker_counts.keys().cloned().collect();
623-
for model_name in known_models.difference(&current_models_with_workers) {
624-
metrics.model_workers
625-
.with_label_values(&[model_name])
626-
.set(0);
627-
}
628611

629612
// Note: If multiple model instances have the same name, only the first instance's config metrics are recorded.
630613
// Subsequent instances with duplicate names will be skipped for config updates.
614+
// This is based on the assumption that all workers serving the same model have identical
615+
// configuration values (MDC content, runtime config, etc.). This assumption holds because
616+
// workers are not allowed to change their configuration after registration.
631617

632618
// Update configuration metrics for current models
633619
for entry in current_entries {
@@ -660,14 +646,6 @@ impl Metrics {
660646
}
661647
}
662648

663-
// Log models that are no longer active (worker count reset to 0, other metrics preserved)
664-
for model_name in known_models.difference(&current_models_with_workers) {
665-
tracing::debug!(
666-
model = %model_name,
667-
"Model no longer active (worker count reset to 0, other metrics preserved)"
668-
);
669-
}
670-
671649
// Update our known models set
672650
known_models.extend(current_models.iter().cloned());
673651

lib/llm/src/http/service/service_v2.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,10 @@ impl HttpService {
207207
// Start background task to poll runtime config metrics with proper cancellation
208208
let poll_interval_secs = std::env::var("DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS")
209209
.ok()
210-
.and_then(|s| s.parse::<u64>().ok())
211-
.filter(|&secs| secs > 0) // Guard against zero or negative values
212-
.unwrap_or(8);
213-
let poll_interval = Duration::from_secs(poll_interval_secs);
210+
.and_then(|s| s.parse::<f64>().ok())
211+
.filter(|&secs| secs > 0.0) // Guard against zero or negative values
212+
.unwrap_or(8.0);
213+
let poll_interval = Duration::from_secs_f64(poll_interval_secs);
214214

215215
let _polling_task = super::metrics::Metrics::start_runtime_config_polling_task(
216216
self.state.metrics_clone(),

0 commit comments

Comments
 (0)