|
| 1 | +use crate::garbage_collector_orchestrator_v2::GarbageCollectorError; |
| 2 | +use crate::operators::delete_unused_logs::{ |
| 3 | + DeleteUnusedLogsError, DeleteUnusedLogsInput, DeleteUnusedLogsOperator, DeleteUnusedLogsOutput, |
| 4 | +}; |
| 5 | +use crate::types::{CleanupMode, GarbageCollectorResponse}; |
| 6 | +use async_trait::async_trait; |
| 7 | +use chroma_log::Log; |
| 8 | +use chroma_storage::Storage; |
| 9 | +use chroma_system::{ |
| 10 | + wrap, ComponentContext, ComponentHandle, Dispatcher, Handler, Orchestrator, |
| 11 | + OrchestratorContext, TaskResult, |
| 12 | +}; |
| 13 | +use chroma_types::CollectionUuid; |
| 14 | +use std::collections::{HashMap, HashSet}; |
| 15 | +use tokio::sync::oneshot::Sender; |
| 16 | +use tracing::{Level, Span}; |
| 17 | + |
| 18 | +#[derive(Debug)] |
| 19 | +pub struct HardDeleteLogOnlyGarbageCollectorOrchestrator { |
| 20 | + context: OrchestratorContext, |
| 21 | + storage: Storage, |
| 22 | + logs: Log, |
| 23 | + result_channel: Option<Sender<Result<GarbageCollectorResponse, GarbageCollectorError>>>, |
| 24 | + collection_to_destroy: CollectionUuid, |
| 25 | +} |
| 26 | + |
| 27 | +#[allow(clippy::too_many_arguments)] |
| 28 | +impl HardDeleteLogOnlyGarbageCollectorOrchestrator { |
| 29 | + pub fn new( |
| 30 | + dispatcher: ComponentHandle<Dispatcher>, |
| 31 | + storage: Storage, |
| 32 | + logs: Log, |
| 33 | + collection_to_destroy: CollectionUuid, |
| 34 | + ) -> Self { |
| 35 | + Self { |
| 36 | + context: OrchestratorContext::new(dispatcher), |
| 37 | + storage, |
| 38 | + logs, |
| 39 | + result_channel: None, |
| 40 | + collection_to_destroy, |
| 41 | + } |
| 42 | + } |
| 43 | +} |
| 44 | + |
| 45 | +#[async_trait] |
| 46 | +impl Orchestrator for HardDeleteLogOnlyGarbageCollectorOrchestrator { |
| 47 | + type Output = GarbageCollectorResponse; |
| 48 | + type Error = GarbageCollectorError; |
| 49 | + |
| 50 | + fn dispatcher(&self) -> ComponentHandle<Dispatcher> { |
| 51 | + self.context.dispatcher.clone() |
| 52 | + } |
| 53 | + |
| 54 | + fn context(&self) -> &OrchestratorContext { |
| 55 | + &self.context |
| 56 | + } |
| 57 | + |
| 58 | + async fn on_start(&mut self, ctx: &ComponentContext<Self>) { |
| 59 | + let _ = self |
| 60 | + .try_start_delete_unused_logs_operator(ctx) |
| 61 | + .await |
| 62 | + .inspect_err(|_| { |
| 63 | + tracing::event!( |
| 64 | + Level::ERROR, |
| 65 | + "could not start job to hard delete unused logs", |
| 66 | + ) |
| 67 | + }); |
| 68 | + } |
| 69 | + |
| 70 | + fn set_result_channel( |
| 71 | + &mut self, |
| 72 | + sender: Sender<Result<GarbageCollectorResponse, GarbageCollectorError>>, |
| 73 | + ) { |
| 74 | + self.result_channel = Some(sender); |
| 75 | + } |
| 76 | + |
| 77 | + fn take_result_channel( |
| 78 | + &mut self, |
| 79 | + ) -> Option<Sender<Result<GarbageCollectorResponse, GarbageCollectorError>>> { |
| 80 | + self.result_channel.take() |
| 81 | + } |
| 82 | +} |
| 83 | + |
| 84 | +impl HardDeleteLogOnlyGarbageCollectorOrchestrator { |
| 85 | + async fn try_start_delete_unused_logs_operator( |
| 86 | + &mut self, |
| 87 | + ctx: &ComponentContext<Self>, |
| 88 | + ) -> Result<(), GarbageCollectorError> { |
| 89 | + let collections_to_destroy = |
| 90 | + HashSet::from_iter(vec![self.collection_to_destroy].into_iter()); |
| 91 | + let collections_to_garbage_collect = HashMap::new(); |
| 92 | + let task = wrap( |
| 93 | + Box::new(DeleteUnusedLogsOperator { |
| 94 | + enabled: true, |
| 95 | + mode: CleanupMode::DeleteV2, |
| 96 | + storage: self.storage.clone(), |
| 97 | + logs: self.logs.clone(), |
| 98 | + enable_dangerous_option_to_ignore_min_versions_for_wal3: false, |
| 99 | + }), |
| 100 | + DeleteUnusedLogsInput { |
| 101 | + collections_to_destroy, |
| 102 | + collections_to_garbage_collect, |
| 103 | + }, |
| 104 | + ctx.receiver(), |
| 105 | + self.context.task_cancellation_token.clone(), |
| 106 | + ); |
| 107 | + self.dispatcher() |
| 108 | + .send(task, Some(Span::current())) |
| 109 | + .await |
| 110 | + .map_err(GarbageCollectorError::Channel)?; |
| 111 | + Ok(()) |
| 112 | + } |
| 113 | +} |
| 114 | + |
| 115 | +#[async_trait] |
| 116 | +impl Handler<TaskResult<DeleteUnusedLogsOutput, DeleteUnusedLogsError>> |
| 117 | + for HardDeleteLogOnlyGarbageCollectorOrchestrator |
| 118 | +{ |
| 119 | + type Result = (); |
| 120 | + |
| 121 | + async fn handle( |
| 122 | + &mut self, |
| 123 | + message: TaskResult<DeleteUnusedLogsOutput, DeleteUnusedLogsError>, |
| 124 | + ctx: &ComponentContext<HardDeleteLogOnlyGarbageCollectorOrchestrator>, |
| 125 | + ) { |
| 126 | + let _output = match self.ok_or_terminate(message.into_inner(), ctx).await { |
| 127 | + Some(output) => output, |
| 128 | + None => return, |
| 129 | + }; |
| 130 | + self.terminate_with_result( |
| 131 | + Ok(GarbageCollectorResponse { |
| 132 | + collection_id: self.collection_to_destroy, |
| 133 | + num_versions_deleted: 0, |
| 134 | + num_files_deleted: 0, |
| 135 | + ..Default::default() |
| 136 | + }), |
| 137 | + ctx, |
| 138 | + ) |
| 139 | + .await; |
| 140 | + } |
| 141 | +} |
| 142 | + |
#[cfg(test)]
mod tests {
    //! Tests for the `HardDeleteLogOnlyGarbageCollectorOrchestrator`.
    //!
    //! These tests cover construction only: they check that a freshly built
    //! orchestrator remembers the collection it was asked to destroy and that
    //! its result channel starts out unset.
    //!
    //! # Testing Approach
    //!
    //! Each test builds its own dependencies: test storage, a freshly started
    //! dispatcher, and a log client created from the default gRPC log
    //! configuration. No delete task is dispatched.
    use super::*;
    use chroma_config::registry::Registry;
    use chroma_config::Configurable;
    use chroma_log::config::{GrpcLogConfig, LogConfig};
    use chroma_storage::test_storage;
    use chroma_system::{Dispatcher, System};

    /// Verifies the state of a newly constructed orchestrator.
    ///
    /// # Test Invariants
    ///
    /// - Collection UUID must match the one provided during construction
    /// - Result channel must be `None` initially
    #[tokio::test(flavor = "multi_thread")]
    async fn test_k8s_integration_orchestrator_initialization() {
        let (_storage_dir, storage) = test_storage();
        let system = System::new();
        let dispatcher = Dispatcher::new(Default::default());
        let dispatcher_handle = system.start_component(dispatcher);
        let registry = Registry::new();
        let log_config = LogConfig::Grpc(GrpcLogConfig::default());
        let logs = Log::try_from_config(&(log_config, system.clone()), &registry)
            .await
            .unwrap();
        let collection_to_destroy = CollectionUuid::new();

        // Create orchestrator with test dependencies
        let orchestrator = HardDeleteLogOnlyGarbageCollectorOrchestrator::new(
            dispatcher_handle,
            storage,
            logs,
            collection_to_destroy,
        );

        // Verify the orchestrator is properly initialized
        assert_eq!(orchestrator.collection_to_destroy, collection_to_destroy);
        assert!(orchestrator.result_channel.is_none());
    }

    /// Checks that the orchestrator keeps the collection UUID that is later
    /// handed to the `DeleteUnusedLogsOperator`.
    ///
    /// # Implementation Details
    ///
    /// When the orchestrator starts the delete operator (in
    /// `try_start_delete_unused_logs_operator`), it uses the following
    /// hardcoded configuration:
    /// - `enabled`: true
    /// - `mode`: `CleanupMode::DeleteV2`
    /// - `enable_dangerous_option_to_ignore_min_versions_for_wal3`: false
    ///
    /// The UUID stored in `collection_to_destroy` becomes the only member of
    /// the `collections_to_destroy` set, while `collections_to_garbage_collect`
    /// is left empty. This test asserts only the stored UUID; the operator
    /// configuration is not observable from here.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_k8s_integration_delete_operator_params() {
        let (_storage_dir, storage) = test_storage();
        let system = System::new();
        let dispatcher = Dispatcher::new(Default::default());
        let dispatcher_handle = system.start_component(dispatcher);
        let registry = Registry::new();
        let log_config = LogConfig::Grpc(GrpcLogConfig::default());
        let logs = Log::try_from_config(&(log_config, system.clone()), &registry)
            .await
            .unwrap();
        let collection_to_destroy = CollectionUuid::new();

        let orchestrator = HardDeleteLogOnlyGarbageCollectorOrchestrator::new(
            dispatcher_handle,
            storage,
            logs,
            collection_to_destroy,
        );

        // Verify the orchestrator stores correct collection UUID for destruction
        assert_eq!(orchestrator.collection_to_destroy, collection_to_destroy);

        // Note: The delete operator configuration is hardcoded in
        // try_start_delete_unused_logs_operator and cannot be modified externally.
    }
}
0 commit comments