Skip to content

Commit 4fca845

Browse files
authored
fix(meta-service): eliminate 250ms lock delay during snapshot compaction (#18542)
Move expensive compactor drop operation outside state machine write lock to prevent blocking other operations for ~250ms during snapshot building. Changes: - Change replace_with_compacted to take compactor by reference - Move compactor drop to async spawn_blocking after lock release The compactor contains large data structures (ImmutableLevels, DB handles) whose destruction involves significant memory deallocation and resource cleanup, causing multi-hundred millisecond delays when done under lock.
1 parent b977514 commit 4fca845

File tree

2 files changed

+26
-11
lines changed

2 files changed

+26
-11
lines changed

src/meta/raft-store/src/leveled_store/leveled_map/mod.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,11 @@ impl LeveledMap {
146146
self.immutable_levels = b;
147147
}
148148

149-
/// Replace several bottom immutable levels and persisted level
150-
/// with the compacted data.
151-
pub fn replace_with_compacted(&mut self, mut compactor: Compactor, db: DB) {
149+
/// Replace bottom immutable levels and persisted level with compacted data.
150+
///
151+
/// **Important**: Do not drop the compactor within this function when called
152+
/// under a state machine lock, as dropping may take ~250ms.
153+
pub fn replace_with_compacted(&mut self, compactor: &mut Compactor, db: DB) {
152154
let len = compactor.immutable_levels.len();
153155
let corresponding_index = compactor
154156
.immutable_levels
@@ -169,7 +171,7 @@ impl LeveledMap {
169171

170172
self.persisted = Some(db);
171173

172-
info!("compaction finished replacing the db");
174+
info!("replace_with_compacted: finished replacing the db");
173175
}
174176

175177
/// Get a singleton `Compactor` instance specific to `self`.

src/meta/service/src/store/store_inner.rs

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,10 @@ impl RaftStoreInner {
188188
let g = self
189189
.state_machine
190190
.read()
191-
.with_timing(|_output, total, busy| {
191+
.with_timing(|_output, total, _busy| {
192192
info!(
193-
"StateMachineLock-Read-Acquire: total: {:?}, busy: {:?}; {}",
194-
total, busy, &for_what
193+
"StateMachineLock-Read-Acquire: total: {:?}; {}",
194+
total, &for_what
195195
);
196196
})
197197
.await;
@@ -208,10 +208,10 @@ impl RaftStoreInner {
208208
let g = self
209209
.state_machine
210210
.write()
211-
.with_timing(|_output, total, busy| {
211+
.with_timing(|_output, total, _busy| {
212212
info!(
213-
"StateMachineLock-Write-Acquire: total: {:?}, busy: {:?}; {}",
214-
total, busy, &for_what
213+
"StateMachineLock-Write-Acquire: total: {:?}; {}",
214+
total, &for_what
215215
);
216216
})
217217
.await;
@@ -313,9 +313,22 @@ impl RaftStoreInner {
313313
.get_state_machine_write("do_build_snapshot-replace-compacted")
314314
.await;
315315
sm.levels_mut()
316-
.replace_with_compacted(compactor, db.clone());
316+
.replace_with_compacted(&mut compactor, db.clone());
317+
info!("do_build_snapshot-replace_with_compacted returned");
317318
}
318319

320+
// Consume and drop compactor, dropping it may involve blocking IO
321+
databend_common_base::runtime::spawn_blocking(move || {
322+
let drop_start = std::time::Instant::now();
323+
drop(compactor);
324+
info!(
325+
"do_build_snapshot-compactor-drop completed in {:?}",
326+
drop_start.elapsed()
327+
);
328+
})
329+
.await
330+
.expect("compactor drop task should not panic");
331+
319332
// Clean old snapshot
320333
ss_store.new_loader().clean_old_snapshots().await?;
321334
info!("do_build_snapshot clean_old_snapshots complete");

0 commit comments

Comments
 (0)