@@ -29,6 +29,8 @@ use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};

 use log::{LevelFilter, error};
+#[cfg(mshv3)]
+use mshv_bindings::MSHV_GPAP_ACCESS_OP_CLEAR;
 #[cfg(mshv2)]
 use mshv_bindings::hv_message;
 use mshv_bindings::{
@@ -76,6 +78,9 @@ use crate::sandbox::SandboxConfiguration;
 use crate::sandbox::uninitialized::SandboxRuntimeConfig;
 use crate::{Result, log_then_return, new_error};

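+// Flag passed to mshv2's get_dirty_log so that dirty bits are cleared as the
+// bitmap is read (mshv3 expresses the same request via MSHV_GPAP_ACCESS_OP_CLEAR).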
+#[cfg(mshv2)]
+const CLEAR_DIRTY_BIT_FLAG: u64 = 0b100;
+
 #[cfg(gdb)]
 mod debug {
     use std::sync::{Arc, Mutex};
@@ -302,6 +307,7 @@ pub(crate) struct HypervLinuxDriver {
     vcpu_fd: VcpuFd,
     entrypoint: u64,
     mem_regions: Vec<MemoryRegion>,
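+    // Number of regions mapped when the sandbox was created; the dirty-page
+    // code below assumes these regions stay contiguous in guest memory.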
+    n_initial_regions: usize,
     orig_rsp: GuestPtr,
     interrupt_handle: Arc<LinuxInterruptHandle>,
@@ -351,6 +357,7 @@ impl HypervLinuxDriver {
             vm_fd.initialize()?;
             vm_fd
         };
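+        // Turn on dirty page tracking; the get_dirty_log calls below depend on it.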
+        vm_fd.enable_dirty_page_tracking()?;

         let mut vcpu_fd = vm_fd.create_vcpu(0)?;
@@ -391,13 +398,31 @@ impl HypervLinuxDriver {
             (None, None)
         };

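+        // Record the first region's guest PFN and the combined size of all
+        // initial regions so the whole sandbox can be covered by a single
+        // get_dirty_log call below.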
+        let mut base_pfn = u64::MAX;
+        let mut total_size: usize = 0;
+
         mem_regions.iter().try_for_each(|region| {
-            let mshv_region = region.to_owned().into();
+            let mshv_region: mshv_user_mem_region = region.to_owned().into();
+            if base_pfn == u64::MAX {
+                base_pfn = mshv_region.guest_pfn;
+            }
+            total_size += mshv_region.size as usize;
             vm_fd.map_user_memory(mshv_region)
         })?;

         Self::setup_initial_sregs(&mut vcpu_fd, pml4_ptr.absolute()?)?;

+        // Get and clear the dirty page bitmap: mshv marks every page dirty at
+        // initialization. If we don't clear the bits here, we end up taking a
+        // complete snapshot of memory page by page, which gets progressively
+        // slower as the sandbox size increases. The downside of doing this here
+        // is that the call to get_dirty_log takes longer as the number of pages
+        // increases, but for larger sandboxes it is easily cheaper than copying
+        // all the pages.
+
+        #[cfg(mshv2)]
+        vm_fd.get_dirty_log(base_pfn, total_size, CLEAR_DIRTY_BIT_FLAG)?;
+        #[cfg(mshv3)]
+        vm_fd.get_dirty_log(base_pfn, total_size, MSHV_GPAP_ACCESS_OP_CLEAR as u8)?;
+
         let interrupt_handle = Arc::new(LinuxInterruptHandle {
             running: AtomicU64::new(0),
             cancel_requested: AtomicBool::new(false),
@@ -428,6 +453,7 @@ impl HypervLinuxDriver {
             page_size: 0,
             vm_fd,
             vcpu_fd,
+            n_initial_regions: mem_regions.len(),
             mem_regions,
             entrypoint: entrypoint_ptr.absolute()?,
             orig_rsp: rsp_ptr,
@@ -885,6 +911,69 @@ impl Hypervisor for HypervLinuxDriver {
         self.interrupt_handle.clone()
     }

+    // TODO: Implement getting additional host-mapped dirty pages.
+    fn get_and_clear_dirty_pages(&mut self) -> Result<Vec<u64>> {
+        let first_mshv_region: mshv_user_mem_region = self
+            .mem_regions
+            .first()
+            .ok_or(new_error!(
+                "tried to get dirty page bitmap of 0-sized region"
+            ))?
+            .to_owned()
+            .into();
+
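+        // The dirty log is fetched for a single contiguous guest-physical
+        // range, so first verify that the initial regions still form one
+        // contiguous block.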
+        let n_contiguous = self
+            .mem_regions
+            .windows(2)
+            .take_while(|window| window[0].guest_region.end == window[1].guest_region.start)
+            .count()
+            + 1; // +1 because windows(2) gives us n-1 pairs for n regions
+
+        if n_contiguous != self.n_initial_regions {
+            return Err(new_error!(
+                "get_and_clear_dirty_pages: not all regions are contiguous, expected {} but got {}",
+                self.n_initial_regions,
+                n_contiguous
+            ));
+        }
+
+        let sandbox_total_size = self
+            .mem_regions
+            .iter()
+            .take(n_contiguous)
+            .map(|r| r.guest_region.len())
+            .sum();
+
+        let mut sandbox_dirty_pages = self.vm_fd.get_dirty_log(
+            first_mshv_region.guest_pfn,
+            sandbox_total_size,
+            #[cfg(mshv2)]
+            CLEAR_DIRTY_BIT_FLAG,
+            #[cfg(mshv3)]
+            (MSHV_GPAP_ACCESS_OP_CLEAR as u8),
+        )?;
+
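+        // The returned Vec<u64> is a bitmap: bit i of block b marks page
+        // b * 64 + i of the range as dirty, so each u64 block covers 64 pages.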
+        // Sanitize bits beyond the sandbox.
+        //
+        // TODO: remove this once the bug in mshv is fixed. The bug makes it
+        // possible for non-mapped memory to incorrectly be marked dirty. To work
+        // around it, we zero out any bits that fall outside the sandbox.
+        let sandbox_pages = sandbox_total_size / self.page_size;
+        let last_block_idx = sandbox_dirty_pages.len().saturating_sub(1);
+        if let Some(last_block) = sandbox_dirty_pages.last_mut() {
+            let last_block_start_page = last_block_idx * 64;
+            let last_block_end_page = last_block_start_page + 64;
+
+            // If the last block extends beyond the sandbox, clear the invalid bits
+            if last_block_end_page > sandbox_pages {
+                let valid_bits_in_last_block = sandbox_pages - last_block_start_page;
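+                // Inside this branch valid_bits_in_last_block is < 64 (assuming
+                // the bitmap extends at most one partial block past the sandbox),
+                // so the shift cannot overflow; the mask keeps only the bits for
+                // pages that lie inside the sandbox.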
+                let mask = (1u64 << valid_bits_in_last_block) - 1;
+                *last_block &= mask;
+            }
+        }
+        Ok(sandbox_dirty_pages)
+    }
+
     #[cfg(crashdump)]
     fn crashdump_context(&self) -> Result<Option<super::crashdump::CrashDumpContext>> {
         if self.rt_cfg.guest_core_dump {
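For reference, a minimal sketch of how a caller might consume the bitmap returned by `get_and_clear_dirty_pages`, assuming the layout described above (bit `i` of block `b` marks page `b * 64 + i` dirty); the helper name is hypothetical and not part of this change:

```rust
// Hypothetical helper: expand a dirty bitmap into dirty page indices.
// Assumes bit i of block b marks page b * 64 + i as dirty.
fn dirty_page_indices(bitmap: &[u64]) -> Vec<usize> {
    let mut pages = Vec::new();
    for (block_idx, block) in bitmap.iter().enumerate() {
        let mut bits = *block;
        while bits != 0 {
            let bit = bits.trailing_zeros() as usize; // lowest set bit
            pages.push(block_idx * 64 + bit);
            bits &= bits - 1; // clear that bit and continue
        }
    }
    pages
}
```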