Skip to content

Commit 9f64a10

Browse files
authored
CA-419227 Missing PGPU in pool_migrate_complete on destination host (#6731)
There is a regression test failure between xapi v25.30.0 and v25.33.0. The failing job is a cross-pool SXM with vGPU. On the source host, VM.migrate_send failed with the exception `Storage_error ([S(Does_not_exist);[S(mirror)`, which is raised by `MIRROR.stat`. The `MIRROR.stat` call is triggered by the destination host in `pool_migrate_complete`. The error is `Server_error(HANDLE_INVALID, [ PGPU; OpaqueRef:NULL ])`. I found that #6648 inserted `force_state_reset_keep_current_operations` in `pool_migrate_complete`, which sets the VGPU's resident_on to NULL. After reverting that commit, the job passes.
2 parents 093d098 + a465056 commit 9f64a10

File tree

2 files changed

+9
-5
lines changed

2 files changed

+9
-5
lines changed

ocaml/xapi/xapi_vm_lifecycle.ml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,9 @@ let force_state_reset_keep_current_operations ~__context ~self ~value:state =
862862
(* First update the power_state. Some operations below indirectly rely on this. *)
863863
let old_state = Db.VM.get_power_state ~__context ~self in
864864
Db.VM.set_power_state ~__context ~self ~value:state ;
865+
debug "%s: VM power state changed from %s to %s" __FUNCTION__
866+
(Record_util.vm_power_state_to_string old_state)
867+
(Record_util.vm_power_state_to_string state) ;
865868
if state = `Suspended then
866869
remove_pending_guidance ~__context ~self ~value:`restart_device_model ;
867870
if state = `Halted then (

ocaml/xapi/xapi_vm_migrate.ml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -489,18 +489,18 @@ let pool_migrate_complete ~__context ~vm ~host:_ =
489489
~value:`restart_device_model ;
490490
let dbg = Context.string_of_task __context in
491491
let queue_name = Xapi_xenops_queue.queue_of_vm ~__context ~self:vm in
492-
(* Reset the state, which will update allowed operations, clear reservations
493-
for halted VMs, disconnect devices *)
494-
let power_state = Db.VM.get_power_state ~__context ~self:vm in
495-
Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context
496-
~self:vm ~value:power_state ;
497492
if Xapi_xenops.vm_exists_in_xenopsd queue_name dbg id then (
498493
remove_stale_pcis ~__context ~vm ;
499494
Xapi_xenops.set_resident_on ~__context ~self:vm ;
500495
Xapi_xenops.add_caches id ;
501496
Xapi_xenops.refresh_vm ~__context ~self:vm ;
502497
Monitor_dbcalls_cache.clear_cache_for_vm ~vm_uuid:id
503498
) ;
499+
(* Reset the state, which will update allowed operations, clear reservations
500+
for halted VMs, disconnect devices *)
501+
let power_state = Db.VM.get_power_state ~__context ~self:vm in
502+
Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context
503+
~self:vm ~value:power_state ;
504504
Xapi_vm_group_helpers.maybe_update_vm_anti_affinity_alert_for_vm ~__context
505505
~vm
506506

@@ -1100,6 +1100,7 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far
11001100
) ;
11011101
result
11021102
with e ->
1103+
error "Catch error in post_mirror: %s" (Printexc.to_string e) ;
11031104
let mirror_failed =
11041105
match mirror_id with
11051106
| Some mid ->

0 commit comments

Comments
 (0)