Skip to content

Commit 72e919a

Browse files
committed
CP-54207: Move VBD_attach outside of VM migrate downtime
A VBD can be attached to more than one VM at a time, so now that VBD_plug has been split into VBD_attach and VBD_activate, the attach step can be performed on the destination before the VM migrate downtime begins, leaving only the activate step to run during the downtime. This does not change the overall duration of the migration, but it can reduce the downtime by several seconds.

Signed-off-by: Steven Woods <[email protected]>
1 parent c135976 commit 72e919a

File tree

1 file changed

+40
-10
lines changed

1 file changed

+40
-10
lines changed

ocaml/xenopsd/lib/xenops_server.ml

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1763,7 +1763,8 @@ let rec atomics_of_operation = function
17631763
serial "VIF.activate_and_plug" ~id
17641764
[VIF_set_active (vif.Vif.id, true); VIF_plug vif.Vif.id]
17651765
)
1766-
| VM_restore_devices (id, restore_vifs) ->
1766+
| VM_restore_devices (id, migration) ->
1767+
let restore_vifs = not migration in
17671768
let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in
17681769
let vgpus = VGPU_DB.vgpus id in
17691770
let pcis = PCI_DB.pcis id |> pci_plug_order in
@@ -1777,10 +1778,23 @@ let rec atomics_of_operation = function
17771778
[VBD_set_active (vbd.Vbd.id, true); vbd_plug vbd.Vbd.id]
17781779
)
17791780
in
1781+
let activate_vbds typ vbds =
1782+
let name_multi = Printf.sprintf "VBDs.activate %s" typ in
1783+
parallel name_multi ~id
1784+
(List.map (fun vbd -> VBD_activate vbd.Vbd.id) vbds)
1785+
in
1786+
let prep_vbds =
1787+
if !xenopsd_vbd_plug_unplug_legacy || not migration then
1788+
plug_vbds
1789+
else
1790+
(* If plug is split into activate and attach, when migrating we don't
1791+
need to attach here as we attached outside of the VM downtime *)
1792+
activate_vbds
1793+
in
17801794
[
17811795
(* rw vbds must be plugged before ro vbds, see vbd_plug_sets *)
1782-
plug_vbds "RW" vbds_rw
1783-
; plug_vbds "RO" vbds_ro
1796+
prep_vbds "RW" vbds_rw
1797+
; prep_vbds "RO" vbds_ro
17841798
; (if restore_vifs then atomics_of_operation (VM_restore_vifs id) else [])
17851799
; (* Nvidia SRIOV PCI devices have already been plugged *)
17861800
parallel_map "VGPUs.activate" ~id vgpus (fun vgpu ->
@@ -1897,7 +1911,7 @@ let rec atomics_of_operation = function
18971911
]
18981912
; vgpu_start_operations
18991913
; [VM_restore (id, data, vgpu_data)]
1900-
; atomics_of_operation (VM_restore_devices (id, true))
1914+
; atomics_of_operation (VM_restore_devices (id, false))
19011915
; [
19021916
(* At this point the domain is considered survivable. *)
19031917
VM_set_domain_action_request (id, None)
@@ -2696,9 +2710,9 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit =
26962710
| VM_restore_vifs id ->
26972711
debug "VM_restore_vifs %s" id ;
26982712
perform_atomics (atomics_of_operation op) t
2699-
| VM_restore_devices (id, restore_vifs) ->
2713+
| VM_restore_devices (id, migration) ->
27002714
(* XXX: this is delayed due to the 'attach'/'activate' behaviour *)
2701-
debug "VM_restore_devices %s %b" id restore_vifs ;
2715+
debug "VM_restore_devices %s %b" id migration ;
27022716
perform_atomics (atomics_of_operation op) t
27032717
| VM_resume (id, _data) ->
27042718
debug "VM.resume %s" id ;
@@ -3022,11 +3036,27 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit =
30223036
( try
30233037
let no_sharept = VGPU_DB.vgpus id |> List.exists is_no_sharept in
30243038
debug "VM %s no_sharept=%b (%s)" id no_sharept __LOC__ ;
3039+
let early_attach =
3040+
if !xenopsd_vbd_plug_unplug_legacy then
3041+
[]
3042+
else
3043+
(* If plug is split into activate and attach, we can attach
3044+
early so that it is outside of the VM downtime *)
3045+
parallel_map "VBDs.set_active_and_attach" ~id (VBD_DB.vbds id)
3046+
(fun vbd ->
3047+
serial "VBD.set_active_and_attach" ~id
3048+
[
3049+
VBD_set_active (vbd.Vbd.id, true)
3050+
; VBD_attach vbd.Vbd.id
3051+
]
3052+
)
3053+
in
30253054
perform_atomics
30263055
([VM_create (id, Some memory_limit, Some final_id, no_sharept)]
3027-
@ (* Perform as many operations as possible on the destination
3028-
domain before pausing the original domain *)
3029-
atomics_of_operation (VM_restore_vifs id)
3056+
(* Perform as many operations as possible on the destination
3057+
domain before pausing the original domain *)
3058+
@ atomics_of_operation (VM_restore_vifs id)
3059+
@ early_attach
30303060
)
30313061
t ;
30323062
Handshake.send s Handshake.Success
@@ -3142,7 +3172,7 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit =
31423172
) ;
31433173
debug "VM.receive_memory: restoring remaining devices and unpausing" ;
31443174
perform_atomics
3145-
(atomics_of_operation (VM_restore_devices (final_id, false))
3175+
(atomics_of_operation (VM_restore_devices (final_id, true))
31463176
@ [
31473177
VM_unpause final_id
31483178
; VM_set_domain_action_request (final_id, None)

0 commit comments

Comments
 (0)