Commit 9eb5f9f

Raise VIF limit from 7 to 16 by calculating max_grant_frames on domain creation (#6577)
xenopsd currently hardcodes 64 as the value of `max_grant_frames` for all domains. This limits how many grants a domain can allocate, and therefore how many VIFs and VBDs it can have. `max_grant_frames` can be overridden for individual VMs by setting a value in `platform`, but it is much better to estimate how many grant frames a VM will need from its number of VBDs and VIFs. Implement this estimate and add notes on the difficulties and imprecisions involved.

This allows raising the number of supported VIFs from the current limit of 7; we've picked 16 as a sweet spot, though this could be discussed further. (Note that it is the `allowed_vifs` limit that changes: VIFs can be created in arbitrary numbers on the CLI and by clients that do not honour the `allowed_vifs` advice.)

Given the current behaviour of the XenServer/XCP-ng system (hypervisor + drivers), where more VIFs allow for higher overall networking throughput, this is highly beneficial: in testing, overall throughput with 16 VIFs was 18-27% higher than with 8 VIFs (measured with multiple iperf3 instances running on all interfaces simultaneously). The following table shows the per-VIF and per-VM performance on a host with 16 pCPUs, with 8 vCPUs each for domU and dom0:

![image](https://github.com/user-attachments/assets/f61488bf-90f0-4cbd-8896-176e053f708f)

Moreover, some users coming from VMware are used to networking setups with dozens of VIFs, and this is a step towards allowing that without running into other bottlenecks in the system.

Most of this work (except the last commit) was tested very thoroughly at XenServer, since it was initially intended to raise the supported limit of VBDs (it passed Ring3 BST+BVT multiple times and ran through the config limits test several times). Raising the number of supported VBDs has other issues and was abandoned, but this PR solves all the issues for VIFs. The new limit was tested with XCP-ng, with a new test added to our CI (xcp-ng/xcp-ng-tests#338) that verifies the ability to hit the new limit and perform well at it.
2 parents 1436b8f + 947e496 commit 9eb5f9f
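For illustration (not part of the change itself), here is a minimal standalone sketch of the estimate added in `domain.ml`. The helper name is illustrative; the constants come from the diff below: 8 grant frames per VIF, 1 per VBD, the old hardcoded value 64 kept as a floor, and one spare device of each kind as hotplug headroom.

```ocaml
(* Sketch of the max_grant_frames estimate introduced by this commit.
   estimate_max_grant_frames is a hypothetical helper, not part of xenopsd. *)
let estimate_max_grant_frames ~num_of_vbds ~num_of_vifs =
  (* one VIF: (256 rx + 256 tx entries) * 8 queues * 8 bytes per entry / 4096 bytes per frame *)
  let max_per_vif = 8 in
  (* rough per-VBD figure; see the long note in domain.ml for the caveats *)
  let reasonable_per_vbd = 1 in
  max 64
    ((max_per_vif * (num_of_vifs + 1)) + (reasonable_per_vbd * (num_of_vbds + 1)))

let () =
  (* a VM with 16 VIFs and 8 VBDs gets 8 * 17 + 1 * 9 = 145 frames *)
  Printf.printf "16 VIFs, 8 VBDs -> %d\n"
    (estimate_max_grant_frames ~num_of_vbds:8 ~num_of_vifs:16) ;
  (* a small VM still gets the old floor of 64 *)
  Printf.printf "1 VIF, 1 VBD -> %d\n"
    (estimate_max_grant_frames ~num_of_vbds:1 ~num_of_vifs:1)
```

As before, an explicit `max_grant_frames` entry in the VM's `platform` data takes precedence over this estimate.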

File tree

8 files changed, +116 -30 lines changed


ocaml/xapi/xapi_vm_helpers.ml

Lines changed: 2 additions & 2 deletions
@@ -1304,9 +1304,9 @@ let allowed_VBD_devices_HVM_floppy =
     (fun x -> Device_number.(make Floppy ~disk:x ~partition:0))
     (inclusive_range 0 1)
 
-let allowed_VIF_devices_HVM = vif_inclusive_range 0 6
+let allowed_VIF_devices_HVM = vif_inclusive_range 0 15
 
-let allowed_VIF_devices_PV = vif_inclusive_range 0 6
+let allowed_VIF_devices_PV = vif_inclusive_range 0 15
 
 (** [possible_VBD_devices_of_string s] returns a list of Device_number.t which
     represent possible interpretations of [s]. *)
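(The inclusive range 0-15 gives 16 allowed VIF device positions, up from the 7 positions of the previous 0-6 range, matching the limit change in the commit title.)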

ocaml/xenopsd/lib/xenops_server.ml

Lines changed: 19 additions & 10 deletions
@@ -848,10 +848,11 @@ module Queues = struct
 
   let get tag qs =
     with_lock qs.m (fun () ->
-        if StringMap.mem tag qs.qs then
-          StringMap.find tag qs.qs
-        else
-          Queue.create ()
+        match StringMap.find_opt tag qs.qs with
+        | Some x ->
+            x
+        | None ->
+            Queue.create ()
     )
 
   let tags qs =
@@ -862,10 +863,11 @@ module Queues = struct
   let push_with_coalesce should_keep tag item qs =
     with_lock qs.m (fun () ->
        let q =
-          if StringMap.mem tag qs.qs then
-            StringMap.find tag qs.qs
-          else
-            Queue.create ()
+          match StringMap.find_opt tag qs.qs with
+          | Some x ->
+              x
+          | None ->
+              Queue.create ()
        in
        push_with_coalesce should_keep item q ;
        qs.qs <- StringMap.add tag q qs.qs ;
@@ -2297,11 +2299,18 @@ let rec perform_atomic ~progress_callback ?result (op : atomic)
       debug "VM.destroy %s" id ;
       B.VM.destroy t (VM_DB.read_exn id)
   | VM_create (id, memory_upper_bound, final_id, no_sharept) ->
-      debug "VM.create %s memory_upper_bound = %s" id
+      let num_of_vbds = List.length (VBD_DB.vbds id) in
+      let num_of_vifs = List.length (VIF_DB.vifs id) in
+      debug
+        "VM.create %s memory_upper_bound = %s, num_of_vbds = %d, num_of_vifs = \
+         %d"
+        id
        (Option.value ~default:"None"
           (Option.map Int64.to_string memory_upper_bound)
-        ) ;
+        )
+        num_of_vbds num_of_vifs ;
       B.VM.create t memory_upper_bound (VM_DB.read_exn id) final_id no_sharept
+        num_of_vbds num_of_vifs
   | VM_build (id, force) ->
       debug "VM.build %s" id ;
       let vbds : Vbd.t list = VBD_DB.vbds id |> vbd_plug_order in

ocaml/xenopsd/lib/xenops_server_plugin.ml

Lines changed: 2 additions & 0 deletions
@@ -84,6 +84,8 @@ module type S = sig
     -> Vm.t
     -> Vm.id option
     -> bool (* no_sharept*)
+    -> int (* num_of_vbds *)
+    -> int (* num_of_vifs *)
     -> unit
 
   val build :

ocaml/xenopsd/lib/xenops_server_simulator.ml

Lines changed: 2 additions & 1 deletion
@@ -547,7 +547,8 @@ module VM = struct
 
   let remove _vm = ()
 
-  let create _ memory_limit vm _ _ = with_lock m (create_nolock memory_limit vm)
+  let create _ memory_limit vm _ _ _ _ =
+    with_lock m (create_nolock memory_limit vm)
 
   let destroy _ vm = with_lock m (destroy_nolock vm)
 
ocaml/xenopsd/lib/xenops_utils.ml

Lines changed: 13 additions & 12 deletions
@@ -227,11 +227,13 @@ module MemFS = struct
       match (path, fs) with
       | [], Dir d ->
           d
-      | p :: ps, Dir d ->
-          if StringMap.mem p !d then
-            aux ps (StringMap.find p !d)
-          else
+      | p :: ps, Dir d -> (
+        match StringMap.find_opt p !d with
+        | Some x ->
+            aux ps x
+        | None ->
            raise Not_dir
+      )
       | _, Leaf _ ->
           raise Not_dir
     in
@@ -285,14 +287,13 @@ module MemFS = struct
       (fun p ->
        let dir = dir_locked (dirname p) in
        let deletable =
-          if StringMap.mem (filename p) !dir then
-            match StringMap.find (filename p) !dir with
-            | Dir child ->
-                StringMap.is_empty !child
-            | Leaf _ ->
-                true
-          else
-            false
+          match StringMap.find_opt (filename p) !dir with
+          | Some (Dir child) ->
+              StringMap.is_empty !child
+          | Some (Leaf _) ->
+              true
+          | None ->
+              false
        in
        if deletable then dir := StringMap.remove (filename p) !dir
      )

ocaml/xenopsd/xc/domain.ml

Lines changed: 72 additions & 3 deletions
@@ -269,7 +269,8 @@ let wait_xen_free_mem ~xc ?(maximum_wait_time_seconds = 64) required_memory_kib
   in
   wait 0
 
-let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept =
+let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept
+    num_of_vbds num_of_vifs =
   let open Xenctrl in
   let host_info = Xenctrl.physinfo xc in
 
@@ -385,12 +386,80 @@ let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept =
       ; max_evtchn_port= -1
       ; max_grant_frames=
          ( try int_of_string (List.assoc "max_grant_frames" vm_info.platformdata)
-            with _ -> 64
+            with _ ->
+              let max_per_vif = 8 in
+              (* 1 VIF takes up (256 rx entries + 256 tx entries) * 8 queues max
+                 * 8 bytes per grant table entry / 4096 bytes size of frame *)
+              let reasonable_per_vbd = 1 in
+              (* (1 ring (itself taking up one granted page) + 1 ring *
+                 32 requests * 11 grant refs contained in each * 8 bytes ) /
+                 4096 bytes size of frame = 0.6875, rounded up *)
+              let frames_number =
+                max 64
+                  ((max_per_vif * (num_of_vifs + 1))
+                  + (reasonable_per_vbd * (num_of_vbds + 1))
+                  )
+              in
+              debug "estimated max_grant_frames = %d" frames_number ;
+              frames_number
+              (* max_per_vif * (num_of_vifs + 1 hotplugged future one) +
+                 max_per_vbd * (num_of_vbds + 1 hotplugged future one) *)
+
+              (* NOTE: While the VIF calculation is precise, the VBD one is a
+                 very rough approximation of a reasonable value of
+                 RING_SIZE * MAX_SEGMENTS_PER_REQUEST + PAGES_FOR_RING_ITSELF
+                 The following points should allow for a rough understanding
+                 of the scale of the problem of better estimation:
+
+                 1) The blkfront driver can consume different numbers of grant
+                 pages depending on the features advertised by the back driver
+                 (and negotiated with it). These features can differ per VBD, and
+                 right now aren't even known at the time of domain creation.
+                 These include:
+                 1.1) indirect segments - these contain
+                 BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST grants at most, and each
+                 of these frames contains GRANTS_PER_INDIRECT_FRAME grants in
+                 turn (stored in blkif_request_segment).
+                 In practice, this means a catastrophic explosion - we should
+                 not really aim to detect if indirect requests feature is on,
+                 but turn it off to get reasonable estimates.
+                 1.2) persistent grants - these are an optimization, so
+                 shouldn't really change the calculations, worst case is none
+                 of the grants are persistent.
+                 1.3) multi-page rings - these change the RING_SIZE, but not in
+                 a trivial manner (see ring-page-order)
+                 1.4) multi-queue - these change the number of rings, adding
+                 another multiplier.
+                 2) The "8 bytes" multiplier for a grant table entry only applies
+                 to grants_v1. v2 grants take up 16 bytes per entry. And it's
+                 impossible to detect this feature at the moment.
+                 3) A dynamically-sized grant table itself could be a solution?
+                 Used to exist before, caused a lot of XSAs, hard to get right.
+                 4) Drivers might need to be more explicitly limited in how many
+                 pages they can consume
+                 5) VBD backdriver's features should be managed by XAPI on the
+                 object itself and (their max bound) known at the time of domain
+                 creation.
+
+                 So for this estimate, there is only 1 ring which is 1 page, with
+                 32 entries, each entry (request) can have up to 11 pages
+                 (excluding indirect pages and other complications).
+
+                 SEE: xen-blkfront.c, blkif.h, and the backdriver to understand
+                 the process of negotiation (visible in xenstore, in kernel
+                 module parameters in the sys filesystem afterwards)
+              *)
          )
       ; max_maptrack_frames=
          ( try
              int_of_string (List.assoc "max_maptrack_frames" vm_info.platformdata)
-            with _ -> 1024
+            with _ ->
+              0
+              (* This should be >0 only for driver domains (Dom0 startup is not
+                 handled by the toolstack), which currently do not exist.
+                 To support these in the future, xenopsd would need to check what
+                 type of domain is being started.
+              *)
          )
       ; max_grant_version=
          (if List.mem CAP_Gnttab_v2 host_info.capabilities then 2 else 1)

ocaml/xenopsd/xc/domain.mli

Lines changed: 2 additions & 0 deletions
@@ -149,6 +149,8 @@ val make :
   -> [`VM] Uuidx.t
   -> string option
   -> bool (* no_sharept *)
+  -> int (* num_of_vbds *)
+  -> int (* num_of_vifs *)
   -> domid
 (** Create a fresh (empty) domain with a specific UUID, returning the domain ID *)

ocaml/xenopsd/xc/xenops_server_xen.ml

Lines changed: 4 additions & 2 deletions
@@ -1389,7 +1389,8 @@
     in
     (device_id, revision)
 
-  let create_exn task memory_upper_bound vm final_id no_sharept =
+  let create_exn task memory_upper_bound vm final_id no_sharept num_of_vbds
+      num_of_vifs =
     let k = vm.Vm.id in
     with_xc_and_xs (fun xc xs ->
        (* Ensure the DB contains something for this VM - this is to avoid a
@@ -1518,7 +1519,8 @@
        let create_info = generate_create_info ~xs vm persistent in
        let domid =
          Domain.make ~xc ~xs create_info vm.vcpu_max domain_config
-            (uuid_of_vm vm) final_id no_sharept
+            (uuid_of_vm vm) final_id no_sharept num_of_vbds
+            num_of_vifs
        in
        Mem.transfer_reservation_to_domain dbg domid reservation_id ;
        let initial_target =

0 commit comments
