From 69b80360127146e4c8faae14bb9729e144f913e0 Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Fri, 31 Oct 2025 12:18:01 -0700 Subject: [PATCH 1/6] Changes from 500 VMs Hybrid work Adds VM recovery playbooks Enable hugepages on the hypervisor and VM configuration. Add playbook to disable devices created for virtual functions Working changes to configure hugetlb Complaint of a missing var, this import seems required Improve CSR approve and node Ready wait loop Add interfaces that can be used as virtual functions But they seemed to generate a lot of iowait activity on the VMs, so I don't know whether something is wrong with them or not Some changes were generated using Cursor and the claude-4-sonnet model. Signed-off-by: Andrew Collins Apply suggestion from @mcornea Co-authored-by: Marius Cornea --- .gitignore | 3 +- ansible.cfg | 5 +- ansible/.gitignore | 1 + ansible/copy-pull-secret.yml | 15 +++ ansible/hv-vm-start-one.yml | 6 + ansible/hv-vm-stop-all.yml | 5 + ansible/mno-add-vm-workers.yml | 121 ++++++++++++++++++ ansible/roles/copy-pull-secret/tasks/main.yml | 11 ++ ansible/roles/hv-install/defaults/main.yml | 17 +++ ansible/roles/hv-install/tasks/main.yml | 67 ++++++++++ .../templates/hugetlb-reserve-pages.sh.j2 | 15 +++ ansible/roles/hv-vm-create/defaults/main.yml | 16 ++- ansible/roles/hv-vm-create/tasks/main.yml | 27 ++++ .../hv-vm-create/templates/kvm-def.xml.j2 | 20 +++ ansible/roles/hv-vm-destroy/tasks/main.yml | 5 + ansible/roles/hv-vm-start/tasks/main.yml | 5 + .../tasks/check_nodes_joined.yml | 45 ++----- .../roles/ocp-scale-out-csr/tasks/main.yml | 17 ++- .../tasks/set_hostname_role.yml | 1 + ansible/vars/hv.sample.yml | 32 +++++ ansible/vars/lab.yml | 2 +- ansible/vars/scale_out.sample.yml | 4 +- ansible/vm-sriov-disable.yml | 25 ++++ docs/deploy-vmno.md | 31 +++++ docs/hypervisors.md | 6 +- docs/troubleshooting.md | 10 ++ 26 files changed, 458 insertions(+), 54 deletions(-) create mode 100644 ansible/copy-pull-secret.yml create mode 100644 ansible/hv-vm-start-one.yml create mode 100644 ansible/hv-vm-stop-all.yml create mode 100644 ansible/mno-add-vm-workers.yml create mode 100644 ansible/roles/copy-pull-secret/tasks/main.yml create mode 100644 ansible/roles/hv-install/defaults/main.yml create mode 100644 ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 create mode 100644 ansible/roles/hv-vm-destroy/tasks/main.yml create mode 100644 ansible/roles/hv-vm-start/tasks/main.yml create mode 100644 ansible/vm-sriov-disable.yml diff --git a/.gitignore b/.gitignore index 9c186b89..e9d84683 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,5 @@ out gen .idea/ .idea/workspace.xml - +*.log +*.orig diff --git a/ansible.cfg b/ansible.cfg index 60dc3a2e..214988f0 100644 --- a/ansible.cfg +++ b/ansible.cfg @@ -1,3 +1,6 @@ [defaults] -interpreter_python=auto +interpreter_python=auto_silent callbacks_enabled = profile_tasks +deprecation_warnings = False +log_path = ~/.ansible/jetlag-ansible.log +display_args_to_stdout = True diff --git a/ansible/.gitignore b/ansible/.gitignore index 2768362a..f2310062 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -1 +1,2 @@ smcipmitool.tar.gz +*.sw* diff --git a/ansible/copy-pull-secret.yml b/ansible/copy-pull-secret.yml new file mode 100644 index 00000000..061a3039 --- /dev/null +++ b/ansible/copy-pull-secret.yml @@ -0,0 +1,15 @@ +--- +# Copy pull secret playbook +# +# This playbook is used to copy the pull secret to the nodes in the cluster. 
+# It is used to updae the pull secret on nodes to pull images from the Red Hat registry. +# +# Example Usage: +# +# ansible-playbook ansible/copy-pull-secret.yml +# + +- name: Copies pull secret to nodes + hosts: hv_vm + roles: + - copy-pull-secret diff --git a/ansible/hv-vm-start-one.yml b/ansible/hv-vm-start-one.yml new file mode 100644 index 00000000..f4f64662 --- /dev/null +++ b/ansible/hv-vm-start-one.yml @@ -0,0 +1,6 @@ +--- +- name: start one VMs + gather_facts: false + hosts: hv + roles: + - hv-vm-start diff --git a/ansible/hv-vm-stop-all.yml b/ansible/hv-vm-stop-all.yml new file mode 100644 index 00000000..ee8db9a4 --- /dev/null +++ b/ansible/hv-vm-stop-all.yml @@ -0,0 +1,5 @@ +--- +- name: destroy all VMs + hosts: hv + roles: + - hv-vm-destroy diff --git a/ansible/mno-add-vm-workers.yml b/ansible/mno-add-vm-workers.yml new file mode 100644 index 00000000..142bd1d3 --- /dev/null +++ b/ansible/mno-add-vm-workers.yml @@ -0,0 +1,121 @@ +--- +# Create and deploy a cluster with the Assisted Installer +# +# Example Usage: +# +# ansible-playbook -i ansible/inventory/cloud42.local ansible/mno-deploy.yml +# + +- name: Prep cluster to add hosts + hosts: bastion + vars_files: + - vars/lab.yml + - vars/all.yml + gather_facts: false + tasks: + - name: Set assisted installer connection + set_fact: + assisted_installer_host: "{{ groups['bastion'][0] }}" + assisted_installer_port: "8090" + + - name: Get cluster status + uri: + url: "http://{{ assisted_installer_host }}:{{ assisted_installer_port }}/api/assisted-install/v2/clusters/{{ ai_cluster_id }}" + method: GET + body_format: json + status_code: [200] + return_content: true + register: cluster_data + failed_when: cluster_data.json.status not in ['installed', 'adding-hosts'] + + - name: Set cluster status to adding-hosts + uri: + url: "http://{{ assisted_installer_host }}:{{ assisted_installer_port }}/api/assisted-install/v2/clusters/{{ ai_cluster_id }}/actions/allow-add-workers" + method: POST + body_format: json + status_code: [201, 202] + when: cluster_data.json.status == 'installed' + + - name: Get infra-env + uri: + url: "http://{{ assisted_installer_host }}:{{ assisted_installer_port }}/api/assisted-install/v2/clusters/{{ ai_cluster_id }}" + method: GET + body_format: json + status_code: [200] + return_content: true + register: infra_env_return + + - name: Set ai_infraenv_id + set_fact: + ai_infraenv_id: "{{ infra_env_return.json.hosts[0].infra_env_id }}" + + - name: Get infra-env static_network_config + uri: + url: "http://{{ assisted_installer_host }}:{{ assisted_installer_port }}/api/assisted-install/v2/infra-envs/{{ ai_infraenv_id }}" + method: GET + body_format: json + status_code: [200] + return_content: true + register: infra_env_return + + - name: Set ai_infraenv_static_config + set_fact: + ai_infraenv_static_config: "{{ infra_env_return.json.static_network_config }}" + + - name: Set empty static network configuration + set_fact: + static_network_config: [] + + - name: Generate Static Network Config for VMs + ansible.builtin.include_role: + name: create-ai-cluster + tasks_from: static_network_config + vars: + hybrid_worker_count: "{{ add_worker_count }}" + loop: "{{ groups['hv_vm'][:hybrid_worker_count | int] }}" + + - name: show ai_infraenv_static_config + debug: + var: ai_infraenv_static_config + + - name: show static_network_config + debug: + var: static_network_config + + - name: Set static network composite + set_fact: + static_network_config_comp: "{{ static_network_config + ai_infraenv_static_config }}" + + - name: show 
static_network_config composite + debug: + var: static_network_config_comp + + - name: Update static config + uri: + url: "http://{{ assisted_installer_host }}:{{ assisted_installer_port }}/api/assisted-install/v2/infra-envs/{{ ai_infraenv_id }}" + body: { + "static_network_config": "{{ static_network_config + ai_infraenv_static_config }}" + } + method: PATCH + body_format: json + status_code: [201] + return_content: true + + +- name: Boot / Install VMs + hosts: bastion + vars_files: + - vars/lab.yml + - vars/all.yml + roles: + - generate-discovery-iso + - role: boot-iso + vars: + inventory_group: hv_vm + index: "{{ add_worker_count }}" + virtual_media_iso: "discovery.iso" + - role: wait-hosts-discovered + vars: + inventory_nodes: "{{ groups['hv_vm'][:add_worker_count|int] }}" + discover_nodes: "{{ groups['hv_vm'][:add_worker_count|int] }}" + - add-hosts-install diff --git a/ansible/roles/copy-pull-secret/tasks/main.yml b/ansible/roles/copy-pull-secret/tasks/main.yml new file mode 100644 index 00000000..078e04fb --- /dev/null +++ b/ansible/roles/copy-pull-secret/tasks/main.yml @@ -0,0 +1,11 @@ +--- +- name: Copy pull secret + copy: + src: pull-secret.json + dest: "/var/lib/kubelet/config.json" + become: true +- name: touch force update + file: + path: /run/machine-config-daemon-force + state: touch + become: true diff --git a/ansible/roles/hv-install/defaults/main.yml b/ansible/roles/hv-install/defaults/main.yml new file mode 100644 index 00000000..bfd2265b --- /dev/null +++ b/ansible/roles/hv-install/defaults/main.yml @@ -0,0 +1,17 @@ +--- +# hv-install default vars + +# Hugepages configuration for hypervisors +enable_hugepages: false + +# Hugepage size: 2M or 1G +hugepage_size: "1G" + +# Number of hugepages to allocate (e.g., 32 for 32GB of 1G hugepages) +hugepage_count: 32 + +# Additional kernel parameters for performance tuning +additional_kernel_params: [] + +# Number of hugepages per node (e.g. 
total / 2) +hugepages_count_per_node: 190 diff --git a/ansible/roles/hv-install/tasks/main.yml b/ansible/roles/hv-install/tasks/main.yml index 4451e0c4..a73d5ee9 100644 --- a/ansible/roles/hv-install/tasks/main.yml +++ b/ansible/roles/hv-install/tasks/main.yml @@ -21,6 +21,55 @@ name: sushy-tools version: 1.2.0 +- name: Configure hugepages support + when: enable_hugepages + block: + + - name: Run grubby to add hugepages arguments + command: grubby --update-kernel=ALL --args="default_hugepagesz={{ hugepage_size }} hugepagesz={{ hugepage_size }}" + register: grub_updated + + - name: Set reboot required flag + set_fact: + hugepages_reboot_required: true + when: grub_updated.changed + + - name: Create hugetlb-gigantic-pages.service file + copy: + dest: /usr/lib/systemd/system/hugetlb-gigantic-pages.service + content: | + [Unit] + Description=HugeTLB Gigantic Pages Reservation + DefaultDependencies=no + Before=dev-hugepages.mount + ConditionPathExists=/sys/devices/system/node + ConditionKernelCommandLine=hugepagesz=1G + + [Service] + Type=oneshot + RemainAfterExit=yes + ExecStart=/usr/lib/systemd/hugetlb-reserve-pages.sh + + [Install] + WantedBy=sysinit.target + + - name: Create hugetlb-reserve-pages.sh + template: + src: hugetlb-reserve-pages.sh.j2 + dest: /usr/lib/systemd/hugetlb-reserve-pages.sh + mode: "0755" + register: hugetlb_script + + - name: Set reboot required flag + set_fact: + hugepages_reboot_required: true + when: hugetlb_script.changed + + - name: Enable hugetlb-gigantic-pages.service + systemd: + enabled: true + name: hugetlb-gigantic-pages.service + - name: Get coredns get_url: validate_certs: false @@ -65,3 +114,21 @@ state: started enabled: true name: ksmtuned + +- name: Reboot hypervisor for hugepages configuration + when: + - enable_hugepages + - hugepages_reboot_required | default(false) + block: + - name: Reboot hypervisor + reboot: + msg: "Rebooting to apply hugepages configuration" + reboot_timeout: 600 + + - name: Verify hugepages are configured + shell: cat /proc/meminfo | grep -E "HugePages_Total|HugePages_Free|Hugepagesize" + register: hugepages_status + + - name: Display hugepages status + debug: + msg: "{{ hugepages_status.stdout_lines }}" diff --git a/ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 b/ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 new file mode 100644 index 00000000..5bed529d --- /dev/null +++ b/ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 @@ -0,0 +1,15 @@ +#!/bin/sh + +nodes_path=/sys/devices/system/node/ +if [ ! 
-d $nodes_path ]; then + echo "ERROR: $nodes_path does not exist" + exit 1 +fi + +reserve_pages() +{ + echo $1 > $nodes_path/$2/hugepages/hugepages-1048576kB/nr_hugepages +} + +reserve_pages {{ hugepages_count_per_node }} node0 +reserve_pages {{ hugepages_count_per_node }} node1 diff --git a/ansible/roles/hv-vm-create/defaults/main.yml b/ansible/roles/hv-vm-create/defaults/main.yml index 7585df82..bd0f1690 100644 --- a/ansible/roles/hv-vm-create/defaults/main.yml +++ b/ansible/roles/hv-vm-create/defaults/main.yml @@ -10,7 +10,7 @@ vnuma_enabled: false vnuma_memory_placement: "static" vnuma_cpu_placement: "static" -# Manual vNUMA configuration +# Manual vNUMA configuration # vnuma_nodes: # - id: 0 # cpus: "0-3" @@ -21,4 +21,16 @@ vnuma_cpu_placement: "static" # vNUMA topology settings vnuma_memory_mode: "strict" # strict, preferred, interleave -vnuma_cpu_mode: "strict" # strict, preferred +vnuma_cpu_mode: "strict" # strict, preferred + +# Hugepages configuration for VMs +enable_vm_hugepages: false + +# Hugepage size for VMs: 2M or 1G +vm_hugepage_size: "1G" + +# Number of hugepages to allocate per VM (auto-calculated based on VM memory if not specified) +vm_hugepage_count: + +# Hugepage mount path in VMs +vm_hugepage_mount: "/mnt/hugepages" diff --git a/ansible/roles/hv-vm-create/tasks/main.yml b/ansible/roles/hv-vm-create/tasks/main.yml index 875407a8..293e84bc 100644 --- a/ansible/roles/hv-vm-create/tasks/main.yml +++ b/ansible/roles/hv-vm-create/tasks/main.yml @@ -20,6 +20,33 @@ set_fact: hv_vm_cpu_count: "{{ hostvars[inventory_hostname]['cpus'] }}" +- name: Configure VM hugepages + when: enable_vm_hugepages + block: + - name: Calculate hugepages needed for VM if not specified + set_fact: + calculated_vm_hugepage_count: "{{ (hostvars[inventory_hostname]['memory'] | int) // (vm_hugepage_size[:-1] | int) }}" + when: vm_hugepage_count is not defined or vm_hugepage_count == "" + + - name: Set hugepage count for VM + set_fact: + vm_hugepages_needed: "{{ vm_hugepage_count if vm_hugepage_count is defined and vm_hugepage_count != '' else calculated_vm_hugepage_count }}" + + - name: Check host hugepages availability + shell: | + grep -E "HugePages_Free.*{{ vm_hugepage_size }}" /proc/meminfo | awk '{print $2}' || echo "0" + register: host_hugepages_free + delegate_to: "{{ hostvars[inventory_hostname]['ansible_host'] }}" + + - name: Validate sufficient hugepages available + fail: + msg: "Not enough {{ vm_hugepage_size }} hugepages available on host {{ hostvars[inventory_hostname]['ansible_host'] }}. 
Need: {{ vm_hugepages_needed }}, Available: {{ host_hugepages_free.stdout }}" + when: (host_hugepages_free.stdout | int) < (vm_hugepages_needed | int) + + - name: Display hugepages configuration for VM + debug: + msg: "VM {{ inventory_hostname }} will use {{ vm_hugepages_needed }} {{ vm_hugepage_size }} hugepages ({{ (vm_hugepages_needed | int) * (vm_hugepage_size[:-1] | int) }}G total)" + - name: Set vNUMA configuration tasks when: vnuma_enabled block: diff --git a/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 b/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 index df33d85a..6b671470 100644 --- a/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 +++ b/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 @@ -3,6 +3,13 @@ {{ hostvars[inventory_hostname]['domain_uuid'] }} {{ hostvars[inventory_hostname]['memory'] }} {{ hostvars[inventory_hostname]['memory'] }} +{% if enable_vm_hugepages %} + + + + + +{% endif %} {{ hv_vm_cpu_count | int }} hvm @@ -11,6 +18,7 @@ + {% if vnuma_enabled %} @@ -125,6 +133,18 @@ {% endif %}
+{% for i in range(1, 6) %} + +{% set mac_prefix = "%s:%02x" | format('52:54:00',i) %} + + + +
+ +{% endfor %} + + + diff --git a/ansible/roles/hv-vm-destroy/tasks/main.yml b/ansible/roles/hv-vm-destroy/tasks/main.yml new file mode 100644 index 00000000..28049f8f --- /dev/null +++ b/ansible/roles/hv-vm-destroy/tasks/main.yml @@ -0,0 +1,5 @@ +--- +- name: Stop all vms + shell: + for i in $(virsh list --all --name | grep vm) ; do virsh destroy $i ; done + become: true diff --git a/ansible/roles/hv-vm-start/tasks/main.yml b/ansible/roles/hv-vm-start/tasks/main.yml new file mode 100644 index 00000000..6f9d711e --- /dev/null +++ b/ansible/roles/hv-vm-start/tasks/main.yml @@ -0,0 +1,5 @@ +--- +- name: Start one vm + shell: + for i in $(virsh list --all --name --state-shutoff | grep vm |head -1) ; do virsh start $i ; done + become: true diff --git a/ansible/roles/ocp-scale-out-csr/tasks/check_nodes_joined.yml b/ansible/roles/ocp-scale-out-csr/tasks/check_nodes_joined.yml index f2be9be4..853ff866 100644 --- a/ansible/roles/ocp-scale-out-csr/tasks/check_nodes_joined.yml +++ b/ansible/roles/ocp-scale-out-csr/tasks/check_nodes_joined.yml @@ -1,15 +1,5 @@ --- -- name: Set Facts to recurse with - set_fact: - r_qry: "{{ qry }}" - r_worker_counter: "{{ worker_counter }}" - -- name: Set KUBECONFIG path based on cluster type - set_fact: - cluster_kubeconfig: "{{ bastion_cluster_config_dir }}/{{ 'kubeconfig' if cluster_type != 'sno' else groups['sno'][0] + '/kubeconfig' }}" - -- name: approve CSRs and check if nodes have joined the cluster - block: +- block: - name: Increment the retry count set_fact: retry: "{{ 0 if retry is undefined else retry | int + 1 }}" @@ -19,35 +9,21 @@ seconds: "30" when: retry|int > 0 - - name: Get CSRs + - name: Get Pending CSRs shell: | - KUBECONFIG={{ cluster_kubeconfig }} oc get csr -o json + KUBECONFIG={{ bastion_cluster_config_dir }}/kubeconfig oc get csr --no-headers | grep Pending | awk '{ print $1 }' register: oc_get_csr - name: Approve pending CSRs shell: | - KUBECONFIG={{ cluster_kubeconfig }} oc adm certificate approve {{ item.metadata.name }} - loop: "{{ oc_get_csr.stdout | from_json | json_query(qry) }}" - loop_control: - label: "{{ item.metadata.name }}" - - - name: Get worker node count - shell: | - KUBECONFIG={{ cluster_kubeconfig }} oc get nodes | {{ worker_counter }} - register: oc_get_nodes_workers - - - name: Current Worker Node Count - debug: - var: oc_get_nodes_workers.stdout - - - name: Waiting for Worker Node Count - debug: - msg: "{{ current_worker_count+scale_out_count }}" + KUBECONFIG={{ bastion_cluster_config_dir }}/kubeconfig oc adm certificate approve {{ item }} + loop: "{{ oc_get_csr.stdout_lines }}" + when: oc_get_csr.stdout_lines | length > 0 - - name: Raise fail to trigger retry if all nodes didn't meet requirments + - name: Raise fail to trigger retry if CSRs still Pending fail: - msg: All nodes have not met check requirements - when: oc_get_nodes_workers.stdout|int < current_worker_count+scale_out_count + msg: CSRs still pending. 
Try again + when: oc_get_csr.stdout_lines |length > 0 rescue: - name: Fail on maximum retry count fail: @@ -56,6 +32,3 @@ - name: Retry the check include_tasks: check_nodes_joined.yml - vars: - qry: "{{ r_qry }}" - worker_counter: "{{ r_worker_counter }}" diff --git a/ansible/roles/ocp-scale-out-csr/tasks/main.yml b/ansible/roles/ocp-scale-out-csr/tasks/main.yml index 13e64a94..e5bc2f2f 100644 --- a/ansible/roles/ocp-scale-out-csr/tasks/main.yml +++ b/ansible/roles/ocp-scale-out-csr/tasks/main.yml @@ -1,12 +1,11 @@ --- -- name: Approve node-bootstrapper CSRs and wait for nodes to join cluster +- name: Approve CSRs include_tasks: check_nodes_joined.yml - vars: - qry: "items[?status.conditions==null && spec.username == 'system:serviceaccount:openshift-machine-config-operator:node-bootstrapper']" - worker_counter: "grep worker | grep -v -c master" -- name: Approve Kublet-serving CSRs and wait for nodes to join cluster - include_tasks: check_nodes_joined.yml - vars: - qry: "items[?status.conditions==null && spec.signerName == 'kubernetes.io/kubelet-serving']" - worker_counter: "grep worker | grep -v master | grep -c -v NotReady" +- name: Wait for expected number of workers to be Ready + shell: | + KUBECONFIG={{ bastion_cluster_config_dir }}/kubeconfig oc get nodes --no-headers -l node-role.kubernetes.io/worker | grep -c -v NotReady + register: oc_get_nodes_workers + until: oc_get_nodes_workers.stdout|int < current_worker_count+scale_out_count + retries: 540 + delay: 30 diff --git a/ansible/roles/wait-hosts-discovered/tasks/set_hostname_role.yml b/ansible/roles/wait-hosts-discovered/tasks/set_hostname_role.yml index 807b3b11..fe951ca3 100644 --- a/ansible/roles/wait-hosts-discovered/tasks/set_hostname_role.yml +++ b/ansible/roles/wait-hosts-discovered/tasks/set_hostname_role.yml @@ -54,3 +54,4 @@ "host_name": "{{ hostname }}", "host_role": "{{ host_role }}" } + ignore_errors: yes diff --git a/ansible/vars/hv.sample.yml b/ansible/vars/hv.sample.yml index b50b2551..70217973 100644 --- a/ansible/vars/hv.sample.yml +++ b/ansible/vars/hv.sample.yml @@ -48,3 +48,35 @@ hv_vm_manifest_acm_cr: true use_bastion_registry: false # Provide pull-secret for connected manifests pull_secret: "{{ lookup('file', '../pull-secret.txt') | b64encode }}" + +################################################################################ +# Hugepages Configuration +################################################################################ + +# Enable hugepages on hypervisors +enable_hugepages: false + +# Hugepage size for hypervisors: 2M or 1G +hugepage_size: "1G" + +# Number of hugepages to allocate on hypervisors (e.g., 64 for 64GB of 1G hugepages) +# Calculate based on total memory and VM requirements +hugepage_count: 64 + +# Additional kernel parameters for performance tuning +additional_kernel_params: + - "intel_iommu=on" + - "iommu=pt" + - "isolcpus=2-15,18-31" + +# Enable hugepages for VMs +enable_vm_hugepages: false + +# Hugepage size for VMs (should match hypervisor hugepage_size) +vm_hugepage_size: "1G" + +# Number of hugepages per VM (auto-calculated based on VM memory if not specified) +# vm_hugepage_count: 18 + +# Enable vNUMA for performance (recommended with hugepages) +vnuma_enabled: false diff --git a/ansible/vars/lab.yml b/ansible/vars/lab.yml index 7f7ca2d4..81d54250 100644 --- a/ansible/vars/lab.yml +++ b/ansible/vars/lab.yml @@ -218,7 +218,7 @@ hw_vm_counts: nvme0n1: 12 r650: default: 4 - nvme0n1: 23 + nvme0n1: 16 r660: default: 4 nvme0n1: 23 diff --git a/ansible/vars/scale_out.sample.yml 
b/ansible/vars/scale_out.sample.yml index 30133fef..4b0b7990 100644 --- a/ansible/vars/scale_out.sample.yml +++ b/ansible/vars/scale_out.sample.yml @@ -3,9 +3,9 @@ # This assumes they are all listed in the worker inventory # group. This varable is an offset used to skip worker node # records in the worker inventory group. -current_worker_count: 120 +current_worker_count: 3 # Set this to the number of worker nodes being added to the # cluster. At minimum, current_worker_count + scale_out_count # inventory records must exist in the inventory file. -scale_out_count: 100 +scale_out_count: 3 diff --git a/ansible/vm-sriov-disable.yml b/ansible/vm-sriov-disable.yml new file mode 100644 index 00000000..9fdba3c2 --- /dev/null +++ b/ansible/vm-sriov-disable.yml @@ -0,0 +1,25 @@ +--- +# Disables igb VFs from attempting to connect, which never succeeds and thus drives up CPU across all the workers. +# +# Exepects an inventory that has only the [worker] block, as with the normal inventory created where workers show up under [worker] and [hv_vm] seem to have some variables that affect how the node is accessed. +# +# Example Usage: +# +# ansible-playbook -i ansible/inventory/cloud42.local ansible/vm-sriov-disable.yml +# + +- name: Disable all fake sr-iov devices and connections + gather_facts: false + hosts: worker + tasks: + - name: devices down + shell: + for i in {5..9} ; do for j in {0..6} ; do nmcli d down enp${i}s0v${j} ; done ; done + become: true + ignore_errors: true + + - name: connections autoconnect off + shell: + for i in $( nmcli conn show | grep "Wired connection" | awk '{ print $4 }' ) ; do nmcli conn mod $i connection.autoconnect no ; done + become: true + ignore_errors: true diff --git a/docs/deploy-vmno.md b/docs/deploy-vmno.md index 109a0a1f..6d3bb724 100644 --- a/docs/deploy-vmno.md +++ b/docs/deploy-vmno.md @@ -123,9 +123,21 @@ hw_vm_counts: nvme0n1: 7 ``` +When mixing different machines, the hv_vm_counts may be adjusted for those machine models to create the same number of VMs per hypervisor. For example, when mixing Dell r640 and r650 in ScaleLab, the following counts were used: + +```yaml +hw_vm_counts: + scalelab: + r650: + default: 4 + nvme0n1: 16 +``` + > [!NOTE] > Depending upon your hardware, you may have to parition and format a 2nd disk to help store VM disk files. +In some VM scenarios, hugepages may be required. To configure VMs with hugepages, enable with the variable `enable_hugepages`, and configure specifics with other similar variables found in: `ansible/roles/hv-install/defaults/main.yml`. + ## Configure Ansible vars in `hv.yml` ```console @@ -484,3 +496,22 @@ vm00008 Ready worker 1d v1.31.7 (.ansible) [root@ jetlag]# cat /root/vmno/kubeadmin-password xxxxx-xxxxx-xxxxx-xxxxx ``` + +## Additional helper playbooks for VM management + +If VMs become unresponsive, sometimes destroying and restarting them is the only remedy. Since the garbage cleanup of pods of all VMs on a single hypervisor at a time can cause stalling, it also may be beneficial to start one VM per HV at a time. Playbooks have been added for all of these tasks. + +See the following playbooks to help in these cases: +``` +ansible/hv-vm-stop-all.yml +ansible/hv-vm-start-one.yml +``` + +## Disabling NetworkManager devices and connections for SR-IOV devices on VMs + +When VMs are created with SR-IOV devices using the IGB driver, the devices and connections will never fully initialize. NetworkManager repeatedly attempts to start them, which results in a large amount of churn on the VMs. 
A workaround to this churn is to force the devices down and connections' autoconnect off for those created for the interfaces. + +See the following playbook: +``` +ansible/vm-sriov-disable.yml +``` \ No newline at end of file diff --git a/docs/hypervisors.md b/docs/hypervisors.md index d050123b..1d605a49 100644 --- a/docs/hypervisors.md +++ b/docs/hypervisors.md @@ -112,16 +112,18 @@ Check if the servers in your allocation support NUMA config: ``` Example output indicating NUMA support: +```console NUMA node(s): 2 NUMA node0 CPU(s): 0-11,24-35 NUMA node1 CPU(s): 12-23,36-47 +``` -Add this var to your ansible/vars/all.yml file to enable vnuma config for virtual deployments: +Add this var to your `ansible/vars/all.yml` file to enable vnuma config for virtual deployments: ```yaml vnuma_enabled: true ``` -Refer to ansible/roles/hv-vm-create/defaults/main.yml for other vNUMA configuration options. +Refer to `ansible/roles/hv-vm-create/defaults/main.yml` for other vNUMA configuration options. ## Create/Delete/Replace VMs diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 2d47c9a1..ef6a3d9a 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -12,6 +12,7 @@ _**Table of Contents**_ - [Failed on Wait for cluster to be ready](#failed-on-wait-for-cluster-to-be-ready) - [Failed on Adjust by-path selected install disk](#failed-on-adjust-by-path-selected-install-disk) - [Failed on Insert Virtual Media](#failed-on-insert-virtual-media) + - [Failing ImagePull due to Pull Secret](#failing-imagepull-due-to-pull-secret) - [Bastion](#bastion) - [Accessing services](#accessing-services) - [Clean all container services / podman pods](#clean-all-container-services--podman-pods) @@ -274,6 +275,15 @@ racadm>>set iDRAC.VirtualMedia.Attached Attached Object value modified successfully ``` +## Failing ImagePull due to Pull Secret + +If a cluster has been running for some time or has changed hands between owners, there is a chance the pull secret supplied at install time may have expired. If the cluster is degraded enough as a result, the control plane will not be able to update the kubelet's pull secret automatically. + +For this emergency scenario, a playbook has been created that should hopefully help: +``` +ansible-playbook ansible/copy-pull-secret.yml +``` + # Bastion ## Accessing services From e5a7d64eb03d8cf830cda6bd32933f0e282e8ead Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Thu, 20 Nov 2025 10:14:59 -0800 Subject: [PATCH 2/6] Feedback from PR Renamed variable to remove "enabled" Added new variable to switch ON VFs in VMs. Defaults to OFF. 
--- ansible/roles/hv-vm-create/defaults/main.yml | 5 ++++- ansible/roles/hv-vm-create/tasks/main.yml | 2 +- ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 | 6 +++++- ansible/roles/hv-vm-start/tasks/main.yml | 2 +- ansible/vars/hv.sample.yml | 5 ++++- docs/troubleshooting.md | 9 ++++++--- 6 files changed, 21 insertions(+), 8 deletions(-) diff --git a/ansible/roles/hv-vm-create/defaults/main.yml b/ansible/roles/hv-vm-create/defaults/main.yml index bd0f1690..527ba186 100644 --- a/ansible/roles/hv-vm-create/defaults/main.yml +++ b/ansible/roles/hv-vm-create/defaults/main.yml @@ -24,7 +24,7 @@ vnuma_memory_mode: "strict" # strict, preferred, interleave vnuma_cpu_mode: "strict" # strict, preferred # Hugepages configuration for VMs -enable_vm_hugepages: false +vm_hugepages: false # Hugepage size for VMs: 2M or 1G vm_hugepage_size: "1G" @@ -34,3 +34,6 @@ vm_hugepage_count: # Hugepage mount path in VMs vm_hugepage_mount: "/mnt/hugepages" + +# Enable IGB NICs for VMs +vm_igb_nics: false \ No newline at end of file diff --git a/ansible/roles/hv-vm-create/tasks/main.yml b/ansible/roles/hv-vm-create/tasks/main.yml index 293e84bc..f78add51 100644 --- a/ansible/roles/hv-vm-create/tasks/main.yml +++ b/ansible/roles/hv-vm-create/tasks/main.yml @@ -21,7 +21,7 @@ hv_vm_cpu_count: "{{ hostvars[inventory_hostname]['cpus'] }}" - name: Configure VM hugepages - when: enable_vm_hugepages + when: vm_hugepages block: - name: Calculate hugepages needed for VM if not specified set_fact: diff --git a/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 b/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 index 6b671470..197e2fca 100644 --- a/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 +++ b/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 @@ -3,7 +3,7 @@ {{ hostvars[inventory_hostname]['domain_uuid'] }} {{ hostvars[inventory_hostname]['memory'] }} {{ hostvars[inventory_hostname]['memory'] }} -{% if enable_vm_hugepages %} +{% if vm_hugepages %} @@ -18,7 +18,9 @@ +{% if vm_igb_nics | default(false) %} +{% endif %} {% if vnuma_enabled %} @@ -133,6 +135,7 @@ {% endif %}
+{% if vm_igb_nics | default(false) %} {% for i in range(1, 6) %} {% set mac_prefix = "%s:%02x" | format('52:54:00',i) %} @@ -145,6 +148,7 @@ +{% endif %} diff --git a/ansible/roles/hv-vm-start/tasks/main.yml b/ansible/roles/hv-vm-start/tasks/main.yml index 6f9d711e..e278efff 100644 --- a/ansible/roles/hv-vm-start/tasks/main.yml +++ b/ansible/roles/hv-vm-start/tasks/main.yml @@ -1,5 +1,5 @@ --- - name: Start one vm shell: - for i in $(virsh list --all --name --state-shutoff | grep vm |head -1) ; do virsh start $i ; done + for i in $(virsh list --all --name --state-shutoff | grep vm |head -1) ; do virsh start $i ; done become: true diff --git a/ansible/vars/hv.sample.yml b/ansible/vars/hv.sample.yml index 70217973..288e4959 100644 --- a/ansible/vars/hv.sample.yml +++ b/ansible/vars/hv.sample.yml @@ -70,7 +70,7 @@ additional_kernel_params: - "isolcpus=2-15,18-31" # Enable hugepages for VMs -enable_vm_hugepages: false +vm_hugepages: false # Hugepage size for VMs (should match hypervisor hugepage_size) vm_hugepage_size: "1G" @@ -80,3 +80,6 @@ vm_hugepage_size: "1G" # Enable vNUMA for performance (recommended with hugepages) vnuma_enabled: false + +# Enable IGB NICs for VMs +vm_igb_nics: false \ No newline at end of file diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index ef6a3d9a..c3bc5ba4 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -12,7 +12,7 @@ _**Table of Contents**_ - [Failed on Wait for cluster to be ready](#failed-on-wait-for-cluster-to-be-ready) - [Failed on Adjust by-path selected install disk](#failed-on-adjust-by-path-selected-install-disk) - [Failed on Insert Virtual Media](#failed-on-insert-virtual-media) - - [Failing ImagePull due to Pull Secret](#failing-imagepull-due-to-pull-secret) + - [Failing ImagePull due to Pull Secret](#failing-imagepull-due-to-deactivated-pull-secret) - [Bastion](#bastion) - [Accessing services](#accessing-services) - [Clean all container services / podman pods](#clean-all-container-services--podman-pods) @@ -275,9 +275,12 @@ racadm>>set iDRAC.VirtualMedia.Attached Attached Object value modified successfully ``` -## Failing ImagePull due to Pull Secret +## Failing ImagePull due to Deactivated Pull Secret -If a cluster has been running for some time or has changed hands between owners, there is a chance the pull secret supplied at install time may have expired. If the cluster is degraded enough as a result, the control plane will not be able to update the kubelet's pull secret automatically. +If a cluster has been running for some time or has changed hands between owners, there is a chance the pull secret supplied at install time has expired. +Any attempt to update the pull secret by standard means (i.e. `oc edit -n openshift-config secret/pull-secret`) will not work. + +If the cluster is degraded enough as a result, the control plane will not be able to update the kubelet's pull secret automatically. 
For this emergency scenario, a playbook has been created that should hopefully help: ``` From a0d87ff88f42fa2a42ab47e3fa1011507ab72e45 Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Thu, 20 Nov 2025 10:16:28 -0800 Subject: [PATCH 3/6] Revert ocp-scale-out-csr changes Signed-off-by: Andrew Collins --- ansible/roles/ocp-scale-out-csr/tasks/main.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/ansible/roles/ocp-scale-out-csr/tasks/main.yml b/ansible/roles/ocp-scale-out-csr/tasks/main.yml index e5bc2f2f..13e64a94 100644 --- a/ansible/roles/ocp-scale-out-csr/tasks/main.yml +++ b/ansible/roles/ocp-scale-out-csr/tasks/main.yml @@ -1,11 +1,12 @@ --- -- name: Approve CSRs +- name: Approve node-bootstrapper CSRs and wait for nodes to join cluster include_tasks: check_nodes_joined.yml + vars: + qry: "items[?status.conditions==null && spec.username == 'system:serviceaccount:openshift-machine-config-operator:node-bootstrapper']" + worker_counter: "grep worker | grep -v -c master" -- name: Wait for expected number of workers to be Ready - shell: | - KUBECONFIG={{ bastion_cluster_config_dir }}/kubeconfig oc get nodes --no-headers -l node-role.kubernetes.io/worker | grep -c -v NotReady - register: oc_get_nodes_workers - until: oc_get_nodes_workers.stdout|int < current_worker_count+scale_out_count - retries: 540 - delay: 30 +- name: Approve Kublet-serving CSRs and wait for nodes to join cluster + include_tasks: check_nodes_joined.yml + vars: + qry: "items[?status.conditions==null && spec.signerName == 'kubernetes.io/kubelet-serving']" + worker_counter: "grep worker | grep -v master | grep -c -v NotReady" From 4a22cd2bababc5c00f14ea4f87d62dff4b48f95a Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Thu, 20 Nov 2025 10:17:59 -0800 Subject: [PATCH 4/6] Revert set_hostname_role Signed-off-by: Andrew Collins --- ansible/roles/wait-hosts-discovered/tasks/set_hostname_role.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ansible/roles/wait-hosts-discovered/tasks/set_hostname_role.yml b/ansible/roles/wait-hosts-discovered/tasks/set_hostname_role.yml index fe951ca3..807b3b11 100644 --- a/ansible/roles/wait-hosts-discovered/tasks/set_hostname_role.yml +++ b/ansible/roles/wait-hosts-discovered/tasks/set_hostname_role.yml @@ -54,4 +54,3 @@ "host_name": "{{ hostname }}", "host_role": "{{ host_role }}" } - ignore_errors: yes From e3db4bd248e9ec70dbd4f1db4e79d9e32b122d04 Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Thu, 20 Nov 2025 10:20:18 -0800 Subject: [PATCH 5/6] Revert ansible.cfg Signed-off-by: Andrew Collins --- ansible.cfg | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ansible.cfg b/ansible.cfg index 214988f0..60dc3a2e 100644 --- a/ansible.cfg +++ b/ansible.cfg @@ -1,6 +1,3 @@ [defaults] -interpreter_python=auto_silent +interpreter_python=auto callbacks_enabled = profile_tasks -deprecation_warnings = False -log_path = ~/.ansible/jetlag-ansible.log -display_args_to_stdout = True From ffa0dce085b2f77c51953a972824e8cbf41a5954 Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Thu, 20 Nov 2025 10:22:41 -0800 Subject: [PATCH 6/6] Added note about VM NICs into the docs --- docs/deploy-vmno.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/deploy-vmno.md b/docs/deploy-vmno.md index 6d3bb724..2bae6e56 100644 --- a/docs/deploy-vmno.md +++ b/docs/deploy-vmno.md @@ -509,7 +509,10 @@ ansible/hv-vm-start-one.yml ## Disabling NetworkManager devices and connections for SR-IOV devices on VMs -When VMs are created with 
SR-IOV devices using the IGB driver, the devices and connections will never fully initialize. NetworkManager repeatedly attempts to start them, which results in a large amount of churn on the VMs. A workaround to this churn is to force the devices down and connections' autoconnect off for those created for the interfaces. +One option of creating SR-IOV capable interfaces in a VM is to create them using the Intel IGB driver. +This may be achieved by setting the variable `vm_igb_nics: true` in your variables. + +**Please note:** When VMs are created with SR-IOV devices using the IGB driver, the devices and connections will never fully initialize. NetworkManager repeatedly attempts to start them, which results in a large amount of churn on the VMs. A workaround to this churn is to force the devices down and connections' autoconnect off for those created for the interfaces. See the following playbook: ```