From fa014a21475367c0a07f3a528b3b155c73dd4b08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lazar=20Cvetkovi=C4=87?= Date: Mon, 19 Feb 2024 13:18:55 +0100 Subject: [PATCH 1/7] Control plane replication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lazar Cvetković --- configs/k8s_ha/check_apiserver.sh | 11 ++++ configs/k8s_ha/haproxy.cfg | 53 +++++++++++++++++++ configs/k8s_ha/keepalived_backup.conf | 29 +++++++++++ configs/k8s_ha/keepalived_master.conf | 29 +++++++++++ configs/setup/kube.json | 7 ++- scripts/cloudlab/setup_node.go | 4 +- scripts/cluster/create_multinode_cluster.go | 57 ++++++++++++++++----- scripts/configs/kube.go | 5 +- scripts/setup.go | 22 ++++---- scripts/setup/setup.go | 44 +++++++++++++++- 10 files changed, 233 insertions(+), 28 deletions(-) create mode 100644 configs/k8s_ha/check_apiserver.sh create mode 100644 configs/k8s_ha/haproxy.cfg create mode 100644 configs/k8s_ha/keepalived_backup.conf create mode 100644 configs/k8s_ha/keepalived_master.conf diff --git a/configs/k8s_ha/check_apiserver.sh b/configs/k8s_ha/check_apiserver.sh new file mode 100644 index 000000000..b923dd027 --- /dev/null +++ b/configs/k8s_ha/check_apiserver.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +errorExit() { + echo "*** $*" 1>&2 + exit 1 +} + +curl --silent --max-time 2 --insecure https://localhost:8443/ -o /dev/null || errorExit "Error GET https://localhost:8443/" +if ip addr | grep -q 10.0.1.254; then + curl --silent --max-time 2 --insecure https://10.0.1.254:8443/ -o /dev/null || errorExit "Error GET https://10.0.1.254:8443/" +fi \ No newline at end of file diff --git a/configs/k8s_ha/haproxy.cfg b/configs/k8s_ha/haproxy.cfg new file mode 100644 index 000000000..6c72ec2da --- /dev/null +++ b/configs/k8s_ha/haproxy.cfg @@ -0,0 +1,53 @@ +# /etc/haproxy/haproxy.cfg +#--------------------------------------------------------------------- +# Global settings 
+#--------------------------------------------------------------------- +global + log /dev/log local0 + log /dev/log local1 notice + daemon + +#--------------------------------------------------------------------- +# common defaults that all the 'listen' and 'backend' sections will +# use if not designated in their block +#--------------------------------------------------------------------- +defaults + mode http + log global + option httplog + option dontlognull + option http-server-close + option forwardfor except 127.0.0.0/8 + option redispatch + retries 1 + timeout http-request 10s + timeout queue 20s + timeout connect 5s + timeout client 20s + timeout server 20s + timeout http-keep-alive 10s + timeout check 10s + +#--------------------------------------------------------------------- +# apiserver frontend which proxys to the control plane nodes +#--------------------------------------------------------------------- +frontend apiserver + bind *:8443 + mode tcp + option tcplog + default_backend apiserverbackend + +#--------------------------------------------------------------------- +# round robin balancing for apiserver +#--------------------------------------------------------------------- +backend apiserverbackend + option httpchk GET /healthz + http-check expect status 200 + mode tcp + option ssl-hello-chk + balance roundrobin + server control_plane_1 10.0.1.1:6443 check + server control_plane_2 10.0.1.2:6443 check + server control_plane_3 10.0.1.3:6443 check + server control_plane_4 10.0.1.4:6443 check + server control_plane_5 10.0.1.5:6443 check \ No newline at end of file diff --git a/configs/k8s_ha/keepalived_backup.conf b/configs/k8s_ha/keepalived_backup.conf new file mode 100644 index 000000000..e10ae22e4 --- /dev/null +++ b/configs/k8s_ha/keepalived_backup.conf @@ -0,0 +1,29 @@ +! /etc/keepalived/keepalived.conf +! 
Configuration File for keepalived +global_defs { + router_id LVS_DEVEL +} +vrrp_script check_apiserver { + script "/etc/keepalived/check_apiserver.sh" + interval 3 + weight -2 + fall 10 + rise 2 +} + +vrrp_instance VI_1 { + state BACKUP + interface enp4s0f1 + virtual_router_id 51 + priority 101 + authentication { + auth_type PASS + auth_pass 42 + } + virtual_ipaddress { + 10.0.1.254 + } + track_script { + check_apiserver + } +} \ No newline at end of file diff --git a/configs/k8s_ha/keepalived_master.conf b/configs/k8s_ha/keepalived_master.conf new file mode 100644 index 000000000..9a27196be --- /dev/null +++ b/configs/k8s_ha/keepalived_master.conf @@ -0,0 +1,29 @@ +! /etc/keepalived/keepalived.conf +! Configuration File for keepalived +global_defs { + router_id LVS_DEVEL +} +vrrp_script check_apiserver { + script "/etc/keepalived/check_apiserver.sh" + interval 3 + weight -2 + fall 10 + rise 2 +} + +vrrp_instance VI_1 { + state MASTER + interface enp4s0f1 + virtual_router_id 51 + priority 101 + authentication { + auth_type PASS + auth_pass 42 + } + virtual_ipaddress { + 10.0.1.254 + } + track_script { + check_apiserver + } +} \ No newline at end of file diff --git a/configs/setup/kube.json b/configs/setup/kube.json index 026d8243e..2485fcf2b 100644 --- a/configs/setup/kube.json +++ b/configs/setup/kube.json @@ -5,6 +5,9 @@ "PodNetworkCidr": "192.168.0.0/16", "ApiserverPort": "6443", "ApiserverToken": "", - "ApiserverTokenHash": "", - "CalicoVersion": "3.27.2" + "ApiserverDiscoveryToken": "", + "ApiserverCertificateKey": "", + "CPHAEndpoint": "10.0.1.254", + "CPHAPort": "8443", + "CalicoVersion": "3.27.2", } \ No newline at end of file diff --git a/scripts/cloudlab/setup_node.go b/scripts/cloudlab/setup_node.go index b8476e1a3..4d9d1f1a2 100644 --- a/scripts/cloudlab/setup_node.go +++ b/scripts/cloudlab/setup_node.go @@ -28,7 +28,7 @@ import ( utils "github.com/vhive-serverless/vHive/scripts/utils" ) -func SetupNode(sandbox string, useStargz string) error { +func 
SetupNode(haMode string, sandbox string, useStargz string) error { if sandbox == "" { sandbox = "firecracker" } @@ -55,7 +55,7 @@ func SetupNode(sandbox string, useStargz string) error { // Set up system utils.InfoPrintf("Set up system\n") - if err := setup.SetupSystem(); err != nil { + if err := setup.SetupSystem(haMode); err != nil { return err } diff --git a/scripts/cluster/create_multinode_cluster.go b/scripts/cluster/create_multinode_cluster.go index ea0a4cbc3..95d134d95 100644 --- a/scripts/cluster/create_multinode_cluster.go +++ b/scripts/cluster/create_multinode_cluster.go @@ -25,6 +25,7 @@ package cluster import ( "fmt" "os" + "strconv" "strings" "time" @@ -32,14 +33,19 @@ import ( utils "github.com/vhive-serverless/vHive/scripts/utils" ) -func CreateMultinodeCluster(stockContainerd string) error { +func CreateMultinodeCluster(stockContainerd string, rawHaReplicaCount string) error { // Original Bash Scripts: scripts/cluster/create_multinode_cluster.sh + haReplicaCount, err := strconv.Atoi(rawHaReplicaCount) + if err != nil { + return err + } + if err := CreateMasterKubeletService(); err != nil { return err } - if err := DeployKubernetes(); err != nil { + if err := DeployKubernetes(haReplicaCount); err != nil { return err } @@ -95,19 +101,28 @@ EOF'` } // Deploy Kubernetes -func DeployKubernetes() error { - +func DeployKubernetes(haReplicaCount int) error { utils.WaitPrintf("Deploying Kubernetes(version %s)", configs.Kube.K8sVersion) masterNodeIp, iperr := utils.ExecShellCmd(`ip route | awk '{print $(NF)}' | awk '/^10\..*/'`) if iperr != nil { return iperr } - shellCmd := fmt.Sprintf(`sudo kubeadm init --v=%d \ + + command := `sudo kubeadm init --v=%d \ --apiserver-advertise-address=%s \ --cri-socket unix:///run/containerd/containerd.sock \ --kubernetes-version %s \ ---pod-network-cidr="%s" `, - configs.System.LogVerbosity, masterNodeIp, configs.Kube.K8sVersion, configs.Kube.PodNetworkCidr) +--pod-network-cidr="%s" ` + args := 
[]any{configs.System.LogVerbosity, masterNodeIp, configs.Kube.K8sVersion, configs.Kube.PodNetworkCidr} + + if haReplicaCount > 0 { + command += ` \ +--control-plane-endpoint "%s:%s" \ +--upload-certs` + args = append(args, configs.Kube.CPHAEndpoint, configs.Kube.CPHAPort) + } + + shellCmd := fmt.Sprintf(command, args) if len(configs.Kube.AlternativeImageRepo) > 0 { shellCmd = fmt.Sprintf(shellCmd+"--image-repository %s ", configs.Kube.AlternativeImageRepo) } @@ -141,24 +156,37 @@ func KubectlForNonRoot() error { func ExtractMasterNodeInfo() error { // Extract master node information from logs utils.WaitPrintf("Extracting master node information from logs") + + // API Server address, port, token shellOut, err := utils.ExecShellCmd("sed -n '/.*kubeadm join.*/p' < %s/masterNodeInfo | sed -n 's/.*join \\(.*\\):\\(\\S*\\) --token \\(\\S*\\).*/\\1 \\2 \\3/p'", configs.System.TmpDir) - if !utils.CheckErrorWithMsg(err, "Failed to extract master node information from logs!\n") { + if !utils.CheckErrorWithMsg(err, "Failed to extract API Server address, port, and token from logs!\n") { return err } splittedOut := strings.Split(shellOut, " ") configs.Kube.ApiserverAdvertiseAddress = splittedOut[0] configs.Kube.ApiserverPort = splittedOut[1] configs.Kube.ApiserverToken = splittedOut[2] + + // API Server discovery token shellOut, err = utils.ExecShellCmd("sed -n '/.*sha256:.*/p' < %s/masterNodeInfo | sed -n 's/.*\\(sha256:\\S*\\).*/\\1/p'", configs.System.TmpDir) - if !utils.CheckErrorWithTagAndMsg(err, "Failed to extract master node information from logs!\n") { + if !utils.CheckErrorWithTagAndMsg(err, "Failed to extract API Server discovery token from logs!\n") { + return err + } + configs.Kube.ApiserverDiscoveryToken = shellOut + + // API Server certificate key + shellOut, err = utils.ExecShellCmd("sed -n 's/^.*--certificate-key //p' < %s/masterNodeInfo", configs.System.TmpDir) + if !utils.CheckErrorWithTagAndMsg(err, "Failed to extract API Server certificate key from 
logs!\n") { return err } - configs.Kube.ApiserverTokenHash = shellOut + configs.Kube.ApiserverCertificateKey = shellOut + masterKeyYamlTemplate := "ApiserverAdvertiseAddress: %s\n" + "ApiserverPort: %s\n" + "ApiserverToken: %s\n" + - "ApiserverTokenHash: %s" + "ApiserverDiscoveryToken: %s\n" + + "ApiserverCertificateKey: %s" // Create masterKey.yaml with master node information utils.WaitPrintf("Creating masterKey.yaml with master node information") @@ -172,14 +200,17 @@ func ExtractMasterNodeInfo() error { configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort, configs.Kube.ApiserverToken, - configs.Kube.ApiserverTokenHash) + configs.Kube.ApiserverDiscoveryToken) _, err = masterKeyYamlFile.WriteString(masterKeyYaml) if !utils.CheckErrorWithTagAndMsg(err, "Failed to create masterKey.yaml with master node information!\n") { return err } + utils.SuccessPrintf("Join cluster from worker nodes as a new control plane node with command: sudo kubeadm join %s:%s --token %s --discovery-token-ca-cert-hash %s --control-plane --certificate-key %s\n", + configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort, configs.Kube.ApiserverToken, configs.Kube.ApiserverDiscoveryToken, configs.Kube.ApiserverCertificateKey) + utils.SuccessPrintf("Join cluster from worker nodes with command: sudo kubeadm join %s:%s --token %s --discovery-token-ca-cert-hash %s\n", - configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort, configs.Kube.ApiserverToken, configs.Kube.ApiserverTokenHash) + configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort, configs.Kube.ApiserverToken, configs.Kube.ApiserverDiscoveryToken) return nil } diff --git a/scripts/configs/kube.go b/scripts/configs/kube.go index 0bbfb6f8d..24dd63e07 100644 --- a/scripts/configs/kube.go +++ b/scripts/configs/kube.go @@ -29,7 +29,10 @@ type KubeConfigStruct struct { PodNetworkCidr string ApiserverPort string ApiserverToken string - ApiserverTokenHash string + ApiserverDiscoveryToken string + 
ApiserverCertificateKey string + CPHAEndpoint string + CPHAPort string CalicoVersion string } diff --git a/scripts/setup.go b/scripts/setup.go index 366f58fb2..f32cb58d7 100644 --- a/scripts/setup.go +++ b/scripts/setup.go @@ -143,12 +143,17 @@ func main() { // Original scripts from `scripts/cluster` directory case "create_multinode_cluster": if setupFlags.NArg() < 2 { - utils.FatalPrintf("Missing parameters: %s \n", subCmd) + utils.FatalPrintf("Missing parameters: %s [control_plane_replicas]\n", subCmd) utils.CleanEnvironment() os.Exit(1) } utils.InfoPrintf("Create multinode cluster\n") - err = cluster.CreateMultinodeCluster(setupFlags.Args()[1]) + + if setupFlags.NArg() == 3 { + err = cluster.CreateMultinodeCluster(setupFlags.Args()[1], setupFlags.Args()[2]) + } else { + err = cluster.CreateMultinodeCluster(setupFlags.Args()[1], "1") + } case "create_one_node_cluster": if setupFlags.NArg() < 2 { utils.FatalPrintf("Missing parameters: %s \n", subCmd) @@ -175,18 +180,17 @@ func main() { err = cluster.SetupWorkerKubelet(setupFlags.Args()[1]) // Original scripts from `scripts/cloudlab` directory case "setup_node": - if setupFlags.NArg() < 2 { - utils.FatalPrintf("Missing parameters: %s [use-stargz]\n", subCmd) + if setupFlags.NArg() < 3 { + utils.FatalPrintf("Missing parameters: %s [use-stargz]\n", subCmd) utils.CleanEnvironment() os.Exit(1) } utils.InfoPrintf("Set up node\n") - if setupFlags.NArg() >= 3 { - err = cloudlab.SetupNode(setupFlags.Args()[1], setupFlags.Args()[2]) + if setupFlags.NArg() >= 4 { + err = cloudlab.SetupNode(setupFlags.Args()[1], setupFlags.Args()[2], setupFlags.Args()[3]) } else { - err = cloudlab.SetupNode(setupFlags.Args()[1], "") + err = cloudlab.SetupNode(setupFlags.Args()[1], setupFlags.Args()[2], "") } - case "start_onenode_vhive_cluster": if setupFlags.NArg() < 2 { utils.FatalPrintf("Missing parameters: %s \n", subCmd) @@ -205,7 +209,7 @@ func main() { err = setup.SetupZipkin() case "setup_system": utils.InfoPrintf("Set up system\n") - 
err = setup.SetupSystem() + err = setup.SetupSystem("REGULAR") case "setup_gvisor_containerd": utils.InfoPrintf("Set up gvisor_containerd\n") err = setup.SetupGvisorContainerd() diff --git a/scripts/setup/setup.go b/scripts/setup/setup.go index 3a075517c..cf0f0b91c 100644 --- a/scripts/setup/setup.go +++ b/scripts/setup/setup.go @@ -199,7 +199,7 @@ func SetupGvisorContainerd() error { return nil } -func SetupSystem() error { +func SetupSystem(haMode string) error { // Original Bash Scripts: scripts/setup_system.sh // Install required dependencies @@ -292,6 +292,48 @@ func SetupSystem() error { return err } + // High-availability control plane + if haMode == "MASTER" || haMode == "BACKUP" { + k8s_ha_path := path.Join(configs.VHive.VHiveRepoPath, "configs/k8s_ha") + + err = utils.InstallPackages("haproxy keepalived") + if !utils.CheckErrorWithTagAndMsg(err, "Failed to install keepalived and haproxy!\n") { + return err + } + + err = utils.CopyToDir(path.Join(k8s_ha_path, "check_apiserver.yaml"), "/etc/keepalived/check_apiserver.sh", true) + if !utils.CheckErrorWithMsg(err, "Failed to copy files to /etc/keepalived/check_apiserver.sh!\n") { + return err + } + + if haMode == "MASTER" { + err = utils.CopyToDir(path.Join(k8s_ha_path, "keepalived_master.yaml"), "/etc/keepalived/keepalived.conf", true) + if !utils.CheckErrorWithMsg(err, "Failed to copy files to /etc/keepalived/keepalived.conf!\n") { + return err + } + } else { + err = utils.CopyToDir(path.Join(k8s_ha_path, "keepalived_backup.yaml"), "/etc/keepalived/keepalived.conf", true) + if !utils.CheckErrorWithMsg(err, "Failed to copy files to /etc/keepalived/keepalived.conf!\n") { + return err + } + } + + err = utils.CopyToDir(path.Join(k8s_ha_path, "haproxy.cfg"), "/etc/haproxy/haproxy.cfg", true) + if !utils.CheckErrorWithMsg(err, "Failed to copy files to /etc/haproxy/haproxy.cfg!\n") { + return err + } + + _, err = utils.ExecShellCmd("sudo systemctl enable keepalived --now") + if !utils.CheckErrorWithTagAndMsg(err, 
"Failed to start Keepalived!\n") { + return err + } + + _, err = utils.ExecShellCmd("sudo systemctl enable haproxy --now") + if !utils.CheckErrorWithTagAndMsg(err, "Failed to start HAProxy!\n") { + return err + } + } + return nil } From b00bdc798bb173a9df2cae66a15b54b05c1f0d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lazar=20Cvetkovi=C4=87?= Date: Mon, 19 Feb 2024 13:23:36 +0100 Subject: [PATCH 2/7] Keepalived health script install bugfix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lazar Cvetković --- scripts/setup/setup.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/setup/setup.go b/scripts/setup/setup.go index cf0f0b91c..d5f821d94 100644 --- a/scripts/setup/setup.go +++ b/scripts/setup/setup.go @@ -301,18 +301,18 @@ func SetupSystem(haMode string) error { return err } - err = utils.CopyToDir(path.Join(k8s_ha_path, "check_apiserver.yaml"), "/etc/keepalived/check_apiserver.sh", true) + err = utils.CopyToDir(path.Join(k8s_ha_path, "check_apiserver.sh"), "/etc/keepalived/check_apiserver.sh", true) if !utils.CheckErrorWithMsg(err, "Failed to copy files to /etc/keepalived/check_apiserver.sh!\n") { return err } if haMode == "MASTER" { - err = utils.CopyToDir(path.Join(k8s_ha_path, "keepalived_master.yaml"), "/etc/keepalived/keepalived.conf", true) + err = utils.CopyToDir(path.Join(k8s_ha_path, "keepalived_master.conf"), "/etc/keepalived/keepalived.conf", true) if !utils.CheckErrorWithMsg(err, "Failed to copy files to /etc/keepalived/keepalived.conf!\n") { return err } } else { - err = utils.CopyToDir(path.Join(k8s_ha_path, "keepalived_backup.yaml"), "/etc/keepalived/keepalived.conf", true) + err = utils.CopyToDir(path.Join(k8s_ha_path, "keepalived_backup.conf"), "/etc/keepalived/keepalived.conf", true) if !utils.CheckErrorWithMsg(err, "Failed to copy files to /etc/keepalived/keepalived.conf!\n") { return err } From b9b62b86e5a500635b1e54444e5393b1bd5cb904 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Lazar=20Cvetkovi=C4=87?= Date: Mon, 19 Feb 2024 13:50:29 +0100 Subject: [PATCH 3/7] Interface substitution script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lazar Cvetković --- configs/k8s_ha/check_apiserver.sh | 0 configs/k8s_ha/keepalived_backup.conf | 2 +- configs/k8s_ha/keepalived_master.conf | 2 +- configs/k8s_ha/substitute_interface.sh | 10 ++++++++++ scripts/setup/setup.go | 9 +++++++-- 5 files changed, 19 insertions(+), 4 deletions(-) mode change 100644 => 100755 configs/k8s_ha/check_apiserver.sh create mode 100755 configs/k8s_ha/substitute_interface.sh diff --git a/configs/k8s_ha/check_apiserver.sh b/configs/k8s_ha/check_apiserver.sh old mode 100644 new mode 100755 diff --git a/configs/k8s_ha/keepalived_backup.conf b/configs/k8s_ha/keepalived_backup.conf index e10ae22e4..57fe34a3e 100644 --- a/configs/k8s_ha/keepalived_backup.conf +++ b/configs/k8s_ha/keepalived_backup.conf @@ -13,7 +13,7 @@ vrrp_script check_apiserver { vrrp_instance VI_1 { state BACKUP - interface enp4s0f1 + interface $INTERFACE_NAME virtual_router_id 51 priority 101 authentication { diff --git a/configs/k8s_ha/keepalived_master.conf b/configs/k8s_ha/keepalived_master.conf index 9a27196be..82d42d1d9 100644 --- a/configs/k8s_ha/keepalived_master.conf +++ b/configs/k8s_ha/keepalived_master.conf @@ -13,7 +13,7 @@ vrrp_script check_apiserver { vrrp_instance VI_1 { state MASTER - interface enp4s0f1 + interface $INTERFACE_NAME virtual_router_id 51 priority 101 authentication { diff --git a/configs/k8s_ha/substitute_interface.sh b/configs/k8s_ha/substitute_interface.sh new file mode 100755 index 000000000..31c53eab1 --- /dev/null +++ b/configs/k8s_ha/substitute_interface.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +readonly DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)" + +export INTERFACE_NAME=$(ifconfig | grep -B1 "10.0.1" | head -n1 | sed 's/:.*//') + +cat $DIR/keepalived_master.conf | envsubst > 
$DIR/keepalived_master.conff +cat $DIR/keepalived_backup.conf | envsubst > $DIR/keepalived_backup.conff + +echo "Successfully created HA load balancer configuration!" \ No newline at end of file diff --git a/scripts/setup/setup.go b/scripts/setup/setup.go index d5f821d94..2084e0298 100644 --- a/scripts/setup/setup.go +++ b/scripts/setup/setup.go @@ -306,13 +306,18 @@ func SetupSystem(haMode string) error { return err } + _, err = utils.ExecVHiveBashScript("configs/k8s_ha/substitute_interface.sh") + if !utils.CheckErrorWithTagAndMsg(err, "Failed to create HA load balancer !\n") { + return err + } + if haMode == "MASTER" { - err = utils.CopyToDir(path.Join(k8s_ha_path, "keepalived_master.conf"), "/etc/keepalived/keepalived.conf", true) + err = utils.CopyToDir(path.Join(k8s_ha_path, "keepalived_master.conff"), "/etc/keepalived/keepalived.conf", true) if !utils.CheckErrorWithMsg(err, "Failed to copy files to /etc/keepalived/keepalived.conf!\n") { return err } } else { - err = utils.CopyToDir(path.Join(k8s_ha_path, "keepalived_backup.conf"), "/etc/keepalived/keepalived.conf", true) + err = utils.CopyToDir(path.Join(k8s_ha_path, "keepalived_backup.conff"), "/etc/keepalived/keepalived.conf", true) if !utils.CheckErrorWithMsg(err, "Failed to copy files to /etc/keepalived/keepalived.conf!\n") { return err } From b91ec0f3ec39a7eeee8a0d6dd9aad4fc5162d630 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lazar=20Cvetkovi=C4=87?= Date: Mon, 19 Feb 2024 15:55:39 +0100 Subject: [PATCH 4/7] Kubeadm fix with VRRP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lazar Cvetković --- configs/k8s_ha/keepalived_backup.conf | 2 +- configs/setup/system.json | 2 +- scripts/cluster/create_multinode_cluster.go | 12 +++++------- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/configs/k8s_ha/keepalived_backup.conf b/configs/k8s_ha/keepalived_backup.conf index 57fe34a3e..c35332558 100644 --- a/configs/k8s_ha/keepalived_backup.conf +++ 
b/configs/k8s_ha/keepalived_backup.conf @@ -15,7 +15,7 @@ vrrp_instance VI_1 { state BACKUP interface $INTERFACE_NAME virtual_router_id 51 - priority 101 + priority 100 authentication { auth_type PASS auth_pass 42 diff --git a/configs/setup/system.json b/configs/setup/system.json index 0a1ec361d..eae7cf9b2 100644 --- a/configs/setup/system.json +++ b/configs/setup/system.json @@ -15,6 +15,6 @@ "PmuToolsRepoUrl": "https://github.com/vhive-serverless/pmu-tools", "ProtocVersion": "3.19.4", "ProtocDownloadUrlTemplate": "https://github.com/protocolbuffers/protobuf/releases/download/v%s/protoc-%s-linux-x86_64.zip", - "LogVerbosity": 0, + "LogVerbosity": 1, "YQDownloadUrlTemplate": "https://github.com/mikefarah/yq/releases/latest/download/yq_linux_%s" } \ No newline at end of file diff --git a/scripts/cluster/create_multinode_cluster.go b/scripts/cluster/create_multinode_cluster.go index 95d134d95..46ca03fb4 100644 --- a/scripts/cluster/create_multinode_cluster.go +++ b/scripts/cluster/create_multinode_cluster.go @@ -108,21 +108,19 @@ func DeployKubernetes(haReplicaCount int) error { return iperr } - command := `sudo kubeadm init --v=%d \ + command := fmt.Sprintf(`sudo kubeadm init --v=%d \ --apiserver-advertise-address=%s \ --cri-socket unix:///run/containerd/containerd.sock \ --kubernetes-version %s \ ---pod-network-cidr="%s" ` - args := []any{configs.System.LogVerbosity, masterNodeIp, configs.Kube.K8sVersion, configs.Kube.PodNetworkCidr} +--pod-network-cidr="%s" `, configs.System.LogVerbosity, masterNodeIp, configs.Kube.K8sVersion, configs.Kube.PodNetworkCidr) if haReplicaCount > 0 { - command += ` \ + command += fmt.Sprintf(`\ --control-plane-endpoint "%s:%s" \ ---upload-certs` - args = append(args, configs.Kube.CPHAEndpoint, configs.Kube.CPHAPort) +--upload-certs`, configs.Kube.CPHAEndpoint, configs.Kube.CPHAPort) } - shellCmd := fmt.Sprintf(command, args) + shellCmd := command if len(configs.Kube.AlternativeImageRepo) > 0 { shellCmd = 
fmt.Sprintf(shellCmd+"--image-repository %s ", configs.Kube.AlternativeImageRepo) } From 71ca4be58cef8a580f8e46ac6a027eb4ed08a928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lazar=20Cvetkovi=C4=87?= Date: Mon, 19 Feb 2024 16:04:27 +0100 Subject: [PATCH 5/7] Disabling manual kubelet startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lazar Cvetković --- configs/k8s_ha/check_apiserver.sh | 4 +-- configs/k8s_ha/haproxy.cfg | 2 +- configs/setup/kube.json | 4 +-- configs/setup/system.json | 2 +- scripts/cluster/create_multinode_cluster.go | 35 ++++++++++++--------- scripts/utils/system.go | 2 +- 6 files changed, 28 insertions(+), 21 deletions(-) diff --git a/configs/k8s_ha/check_apiserver.sh b/configs/k8s_ha/check_apiserver.sh index b923dd027..7beca7b22 100755 --- a/configs/k8s_ha/check_apiserver.sh +++ b/configs/k8s_ha/check_apiserver.sh @@ -5,7 +5,7 @@ errorExit() { exit 1 } -curl --silent --max-time 2 --insecure https://localhost:8443/ -o /dev/null || errorExit "Error GET https://localhost:8443/" +curl --silent --max-time 2 --insecure https://localhost:6443/ -o /dev/null || errorExit "Error GET https://localhost:6443/" if ip addr | grep -q 10.0.1.254; then - curl --silent --max-time 2 --insecure https://10.0.1.254:8443/ -o /dev/null || errorExit "Error GET https://10.0.1.254:8443/" + curl --silent --max-time 2 --insecure https://10.0.1.254:6443/ -o /dev/null || errorExit "Error GET https://10.0.1.254:6443/" fi \ No newline at end of file diff --git a/configs/k8s_ha/haproxy.cfg b/configs/k8s_ha/haproxy.cfg index 6c72ec2da..b1dbe46d5 100644 --- a/configs/k8s_ha/haproxy.cfg +++ b/configs/k8s_ha/haproxy.cfg @@ -32,7 +32,7 @@ defaults # apiserver frontend which proxys to the control plane nodes #--------------------------------------------------------------------- frontend apiserver - bind *:8443 + bind *:6443 mode tcp option tcplog default_backend apiserverbackend diff --git a/configs/setup/kube.json 
b/configs/setup/kube.json index 2485fcf2b..2c89984f6 100644 --- a/configs/setup/kube.json +++ b/configs/setup/kube.json @@ -8,6 +8,6 @@ "ApiserverDiscoveryToken": "", "ApiserverCertificateKey": "", "CPHAEndpoint": "10.0.1.254", - "CPHAPort": "8443", - "CalicoVersion": "3.27.2", + "CPHAPort": "6443", + "CalicoVersion": "3.27.2" } \ No newline at end of file diff --git a/configs/setup/system.json b/configs/setup/system.json index eae7cf9b2..0a1ec361d 100644 --- a/configs/setup/system.json +++ b/configs/setup/system.json @@ -15,6 +15,6 @@ "PmuToolsRepoUrl": "https://github.com/vhive-serverless/pmu-tools", "ProtocVersion": "3.19.4", "ProtocDownloadUrlTemplate": "https://github.com/protocolbuffers/protobuf/releases/download/v%s/protoc-%s-linux-x86_64.zip", - "LogVerbosity": 1, + "LogVerbosity": 0, "YQDownloadUrlTemplate": "https://github.com/mikefarah/yq/releases/latest/download/yq_linux_%s" } \ No newline at end of file diff --git a/scripts/cluster/create_multinode_cluster.go b/scripts/cluster/create_multinode_cluster.go index 46ca03fb4..16a98a2f7 100644 --- a/scripts/cluster/create_multinode_cluster.go +++ b/scripts/cluster/create_multinode_cluster.go @@ -41,9 +41,9 @@ func CreateMultinodeCluster(stockContainerd string, rawHaReplicaCount string) er return err } - if err := CreateMasterKubeletService(); err != nil { + /*if err := CreateMasterKubeletService(); err != nil { return err - } + }*/ if err := DeployKubernetes(haReplicaCount); err != nil { return err @@ -103,16 +103,11 @@ EOF'` // Deploy Kubernetes func DeployKubernetes(haReplicaCount int) error { utils.WaitPrintf("Deploying Kubernetes(version %s)", configs.Kube.K8sVersion) - masterNodeIp, iperr := utils.ExecShellCmd(`ip route | awk '{print $(NF)}' | awk '/^10\..*/'`) - if iperr != nil { - return iperr - } command := fmt.Sprintf(`sudo kubeadm init --v=%d \ ---apiserver-advertise-address=%s \ ---cri-socket unix:///run/containerd/containerd.sock \ +--cri-socket /run/containerd/containerd.sock \ 
--kubernetes-version %s \ ---pod-network-cidr="%s" `, configs.System.LogVerbosity, masterNodeIp, configs.Kube.K8sVersion, configs.Kube.PodNetworkCidr) +--pod-network-cidr="%s" `, configs.System.LogVerbosity, configs.Kube.K8sVersion, configs.Kube.PodNetworkCidr) if haReplicaCount > 0 { command += fmt.Sprintf(`\ @@ -198,17 +193,29 @@ func ExtractMasterNodeInfo() error { configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort, configs.Kube.ApiserverToken, - configs.Kube.ApiserverDiscoveryToken) + configs.Kube.ApiserverDiscoveryToken, + configs.Kube.ApiserverCertificateKey) _, err = masterKeyYamlFile.WriteString(masterKeyYaml) if !utils.CheckErrorWithTagAndMsg(err, "Failed to create masterKey.yaml with master node information!\n") { return err } - utils.SuccessPrintf("Join cluster from worker nodes as a new control plane node with command: sudo kubeadm join %s:%s --token %s --discovery-token-ca-cert-hash %s --control-plane --certificate-key %s\n", - configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort, configs.Kube.ApiserverToken, configs.Kube.ApiserverDiscoveryToken, configs.Kube.ApiserverCertificateKey) + utils.SuccessPrintf("Join cluster from worker nodes as a new control plane node with command: "+ + "sudo kubeadm join %s:%s --token %s --discovery-token-ca-cert-hash %s --control-plane --certificate-key %s\n", + configs.Kube.ApiserverAdvertiseAddress, + configs.Kube.ApiserverPort, + configs.Kube.ApiserverToken, + configs.Kube.ApiserverDiscoveryToken, + configs.Kube.ApiserverCertificateKey, + ) - utils.SuccessPrintf("Join cluster from worker nodes with command: sudo kubeadm join %s:%s --token %s --discovery-token-ca-cert-hash %s\n", - configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort, configs.Kube.ApiserverToken, configs.Kube.ApiserverDiscoveryToken) + utils.SuccessPrintf("Join cluster from worker nodes with command: "+ + "sudo kubeadm join %s:%s --token %s --discovery-token-ca-cert-hash %s\n", + 
configs.Kube.ApiserverAdvertiseAddress, + configs.Kube.ApiserverPort, + configs.Kube.ApiserverToken, + configs.Kube.ApiserverDiscoveryToken, + ) return nil } diff --git a/scripts/utils/system.go b/scripts/utils/system.go index 2a8e09f82..ec81ed75a 100644 --- a/scripts/utils/system.go +++ b/scripts/utils/system.go @@ -253,7 +253,7 @@ func PrepareEnvironment() error { func CleanEnvironment() error { // Define task List cleanTaskList := []func() error{ - CleanUpTmpDir, + //CleanUpTmpDir, } // Execute task for _, task := range cleanTaskList { From c9a464edc6cedc2112968b835b078727b125127d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lazar=20Cvetkovi=C4=87?= Date: Tue, 20 Feb 2024 12:14:25 +0100 Subject: [PATCH 6/7] Fixing bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lazar Cvetković --- .github/workflows/stargz_tests.yml | 2 +- scripts/cluster/create_multinode_cluster.go | 4 ---- scripts/setup.go | 1 + scripts/utils/system.go | 2 +- 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/stargz_tests.yml b/.github/workflows/stargz_tests.yml index 624d18f69..4674af57a 100644 --- a/.github/workflows/stargz_tests.yml +++ b/.github/workflows/stargz_tests.yml @@ -67,7 +67,7 @@ jobs: kubectl patch configmap -n knative-serving config-autoscaler -p "{\"data\": {\"allow-zero-initial-scale\": \"true\"}}" - name: Setup stock-only node - run: ./scripts/setup_tool setup_node stock-only use-stargz + run: ./scripts/setup_tool setup_node REGULAR stock-only use-stargz - name: Check containerd service is running run: sudo screen -list | grep "containerd" diff --git a/scripts/cluster/create_multinode_cluster.go b/scripts/cluster/create_multinode_cluster.go index 16a98a2f7..635ceccac 100644 --- a/scripts/cluster/create_multinode_cluster.go +++ b/scripts/cluster/create_multinode_cluster.go @@ -41,10 +41,6 @@ func CreateMultinodeCluster(stockContainerd string, rawHaReplicaCount string) er return err } - /*if 
err := CreateMasterKubeletService(); err != nil { - return err - }*/ - if err := DeployKubernetes(haReplicaCount); err != nil { return err } diff --git a/scripts/setup.go b/scripts/setup.go index f32cb58d7..bfa4e8f90 100644 --- a/scripts/setup.go +++ b/scripts/setup.go @@ -181,6 +181,7 @@ func main() { // Original scripts from `scripts/cloudlab` directory case "setup_node": if setupFlags.NArg() < 3 { + // ha_mode - REGULAR (do not install load balancers), MASTER, BACKUP (with load balancers) utils.FatalPrintf("Missing parameters: %s [use-stargz]\n", subCmd) utils.CleanEnvironment() os.Exit(1) diff --git a/scripts/utils/system.go b/scripts/utils/system.go index ec81ed75a..2a8e09f82 100644 --- a/scripts/utils/system.go +++ b/scripts/utils/system.go @@ -253,7 +253,7 @@ func PrepareEnvironment() error { func CleanEnvironment() error { // Define task List cleanTaskList := []func() error{ - //CleanUpTmpDir, + CleanUpTmpDir, } // Execute task for _, task := range cleanTaskList { From a167a9af2449b9c423b62eb580b482f3070bc545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lazar=20Cvetkovi=C4=87?= Date: Tue, 27 Feb 2024 14:21:14 +0100 Subject: [PATCH 7/7] Addressing Leonid's comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lazar Cvetković --- docs/developers_guide.md | 6 +- docs/logging.md | 2 +- docs/quickstart_guide.md | 8 +- scripts/README.md | 96 +++++++++++++++------ scripts/cluster/create_multinode_cluster.go | 2 +- 5 files changed, 79 insertions(+), 35 deletions(-) diff --git a/docs/developers_guide.md b/docs/developers_guide.md index b732ddf65..6363d1a60 100644 --- a/docs/developers_guide.md +++ b/docs/developers_guide.md @@ -13,7 +13,7 @@ cd vhive ./scripts/install_go.sh; source /etc/profile # or install Go manually pushd scripts && go build -o setup_tool && popd && mv scripts/setup_tool . 
-./setup_tool setup_node [stock-only|gvisor|firecracker] +./setup_tool setup_node REGULAR [stock-only|gvisor|firecracker] sudo containerd ./setup_tool create_one_node_cluster [stock-only|gvisor|firecracker] # wait for the containers to boot up using @@ -115,7 +115,7 @@ Assuming you rented a node using the vHive CloudLab profile: 1. Setup the node for the desired sandbox: ```bash -./setup_tool setup_node [firecracker|gvisor] +./setup_tool setup_node REGULAR [firecracker|gvisor] ``` 2. Setup the CRI test environment for the desired sandbox: @@ -240,7 +240,7 @@ Knative functions can use GPU although only `stock-only` mode is supported. Follow the guide to [setup stock knative](#testing-stock-knative-setup-or-images). ``` bash -./setup_tool setup_node stock-only +./setup_tool setup_node REGULAR stock-only ``` ### Install NVIDIA Driver and NVIDIA Container Toolkit diff --git a/docs/logging.md b/docs/logging.md index 43db3ea26..9f0a96f5b 100644 --- a/docs/logging.md +++ b/docs/logging.md @@ -41,7 +41,7 @@ We present how to set up a multi-node cluster, however, the same modifications c 3. Run the node setup script: ```bash - ./setup_tool setup_node + ./setup_tool setup_node REGULAR ``` > **BEWARE:** > diff --git a/docs/quickstart_guide.md b/docs/quickstart_guide.md index b3969c42e..5c7aaf706 100644 --- a/docs/quickstart_guide.md +++ b/docs/quickstart_guide.md @@ -92,7 +92,7 @@ Another option is to install using official instructions: [https://golang.org/do > flags as follows: > > ```bash - > ./setup_tool setup_node stock-only use-stargz + > ./setup_tool setup_node REGULAR stock-only use-stargz > ``` > **IMPORTANT** > Currently `stargz` is only supported in native kubelet contexts without firecracker. 
@@ -103,7 +103,7 @@ Another option is to install using official instructions: [https://golang.org/do For the standard setup, run the following script: ```bash - ./setup_tool setup_node firecracker + ./setup_tool setup_node REGULAR firecracker ``` > **BEWARE:** > @@ -250,13 +250,13 @@ In essence, you will execute the same commands for master and worker setups but Execute the following below **as a non-root user with sudo rights** using **bash**: 1. Run the node setup script: ```bash - ./setup_tool setup_node firecracker + ./setup_tool setup_node REGULAR firecracker ``` > **Note:** > To enable runs with `stargz` images, setup kubelet by adding the `stock-only` and `use-stargz` > flags as follows: > ```bash - > ./setup_tool setup_node stock-only use-stargz + > ./setup_tool setup_node REGULAR stock-only use-stargz > ``` > **IMPORTANT** > Currently `stargz` is only supported in native kubelet contexts without firecracker. diff --git a/scripts/README.md b/scripts/README.md index fe80e519c..de7150c9e 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,36 +1,47 @@ # vHive Setup Scripts + - [vHive Setup Scripts](#vhive-setup-scripts) - - [1. Get Setup Tool](#1-get-setup-tool) - - [1.1 Download the Binary Executable Directly](#11-download-the-binary-executable-directly) - - [1.2 Build from Source](#12-build-from-source) - - [2. Config the Setup Tool](#2-config-the-setup-tool) - - [3. Use of Setup Tool](#3-use-of-setup-tool) - - [3.1 General Usage](#31-general-usage) - - [3.2 Specify Config Files](#32-specify-config-files) - - [3.3 Use with Local vHive Repo](#33-use-with-local-vhive-repo) - - [3.4 Use with Remote vHive Repo (Standalone Use)](#34-use-with-remote-vhive-repo-standalone-use) - - [3.5 Migrate from Legacy Shell Scripts](#35-migrate-from-legacy-shell-scripts) - - [4. Logs](#4-logs) - - [5. Supported Platform](#5-supported-platform) + - [1. 
Get Setup Tool](#1-get-setup-tool) + - [1.1 Download the Binary Executable Directly](#11-download-the-binary-executable-directly) + - [1.2 Build from Source](#12-build-from-source) + - [2. Config the Setup Tool](#2-config-the-setup-tool) + - [3. Use of Setup Tool](#3-use-of-setup-tool) + - [3.1 General Usage](#31-general-usage) + - [3.2 Specify Config Files](#32-specify-config-files) + - [3.3 Use with Local vHive Repo](#33-use-with-local-vhive-repo) + - [3.4 Use with Remote vHive Repo (Standalone Use)](#34-use-with-remote-vhive-repo-standalone-use) + - [3.5 Migrate from Legacy Shell Scripts](#35-migrate-from-legacy-shell-scripts) + - [4. Logs](#4-logs) + - [5. Supported Platform](#5-supported-platform) + ## 1. Get Setup Tool + There are basically two ways to get the setup tool ### 1.1 Download the Binary Executable Directly -Check [vHive GitHub Repo](https://github.com/vhive-serverless/vHive/releases) for more details and choose the appropriate version to download. + +Check [vHive GitHub Repo](https://github.com/vhive-serverless/vHive/releases) for more details and choose the +appropriate version to download. ### 1.2 Build from Source + **Building from source requires Go (version 1.19 at least) installed on your system.** + ```bash git clone --depth 1 https://github.com/vhive-serverless/vHive cd vHive pushd scripts && go build -o setup_tool && popd ``` + Compiled executable file will be in the `scripts` directory. ## 2. Config the Setup Tool -**Normally, just skip this section and use the default config files** which are located in the `configs/setup` directory inside the [vHive repo](https://github.com/vhive-serverless/vHive). -- `configs/setup/knative.json`: knative related configs (all the path in the config file should be relative path inside the vHive repo) +**Normally, just skip this section and use the default config files** which are located in the `configs/setup` directory +inside the [vHive repo](https://github.com/vhive-serverless/vHive). 
+ +- `configs/setup/knative.json`: knative related configs (all paths in the config file should be relative paths inside + the vHive repo) - `configs/setup/kube.json`: Kubernetes related configs - `configs/setup/system.json`: system related configs - `configs/setup/vhive.json`: vHive related configs @@ -38,50 +49,68 @@ Compiled executable file will be in the `scripts` directory. You can modify the config files on your demand and then place all of them in one directory for the later use. ## 3. Use of Setup Tool + ### 3.1 General Usage + ```bash ./setup_tool [options] [parameters] ``` + use the `-h` or `--help` option to look for the help + ### 3.2 Specify Config Files -By default, the setup_tool will use the config files in `configs/setup` directory inside the vHive repo. + +By default, the setup_tool will use the config files in `configs/setup` directory inside the vHive repo. To change the path of config files, use the `--setup-configs-dir` option to specify it. + ```bash ./setup_tool --setup-configs-dir ... ``` ### 3.3 Use with Local vHive Repo -By default, the setup_tool will check the current directory to ensure it is a vHive repo and then use it during the setup process. + +By default, the setup_tool will check the current directory to ensure it is a vHive repo and then use it during the +setup process. To use other vHive repos locally, provide the `--vhive-repo-dir` option to specify it. + ```bash ./setup_tool --vhive-repo-dir ... ``` -If the current directory or the provided path is not a valid vHive repo, the setup_tool will [automatically clone the remote vHive repo and use it](#34-use-with-remote-vhive-repo). +If the current directory or the provided path is not a valid vHive repo, the setup_tool +will [automatically clone the remote vHive repo and use it](#34-use-with-remote-vhive-repo-standalone-use).
### 3.4 Use with Remote vHive Repo (Standalone Use) -When the setup_tool is directly downloaded or targeted for standalone use, the setup_tool will automatically clone the remote vHive repo to the temporary directory and then use it during the setup process. -To change the URL and branch of the [default remote vHive repo](https://github.com/vhive-serverless/vHive), use `--vhive-repo-url` and `--vhive-repo-branch` options to specify them. +When the setup_tool is directly downloaded or targeted for standalone use, the setup_tool will automatically clone the +remote vHive repo to the temporary directory and then use it during the setup process. + +To change the URL and branch of the [default remote vHive repo](https://github.com/vhive-serverless/vHive), +use `--vhive-repo-url` and `--vhive-repo-branch` options to specify them. + ```bash ./setup_tool --vhive-repo-url --vhive-repo-branch ... ``` -Besides, when the current directory is a vHive repo or the `--vhive-repo-dir` option is valid, **the local repo will be prioritized for use**. **To force the setup_tool to clone and use the remote vHive repo**, provide `--force-remote` option to the setup_tool. +Besides, when the current directory is a vHive repo or the `--vhive-repo-dir` option is valid, **the local repo will be +prioritized for use**. **To force the setup_tool to clone and use the remote vHive repo**, provide `--force-remote` +option to the setup_tool. + ```bash ./setup_tool --force-remote ... ``` - ### 3.5 Migrate from Legacy Shell Scripts + Just type the name of the original shell script and append corresponding parameters behind. 
For example: + ```bash # Legacy ==> scripts/cloudlab/setup_node.sh stock-only # ==> Current -./setup_tool [options] setup_node stock-only +./setup_tool [options] setup_node REGULAR stock-only # Legacy ==> scripts/create_devmapper.sh @@ -94,14 +123,29 @@ scripts/gpu/setup_nvidia_gpu.sh ./setup_tool [options] setup_nvidia_gpu ``` -**NOTICE**: Shell scripts in `scripts/stargz`, `scripts/self-hosted-kind`, and `scripts/github_runner` **are not supported to be invoked in this way at present**. +**NOTICE**: Shell scripts in `scripts/stargz`, `scripts/self-hosted-kind`, and `scripts/github_runner` **are not +supported to be invoked in this way at present**. + +### 3.6 Kubernetes Control Plane High-Availability Mode +For fault tolerance purposes, Kubernetes control plane components can be replicated. This can be done by combining +instructions from +the following links - [#1](https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md) +and [#2](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/high-availability/). + +While executing these steps manually is complicated, [InVitro Loader](https://github.com/vhive-serverless/invitro) +provides an automated way of creating a highly available control plane just by configuring the `CONTROL_PLANE_REPLICAS` +parameter in `scripts/setup/setup.cfg` and then running `scripts/setup/create_multinode.sh`. ## 4. Logs -The log files will be named as `_common.log` and `_error.log`. All log files will be stored in the directory where the setup_tool is executed. + +The log files will be named as `_common.log` and `_error.log`. All log files will be stored in +the directory where the setup_tool is executed. - `_common.log`: all output originally writes to `stdout` will be redirected to this log file. - `_error.log`: all output originally writes to `stderr` will be redirected to this log file. ## 5. Supported Platform -At present, only `Ubuntu 20.04 (amd64)` is officially tested.
Other versions of `Ubuntu` may also work, but not guaranteed. \ No newline at end of file + +At present, only `Ubuntu 20.04 (amd64)` is officially tested. Other versions of `Ubuntu` may also work, but this is not +guaranteed. \ No newline at end of file diff --git a/scripts/cluster/create_multinode_cluster.go b/scripts/cluster/create_multinode_cluster.go index 635ceccac..2c269b10d 100644 --- a/scripts/cluster/create_multinode_cluster.go +++ b/scripts/cluster/create_multinode_cluster.go @@ -196,7 +196,7 @@ func ExtractMasterNodeInfo() error { return err } - utils.SuccessPrintf("Join cluster from worker nodes as a new control plane node with command: "+ + utils.SuccessPrintf("Join cluster as a new control plane node with command: "+ "sudo kubeadm join %s:%s --token %s --discovery-token-ca-cert-hash %s --control-plane --certificate-key %s\n", configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort,