Skip to content

Commit b489098

Browse files
committed
RHOAIENG-32532: Move codeflare common and operator items to sdk
1 parent 9925c07 commit b489098

File tree

11 files changed

+411
-36
lines changed

11 files changed

+411
-36
lines changed

.github/kind/action.yml

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
# Composite action that prepares a KinD cluster for tests:
#   1. creates ./tmp and ./bin and exports TEMP_DIR / extends PATH,
#   2. starts a local container image registry with podman,
#   3. renders resources/kind.yaml into ${TEMP_DIR}/kind.yaml,
#   4. creates the cluster, installs the NGINX Ingress controller,
#   5. configures dnsmasq so the node hostname resolves to 127.0.0.1.
#
# Inputs are handed to the shell through `env:` rather than being expanded
# into the script text, so their values can never be parsed as shell code.
name: "Set up KinD"
description: "Step to start and configure KinD cluster"

inputs:
  node-hostname:
    description: "Hostname of the main kind node"
    required: false
    default: kind
  cluster-name:
    description: "Name of the KinD cluster"
    required: false
    default: cluster
  worker-nodes:
    description: "Number of worker nodes"
    required: false
    # Quoted so the default is unambiguously a string.
    default: "0"
  label-prefix:
    description: "Prefix to add to worker node labels"
    required: false
    default: worker

runs:
  using: "composite"
  steps:
    - name: Init directories
      shell: bash
      run: |
        TEMP_DIR="$(pwd)/tmp"
        mkdir -p "${TEMP_DIR}"
        echo "TEMP_DIR=${TEMP_DIR}" >> "$GITHUB_ENV"

        mkdir -p "$(pwd)/bin"
        echo "$(pwd)/bin" >> "$GITHUB_PATH"

    - name: Container image registry
      shell: bash
      env:
        WORKER_NODES: ${{ inputs.worker-nodes }}
        LABEL_PREFIX: ${{ inputs.label-prefix }}
      run: |
        podman run -d -p 5000:5000 --name registry registry:2.8.1

        export NODE_IMAGE="kindest/node:v1.26.0@sha256:691e24bd2417609db7e589e1a479b902d2e209892a10ce375fab60a8407c7352"

        export REGISTRY_ADDRESS="$(hostname -i):5000"
        echo "REGISTRY_ADDRESS=${REGISTRY_ADDRESS}" >> "$GITHUB_ENV"
        echo "Container image registry started at ${REGISTRY_ADDRESS}"

        # TEMP_DIR was exported through GITHUB_ENV by the previous step.
        KIND_CONFIG_FILE="${TEMP_DIR}/kind.yaml"
        echo "KIND_CONFIG_FILE=${KIND_CONFIG_FILE}" >> "$GITHUB_ENV"

        # Render the template first, then add worker nodes to the rendered
        # copy. The template under GITHUB_ACTION_PATH is left untouched, so
        # running this action twice in one job does not stack worker nodes.
        envsubst < "${GITHUB_ACTION_PATH}/resources/kind.yaml" > "${KIND_CONFIG_FILE}"

        if [ "$WORKER_NODES" -gt 0 ]; then
          for i in $(seq 1 "$WORKER_NODES"); do
            # Appends one worker entry right after the top-level `nodes:` line;
            # the indentation mirrors the control-plane entry of the template.
            sed -i "/^nodes:/a \ \ - role: worker\n    image: ${NODE_IMAGE}\n    labels:\n      ${LABEL_PREFIX}-${i}: true\n    extraMounts:\n      - hostPath: /dev/null\n        containerPath: /var/run/nvidia-container-devices/all" "${KIND_CONFIG_FILE}"
          done
        fi

        # Register the local registry as insecure for podman. The heredoc is
        # expanded by the inner `sh`, which receives REGISTRY_ADDRESS through
        # --preserve-env.
        sudo --preserve-env=REGISTRY_ADDRESS sh -c 'cat > /etc/containers/registries.conf.d/local.conf <<EOF
        [[registry]]
        prefix = "$REGISTRY_ADDRESS"
        insecure = true
        location = "$REGISTRY_ADDRESS"
        EOF'

    - name: Setup KinD cluster
      # NOTE(review): the version tag of this ref was garbled in the reviewed
      # copy; v1.8.0 is a reconstruction and must be confirmed before merging.
      uses: helm/kind-action@v1.8.0
      with:
        cluster_name: ${{ inputs.cluster-name }}
        version: v0.17.0
        config: ${{ env.KIND_CONFIG_FILE }}

    - name: Print cluster info
      shell: bash
      run: |
        echo "KinD cluster:"
        kubectl cluster-info
        kubectl describe nodes

    - name: Install Ingress controller
      shell: bash
      run: |
        VERSION=controller-v1.9.6
        echo "Deploying Ingress controller into KinD cluster"
        # -f makes curl fail on an HTTP error instead of piping the error page
        # into kubectl. The sed swaps the publish-status-address argument for
        # two controller arguments.
        curl -fsSL https://raw.githubusercontent.com/kubernetes/ingress-nginx/"${VERSION}"/deploy/static/provider/kind/deploy.yaml | sed "s/--publish-status-address=localhost/--report-node-internal-ip-address\\n        - --status-update-interval=10/g" | kubectl apply -f -
        kubectl annotate ingressclass nginx "ingressclass.kubernetes.io/is-default-class=true"
        # Turn on SSL Passthrough
        kubectl patch deploy --type json --patch '[{"op":"add","path": "/spec/template/spec/containers/0/args/-","value":"--enable-ssl-passthrough"}]' ingress-nginx-controller -n ingress-nginx

        kubectl -n ingress-nginx wait --timeout=300s --for=condition=Available deployments --all

    - name: Setup Dnsmasq to resolve hostnames with domain name ${{ inputs.node-hostname }}
      shell: bash
      env:
        NODE_HOSTNAME: ${{ inputs.node-hostname }}
      run: |
        # Based on https://sixfeetup.com/blog/local-development-with-wildcard-dns-on-linux
        sudo apt-get -y install dnsmasq

        # Point systemd-resolved at dnsmasq (127.0.0.2) for the node domain.
        sudo sed -i -E "s/#DNS=/DNS=127.0.0.2/" /etc/systemd/resolved.conf
        sudo sed -i -E "s/#Domains=/Domains=~${NODE_HOSTNAME}/" /etc/systemd/resolved.conf
        sudo systemctl restart systemd-resolved

        # Make dnsmasq listen on 127.0.0.2 and answer the node domain with 127.0.0.1.
        sudo sed -i -E "s/#IGNORE_RESOLVCONF=yes/IGNORE_RESOLVCONF=yes/" /etc/default/dnsmasq
        sudo sed -i -E "s/#listen-address=/listen-address=127.0.0.2/" /etc/dnsmasq.conf
        sudo sed -i -E "s/#bind-interfaces/bind-interfaces/" /etc/dnsmasq.conf
        sudo sed -i -E "s|#(address=).*|\1/${NODE_HOSTNAME}/127.0.0.1|" /etc/dnsmasq.conf
        sudo systemctl restart dnsmasq
        systemctl status dnsmasq

    - name: Set env variables for tests to properly leverage KinD cluster
      shell: bash
      env:
        NODE_HOSTNAME: ${{ inputs.node-hostname }}
      run: |
        echo "CLUSTER_TYPE=KIND" >> "$GITHUB_ENV"
        echo "CLUSTER_HOSTNAME=${NODE_HOSTNAME}" >> "$GITHUB_ENV"
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
# Composite action that registers an extra client-certificate user in the
# local kubeconfig of a KinD cluster:
#   - copies the cluster CA certificate and key out of the control-plane node,
#   - signs a fresh key pair for the user (CN=<user-name>, O=tenant),
#   - appends a matching context and user entry to $HOME/.kube/config.
name: "Add custom user to KinD"
description: "Step to add custom user to KinD"

inputs:
  user-name:
    description: "Name of the user added to KinD"
    required: true
  cluster-name:
    description: "Name of the KinD cluster"
    required: false
    default: cluster

runs:
  using: "composite"
  steps:
    - name: Add user to KinD context
      shell: bash
      # Inputs are passed as environment variables so their values are never
      # parsed as part of the shell script.
      env:
        USER_NAME: ${{ inputs.user-name }}
        CLUSTER_NAME: ${{ inputs.cluster-name }}
      run: |
        # Cleanup runs on every exit path, so the CA key and the user key are
        # removed from the workspace even when a command below fails.
        trap 'rm --force ca.crt ca.srl ca.key user.crt user.key user.csr' EXIT

        # Get KinD certificates
        docker cp "${CLUSTER_NAME}-control-plane:/etc/kubernetes/pki/ca.crt" .
        docker cp "${CLUSTER_NAME}-control-plane:/etc/kubernetes/pki/ca.key" .

        # Generate certificates for new user
        openssl genrsa -out user.key 2048
        openssl req -new -key user.key -out user.csr -subj "/CN=${USER_NAME}/O=tenant"
        openssl x509 -req -in user.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out user.crt -days 360

        # Add generated certificates to KinD context
        user_crt=$(base64 --wrap=0 user.crt)
        user_key=$(base64 --wrap=0 user.key)
        yq eval -i ".contexts += {\"context\": {\"cluster\": \"kind-${CLUSTER_NAME}\", \"user\": \"${USER_NAME}\"}, \"name\": \"${USER_NAME}\"}" "$HOME/.kube/config"
        yq eval -i ".users += {\"name\": \"${USER_NAME}\", \"user\": {\"client-certificate-data\": \"$user_crt\", \"client-key-data\": \"$user_key\"}}" "$HOME/.kube/config"

        # Show the resulting kubeconfig. `kubectl config view` redacts
        # certificate and key data, so the private key does not reach the job log.
        kubectl config view
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
# Composite action that runs `kind export logs` for the named cluster and
# writes the result to the requested directory.
name: "Export all KinD pod logs"
description: "Step to export all KinD pod logs"

inputs:
  output-directory:
    description: "Directory to export log files to"
    required: true
  cluster-name:
    description: "Name of the KinD cluster"
    required: false
    default: cluster

runs:
  using: "composite"
  steps:
    - name: Export all KinD pod logs
      shell: bash
      # Inputs are passed as environment variables and quoted at the point of
      # use, so a directory containing spaces or shell metacharacters is
      # treated as a single literal argument.
      env:
        OUTPUT_DIRECTORY: ${{ inputs.output-directory }}
        CLUSTER_NAME: ${{ inputs.cluster-name }}
      run: |
        echo "Export all KinD pod logs to ${OUTPUT_DIRECTORY}"
        kind export logs "${OUTPUT_DIRECTORY}" --name "${CLUSTER_NAME}"

.github/kind/resources/kind.yaml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
# ---------------------------------------------------------------------------
# Copyright 2023.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ---------------------------------------------------------------------------

# KinD cluster template. This is not a final config: the "Set up KinD" action
# pipes it through envsubst to fill in the NODE_IMAGE and REGISTRY_ADDRESS
# placeholders, and uses sed to append worker entries after the top-level
# nodes line. Keep that line at column 0 with nothing else on it.
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
  - role: control-plane
    image: ${NODE_IMAGE}
    extraMounts:
      - hostPath: /dev/null
        containerPath: /var/run/nvidia-container-devices/all
    # Ports 80 and 443 of the node are published on the same host ports.
    extraPortMappings:
      - containerPort: 80
        hostPort: 80
        protocol: TCP
      - containerPort: 443
        hostPort: 443
        protocol: TCP
    # Labels the control-plane node with ingress-ready=true at kubeadm init.
    kubeadmConfigPatches:
      - |
        kind: InitConfiguration
        nodeRegistration:
          kubeletExtraArgs:
            node-labels: "ingress-ready=true"
# Registers the local registry address as a containerd mirror reached over
# plain HTTP.
containerdConfigPatches:
  - |-
    [plugins."io.containerd.grpc.v1.cri".registry.mirrors."${REGISTRY_ADDRESS}"]
      endpoint = ["http://${REGISTRY_ADDRESS}"]
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
# Composite action that installs Helm, deploys the NVidia GPU operator chart
# into the gpu-operator namespace (driver installation disabled) and, when
# requested, enables GPU time-slicing and waits for the control-plane node to
# advertise the multiplied GPU capacity.
name: "Install NVidia GPU operator for KinD"
description: "Step to install NVidia GPU operator for KinD, based on https://www.substratus.ai/blog/kind-with-gpus"

inputs:
  enable-time-slicing:
    description: "Enable time slicing for NVidia GPU operator"
    required: false
    default: 'false'
  time-slicing-capacity:
    description: "Time slicing GPU multiplier"
    required: false
    # Quoted so the default is unambiguously a string.
    default: '4'
  cluster-name:
    # Optional input; the default keeps the previously hard-coded node name
    # "cluster-control-plane".
    description: "Name of the KinD cluster"
    required: false
    default: cluster

runs:
  using: "composite"
  steps:
    - name: Install Helm
      shell: bash
      run: |
        curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
        chmod 700 get_helm.sh
        ./get_helm.sh
        # Executable by everyone, writable only by the owner.
        sudo chmod 755 /usr/local/bin/helm

    - name: Install NVidia GPU operator
      shell: bash
      run: |
        # `|| true` tolerates a failure of `helm repo add`.
        helm repo add nvidia https://helm.ngc.nvidia.com/nvidia || true
        helm repo update
        helm install --wait --generate-name -n gpu-operator --create-namespace nvidia/gpu-operator --set driver.enabled=false

    - name: Print KinD node
      shell: bash
      run: |
        kubectl describe nodes

    - name: Configuring Time-Slicing for NVidia GPU operator
      if: inputs.enable-time-slicing == 'true'
      shell: bash
      # Inputs are passed as environment variables so their values are never
      # parsed as part of the shell script.
      env:
        TIME_SLICING_CAPACITY: ${{ inputs.time-slicing-capacity }}
        CLUSTER_NAME: ${{ inputs.cluster-name }}
      run: |
        # Substitute the placeholder on the fly instead of editing the
        # template in place: no sudo is needed and the template stays reusable
        # if the action runs again in the same job.
        sed "s/<REPLICAS>/${TIME_SLICING_CAPACITY}/" "${GITHUB_ACTION_PATH}/resources/time-slicing-config.yaml" | kubectl create -n gpu-operator -f -
        kubectl patch clusterpolicies.nvidia.com/cluster-policy -n gpu-operator --type merge -p '{"spec": {"devicePlugin": {"config": {"name": "time-slicing-config", "default": "any"}}}}'
        # Block until the node reports the time-sliced GPU capacity.
        kubectl wait --timeout=120s --for=jsonpath='{.status.capacity.nvidia\.com/gpu}'="${TIME_SLICING_CAPACITY}" "node/${CLUSTER_NAME}-control-plane"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
# ConfigMap template for GPU time-slicing. The GPU operator action replaces
# the replicas placeholder with the requested capacity (via sed) before
# creating this object in the gpu-operator namespace, then patches the cluster
# policy to use this ConfigMap by name with "any" as the default entry.
apiVersion: v1
kind: ConfigMap
metadata:
  name: time-slicing-config
data:
  # No comments inside the block below: everything under `any` is payload.
  any: |-
    version: v1
    flags:
      migStrategy: none
    sharing:
      timeSlicing:
        resources:
        - name: nvidia.com/gpu
          replicas: <REPLICAS>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
# Composite action that prepares a runner with NVidia GPUs for KinD: installs
# podman and yq, makes the NVidia runtime the Docker default, enables
# volume-mount based device selection, and prints the visible GPUs.
name: "Setup NVidia GPU environment for KinD"
description: "Step to setup NVidia GPU environment for KinD, based on https://www.substratus.ai/blog/kind-with-gpus"

runs:
  using: "composite"
  steps:
    - name: Install Podman
      shell: bash
      run: |
        sudo apt-get -y install podman
        # Copy the system-wide containers configuration into the user's
        # config directory.
        mkdir --parents ~/.config
        cp --recursive /etc/containers ~/.config/containers

    - name: Install yq
      shell: bash
      run: |
        # NOTE(review): "latest" is unpinned, so the yq version can change
        # between runs — consider pinning a release.
        sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
        # Executable by everyone, writable only by the owner.
        sudo chmod 755 /usr/local/bin/yq

    - name: Setup NVIDIA Container Toolkit
      shell: bash
      run: |
        sudo nvidia-ctk runtime configure --runtime=docker --set-as-default
        sudo systemctl restart docker
        # Uncomment and enable accept-nvidia-visible-devices-as-volume-mounts
        # in the NVidia container runtime configuration.
        sudo sed -i "s/#accept-nvidia-visible-devices-as-volume-mounts = false/accept-nvidia-visible-devices-as-volume-mounts = true/" /etc/nvidia-container-runtime/config.toml

    - name: List NVIDIA GPUs
      shell: bash
      run: |
        nvidia-smi

0 commit comments

Comments
 (0)