Skip to content

Commit 8349ffe

Browse files
committed
try the convalescence logic multiple times
1 parent 59e60c3 commit 8349ffe

File tree

1 file changed

+103
-99
lines changed

1 file changed

+103
-99
lines changed

integration-tests/pipelines/tssc-cli-e2e.yaml

Lines changed: 103 additions & 99 deletions
Original file line number | Diff line number | Diff line change
@@ -97,122 +97,126 @@ spec:
9797
9898
oc get secret -n $cp_namespace $kubeconfig_secret -o jsonpath={.data.kubeconfig} |base64 -d > /tmp/ephemereal.config
9999
export KUBECONFIG=/tmp/ephemereal.config
100-
csr_max_retries=5
101-
csr_sleep_duration=10
102-
approved_csrs=false
103100
104-
console_max_retries=30
105-
console_sleep_duration=10
106-
console_connect_timeout=10
107-
console_accessible=false
101+
# --- Cluster Provisioning Retry Loop ---
102+
provisioning_max_retries=3
103+
provisioning_successful=false
108104
109-
echo "--- Starting CSR Approval Process ---"
110-
for ((i=1; i<=csr_max_retries; i++)); do
111-
echo "CSR Attempt $i of $csr_max_retries: Checking for pending CSRs..."
112-
if ! oc get csr 2>/dev/null | grep -i Pending; then
113-
echo "No pending CSRs found. Continuing"
114-
approved_csrs=true
115-
break
116-
else
117-
echo "There are pending CSRs. That probably means cluster was hibernated for more than 24 hours. Need to approve them (until OCPBUGS-55339 is resolved)"
118-
if oc get csr -oname | xargs oc adm certificate approve; then
119-
echo "Successfully submitted approval for CSRs on attempt $i."
120-
sleep 60 # Small delay for changes to propagate
121-
if ! oc get csr 2>/dev/null | grep -i Pending; then
122-
echo "Confirmed no pending CSRs after approval."
123-
approved_csrs=true
124-
break
105+
for ((provisioning_attempt=1; provisioning_attempt<=provisioning_max_retries; provisioning_attempt++)); do
106+
echo "=== Cluster Provisioning Attempt $provisioning_attempt of $provisioning_max_retries ==="
107+
108+
# Reset retry limits, sleep/timeout durations, and status flags for this attempt
109+
csr_max_retries=5
110+
csr_sleep_duration=10
111+
approved_csrs=false
112+
113+
console_max_retries=30
114+
console_sleep_duration=10
115+
console_connect_timeout=10
116+
console_accessible=false
117+
118+
echo "--- Starting CSR Approval Process ---"
119+
for ((i=1; i<=csr_max_retries; i++)); do
120+
echo "CSR Attempt $i of $csr_max_retries: Checking for pending CSRs..."
121+
if ! oc get csr 2>/dev/null | grep -i Pending; then
122+
echo "No pending CSRs found. Continuing"
123+
approved_csrs=true
124+
break
125+
else
126+
echo "There are pending CSRs. That probably means cluster was hibernated for more than 24 hours. Need to approve them (until OCPBUGS-55339 is resolved)"
127+
if oc get csr -oname | xargs oc adm certificate approve; then
128+
echo "Successfully submitted approval for CSRs on attempt $i."
129+
sleep 60 # Small delay for changes to propagate
130+
if ! oc get csr 2>/dev/null | grep -i Pending; then
131+
echo "Confirmed no pending CSRs after approval."
132+
approved_csrs=true
133+
break
134+
else
135+
echo "Pending CSRs still exist after approval attempt $i."
136+
fi
125137
else
126-
echo "Pending CSRs still exist after approval attempt $i."
138+
echo "Failed to run approval command for CSRs on attempt $i."
127139
fi
128-
else
129-
echo "Failed to run approval command for CSRs on attempt $i."
130140
fi
131-
fi
132141
133-
if [[ "$i" -lt "$csr_max_retries" ]]; then
134-
echo "Sleeping for $csr_sleep_duration seconds before next CSR retry..."
135-
sleep "$csr_sleep_duration"
136-
fi
137-
done
142+
if [[ "$i" -lt "$csr_max_retries" ]]; then
143+
echo "Sleeping for $csr_sleep_duration seconds before next CSR retry..."
144+
sleep "$csr_sleep_duration"
145+
fi
146+
done
138147
139-
if [[ "$approved_csrs" == "true" ]]; then
148+
if [[ "$approved_csrs" == "false" ]]; then
149+
echo "Failed to ensure all pending CSRs were approved after $csr_max_retries attempts."
150+
if [[ "$provisioning_attempt" -lt "$provisioning_max_retries" ]]; then
151+
echo "Will retry entire provisioning process..."
152+
continue
153+
else
154+
echo "All provisioning attempts exhausted. Exiting."
155+
exit 1
156+
fi
157+
fi
140158
echo "CSR check and approval process completed successfully."
141-
else
142-
echo "Failed to ensure all pending CSRs were approved after $csr_max_retries attempts."
143-
exit 1
144-
fi
145-
echo "--- CSR Approval Process Finished ---"
146-
147-
# --- Console URL Accessibility Check ---
148-
echo "--- Starting Console Accessibility Check ---"
149-
159+
echo "--- CSR Approval Process Finished ---"
150160
151-
oc whoami
152-
console_url=$(oc whoami --show-console)
153-
echo "Console URL: $console_url"
154-
# # Check if routes are available (OpenShift-specific resource)
155-
# echo "Checking if routes are available..."
156-
# if ! oc api-resources | grep -q "routes"; then
157-
# echo "Warning: Routes are not available. This might not be an OpenShift cluster or it's not fully ready."
158-
# echo "Waiting for OpenShift components to be ready..."
159-
# sleep 30
160-
# if ! oc api-resources | grep -q "routes"; then
161-
# echo "Error: Routes still not available. This doesn't appear to be an OpenShift cluster."
162-
# exit 1
163-
# fi
164-
# fi
165-
166-
# # Check if openshift-console namespace exists
167-
# echo "Checking if openshift-console namespace exists..."
168-
# if ! oc get namespace openshift-console &>/dev/null; then
169-
# echo "Error: openshift-console namespace not found."
170-
# exit 1
171-
# fi
172-
173-
# # Wait for console route to be available
174-
# echo "Waiting for console route to be available..."
175-
# for ((k=1; k<=10; k++)); do
176-
# if oc get route console -n openshift-console &>/dev/null; then
177-
# echo "Console route found."
178-
# break
179-
# fi
180-
# echo "Console route not found, attempt $k/10. Waiting 30 seconds..."
181-
# sleep 30
182-
# done
161+
# --- Console URL Accessibility Check ---
162+
echo "--- Starting Console Accessibility Check ---"
183163
184-
# console_url="https://$(oc get route console -n openshift-console -o jsonpath='{.spec.host}' 2>/dev/null)"
164+
oc whoami
165+
console_url=$(oc whoami --show-console)
166+
echo "Console URL: $console_url"
185167
186-
if [[ -z "$console_url" ]]; then
187-
echo "Error: Could not retrieve OpenShift console URL."
188-
exit 1
189-
else
190-
echo "Console URL found: $console_url"
191-
for ((j=1; j<=console_max_retries; j++)); do
192-
echo "Console Check Attempt $j of $console_max_retries: Checking console URL accessibility..."
193-
if curl -k --silent --output /dev/null --head --fail --connect-timeout "$console_connect_timeout" "$console_url"; then
194-
echo "Console URL $console_url is accessible (HTTP 2xx)."
195-
console_accessible=true
196-
break
168+
if [[ -z "$console_url" ]]; then
169+
echo "Error: Could not retrieve OpenShift console URL."
170+
if [[ "$provisioning_attempt" -lt "$provisioning_max_retries" ]]; then
171+
echo "Will retry entire provisioning process..."
172+
continue
197173
else
198-
curl_exit_code=$?
199-
echo "Console URL $console_url not accessible on attempt $j (curl exit code: $curl_exit_code)."
174+
echo "All provisioning attempts exhausted. Exiting."
175+
exit 1
200176
fi
177+
else
178+
echo "Console URL found: $console_url"
179+
for ((j=1; j<=console_max_retries; j++)); do
180+
echo "Console Check Attempt $j of $console_max_retries: Checking console URL accessibility..."
181+
if curl -k --silent --output /dev/null --head --fail --connect-timeout "$console_connect_timeout" "$console_url"; then
182+
echo "Console URL $console_url is accessible (HTTP 2xx)."
183+
console_accessible=true
184+
break
185+
else
186+
curl_exit_code=$?
187+
echo "Console URL $console_url not accessible on attempt $j (curl exit code: $curl_exit_code)."
188+
fi
201189
202-
if [[ "$j" -lt "$console_max_retries" ]]; then
203-
echo "Sleeping for $console_sleep_duration seconds before next console check retry..."
204-
sleep "$console_sleep_duration"
205-
fi
206-
done
190+
if [[ "$j" -lt "$console_max_retries" ]]; then
191+
echo "Sleeping for $console_sleep_duration seconds before next console check retry..."
192+
sleep "$console_sleep_duration"
193+
fi
194+
done
207195
208-
if [[ "$console_accessible" == "true" ]]; then
209-
echo "Console is ready. Continuing."
210-
else
211-
echo "Failed to access console URL $console_url after $console_max_retries attempts."
212-
exit 1
196+
if [[ "$console_accessible" == "false" ]]; then
197+
echo "Failed to access console URL $console_url after $console_max_retries attempts."
198+
if [[ "$provisioning_attempt" -lt "$provisioning_max_retries" ]]; then
199+
echo "Will retry entire provisioning process..."
200+
continue
201+
else
202+
echo "All provisioning attempts exhausted. Exiting."
203+
exit 1
204+
fi
205+
fi
213206
fi
207+
echo "Console is ready. Continuing."
208+
echo "--- Console Accessibility Check Finished ---"
209+
210+
# If we reach here, both CSR approval and console accessibility succeeded
211+
provisioning_successful=true
212+
echo "=== Cluster Provisioning Completed Successfully on Attempt $provisioning_attempt ==="
213+
break
214+
done
215+
216+
if [[ "$provisioning_successful" == "false" ]]; then
217+
echo "Cluster provisioning failed after $provisioning_max_retries attempts."
218+
exit 1
214219
fi
215-
echo "--- Console Accessibility Check Finished ---"
216220
- name: tssc-install
217221
runAfter:
218222
- provision-cluster

0 commit comments

Comments
 (0)