Skip to content

Commit a91d3d0

Browse files
Merge pull request #30437 from xueqzhan/revert-30296-1761742915924
TRT-2386: Revert #30296 "OTA-1637: ClusterOperators should not go Progressing only for a node reboot"
2 parents f886e4e + 7657bcc commit a91d3d0

File tree

2 files changed

+10
-201
lines changed

2 files changed

+10
-201
lines changed

pkg/monitortests/clusterversionoperator/legacycvomonitortests/monitortest.go

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,6 @@ func (w *legacyMonitorTests) EvaluateTestsFromConstructedIntervals(ctx context.C
4545
isUpgrade := platformidentification.DidUpgradeHappenDuringCollection(finalIntervals, time.Time{}, time.Time{})
4646
if isUpgrade {
4747
junits = append(junits, testUpgradeOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...)
48-
junits = append(junits, clusterOperatorIsNotProgressingWhenMachineConfigIs(finalIntervals)...)
4948
} else {
5049
junits = append(junits, testStableSystemOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...)
5150
}

pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go

Lines changed: 10 additions & 200 deletions
Original file line number | Diff line number | Diff line change
@@ -6,21 +6,21 @@ import (
66
"strings"
77
"time"
88

9-
configv1 "github.com/openshift/api/config/v1"
10-
clientconfigv1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
11-
"github.com/sirupsen/logrus"
9+
"github.com/openshift/origin/pkg/monitortestlibrary/utility"
1210
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13-
"k8s.io/apimachinery/pkg/util/sets"
1411
"k8s.io/client-go/kubernetes"
15-
"k8s.io/client-go/rest"
1612

13+
"github.com/openshift/origin/pkg/monitortests/clusterversionoperator/operatorstateanalyzer"
14+
"github.com/sirupsen/logrus"
15+
16+
configv1 "github.com/openshift/api/config/v1"
17+
clientconfigv1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
1718
"github.com/openshift/origin/pkg/monitor/monitorapi"
1819
"github.com/openshift/origin/pkg/monitortestlibrary/platformidentification"
1920
platformidentification2 "github.com/openshift/origin/pkg/monitortestlibrary/platformidentification"
20-
"github.com/openshift/origin/pkg/monitortestlibrary/utility"
21-
"github.com/openshift/origin/pkg/monitortests/clusterversionoperator/operatorstateanalyzer"
2221
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
2322
exutil "github.com/openshift/origin/test/extended/util"
23+
"k8s.io/client-go/rest"
2424
)
2525

2626
// exceptionCallback consumes a suspicious condition and returns an
@@ -516,6 +516,9 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes []
516516
for _, conditionType := range conditionTypes {
517517
for _, operatorName := range platformidentification.KnownOperators.List() {
518518
bzComponent := platformidentification.GetBugzillaComponentForOperator(operatorName)
519+
if bzComponent == "Unknown" {
520+
bzComponent = operatorName
521+
}
519522
testName := fmt.Sprintf("[bz-%v] clusteroperator/%v should not change condition/%v", bzComponent, operatorName, conditionType)
520523
operatorEvents := eventsByOperator[operatorName]
521524
if len(operatorEvents) == 0 {
@@ -583,9 +586,6 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes []
583586
}
584587

585588
if len(fatal) > 0 || len(excepted) > 0 {
586-
// add a failure so we
587-
// either flake (or pass) in case len(fatal) == 0 by adding a success to the same test
588-
// or fail in case len(fatal) > 0 by leaving the failure as the only output for the test
589589
ret = append(ret, &junitapi.JUnitTestCase{
590590
Name: testName,
591591
Duration: duration,
@@ -597,197 +597,7 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes []
597597
}
598598

599599
if len(fatal) == 0 {
600-
if len(excepted) > 0 {
601-
// add a success so we flake (or pass) and don't fail
602-
ret = append(ret, &junitapi.JUnitTestCase{Name: testName, SystemOut: "Passing the case to make the overall test case flake as the previous failure is expected"})
603-
} else {
604-
ret = append(ret, &junitapi.JUnitTestCase{Name: testName})
605-
}
606-
}
607-
}
608-
}
609-
610-
return ret
611-
}
612-
613-
func clusterOperatorIsNotProgressingWhenMachineConfigIs(events monitorapi.Intervals) []*junitapi.JUnitTestCase {
614-
var ret []*junitapi.JUnitTestCase
615-
upgradeWindows := getUpgradeWindows(events)
616-
617-
var machineConfigProgressingStart time.Time
618-
var eventsInUpgradeWindows monitorapi.Intervals
619-
620-
var start, stop time.Time
621-
for _, event := range events {
622-
if !isInUpgradeWindow(upgradeWindows, event) {
623-
continue
624-
}
625-
eventsInUpgradeWindows = append(eventsInUpgradeWindows, event)
626-
if start.IsZero() || event.From.Before(start) {
627-
start = event.From
628-
}
629-
if stop.IsZero() || event.To.After(stop) {
630-
stop = event.To
631-
}
632-
}
633-
duration := stop.Sub(start).Seconds()
634-
635-
eventsByOperator := getEventsByOperator(eventsInUpgradeWindows)
636-
for _, mcEvent := range eventsByOperator["machine-config"] {
637-
condition := monitorapi.GetOperatorConditionStatus(mcEvent)
638-
if condition == nil {
639-
continue // ignore non-condition intervals
640-
}
641-
if condition.Type == configv1.OperatorProgressing && condition.Status == configv1.ConditionTrue {
642-
machineConfigProgressingStart = mcEvent.To
643-
break
644-
}
645-
}
646-
647-
mcTestCase := &junitapi.JUnitTestCase{
648-
Name: fmt.Sprintf("[bz-Machine Config Operator] clusteroperator/machine-config must go Progressing=True during an upgrade test"),
649-
Duration: duration,
650-
}
651-
if machineConfigProgressingStart.IsZero() {
652-
mcTestCase.FailureOutput = &junitapi.FailureOutput{
653-
Output: fmt.Sprintf("machine-config was never Progressing=True during the upgrade window from %s to %s", start.Format(time.RFC3339), stop.Format(time.RFC3339)),
654-
}
655-
return []*junitapi.JUnitTestCase{mcTestCase}
656-
} else {
657-
mcTestCase.SystemOut = fmt.Sprintf("machine-config became Progressing=True at %s during the upgrade window from %s to %s", machineConfigProgressingStart.Format(time.RFC3339), start.Format(time.RFC3339), stop.Format(time.RFC3339))
658-
}
659-
ret = append(ret, mcTestCase)
660-
661-
for _, operatorName := range platformidentification.KnownOperators.Difference(sets.NewString("machine-config")).List() {
662-
bzComponent := platformidentification.GetBugzillaComponentForOperator(operatorName)
663-
testName := fmt.Sprintf("[bz-%v] clusteroperator/%v should stay Progressing=False while MCO is Progressing=True", bzComponent, operatorName)
664-
operatorEvents := eventsByOperator[operatorName]
665-
if len(operatorEvents) == 0 {
666-
ret = append(ret, &junitapi.JUnitTestCase{
667-
Name: testName,
668-
Duration: duration,
669-
})
670-
continue
671-
}
672-
673-
except := func(co string, reason string) string {
674-
switch co {
675-
case "csi-snapshot-controller":
676-
if reason == "CSISnapshotController_Deploying" {
677-
return "https://issues.redhat.com/browse/OCPBUGS-62624"
678-
}
679-
case "dns":
680-
if reason == "DNSReportsProgressingIsTrue" {
681-
return "https://issues.redhat.com/browse/OCPBUGS-62623"
682-
}
683-
case "image-registry":
684-
if reason == "NodeCADaemonUnavailable::Ready" || reason == "DeploymentNotCompleted" {
685-
return "https://issues.redhat.com/browse/OCPBUGS-62626"
686-
}
687-
case "ingress":
688-
if reason == "Reconciling" {
689-
return "https://issues.redhat.com/browse/OCPBUGS-62627"
690-
}
691-
case "kube-storage-version-migrator":
692-
if reason == "KubeStorageVersionMigrator_Deploying" {
693-
return "https://issues.redhat.com/browse/OCPBUGS-62629"
694-
}
695-
case "network":
696-
if reason == "Deploying" {
697-
return "https://issues.redhat.com/browse/OCPBUGS-62630"
698-
}
699-
case "node-tuning":
700-
if reason == "Reconciling" {
701-
return "https://issues.redhat.com/browse/OCPBUGS-62632"
702-
}
703-
case "openshift-controller-manager":
704-
if reason == "_DesiredStateNotYetAchieved" {
705-
return "https://issues.redhat.com/browse/OCPBUGS-63116"
706-
}
707-
case "service-ca":
708-
if reason == "_ManagedDeploymentsAvailable" {
709-
return "https://issues.redhat.com/browse/OCPBUGS-62633"
710-
}
711-
case "storage":
712-
// GCPPDCSIDriverOperatorCR_GCPPDDriverControllerServiceController_Deploying
713-
// GCPPDCSIDriverOperatorCR_GCPPDDriverNodeServiceController_Deploying
714-
// AWSEBSCSIDriverOperatorCR_AWSEBSDriverNodeServiceController_Deploying
715-
// VolumeDataSourceValidatorDeploymentController_Deploying
716-
if strings.HasSuffix(reason, "Controller_Deploying") ||
717-
reason == "GCPPD_Deploying" {
718-
return "https://issues.redhat.com/browse/OCPBUGS-62634"
719-
}
720-
case "olm":
721-
// CatalogdDeploymentCatalogdControllerManager_Deploying
722-
// OperatorcontrollerDeploymentOperatorControllerControllerManager_Deploying
723-
if strings.HasSuffix(reason, "ControllerManager_Deploying") {
724-
return "https://issues.redhat.com/browse/OCPBUGS-62635"
725-
}
726-
}
727-
return ""
728-
}
729-
730-
var excepted, fatal []string
731-
for _, operatorEvent := range operatorEvents {
732-
if operatorEvent.From.Before(machineConfigProgressingStart) {
733-
continue
734-
}
735-
condition := monitorapi.GetOperatorConditionStatus(operatorEvent)
736-
if condition == nil {
737-
continue // ignore non-condition intervals
738-
}
739-
if condition.Type == "" {
740-
fatal = append(fatal, fmt.Sprintf("failed to convert %v into a condition with a type", operatorEvent))
741-
continue
742-
}
743-
744-
if condition.Type != configv1.OperatorProgressing || condition.Status == configv1.ConditionFalse {
745-
continue
746-
}
747-
748-
// if there was any switch, it was wrong/unexpected at some point
749-
failure := fmt.Sprintf("%v", operatorEvent)
750-
751-
exception := except(operatorName, condition.Reason)
752-
if exception == "" {
753-
fatal = append(fatal, failure)
754-
} else {
755-
excepted = append(excepted, fmt.Sprintf("%s (exception: %s)", failure, exception))
756-
}
757-
}
758-
759-
output := fmt.Sprintf("%d (out of %d) unexpected clusteroperator state transitions while machine-config is progressing during the upgrade window from %s to %s", len(fatal), len(operatorEvents), start.Format(time.RFC3339), stop.Format(time.RFC3339))
760-
if len(fatal) > 0 {
761-
output = fmt.Sprintf("%s. These did not match any known exceptions, so they cause this test-case to fail:\n\n%v\n", output, strings.Join(fatal, "\n"))
762-
} else {
763-
output = fmt.Sprintf("%s, as desired.", output)
764-
}
765-
output = fmt.Sprintf("%s\n%d unwelcome but acceptable clusteroperator state transitions while machine-config is progressing during the upgrade window from %s to %s", output, len(excepted), start.Format(time.RFC3339), stop.Format(time.RFC3339))
766-
if len(excepted) > 0 {
767-
output = fmt.Sprintf("%s. These should not happen, but because they are tied to exceptions, the fact that they did happen is not sufficient to cause this test-case to fail:\n\n%v\n", output, strings.Join(excepted, "\n"))
768-
} else {
769-
output = fmt.Sprintf("%s, as desired.", output)
770-
}
771-
772-
if len(fatal) > 0 || len(excepted) > 0 {
773-
// add a failure so we
774-
// either flake (or pass) in case len(fatal) == 0 by adding a success to the same test
775-
// or fail in case len(fatal) > 0 by leaving the failure as the only output for the test
776-
ret = append(ret, &junitapi.JUnitTestCase{
777-
Name: testName,
778-
Duration: duration,
779-
SystemOut: output,
780-
FailureOutput: &junitapi.FailureOutput{
781-
Output: output,
782-
},
783-
})
784-
}
785-
786-
if len(fatal) == 0 {
787-
if len(excepted) > 0 {
788600
// add a success so we flake (or pass) and don't fail
789-
ret = append(ret, &junitapi.JUnitTestCase{Name: testName, SystemOut: "Passing the case to make the overall test case flake as the previous failure is expected"})
790-
} else {
791601
ret = append(ret, &junitapi.JUnitTestCase{Name: testName})
792602
}
793603
}

0 commit comments

Comments
 (0)