Skip to content

Commit 5bdd118

Browse files
zhujian7claude
andcommitted
Fix ManagedClusterAddons not removed when ClusterManagementAddon is deleted
The addon template controller was stopping addon managers immediately when ClusterManagementAddon was deleted, without waiting for pre-delete jobs to complete or ManagedClusterAddons to be cleaned up via owner reference cascading deletion. This change implements the TODO at line 105 by checking if all ManagedClusterAddons are deleted before stopping the manager. The controller now uses field selectors to efficiently query for remaining ManagedClusterAddons and requeues after 10 seconds if any still exist, allowing time for proper cleanup. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]> Signed-off-by: zhujian <[email protected]>
1 parent 2657dd6 commit 5bdd118

File tree

2 files changed

+144
-2
lines changed

2 files changed

+144
-2
lines changed

pkg/addon/controllers/addontemplate/controller.go

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,33 @@ func NewAddonTemplateController(
102102

103103
func (c *addonTemplateController) stopUnusedManagers(
104104
ctx context.Context, syncCtx factory.SyncContext, addOnName string) {
105-
// TODO: check if all managed cluster addon instances are deleted
105+
logger := klog.FromContext(ctx)
106+
107+
// Check if all managed cluster addon instances are deleted before stopping the manager
108+
// This allows pre-delete jobs to complete
109+
listOptions := metav1.ListOptions{
110+
FieldSelector: "metadata.name=" + addOnName,
111+
}
112+
mcaList, err := c.addonClient.AddonV1alpha1().ManagedClusterAddOns("").List(ctx, listOptions)
113+
if err != nil {
114+
logger.Error(err, "Failed to list ManagedClusterAddOns", "addonName", addOnName)
115+
return
116+
}
117+
118+
// Check if there are still ManagedClusterAddOns for this addon
119+
if len(mcaList.Items) > 0 {
120+
logger.Info("ManagedClusterAddOn still exists, waiting for deletion",
121+
"addonName", addOnName, "count", len(mcaList.Items))
122+
// Requeue to check again later
123+
syncCtx.Queue().AddAfter(addOnName, 10*time.Second)
124+
return
125+
}
126+
106127
stopFunc, ok := c.addonManagers[addOnName]
107128
if ok {
108129
stopFunc()
109130
delete(c.addonManagers, addOnName)
110-
klog.FromContext(ctx).Info("Stopping the manager for addon", "addonName", addOnName)
131+
logger.Info("Stopping the manager for addon", "addonName", addOnName)
111132
}
112133
}
113134

pkg/addon/controllers/addontemplate/controller_test.go

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
workinformers "open-cluster-management.io/api/client/work/informers/externalversions"
2626

2727
testingcommon "open-cluster-management.io/ocm/pkg/common/testing"
28+
testinghelpers "open-cluster-management.io/ocm/pkg/registration/helpers/testing"
2829
)
2930

3031
func TestReconcile(t *testing.T) {
@@ -268,3 +269,123 @@ func TestRunController(t *testing.T) {
268269
}
269270
}
270271
}
272+
273+
func TestStopUnusedManagers(t *testing.T) {
274+
cases := []struct {
275+
name string
276+
addonName string
277+
managedClusterAddons []runtime.Object
278+
existingManagers map[string]context.CancelFunc
279+
expectedManagerStopped bool
280+
expectedRequeue bool
281+
}{
282+
{
283+
name: "no managed cluster addons, manager should be stopped",
284+
addonName: "test-addon",
285+
managedClusterAddons: []runtime.Object{},
286+
existingManagers: map[string]context.CancelFunc{
287+
"test-addon": func() {},
288+
},
289+
expectedManagerStopped: true,
290+
expectedRequeue: false,
291+
},
292+
{
293+
name: "managed cluster addons exist, manager should not be stopped",
294+
addonName: "test-addon",
295+
managedClusterAddons: []runtime.Object{
296+
testinghelpers.NewManagedClusterAddons("test-addon", "cluster1", nil, nil),
297+
},
298+
existingManagers: map[string]context.CancelFunc{
299+
"test-addon": func() {},
300+
},
301+
expectedManagerStopped: false,
302+
expectedRequeue: true,
303+
},
304+
{
305+
name: "multiple managed cluster addons exist, manager should not be stopped",
306+
addonName: "test-addon",
307+
managedClusterAddons: []runtime.Object{
308+
testinghelpers.NewManagedClusterAddons("test-addon", "cluster1", nil, nil),
309+
testinghelpers.NewManagedClusterAddons("test-addon", "cluster2", nil, nil),
310+
},
311+
existingManagers: map[string]context.CancelFunc{
312+
"test-addon": func() {},
313+
},
314+
expectedManagerStopped: false,
315+
expectedRequeue: true,
316+
},
317+
{
318+
name: "no manager exists, should not error",
319+
addonName: "test-addon",
320+
managedClusterAddons: []runtime.Object{},
321+
existingManagers: map[string]context.CancelFunc{},
322+
expectedManagerStopped: false,
323+
expectedRequeue: false,
324+
},
325+
}
326+
327+
for _, c := range cases {
328+
t.Run(c.name, func(t *testing.T) {
329+
fakeAddonClient := fakeaddon.NewSimpleClientset(c.managedClusterAddons...)
330+
addonInformers := addoninformers.NewSharedInformerFactory(fakeAddonClient, 10*time.Minute)
331+
332+
fakeDynamicClient := dynamicfake.NewSimpleDynamicClient(runtime.NewScheme())
333+
dynamicInformerFactory := dynamicinformer.NewDynamicSharedInformerFactory(fakeDynamicClient, 0)
334+
fakeClusterClient := fakecluster.NewSimpleClientset()
335+
clusterInformers := clusterv1informers.NewSharedInformerFactory(fakeClusterClient, 10*time.Minute)
336+
fakeWorkClient := fakework.NewSimpleClientset()
337+
workInformers := workinformers.NewSharedInformerFactory(fakeWorkClient, 10*time.Minute)
338+
hubKubeClient := fakekube.NewSimpleClientset()
339+
340+
managerStopped := false
341+
existingManagers := make(map[string]context.CancelFunc)
342+
for name, stopFunc := range c.existingManagers {
343+
existingManagers[name] = func() {
344+
if name == c.addonName {
345+
managerStopped = true
346+
}
347+
stopFunc()
348+
}
349+
}
350+
351+
controller := &addonTemplateController{
352+
kubeConfig: &rest.Config{},
353+
kubeClient: hubKubeClient,
354+
addonClient: fakeAddonClient,
355+
workClient: fakeWorkClient,
356+
cmaLister: addonInformers.Addon().V1alpha1().ClusterManagementAddOns().Lister(),
357+
addonManagers: existingManagers,
358+
addonInformers: addonInformers,
359+
clusterInformers: clusterInformers,
360+
dynamicInformers: dynamicInformerFactory,
361+
workInformers: workInformers,
362+
eventRecorder: eventstesting.NewTestingEventRecorder(t),
363+
}
364+
365+
ctx := context.TODO()
366+
syncContext := testingcommon.NewFakeSyncContext(t, c.addonName)
367+
368+
controller.stopUnusedManagers(ctx, syncContext, c.addonName)
369+
370+
// Check if manager was stopped
371+
if c.expectedManagerStopped {
372+
assert.True(t, managerStopped, "expected manager to be stopped")
373+
_, exists := controller.addonManagers[c.addonName]
374+
assert.False(t, exists, "expected manager to be removed from map")
375+
} else {
376+
assert.False(t, managerStopped, "expected manager not to be stopped")
377+
if len(c.existingManagers) > 0 {
378+
_, exists := controller.addonManagers[c.addonName]
379+
assert.True(t, exists, "expected manager to still exist in map")
380+
}
381+
}
382+
383+
// Check if requeue was called
384+
if c.expectedRequeue {
385+
// We can't easily test the exact requeue behavior with the fake sync context
386+
// but we can verify the manager wasn't stopped when it should be requeued
387+
assert.False(t, managerStopped, "manager should not be stopped when requeue is expected")
388+
}
389+
})
390+
}
391+
}

0 commit comments

Comments
 (0)