diff --git a/.ko.yaml b/.ko.yaml new file mode 100644 index 0000000000000..552c4733203d8 --- /dev/null +++ b/.ko.yaml @@ -0,0 +1,4 @@ +defaultLdflags: +- -s -w +- -X k8s.io/kops.Version={{.Env.VERSION}} +- -X k8s.io/kops.GitVersion={{.Env.GITSHA}} diff --git a/Makefile b/Makefile index 03506d9a74bcf..c2748421cb0b6 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,8 @@ unexport AZURE_CLIENT_ID AZURE_CLIENT_SECRET AZURE_STORAGE_ACCOUNT AZURE_SUBSCRI VERSION=$(shell tools/get_version.sh | grep VERSION | awk '{print $$2}') +export VERSION + IMAGE_TAG=$(shell tools/get_version.sh | grep IMAGE_TAG | awk '{print $$2}') KOPS_CI_VERSION:=$(shell grep 'KOPS_CI_VERSION\s*=' kops-version.go | awk '{print $$3}' | sed -e 's_"__g') @@ -62,6 +64,7 @@ KOPS_CI_VERSION:=$(shell grep 'KOPS_CI_VERSION\s*=' kops-version.go | awk '{prin KOPS=${DIST}/$(shell go env GOOS)/$(shell go env GOARCH)/kops GITSHA := $(shell cd ${KOPS_ROOT}; git describe --always) +export GITSHA # We lock the versions of our controllers also # We need to keep in sync with: diff --git a/clusterapi/README.md b/clusterapi/README.md index 5d5ad50ac0d26..bac5ddf99e50a 100644 --- a/clusterapi/README.md +++ b/clusterapi/README.md @@ -7,18 +7,20 @@ We plug in our own bootstrap provider with the goal of enabling cluster-api node *Note*: the name & zone matter, we need to match the values we'll create later in the CAPI resources. ``` -kops create cluster clusterapi.k8s.local --zones us-east4-a -kops update cluster clusterapi.k8s.local --yes --admin -kops validate cluster --wait=10m +go run ./cmd/kops create cluster clusterapi.k8s.local --zones us-east4-a +go run ./cmd/kops update cluster clusterapi.k8s.local --yes --admin +go run ./cmd/kops validate cluster --wait=10m ``` -#cd cluster-api-provider-gcp -#REGISTRY=${USER} make docker-build docker-push -#REGISTRY=${USER} make install-management-cluster # Doesn't yet exist in capg - +# Install cert-manager +``` +kubectl apply --server-side -f https://github.com/cert-manager/cert-manager/releases/download/v1.18.2/cert-manager.yaml -# TODO: Install cert-manager +kubectl wait --for=condition=Available --timeout=5m -n cert-manager deployment/cert-manager +kubectl wait --for=condition=Available --timeout=5m -n cert-manager deployment/cert-manager-cainjector +kubectl wait --for=condition=Available --timeout=5m -n cert-manager deployment/cert-manager-webhook +``` # Install CAPI and CAPG ``` @@ -29,7 +31,19 @@ kustomize build ${REPO_ROOT}/clusterapi/manifests/cluster-api-provider-gcp | kub # Install our CRDs ``` -kustomize build config | kubectl apply --server-side -f - +kustomize build ${REPO_ROOT}/k8s | kubectl apply --server-side -f - +kustomize build ${REPO_ROOT}/clusterapi/config | kubectl apply --server-side -f - +``` + +## Create our Cluster object +``` +go run ./cmd/kops get cluster clusterapi.k8s.local -oyaml | kubectl apply --server-side -n kube-system -f - +``` + +## Create our instancegroup object + +``` +go run ./cmd/kops get ig nodes-us-east4-a --name clusterapi.k8s.local -oyaml | kubectl apply --server-side -n kube-system -f - ``` # Remove any stuff left over from previous runs @@ -39,8 +53,11 @@ kubectl delete gcpmachinetemplate --all ``` ``` -# Very carefully create a MachineDeployment matching our configuration -cat examples/manifest.yaml | IMAGE_ID=projects/ubuntu-os-cloud/global/images/family/ubuntu-2204-lts GCP_NODE_MACHINE_TYPE=e2-medium KUBERNETES_VERSION=v1.28.6 WORKER_MACHINE_COUNT=1 GCP_ZONE=us-east4-a GCP_REGION=us-east4 GCP_NETWORK_NAME=clusterapi-k8s-local 
GCP_SUBNET=us-east4-clusterapi-k8s-local GCP_PROJECT=$(gcloud config get project) CLUSTER_NAME=clusterapi-k8s-local envsubst | kubectl apply --server-side -n kube-system -f - +# Create a MachineDeployment matching our configuration +go run ./cmd/kops toolbox clusterapi generate machinedeployment \ + --cluster clusterapi.k8s.local \ + --name clusterapi-k8s-local-md-0 \ + --namespace kube-system | kubectl apply --server-side -n kube-system -f - ``` # IMAGE_ID=projects/debian-cloud/global/images/family/debian-12 doesn't work with user-data (????) diff --git a/clusterapi/config/crds/bootstrap.cluster.x-k8s.io_kopsconfigs.yaml b/clusterapi/config/crds/bootstrap.cluster.x-k8s.io_kopsconfigs.yaml index 0474dd3293633..aab32b859fe86 100644 --- a/clusterapi/config/crds/bootstrap.cluster.x-k8s.io_kopsconfigs.yaml +++ b/clusterapi/config/crds/bootstrap.cluster.x-k8s.io_kopsconfigs.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.14.0 + controller-gen.kubebuilder.io/version: v0.19.0 name: kopsconfigs.bootstrap.cluster.x-k8s.io spec: group: bootstrap.cluster.x-k8s.io diff --git a/clusterapi/config/crds/bootstrap.cluster.x-k8s.io_kopsconfigtemplates.yaml b/clusterapi/config/crds/bootstrap.cluster.x-k8s.io_kopsconfigtemplates.yaml index ac20a8d51f9c9..e898df58fd8b0 100644 --- a/clusterapi/config/crds/bootstrap.cluster.x-k8s.io_kopsconfigtemplates.yaml +++ b/clusterapi/config/crds/bootstrap.cluster.x-k8s.io_kopsconfigtemplates.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.14.0 + controller-gen.kubebuilder.io/version: v0.19.0 name: kopsconfigtemplates.bootstrap.cluster.x-k8s.io spec: group: bootstrap.cluster.x-k8s.io diff --git a/clusterapi/config/crds/controlplane.cluster.x-k8s.io_kopscontrolplanes.yaml b/clusterapi/config/crds/controlplane.cluster.x-k8s.io_kopscontrolplanes.yaml index d433a632f53a4..fc2de7218f257 100644 --- a/clusterapi/config/crds/controlplane.cluster.x-k8s.io_kopscontrolplanes.yaml +++ b/clusterapi/config/crds/controlplane.cluster.x-k8s.io_kopscontrolplanes.yaml @@ -3,7 +3,9 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.14.0 + controller-gen.kubebuilder.io/version: v0.19.0 + labels: + cluster.x-k8s.io/v1beta2: v1beta1 name: kopscontrolplanes.controlplane.cluster.x-k8s.io spec: group: controlplane.cluster.x-k8s.io @@ -90,6 +92,42 @@ spec: type: object status: description: KopsControlPlaneStatus defines the observed state of KopsControlPlane. + properties: + initialization: + description: |- + initialization provides observations of the KopsControlPlane initialization process. + NOTE: Fields in this struct are part of the Cluster API contract and are used to orchestrate initial Machine provisioning. + minProperties: 1 + properties: + controlPlaneInitialized: + description: |- + controlPlaneInitialized is true when the KopsControlPlane provider reports that the Kubernetes control plane is initialized; + A control plane is considered initialized when it can accept requests, no matter if this happens before + the control plane is fully provisioned or not. + NOTE: this field is part of the Cluster API contract, and it is used to orchestrate initial Machine provisioning. 
+ type: boolean + type: object + systemEndpoints: + description: KopsControllerEndpoint represents the endpoints used + to communicate with the control plane. + items: + description: SystemEndpoint represents a reachable Kubernetes API + endpoint. + properties: + host: + description: The hostname on which the API server is serving. + type: string + scope: + type: string + type: + description: The hostname on which the API server is serving. + type: string + required: + - host + - scope + - type + type: object + type: array type: object type: object served: true diff --git a/clusterapi/config/crds/controlplane.cluster.x-k8s.io_kopscontrolplanetemplates.yaml b/clusterapi/config/crds/controlplane.cluster.x-k8s.io_kopscontrolplanetemplates.yaml index 668fb171a34b4..786425afff1ec 100644 --- a/clusterapi/config/crds/controlplane.cluster.x-k8s.io_kopscontrolplanetemplates.yaml +++ b/clusterapi/config/crds/controlplane.cluster.x-k8s.io_kopscontrolplanetemplates.yaml @@ -3,7 +3,9 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.14.0 + controller-gen.kubebuilder.io/version: v0.19.0 + labels: + cluster.x-k8s.io/v1beta2: v1beta1 name: kopscontrolplanetemplates.controlplane.cluster.x-k8s.io spec: group: controlplane.cluster.x-k8s.io diff --git a/clusterapi/controlplane/kops/api/v1beta1/kopscontrolplane_types.go b/clusterapi/controlplane/kops/api/v1beta1/kopscontrolplane_types.go index 5799346cac114..82484b87a43b5 100644 --- a/clusterapi/controlplane/kops/api/v1beta1/kopscontrolplane_types.go +++ b/clusterapi/controlplane/kops/api/v1beta1/kopscontrolplane_types.go @@ -40,6 +40,56 @@ type KopsControlPlaneMachineTemplate struct { // KopsControlPlaneStatus defines the observed state of KopsControlPlane. type KopsControlPlaneStatus struct { + // initialization provides observations of the KopsControlPlane initialization process. + // NOTE: Fields in this struct are part of the Cluster API contract and are used to orchestrate initial Machine provisioning. + // +optional + Initialization KopsControlPlaneInitializationStatus `json:"initialization,omitempty,omitzero"` + + // KopsControllerEndpoint represents the endpoints used to communicate with the control plane. + SystemEndpoints []SystemEndpoint `json:"systemEndpoints,omitempty"` +} + +// KopsControlPlaneInitializationStatus provides observations of the KopsControlPlane initialization process. +// +kubebuilder:validation:MinProperties=1 +type KopsControlPlaneInitializationStatus struct { + // controlPlaneInitialized is true when the KopsControlPlane provider reports that the Kubernetes control plane is initialized; + // A control plane is considered initialized when it can accept requests, no matter if this happens before + // the control plane is fully provisioned or not. + // NOTE: this field is part of the Cluster API contract, and it is used to orchestrate initial Machine provisioning. + // +optional + ControlPlaneInitialized *bool `json:"controlPlaneInitialized,omitempty"` +} + +// SystemEndpointType identifies the service that the SystemEndpoint is describing. +type SystemEndpointType string + +const ( + // SystemEndpointTypeKubeAPIServer indicates that the endpoint is for the Kubernetes API server. + SystemEndpointTypeKubeAPIServer SystemEndpointType = "kube-apiserver" + // SystemEndpointTypeKopsController indicates that the endpoint is for the kops-controller. 
+ SystemEndpointTypeKopsController SystemEndpointType = "kops-controller" +) + +// SystemEndpointScope describes whether an endpoint is intended for internal or external use. +type SystemEndpointScope string + +const ( + // SystemEndpointScopeInternal indicates that the endpoint is intended for internal use. + SystemEndpointScopeInternal SystemEndpointScope = "internal" + // SystemEndpointScopeExternal indicates that the endpoint is intended for external use. + SystemEndpointScopeExternal SystemEndpointScope = "external" +) + +// SystemEndpoint represents a reachable Kubernetes API endpoint. +type SystemEndpoint struct { + // The type of the endpoint + Type SystemEndpointType `json:"type"` + + // The hostname or IP on which the API server is serving. + Endpoint string `json:"endpoint"` + + // Whether the endpoint is intended for internal or external use. + Scope SystemEndpointScope `json:"scope"` } // +kubebuilder:object:root=true @@ -47,6 +97,7 @@ type KopsControlPlaneStatus struct { // +kubebuilder:storageversion // +kubebuilder:subresource:status // +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas,selectorpath=.status.selector +// +kubebuilder:metadata:labels=cluster.x-k8s.io/v1beta2=v1beta1 // +kubebuilder:printcolumn:name="Cluster",type="string",JSONPath=".metadata.labels['cluster\\.x-k8s\\.io/cluster-name']",description="Cluster" // +kubebuilder:printcolumn:name="Initialized",type=boolean,JSONPath=".status.initialized",description="This denotes whether or not the control plane has the uploaded kops-config configmap" // +kubebuilder:printcolumn:name="API Server Available",type=boolean,JSONPath=".status.ready",description="KopsControlPlane API Server is ready to receive requests" diff --git a/clusterapi/controlplane/kops/api/v1beta1/kopscontrolplanetemplate_types.go b/clusterapi/controlplane/kops/api/v1beta1/kopscontrolplanetemplate_types.go index ac8a4c45751b1..b1727f46265a8 100644 --- a/clusterapi/controlplane/kops/api/v1beta1/kopscontrolplanetemplate_types.go +++ b/clusterapi/controlplane/kops/api/v1beta1/kopscontrolplanetemplate_types.go @@ -31,6 +31,7 @@ type KopsControlPlaneTemplateSpec struct { // +kubebuilder:object:root=true // +kubebuilder:resource:path=kopscontrolplanetemplates,scope=Namespaced,categories=cluster-api // +kubebuilder:storageversion +// +kubebuilder:metadata:labels=cluster.x-k8s.io/v1beta2=v1beta1 // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="Time duration since creation of KopsControlPlaneTemplate" // KopsControlPlaneTemplate is the Schema for the kopscontrolplanetemplates API. diff --git a/clusterapi/controlplane/kops/api/v1beta1/zz_generated.deepcopy.go b/clusterapi/controlplane/kops/api/v1beta1/zz_generated.deepcopy.go index d7ed0f8033ead..d0f8af1e6c9ee 100644 --- a/clusterapi/controlplane/kops/api/v1beta1/zz_generated.deepcopy.go +++ b/clusterapi/controlplane/kops/api/v1beta1/zz_generated.deepcopy.go @@ -30,7 +30,7 @@ func (in *KopsControlPlane) DeepCopyInto(out *KopsControlPlane) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) out.Spec = in.Spec - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KopsControlPlane. @@ -51,6 +51,26 @@ func (in *KopsControlPlane) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KopsControlPlaneInitializationStatus) DeepCopyInto(out *KopsControlPlaneInitializationStatus) { + *out = *in + if in.ControlPlaneInitialized != nil { + in, out := &in.ControlPlaneInitialized, &out.ControlPlaneInitialized + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KopsControlPlaneInitializationStatus. +func (in *KopsControlPlaneInitializationStatus) DeepCopy() *KopsControlPlaneInitializationStatus { + if in == nil { + return nil + } + out := new(KopsControlPlaneInitializationStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *KopsControlPlaneList) DeepCopyInto(out *KopsControlPlaneList) { *out = *in @@ -117,6 +137,12 @@ func (in *KopsControlPlaneSpec) DeepCopy() *KopsControlPlaneSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *KopsControlPlaneStatus) DeepCopyInto(out *KopsControlPlaneStatus) { *out = *in + in.Initialization.DeepCopyInto(&out.Initialization) + if in.SystemEndpoints != nil { + in, out := &in.SystemEndpoints, &out.SystemEndpoints + *out = make([]SystemEndpoint, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KopsControlPlaneStatus. @@ -250,3 +276,18 @@ func (in *KopsControlPlaneTemplateSpec) DeepCopy() *KopsControlPlaneTemplateSpec in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SystemEndpoint) DeepCopyInto(out *SystemEndpoint) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SystemEndpoint. 
+func (in *SystemEndpoint) DeepCopy() *SystemEndpoint { + if in == nil { + return nil + } + out := new(SystemEndpoint) + in.DeepCopyInto(out) + return out +} diff --git a/clusterapi/examples/capi-loopback.yaml b/clusterapi/examples/capi-loopback.yaml new file mode 100644 index 0000000000000..6826524bc59d8 --- /dev/null +++ b/clusterapi/examples/capi-loopback.yaml @@ -0,0 +1,42 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: root-reader +rules: +# Needed for a probe; but this feels like a poor choice - /api would more open by default +# TODO: File bug, also error handling is poor and usually prints "Unknown" +- nonResourceURLs: ["/"] + verbs: ["get"] + +# Pods +- resources: ["pods"] + apiGroups: [""] + verbs: + # Needed for "checking if we're running on workload cluster" + - "get" + +# Nodes +- resources: ["nodes"] + apiGroups: [""] + verbs: + # We monitor in-cluster nodes + - get + - list + - watch + # We patch nodes with our labels + - patch + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: root-reader-capi-manager +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: root-reader +subjects: +- kind: ServiceAccount + name: capi-manager + namespace: capi-system diff --git a/clusterapi/examples/kopscontroller.yaml b/clusterapi/examples/kopscontroller.yaml new file mode 100644 index 0000000000000..428fca50a1b13 --- /dev/null +++ b/clusterapi/examples/kopscontroller.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kops-controller-capi +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-admin # TODO: We only need to read pods +subjects: +- kind: ServiceAccount + name: kops-controller + namespace: kube-system diff --git a/clusterapi/examples/manifest.yaml b/clusterapi/examples/manifest.yaml deleted file mode 100644 index c2e5a76ab3725..0000000000000 --- a/clusterapi/examples/manifest.yaml +++ /dev/null @@ -1,128 +0,0 @@ ---- -apiVersion: cluster.x-k8s.io/v1beta1 -kind: Cluster -metadata: - name: "${CLUSTER_NAME}" -spec: - #clusterNetwork: - # pods: - # cidrBlocks: ["192.168.0.0/16"] - infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 - kind: GCPCluster - name: "${CLUSTER_NAME}" - controlPlaneRef: - kind: KopsControlPlane - apiVersion: controlplane.cluster.x-k8s.io/v1beta1 - name: "${CLUSTER_NAME}-control-plane" ---- -apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 -kind: GCPCluster -metadata: - name: "${CLUSTER_NAME}" -spec: - project: "${GCP_PROJECT}" - region: "${GCP_REGION}" - network: - name: "${GCP_NETWORK_NAME}" -# --- -# kind: KubeadmControlPlane -# apiVersion: controlplane.cluster.x-k8s.io/v1beta1 -# metadata: -# name: "${CLUSTER_NAME}-control-plane" -# spec: -# replicas: ${CONTROL_PLANE_MACHINE_COUNT} -# machineTemplate: -# infrastructureRef: -# kind: GCPMachineTemplate -# apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 -# name: "${CLUSTER_NAME}-control-plane" -# kubeadmConfigSpec: -# initConfiguration: -# nodeRegistration: -# name: '{{ ds.meta_data.local_hostname.split(".")[0] }}' -# kubeletExtraArgs: -# cloud-provider: gce -# clusterConfiguration: -# apiServer: -# timeoutForControlPlane: 20m -# extraArgs: -# cloud-provider: gce -# controllerManager: -# extraArgs: -# cloud-provider: gce -# allocate-node-cidrs: "false" -# joinConfiguration: -# nodeRegistration: -# name: '{{ ds.meta_data.local_hostname.split(".")[0] }}' -# kubeletExtraArgs: -# cloud-provider: gce -# 
version: "${KUBERNETES_VERSION}" -# --- -# kind: GCPMachineTemplate -# apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 -# metadata: -# name: "${CLUSTER_NAME}-control-plane" -# spec: -# template: -# spec: -# instanceType: "${GCP_CONTROL_PLANE_MACHINE_TYPE}" -# image: "${IMAGE_ID}" ---- -apiVersion: cluster.x-k8s.io/v1beta1 -kind: MachineDeployment -metadata: - name: "${CLUSTER_NAME}-md-0" -spec: - clusterName: "${CLUSTER_NAME}" - replicas: ${WORKER_MACHINE_COUNT} - selector: - matchLabels: - template: - spec: - clusterName: "${CLUSTER_NAME}" - version: "${KUBERNETES_VERSION}" - failureDomain: "${GCP_ZONE}" - bootstrap: - configRef: - name: "${CLUSTER_NAME}-md-0" - apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 - kind: KopsConfigTemplate - infrastructureRef: - name: "${CLUSTER_NAME}-md-0" - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 - kind: GCPMachineTemplate ---- -apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 -kind: GCPMachineTemplate -metadata: - name: "${CLUSTER_NAME}-md-0" -spec: - template: - spec: - instanceType: "${GCP_NODE_MACHINE_TYPE}" - image: "${IMAGE_ID}" - subnet: "${GCP_SUBNET}" - additionalNetworkTags: - - clusterapi-k8s-local-k8s-io-role-node - publicIP: true - additionalMetadata: - - key: kops-k8s-io-instance-group-name - value: nodes-us-east4-a - - key: cluster-name - value: clusterapi.k8s.local - - ---- -apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 -kind: KopsConfigTemplate -metadata: - name: "${CLUSTER_NAME}-md-0" -spec: - template: - spec: {} - #joinConfiguration: - # nodeRegistration: - # name: '{{ ds.meta_data.local_hostname.split(".")[0] }}' - # kubeletExtraArgs: - # cloud-provider: gce diff --git a/clusterapi/gen.go b/clusterapi/gen.go index e69ee0cfc6f18..bd7e1ca128093 100644 --- a/clusterapi/gen.go +++ b/clusterapi/gen.go @@ -16,6 +16,6 @@ limitations under the License. package main -//go:generate go run sigs.k8s.io/controller-tools/cmd/controller-gen@v0.14.0 output:dir=config/crds crd:crdVersions=v1 paths=./bootstrap/kops/api/...;./controlplane/kops/api/... +//go:generate go run sigs.k8s.io/controller-tools/cmd/controller-gen@v0.19.0 output:dir=config/crds crd:crdVersions=v1 paths=./bootstrap/kops/api/...;./controlplane/kops/api/... -//go:generate go run sigs.k8s.io/controller-tools/cmd/controller-gen@v0.14.0 object paths=./snapshot/cluster-api/...;./bootstrap/kops/api/...;./controlplane/kops/api/... +//go:generate go run sigs.k8s.io/controller-tools/cmd/controller-gen@v0.19.0 object paths=./snapshot/cluster-api/...;./bootstrap/kops/api/...;./controlplane/kops/api/... diff --git a/clusterapi/main.go b/clusterapi/main.go deleted file mode 100644 index 80b2d88d42157..0000000000000 --- a/clusterapi/main.go +++ /dev/null @@ -1,113 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package main - -import ( - "context" - "flag" - "fmt" - "os" - - coordinationv1 "k8s.io/api/coordination/v1" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" - _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" - "k8s.io/klog/v2" - "k8s.io/klog/v2/klogr" - "k8s.io/kops/clusterapi/bootstrap/controllers" - bootstrapapi "k8s.io/kops/clusterapi/bootstrap/kops/api/v1beta1" - controlplaneapi "k8s.io/kops/clusterapi/controlplane/kops/api/v1beta1" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/metrics/server" - // +kubebuilder:scaffold:imports -) - -var ( - scheme = runtime.NewScheme() -) - -func init() { - // +kubebuilder:scaffold:scheme -} - -func main() { - ctx := context.Background() - if err := run(ctx); err != nil { - fmt.Fprintf(os.Stderr, "%v\n", err) - os.Exit(1) - } -} - -func run(ctx context.Context) error { - klog.InitFlags(nil) - - // Disable metrics by default (avoid port conflicts, also risky because we are host network) - metricsAddress := ":0" - - flag.Parse() - - ctrl.SetLogger(klogr.New()) - - if err := buildScheme(); err != nil { - return fmt.Errorf("error building scheme: %w", err) - } - - kubeConfig := ctrl.GetConfigOrDie() - options := ctrl.Options{ - Scheme: scheme, - // MetricsBindAddress: metricsAddress, - // LeaderElection: true, - // LeaderElectionID: "kops-clusterapi-leader", - } - options.Metrics = server.Options{ - BindAddress: metricsAddress, - } - mgr, err := ctrl.NewManager(kubeConfig, options) - - if err != nil { - return fmt.Errorf("error starting manager: %w", err) - } - - if err := controllers.NewKopsConfigReconciler(mgr); err != nil { - return fmt.Errorf("error creating controller: %w", err) - } - - if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { - return fmt.Errorf("error running manager: %w", err) - } - return nil -} - -func buildScheme() error { - if err := corev1.AddToScheme(scheme); err != nil { - return fmt.Errorf("error registering corev1: %v", err) - } - - if err := bootstrapapi.AddToScheme(scheme); err != nil { - return fmt.Errorf("error registering api: %w", err) - } - - if err := controlplaneapi.AddToScheme(scheme); err != nil { - return fmt.Errorf("error registering api: %w", err) - } - - // Needed so that the leader-election system can post events - if err := coordinationv1.AddToScheme(scheme); err != nil { - return fmt.Errorf("error registering coordinationv1: %v", err) - } - return nil -} diff --git a/clusterapi/manifests/cluster-api-provider-gcp/kustomization.yaml b/clusterapi/manifests/cluster-api-provider-gcp/kustomization.yaml index 58acf03ad8f3a..12494bc30eefb 100644 --- a/clusterapi/manifests/cluster-api-provider-gcp/kustomization.yaml +++ b/clusterapi/manifests/cluster-api-provider-gcp/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - https://github.com/kubernetes-sigs/cluster-api-provider-gcp//config/default?ref=v1.5.0 + - https://github.com/kubernetes-sigs/cluster-api-provider-gcp//config/default?ref=v1.10.0 patches: - path: patches/set_feature_gates.yaml diff --git a/clusterapi/manifests/cluster-api/kustomization.yaml b/clusterapi/manifests/cluster-api/kustomization.yaml index 422dffbef25c9..09268e95c3a0c 100644 --- a/clusterapi/manifests/cluster-api/kustomization.yaml +++ b/clusterapi/manifests/cluster-api/kustomization.yaml @@ -2,7 +2,8 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - https://github.com/kubernetes-sigs/cluster-api//config/default?ref=v1.5.3 + - 
https://github.com/kubernetes-sigs/cluster-api//config/default?ref=v1.11.1 patches: - path: patches/set_feature_gates.yaml + - path: patches/set_manager_image.yaml diff --git a/clusterapi/manifests/cluster-api/patches/set_feature_gates.yaml b/clusterapi/manifests/cluster-api/patches/set_feature_gates.yaml index c8a78de787a15..8ee0483b5253f 100644 --- a/clusterapi/manifests/cluster-api/patches/set_feature_gates.yaml +++ b/clusterapi/manifests/cluster-api/patches/set_feature_gates.yaml @@ -10,6 +10,6 @@ spec: - name: manager args: - --leader-elect - - --metrics-bind-addr=localhost:8080 + #- --metrics-bind-addr=localhost:8080 #- --feature-gates=MachinePool=${EXP_MACHINE_POOL:=false},ClusterResourceSet=${EXP_CLUSTER_RESOURCE_SET:=false},ClusterTopology=${CLUSTER_TOPOLOGY:=false},RuntimeSDK=${EXP_RUNTIME_SDK:=false},MachineSetPreflightChecks=${EXP_MACHINE_SET_PREFLIGHT_CHECKS:=false} - --feature-gates=MachinePool=false,ClusterResourceSet=false,ClusterTopology=false,RuntimeSDK=false,MachineSetPreflightChecks=false diff --git a/clusterapi/manifests/cluster-api/patches/set_manager_image.yaml b/clusterapi/manifests/cluster-api/patches/set_manager_image.yaml new file mode 100644 index 0000000000000..5d20f7eb9870d --- /dev/null +++ b/clusterapi/manifests/cluster-api/patches/set_manager_image.yaml @@ -0,0 +1,12 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: capi-controller-manager + namespace: capi-system +spec: + template: + spec: + containers: + - image: registry.k8s.io/cluster-api/cluster-api-controller:v1.11.1 + imagePullPolicy: IfNotPresent + name: manager diff --git a/cmd/kops-controller/controllers/gceipam.go b/cmd/kops-controller/controllers/gceipam.go index 37a8e8c9ad485..80f97a21cd3ae 100644 --- a/cmd/kops-controller/controllers/gceipam.go +++ b/cmd/kops-controller/controllers/gceipam.go @@ -72,6 +72,8 @@ type GCEIPAMReconciler struct { } // +kubebuilder:rbac:groups=,resources=nodes,verbs=get;list;watch;patch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch + // Reconcile is the main reconciler function that observes node changes. 
func (r *GCEIPAMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = r.log.WithValues("node", req.NamespacedName) diff --git a/cmd/kops-controller/main.go b/cmd/kops-controller/main.go index d108fb7993984..88e636a53cffc 100644 --- a/cmd/kops-controller/main.go +++ b/cmd/kops-controller/main.go @@ -29,15 +29,19 @@ import ( _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" "k8s.io/klog/v2" "k8s.io/klog/v2/klogr" + bootstrapapi "k8s.io/kops/clusterapi/bootstrap/kops/api/v1beta1" + controlplaneapi "k8s.io/kops/clusterapi/controlplane/kops/api/v1beta1" "k8s.io/kops/cmd/kops-controller/controllers" "k8s.io/kops/cmd/kops-controller/pkg/config" "k8s.io/kops/cmd/kops-controller/pkg/server" "k8s.io/kops/pkg/apis/kops/v1alpha2" "k8s.io/kops/pkg/bootstrap" "k8s.io/kops/pkg/bootstrap/pkibootstrap" + "k8s.io/kops/pkg/controllers/clusterapi" "k8s.io/kops/pkg/nodeidentity" nodeidentityaws "k8s.io/kops/pkg/nodeidentity/aws" nodeidentityazure "k8s.io/kops/pkg/nodeidentity/azure" + nodeidentityclusterapi "k8s.io/kops/pkg/nodeidentity/clusterapi" nodeidentitydo "k8s.io/kops/pkg/nodeidentity/do" nodeidentitygce "k8s.io/kops/pkg/nodeidentity/gce" nodeidentityhetzner "k8s.io/kops/pkg/nodeidentity/hetzner" @@ -102,7 +106,7 @@ func main() { ctrl.SetLogger(klogr.New()) - scheme, err := buildScheme() + scheme, err := buildScheme(&opt) if err != nil { setupLog.Error(err, "error building scheme") os.Exit(1) @@ -127,6 +131,11 @@ func main() { vfsContext := vfs.NewVFSContext() + var capiManager *nodeidentityclusterapi.Manager + if opt.CAPI.IsEnabled() { + capiManager = nodeidentityclusterapi.NewManager(mgr.GetClient()) + } + if opt.Server != nil { var verifiers []bootstrap.Verifier var err error @@ -139,7 +148,7 @@ func main() { verifiers = append(verifiers, verifier) } if opt.Server.Provider.GCE != nil { - verifier, err := gcetpmverifier.NewTPMVerifier(opt.Server.Provider.GCE) + verifier, err := gcetpmverifier.NewTPMVerifier(opt.Server.Provider.GCE, capiManager) if err != nil { setupLog.Error(err, "unable to create verifier") os.Exit(1) @@ -239,6 +248,13 @@ func main() { // +kubebuilder:scaffold:builder + if opt.CAPI.IsEnabled() { + if err := clusterapi.RegisterControllers(mgr); err != nil { + setupLog.Error(err, "registering Cluster API controllers") + os.Exit(1) + } + } + setupLog.Info("starting manager") if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "problem running manager") @@ -246,7 +262,7 @@ func main() { } } -func buildScheme() (*runtime.Scheme, error) { +func buildScheme(opt *config.Options) (*runtime.Scheme, error) { scheme := runtime.NewScheme() if err := corev1.AddToScheme(scheme); err != nil { return nil, fmt.Errorf("error registering corev1: %v", err) @@ -258,10 +274,24 @@ func buildScheme() (*runtime.Scheme, error) { if err := coordinationv1.AddToScheme(scheme); err != nil { return nil, fmt.Errorf("error registering coordinationv1: %v", err) } + if opt.CAPI.IsEnabled() { + if err := bootstrapapi.AddToScheme(scheme); err != nil { + return nil, fmt.Errorf("error registering kops bootstrap cluster API: %v", err) + } + if err := controlplaneapi.AddToScheme(scheme); err != nil { + return nil, fmt.Errorf("error registering kops control-plane cluster API: %v", err) + } + } + return scheme, nil } func addNodeController(ctx context.Context, mgr manager.Manager, vfsContext *vfs.VFSContext, opt *config.Options) error { + var capiManager *nodeidentityclusterapi.Manager + if opt.CAPI.IsEnabled() { + capiManager = 
nodeidentityclusterapi.NewManager(mgr.GetClient()) + } + var legacyIdentifier nodeidentity.LegacyIdentifier var identifier nodeidentity.Identifier var err error @@ -269,43 +299,43 @@ func addNodeController(ctx context.Context, mgr manager.Manager, vfsContext *vfs case "aws": identifier, err = nodeidentityaws.New(ctx, opt.CacheNodeidentityInfo) if err != nil { - return fmt.Errorf("error building identifier: %v", err) + return fmt.Errorf("error building node identifier: %w", err) } case "gce": - legacyIdentifier, err = nodeidentitygce.New() + identifier, err = nodeidentitygce.New(opt.ClusterName, capiManager) if err != nil { - return fmt.Errorf("error building identifier: %v", err) + return fmt.Errorf("error building node identifier: %w", err) } case "openstack": identifier, err = nodeidentityos.New(opt.CacheNodeidentityInfo) if err != nil { - return fmt.Errorf("error building identifier: %v", err) + return fmt.Errorf("error building node identifier: %w", err) } case "digitalocean": legacyIdentifier, err = nodeidentitydo.New() if err != nil { - return fmt.Errorf("error building identifier: %v", err) + return fmt.Errorf("error building node identifier: %w", err) } case "hetzner": identifier, err = nodeidentityhetzner.New(opt.CacheNodeidentityInfo) if err != nil { - return fmt.Errorf("error building identifier: %w", err) + return fmt.Errorf("error building node identifier: %w", err) } case "azure": identifier, err = nodeidentityazure.New(opt.CacheNodeidentityInfo) if err != nil { - return fmt.Errorf("error building identifier: %v", err) + return fmt.Errorf("error building node identifier: %w", err) } case "scaleway": identifier, err = nodeidentityscw.New(opt.CacheNodeidentityInfo) if err != nil { - return fmt.Errorf("error building identifier: %w", err) + return fmt.Errorf("error building node identifier: %w", err) } case "metal": diff --git a/cmd/kops-controller/pkg/config/options.go b/cmd/kops-controller/pkg/config/options.go index 9c6fb65d20269..0255f002e8fb1 100644 --- a/cmd/kops-controller/pkg/config/options.go +++ b/cmd/kops-controller/pkg/config/options.go @@ -40,11 +40,27 @@ type Options struct { // Discovery configures options relating to discovery, particularly for gossip mode. Discovery *DiscoveryOptions `json:"discovery,omitempty"` + + // CAPI configures Cluster API (CAPI) support. + CAPI *CAPIOptions `json:"capi,omitempty"` } func (o *Options) PopulateDefaults() { } +type CAPIOptions struct { + // Enabled specifies whether CAPI support is enabled. + Enabled *bool `json:"enabled,omitempty"` +} + +// IsEnabled returns true if CAPI support is enabled. +func (o *CAPIOptions) IsEnabled() bool { + if o == nil || o.Enabled == nil { + return false + } + return *o.Enabled +} + type ServerOptions struct { // Listen is the network endpoint (ip and port) we should listen on. 
Listen string diff --git a/cmd/kops-controller/pkg/server/node_config.go b/cmd/kops-controller/pkg/server/node_config.go index b442dc29054ca..5ea1e5016d0d2 100644 --- a/cmd/kops-controller/pkg/server/node_config.go +++ b/cmd/kops-controller/pkg/server/node_config.go @@ -22,33 +22,45 @@ import ( "fmt" "k8s.io/klog/v2" + "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/pkg/apis/nodeup" "k8s.io/kops/pkg/bootstrap" "k8s.io/kops/pkg/commands" + "k8s.io/kops/pkg/nodeidentity/clusterapi" ) func (s *Server) getNodeConfig(ctx context.Context, req *nodeup.BootstrapRequest, identity *bootstrap.VerifyResult) (*nodeup.NodeConfig, error) { - klog.Infof("getting node config for %+v", req) + log := klog.FromContext(ctx) + + if identity == nil { + return nil, fmt.Errorf("node identity is required") + } + + log.Info("getting node config", "req", req, "identity", identity) instanceGroupName := identity.InstanceGroupName - if instanceGroupName == "" { - return nil, fmt.Errorf("did not find InstanceGroup for node %q", identity.NodeName) + if instanceGroupName == "" && identity.CAPIMachine == nil { + return nil, fmt.Errorf("did not find owner for node %q", identity.NodeName) } - nodeConfig := &nodeup.NodeConfig{} + var nodeConfig *nodeup.NodeConfig - if s.opt.Cloud == "metal" { - bootstrapData, err := s.buildNodeupConfig(ctx, s.opt.ClusterName, identity.InstanceGroupName) - if err != nil { - return nil, fmt.Errorf("building nodeConfig for instanceGroup: %w", err) - } - nodeupConfig, err := json.Marshal(bootstrapData.NodeupConfig) + configBuilder := &commands.ConfigBuilder{ + Clientset: s.clientset, + ClusterName: s.opt.ClusterName, + } + + if identity.CAPIMachine != nil && instanceGroupName == "" { + // We have a CAPI Machine (but no instance group) + instanceGroup, err := s.buildInstanceGroupFromCAPI(ctx, identity.CAPIMachine) if err != nil { - return nil, fmt.Errorf("marshalling nodeupConfig: %w", err) + return nil, fmt.Errorf("error building InstanceGroup from CAPI Machine: %w", err) } - nodeConfig.NodeupConfig = string(nodeupConfig) + log.Info("built InstanceGroup from CAPI Machine", "instanceGroup", instanceGroup) + configBuilder.InstanceGroup = instanceGroup + } else if s.opt.Cloud == "metal" { + configBuilder.InstanceGroupName = instanceGroupName } else { - // Note: For now, we're assuming there is only a single cluster, and it is ours. 
// We therefore use the configured base path @@ -58,9 +70,23 @@ func (s *Server) getNodeConfig(ctx context.Context, req *nodeup.BootstrapRequest if err != nil { return nil, fmt.Errorf("error loading NodeupConfig %q: %v", p, err) } + nodeConfig = &nodeup.NodeConfig{} nodeConfig.NodeupConfig = string(b) } + if nodeConfig == nil { + bootstrapData, err := configBuilder.GetBootstrapData(ctx) + if err != nil { + return nil, fmt.Errorf("building nodeConfig for instanceGroup: %w", err) + } + nodeupConfig, err := json.Marshal(bootstrapData.NodeupConfig) + if err != nil { + return nil, fmt.Errorf("marshalling nodeupConfig: %w", err) + } + nodeConfig = &nodeup.NodeConfig{} + nodeConfig.NodeupConfig = string(nodeupConfig) + } + { secretIDs := []string{ "dockerconfig", @@ -80,17 +106,35 @@ func (s *Server) getNodeConfig(ctx context.Context, req *nodeup.BootstrapRequest return nodeConfig, nil } -func (s *Server) buildNodeupConfig(ctx context.Context, clusterName string, instanceGroupName string) (*commands.BootstrapData, error) { - configBuilder := &commands.ConfigBuilder{ - Clientset: s.clientset, - ClusterName: clusterName, - InstanceGroupName: instanceGroupName, - } +// buildInstanceGroupFromCAPI builds an InstanceGroup from a CAPI Machine, for building bootstrap data. +// It builds a minimal instanceGroup, because many fields (e.g. image, machineType, minSize, maxSize) +// are not relevant for building the bootstrap data. +func (s *Server) buildInstanceGroupFromCAPI(ctx context.Context, capiMachine *clusterapi.Machine) (*kops.InstanceGroup, error) { + log := klog.FromContext(ctx) - bootstrapData, err := configBuilder.GetBootstrapData(ctx) - if err != nil { - return nil, err + capiDeploymentName := capiMachine.GetDeploymentName() + if capiDeploymentName == "" { + return nil, fmt.Errorf("CAPI Machine is missing cluster.x-k8s.io/deployment-name label") + } + failureDomain := capiMachine.GetFailureDomain() + if failureDomain == "" { + return nil, fmt.Errorf("CAPI Machine is missing spec.failureDomain") } - return bootstrapData, nil + ig := &kops.InstanceGroup{} + ig.Labels = map[string]string{ + // kops.LabelClusterName: cluster.Name, // Should not matter + } + ig.Name = capiDeploymentName + + // "maxSize": 1, // Should not matter + // "minSize": 1, // Should not matter + // "image": "", // Should not matter + // "machineType": "", // Should not matter + // "subnets": // Should not matter + ig.Spec.Zones = []string{failureDomain} + ig.Spec.Role = "Node" // TODO: Support other roles? 
+ + log.Info("built InstanceGroup from CAPI Machine", "instanceGroup", ig) + return ig, nil } diff --git a/cmd/kops-controller/pkg/server/server.go b/cmd/kops-controller/pkg/server/server.go index f1609741bb021..0e03118a54ded 100644 --- a/cmd/kops-controller/pkg/server/server.go +++ b/cmd/kops-controller/pkg/server/server.go @@ -272,7 +272,7 @@ func (s *Server) bootstrap(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") _ = json.NewEncoder(w).Encode(resp) - klog.Infof("bootstrap %s %s success", r.RemoteAddr, id.NodeName) + klog.Infof("bootstrap %s (req.includeNodeConfig: %t, req.certs.#: %d, req.keypairs.#: %d) success", r.RemoteAddr, req.IncludeNodeConfig, len(req.Certs), len(req.KeypairIDs)) } func (s *Server) issueCert(ctx context.Context, name string, pubKey string, id *bootstrap.VerifyResult, validHours uint32, keypairIDs map[string]string) (string, error) { diff --git a/hack/dev-build-metal.sh b/hack/dev-build-metal.sh index 813c1a3d350b3..a58c4931b1dc3 100644 --- a/hack/dev-build-metal.sh +++ b/hack/dev-build-metal.sh @@ -27,9 +27,6 @@ REPO_ROOT=$(git rev-parse --show-toplevel) cd "${REPO_ROOT}" || return -# Dev environments typically do not need to test multiple architectures -KOPS_ARCH=amd64 -export KOPS_ARCH # Configure aws cli to talk to local storage aws configure --profile metal set aws_access_key_id accesskey @@ -46,7 +43,7 @@ aws configure --profile metal set s3.multipart_threshold 64GB export UPLOAD_DEST=s3://kops-dev-build/ aws --version aws s3 ls "${UPLOAD_DEST}" || aws s3 mb "${UPLOAD_DEST}" || return -make kops-install dev-version-dist-${KOPS_ARCH} || return +make kops-install dev-version-dist || return hack/upload .build/upload/ "${UPLOAD_DEST}" || return diff --git a/k8s/kustomization.yaml b/k8s/kustomization.yaml new file mode 100644 index 0000000000000..11a0f6d8a689a --- /dev/null +++ b/k8s/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +commonAnnotations: + api-approved.kubernetes.io: https://github.com/kubernetes/enhancements/pull/1111 + +resources: +- crds/kops.k8s.io_clusters.yaml +- crds/kops.k8s.io_instancegroups.yaml diff --git a/pkg/bootstrap/authenticate.go b/pkg/bootstrap/authenticate.go index dd04777394284..a182ea94ebf76 100644 --- a/pkg/bootstrap/authenticate.go +++ b/pkg/bootstrap/authenticate.go @@ -20,6 +20,8 @@ import ( "context" "errors" "net/http" + + "k8s.io/kops/pkg/nodeidentity/clusterapi" ) var ErrAlreadyExists = errors.New("node already exists") @@ -37,6 +39,9 @@ type VerifyResult struct { // InstanceGroupName is the name of the kops InstanceGroup this node is a member of. InstanceGroupName string + // CAPIMachine is the Cluster API Machine object corresponding to this node, if available. + CAPIMachine *clusterapi.Machine + // CertificateNames is the alternate names the node is authorized to use for certificates. CertificateNames []string diff --git a/pkg/commands/toolbox_enroll.go b/pkg/commands/toolbox_enroll.go index 02a82571687c7..bcb25e8405c23 100644 --- a/pkg/commands/toolbox_enroll.go +++ b/pkg/commands/toolbox_enroll.go @@ -670,7 +670,7 @@ func (b *ConfigBuilder) GetAssetBuilder(ctx context.Context) (*assets.AssetBuild return nil, err } - // ApplyClusterCmd is get the assets. + // ApplyClusterCmd is used to get the assets. // We use DryRun and GetAssets to do this without applying any changes. 
apply := &cloudup.ApplyClusterCmd{ Cloud: cloud, diff --git a/pkg/controllers/clusterapi/cluster_controller.go b/pkg/controllers/clusterapi/cluster_controller.go new file mode 100644 index 0000000000000..8d57bf07a3d4c --- /dev/null +++ b/pkg/controllers/clusterapi/cluster_controller.go @@ -0,0 +1,385 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package clusterapi + +import ( + "context" + "fmt" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/kops/pkg/apis/kops" + kopsapi "k8s.io/kops/pkg/apis/kops/v1alpha2" + "k8s.io/kops/pkg/kopscodecs" + "k8s.io/kops/upup/pkg/fi/cloudup" + "k8s.io/kops/upup/pkg/fi/cloudup/gce" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/yaml" + + capikops "k8s.io/kops/clusterapi/controlplane/kops/api/v1beta1" +) + +// NewClusterReconciler is the constructor for an ClusterReconciler +func NewClusterReconciler(mgr manager.Manager) error { + r := &ClusterReconciler{ + client: mgr.GetClient(), + } + + return ctrl.NewControllerManagedBy(mgr). + For(&kopsapi.Cluster{}). + Complete(r) +} + +// ClusterReconciler observes Node objects, and labels them with the correct labels for the instancegroup +// This used to be done by the kubelet, but is moving to a central controller for greater security in 1.16 +type ClusterReconciler struct { + // client is the controller-runtime client + client client.Client +} + +// +kubebuilder:rbac:groups=,resources=nodes,verbs=get;list;watch;patch +// Reconcile is the main reconciler function that observes node changes. 
+func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + cluster := &kopsapi.Cluster{} + if err := r.client.Get(ctx, req.NamespacedName, cluster); err != nil { + if apierrors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + + clusterScope := &clusterScope{ + Cluster: cluster, + } + if err := clusterScope.Apply(ctx, r.client); err != nil { + return ctrl.Result{}, fmt.Errorf("error configuring cluster CAPI objects: %w", err) + } + + return ctrl.Result{}, nil +} + +type clusterScope struct { + Cluster *kopsapi.Cluster + + capiCluster *unstructured.Unstructured + capiInfra *unstructured.Unstructured + capiControlPlane *unstructured.Unstructured +} + +func (s *clusterScope) namespace() string { + return "kube-system" +} + +func (s *clusterScope) ssa(ctx context.Context, kube client.Client, u *unstructured.Unstructured) error { + return kube.Patch(ctx, u, client.Apply, client.FieldOwner("cluster-controller")) +} + +func (s *clusterScope) ssaStatus(ctx context.Context, kube client.Client, u *unstructured.Unstructured) error { + return kube.Status().Patch(ctx, u, client.Apply, client.FieldOwner("cluster-controller")) +} + +func (s *clusterScope) Apply(ctx context.Context, kube client.Client) error { + if err := s.applyGCPCluster(ctx, kube); err != nil { + return err + } + if err := s.createKopsControlPlane(ctx, kube); err != nil { + return err + } + if err := s.createClusterObject(ctx, kube); err != nil { + return err + } + return nil +} + +func (s *clusterScope) applyGCPCluster(ctx context.Context, kube client.Client) error { + // This is because of network tags in cloud-provider-gcp + // TODO: cloud-provider-gcp should not assume cluster name is a valid prefix + name := gce.SafeClusterName(s.Cluster.GetName()) + + gcpProject := s.Cluster.Spec.Project + if gcpProject == "" { + return fmt.Errorf("unable to determine gcp project for cluster") + } + gcpRegion := "" + for _, subnet := range s.Cluster.Spec.Subnets { + if gcpRegion == "" { + gcpRegion = subnet.Region + } else if gcpRegion != subnet.Region { + return fmt.Errorf("found multiple gcp regions for cluster") + } + } + if gcpRegion == "" { + return fmt.Errorf("unable to determine gcp region for cluster") + } + + // TODO: Sync with LinkToNetwork + gcpNetworkName := s.Cluster.Spec.NetworkID + if gcpNetworkName == "" { + gcpNetworkName = gce.SafeTruncatedClusterName(s.Cluster.ObjectMeta.Name, 63) + } + if gcpNetworkName == "" { + return fmt.Errorf("unable to determine gcp network for cluster") + } + + obj := map[string]any{ + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "kind": "GCPCluster", + "metadata": map[string]any{ + "name": name, + "namespace": s.namespace(), + }, + "spec": map[string]any{ + "project": gcpProject, + "region": gcpRegion, + "network": map[string]any{ + "name": gcpNetworkName, + }, + }, + } + + u := &unstructured.Unstructured{Object: obj} + + // setOwnerRef(u, s.Cluster) + + if err := s.ssa(ctx, kube, u); err != nil { + return fmt.Errorf("applying GCPCluster object to cluster: %w", err) + } + + s.capiInfra = u + return nil +} + +func (s *clusterScope) findSystemEndpoints(ctx context.Context) ([]capikops.SystemEndpoint, error) { + cluster := s.Cluster + + clusterInternal := &kops.Cluster{} + if err := kopscodecs.Scheme.Convert(cluster, clusterInternal, nil); err != nil { + return nil, fmt.Errorf("converting cluster object: %w", err) + } + + cloud, err := cloudup.BuildCloud(clusterInternal) + if err != nil { + return nil, err + } + + 
// TODO: Sync with BuildKubecfg + + ingresses, err := cloud.GetApiIngressStatus(clusterInternal) + if err != nil { + return nil, fmt.Errorf("error getting ingress status: %v", err) + } + + var targets []capikops.SystemEndpoint + + for _, ingress := range ingresses { + var target capikops.SystemEndpoint + if ingress.Hostname != "" { + target.Endpoint = ingress.Hostname + } + if ingress.IP != "" { + target.Endpoint = ingress.IP + } + target.Type = capikops.SystemEndpointTypeKopsController + if ingress.InternalEndpoint { + target.Scope = capikops.SystemEndpointScopeInternal + } else { + target.Scope = capikops.SystemEndpointScopeExternal + } + targets = append(targets, target) + } + + for _, ingress := range ingresses { + var target capikops.SystemEndpoint + if ingress.Hostname != "" { + target.Endpoint = ingress.Hostname + } + if ingress.IP != "" { + target.Endpoint = ingress.IP + } + target.Type = capikops.SystemEndpointTypeKubeAPIServer + if ingress.InternalEndpoint { + target.Scope = capikops.SystemEndpointScopeInternal + } else { + target.Scope = capikops.SystemEndpointScopeExternal + } + targets = append(targets, target) + } + + // TODO: Sort targets + // TODO: Mark targets as atomic list + + if len(targets) == 0 { + return nil, fmt.Errorf("did not find API endpoint") + } + + return targets, nil +} + +func (s *clusterScope) createKopsControlPlane(ctx context.Context, kube client.Client) error { + // This is because of network tags in cloud-provider-gcp + // TODO: cloud-provider-gcp should not assume cluster name is a valid prefix + name := gce.SafeClusterName(s.Cluster.GetName()) + + status := capikops.KopsControlPlaneStatus{} + + systemEndpoints, err := s.findSystemEndpoints(ctx) + if err != nil { + return err + } + status.SystemEndpoints = systemEndpoints + + status.Initialization = capikops.KopsControlPlaneInitializationStatus{} + controlPlaneInitialized := true + status.Initialization.ControlPlaneInitialized = &controlPlaneInitialized + + // Create secret + { + kubeconfig := map[string]any{ + "apiVersion": "v1", + "clusters": []map[string]any{ + { + "cluster": map[string]any{ + "server": "https://kubernetes.default.svc.cluster.local", + "certificate-authority": "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", + }, + "name": "in-cluster", + }, + }, + "contexts": []map[string]any{ + { + "context": map[string]any{ + "cluster": "in-cluster", + "user": "in-cluster", + }, + "name": "in-cluster", + }, + }, + "current-context": "in-cluster", + "kind": "Config", + "preferences": map[string]any{}, + "users": []map[string]any{ + { + "name": "in-cluster", + "user": map[string]any{ + "tokenFile": "/var/run/secrets/kubernetes.io/serviceaccount/token", + }, + }, + }, + } + + kubeconfigBytes, err := yaml.Marshal(kubeconfig) + if err != nil { + return fmt.Errorf("converting kubeconfig to yaml: %w", err) + } + + obj := map[string]any{ + "apiVersion": "v1", + "kind": "Secret", + "metadata": map[string]any{ + "name": name + "-kubeconfig", + "namespace": s.namespace(), + }, + "data": map[string]any{ + "value": kubeconfigBytes, + }, + "type": "Opaque", + } + + u := &unstructured.Unstructured{Object: obj} + + // Needed so that capi manager has "permission" to read the secret + labels := map[string]string{ + "cluster.x-k8s.io/cluster-name": name, + } + + u.SetLabels(labels) + + setOwnerRef(u, s.Cluster) + if err := s.ssa(ctx, kube, u); err != nil { + return fmt.Errorf("applying kubeconfig secret to cluster: %w", err) + } + } + + // TODO: Sync with LinkToNetwork + obj := map[string]any{ + 
"apiVersion": "controlplane.cluster.x-k8s.io/v1beta1", + "kind": "KopsControlPlane", + "metadata": map[string]any{ + "name": name, + "namespace": s.namespace(), + }, + "spec": map[string]any{}, + } + + u := &unstructured.Unstructured{Object: obj} + + // setOwnerRef(u, s.Cluster) + + if err := s.ssa(ctx, kube, u); err != nil { + return fmt.Errorf("applying object to cluster: %w", err) + } + + // TODO: Sync with LinkToNetwork + statusObj := map[string]any{ + "apiVersion": "controlplane.cluster.x-k8s.io/v1beta1", + "kind": "KopsControlPlane", + "metadata": map[string]any{ + "name": name, + "namespace": s.namespace(), + }, + "status": status, + } + + if err := s.ssaStatus(ctx, kube, &unstructured.Unstructured{Object: statusObj}); err != nil { + return fmt.Errorf("applying object to cluster: %w", err) + } + + s.capiControlPlane = u + return nil +} + +func (s *clusterScope) createClusterObject(ctx context.Context, kube client.Client) error { + // This is because of network tags in cloud-provider-gcp + // TODO: cloud-provider-gcp should not assume cluster name is a valid prefix + name := gce.SafeClusterName(s.Cluster.GetName()) + + obj := map[string]any{ + "apiVersion": "cluster.x-k8s.io/v1beta1", + "kind": "Cluster", + "metadata": map[string]any{ + "name": name, + "namespace": s.namespace(), + }, + "spec": map[string]any{ + "infrastructureRef": makeRef(s.capiInfra), + "controlPlaneRef": makeRef(s.capiControlPlane), + }, + } + + u := &unstructured.Unstructured{Object: obj} + + setOwnerRef(u, s.Cluster) + + if err := s.ssa(ctx, kube, u); err != nil { + return fmt.Errorf("applying object to cluster: %w", err) + } + + s.capiCluster = u + return nil +} diff --git a/clusterapi/bootstrap/controllers/kopsconfig_controller.go b/pkg/controllers/clusterapi/kopsconfig_controller.go similarity index 64% rename from clusterapi/bootstrap/controllers/kopsconfig_controller.go rename to pkg/controllers/clusterapi/kopsconfig_controller.go index e9b20c3ba06d5..9581f61eb07fc 100644 --- a/clusterapi/bootstrap/controllers/kopsconfig_controller.go +++ b/pkg/controllers/clusterapi/kopsconfig_controller.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package controllers +package clusterapi import ( "bytes" @@ -28,15 +28,21 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" api "k8s.io/kops/clusterapi/bootstrap/kops/api/v1beta1" + capikops "k8s.io/kops/clusterapi/controlplane/kops/api/v1beta1" clusterv1 "k8s.io/kops/clusterapi/snapshot/cluster-api/api/v1beta1" "k8s.io/kops/pkg/apis/kops" + "k8s.io/kops/pkg/apis/kops/registry" + kopsapi "k8s.io/kops/pkg/apis/kops/v1alpha2" + "k8s.io/kops/pkg/apis/nodeup" "k8s.io/kops/pkg/assets" "k8s.io/kops/pkg/client/simple/vfsclientset" + "k8s.io/kops/pkg/kopscodecs" "k8s.io/kops/pkg/model" "k8s.io/kops/pkg/model/resources" "k8s.io/kops/pkg/nodemodel" "k8s.io/kops/pkg/wellknownservices" "k8s.io/kops/upup/pkg/fi" + "k8s.io/kops/upup/pkg/fi/cloudup" "k8s.io/kops/util/pkg/vfs" "k8s.io/utils/pointer" ctrl "sigs.k8s.io/controller-runtime" @@ -77,7 +83,22 @@ func (r *KopsConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } - data, err := r.buildBootstrapData(ctx) + capiCluster, err := getCAPIClusterFromCAPIObject(ctx, r.client, obj) + if err != nil { + return ctrl.Result{}, err + } + + cluster, err := getKopsClusterFromCAPICluster(ctx, r.client, capiCluster) + if err != nil { + return ctrl.Result{}, err + } + + kopsControlPlane, err := getKopsControlPlaneFromCAPICluster(ctx, r.client, capiCluster) + if err != nil { + return ctrl.Result{}, err + } + + data, err := r.buildBootstrapData(ctx, cluster, kopsControlPlane) if err != nil { return ctrl.Result{}, err } @@ -154,44 +175,101 @@ func (r *KopsConfigReconciler) storeBootstrapData(ctx context.Context, parent *a return nil } -func (r *KopsConfigReconciler) buildBootstrapData(ctx context.Context) ([]byte, error) { +func (r *KopsConfigReconciler) buildBootstrapData(ctx context.Context, cluster *kopsapi.Cluster, kopsControlPlane *capikops.KopsControlPlane) ([]byte, error) { + + config, err := BuildNodeupConfig(ctx, cluster, kopsControlPlane) + if err != nil { + return nil, err + } + return config.NodeupScript, nil +} + +type NodeupConfig struct { + NodeupScript []byte + NodeupConfig *nodeup.Config +} + +// TODO: Dedup with b.builder.NodeUpConfigBuilder.BuildConfig +func BuildNodeupConfig(ctx context.Context, cluster *kopsapi.Cluster, kopsControlPlane *capikops.KopsControlPlane) (*NodeupConfig, error) { // tf := &TemplateFunctions{ // KopsModelContext: *modelContext, // cloud: cloud, // } - // TODO: Make dynamic - clusterName := "clusterapi.k8s.local" - clusterStoreBasePath := "gs://kops-state-justinsb-root-20220725" - wellKnownAddresses := model.WellKnownAddresses{} - wellKnownAddresses[wellknownservices.KopsController] = append(wellKnownAddresses[wellknownservices.KopsController], "10.0.16.2") - wellKnownAddresses[wellknownservices.KubeAPIServer] = append(wellKnownAddresses[wellknownservices.KubeAPIServer], "10.0.16.2") + for _, systemEndpoint := range kopsControlPlane.Status.SystemEndpoints { + switch systemEndpoint.Type { + case capikops.SystemEndpointTypeKopsController: + wellKnownAddresses[wellknownservices.KopsController] = append(wellKnownAddresses[wellknownservices.KopsController], systemEndpoint.Endpoint) + case capikops.SystemEndpointTypeKubeAPIServer: + wellKnownAddresses[wellknownservices.KubeAPIServer] = append(wellKnownAddresses[wellknownservices.KubeAPIServer], systemEndpoint.Endpoint) + } + } - vfsContext := vfs.NewVFSContext() - basePath, err := vfsContext.BuildVfsPath(clusterStoreBasePath) - if err != nil { - return nil, fmt.Errorf("parsing vfs base path: %w", err) + // TODO: Sync with 
other nodeup config builder + clusterInternal := &kops.Cluster{} + if err := kopscodecs.Scheme.Convert(cluster, clusterInternal, nil); err != nil { + return nil, fmt.Errorf("converting cluster object: %w", err) } + // TODO: Fix validation + clusterInternal.Namespace = "" + + // if clusterInternal.Spec.KubeAPIServer == nil { + // clusterInternal.Spec.KubeAPIServer = &kops.KubeAPIServerConfig{} + // } // cluster := &kops.Cluster{} // cluster.Spec.KubernetesVersion = "1.28.3" // cluster.Spec.KubeAPIServer = &kops.KubeAPIServerConfig{} - vfsClientset := vfsclientset.NewVFSClientset(vfsContext, basePath) - cluster, err := vfsClientset.GetCluster(ctx, clusterName) + // if cluster.Spec.KubeAPIServer == nil { + // cluster.Spec.KubeAPIServer = &kopsapi.KubeAPIServerConfig{} + // } + + vfsContext := vfs.NewVFSContext() + + basePath, err := registry.ConfigBase(vfsContext, clusterInternal) if err != nil { - return nil, fmt.Errorf("getting cluster %q: %w", clusterName, err) + return nil, fmt.Errorf("parsing vfs base path: %w", err) } - if cluster.Spec.KubeAPIServer == nil { - cluster.Spec.KubeAPIServer = &kops.KubeAPIServerConfig{} - } + clientset := vfsclientset.NewVFSClientset(vfsContext, basePath) ig := &kops.InstanceGroup{} + // TODO: Name + ig.SetName("todo-ig-name") ig.Spec.Role = kops.InstanceGroupRoleNode getAssets := false - assetBuilder := assets.NewAssetBuilder(vfsContext, cluster.Spec.Assets, getAssets) + assetBuilder := assets.NewAssetBuilder(vfsContext, clusterInternal.Spec.Assets, getAssets) + + cloud, err := cloudup.BuildCloud(clusterInternal) + if err != nil { + return nil, fmt.Errorf("building cloud: %w", err) + } + + // assetBuilder := assets.NewAssetBuilder(clientset.VFSContext(), cluster.Spec.Assets, cluster.Spec.KubernetesVersion, false) + var instanceGroups []*kops.InstanceGroup + instanceGroups = append(instanceGroups, ig) + + fullCluster, err := cloudup.PopulateClusterSpec(ctx, clientset, clusterInternal, instanceGroups, cloud, assetBuilder) + if err != nil { + return nil, fmt.Errorf("building full cluster spec: %w", err) + } + + channel, err := cloudup.ChannelForCluster(clientset.VFSContext(), fullCluster) + if err != nil { + // TODO: Maybe this should be a warning + return nil, fmt.Errorf("building channel for cluster: %w", err) + } + + var fullInstanceGroups []*kops.InstanceGroup + for _, instanceGroup := range instanceGroups { + fullGroup, err := cloudup.PopulateInstanceGroupSpec(fullCluster, instanceGroup, cloud, channel) + if err != nil { + return nil, fmt.Errorf("building full instance group spec: %w", err) + } + fullInstanceGroups = append(fullInstanceGroups, fullGroup) + } encryptionConfigSecretHash := "" // if fi.ValueOf(c.Cluster.Spec.EncryptionConfig) { @@ -214,9 +292,9 @@ func (r *KopsConfigReconciler) buildBootstrapData(ctx context.Context) ([]byte, return nil, err } - configBuilder, err := nodemodel.NewNodeUpConfigBuilder(cluster, assetBuilder, encryptionConfigSecretHash) + configBuilder, err := nodemodel.NewNodeUpConfigBuilder(fullCluster, assetBuilder, encryptionConfigSecretHash) if err != nil { - return nil, err + return nil, fmt.Errorf("building node config: %w", err) } // bootstrapScript := &model.BootstrapScript{ @@ -228,7 +306,24 @@ func (r *KopsConfigReconciler) buildBootstrapData(ctx context.Context) ([]byte, keysets := make(map[string]*fi.Keyset) - keystore, err := vfsClientset.KeyStore(cluster) + // var keystoreBase vfs.Path + + // if cluster.Spec.ConfigStore.Keypairs == "" { + // configBase, err := registry.ConfigBase(vfsContext, clusterInternal) + // 
if err != nil { + // return nil, err + // } + // keystoreBase = configBase.Join("pki") + // } else { + // storePath, err := vfsContext.BuildVfsPath(cluster.Spec.ConfigStore.Keypairs) + // if err != nil { + // return nil, err + // } + // keystoreBase = storePath + // } + + // keystore := fi.NewVFSCAStore(clusterInternal, keystoreBase) + keystore, err := clientset.KeyStore(fullCluster) if err != nil { return nil, err } @@ -246,7 +341,7 @@ func (r *KopsConfigReconciler) buildBootstrapData(ctx context.Context) ([]byte, keysets[keyName] = keyset } - _, bootConfig, err := configBuilder.BuildConfig(ig, wellKnownAddresses, keysets) + nodeupConfig, bootConfig, err := configBuilder.BuildConfig(fullInstanceGroups[0], wellKnownAddresses, keysets) if err != nil { return nil, err } @@ -261,6 +356,7 @@ func (r *KopsConfigReconciler) buildBootstrapData(ctx context.Context) ([]byte, var nodeupScript resources.NodeUpScript nodeupScript.NodeUpAssets = nodeUpAssets.NodeUpAssets + // nodeupScript.NodeUpAssets = configBuilder.NodeUpAssets() nodeupScript.BootConfig = bootConfig { @@ -298,7 +394,8 @@ func (r *KopsConfigReconciler) buildBootstrapData(ctx context.Context) ([]byte, // See https://github.com/kubernetes/kops/issues/10206 for details. // TODO: nodeupScript.SetSysctls = setSysctls() - nodeupScript.CloudProvider = string(cluster.GetCloudProvider()) + // nodeupScript.CloudProvider = string(cluster.Spec.GetCloudProvider()) + nodeupScript.CloudProvider = string(clusterInternal.GetCloudProvider()) nodeupScriptResource, err := nodeupScript.Build() if err != nil { @@ -310,5 +407,8 @@ func (r *KopsConfigReconciler) buildBootstrapData(ctx context.Context) ([]byte, return nil, err } - return b, nil + return &NodeupConfig{ + NodeupScript: b, + NodeupConfig: nodeupConfig, + }, nil } diff --git a/pkg/controllers/clusterapi/kopscontrolplane_controller.go b/pkg/controllers/clusterapi/kopscontrolplane_controller.go new file mode 100644 index 0000000000000..d9d88a1d56631 --- /dev/null +++ b/pkg/controllers/clusterapi/kopscontrolplane_controller.go @@ -0,0 +1,78 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package clusterapi + +import ( + "context" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/klog/v2" + "k8s.io/kops/clusterapi/controlplane/kops/api/v1beta1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +// NewKopsControlPlaneReconciler is the constructor for a KopsControlPlaneReconciler +func NewKopsControlPlaneReconciler(mgr manager.Manager) error { + r := &KopsControlPlaneReconciler{ + client: mgr.GetClient(), + } + + return ctrl.NewControllerManagedBy(mgr). + For(&v1beta1.KopsControlPlane{}). + Complete(r) +} + +// KopsControlPlaneReconciler observes KopsControlPlane objects. 
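+// It currently resolves the owning Cluster API Cluster (via the cluster-name label) and the corresponding kops Cluster object and logs it; writing status back to the KopsControlPlane is not yet implemented.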
+type KopsControlPlaneReconciler struct {
+	// client is the controller-runtime client
+	client client.Client
+}
+
+// +kubebuilder:rbac:groups=controlplane.cluster.x-k8s.io,resources=kopscontrolplanes,verbs=get;list;watch
+
+// Reconcile is the main reconciler function that observes KopsControlPlane changes.
+func (r *KopsControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	obj := &v1beta1.KopsControlPlane{}
+	if err := r.client.Get(ctx, req.NamespacedName, obj); err != nil {
+		klog.Warningf("unable to fetch object: %v", err)
+		if apierrors.IsNotFound(err) {
+			// we'll ignore not-found errors, since they can't be fixed by an immediate
+			// requeue (we'll need to wait for a new notification), and we can get them
+			// on deleted requests.
+			return ctrl.Result{}, nil
+		}
+		return ctrl.Result{}, err
+	}
+
+	capiCluster, err := getCAPIClusterFromCAPIObject(ctx, r.client, obj)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	cluster, err := getKopsClusterFromCAPICluster(ctx, r.client, capiCluster)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	log := klog.FromContext(ctx)
+	log.Info("found cluster", "cluster", cluster)
+	// if err := r.client.Status().Update(ctx, obj); err != nil {
+	// 	return ctrl.Result{}, fmt.Errorf("error patching status: %w", err)
+	// }
+	return ctrl.Result{}, nil
+}
diff --git a/pkg/controllers/clusterapi/register.go b/pkg/controllers/clusterapi/register.go
new file mode 100644
index 0000000000000..bc69541f4011f
--- /dev/null
+++ b/pkg/controllers/clusterapi/register.go
@@ -0,0 +1,35 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package clusterapi
+
+import (
+	"fmt"
+
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+)
+
+func RegisterControllers(mgr manager.Manager) error {
+	if err := NewKopsConfigReconciler(mgr); err != nil {
+		return fmt.Errorf("error creating KopsConfig controller: %w", err)
+	}
+
+	if err := NewClusterReconciler(mgr); err != nil {
+		return fmt.Errorf("error creating Cluster controller: %w", err)
+	}
+
+	return nil
+}
diff --git a/pkg/controllers/clusterapi/utils.go b/pkg/controllers/clusterapi/utils.go
new file mode 100644
index 0000000000000..56eef603e0d40
--- /dev/null
+++ b/pkg/controllers/clusterapi/utils.go
@@ -0,0 +1,144 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package clusterapi + +import ( + "context" + "fmt" + "strings" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/kops/clusterapi/controlplane/kops/api/v1beta1" + clusterv1 "k8s.io/kops/clusterapi/snapshot/cluster-api/api/v1beta1" + kopsapi "k8s.io/kops/pkg/apis/kops/v1alpha2" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func makeRef(u *unstructured.Unstructured) map[string]any { + apiVersion, kind := u.GroupVersionKind().ToAPIVersionAndKind() + ref := map[string]any{ + "name": u.GetName(), + "apiVersion": apiVersion, + "kind": kind, + } + return ref +} + +func setOwnerRef(u *unstructured.Unstructured, owner client.Object) { + apiVersion, kind := owner.GetObjectKind().GroupVersionKind().ToAPIVersionAndKind() + + u.SetOwnerReferences([]metav1.OwnerReference{ + { + APIVersion: apiVersion, + Kind: kind, + Name: owner.GetName(), + UID: owner.GetUID(), + Controller: PtrTo(true), + }, + }) +} + +func PtrTo[T any](t T) *T { + return &t +} + +func getCAPIClusterFromCAPIObject(ctx context.Context, kube client.Client, obj client.Object) (*unstructured.Unstructured, error) { + id := types.NamespacedName{ + Namespace: obj.GetNamespace(), + Name: obj.GetName(), + } + + capiClusterName := obj.GetLabels()[clusterv1.ClusterNameLabel] + if capiClusterName == "" { + return nil, fmt.Errorf("label %q not set on %v", clusterv1.ClusterNameLabel, id) + } + + capiCluster := &unstructured.Unstructured{} + capiCluster.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Kind: "Cluster", + }) + clusterKey := types.NamespacedName{ + Namespace: obj.GetNamespace(), + Name: capiClusterName, + } + if err := kube.Get(ctx, clusterKey, capiCluster); err != nil { + return nil, fmt.Errorf("error fetching cluster %v: %w", clusterKey, err) + } + + return capiCluster, nil +} + +func getKopsClusterFromCAPICluster(ctx context.Context, kube client.Client, capiCluster *unstructured.Unstructured) (*kopsapi.Cluster, error) { + var clusterKey types.NamespacedName + + for _, ownerRef := range capiCluster.GetOwnerReferences() { + if ownerRef.Kind == "Cluster" && strings.HasPrefix(ownerRef.APIVersion, "kops.k8s.io/") { + clusterKey = types.NamespacedName{ + Namespace: capiCluster.GetNamespace(), + Name: ownerRef.Name, + } + } + } + + if clusterKey.Name == "" { + return nil, fmt.Errorf("cluster ownerRef not set on CAPI cluster %v/%v", capiCluster.GetNamespace(), capiCluster.GetName()) + } + + cluster := &kopsapi.Cluster{} + if err := kube.Get(ctx, clusterKey, cluster); err != nil { + return nil, fmt.Errorf("error fetching cluster %v: %w", clusterKey, err) + } + + return cluster, nil +} + +func getKopsControlPlaneFromCAPICluster(ctx context.Context, kube client.Client, capiCluster *unstructured.Unstructured) (*v1beta1.KopsControlPlane, error) { + capiClusterKey := types.NamespacedName{ + Namespace: capiCluster.GetNamespace(), + Name: capiCluster.GetName(), + } + name, _, _ := unstructured.NestedString(capiCluster.Object, "spec", "controlPlaneRef", "name") + if name == "" { + return nil, fmt.Errorf("controlPlaneRef.name not set for %v", capiClusterKey) + } + kind, _, _ := unstructured.NestedString(capiCluster.Object, "spec", "controlPlaneRef", "kind") + if kind == "" { + return nil, fmt.Errorf("controlPlaneRef.kind not set for %v", capiClusterKey) + } + if kind != "KopsControlPlane" { + return nil, fmt.Errorf("controlPlaneRef.kind was %q for %v, 
expected KopsControlPlane", kind, capiClusterKey) + } + + key := types.NamespacedName{ + Namespace: capiCluster.GetNamespace(), + Name: name, + } + + // TODO: Add ip addresses to status + + kopsControlPlane := &v1beta1.KopsControlPlane{} + if err := kube.Get(ctx, key, kopsControlPlane); err != nil { + return nil, fmt.Errorf("error fetching KopsControlPlane %v: %w", key, err) + } + + return kopsControlPlane, nil +} diff --git a/pkg/featureflag/featureflag.go b/pkg/featureflag/featureflag.go index bdae6a19e4bc8..67888182ef7b5 100644 --- a/pkg/featureflag/featureflag.go +++ b/pkg/featureflag/featureflag.go @@ -98,6 +98,8 @@ var ( Metal = new("Metal", Bool(false)) // AWSSingleNodesInstanceGroup enables the creation of a single node instance group instead of one per availability zone. AWSSingleNodesInstanceGroup = new("AWSSingleNodesInstanceGroup", Bool(false)) + // ClusterAPI enables support for Cluster API (CAPI) resources. + ClusterAPI = new("ClusterAPI", Bool(false)) ) // FeatureFlag defines a feature flag diff --git a/pkg/nodeidentity/clusterapi/machines.go b/pkg/nodeidentity/clusterapi/machines.go new file mode 100644 index 0000000000000..47d285b218e5b --- /dev/null +++ b/pkg/nodeidentity/clusterapi/machines.go @@ -0,0 +1,33 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package clusterapi + +import "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + +// Machine wraps a clusterapi Machine object +type Machine struct { + u *unstructured.Unstructured +} + +func (m *Machine) GetDeploymentName() string { + return m.u.GetLabels()["cluster.x-k8s.io/deployment-name"] +} + +func (m *Machine) GetFailureDomain() string { + failureDomain, _, _ := unstructured.NestedString(m.u.Object, "spec", "failureDomain") + return failureDomain +} diff --git a/pkg/nodeidentity/clusterapi/manager.go b/pkg/nodeidentity/clusterapi/manager.go new file mode 100644 index 0000000000000..0fc92e345921b --- /dev/null +++ b/pkg/nodeidentity/clusterapi/manager.go @@ -0,0 +1,71 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package clusterapi + +import ( + "context" + "fmt" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type Manager struct { + kubeClient client.Client +} + +func NewManager(kubeClient client.Client) *Manager { + return &Manager{ + kubeClient: kubeClient, + } +} + +func (m *Manager) FindMachineByProviderID(ctx context.Context, providerID string) (*Machine, error) { + // TODO: Can we build an index + // selector := client.MatchingFieldsSelector{ + // Selector: fields.OneTermEqualSelector("spec.providerID", providerID), + // } + var machines unstructured.UnstructuredList + machines.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "cluster.x-k8s.io", + Kind: "Machine", + Version: "v1beta1", + }) + if err := m.kubeClient.List(ctx, &machines); err != nil { + return nil, fmt.Errorf("error listing machines: %w", err) + } + var matches []*unstructured.Unstructured + for i := range machines.Items { + machine := &machines.Items[i] + machineSpecProviderID, _, _ := unstructured.NestedString(machine.Object, "spec", "providerID") + if machineSpecProviderID != providerID { + continue + } + matches = append(matches, machine) + } + if len(matches) > 0 { + if len(matches) > 1 { + return nil, fmt.Errorf("found multiple machines with providerID %q", providerID) + } + machine := matches[0] + machine = machine.DeepCopy() + return &Machine{u: machine}, nil + } + + return nil, nil +} diff --git a/pkg/nodeidentity/gce/identify.go b/pkg/nodeidentity/gce/identify.go index 095c41615ea99..59dcb244e8023 100644 --- a/pkg/nodeidentity/gce/identify.go +++ b/pkg/nodeidentity/gce/identify.go @@ -27,13 +27,20 @@ import ( compute "google.golang.org/api/compute/v1" corev1 "k8s.io/api/core/v1" "k8s.io/klog/v2" + "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/pkg/nodeidentity" + "k8s.io/kops/pkg/nodeidentity/clusterapi" + "k8s.io/kops/pkg/nodelabels" + "k8s.io/kops/upup/pkg/fi/cloudup/gce" ) // MetadataKeyInstanceGroupName is the key for the metadata that specifies the instance group name // This is used by the gce nodeidentifier to securely identify the node instancegroup const MetadataKeyInstanceGroupName = "kops-k8s-io-instance-group-name" +// LabelKeyCAPIRoleName is the label key used by the Cluster API Provider GCP to indicate the role of the instance. 
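+// When this label is present and CAPI support is enabled, the identifier first tries to match the instance to a Cluster API Machine by provider ID, falling back to the kops-managed MIG lookup if no Machine is found.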
+const LabelKeyCAPIRoleName = "capg-role" + // nodeIdentifier identifies a node from GCE type nodeIdentifier struct { // computeService is the GCE client @@ -41,10 +48,16 @@ type nodeIdentifier struct { // project is our GCE project; we require that instances be in this project project string + + // clusterName is the metadata.name of our cluster + clusterName string + + // capiManager contains our CAPI support, if CAPI support is enabled + capiManager *clusterapi.Manager } -// New creates and returns a nodeidentity.LegacyIdentifier for Nodes running on GCE -func New() (nodeidentity.LegacyIdentifier, error) { +// New creates and returns a nodeidentity.Identifier for Nodes running on GCE +func New(clusterName string, capiManager *clusterapi.Manager) (nodeidentity.Identifier, error) { ctx := context.Background() computeService, err := compute.NewService(ctx) @@ -71,11 +84,15 @@ func New() (nodeidentity.LegacyIdentifier, error) { return &nodeIdentifier{ computeService: computeService, project: project, + clusterName: clusterName, + capiManager: capiManager, }, nil } // IdentifyNode queries GCE for the node identity information -func (i *nodeIdentifier) IdentifyNode(ctx context.Context, node *corev1.Node) (*nodeidentity.LegacyInfo, error) { +func (i *nodeIdentifier) IdentifyNode(ctx context.Context, node *corev1.Node) (*nodeidentity.Info, error) { + // log := klog.FromContext(ctx) + providerID := node.Spec.ProviderID if providerID == "" { return nil, fmt.Errorf("providerID was not set for node %s", node.Name) @@ -107,44 +124,84 @@ func (i *nodeIdentifier) IdentifyNode(ctx context.Context, node *corev1.Node) (* return nil, fmt.Errorf("found instance %q, but status is %q", instanceName, instanceStatus) } - // The metadata itself is potentially mutable from the instance - // We instead look at the MIG configuration - createdBy := getMetadataValue(instance.Metadata, "created-by") - if createdBy == "" { - return nil, fmt.Errorf("instance %q did not have created-by metadata label set", instanceName) - } + capgRole := instance.Labels[LabelKeyCAPIRoleName] - // We need to double-check the MIG configuration, in case created-by was changed - migName := lastComponent(createdBy) + var capiMachine *clusterapi.Machine - mig, err := i.getMIG(zone, migName) - if err != nil { - return nil, err - } + if i.capiManager != nil && capgRole != "" { + providerID := "gce://" + project + "/" + zone + "/" + instanceName - // We now double check that the instance is indeed managed by the MIG - // this can't be spoofed without GCE API access - migMember, err := i.getManagedInstance(ctx, mig, instance.Id) - if err != nil { - return nil, err + m, err := i.capiManager.FindMachineByProviderID(ctx, providerID) + if err != nil { + return nil, fmt.Errorf("error finding Machine with providerID %q: %w", providerID, err) + } + capiMachine = m } - if migMember.Version == nil { - return nil, fmt.Errorf("instance %s did not have Version set", instance.Name) - } + if capiMachine == nil { + // The metadata itself is potentially mutable from the instance + // We instead look at the MIG configuration + createdBy := getMetadataValue(instance.Metadata, "created-by") + if createdBy == "" { + return nil, fmt.Errorf("cannot find owner for instance %s", instance.Name) + } - instanceTemplate, err := i.getInstanceTemplate(lastComponent(migMember.Version.InstanceTemplate)) - if err != nil { - return nil, err + // We need to double-check the MIG configuration, in case created-by was changed + migName := lastComponent(createdBy) + + mig, err := 
i.getMIG(zone, migName) + if err != nil { + return nil, err + } + + // We now double check that the instance is indeed managed by the MIG + // this can't be spoofed without GCE API access + migMember, err := i.getManagedInstance(ctx, mig, instance.Id) + if err != nil { + return nil, err + } + + if migMember.Version == nil { + return nil, fmt.Errorf("instance %s did not have Version set", instance.Name) + } + + instanceTemplate, err := i.getInstanceTemplate(lastComponent(migMember.Version.InstanceTemplate)) + if err != nil { + return nil, err + } + + igName := getMetadataValue(instanceTemplate.Properties.Metadata, MetadataKeyInstanceGroupName) + if igName == "" { + return nil, fmt.Errorf("ig name not set on instance template %s", instanceTemplate.Name) + } } - igName := getMetadataValue(instanceTemplate.Properties.Metadata, MetadataKeyInstanceGroupName) - if igName == "" { - return nil, fmt.Errorf("ig name not set on instance template %s", instanceTemplate.Name) + info := &nodeidentity.Info{} + // info.InstanceID TODO: InstanceID is only used by the provider? + + tagToRole := make(map[string]kops.InstanceGroupRole) + for _, role := range kops.AllInstanceGroupRoles { + tag := gce.TagForRole(i.clusterName, role) + tagToRole[tag] = role } - info := &nodeidentity.LegacyInfo{} - info.InstanceGroup = igName + labels := make(map[string]string) + for _, tag := range instance.Tags.Items { + role, found := tagToRole[tag] + if found { + switch role { + case kops.InstanceGroupRoleControlPlane: + labels[nodelabels.RoleLabelControlPlane20] = "" + case kops.InstanceGroupRoleNode: + labels[nodelabels.RoleLabelNode16] = "" + case kops.InstanceGroupRoleAPIServer: + labels[nodelabels.RoleLabelAPIServer16] = "" + default: + klog.Warningf("unknown node role %q for server %q", role, instance.SelfLink) + } + } + } + info.Labels = labels return info, nil } @@ -152,7 +209,7 @@ func (i *nodeIdentifier) IdentifyNode(ctx context.Context, node *corev1.Node) (* func (i *nodeIdentifier) getInstance(zone string, instanceName string) (*compute.Instance, error) { instance, err := i.computeService.Instances.Get(i.project, zone, instanceName).Do() if err != nil { - return nil, fmt.Errorf("error fetching GCE instance: %v", err) + return nil, fmt.Errorf("error fetching GCE instance: %w", err) } return instance, nil diff --git a/pkg/validation/validate_cluster.go b/pkg/validation/validate_cluster.go index 0194a1f5e52b1..395799bb4f831 100644 --- a/pkg/validation/validate_cluster.go +++ b/pkg/validation/validate_cluster.go @@ -206,6 +206,8 @@ var masterStaticPods = []string{ func (v *ValidationCluster) collectPodFailures(ctx context.Context, client kubernetes.Interface, nodes []v1.Node, nodeInstanceGroupMapping map[string]*kops.InstanceGroup, podValidationFilter func(pod *v1.Pod) bool, ) error { + log := klog.FromContext(ctx) + masterWithoutPod := map[string]map[string]bool{} nodeByAddress := map[string]string{} @@ -274,6 +276,7 @@ func (v *ValidationCluster) collectPodFailures(ctx context.Context, client kuber for _, container := range pod.Status.ContainerStatuses { if !container.Ready { notready = append(notready, container.Name) + log.V(2).Info("container not ready", "pod", pod.Name, "container", container.Name, "state", container.State) } } if len(notready) != 0 { diff --git a/tests/e2e/kubetest2-kops/deployer/common.go b/tests/e2e/kubetest2-kops/deployer/common.go index af374ce7ff2d6..915a89770d6b2 100644 --- a/tests/e2e/kubetest2-kops/deployer/common.go +++ b/tests/e2e/kubetest2-kops/deployer/common.go @@ -251,7 +251,12 @@ 
func (d *deployer) env() []string { klog.Warningf("DO env var %q not found or empty", k) } } + } else if d.CloudProvider == "gce" { + if d.GCPProject != "" { + vars = append(vars, fmt.Sprintf("GCP_PROJECT=%v", d.GCPProject)) + } } + if d.KopsBaseURL != "" { vars = append(vars, fmt.Sprintf("KOPS_BASE_URL=%v", d.KopsBaseURL)) } else if baseURL := os.Getenv("KOPS_BASE_URL"); baseURL != "" { diff --git a/tests/e2e/scenarios/clusterapi/run-test.sh b/tests/e2e/scenarios/clusterapi/run-test.sh new file mode 100755 index 0000000000000..32d97ff919e98 --- /dev/null +++ b/tests/e2e/scenarios/clusterapi/run-test.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash + +# Copyright 2025 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +REPO_ROOT=$(git rev-parse --show-toplevel); + +# Enable feature flag for CAPI support +export KOPS_FEATURE_FLAGS=ClusterAPI + +# Override some settings +CLUSTER_NAME="clusterapi.k8s.local" + +CLOUD_PROVIDER=gce +ZONES=us-east4-a # Currently the zone name gets encoded in the machinedeployment name (maybe we can use labels instead?) + +OVERRIDES="${OVERRIDES-} --node-count=2" # We need at least 2 nodes for CoreDNS to validate +OVERRIDES="${OVERRIDES} --gce-service-account=default" # Use default service account because boskos permissions are limited + +# Create kOps cluster +source "${REPO_ROOT}"/tests/e2e/scenarios/lib/common.sh + +kops-acquire-latest + +kops-up + +# Export KUBECONFIG; otherwise the precedence for controllers is wrong (?) +KUBECONFIG=$(mktemp -t kops.XXXXXXXXX) +export KUBECONFIG +"${KOPS}" export kubecfg --name "${CLUSTER_NAME}" --admin --kubeconfig "${KUBECONFIG}" + +# Install cert-manager +kubectl apply --server-side -f https://github.com/cert-manager/cert-manager/releases/download/v1.18.2/cert-manager.yaml + +kubectl wait --for=condition=Available --timeout=5m -n cert-manager deployment/cert-manager +kubectl wait --for=condition=Available --timeout=5m -n cert-manager deployment/cert-manager-cainjector +kubectl wait --for=condition=Available --timeout=5m -n cert-manager deployment/cert-manager-webhook + +# Install cluster-api core and cluster-api-provider-gcp +kubectl apply --server-side -k "${REPO_ROOT}/clusterapi/manifests/cluster-api" +kubectl apply --server-side -k "${REPO_ROOT}/clusterapi/manifests/cluster-api-provider-gcp" + +kubectl wait --for=condition=Available --timeout=5m -n capi-system deployment/capi-controller-manager +kubectl wait --for=condition=Available --timeout=5m -n capg-system deployment/capg-controller-manager + +# Install kOps CRDs (Cluster & InstanceGroup) +kubectl apply --server-side -k "${REPO_ROOT}/k8s" +kubectl apply --server-side -k "${REPO_ROOT}/clusterapi/config" + +# Install extra RBAC for clusterapi controllers +# TODO: Should be part of normal configuration? 
+kubectl apply --server-side -f "${REPO_ROOT}/clusterapi/examples/kopscontroller.yaml"
+
+# Install extra RBAC for capi-manager loopback connection to cluster (used to check node health etc)
+kubectl apply --server-side -f "${REPO_ROOT}/clusterapi/examples/capi-loopback.yaml"
+
+# Create a Cluster API Cluster object
+"${KOPS}" get cluster clusterapi.k8s.local -oyaml | kubectl apply --server-side -n kube-system -f -
+
+# Create a MachineDeployment matching our configuration
+"${KOPS}" toolbox clusterapi generate machinedeployment \
+  --cluster clusterapi.k8s.local \
+  --name clusterapi-k8s-local-md-0 \
+  --namespace kube-system | kubectl apply --server-side -n kube-system -f -
+
+# Debug: print output from kops-controller
+kubectl logs -n kube-system -l k8s-app=kops-controller --follow &
+
+# Wait for the MachineDeployment machines to become ready
+kubectl wait --for=condition=Available -n kube-system machinedeployment/clusterapi-k8s-local-md-0-us-east4-a --timeout=10m
+kubectl wait --for=condition=MachinesReady -n kube-system machinedeployment/clusterapi-k8s-local-md-0-us-east4-a --timeout=10m
+kubectl get -n kube-system machinedeployment/clusterapi-k8s-local-md-0-us-east4-a -oyaml
+
+# Print the nodes, machines and gcpmachines
+kubectl get nodes -owide
+kubectl get machine -A -owide
+kubectl get gcpmachine -A -owide
+
+# Delete the machinedeployment, causing the machines and nodes to be deleted
+kubectl delete -n kube-system machinedeployment/clusterapi-k8s-local-md-0-us-east4-a
+
+# Print the nodes, machines and gcpmachines again
+kubectl get nodes -owide
+kubectl get machine -A -owide
+kubectl get gcpmachine -A -owide
+
+# CAPI currently creates some firewall rules that otherwise are not cleaned up, and block kops cluster cleanup
+function cleanup_capi_leaks() {
+  gcloud compute firewall-rules delete allow-clusterapi-k8s-local-cluster --quiet || true
+  gcloud compute firewall-rules delete allow-clusterapi-k8s-local-healthchecks --quiet || true
+
+  #gcloud compute networks subnets delete us-east4-clusterapi-k8s-local --region us-east4 --quiet
+  #gcloud compute networks delete clusterapi-k8s-local --quiet
+}
+cleanup_capi_leaks
diff --git a/tests/e2e/scenarios/lib/common.sh b/tests/e2e/scenarios/lib/common.sh
index 7240eb6be0e39..fce7012bdbc02 100644
--- a/tests/e2e/scenarios/lib/common.sh
+++ b/tests/e2e/scenarios/lib/common.sh
@@ -158,4 +158,8 @@ function kops-up() {
   # Source the env file to get exported variables, in particular KOPS_STATE_STORE
   . "${ENV_FILE}"
   export KOPS_STATE_STORE
+  if [[ -n "${GCP_PROJECT:-}" ]]; then
+    export GCP_PROJECT
+    gcloud config set project "${GCP_PROJECT}"
+  fi
 }
diff --git a/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go b/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go
index ce7773a32265b..3668fde7b8581 100644
--- a/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go
+++ b/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go
@@ -36,6 +36,7 @@ import (
 	compute "google.golang.org/api/compute/v1"
 	"google.golang.org/api/googleapi"
 	"k8s.io/kops/pkg/bootstrap"
+	"k8s.io/kops/pkg/nodeidentity/clusterapi"
 	"k8s.io/kops/pkg/nodeidentity/gce"
 	"k8s.io/kops/pkg/wellknownports"
 	"k8s.io/kops/upup/pkg/fi"
@@ -47,10 +48,12 @@ type tpmVerifier struct {
 	opt gcetpm.TPMVerifierOptions
 
 	computeClient *compute.Service
+
+	capiManager *clusterapi.Manager
 }
 
 // NewTPMVerifier constructs a new TPM verifier for GCE.
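+// The optional capiManager lets the verifier accept instances managed by Cluster API (carrying the capg-role label) that have no kops instance group metadata.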
-func NewTPMVerifier(opt *gcetpm.TPMVerifierOptions) (bootstrap.Verifier, error) { +func NewTPMVerifier(opt *gcetpm.TPMVerifierOptions, capiManager *clusterapi.Manager) (bootstrap.Verifier, error) { ctx := context.Background() computeClient, err := compute.NewService(ctx) @@ -61,6 +64,7 @@ func NewTPMVerifier(opt *gcetpm.TPMVerifierOptions) (bootstrap.Verifier, error) return &tpmVerifier{ opt: *opt, computeClient: computeClient, + capiManager: capiManager, }, nil } @@ -121,7 +125,7 @@ func (v *tpmVerifier) VerifyToken(ctx context.Context, rawRequest *http.Request, return nil, fmt.Errorf("projectID does not match expected: got %q, want %q", tokenData.GCPProjectID, v.opt.ProjectID) } - instance, err := v.computeClient.Instances.Get(tokenData.GCPProjectID, tokenData.Zone, tokenData.Instance).Do() + instance, err := v.computeClient.Instances.Get(tokenData.GCPProjectID, tokenData.Zone, tokenData.Instance).Context(ctx).Do() if err != nil { if isNotFound(err) { return nil, fmt.Errorf("unable to find instance in compute API: %w", err) @@ -144,6 +148,8 @@ func (v *tpmVerifier) VerifyToken(ctx context.Context, rawRequest *http.Request, } } + capgRole := instance.Labels[gce.LabelKeyCAPIRoleName] + if clusterName == "" { return nil, fmt.Errorf("could not determine cluster for instance %s", instance.SelfLink) } @@ -151,8 +157,22 @@ func (v *tpmVerifier) VerifyToken(ctx context.Context, rawRequest *http.Request, if clusterName != v.opt.ClusterName { return nil, fmt.Errorf("clusterName does not match expected: got %q, want %q", clusterName, v.opt.ClusterName) } - if instanceGroupName == "" { - return nil, fmt.Errorf("could not determine instance group for instance %s", instance.SelfLink) + + var capiMachine *clusterapi.Machine + + if v.capiManager != nil && capgRole != "" { + providerID := "gce://" + tokenData.GCPProjectID + "/" + tokenData.Zone + "/" + tokenData.Instance + + m, err := v.capiManager.FindMachineByProviderID(ctx, providerID) + if err != nil { + return nil, fmt.Errorf("error finding Machine with providerID %q: %w", providerID, err) + } + capiMachine = m + } + + // Check if this is a CAPG managed instance + if instanceGroupName == "" && capiMachine == nil { + return nil, fmt.Errorf("could not determine ownership for instance %s", instance.SelfLink) } // Verify the token has a valid GCE TPM signature. @@ -178,6 +198,7 @@ func (v *tpmVerifier) VerifyToken(ctx context.Context, rawRequest *http.Request, result := &bootstrap.VerifyResult{ NodeName: instance.Name, InstanceGroupName: instanceGroupName, + CAPIMachine: capiMachine, CertificateNames: sans, ChallengeEndpoint: challengeEndpoint, } diff --git a/upup/pkg/fi/cloudup/populate_cluster_spec.go b/upup/pkg/fi/cloudup/populate_cluster_spec.go index 2bc88ac846efc..a0285e7adc9b2 100644 --- a/upup/pkg/fi/cloudup/populate_cluster_spec.go +++ b/upup/pkg/fi/cloudup/populate_cluster_spec.go @@ -69,14 +69,13 @@ func PopulateClusterSpec(ctx context.Context, clientset simple.Clientset, cluste return c.fullCluster, nil } -// Here be dragons -// -// This function has some `interesting` things going on. -// In an effort to let the cluster.Spec fall through I am -// hard coding topology in two places.. It seems and feels -// very wrong.. but at least now my new cluster.Spec.Topology -// struct is falling through.. -// @kris-nova +// run implements the main logic of "filling in" the details of a cluster. 
+// Some logic is built into this function (and probably should be refactored out), +// but the majority of the logic is delegated to OptionsBuilder implementations. +// So that we don't have to be very careful about convergence, we run the list of OptionsBuilders +// repeatedly until convergence (defined as no changes). +// In general, an OptionsBuilder should populate only if the field is not already set +// (it may have been set by a user). func (c *populateClusterSpec) run(ctx context.Context, clientset simple.Clientset) error { if errs := validation.ValidateCluster(c.InputCluster, false, clientset.VFSContext()); len(errs) != 0 { return errs.ToAggregate() diff --git a/upup/pkg/fi/cloudup/template_functions.go b/upup/pkg/fi/cloudup/template_functions.go index 1f519d9c21c50..fb7538319cf32 100644 --- a/upup/pkg/fi/cloudup/template_functions.go +++ b/upup/pkg/fi/cloudup/template_functions.go @@ -685,6 +685,13 @@ func (tf *TemplateFunctions) KopsControllerConfig() (string, error) { config.CacheNodeidentityInfo = true } + if featureflag.ClusterAPI.Enabled() { + enabled := true + config.CAPI = &kopscontrollerconfig.CAPIOptions{ + Enabled: &enabled, + } + } + { certNames := []string{"kubelet", "kubelet-server"} signingCAs := []string{fi.CertificateIDCA} @@ -901,6 +908,11 @@ func (tf *TemplateFunctions) KopsControllerEnv() []corev1.EnvVar { envMap["KOPS_RUN_TOO_NEW_VERSION"] = v } + // If our assets are served from a custom base URL, we need to pass that to kops-controller for cluster-api etc. + if v := os.Getenv("KOPS_BASE_URL"); v != "" { + envMap["KOPS_BASE_URL"] = v + } + return envMap.ToEnvVars() } diff --git a/upup/pkg/fi/nodeup/command.go b/upup/pkg/fi/nodeup/command.go index 5de8ed6573f7e..6f5d4f33d8e84 100644 --- a/upup/pkg/fi/nodeup/command.go +++ b/upup/pkg/fi/nodeup/command.go @@ -139,6 +139,9 @@ func (c *NodeUpCommand) Run(out io.Writer) error { return fmt.Errorf("error parsing BootConfig config response: %v", err) } nodeupConfigHash = sha256.Sum256([]byte(nodeConfig.NodeupConfig)) + if nodeupConfig.CAs == nil { + nodeupConfig.CAs = make(map[string]string) + } nodeupConfig.CAs[fi.CertificateIDCA] = bootConfig.ConfigServer.CACertificates case bootConfig.InstanceGroupName != "": nodeupConfigLocation := configBase.Join("igconfig", bootConfig.InstanceGroupRole.ToLowerString(), bootConfig.InstanceGroupName, "nodeupconfig.yaml")