diff --git a/backend/kubedeployer/converters.go b/backend/kubedeployer/converters.go index e2ba0711..02a0999c 100644 --- a/backend/kubedeployer/converters.go +++ b/backend/kubedeployer/converters.go @@ -16,7 +16,7 @@ import ( ) const ( - K3S_FLIST = "https://hub.threefold.me/omarabdulaziz.3bot/omarabdul3ziz-k3s-latest.flist" + K3S_FLIST = "https://hub.threefold.me/salmaelsoly.3bot/salmaelsoly-k3s-crd-calico-latest.flist" K3S_ENTRYPOINT = "/sbin/zinit init" K3S_DATA_DIR = "/mnt/data" K3S_IFACE = "flannel-br" diff --git a/k3s/manifests/calico-cluster-info.yaml b/k3s/manifests/calico-cluster-info.yaml new file mode 100644 index 00000000..a4db6bb9 --- /dev/null +++ b/k3s/manifests/calico-cluster-info.yaml @@ -0,0 +1,8 @@ +apiVersion: crd.projectcalico.org/v1 +kind: ClusterInformation +metadata: + name: default +spec: + clusterGUID: "12345678-1234-1234-1234-123456789012" + clusterType: "k8s,operator,bgp" + datastoreReady: true diff --git a/k3s/manifests/calico-felix-config.yaml b/k3s/manifests/calico-felix-config.yaml new file mode 100644 index 00000000..bcf5bbf8 --- /dev/null +++ b/k3s/manifests/calico-felix-config.yaml @@ -0,0 +1,8 @@ +apiVersion: crd.projectcalico.org/v1 +kind: FelixConfiguration +metadata: + name: default +spec: + chainInsertMode: Insert + iptablesBackend: Legacy + natOutgoingAddress: "" diff --git a/k3s/manifests/calico-ippool-config.yaml b/k3s/manifests/calico-ippool-config.yaml new file mode 100644 index 00000000..c955e347 --- /dev/null +++ b/k3s/manifests/calico-ippool-config.yaml @@ -0,0 +1,26 @@ +apiVersion: crd.projectcalico.org/v1 +kind: IPPool +metadata: + name: default-ipv4-ippool +spec: + cidr: 10.42.0.0/16 + ipipMode: Never + natOutgoing: true + disabled: false + nodeSelector: all() + vxlanMode: Always + blockSize: 26 + +--- +apiVersion: crd.projectcalico.org/v1 +kind: IPPool +metadata: + name: default-ipv6-ippool +spec: + cidr: 2001:cafe:42::/56 + ipipMode: Never + natOutgoing: true + disabled: false + nodeSelector: all() + vxlanMode: 
Always + blockSize: 122 diff --git a/k3s/manifests/calico-nat-config.yaml b/k3s/manifests/calico-nat-config.yaml new file mode 100644 index 00000000..1f72a551 --- /dev/null +++ b/k3s/manifests/calico-nat-config.yaml @@ -0,0 +1,122 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: calico-nat-setup + namespace: kube-system + labels: + k8s-app: calico-nat-setup +spec: + selector: + matchLabels: + k8s-app: calico-nat-setup + template: + metadata: + labels: + k8s-app: calico-nat-setup + spec: + hostNetwork: true + hostPID: true + nodeSelector: + kubernetes.io/os: linux + tolerations: + - operator: Exists + effect: NoSchedule + - operator: Exists + effect: NoExecute + - operator: Exists + key: CriticalAddonsOnly + serviceAccountName: calico-node + initContainers: + - name: setup-nat-rules + image: calico/node:v3.26.1 + command: + - /bin/sh + - -c + - | + # Wait for basic iptables to be available and add rules directly + echo "Setting up NAT rules..." + sleep 10 + + # Add IPv4 NAT rule if not exists + if ! iptables -t nat -C POSTROUTING -s 10.42.0.0/16 ! -d 10.42.0.0/16 -j MASQUERADE 2>/dev/null; then + echo "Adding IPv4 NAT rule..." + iptables -t nat -I POSTROUTING 1 -s 10.42.0.0/16 ! -d 10.42.0.0/16 -j MASQUERADE + fi + + # Add IPv6 NAT rule if not exists and IPv6 is available + if ip6tables -t nat -L >/dev/null 2>&1; then + if ! ip6tables -t nat -C POSTROUTING -s 2001:cafe:42::/56 ! -d 2001:cafe:42::/56 -j MASQUERADE 2>/dev/null; then + echo "Adding IPv6 NAT rule..." + ip6tables -t nat -I POSTROUTING 1 -s 2001:cafe:42::/56 ! 
-d 2001:cafe:42::/56 -j MASQUERADE + fi + else + echo "IPv6 NAT tables not available" + fi + + echo "NAT rules configured successfully" + securityContext: + privileged: true + volumeMounts: + - mountPath: /lib/modules + name: lib-modules + readOnly: true + - mountPath: /run/xtables.lock + name: xtables-lock + - mountPath: /var/run/calico + name: var-run-calico + - mountPath: /var/lib/calico + name: var-lib-calico + env: + - name: DATASTORE_TYPE + value: "kubernetes" + - name: WAIT_FOR_DATASTORE + value: "true" + - name: NODENAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: CALICO_NETWORKING_BACKEND + value: "vxlan" + - name: CLUSTER_TYPE + value: "k8s,bgp" + - name: IP + value: "autodetect" + - name: IP6 + value: "autodetect" + - name: CALICO_IPV4POOL_CIDR + value: "10.42.0.0/16" + - name: CALICO_IPV6POOL_CIDR + value: "2001:cafe:42::/56" + - name: FELIX_IPINIPMTU + value: "0" + - name: FELIX_VXLANMTU + value: "0" + - name: FELIX_WIREGUARDMTU + value: "0" + - name: CALICO_IPV4POOL_IPIP + value: "Never" + - name: CALICO_IPV4POOL_VXLAN + value: "Always" + - name: CALICO_IPV6POOL_VXLAN + value: "Always" + - name: FELIX_IPV6SUPPORT + value: "true" + - name: FELIX_HEALTHENABLED + value: "true" + containers: + - name: pause + image: registry.k8s.io/pause:3.9 + volumes: + - name: lib-modules + hostPath: + path: /lib/modules + - name: var-run-calico + hostPath: + path: /var/run/calico + - name: var-lib-calico + hostPath: + path: /var/lib/calico + - name: xtables-lock + hostPath: + path: /run/xtables.lock + type: FileOrCreate diff --git a/k3s/manifests/calico-node.yaml b/k3s/manifests/calico-node.yaml new file mode 100644 index 00000000..158591f1 --- /dev/null +++ b/k3s/manifests/calico-node.yaml @@ -0,0 +1,1254 @@ +--- +# Source: calico/templates/calico-config.yaml +# This ConfigMap is used to configure a self-hosted Calico installation. +kind: ConfigMap +apiVersion: v1 +metadata: + name: calico-config + namespace: kube-system +data: + # Typha is disabled. 
+ typha_service_name: "none" + # Configure the backend to use. + calico_backend: "bird" + + # Configure the MTU to use for workload interfaces and tunnels. + veth_mtu: "1450" + + # The CNI network configuration to install on each node. The special + # values in this config will be automatically populated. + cni_network_config: |- + { + "name": "k8s-pod-network", + "cniVersion": "0.3.1", + "plugins": [ + { + "type": "calico", + "log_level": "info", + "datastore_type": "kubernetes", + "nodename": "__KUBERNETES_NODE_NAME__", + "mtu": __CNI_MTU__, + "ipam": { + "type": "calico-ipam", + "assign_ipv4": "true", + "assign_ipv6": "true" + }, + "policy": { + "type": "k8s" + }, + "kubernetes": { + "kubeconfig": "__KUBECONFIG_FILEPATH__" + } + }, + { + "type": "portmap", + "snat": true, + "capabilities": {"portMappings": true} + }, + { + "type": "bandwidth", + "capabilities": {"bandwidth": true} + } + ] + } + +--- +# Source: calico/templates/kdd-crds.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ippools.crd.projectcalico.org +spec: + group: crd.projectcalico.org + names: + kind: IPPool + listKind: IPPoolList + plural: ippools + singular: ippool + scope: Cluster + versions: + - name: v1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: IPPoolSpec contains the specification for an IPPool resource. + properties: + allowedUses: + description: AllowedUse controls what the IP pool will be used for. + If not specified or empty, defaults to ["Tunnel", "Workload"] for + back-compatibility + items: + type: string + type: array + blockSize: + description: The block size to use for IP address assignments from + this pool. Defaults to 26 for IPv4 and 122 for IPv6. + type: integer + cidr: + description: The pool CIDR. + type: string + disableBGPExport: + description: 'Disable exporting routes from this IP Pool''s CIDR over + BGP. [Default: false]' + type: boolean + disabled: + description: When disabled is true, Calico IPAM will not assign addresses + from this pool. + type: boolean + encapsulation: + description: The encapsulation type used for packets sent from this + IP pool. + type: string + ipipMode: + description: 'Contains configuration for IPIP tunneling for this pool. + If not specified, then this is defaulted to "Never" (i.e. IPIP tunneling + is disabled).' + type: string + natOutgoing: + description: 'When nat-outgoing is true, packets sent from Calico networked + containers in this pool to destinations outside of this pool will + be masqueraded.' + type: boolean + nodeSelector: + description: Allows IPPool to allocate for a specific node by label + selector. + type: string + vxlanMode: + description: 'Contains configuration for VXLAN tunneling for this pool. + If not specified, then this is defaulted to "Never" (i.e. VXLAN tunneling + is disabled).' 
+ type: string + required: + - cidr + type: object + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ipamconfigs.crd.projectcalico.org +spec: + group: crd.projectcalico.org + names: + kind: IPAMConfig + listKind: IPAMConfigList + plural: ipamconfigs + singular: ipamconfig + scope: Cluster + versions: + - name: v1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values.' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents.' + type: string + metadata: + type: object + spec: + description: IPAMConfigSpec contains the specification for an IPAMConfig resource. + properties: + autoAllocateBlocks: + type: boolean + maxBlocksPerHost: + description: MaxBlocksPerHost, if non-zero, is the max number of blocks + that can be affine to each host. + type: integer + strictAffinity: + type: boolean + type: object + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: blockaffinities.crd.projectcalico.org +spec: + group: crd.projectcalico.org + names: + kind: BlockAffinity + listKind: BlockAffinityList + plural: blockaffinities + singular: blockaffinity + scope: Cluster + versions: + - name: v1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object.' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents.' 
+ type: string + metadata: + type: object + spec: + description: BlockAffinitySpec contains the specification for a BlockAffinity resource. + properties: + cidr: + type: string + deleted: + description: Deleted indicates that this block affinity is being deleted. + This field is a string for compatibility with older releases that + mistakenly treat this field as a string. + type: string + node: + type: string + state: + type: string + required: + - cidr + - deleted + - node + - state + type: object + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ipamblocks.crd.projectcalico.org +spec: + group: crd.projectcalico.org + names: + kind: IPAMBlock + listKind: IPAMBlockList + plural: ipamblocks + singular: ipamblock + scope: Cluster + versions: + - name: v1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object.' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents.' + type: string + metadata: + type: object + spec: + description: IPAMBlockSpec contains the specification for an IPAMBlock resource. 
+ properties: + affinity: + type: string + allocations: + items: + type: integer + nullable: true + type: array + attributes: + items: + properties: + handle_id: + type: string + secondary: + additionalProperties: + type: string + type: object + type: object + type: array + cidr: + type: string + deleted: + type: boolean + strictAffinity: + type: boolean + unallocated: + items: + type: integer + type: array + required: + - allocations + - attributes + - cidr + - strictAffinity + - unallocated + type: object + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: bgpconfigurations.crd.projectcalico.org +spec: + group: crd.projectcalico.org + names: + kind: BGPConfiguration + listKind: BGPConfigurationList + plural: bgpconfigurations + singular: bgpconfiguration + scope: Cluster + versions: + - name: v1 + schema: + openAPIV3Schema: + description: BGPConfiguration contains the configuration for any BGP routing. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: BGPConfigurationSpec contains the values of the BGP configuration. + properties: + asNumber: + description: 'ASNumber is the default AS number used by a node. 
[Default: + 64512]' + format: int32 + type: integer + communities: + description: Communities is a list of BGP community values and their + arbitrary names for tagging routes. + items: + description: Community contains standard or large community value + and its name. + properties: + name: + description: Name given to community value. + type: string + value: + description: Value must be of format `aa:nn` or `aa:nn:mm`. + For standard community use `aa:nn` format, where `aa` and + `nn` are 16 bit number. For large community use `aa:nn:mm` + format, where `aa`, `nn` and `mm` are 32 bit number. Where, + `aa` is an AS Number, `nn` and `mm` are per-AS identifier. + pattern: ^(\d+):(\d+)$|^(\d+):(\d+):(\d+)$ + type: string + type: object + type: array + listenPort: + description: ListenPort is the port where BGP protocol should listen. + Defaults to 179 + maximum: 65535 + minimum: 1 + type: integer + logSeverityScreen: + description: 'LogSeverityScreen is the log severity above which logs + are sent to the stdout. [Default: INFO]' + type: string + nodeToNodeMeshEnabled: + description: 'NodeToNodeMeshEnabled sets whether full node to node + BGP mesh is enabled. [Default: true]' + type: boolean + prefixAdvertisements: + description: PrefixAdvertisements contains per-prefix advertisement + configuration. + items: + description: PrefixAdvertisement configures advertisement properties + for the specified CIDR. + properties: + cidr: + description: CIDR for which properties should be advertised. + type: string + communities: + description: Communities can be list of either community names + already defined in `Specs.Communities` or community value + of format `aa:nn` or `aa:nn:mm`. For standard community use + `aa:nn` format, where `aa` and `nn` are 16 bit number. For + large community use `aa:nn:mm` format, where `aa`, `nn` and + `mm` are 32 bit number. Where,`aa` is an AS Number, `nn` and + `mm` are per-AS identifier. 
+ items: + type: string + type: array + type: object + type: array + serviceClusterIPs: + description: ServiceClusterIPs are the CIDR blocks from which service + cluster IPs are allocated. If specified, Calico will advertise these + blocks, as well as any cluster IPs within them. + items: + description: ServiceClusterIPBlock represents a single allowed ClusterIP + CIDR block. + properties: + cidr: + type: string + type: object + type: array + serviceExternalIPs: + description: ServiceExternalIPs are the CIDR blocks for Kubernetes + Service External IPs. Kubernetes Service ExternalIPs will only be + advertised if they are within one of these blocks. + items: + description: ServiceExternalIPBlock represents a single allowed + External IP CIDR block. + properties: + cidr: + type: string + type: object + type: array + type: object + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: caliconodestatuses.crd.projectcalico.org +spec: + group: crd.projectcalico.org + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + description: CalicoNodeStatusSpec contains the specification for a CalicoNodeStatus resource. + type: object + properties: + node: + type: string + classes: + items: + type: string + type: array + updatePeriodSeconds: + type: integer + status: + description: CalicoNodeStatusStatus defines the observed state of CalicoNodeStatus. 
+ type: object + properties: + lastUpdated: + type: string + agent: + type: object + properties: + birdV4: + type: object + properties: + state: + type: string + version: + type: string + routerID: + type: string + lastBootTime: + type: string + lastReconfigTime: + type: string + birdV6: + type: object + properties: + state: + type: string + version: + type: string + routerID: + type: string + lastBootTime: + type: string + lastReconfigTime: + type: string + scope: Cluster + names: + plural: caliconodestatuses + singular: caliconodestatus + kind: CalicoNodeStatus + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: bgpfilters.crd.projectcalico.org +spec: + group: crd.projectcalico.org + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + description: BGPFilterSpec contains the specification for a BGPFilter resource. 
+ type: object + properties: + exportV4: + items: + type: object + properties: + cidr: + type: string + matchOperator: + type: string + action: + type: string + type: array + importV4: + items: + type: object + properties: + cidr: + type: string + matchOperator: + type: string + action: + type: string + type: array + exportV6: + items: + type: object + properties: + cidr: + type: string + matchOperator: + type: string + action: + type: string + type: array + importV6: + items: + type: object + properties: + cidr: + type: string + matchOperator: + type: string + action: + type: string + type: array + scope: Cluster + names: + plural: bgpfilters + singular: bgpfilter + kind: BGPFilter + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clusterinformations.crd.projectcalico.org +spec: + group: crd.projectcalico.org + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + description: ClusterInformationSpec contains the values of describing the cluster. + type: object + properties: + clusterGUID: + description: ClusterGUID is the GUID of the cluster + type: string + clusterType: + description: ClusterType describes the type of the cluster + type: string + datastoreReady: + description: DatastoreReady is used during significant datastore migrations to signal to components such as Felix that it should wait before accessing the datastore. + type: boolean + variant: + description: Variant declares which variant of Calico should be active. 
+ type: string + scope: Cluster + names: + plural: clusterinformations + singular: clusterinformation + kind: ClusterInformation + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: felixconfigurations.crd.projectcalico.org +spec: + group: crd.projectcalico.org + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + description: FelixConfigurationSpec contains the values of the Felix configuration. + type: object + properties: + iptablesBackend: + type: string + iptablesRefreshInterval: + type: string + iptablesPostWriteCheckInterval: + type: string + iptablesLockFilePath: + type: string + iptablesLockTimeout: + type: string + iptablesLockProbeInterval: + type: string + natOutgoingAddress: + type: string + natPortRange: + type: string + chainInsertMode: + type: string + scope: Cluster + names: + plural: felixconfigurations + singular: felixconfiguration + kind: FelixConfiguration + +--- +# ClusterRole for calico-cni-plugin +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: calico-cni-plugin +rules: + # The CNI plugin needs to get pods, nodes, and namespaces. + - apiGroups: [""] + resources: + - pods + - nodes + - namespaces + verbs: + - get + - list + - watch + - apiGroups: [""] + resources: + - pods/status + verbs: + - patch + - update + - apiGroups: ["crd.projectcalico.org"] + resources: + - blockaffinities + - ipamblocks + - ipamhandles + - clusterinformations + - ippools + - ipamconfigs + - ipreservations + verbs: + - get + - list + - create + - update + - delete + - apiGroups: ["crd.projectcalico.org"] + resources: + - ipamconfigs + - clusterinformations + verbs: + - create + - update + +--- +# Bind the ClusterRole to the calico-cni-plugin ServiceAccount. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: calico-cni-plugin +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: calico-cni-plugin +subjects: +- kind: ServiceAccount + name: calico-cni-plugin + namespace: kube-system + +--- +# Source: calico/templates/calico-node.yaml +# This manifest installs the calico-node container, as well +# as the CNI plugins and network config on +# each master and worker node in a Kubernetes cluster. +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: calico-node + namespace: kube-system + labels: + k8s-app: calico-node +spec: + selector: + matchLabels: + k8s-app: calico-node + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + template: + metadata: + labels: + k8s-app: calico-node + spec: + nodeSelector: + kubernetes.io/os: linux + hostNetwork: true + tolerations: + # Make sure calico-node gets scheduled on all nodes. + - effect: NoSchedule + operator: Exists + # Mark the pod as a critical add-on for rescheduling. + - key: CriticalAddonsOnly + operator: Exists + - effect: NoExecute + operator: Exists + serviceAccountName: calico-node + # Minimize downtime during a rolling upgrade or deletion; tell Kubernetes to do a "force + # deletion": https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods. + terminationGracePeriodSeconds: 0 + priorityClassName: system-node-critical + initContainers: + # This container performs upgrade from host-local IPAM to calico-ipam. + # It can be deleted if this is a fresh installation, or if you have already + # upgraded to use calico-ipam. + - name: upgrade-ipam + image: docker.io/calico/cni:v3.26.1 + command: ["/opt/cni/bin/calico-ipam", "-upgrade"] + envFrom: + - configMapRef: + # Allow KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT to be overridden for eBPF mode. 
+ name: kubernetes-services-endpoint + optional: true + env: + - name: KUBERNETES_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: CALICO_NETWORKING_BACKEND + valueFrom: + configMapKeyRef: + name: calico-config + key: calico_backend + volumeMounts: + - mountPath: /var/lib/cni/networks + name: host-local-net-dir + - mountPath: /host/opt/cni/bin + name: cni-bin-dir + securityContext: + privileged: true + # This container installs the CNI binaries + # and CNI network config file on each node. + - name: install-cni + image: docker.io/calico/cni:v3.26.1 + command: ["/opt/cni/bin/install"] + envFrom: + - configMapRef: + # Allow KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT to be overridden for eBPF mode. + name: kubernetes-services-endpoint + optional: true + env: + # Name of the CNI config file to create. + - name: CNI_CONF_NAME + value: "10-calico.conflist" + # The CNI network config to install on each node. + - name: CNI_NETWORK_CONFIG + valueFrom: + configMapKeyRef: + name: calico-config + key: cni_network_config + # Set the hostname based on the k8s node name. + - name: KUBERNETES_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + # CNI MTU Config variable + - name: CNI_MTU + valueFrom: + configMapKeyRef: + name: calico-config + key: veth_mtu + # Prevents the container from sleeping forever. + - name: SLEEP + value: "false" + volumeMounts: + - mountPath: /host/opt/cni/bin + name: cni-bin-dir + - mountPath: /host/etc/cni/net.d + name: cni-net-dir + securityContext: + privileged: true + # This init container mounts the necessary filesystems needed by the BPF data plane + # i.e. bpf at /sys/fs/bpf and cgroup2 at /run/calico/cgroup. Calico-node initialisation is executed + # in best effort fashion, i.e. no failure for errors, to not disrupt pod creation in iptable mode. 
+ - name: "mount-bpffs" + image: docker.io/calico/node:v3.26.1 + command: ["calico-node", "-init", "-best-effort"] + volumeMounts: + - mountPath: /sys/fs + name: sys-fs + # Bidirectional is required to ensure that the new mount we make at /sys/fs/bpf propagates to the host + # so that it outlives the init container. + mountPropagation: Bidirectional + - mountPath: /var/run/calico + name: var-run-calico + # Bidirectional is required to ensure that the new mount we make at /run/calico/cgroup propagates to the host + # so that it outlives the init container. + mountPropagation: Bidirectional + # Mount /proc/ from host which usually is an indication of the host OS. + # It's used by mountns binary, shipped by Calico, to mount host specific filesystems. + - mountPath: /host/proc + name: proc + securityContext: + privileged: true + containers: + # Runs calico-node container on each Kubernetes node. This + # container programs network policy and routes on each + # host. + - name: calico-node + image: docker.io/calico/node:v3.26.1 + envFrom: + - configMapRef: + # Allow KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT to be overridden for eBPF mode. + name: kubernetes-services-endpoint + optional: true + env: + # Use Kubernetes API as the backing datastore. + - name: DATASTORE_TYPE + value: "kubernetes" + # Wait for the datastore. + - name: WAIT_FOR_DATASTORE + value: "true" + # Set based on the k8s node name. + - name: NODENAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + # Choose the backend to use. + - name: CALICO_NETWORKING_BACKEND + valueFrom: + configMapKeyRef: + name: calico-config + key: calico_backend + # Cluster type to identify the deployment type + - name: CLUSTER_TYPE + value: "k8s,bgp" + # Auto-detect the BGP IP address. + - name: IP + value: "autodetect" + - name: IP_AUTODETECTION_METHOD + value: "interface=eth0" + # Enable IPv6 on Kubernetes. 
+ - name: IP6 + value: "autodetect" + - name: IP6_AUTODETECTION_METHOD + value: "interface=eth.*" + # Set MTU for tunnel device used if ipip is enabled + - name: FELIX_IPINIPMTU + valueFrom: + configMapKeyRef: + name: calico-config + key: veth_mtu + # Set MTU for the VXLAN tunnel device. + - name: FELIX_VXLANMTU + valueFrom: + configMapKeyRef: + name: calico-config + key: veth_mtu + # Set MTU for the Wireguard tunnel device. + - name: FELIX_WIREGUARDMTU + valueFrom: + configMapKeyRef: + name: calico-config + key: veth_mtu + # The default IPv4 pool to create on startup if none exists. Pod IPs will be + # chosen from this range. Changing this value after installation will have + # no effect. This should fall within `--cluster-cidr`. + - name: CALICO_IPV4POOL_CIDR + value: "10.42.0.0/16" + # Enable VXLAN + - name: CALICO_IPV4POOL_VXLAN + value: "Always" + # The default IPv6 pool to create on startup if none exists. Pod IPs will be + # chosen from this range. Changing this value after installation will have + # no effect. This should fall within `--cluster-cidr`. + - name: CALICO_IPV6POOL_CIDR + value: "2001:cafe:42::/56" + # Enable VXLAN for IPv6 + - name: CALICO_IPV6POOL_VXLAN + value: "Always" + # Enable or Disable IPIP on the default IP pool. + - name: CALICO_IPV4POOL_IPIP + value: "Never" + # Enable or Disable IPIP on the default IPv6 IP pool. + - name: CALICO_IPV6POOL_IPIP + value: "Never" + # Enable IP-in-IP within Felix. + - name: FELIX_IPINIPENABLED + value: "false" + # UDPSourceSpoofing controls whether Felix will attempt to + # work around kernel bugs by spoofing the source address of UDP packets. 
+ - name: FELIX_UDPSOURCESPOOFING + value: "true" + # Set the log level + - name: FELIX_LOGSEVERITYSCREEN + value: "info" + - name: FELIX_HEALTHENABLED + value: "true" + securityContext: + privileged: true + resources: + requests: + cpu: 250m + lifecycle: + preStop: + exec: + command: + - /bin/calico-node + - -shutdown + livenessProbe: + exec: + command: + - /bin/calico-node + - -felix-live + - -bird-live + periodSeconds: 10 + initialDelaySeconds: 10 + failureThreshold: 6 + timeoutSeconds: 10 + readinessProbe: + exec: + command: + - /bin/calico-node + - -felix-ready + - -bird-ready + periodSeconds: 10 + timeoutSeconds: 10 + volumeMounts: + # For maintaining CNI plugin API credentials. + - mountPath: /host/etc/cni/net.d + name: cni-net-dir + readOnly: false + - mountPath: /lib/modules + name: lib-modules + readOnly: true + - mountPath: /run/xtables.lock + name: xtables-lock + readOnly: false + - mountPath: /var/run/calico + name: var-run-calico + readOnly: false + - mountPath: /var/lib/calico + name: var-lib-calico + readOnly: false + - mountPath: /var/run/nodeagent + name: policysync + - mountPath: /sys/fs/ + name: sys-fs + # Bidirectional means that, if we mount the BPF filesystem at /sys/fs/bpf it will propagate to the host. + # If the host is known to mount BPF filesystem already, setting this to HostToContainer avoids + # double mounting the filesystem at /sys/fs/bpf. + mountPropagation: Bidirectional + - name: cni-log-dir + mountPath: /var/log/calico/cni + readOnly: true + volumes: + # Used by calico-node. 
+ - name: lib-modules + hostPath: + path: /lib/modules + - name: var-run-calico + hostPath: + path: /var/run/calico + - name: var-lib-calico + hostPath: + path: /var/lib/calico + - name: xtables-lock + hostPath: + path: /run/xtables.lock + type: FileOrCreate + - name: sys-fs + hostPath: + path: /sys/fs/ + type: DirectoryOrCreate + - name: cni-bin-dir + hostPath: + path: /opt/cni/bin + - name: cni-net-dir + hostPath: + path: /etc/cni/net.d + - name: cni-log-dir + hostPath: + path: /var/log/calico/cni + - name: host-local-net-dir + hostPath: + path: /var/lib/cni/networks + - name: policysync + hostPath: + type: DirectoryOrCreate + path: /var/run/nodeagent + - name: proc + hostPath: + path: /proc + +--- +# Source: calico/templates/calico-node-rbac.yaml +# Include a clusterrole for the calico-node DaemonSet, +# and bind it to the calico-node serviceaccount. +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: calico-node +rules: + # The CNI plugin needs to get pods, nodes, and namespaces. + - apiGroups: [""] + resources: + - pods + - nodes + - namespaces + verbs: + - get + # EndpointSlices are used for Service-based network policy rule + # enforcement. + - apiGroups: ["discovery.k8s.io"] + resources: + - endpointslices + verbs: + - watch + - list + - apiGroups: [""] + resources: + - endpoints + - services + verbs: + # Used to discover service IPs for advertisement. + - watch + - list + # Used to discover Typhas. + - get + # Pod CIDR auto-detection on kubeadm needs access to config maps. + - apiGroups: [""] + resources: + - configmaps + verbs: + - get + - apiGroups: [""] + resources: + - nodes/status + verbs: + # Needed for clearing NodeNetworkUnavailable flag. + - patch + # Calico stores some configuration information in node annotations. + - update + # Watch for changes to Kubernetes NetworkPolicies. + - apiGroups: ["networking.k8s.io"] + resources: + - networkpolicies + verbs: + - watch + - list + # Used by Calico for policy information. 
+ - apiGroups: [""] + resources: + - pods + - namespaces + - serviceaccounts + verbs: + - list + - watch + # The CNI plugin patches pods/status. + - apiGroups: [""] + resources: + - pods/status + verbs: + - patch + # Calico monitors various CRDs for config. + - apiGroups: ["crd.projectcalico.org"] + resources: + - globalfelixconfigs + - felixconfigurations + - bgppeers + - globalbgpconfigs + - bgpconfigurations + - ippools + - ipamblocks + - globalnetworkpolicies + - globalnetworksets + - networkpolicies + - networksets + - clusterinformations + - hostendpoints + - blockaffinities + - ipamhandles + - ipamconfigs + - ipreservations + - caliconodestatuses + - bgpfilters + verbs: + - get + - list + - watch + # Calico must create, update, and delete some CRDs. + - apiGroups: ["crd.projectcalico.org"] + resources: + - ippools + - felixconfigurations + - clusterinformations + - ipamhandles + - ipamconfigs + - ipamblocks + - ipreservations + - blockaffinities + - caliconodestatuses + - bgpfilters + verbs: + - create + - update + - delete + # Calico stores some configuration information on the node. + - apiGroups: [""] + resources: + - nodes + verbs: + - get + - list + - watch + # These permissions are only required for upgrade from v2.6, and can + # be removed after upgrade or on fresh installations.
+ - apiGroups: ["crd.projectcalico.org"] + resources: + - bgpconfigurations + - bgppeers + verbs: + - create + - update + # Required for CNI plugin to create service account tokens + - apiGroups: [""] + resources: + - serviceaccounts/token + verbs: + - create + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: calico-node + namespace: kube-system + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: calico-cni-plugin + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: calico-node +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: calico-node +subjects: +- kind: ServiceAccount + name: calico-node + namespace: kube-system diff --git a/k3s/rootfs/etc/zinit/create_flannel_iface.yaml b/k3s/rootfs/etc/zinit/create_flannel_iface.yaml deleted file mode 100644 index 35748005..00000000 --- a/k3s/rootfs/etc/zinit/create_flannel_iface.yaml +++ /dev/null @@ -1,2 +0,0 @@ -exec: /scripts/create_flannel_iface.sh -oneshot: true diff --git a/k3s/rootfs/etc/zinit/k3s.yaml b/k3s/rootfs/etc/zinit/k3s.yaml index ad5ace4d..4968637b 100644 --- a/k3s/rootfs/etc/zinit/k3s.yaml +++ b/k3s/rootfs/etc/zinit/k3s.yaml @@ -1,3 +1 @@ exec: /scripts/entrypoint.sh -after: - - create_flannel_iface diff --git a/k3s/scripts/create_flannel_iface.sh b/k3s/scripts/create_flannel_iface.sh deleted file mode 100644 index 8551f9f0..00000000 --- a/k3s/scripts/create_flannel_iface.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/bash -set -e - -if [ -z "${DUAL_STACK}" ]; then - echo "❌ Not a dual stack setup" - exit 1 -fi - -bridge="flannel-br" -eth_iface="eth0" - -echo "πŸ”§ Creating bridge: $bridge" -ip link add name $bridge type bridge || true -ip link set $bridge up - -echo "[*] Migrating IPv4 configuration from $eth_iface to $bridge..." 
-# Step 1: Detect IPv4 address and default gateway -ipv4=$(ip -4 addr show dev "$eth_iface" | awk '$1 == "inet" {print $2}') -ipv4_gw=$(ip route show | awk '$1 == "default" && $5 == "'"$eth_iface"'" {print $3; exit}') - -# Step 2: Capture all non-default IPv4 routes on eth0 -mapfile -t old_ipv4_routes < <(ip route show | awk '$1 != "default" && $5 == "'"$eth_iface"'"') - -# Step 3: Find the interface with 400::/7 route among eth1-eth9 -for iface in eth{1..9}; do - if ip -6 route show 400::/7 | grep -qw "dev $iface"; then - IPV6_IFACE="$iface" - echo "βœ… Found IPv6 interface: $IPV6_IFACE (has 400::/7 route)" - break - fi -done - -if [[ -z "$IPV6_IFACE" ]]; then - echo "❌ No interface eth1–eth9 has a route to 400::/7" - exit 1 -fi - -# Step 4: Extract IPs -ipv6_global=$(ip -6 addr show dev "$IPV6_IFACE" | awk '/inet6/ && !/fe80::/ {print $2}' | head -n1) -ipv6_gw=$(ip -6 route show 400::/7 | grep "dev $IPV6_IFACE" | awk '/via/ {print $3}' | head -n1) - -# Extract additional global IPv6 on eth0 (not link-local) -eth0_ipv6_extra=$(ip -6 addr show dev "$eth_iface" | awk '/inet6/ && !/fe80::/ {print $2}' | head -n1) - -# Detect current default IPv6 route via eth0 -eth0_ipv6_default=$(ip -6 route show default | grep "dev $eth_iface" | grep -v "fe80::" | head -n1) -if [[ -n "$eth0_ipv6_default" ]]; then - eth0_ipv6_gw=$(echo "$eth0_ipv6_default" | awk '{for(i=1;i<=NF;i++) if($i=="via") print $(i+1)}') - echo "[*] Found default IPv6 gateway on $eth_iface: $eth0_ipv6_gw" -fi - -# Step 5: Clean up original IPs -ip addr del "$ipv4" dev "$eth_iface" -ip addr del "$ipv6_global" dev "$IPV6_IFACE" -if [[ -n "$eth0_ipv6_extra" ]]; then - ip addr del "$eth0_ipv6_extra" dev "$eth_iface" -fi - -# Step 6: Attach interfaces to bridge -ip link set "$bridge" up -ip link set "$eth_iface" master "$bridge" -ip link set "$IPV6_IFACE" master "$bridge" -ip link set "$eth_iface" up -ip link set "$IPV6_IFACE" up - -# Step 7: Reassign IPs to bridge (important: order matters) -if [[ -n 
"$eth0_ipv6_extra" ]]; then - echo "[+] Moving additional IPv6 ($eth0_ipv6_extra) from $eth_iface to $bridge" - ip addr add "$eth0_ipv6_extra" dev "$bridge" -fi - -if [[ -n "$ipv6_global" ]]; then - ip addr add "$ipv6_global" dev "$bridge" -fi - -if [[ -n "$ipv4" ]]; then - ip addr add "$ipv4" dev "$bridge" -fi - -# Step 8: Reapply default IPv4 route -if [[ -n "$ipv4_gw" ]]; then - ip route del default dev "$eth_iface" 2>/dev/null || true - ip route add default via "$ipv4_gw" dev "$bridge" -fi - -# Step 9: Move default IPv6 route from eth0 to bridge -if [[ -n "$eth0_ipv6_gw" ]]; then - echo "[*] Replacing default IPv6 route via $eth0_ipv6_gw from $eth_iface to $bridge..." - ip -6 route del default dev "$eth_iface" 2>/dev/null || true - ip -6 route add default via "$eth0_ipv6_gw" dev "$bridge" -fi - -# Step 10: Reapply non-default IPv4 routes -echo "[*] Re-applying non-default IPv4 routes previously on $eth_iface..." -for route in "${old_ipv4_routes[@]}"; do - new_route=$(echo "$route" | sed "s/ dev $eth_iface/ dev $bridge/") - echo " ➀ $new_route" - ip route replace $new_route -done - -# Step 11: Re-add 400::/7 route via bridge -echo "🧹 Removing old 400::/7 route via $IPV6_IFACE" -ip -6 route del 400::/7 dev "$IPV6_IFACE" || true - -echo "πŸ“‘ Adding route: 400::/7 via $ipv6_gw on $bridge" -ip -6 route add 400::/7 via "$ipv6_gw" dev "$bridge" - -# Step 12: Enable forwarding and proxying -echo "[*] Enabling forwarding and proxy features..." -sysctl -w net.ipv4.ip_forward=1 -sysctl -w net.ipv6.conf."$bridge".forwarding=1 -sysctl -w net.ipv4.conf."$bridge".proxy_arp=1 -sysctl -w net.ipv6.conf."$bridge".proxy_ndp=1 diff --git a/k3s/scripts/entrypoint.sh b/k3s/scripts/entrypoint.sh index f01c6629..55e18deb 100644 --- a/k3s/scripts/entrypoint.sh +++ b/k3s/scripts/entrypoint.sh @@ -1,60 +1,56 @@ #!/bin/bash +mount --make-shared /run +mount --make-shared /sys + if [ ! 
-z "${K3S_DATA_DIR}" ]; then echo "k3s data-dir set to: $K3S_DATA_DIR" cp -r /var/lib/rancher/k3s/* $K3S_DATA_DIR && rm -rf /var/lib/rancher/k3s EXTRA_ARGS="--data-dir $K3S_DATA_DIR --kubelet-arg=root-dir=$K3S_DATA_DIR/kubelet" fi -if [ -z "${K3S_FLANNEL_IFACE}" ]; then - K3S_FLANNEL_IFACE=flannel-br - if [ -z "${DUAL_STACK}" ]; then - K3S_FLANNEL_IFACE=eth0 - fi -fi +# Disable Flannel and the built-in network policy controller so Calico takes over; both are server-only flags that "k3s agent" rejects, so only add them when this node runs "k3s server" +[[ -z "${K3S_URL}" || "${MASTER}" = "true" ]] && EXTRA_ARGS="$EXTRA_ARGS --flannel-backend=none --disable-network-policy" if [[ "${DUAL_STACK}" = "true" && "${MASTER}" = "true" ]]; then EXTRA_ARGS="$EXTRA_ARGS --cluster-cidr=10.42.0.0/16,2001:cafe:42::/56" EXTRA_ARGS="$EXTRA_ARGS --service-cidr=10.43.0.0/16,2001:cafe:43::/112" - EXTRA_ARGS="$EXTRA_ARGS --flannel-ipv6-masq" fi if [[ "${DUAL_STACK}" = "true" ]]; then - # this to force the ip selection from flannel-br to use mycelium ip - # not any other ipv6 on flannel-br - - if [ -z "$K3S_FLANNEL_IFACE" ]; then - echo "Usage: $0 " - exit 1 - fi - - # Step 1: Find the next-hop for 400::/7 - route_line=$(ip -6 route | grep "^400::/7.*dev $K3S_FLANNEL_IFACE") - if [ -z "$route_line" ]; then - echo "No 400::/7 route found via interface $K3S_FLANNEL_IFACE" - exit 1 - fi - - # Extract next-hop IPv6 - nexthop=$(echo "$route_line" | awk '{for(i=1;i<=NF;i++) if ($i=="via") print $(i+1)}') - prefix=$(echo "$nexthop" | cut -d':' -f1-4) - - # Step 2: Get the IPv4 address - ipv4=$(ip -4 addr show dev "$K3S_FLANNEL_IFACE" | awk '/inet / {print $2}' | cut -d'/' -f1) - - # Step 3: Get global IPv6 addresses and match subnet - ipv6_list=$(ip -6 addr show dev "$K3S_FLANNEL_IFACE" scope global | awk '/inet6/ {print $2}' | cut -d'/' -f1) - ipv6="" - - for ip in $ipv6_list; do - ip_prefix=$(echo "$ip" | cut -d':' -f1-4) - if [ "$ip_prefix" = "$prefix" ]; then - ipv6=$ip + # With Calico, we can use the original interfaces directly without bridging + # Get IPv4 from eth0 + ipv4=$(ip -4 addr show dev "eth0" | awk '/inet / {print $2}' | cut -d'/' -f1) + + # Find IPv6
interface with 400::/7 route + for iface in eth{1..9}; do + if ip -6 route show 400::/7 | grep -qw "dev $iface"; then + IPV6_IFACE="$iface" break fi done - - EXTRA_ARGS="$EXTRA_ARGS --node-ip=$ipv4,$ipv6" + + if [[ -n "$IPV6_IFACE" ]]; then + # Get global IPv6 from the mycelium interface + route_line=$(ip -6 route | grep "^400::/7.*dev $IPV6_IFACE") + if [[ -n "$route_line" ]]; then + nexthop=$(echo "$route_line" | awk '{for(i=1;i<=NF;i++) if ($i=="via") print $(i+1)}') + prefix=$(echo "$nexthop" | cut -d':' -f1-4) + + ipv6_list=$(ip -6 addr show dev "$IPV6_IFACE" scope global | awk '/inet6/ {print $2}' | cut -d'/' -f1) + for ip in $ipv6_list; do + ip_prefix=$(echo "$ip" | cut -d':' -f1-4) + if [ "$ip_prefix" = "$prefix" ]; then + ipv6=$ip + break + fi + done + fi + fi + + if [[ -n "$ipv4" && -n "$ipv6" ]]; then + EXTRA_ARGS="$EXTRA_ARGS --node-ip=$ipv4,$ipv6" + fi fi if [ -z "${K3S_URL}" ]; then @@ -74,9 +70,9 @@ if [ -z "${K3S_URL}" ]; then if [ "${HA}" = "true" ]; then EXTRA_ARGS="$EXTRA_ARGS --cluster-init" fi - exec k3s server --flannel-iface $K3S_FLANNEL_IFACE $EXTRA_ARGS 2>&1 + exec k3s server $EXTRA_ARGS 2>&1 elif [ "${MASTER}" = "true" ]; then - exec k3s server --server $K3S_URL --flannel-iface $K3S_FLANNEL_IFACE $EXTRA_ARGS 2>&1 + exec k3s server --server $K3S_URL $EXTRA_ARGS 2>&1 else - exec k3s agent --flannel-iface $K3S_FLANNEL_IFACE $EXTRA_ARGS 2>&1 + exec k3s agent $EXTRA_ARGS 2>&1 fi