Skip to content

Conformance Cluster Mesh (ci-clustermesh) #23634

Conformance Cluster Mesh (ci-clustermesh)

Conformance Cluster Mesh (ci-clustermesh) #23634

name: Conformance Cluster Mesh (ci-clustermesh)

# Any change in triggers needs to be reflected in the concurrency group.
on:
  # Manual/dispatch trigger: the caller supplies the PR number, the ref that
  # provides the workflow context, and the SHA under test.
  workflow_dispatch:
    inputs:
      PR-number:
        description: 'Pull request number.'
        required: true
      context-ref:
        description: >-
          Context in which the workflow runs. If PR is from a fork, will be the
          PR target branch (general case). If PR is NOT from a fork, will be the
          PR branch itself (this allows committers to test changes to workflows
          directly from PRs).
        required: true
      SHA:
        description: 'SHA under test (head of the PR branch).'
        required: true
      extra-args:
        description: >-
          [JSON object] Arbitrary arguments passed from the trigger comment via
          regex capture group. Parse with 'fromJson(inputs.extra-args).argName'
          in workflow.
        required: false
        default: "{}"
  # Push trigger: stable branch and its feature branches, except for changes
  # that only touch the documentation tree.
  push:
    branches:
      - 'v1.15'
      - 'ft/v1.15/**'
    paths-ignore:
      - 'Documentation/**'
# Once any single scope is listed here, every scope that is NOT listed is
# implicitly set to 'none'.
permissions:
  # Needed by actions/checkout to access the repository.
  contents: read
  # Needed to retrieve information from the PR API.
  pull-requests: read
  # Needed to set the commit status.
  statuses: write
concurrency:
  # A new run in the same group cancels the one still in progress.
  cancel-in-progress: true
  # The group name has three parts, one per line:
  #   1. Workflow name
  #   2. Event type
  #   3. A unique identifier depending on the event type:
  #        - push: SHA
  #        - workflow_dispatch: PR number
  #
  # This yields a distinct concurrency group name for each type of testing,
  # such that re-runs will cancel the previous run.
  group: |
    ${{ github.workflow }}
    ${{ github.event_name }}
    ${{
      (github.event_name == 'push' && github.sha) ||
      (github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
    }}
# Workflow-wide environment variables.
env:
  # Explicit empty string instead of a bare (null) value. Forwarded as the
  # `ci-version` input of the "Install Cilium CLI" step.
  cilium_cli_ci_version: ''
  CILIUM_CLI_MODE: helm
  # Cluster and kubeconfig context names are suffixed with the run ID; the
  # context names are the cluster names prefixed with `kind-`.
  clusterName1: cluster1-${{ github.run_id }}
  clusterName2: cluster2-${{ github.run_id }}
  contextName1: kind-cluster1-${{ github.run_id }}
  contextName2: kind-cluster2-${{ github.run_id }}
jobs:
  # Sets the initial commit status on the SHA under test. Skipped for push
  # events.
  commit-status-start:
    if: ${{ github.event_name != 'push' }}
    name: Commit Status Start
    runs-on: ubuntu-latest
    steps:
      - name: Set initial commit status
        uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f # v2.0.1
        with:
          sha: ${{ inputs.SHA || github.sha }}

  # For each matrix entry: creates two kind clusters, installs Cilium in both,
  # meshes them (via clustermesh-apiserver, kvstoremesh or an external
  # kvstore, depending on `mode`) and runs the multi-cluster connectivity test.
  installation-and-connectivity:
    name: Installation and Connectivity Test
    runs-on: ${{ vars.GH_RUNNER_EXTRA_POWER }}
    timeout-minutes: 60
    env:
      job_name: "Installation and Connectivity Test"
    strategy:
      fail-fast: false
      matrix:
        include:
          - name: '1'
            tunnel: 'disabled'
            ipfamily: 'ipv4'
            encryption: 'disabled'
            kube-proxy: 'iptables'
            mode: 'kvstoremesh'
            cm-auth-mode-1: 'legacy'
            cm-auth-mode-2: 'legacy'
            maxConnectedClusters: '255'

          - name: '2'
            tunnel: 'disabled'
            ipfamily: 'dual'
            encryption: 'wireguard'
            kube-proxy: 'none'
            mode: 'clustermesh'
            cm-auth-mode-1: 'migration'
            cm-auth-mode-2: 'migration'
            maxConnectedClusters: '511'

          # IPsec encryption cannot be used with BPF NodePort.
          - name: '3'
            tunnel: 'disabled'
            ipfamily: 'dual'
            encryption: 'ipsec'
            kube-proxy: 'iptables'
            mode: 'kvstoremesh'
            cm-auth-mode-1: 'cluster'
            cm-auth-mode-2: 'cluster'
            maxConnectedClusters: '255'

          # IPsec encryption is currently not supported in case of ipv6-only clusters (#23553)
          # Wireguard encryption is currently affected by a bug in case of ipv6-only clusters (#23917)
          - name: '4'
            tunnel: 'disabled'
            ipfamily: 'ipv6'
            encryption: 'disabled'
            kube-proxy: 'none'
            mode: 'clustermesh'
            cm-auth-mode-1: 'legacy'
            cm-auth-mode-2: 'migration'
            maxConnectedClusters: '255'

          # IPsec encryption cannot be used with BPF NodePort.
          # NOTE(review): no cm-auth-mode-* keys here, so the
          # clustermesh.apiserver.tls.authMode helm value is rendered empty for
          # this entry. Presumably irrelevant because clustermesh.useAPIServer
          # is false when mode is 'external' — confirm.
          - name: '5'
            tunnel: 'disabled'
            ipfamily: 'dual'
            encryption: 'ipsec'
            kube-proxy: 'iptables'
            mode: 'external'
            maxConnectedClusters: '255'

          - name: '6'
            tunnel: 'vxlan'
            ipfamily: 'ipv4'
            encryption: 'disabled'
            kube-proxy: 'none'
            mode: 'clustermesh'
            cm-auth-mode-1: 'cluster'
            cm-auth-mode-2: 'cluster'
            maxConnectedClusters: '255'

          - name: '7'
            tunnel: 'geneve'
            ipfamily: 'dual'
            encryption: 'wireguard'
            kube-proxy: 'iptables'
            mode: 'kvstoremesh'
            cm-auth-mode-1: 'migration'
            cm-auth-mode-2: 'cluster'
            maxConnectedClusters: '511'

          # IPsec encryption cannot be used with BPF NodePort.
          - name: '8'
            tunnel: 'vxlan'
            ipfamily: 'dual'
            encryption: 'ipsec'
            kube-proxy: 'iptables'
            mode: 'clustermesh'
            cm-auth-mode-1: 'cluster'
            cm-auth-mode-2: 'cluster'
            maxConnectedClusters: '255'

          # Tunneling is currently not supported in case of ipv6-only clusters (#17240)
          # - name: '9'
          #   tunnel: 'vxlan'
          #   ipfamily: 'ipv6'
          #   encryption: 'disabled'
          #   kube-proxy: 'none'
          #   mode: 'kvstoremesh'
          #   cm-auth-mode-1: 'cluster'
          #   cm-auth-mode-2: 'cluster'

          # NOTE(review): 'external' mode, no cm-auth-mode-* keys (same remark
          # as entry '5').
          - name: '10'
            tunnel: 'vxlan'
            ipfamily: 'dual'
            encryption: 'wireguard'
            kube-proxy: 'iptables'
            mode: 'external'
            maxConnectedClusters: '511'

    steps:
      - name: Checkout context ref (trusted)
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          ref: ${{ inputs.context-ref || github.sha }}
          persist-credentials: false

      - name: Set Environment Variables
        uses: ./.github/actions/set-env-variables

      # chart-dir points inside `untrusted/`, which is only populated later by
      # the "Checkout pull request branch (NOT TRUSTED)" step.
      - name: Get Cilium's default values
        id: default_vars
        uses: ./.github/actions/helm-default
        with:
          image-tag: ${{ inputs.SHA }}
          chart-dir: ./untrusted/install/kubernetes/cilium

      # Translates the matrix entry into cilium-cli install flags, connectivity
      # test flags and kind pod/service CIDRs, exported as step outputs.
      - name: Set up job variables for GHA environment
        id: vars
        run: |
          CILIUM_INSTALL_DEFAULTS="${{ steps.default_vars.outputs.cilium_install_defaults }} \
            --helm-set=kubeProxyReplacement=${{ matrix.kube-proxy == 'none' }} \
            --helm-set=bpf.masquerade=${{ matrix.kube-proxy == 'none' }} \
            --helm-set=hubble.enabled=true \
            --helm-set=hubble.relay.enabled=true \
            --helm-set=clustermesh.useAPIServer=${{ matrix.mode != 'external' }} \
            --helm-set=clustermesh.apiserver.kvstoremesh.enabled=${{ matrix.mode == 'kvstoremesh' }} \
            --helm-set=clustermesh.maxConnectedClusters=${{ matrix.maxConnectedClusters }} \
            "

          CILIUM_INSTALL_TUNNEL="--helm-set=tunnelProtocol=${{ matrix.tunnel }}"
          if [ "${{ matrix.tunnel }}" == "disabled" ]; then
            CILIUM_INSTALL_TUNNEL="--helm-set-string=routingMode=native \
              --helm-set=autoDirectNodeRoutes=true \
              --helm-set=ipv4NativeRoutingCIDR=10.240.0.0/12 \
              --helm-set=ipv6NativeRoutingCIDR=fd00:10:240::/44"
          fi

          case "${{ matrix.ipfamily }}" in
            ipv4)
              CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=true --helm-set=ipv6.enabled=false"
              KIND_POD_CIDR_1="10.242.0.0/16"
              KIND_SVC_CIDR_1="10.243.0.0/16"
              KIND_POD_CIDR_2="10.244.0.0/16"
              KIND_SVC_CIDR_2="10.245.0.0/16"
              ;;
            ipv6)
              CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=false --helm-set=ipv6.enabled=true"
              KIND_POD_CIDR_1="fd00:10:242::/48"
              KIND_SVC_CIDR_1="fd00:10:243::/112"
              KIND_POD_CIDR_2="fd00:10:244::/48"
              KIND_SVC_CIDR_2="fd00:10:245::/112"
              ;;
            dual)
              CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=true --helm-set=ipv6.enabled=true"
              KIND_POD_CIDR_1="10.242.0.0/16,fd00:10:242::/48"
              KIND_SVC_CIDR_1="10.243.0.0/16,fd00:10:243::/112"
              KIND_POD_CIDR_2="10.244.0.0/16,fd00:10:244::/48"
              KIND_SVC_CIDR_2="10.245.0.0/16,fd00:10:245::/112"
              ;;
            *)
              echo "Unknown IP family '${{ matrix.ipfamily }}'" && false
              ;;
          esac

          CILIUM_INSTALL_ENCRYPTION=""
          if [ "${{ matrix.encryption }}" != "disabled" ]; then
            CILIUM_INSTALL_ENCRYPTION="--helm-set=encryption.enabled=true \
              --helm-set=encryption.type=${{ matrix.encryption }}"
          fi

          CILIUM_INSTALL_INGRESS=""
          if [ "${{ matrix.kube-proxy }}" == "none" ]; then
            # The ingress controller requires KPR to be enabled.
            CILIUM_INSTALL_INGRESS="--helm-set=ingressController.enabled=true"
          fi

          CONNECTIVITY_TEST_DEFAULTS="--hubble=false \
            --flow-validation=disabled \
            --multi-cluster=${{ env.contextName2 }} \
            --external-target=google.com. \
            --include-unsafe-tests \
            --collect-sysdump-on-failure"

          # Skip external traffic (e.g. 1.1.1.1 and www.google.com) tests as IPv6 is not supported
          # in GitHub runners: https://github.com/actions/runner-images/issues/668
          if [[ "${{ matrix.ipfamily }}" == "ipv6" ]]; then
            CONNECTIVITY_TEST_DEFAULTS="$CONNECTIVITY_TEST_DEFAULTS \
              --test='!/pod-to-world' \
              --test='!/pod-to-cidr'"
          fi

          echo cilium_install_defaults="${CILIUM_INSTALL_DEFAULTS} ${CILIUM_INSTALL_TUNNEL} \
            ${CILIUM_INSTALL_IPFAMILY} ${CILIUM_INSTALL_ENCRYPTION} ${CILIUM_INSTALL_INGRESS}" >> "$GITHUB_OUTPUT"
          echo connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS} >> "$GITHUB_OUTPUT"
          echo sha=${{ steps.default_vars.outputs.sha }} >> "$GITHUB_OUTPUT"
          echo kind_pod_cidr_1=${KIND_POD_CIDR_1} >> "$GITHUB_OUTPUT"
          echo kind_svc_cidr_1=${KIND_SVC_CIDR_1} >> "$GITHUB_OUTPUT"
          echo kind_pod_cidr_2=${KIND_POD_CIDR_2} >> "$GITHUB_OUTPUT"
          echo kind_svc_cidr_2=${KIND_SVC_CIDR_2} >> "$GITHUB_OUTPUT"

      - name: Install Cilium CLI
        uses: cilium/cilium-cli@a9579d1afc528b085930c5237a69ee77b2c2be76 # v0.16.4
        with:
          repository: ${{ env.CILIUM_CLI_RELEASE_REPO }}
          release-version: ${{ env.CILIUM_CLI_VERSION }}
          ci-version: ${{ env.cilium_cli_ci_version }}

      # Renders one kind configuration per cluster from the shared template,
      # differing only in the pod/service CIDRs.
      - name: Generate Kind configuration files
        run: |
          PODCIDR=${{ steps.vars.outputs.kind_pod_cidr_1 }} \
            SVCCIDR=${{ steps.vars.outputs.kind_svc_cidr_1 }} \
            IPFAMILY=${{ matrix.ipfamily }} \
            KUBEPROXYMODE=${{ matrix.kube-proxy }} \
            envsubst < ./.github/kind-config.yaml.tmpl > ./.github/kind-config-cluster1.yaml

          PODCIDR=${{ steps.vars.outputs.kind_pod_cidr_2 }} \
            SVCCIDR=${{ steps.vars.outputs.kind_svc_cidr_2 }} \
            IPFAMILY=${{ matrix.ipfamily }} \
            KUBEPROXYMODE=${{ matrix.kube-proxy }} \
            envsubst < ./.github/kind-config.yaml.tmpl > ./.github/kind-config-cluster2.yaml

      - name: Create Kind cluster 1
        uses: helm/kind-action@99576bfa6ddf9a8e612d83b513da5a75875caced # v1.9.0
        with:
          cluster_name: ${{ env.clusterName1 }}
          version: ${{ env.KIND_VERSION }}
          node_image: ${{ env.KIND_K8S_IMAGE }}
          kubectl_version: ${{ env.KIND_K8S_VERSION }}
          config: ./.github/kind-config-cluster1.yaml
          wait: 0 # The control-plane never becomes ready, since no CNI is present

      - name: Create Kind cluster 2
        uses: helm/kind-action@99576bfa6ddf9a8e612d83b513da5a75875caced # v1.9.0
        with:
          cluster_name: ${{ env.clusterName2 }}
          version: ${{ env.KIND_VERSION }}
          node_image: ${{ env.KIND_K8S_IMAGE }}
          kubectl_version: ${{ env.KIND_K8S_VERSION }}
          config: ./.github/kind-config-cluster2.yaml
          wait: 0 # The control-plane never becomes ready, since no CNI is present

      - name: Label one of the nodes as external to the cluster
        run: |
          kubectl --context ${{ env.contextName1 }} label node \
            ${{ env.clusterName1 }}-worker2 cilium.io/no-schedule=true

      # Make sure that coredns uses IPv4-only upstream DNS servers also in case of clusters
      # with IP family dual, since IPv6 ones are not reachable and cause spurious failures.
      # Additionally, this is also required to workaround #23283.
      - name: Configure the coredns nameservers
        run: |
          COREDNS_PATCH="
          spec:
            template:
              spec:
                dnsPolicy: None
                dnsConfig:
                  nameservers:
                  - 8.8.4.4
                  - 8.8.8.8
          "

          kubectl --context ${{ env.contextName1 }} patch deployment -n kube-system coredns --patch="$COREDNS_PATCH"
          kubectl --context ${{ env.contextName2 }} patch deployment -n kube-system coredns --patch="$COREDNS_PATCH"

      - name: Start kvstore clusters
        id: kvstore
        if: matrix.mode == 'external'
        uses: ./.github/actions/kvstore
        with:
          clusters: 2

      - name: Create the secret containing the kvstore credentials
        if: matrix.mode == 'external'
        run: |
          kubectl --context ${{ env.contextName1 }} create -n kube-system -f ${{ steps.kvstore.outputs.cilium_etcd_secrets_path }}
          kubectl --context ${{ env.contextName2 }} create -n kube-system -f ${{ steps.kvstore.outputs.cilium_etcd_secrets_path }}

      - name: Set clustermesh connection parameters
        if: matrix.mode == 'external'
        id: clustermesh-vars
        run: |
          echo "cilium_install_clustermesh= \
            --set=clustermesh.config.enabled=true \
            --set clustermesh.config.clusters[0].name=${{ env.clusterName1 }} \
            --set clustermesh.config.clusters[1].name=${{ env.clusterName2 }} \
            ${{ steps.kvstore.outputs.cilium_install_clustermesh }} \
          " >> "$GITHUB_OUTPUT"

      # Polls the registry until every CI image for the SHA under test exists;
      # bounded by the step timeout.
      - name: Wait for images to be available
        timeout-minutes: 30
        shell: bash
        run: |
          for image in cilium-ci operator-generic-ci hubble-relay-ci clustermesh-apiserver-ci ; do
            until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done
          done

      # Warning: since this is a privileged workflow, subsequent workflow job
      # steps must take care not to execute untrusted code.
      - name: Checkout pull request branch (NOT TRUSTED)
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          ref: ${{ steps.vars.outputs.sha }}
          persist-credentials: false
          path: untrusted
          sparse-checkout: |
            install/kubernetes/cilium

      # The same randomly generated key is installed in both clusters.
      - name: Create the IPSec secret in both clusters
        if: matrix.encryption == 'ipsec'
        run: |
          SECRET="3 rfc4106(gcm(aes)) $(openssl rand -hex 20) 128"
          kubectl --context ${{ env.contextName1 }} create -n kube-system secret generic cilium-ipsec-keys --from-literal=keys="${SECRET}"
          kubectl --context ${{ env.contextName2 }} create -n kube-system secret generic cilium-ipsec-keys --from-literal=keys="${SECRET}"

      - name: Install Cilium in cluster1
        id: install-cilium-cluster1
        env:
          KVSTORE_ID: 1
        run: |
          # Explicitly configure the NodePort to make sure that it is different in
          # each cluster, to workaround #24692
          cilium --context ${{ env.contextName1 }} install \
            ${{ steps.vars.outputs.cilium_install_defaults }} \
            --helm-set cluster.name=${{ env.clusterName1 }} \
            --helm-set cluster.id=1 \
            --helm-set clustermesh.apiserver.service.nodePort=32379 \
            --helm-set clustermesh.apiserver.tls.authMode=${{ matrix.cm-auth-mode-1 }} \
            ${{ steps.kvstore.outputs.cilium_install_kvstore }} \
            ${{ steps.clustermesh-vars.outputs.cilium_install_clustermesh }} \
            --nodes-without-cilium

      - name: Copy the Cilium CA secret to cluster2, as they must match
        run: |
          kubectl --context ${{ env.contextName1 }} get secret -n kube-system cilium-ca -o yaml |
            kubectl --context ${{ env.contextName2 }} create -f -

      # cluster.id is set to the matrix entry's maxConnectedClusters value.
      - name: Install Cilium in cluster2
        env:
          KVSTORE_ID: 2
        run: |
          # Explicitly configure the NodePort to make sure that it is different in
          # each cluster, to workaround #24692
          cilium --context ${{ env.contextName2 }} install \
            ${{ steps.vars.outputs.cilium_install_defaults }} \
            --helm-set cluster.name=${{ env.clusterName2 }} \
            --helm-set cluster.id=${{ matrix.maxConnectedClusters }} \
            --helm-set clustermesh.apiserver.service.nodePort=32380 \
            --helm-set clustermesh.apiserver.tls.authMode=${{ matrix.cm-auth-mode-2 }} \
            ${{ steps.kvstore.outputs.cilium_install_kvstore }} \
            ${{ steps.clustermesh-vars.outputs.cilium_install_clustermesh }}

      - name: Wait for cluster mesh status to be ready
        run: |
          cilium --context ${{ env.contextName1 }} status --wait
          cilium --context ${{ env.contextName2 }} status --wait
          cilium --context ${{ env.contextName1 }} clustermesh status --wait
          cilium --context ${{ env.contextName2 }} clustermesh status --wait

      - name: Connect clusters
        if: matrix.mode != 'external'
        run: |
          cilium --context ${{ env.contextName1 }} clustermesh connect --destination-context ${{ env.contextName2 }}

      # Second readiness wait, after the clusters have been connected.
      - name: Wait for cluster mesh status to be ready
        if: matrix.mode != 'external'
        run: |
          cilium --context ${{ env.contextName1 }} status --wait
          cilium --context ${{ env.contextName2 }} status --wait
          cilium --context ${{ env.contextName1 }} clustermesh status --wait
          cilium --context ${{ env.contextName2 }} clustermesh status --wait

      # Starts the port-forward in the background, then fails the step unless
      # exactly two matching processes are found after 10 seconds.
      - name: Port forward Relay
        run: |
          cilium --context ${{ env.contextName1 }} hubble port-forward &
          sleep 10s
          [[ $(pgrep -f "cilium.*hubble.*port-forward|kubectl.*port-forward.*hubble-relay" | wc -l) == 2 ]]

      - name: Make JUnit report directory
        run: |
          mkdir -p cilium-junits

      - name: Run connectivity test (${{ join(matrix.*, ', ') }})
        run: |
          cilium --context ${{ env.contextName1 }} connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} \
            --junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}).xml" \
            --junit-property github_job_step="Run connectivity test (${{ join(matrix.*, ', ') }})"

      # Runs only when an earlier step did not succeed and the cluster1 install
      # step was not skipped.
      - name: Post-test information gathering
        if: ${{ !success() && steps.install-cilium-cluster1.outcome != 'skipped' }}
        run: |
          cilium --context ${{ env.contextName1 }} status
          cilium --context ${{ env.contextName1 }} clustermesh status
          cilium --context ${{ env.contextName2 }} status
          cilium --context ${{ env.contextName2 }} clustermesh status

          kubectl config use-context ${{ env.contextName1 }}
          kubectl get pods --all-namespaces -o wide
          cilium sysdump --output-filename cilium-sysdump-context1-final-${{ join(matrix.*, '-') }}

          kubectl config use-context ${{ env.contextName2 }}
          kubectl get pods --all-namespaces -o wide
          cilium sysdump --output-filename cilium-sysdump-context2-final-${{ join(matrix.*, '-') }}

          if [ "${{ matrix.mode }}" == "external" ]; then
            for i in {1..2}; do
              echo
              echo "# Retrieving logs from kvstore$i docker container"
              docker logs kvstore$i
            done
          fi
        shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently

      - name: Upload artifacts
        if: ${{ !success() }}
        uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
        with:
          name: cilium-sysdumps-${{ matrix.name }}
          path: cilium-sysdump-*.zip

      - name: Upload JUnits [junit]
        if: ${{ always() }}
        uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
        with:
          name: cilium-junits-${{ matrix.name }}
          path: cilium-junits/*.xml

      - name: Publish Test Results As GitHub Summary
        if: ${{ always() }}
        uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3
        with:
          junit-directory: "cilium-junits"

  # Merges the per-matrix-entry artifacts uploaded by the test job into single
  # artifacts. Runs whatever the outcome of the test job.
  merge-upload:
    if: ${{ always() }}
    name: Merge and Upload Artifacts
    runs-on: ubuntu-latest
    needs: installation-and-connectivity
    steps:
      # Sysdumps are only merged when the test job failed; a failure of the
      # merge itself does not fail this job.
      - name: Merge Sysdumps
        if: ${{ needs.installation-and-connectivity.result == 'failure' }}
        uses: actions/upload-artifact/merge@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
        with:
          name: cilium-sysdumps
          pattern: cilium-sysdumps-*
          retention-days: 5
          delete-merged: true
        continue-on-error: true

      - name: Merge JUnits
        uses: actions/upload-artifact/merge@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
        with:
          name: cilium-junits
          pattern: cilium-junits-*
          retention-days: 5
          delete-merged: true

  # Reports the result of the test job as the commit status of the SHA under
  # test. Skipped for push events.
  commit-status-final:
    if: ${{ always() && github.event_name != 'push' }}
    name: Commit Status Final
    needs: installation-and-connectivity
    runs-on: ubuntu-latest
    steps:
      - name: Set final commit status
        uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f # v2.0.1
        with:
          sha: ${{ inputs.SHA || github.sha }}
          status: ${{ needs.installation-and-connectivity.result }}