# Conformance Cluster Mesh (ci-clustermesh) #24365
name: Conformance Cluster Mesh (ci-clustermesh)

# Keep the concurrency group in sync with any change made to these triggers.
on:
  # Dispatched runs targeting a specific pull request.
  workflow_dispatch:
    inputs:
      PR-number:
        description: 'Pull request number.'
        required: true
      context-ref:
        description: >-
          Context in which the workflow runs. If PR is from a fork, will be
          the PR target branch (general case). If PR is NOT from a fork, will
          be the PR branch itself (this allows committers to test changes to
          workflows directly from PRs).
        required: true
      SHA:
        description: 'SHA under test (head of the PR branch).'
        required: true
      extra-args:
        description: >-
          [JSON object] Arbitrary arguments passed from the trigger comment
          via regex capture group. Parse with
          'fromJson(inputs.extra-args).argName' in workflow.
        required: false
        default: '{}'
  # Daily run, at 04:00.
  schedule:
    - cron: '0 4 * * *'
# As soon as one scope is given an explicit access level, every scope that is
# not listed falls back to 'none'.
permissions:
  # Needed by actions/checkout to access the repository
  contents: read
  # Needed to retrieve information from the PR API
  pull-requests: read
  # Needed to set commit statuses
  statuses: write
concurrency:
  # A re-run cancels the previous run belonging to the same group.
  cancel-in-progress: true
  # The group name is built from three parts, so that each type of testing
  # gets its own unique group:
  #   1. the workflow name
  #   2. the event type
  #   3. an identifier that depends on the event type:
  #        - schedule: SHA
  #        - workflow_dispatch: PR number
  group: |
    ${{ github.workflow }}
    ${{ github.event_name }}
    ${{
      (github.event_name == 'schedule' && github.sha) ||
      (github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
    }}
env:
  # renovate: datasource=github-releases depName=cilium/cilium-cli
  cilium_cli_version: v0.14.8
  # Left empty here; forwarded as the `ci-version` input of the Cilium CLI
  # installation step.
  cilium_cli_ci_version:
  # Names of the first cluster and of its kubectl context. The run ID suffix
  # makes them specific to this workflow run.
  clusterName1: cluster1-${{ github.run_id }}
  contextName1: kind-cluster1-${{ github.run_id }}
  # Names of the second cluster and of its kubectl context.
  clusterName2: cluster2-${{ github.run_id }}
  contextName2: kind-cluster2-${{ github.run_id }}
jobs:
  # Sets the initial commit status on the SHA under test.
  commit-status-start:
    name: Commit Status Start
    runs-on: ubuntu-latest
    steps:
      - name: Set initial commit status
        uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f # v2.0.1
        with:
          sha: ${{ inputs.SHA || github.sha }}

  # Creates two Kind clusters, installs Cilium in both, connects them through
  # Cluster Mesh and runs the connectivity test suite across them, once per
  # matrix entry.
  installation-and-connectivity:
    name: Installation and Connectivity Test
    runs-on: ${{ vars.GH_RUNNER_EXTRA_POWER }}
    timeout-minutes: 60
    env:
      job_name: "Installation and Connectivity Test"
    strategy:
      # Let every configuration run to completion, even if another one fails.
      fail-fast: false
      matrix:
        include:
          - name: '1'
            tunnel: 'disabled'
            ipfamily: 'ipv4'
            encryption: 'disabled'
            kube-proxy: 'iptables'

          - name: '2'
            tunnel: 'disabled'
            ipfamily: 'ipv4'
            encryption: 'wireguard'
            kube-proxy: 'none'

          # IPsec encryption cannot be used with BPF NodePort.
          - name: '3'
            tunnel: 'disabled'
            ipfamily: 'ipv4'
            encryption: 'ipsec'
            kube-proxy: 'iptables'

          # IPsec encryption is currently not supported in case of ipv6-only clusters (#23553)
          # Wireguard encryption is currently affected by a bug in case of ipv6-only clusters (#23917)
          - name: '4'
            tunnel: 'disabled'
            ipfamily: 'ipv6'
            encryption: 'disabled'
            kube-proxy: 'none'

          # IPsec encryption cannot be used with BPF NodePort.
          - name: '5'
            tunnel: 'disabled'
            ipfamily: 'dual'
            encryption: 'ipsec'
            kube-proxy: 'iptables'

          - name: '6'
            tunnel: 'vxlan'
            ipfamily: 'ipv4'
            encryption: 'disabled'
            kube-proxy: 'none'

          - name: '7'
            tunnel: 'geneve'
            ipfamily: 'ipv4'
            encryption: 'wireguard'
            kube-proxy: 'iptables'

          # IPsec encryption cannot be used with BPF NodePort.
          - name: '8'
            tunnel: 'vxlan'
            ipfamily: 'ipv4'
            encryption: 'ipsec'
            kube-proxy: 'iptables'

          # Tunneling is currently not supported in case of ipv6-only clusters (#17240)
          # - name: '9'
          #   tunnel: 'vxlan'
          #   ipfamily: 'ipv6'
          #   encryption: 'disabled'
          #   kube-proxy: 'none'

          - name: '10'
            tunnel: 'vxlan'
            ipfamily: 'dual'
            encryption: 'wireguard'
            kube-proxy: 'iptables'

    steps:
      - name: Checkout context ref (trusted)
        uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5
        with:
          ref: ${{ inputs.context-ref || github.sha }}
          persist-credentials: false

      # Local action from the trusted checkout. Presumably the source of the
      # QUAY_ORGANIZATION_DEV and KIND_* environment variables used below.
      - name: Set Environment Variables
        uses: ./.github/actions/set-env-variables

      # Computes the command line arguments and the Kind CIDRs matching the
      # current matrix entry, and exposes them as step outputs.
      - name: Set up job variables for GHA environment
        id: vars
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            SHA="${{ inputs.SHA }}"
          else
            SHA="${{ github.sha }}"
          fi

          # bpf.masquerade is disabled due to #23283
          CILIUM_INSTALL_DEFAULTS="--chart-directory=./untrusted/install/kubernetes/cilium \
            --helm-set=debug.enabled=true \
            --helm-set=image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/cilium-ci \
            --helm-set=image.useDigest=false \
            --helm-set=image.tag=${SHA} \
            --helm-set=operator.image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/operator \
            --helm-set=operator.image.suffix=-ci \
            --helm-set=operator.image.tag=${SHA} \
            --helm-set=operator.image.useDigest=false \
            --helm-set=bpf.masquerade=false \
            --config monitor-aggregation=none \
            --rollback=false \
            --version="

          # Honor the tunnel protocol requested by the matrix entry (e.g. vxlan
          # or geneve), rather than always falling back to vxlan.
          CILIUM_INSTALL_TUNNEL="--helm-set=tunnel=${{ matrix.tunnel }}"
          if [ "${{ matrix.tunnel }}" == "disabled" ]; then
            CILIUM_INSTALL_TUNNEL="--helm-set-string=tunnel=disabled \
              --helm-set=autoDirectNodeRoutes=true \
              --helm-set=ipv4NativeRoutingCIDR=10.240.0.0/12 \
              --helm-set=ipv6NativeRoutingCIDR=fd00:10:240::/44"
          fi

          case "${{ matrix.ipfamily }}" in
            ipv4)
              CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=true --helm-set=ipv6.enabled=false"
              KIND_POD_CIDR_1="10.242.0.0/16"
              KIND_SVC_CIDR_1="10.243.0.0/16"
              KIND_POD_CIDR_2="10.244.0.0/16"
              KIND_SVC_CIDR_2="10.245.0.0/16"
              ;;
            ipv6)
              CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=false --helm-set=ipv6.enabled=true"
              KIND_POD_CIDR_1="fd00:10:242::/48"
              KIND_SVC_CIDR_1="fd00:10:243::/112"
              KIND_POD_CIDR_2="fd00:10:244::/48"
              KIND_SVC_CIDR_2="fd00:10:245::/112"
              ;;
            dual)
              CILIUM_INSTALL_IPFAMILY="--helm-set=ipv4.enabled=true --helm-set=ipv6.enabled=true"
              KIND_POD_CIDR_1="10.242.0.0/16,fd00:10:242::/48"
              KIND_SVC_CIDR_1="10.243.0.0/16,fd00:10:243::/112"
              KIND_POD_CIDR_2="10.244.0.0/16,fd00:10:244::/48"
              KIND_SVC_CIDR_2="10.245.0.0/16,fd00:10:245::/112"
              ;;
            *)
              echo "Unknown IP family '${{ matrix.ipfamily }}'" && false
              ;;
          esac

          CILIUM_INSTALL_L7_PROXY="--helm-set=l7Proxy=true"
          if [ "${{ matrix.encryption }}" == "wireguard" ]; then
            # Wireguard (--enable-wireguard) is not compatible with L7 proxy (--enable-l7-proxy)
            CILIUM_INSTALL_L7_PROXY="--helm-set=l7Proxy=false"
          fi

          # Use the same chart as the installation (the one of the SHA under
          # test), so that enabling Hubble does not switch to a different chart.
          HUBBLE_ENABLE_DEFAULTS="--chart-directory=./untrusted/install/kubernetes/cilium \
            --helm-set=hubble.relay.image.override=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/hubble-relay-ci:${SHA} \
            --helm-set=hubble.relay.image.useDigest=false"

          CLUSTERMESH_ENABLE_DEFAULTS="--apiserver-image=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/clustermesh-apiserver-ci \
            --apiserver-version=${SHA} --service-type=NodePort"

          CONNECTIVITY_TEST_DEFAULTS="--hubble=false \
            --flow-validation=disabled \
            --multi-cluster=${{ env.contextName2 }} \
            --external-target=google.com \
            --collect-sysdump-on-failure"

          # Skip external traffic (e.g. 1.1.1.1 and www.google.com) tests as IPv6 is not supported
          # in GitHub runners: https://github.com/actions/runner-images/issues/668
          if [[ "${{ matrix.ipfamily }}" == "ipv6" ]]; then
            CONNECTIVITY_TEST_DEFAULTS="$CONNECTIVITY_TEST_DEFAULTS \
              --test='!/pod-to-world' \
              --test='!/pod-to-cidr'"
          fi

          echo cilium_install_defaults="${CILIUM_INSTALL_DEFAULTS} ${CILIUM_INSTALL_TUNNEL} ${CILIUM_INSTALL_IPFAMILY} ${CILIUM_INSTALL_L7_PROXY}" >> $GITHUB_OUTPUT
          echo hubble_enable_defaults=${HUBBLE_ENABLE_DEFAULTS} >> $GITHUB_OUTPUT
          echo connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS} >> $GITHUB_OUTPUT
          echo clustermesh_enable_defaults=${CLUSTERMESH_ENABLE_DEFAULTS} >> $GITHUB_OUTPUT
          echo sha=${SHA} >> $GITHUB_OUTPUT
          echo kind_pod_cidr_1=${KIND_POD_CIDR_1} >> $GITHUB_OUTPUT
          echo kind_svc_cidr_1=${KIND_SVC_CIDR_1} >> $GITHUB_OUTPUT
          echo kind_pod_cidr_2=${KIND_POD_CIDR_2} >> $GITHUB_OUTPUT
          echo kind_svc_cidr_2=${KIND_SVC_CIDR_2} >> $GITHUB_OUTPUT

      - name: Install Cilium CLI
        uses: cilium/cilium-cli@5362f383942260c0aed4f3876e09c3452435577a # v0.14.8
        with:
          release-version: ${{ env.cilium_cli_version }}
          ci-version: ${{ env.cilium_cli_ci_version }}

      # Renders one Kind configuration per cluster from the shared template,
      # substituting the CIDRs, IP family and kube-proxy mode.
      - name: Generate Kind configuration files
        run: |
          PODCIDR=${{ steps.vars.outputs.kind_pod_cidr_1 }} \
            SVCCIDR=${{ steps.vars.outputs.kind_svc_cidr_1 }} \
            IPFAMILY=${{ matrix.ipfamily }} \
            KUBEPROXYMODE=${{ matrix.kube-proxy }} \
            envsubst < ./.github/kind-config.yaml.tmpl > ./.github/kind-config-cluster1.yaml

          PODCIDR=${{ steps.vars.outputs.kind_pod_cidr_2 }} \
            SVCCIDR=${{ steps.vars.outputs.kind_svc_cidr_2 }} \
            IPFAMILY=${{ matrix.ipfamily }} \
            KUBEPROXYMODE=${{ matrix.kube-proxy }} \
            envsubst < ./.github/kind-config.yaml.tmpl > ./.github/kind-config-cluster2.yaml

      - name: Create Kind cluster 1
        uses: helm/kind-action@0025e74a8c7512023d06dc019c617aa3cf561fde # v1.10.0
        with:
          cluster_name: ${{ env.clusterName1 }}
          version: ${{ env.KIND_VERSION }}
          node_image: ${{ env.KIND_K8S_IMAGE }}
          kubectl_version: ${{ env.KIND_K8S_VERSION }}
          config: ./.github/kind-config-cluster1.yaml
          wait: 0 # The control-plane never becomes ready, since no CNI is present

      - name: Create Kind cluster 2
        uses: helm/kind-action@0025e74a8c7512023d06dc019c617aa3cf561fde # v1.10.0
        with:
          cluster_name: ${{ env.clusterName2 }}
          version: ${{ env.KIND_VERSION }}
          node_image: ${{ env.KIND_K8S_IMAGE }}
          kubectl_version: ${{ env.KIND_K8S_VERSION }}
          config: ./.github/kind-config-cluster2.yaml
          wait: 0 # The control-plane never becomes ready, since no CNI is present

      # Make sure that coredns uses IPv4-only upstream DNS servers also in case of clusters
      # with IP family dual, since IPv6 ones are not reachable and cause spurious failures.
      - name: Configure the coredns nameservers
        if: matrix.ipfamily == 'dual'
        run: |
          COREDNS_PATCH="
          spec:
            template:
              spec:
                dnsPolicy: None
                dnsConfig:
                  nameservers:
                  - 8.8.4.4
                  - 8.8.8.8
          "

          kubectl --context ${{ env.contextName1 }} patch deployment -n kube-system coredns --patch="$COREDNS_PATCH"
          kubectl --context ${{ env.contextName2 }} patch deployment -n kube-system coredns --patch="$COREDNS_PATCH"

      # Polls the registry until all the images tagged with the SHA under test
      # can be inspected; bounded by the step timeout.
      - name: Wait for images to be available
        timeout-minutes: 30
        shell: bash
        run: |
          for image in cilium-ci operator-generic-ci hubble-relay-ci clustermesh-apiserver-ci ; do
            until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done
          done

      # Warning: since this is a privileged workflow, subsequent workflow job
      # steps must take care not to execute untrusted code.
      - name: Checkout pull request branch (NOT TRUSTED)
        uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5
        with:
          ref: ${{ steps.vars.outputs.sha }}
          persist-credentials: false
          path: untrusted
          # Only the Helm chart is needed from the untrusted ref.
          sparse-checkout: |
            install/kubernetes/cilium

      - name: Install Cilium in cluster1
        run: |
          # Using the deprecated flag --cluster-name due to cilium/cilium-cli#1347
          #   --helm-set cluster.name ${{ env.clusterName1 }}
          cilium --context ${{ env.contextName1 }} install \
            ${{ steps.vars.outputs.cilium_install_defaults }} \
            --encryption ${{ matrix.encryption }} \
            --cluster-name ${{ env.clusterName1 }} \
            --helm-set cluster.id=1

      - name: Copy the IPsec secret to cluster2, as they must match
        if: matrix.encryption == 'ipsec'
        run: |
          kubectl --context ${{ env.contextName1 }} get secret -n kube-system cilium-ipsec-keys -o yaml |
            kubectl --context ${{ env.contextName2 }} create -f -

      - name: Install Cilium in cluster2
        run: |
          # Using the deprecated flag --cluster-name due to cilium/cilium-cli#1347
          #   --helm-set cluster.name ${{ env.clusterName2 }}
          cilium --context ${{ env.contextName2 }} install \
            ${{ steps.vars.outputs.cilium_install_defaults }} \
            --encryption ${{ matrix.encryption }} \
            --cluster-name ${{ env.clusterName2 }} \
            --helm-set cluster.id=255 \
            --inherit-ca ${{ env.contextName1 }}

      # Hubble Relay is only enabled in cluster1 (--relay=false in cluster2).
      - name: Enable Hubble
        run: |
          cilium --context ${{ env.contextName1 }} hubble enable ${{ steps.vars.outputs.hubble_enable_defaults }} --wait=false
          cilium --context ${{ env.contextName2 }} hubble enable ${{ steps.vars.outputs.hubble_enable_defaults }} --wait=false --relay=false

          cilium --context ${{ env.contextName1 }} status --wait
          cilium --context ${{ env.contextName2 }} status --wait

      - name: Enable Cluster Mesh
        run: |
          cilium --context ${{ env.contextName1 }} clustermesh enable ${{ steps.vars.outputs.clustermesh_enable_defaults }}
          cilium --context ${{ env.contextName2 }} clustermesh enable ${{ steps.vars.outputs.clustermesh_enable_defaults }}

      - name: Wait for cluster mesh status to be ready
        run: |
          cilium --context ${{ env.contextName1 }} status --wait
          cilium --context ${{ env.contextName2 }} status --wait
          cilium --context ${{ env.contextName1 }} clustermesh status --wait
          cilium --context ${{ env.contextName2 }} clustermesh status --wait

      - name: Connect clusters
        run: |
          cilium --context ${{ env.contextName1 }} clustermesh connect --destination-context ${{ env.contextName2 }}

      - name: Wait for cluster mesh status to be ready
        run: |
          cilium --context ${{ env.contextName1 }} status --wait
          cilium --context ${{ env.contextName2 }} status --wait
          cilium --context ${{ env.contextName1 }} clustermesh status --wait
          cilium --context ${{ env.contextName2 }} clustermesh status --wait

      # Starts the port-forward in the background, then checks that exactly
      # two matching processes are running after a short grace period.
      - name: Port forward Relay
        run: |
          cilium --context ${{ env.contextName1 }} hubble port-forward &
          sleep 10s
          [[ $(pgrep -f "cilium.*hubble.*port-forward|kubectl.*port-forward.*hubble-relay" | wc -l) == 2 ]]

      - name: Make JUnit report directory
        run: |
          mkdir -p cilium-junits

      - name: Run connectivity test (${{ join(matrix.*, ', ') }})
        run: |
          cilium --context ${{ env.contextName1 }} connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} \
            --junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}).xml" \
            --junit-property github_job_step="Run connectivity test (${{ join(matrix.*, ', ') }})"

      # Only runs when a previous step did not succeed; collects the status of
      # both clusters along with a sysdump of each.
      - name: Post-test information gathering
        if: ${{ !success() }}
        run: |
          cilium --context ${{ env.contextName1 }} status
          cilium --context ${{ env.contextName1 }} clustermesh status
          cilium --context ${{ env.contextName2 }} status
          cilium --context ${{ env.contextName2 }} clustermesh status

          kubectl config use-context ${{ env.contextName1 }}
          kubectl get pods --all-namespaces -o wide
          cilium sysdump --output-filename cilium-sysdump-context1-final-${{ join(matrix.*, '-') }}

          kubectl config use-context ${{ env.contextName2 }}
          kubectl get pods --all-namespaces -o wide
          cilium sysdump --output-filename cilium-sysdump-context2-final-${{ join(matrix.*, '-') }}
        shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently

      - name: Upload artifacts
        if: ${{ !success() }}
        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
        with:
          name: cilium-sysdumps-${{ matrix.name }}
          path: cilium-sysdump-*.zip

      - name: Upload JUnits [junit]
        if: ${{ always() }}
        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
        with:
          name: cilium-junits-${{ matrix.name }}
          path: cilium-junits/*.xml

      - name: Publish Test Results As GitHub Summary
        if: ${{ always() }}
        uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3
        with:
          junit-directory: "cilium-junits"

  # Merges the per-matrix-entry artifacts (cilium-sysdumps-*, cilium-junits-*)
  # into a single artifact of each kind.
  merge-upload:
    if: ${{ always() }}
    name: Merge and Upload Artifacts
    runs-on: ubuntu-latest
    needs: installation-and-connectivity
    steps:
      - name: Merge Sysdumps
        if: ${{ needs.installation-and-connectivity.result == 'failure' }}
        uses: actions/upload-artifact/merge@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
        with:
          name: cilium-sysdumps
          pattern: cilium-sysdumps-*
          retention-days: 5
          delete-merged: true
        # A failure to merge the sysdumps does not fail the job.
        continue-on-error: true

      - name: Merge JUnits
        uses: actions/upload-artifact/merge@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
        with:
          name: cilium-junits
          pattern: cilium-junits-*
          retention-days: 5
          delete-merged: true

  # Reports the result of the test job as the final commit status; runs
  # regardless of the outcome of that job.
  commit-status-final:
    if: ${{ always() }}
    name: Commit Status Final
    needs: installation-and-connectivity
    runs-on: ubuntu-latest
    steps:
      - name: Set final commit status
        uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f # v2.0.1
        with:
          sha: ${{ inputs.SHA || github.sha }}
          status: ${{ needs.installation-and-connectivity.result }}