name: Multicluster / Cluster mesh (ci-multicluster-1.12)
# Any change in triggers needs to be reflected in the concurrency group.
on:
issue_comment:
types:
- created
# Run once a day
schedule:
- cron: '0 5 * * *'
### FOR TESTING PURPOSES
# This workflow runs in the context of `main`, and ignores changes to
# workflow files in PRs. For testing changes to this workflow from a PR:
# - Make sure the PR uses a branch from the base repository (requires write
# privileges). It will not work with a branch from a fork (missing secrets).
# - Uncomment the `pull_request` event below, commit separately with a `DO
# NOT MERGE` message, and push to the PR. As long as the commit is present,
# any push to the PR will trigger this workflow.
# - Don't forget to remove the `DO NOT MERGE` commit once satisfied. The run
# will then disappear from the PR checks: please provide a direct link to the
# successful workflow run (it can be found in the Actions tab) in a comment.
#
# pull_request: {}
###
# When access is specified for any one of the scopes, all scopes that are
# not specified are set to 'none'.
permissions:
# To be able to access the repository with actions/checkout
contents: read
# To allow retrieving information from the PR API
pull-requests: read
# So that Sibz/github-status-action can write into the status API
statuses: write
concurrency:
# Structure:
# - Workflow name
# - Event type
# - A unique identifier depending on event type:
# - schedule: SHA
# - issue_comment: PR number
# - pull_request: PR number
#
# This structure ensures a unique concurrency group name is generated for each
# type of testing:
# - schedule: {name} schedule {SHA}
# - issue_comment: {name} issue_comment {PR number}
# - pull_request: {name} pull_request {PR number}
#
# Note: for `issue_comment` triggers, we additionally need to filter based on
# the comment content, otherwise any comment would interrupt ongoing workflow runs.
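#
# For illustration only (hypothetical PR number): a `/ci-multicluster-1.12`
# comment on PR 12345 resolves to roughly
# `Multicluster / Cluster mesh (ci-multicluster-1.12) issue_comment 12345`,
# so a new trigger comment on the same PR cancels any run still in progress.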
group: |
${{ github.workflow }}
${{ github.event_name }}
${{
(github.event_name == 'schedule' && github.sha) ||
(github.event_name == 'issue_comment' && (
github.event.comment.body == '/ci-multicluster-1.12' ||
github.event.comment.body == '/test-backport-1.12'
) && github.event.issue.number) ||
(github.event_name == 'pull_request' && github.event.pull_request.number)
}}
cancel-in-progress: true
env:
clusterName1: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-mesh-1
clusterName2: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-mesh-2
clusterNameBase: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-mesh
zone: us-west2-a
firewallRuleName: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-rule
k8s_version: 1.24
# renovate: datasource=github-releases depName=cilium/cilium-cli
cilium_cli_version: v0.14.5
cilium_cli_ci_version:
check_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
USE_GKE_GCLOUD_AUTH_PLUGIN: True
jobs:
check_changes:
name: Deduce required tests from code changes
if: |
(github.event_name == 'issue_comment' && (
github.event.comment.body == '/ci-multicluster-1.12' ||
github.event.comment.body == '/test-backport-1.12'
)) ||
github.event_name == 'schedule' ||
github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
tested: ${{ steps.tested-tree.outputs.src }}
steps:
# Because we run on issue comments, we need to check out the code for
# paths-filter to work.
- name: Checkout code
if: ${{ github.event.issue.pull_request }}
uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2
with:
persist-credentials: false
- name: Retrieve pull request's base and head
if: ${{ github.event.issue.pull_request }}
id: pr
run: |
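# The issue_comment payload does not include the PR's base and head refs,
# so fetch them from the pull request API for the paths-filter step below.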
curl ${{ github.event.issue.pull_request.url }} > pr.json
echo "base=$(jq -r '.base.sha' pr.json)" >> $GITHUB_OUTPUT
echo "head=$(jq -r '.head.sha' pr.json)" >> $GITHUB_OUTPUT
- name: Check code changes
if: ${{ github.event.issue.pull_request }}
uses: dorny/paths-filter@4512585405083f25c027a35db413c2b3b9006d50 # v2.11.1
id: tested-tree
with:
base: ${{ steps.pr.outputs.base }}
ref: ${{ steps.pr.outputs.head }}
filters: |
src:
- '!(test|Documentation)/**'
# This job is skipped when the workflow was triggered with the generic
# `/test-backport-1.12` trigger if the only modified files were under
# `test/` or `Documentation/`.
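# The `src` filter above matches any changed path outside `test/` and
# `Documentation/`, so `tested` is 'true' only when files that can affect
# the tests have changed.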
installation-and-connectivity:
name: "Installation and Connectivity Test"
needs: check_changes
if: |
(github.event_name == 'issue_comment' && (
github.event.comment.body == '/ci-multicluster-1.12' ||
(github.event.comment.body == '/test-backport-1.12' && needs.check_changes.outputs.tested == 'true')
)) ||
github.event_name == 'schedule' ||
github.event_name == 'pull_request'
runs-on: ubuntu-latest
timeout-minutes: 45
env:
job_name: "Installation and Connectivity Test"
steps:
- name: Checkout main branch to access local actions
uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2
with:
ref: ${{ github.event.repository.default_branch }}
persist-credentials: false
- name: Set Environment Variables
uses: ./.github/actions/set-env-variables
- name: Set up job variables
id: vars
run: |
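# Pick the commit to test and an owner label for the GKE resources:
# - PR runs (trigger comment or pull_request event): the PR's head SHA and number
# - scheduled runs: the tip of the v1.12 branch
# - anything else: the commit that triggered this workflow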
if [ ${{ github.event.issue.pull_request || github.event.pull_request }} ]; then
PR_API_JSON=$(curl \
-H "Accept: application/vnd.github.v3+json" \
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
${{ github.event.issue.pull_request.url || github.event.pull_request.url }})
SHA=$(echo "$PR_API_JSON" | jq -r ".head.sha")
OWNER=$(echo "$PR_API_JSON" | jq -r ".number")
elif [ "${{ github.event_name }}" = "schedule" ]; then
curl https://api.github.com/repos/cilium/cilium/branches/v1.12 > branch.json
SHA=$(jq -r '.commit.sha' branch.json)
OWNER=v1-12
else
SHA=${{ github.sha }}
OWNER=${{ github.sha }}
fi
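# Install from the local Helm chart, pointing every component image at the
# CI builds tagged with the selected SHA.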
CILIUM_INSTALL_DEFAULTS="--chart-directory=install/kubernetes/cilium \
--helm-set=image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/cilium-ci \
--helm-set=image.useDigest=false \
--helm-set=image.tag=${SHA} \
--helm-set=operator.image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/operator \
--helm-set=operator.image.suffix=-ci \
--helm-set=operator.image.tag=${SHA} \
--helm-set=operator.image.useDigest=false \
--helm-set=clustermesh.apiserver.image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/clustermesh-apiserver-ci \
--helm-set=clustermesh.apiserver.image.tag=${SHA} \
--helm-set=clustermesh.apiserver.image.useDigest=false \
--helm-set=hubble.relay.image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/hubble-relay-ci \
--helm-set=hubble.relay.image.tag=${SHA} \
--wait=false \
--rollback=false \
--config monitor-aggregation=none \
--version="
HUBBLE_ENABLE_DEFAULTS="--chart-directory=install/kubernetes/cilium \
--relay-image=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/hubble-relay-ci:${SHA} \
--relay-version=${SHA}"
CONNECTIVITY_TEST_DEFAULTS="--flow-validation=disabled --hubble=false --collect-sysdump-on-failure"
CLUSTERMESH_ENABLE_DEFAULTS="--apiserver-image=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/clustermesh-apiserver-ci \
--apiserver-version=${SHA}"
echo cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} >> $GITHUB_OUTPUT
echo hubble_enable_defaults=${HUBBLE_ENABLE_DEFAULTS} >> $GITHUB_OUTPUT
echo connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS} >> $GITHUB_OUTPUT
echo clustermesh_enable_defaults=${CLUSTERMESH_ENABLE_DEFAULTS} >> $GITHUB_OUTPUT
echo sha=${SHA} >> $GITHUB_OUTPUT
echo owner=${OWNER} >> $GITHUB_OUTPUT
- name: Set commit status to pending
uses: Sibz/github-status-action@650dd1a882a76dbbbc4576fb5974b8d22f29847f # v1.1.6
with:
authToken: ${{ secrets.GITHUB_TOKEN }}
sha: ${{ steps.vars.outputs.sha }}
context: ${{ github.workflow }}
description: Connectivity test in progress...
state: pending
target_url: ${{ env.check_url }}
# Checkout source code to install Cilium using local Helm chart.
- name: Checkout code
uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2
with:
ref: ${{ steps.vars.outputs.sha }}
persist-credentials: false
- name: Install Cilium CLI
uses: cilium/cilium-cli@d610d6e2774e29aef9565bad935e3b4943d541af # v0.14.5
with:
release-version: ${{ env.cilium_cli_version }}
ci-version: ${{ env.cilium_cli_ci_version }}
- name: Set up gcloud credentials
id: 'auth'
uses: google-github-actions/auth@35b0e87d162680511bf346c299f71c9c5c379033 # v1.1.1
with:
credentials_json: '${{ secrets.GCP_PR_SA_KEY }}'
- name: Set up gcloud CLI
uses: google-github-actions/setup-gcloud@e30db14379863a8c79331b04a9969f4c1e225e0b # v1.1.1
with:
project_id: ${{ secrets.GCP_PROJECT_ID }}
version: "405.0.0"
- name: Install gke-gcloud-auth-plugin
run: |
gcloud components install gke-gcloud-auth-plugin
- name: Display gcloud CLI info
run: |
gcloud info
- name: Create GKE cluster 1
run: |
gcloud container clusters create ${{ env.clusterName1 }} \
--labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--zone ${{ env.zone }} \
--cluster-version ${{ env.k8s_version }} \
--enable-ip-alias \
--create-subnetwork="range=/26" \
--cluster-ipv4-cidr="/21" \
--services-ipv4-cidr="/24" \
--image-type COS_CONTAINERD \
--num-nodes 2 \
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 10GB \
--node-taints node.cilium.io/agent-not-ready=true:NoExecute \
--preemptible \
--async
- name: Create GKE cluster 2
run: |
gcloud container clusters create ${{ env.clusterName2 }} \
--labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--zone ${{ env.zone }} \
--cluster-version ${{ env.k8s_version }} \
--enable-ip-alias \
--create-subnetwork="range=/26" \
--cluster-ipv4-cidr="/21" \
--services-ipv4-cidr="/24" \
--image-type COS_CONTAINERD \
--num-nodes 2 \
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 10GB \
--node-taints node.cilium.io/agent-not-ready=true:NoExecute \
--preemptible \
--async
- name: Wait for clusters to be provisioned
run: |
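# Both clusters were created with --async, so block until GKE reports no
# RUNNING operations targeting either of them.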
while [ "$(gcloud container operations list --filter="status=RUNNING AND targetLink~${{ env.clusterNameBase }}" --format="value(name)")" ];do
echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 10
done
- name: Allow cross-cluster traffic
run: |
TAG1=$(gcloud compute firewall-rules list --filter="name~^gke-${{ env.clusterName1 }}-[0-9a-z]*-all$" --format="value(name)")
TAG2=$(gcloud compute firewall-rules list --filter="name~^gke-${{ env.clusterName2 }}-[0-9a-z]*-all$" --format="value(name)")
gcloud compute firewall-rules describe $TAG1
gcloud compute firewall-rules describe $TAG2
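# Derive the node network tags by replacing the "-all" suffix of the
# auto-created rule names with "-node", then allow all cross-cluster traffic
# between the two node pools.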
gcloud compute firewall-rules create ${{ env.firewallRuleName }} --allow tcp,udp,icmp,sctp,esp,ah --priority=999 --source-ranges=10.0.0.0/9 --target-tags=${TAG1/-all/-node},${TAG2/-all/-node}
gcloud compute firewall-rules describe ${{ env.firewallRuleName }}
- name: Get cluster credentials and setup contexts
id: contexts
run: |
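# get-credentials adds a kubeconfig context per cluster; extract the context
# names so that later steps can target each cluster via --context.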
gcloud container clusters get-credentials ${{ env.clusterName1 }} --zone ${{ env.zone }}
CONTEXT_1="$(kubectl config view | grep ${{ env.clusterName1 }} | head -1 | awk '{print $2}')"
echo context1=${CONTEXT_1} >> $GITHUB_OUTPUT
gcloud container clusters get-credentials ${{ env.clusterName2 }} --zone ${{ env.zone }}
CONTEXT_2="$(kubectl config view | grep ${{ env.clusterName2 }} | head -1 | awk '{print $2}')"
echo context2=${CONTEXT_2} >> $GITHUB_OUTPUT
- name: Wait for images to be available
timeout-minutes: 10
shell: bash
run: |
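# Poll the registry until every CI image tagged with this commit is
# available before installing.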
for image in cilium-ci operator-generic-ci hubble-relay-ci clustermesh-apiserver-ci ; do
until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done
done
- name: Install Cilium in cluster1
run: |
cilium install ${{ steps.vars.outputs.cilium_install_defaults }} \
--context ${{ steps.contexts.outputs.context1 }} \
--cluster-name=${{ env.clusterName1 }} \
--cluster-id 1 \
--ipv4-native-routing-cidr=10.0.0.0/9
- name: Install Cilium in cluster2
run: |
cilium install ${{ steps.vars.outputs.cilium_install_defaults }} \
--context ${{ steps.contexts.outputs.context2 }} \
--cluster-name=${{ env.clusterName2 }} \
--cluster-id 2 \
--ipv4-native-routing-cidr=10.0.0.0/9 \
--inherit-ca ${{ steps.contexts.outputs.context1 }}
- name: Enable Relay
run: |
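# Enable Hubble in both clusters, but run the Relay component only in
# cluster 1 (the second invocation passes --relay=false).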
cilium hubble enable --context ${{ steps.contexts.outputs.context1 }} ${{ steps.vars.outputs.hubble_enable_defaults }} --wait=false
cilium hubble enable --context ${{ steps.contexts.outputs.context2 }} ${{ steps.vars.outputs.hubble_enable_defaults }} --relay=false
cilium status --wait --context ${{ steps.contexts.outputs.context1 }}
- name: Enable cluster mesh
run: |
cilium clustermesh enable --context ${{ steps.contexts.outputs.context1 }} ${{ steps.vars.outputs.clustermesh_enable_defaults }}
cilium clustermesh enable --context ${{ steps.contexts.outputs.context2 }} ${{ steps.vars.outputs.clustermesh_enable_defaults }}
- name: Wait for cluster mesh status to be ready
run: |
cilium clustermesh status --wait --context ${{ steps.contexts.outputs.context1 }}
cilium clustermesh status --wait --context ${{ steps.contexts.outputs.context2 }}
- name: Connect clusters
run: |
cilium clustermesh connect \
--context ${{ steps.contexts.outputs.context1 }} \
--destination-context ${{ steps.contexts.outputs.context2 }}
- name: Wait for cluster mesh status to be ready
run: |
cilium clustermesh status --wait --context ${{ steps.contexts.outputs.context1 }}
cilium clustermesh status --wait --context ${{ steps.contexts.outputs.context2 }}
- name: Port forward Relay
run: |
cilium hubble port-forward --context ${{ steps.contexts.outputs.context1 }}&
sleep 10s
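# The backgrounded CLI command is expected to spawn a kubectl port-forward
# child, hence the check for exactly two matching processes.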
[[ $(pgrep -f "cilium.*hubble.*port-forward|kubectl.*port-forward.*hubble-relay" | wc -l) == 2 ]]
- name: Make JUnit report directory
run: |
mkdir -p cilium-junits
- name: Run connectivity test
run: |
cilium connectivity test \
${{ steps.vars.outputs.connectivity_test_defaults }} \
--context ${{ steps.contexts.outputs.context1 }} \
--multi-cluster ${{ steps.contexts.outputs.context2 }} \
--collect-sysdump-on-failure \
--test '!/pod-to-.*-nodeport' \
--test '!no-policies/pod-to-service' \
--external-target google.com \
--external-cidr 8.0.0.0/8 --external-ip 8.8.8.8 --external-other-ip 8.8.4.4 \
--junit-file "cilium-junits/${{ env.job_name }}.xml" --junit-property github_job_step="Run connectivity test"
# TODO: Remove `no-policies/pod-to-service` test exception (unreliable
# on clustermesh) once https://github.com/cilium/cilium-cli/issues/600
# is fixed.
- name: Post-test information gathering
if: ${{ !success() }}
run: |
kubectl get pods --all-namespaces -o wide
cilium status --context ${{ steps.contexts.outputs.context1 }}
cilium clustermesh status --context ${{ steps.contexts.outputs.context1 }}
cilium status --context ${{ steps.contexts.outputs.context2 }}
cilium clustermesh status --context ${{ steps.contexts.outputs.context2 }}
kubectl config use-context ${{ steps.contexts.outputs.context1 }}
kubectl get pods --all-namespaces -o wide
cilium sysdump --output-filename cilium-sysdump-context1-final
kubectl config use-context ${{ steps.contexts.outputs.context2 }}
kubectl get pods --all-namespaces -o wide
cilium sysdump --output-filename cilium-sysdump-context2-final
shell: bash {0} # Disable default fail-fast behavior so that all commands run independently
- name: Clean up GKE
if: ${{ always() }}
run: |
while [ "$(gcloud container operations list --filter="status=RUNNING AND targetLink~${{ env.clusterNameBase }}" --format="value(name)")" ];do
echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 15
done
gcloud container clusters delete ${{ env.clusterName1 }} --zone ${{ env.zone }} --quiet --async
gcloud container clusters delete ${{ env.clusterName2 }} --zone ${{ env.zone }} --quiet --async
gcloud compute firewall-rules delete ${{ env.firewallRuleName }} --quiet
shell: bash {0} # Disable default fail-fast behavior so that all commands run independently
- name: Upload artifacts
if: ${{ !success() }}
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
with:
name: cilium-sysdumps
path: cilium-sysdump-*.zip
retention-days: 5
- name: Upload JUnits [junit]
if: ${{ always() }}
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
with:
name: cilium-junits
path: cilium-junits/*.xml
retention-days: 2
- name: Publish Test Results As GitHub Summary
if: ${{ always() }}
uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3
with:
junit-directory: "cilium-junits"
- name: Set commit status to success
if: ${{ success() }}
uses: Sibz/github-status-action@650dd1a882a76dbbbc4576fb5974b8d22f29847f # v1.1.6
with:
authToken: ${{ secrets.GITHUB_TOKEN }}
sha: ${{ steps.vars.outputs.sha }}
context: ${{ github.workflow }}
description: Connectivity test successful
state: success
target_url: ${{ env.check_url }}
- name: Set commit status to failure
if: ${{ failure() }}
uses: Sibz/github-status-action@650dd1a882a76dbbbc4576fb5974b8d22f29847f # v1.1.6
with:
authToken: ${{ secrets.GITHUB_TOKEN }}
sha: ${{ steps.vars.outputs.sha }}
context: ${{ github.workflow }}
description: Connectivity test failed
state: failure
target_url: ${{ env.check_url }}
- name: Set commit status to cancelled
if: ${{ cancelled() }}
uses: Sibz/github-status-action@650dd1a882a76dbbbc4576fb5974b8d22f29847f # v1.1.6
with:
authToken: ${{ secrets.GITHUB_TOKEN }}
sha: ${{ steps.vars.outputs.sha }}
context: ${{ github.workflow }}
description: Connectivity test cancelled
state: error
target_url: ${{ env.check_url }}