Skip to content

Commit

Permalink
Fix Pod finalizers for succeeded groups
Browse files Browse the repository at this point in the history
Change-Id: I7f17d51df66de6766af44e48f45b4165e8bd3e30
  • Loading branch information
alculquicondor committed Mar 25, 2024
1 parent 19ee32e commit 76993cb
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 8 deletions.
7 changes: 2 additions & 5 deletions pkg/controller/jobframework/reconciler.go
Expand Up @@ -273,11 +273,8 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques
}

if wl != nil && apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished) {
// Finalize the job if it's finished
if _, finished := job.Finished(); finished {
if err := r.finalizeJob(ctx, job); err != nil {
return ctrl.Result{}, err
}
if err := r.finalizeJob(ctx, job); err != nil {
return ctrl.Result{}, err
}

r.record.Eventf(object, corev1.EventTypeNormal, ReasonFinishedWorkload,
Expand Down
3 changes: 2 additions & 1 deletion pkg/controller/jobs/pod/pod_controller_test.go
Expand Up @@ -978,10 +978,11 @@ func TestReconciler(t *testing.T) {
},
workloadCmpOpts: defaultWorkloadCmpOpts,
},
"workload is not deleted if one of the pods in the finished group is absent": {
"Pods are finalized even if one of the pods in the finished group is absent": {
pods: []corev1.Pod{
*basePodWrapper.
Clone().
KueueFinalizer().
Label("kueue.x-k8s.io/managed", "true").
Group("test-group").
GroupTotalCount("2").
Expand Down
4 changes: 2 additions & 2 deletions site/static/examples/pods-kueue/kueue-pod-group.yaml
Expand Up @@ -16,7 +16,7 @@ spec:
command: ["sh", "-c", 'echo "hello world from the leader pod" && sleep 3']
resources:
requests:
cpu: 3
cpu: 0.5
---
apiVersion: v1
kind: Pod
Expand All @@ -35,4 +35,4 @@ spec:
command: ["sh", "-c", 'echo "hello world from the worker pod" && sleep 2']
resources:
requests:
cpu: 3
cpu: 0.5
61 changes: 61 additions & 0 deletions test/integration/controller/jobs/pod/pod_controller_test.go
Expand Up @@ -18,6 +18,7 @@ package pod

import (
"fmt"
"strconv"
"time"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -1025,6 +1026,66 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu
})
})

ginkgo.It("Should finalize all Succeeded Pods when deleted", func() {
ginkgo.By("Creating pods with queue name")
// Use a number of Pods big enough to cause conflicts when removing finalizers >50% of the time.
// 7
const podCount = 10
pods := make([]*corev1.Pod, podCount)
for i := range pods {
pods[i] = testingpod.MakePod(fmt.Sprintf("test-pod-%d", i), ns.Name).
Group("test-group").
GroupTotalCount(strconv.Itoa(podCount)).
Request(corev1.ResourceCPU, "1").
Queue("test-queue").
Obj()
gomega.Expect(k8sClient.Create(ctx, pods[i])).Should(gomega.Succeed())
}

ginkgo.By("checking that workload is created for the pod group")
wlLookupKey := types.NamespacedName{
Namespace: pods[0].Namespace,
Name: "test-group",
}
createdWorkload := &kueue.Workload{}
gomega.Eventually(func() error {
return k8sClient.Get(ctx, wlLookupKey, createdWorkload)
}, util.Timeout, util.Interval).Should(gomega.Succeed())

ginkgo.By("Admitting workload", func() {
admission := testing.MakeAdmission(clusterQueue.Name).PodSets(
kueue.PodSetAssignment{
Name: "4b0469f7",
Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
corev1.ResourceCPU: "default",
},
Count: ptr.To[int32](podCount),
},
).Obj()
gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed())
util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload)

for i := range pods {
util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, client.ObjectKeyFromObject(pods[i]), map[string]string{"kubernetes.io/arch": "arm64"})
}
})

ginkgo.By("Finishing and deleting Pods", func() {
util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, pods...)
for i := range pods {
gomega.Expect(k8sClient.Delete(ctx, pods[i])).To(gomega.Succeed())
}

gomega.Eventually(func(g gomega.Gomega) {
for i := range pods {
key := types.NamespacedName{Namespace: ns.Name, Name: fmt.Sprintf("test-pod-%d", i)}
g.Expect(k8sClient.Get(ctx, key, &corev1.Pod{})).To(testing.BeNotFoundError())
}
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})

})

ginkgo.It("Should finalize workload if pods are absent", func() {
ginkgo.By("Creating pods with queue name")
pod1 := testingpod.MakePod("test-pod1", ns.Name).
Expand Down

0 comments on commit 76993cb

Please sign in to comment.