Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-0.6] Fix Pod finalizers for succeeded groups #1916

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 2 additions & 5 deletions pkg/controller/jobframework/reconciler.go
Expand Up @@ -270,11 +270,8 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques
}

if wl != nil && apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished) {
// Finalize the job if it's finished
if _, finished := job.Finished(); finished {
if err := r.finalizeJob(ctx, job); err != nil {
return ctrl.Result{}, err
}
if err := r.finalizeJob(ctx, job); err != nil {
return ctrl.Result{}, err
}

r.record.Eventf(object, corev1.EventTypeNormal, ReasonFinishedWorkload,
Expand Down
3 changes: 2 additions & 1 deletion pkg/controller/jobs/pod/pod_controller_test.go
Expand Up @@ -977,10 +977,11 @@ func TestReconciler(t *testing.T) {
},
workloadCmpOpts: defaultWorkloadCmpOpts,
},
"workload is not deleted if one of the pods in the finished group is absent": {
"Pods are finalized even if one of the pods in the finished group is absent": {
pods: []corev1.Pod{
*basePodWrapper.
Clone().
KueueFinalizer().
Label("kueue.x-k8s.io/managed", "true").
Group("test-group").
GroupTotalCount("2").
Expand Down
60 changes: 60 additions & 0 deletions test/integration/controller/jobs/pod/pod_controller_test.go
Expand Up @@ -18,6 +18,7 @@ package pod

import (
"fmt"
"strconv"
"time"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -1024,6 +1025,65 @@ var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailu
})
})

ginkgo.It("Should finalize all Succeeded Pods when deleted", func() {
ginkgo.By("Creating pods with queue name")
// Use a number of Pods big enough to cause conflicts when removing finalizers >50% of the time.
const podCount = 7
pods := make([]*corev1.Pod, podCount)
for i := range pods {
pods[i] = testingpod.MakePod(fmt.Sprintf("test-pod-%d", i), ns.Name).
Group("test-group").
GroupTotalCount(strconv.Itoa(podCount)).
Request(corev1.ResourceCPU, "1").
Queue("test-queue").
Obj()
gomega.Expect(k8sClient.Create(ctx, pods[i])).Should(gomega.Succeed())
}

ginkgo.By("checking that workload is created for the pod group")
wlLookupKey := types.NamespacedName{
Namespace: pods[0].Namespace,
Name: "test-group",
}
createdWorkload := &kueue.Workload{}
gomega.Eventually(func() error {
return k8sClient.Get(ctx, wlLookupKey, createdWorkload)
}, util.Timeout, util.Interval).Should(gomega.Succeed())

ginkgo.By("Admitting workload", func() {
admission := testing.MakeAdmission(clusterQueue.Name).PodSets(
kueue.PodSetAssignment{
Name: "4b0469f7",
Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
corev1.ResourceCPU: "default",
},
Count: ptr.To[int32](podCount),
},
).Obj()
gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed())
util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload)

for i := range pods {
util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, client.ObjectKeyFromObject(pods[i]), map[string]string{"kubernetes.io/arch": "arm64"})
}
})

ginkgo.By("Finishing and deleting Pods", func() {
util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, pods...)
for i := range pods {
gomega.Expect(k8sClient.Delete(ctx, pods[i])).To(gomega.Succeed())
}

gomega.Eventually(func(g gomega.Gomega) {
for i := range pods {
key := types.NamespacedName{Namespace: ns.Name, Name: fmt.Sprintf("test-pod-%d", i)}
g.Expect(k8sClient.Get(ctx, key, &corev1.Pod{})).To(testing.BeNotFoundError())
}
}, util.Timeout, util.Interval).Should(gomega.Succeed())
})

})

ginkgo.It("Should finalize workload if pods are absent", func() {
ginkgo.By("Creating pods with queue name")
pod1 := testingpod.MakePod("test-pod1", ns.Name).
Expand Down