Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kubelet synchronizes pod states in a parallel manner. #124596

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
59 changes: 50 additions & 9 deletions pkg/kubelet/status/status_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ type manager struct {
podStatusesLock sync.RWMutex
podStatusChannel chan struct{}
// Map from (mirror) pod UID to latest status version successfully sent to the API server.
// apiStatusVersions must only be accessed from the sync thread.
apiStatusVersions map[kubetypes.MirrorPodUID]uint64
podDeletionSafety PodDeletionSafetyProvider
apiStatusVersions map[kubetypes.MirrorPodUID]uint64
apiStatusVersionsLock sync.RWMutex
podDeletionSafety PodDeletionSafetyProvider

podStartupLatencyHelper PodStartupLatencyStateHelper
// state allows to save/restore pod resource allocation and tolerate kubelet restarts.
Expand Down Expand Up @@ -761,6 +761,40 @@ func (m *manager) RemoveOrphanedStatuses(podUIDs map[types.UID]bool) {
}
}

// deleteApiStatusVersion drops the recorded API status version for the given
// mirror pod UID, guarding the apiStatusVersions map with its write lock.
func (m *manager) deleteApiStatusVersion(uid kubetypes.MirrorPodUID) {
	m.apiStatusVersionsLock.Lock()
	delete(m.apiStatusVersions, uid)
	m.apiStatusVersionsLock.Unlock()
}

// getApiStatusVersion returns the latest API status version recorded for the
// given mirror pod UID and whether an entry exists, under a read lock.
// On a miss the map index already yields the zero value (0, false), so no
// separate fallback branch is needed.
func (m *manager) getApiStatusVersion(uid kubetypes.MirrorPodUID) (uint64, bool) {
	m.apiStatusVersionsLock.RLock()
	defer m.apiStatusVersionsLock.RUnlock()
	version, ok := m.apiStatusVersions[uid]
	return version, ok
}

// setApiStatusVersion records version as the latest API status version sent
// for the given mirror pod UID, guarding the map with its write lock.
func (m *manager) setApiStatusVersion(uid kubetypes.MirrorPodUID, version uint64) {
	m.apiStatusVersionsLock.Lock()
	m.apiStatusVersions[uid] = version
	m.apiStatusVersionsLock.Unlock()
}

// setApiStatusVersions stores every (uid, version) pair from statusVersions
// into apiStatusVersions while holding the write lock for the whole batch,
// so the update appears atomic to other accessors of the map.
func (m *manager) setApiStatusVersions(statusVersions map[kubetypes.MirrorPodUID]uint64) {
	m.apiStatusVersionsLock.Lock()
	defer m.apiStatusVersionsLock.Unlock()
	for uid, v := range statusVersions {
		m.apiStatusVersions[uid] = v
	}
}

// syncBatch syncs pods statuses with the apiserver. Returns the number of syncs
// attempted for testing.
func (m *manager) syncBatch(all bool) int {
Expand All @@ -782,7 +816,7 @@ func (m *manager) syncBatch(all bool) int {
_, hasPod := m.podStatuses[types.UID(uid)]
_, hasMirror := mirrorToPod[uid]
if !hasPod && !hasMirror {
delete(m.apiStatusVersions, uid)
m.deleteApiStatusVersion(uid)
}
}
}
Expand All @@ -806,7 +840,8 @@ func (m *manager) syncBatch(all bool) int {
// if a new status update has been delivered, trigger an update, otherwise the
// pod can wait for the next bulk check (which performs reconciliation as well)
if !all {
if m.apiStatusVersions[uidOfStatus] >= status.version {
statusVersion, _ := m.getApiStatusVersion(uidOfStatus)
if statusVersion >= status.version {
continue
}
updatedStatuses = append(updatedStatuses, podSync{uid, uidOfStatus, status})
Expand All @@ -823,16 +858,22 @@ func (m *manager) syncBatch(all bool) int {
// In most cases the deleted apiStatusVersions here should be filled
// soon after the following syncPod() [If the syncPod() sync an update
// successfully].
delete(m.apiStatusVersions, uidOfStatus)
m.deleteApiStatusVersion(uidOfStatus)
updatedStatuses = append(updatedStatuses, podSync{uid, uidOfStatus, status})
}
}
}()

wg := sync.WaitGroup{}
wg.Add(len(updatedStatuses))
for _, update := range updatedStatuses {
klog.V(5).InfoS("Sync pod status", "podUID", update.podUID, "statusUID", update.statusUID, "version", update.status.version)
m.syncPod(update.podUID, update.status)
go func() {
defer wg.Done()
m.syncPod(update.podUID, update.status)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

syncPod will cause multiple requests to the apiserver, but the kubelet's requests to the apiserver are limited by kube-api-qps and max-requests-inflight.

And if there is a large cluster, multiple pods are created at the same time, and the kubelet of multiple nodes runs syncPod in parallel at the same time, it will undoubtedly trigger throttling and affect the normal work of Apiserver.

This PR will bring unpredictable risks, I may prefer #123877

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And if there is a large cluster, multiple pods are created at the same time, and the kubelet of multiple nodes runs syncPod in parallel at the same time, it will undoubtedly trigger throttling and affect the normal work of Apiserver.

This PR will bring unpredictable risks, I may prefer #123877

Thanks for your comment. The impact of GET operations on the API server and etcd is considerably less than that of LIST (which lists all pods). Coupled with the apiserver's APF and QPS limits, and the kubelet's own QPS limits, this should suffice for the majority of scenarios.

}()
}
wg.Wait()

return len(updatedStatuses)
}
Expand Down Expand Up @@ -894,7 +935,7 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
metrics.PodStatusSyncDuration.Observe(duration.Seconds())
}

m.apiStatusVersions[kubetypes.MirrorPodUID(pod.UID)] = status.version
m.setApiStatusVersion(kubetypes.MirrorPodUID(pod.UID), status.version)

// We don't handle graceful deletion of mirror pods.
if m.canBeDeleted(pod, status.status, status.podIsFinished) {
Expand All @@ -917,7 +958,7 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
// needsUpdate returns whether the status is stale for the given pod UID.
// This method is not thread safe, and must only be accessed by the sync thread.
func (m *manager) needsUpdate(uid types.UID, status versionedPodStatus) bool {
latest, ok := m.apiStatusVersions[kubetypes.MirrorPodUID(uid)]
latest, ok := m.getApiStatusVersion(kubetypes.MirrorPodUID(uid))
if !ok || latest < status.version {
return true
}
Expand Down
59 changes: 50 additions & 9 deletions pkg/kubelet/status/status_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake"
Expand Down Expand Up @@ -71,6 +72,26 @@ func getTestPod() *v1.Pod {
}
}

// getTestPods builds n distinct test pods in the "new" namespace, named
// "foo0".."foo{n-1}", each with a UID equal to its decimal index. Returns a
// nil slice when n <= 0, matching the zero-value behavior of append.
func getTestPods(n int) []v1.Pod {
	var pods []v1.Pod
	for i := 0; i < n; i++ {
		idx := strconv.Itoa(i)
		pods = append(pods, v1.Pod{
			TypeMeta: metav1.TypeMeta{
				Kind:       "Pod",
				APIVersion: "v1",
			},
			ObjectMeta: metav1.ObjectMeta{
				UID:       types.UID(idx),
				Name:      "foo" + idx,
				Namespace: "new",
			},
		})
	}
	return pods
}

// After adding reconciliation, if status in pod manager is different from the cached status, a reconciliation
// will be triggered, which will mess up all the old unit test.
// To simplify the implementation of unit test, we add testSyncBatch() here, it will make sure the statuses in
Expand Down Expand Up @@ -426,7 +447,9 @@ func TestStaleUpdates(t *testing.T) {

t.Log("... even if it's stale as long as nothing changes")
mirrorPodUID := kubetypes.MirrorPodUID(pod.UID)
m.apiStatusVersions[mirrorPodUID] = m.apiStatusVersions[mirrorPodUID] - 1

statusVersion, _ := m.getApiStatusVersion(mirrorPodUID)
m.setApiStatusVersion(mirrorPodUID, statusVersion-1)

m.SetPodStatus(pod, status)
m.syncBatch(true)
Expand All @@ -436,6 +459,19 @@ func TestStaleUpdates(t *testing.T) {
verifyUpdates(t, m, 0)
}

func TestManyPodsUpdates(t *testing.T) {
const podNums = 100
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@zhifei92 FWIW In my environment(Ubuntu 22.04 on Intel(R) Core(TM) i9-7920X CPU @ 2.90GHz 24 cores) the change slows down the test:
master:

kubernetes (master) $ make test WHAT=./pkg/kubelet/status GOFLAGS="-v" KUBE_TEST_ARGS='-count 1 -run ^TestManyPodsUpdates$'
+++ [0508 14:06:19] Set GOMAXPROCS automatically to 24
+++ [0508 14:06:19] Running tests without code coverage and with -race
=== RUN   TestManyPodsUpdates
--- PASS: TestManyPodsUpdates (0.05s)
PASS
ok  	k8s.io/kubernetes/pkg/kubelet/status	1.113s

this PR:

kubernetes (124596) $ make test WHAT=./pkg/kubelet/status GOFLAGS="-v" KUBE_TEST_ARGS='-count 1 -run ^TestManyPodsUpdates$'
+++ [0508 14:03:41] Set GOMAXPROCS automatically to 24
+++ [0508 14:03:42] Running tests without code coverage and with -race
=== RUN   TestManyPodsUpdates
--- PASS: TestManyPodsUpdates (0.13s)
PASS
ok  	k8s.io/kubernetes/pkg/kubelet/status	1.231s

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Yes, that's correct, especially if we're not simulating any latency scenarios, where the extra overhead of locks does become a factor. However, considering there is inherent network latency in the communication between the kubelet and the kube-apiserver, we need to emulate a delay of 10 to 20 milliseconds within the syncPod function to truly demonstrate the value of parallel processing."

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like provided unit test is not the best way to demonstrate benefits of the change. How about providing e2e test?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The e2e test should be better. I'll try it

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bart0sh Using the spec latency/resource should be within limit when create 90 pods with 0s interval should achieve the objective; if feasible, consider employing tc tc qdisc add dev lo root netem delay 20ms to simulate network latency.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe it would be better to have it reproducible in an e2e (not e2e_node) test case(s). This would allow us to avoid simulation and see a real life delays.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I had mistakenly believed that an e2e_node test case was needed. I need to carefully contemplate how to cover this scenario using an end-to-end (e2e) test case. As I am not yet well-versed in e2e testing, it might take me some additional time to figure this out.

pods := getTestPods(podNums)
client := fake.NewSimpleClientset(&v1.PodList{Items: pods})
m := newTestManager(client)

for i := 0; i < podNums; i++ {
m.SetPodStatus(&pods[i], getRandomPodStatus())
}

verifyUpdates(t, m, podNums)
}

// shuffle returns a new shuffled list of container statuses.
func shuffle(statuses []v1.ContainerStatus) []v1.ContainerStatus {
numStatuses := len(statuses)
Expand Down Expand Up @@ -1320,13 +1356,16 @@ func TestSyncBatchCleanupVersions(t *testing.T) {
}

t.Logf("Orphaned pods should be removed.")
m.apiStatusVersions[kubetypes.MirrorPodUID(testPod.UID)] = 100
m.apiStatusVersions[kubetypes.MirrorPodUID(mirrorPod.UID)] = 200
m.setApiStatusVersions(map[kubetypes.MirrorPodUID]uint64{
kubetypes.MirrorPodUID(testPod.UID): 100,
kubetypes.MirrorPodUID(mirrorPod.UID): 200,
})
m.syncBatch(true)
if _, ok := m.apiStatusVersions[kubetypes.MirrorPodUID(testPod.UID)]; ok {

if _, ok := m.getApiStatusVersion(kubetypes.MirrorPodUID(testPod.UID)); ok {
t.Errorf("Should have cleared status for testPod")
}
if _, ok := m.apiStatusVersions[kubetypes.MirrorPodUID(mirrorPod.UID)]; ok {
if _, ok := m.getApiStatusVersion(kubetypes.MirrorPodUID(mirrorPod.UID)); ok {
t.Errorf("Should have cleared status for mirrorPod")
}

Expand All @@ -1337,13 +1376,15 @@ func TestSyncBatchCleanupVersions(t *testing.T) {
staticPod.UID = "static-uid"
staticPod.Annotations = map[string]string{kubetypes.ConfigSourceAnnotationKey: "file"}
m.podManager.(mutablePodManager).AddPod(staticPod)
m.apiStatusVersions[kubetypes.MirrorPodUID(testPod.UID)] = 100
m.apiStatusVersions[kubetypes.MirrorPodUID(mirrorPod.UID)] = 200
m.setApiStatusVersions(map[kubetypes.MirrorPodUID]uint64{
kubetypes.MirrorPodUID(testPod.UID): 100,
kubetypes.MirrorPodUID(mirrorPod.UID): 200,
})
m.testSyncBatch()
if _, ok := m.apiStatusVersions[kubetypes.MirrorPodUID(testPod.UID)]; !ok {
if _, ok := m.getApiStatusVersion(kubetypes.MirrorPodUID(testPod.UID)); !ok {
t.Errorf("Should not have cleared status for testPod")
}
if _, ok := m.apiStatusVersions[kubetypes.MirrorPodUID(mirrorPod.UID)]; !ok {
if _, ok := m.getApiStatusVersion(kubetypes.MirrorPodUID(mirrorPod.UID)); !ok {
t.Errorf("Should not have cleared status for mirrorPod")
}
}
Expand Down