-
Notifications
You must be signed in to change notification settings - Fork 38.6k
/
plugin.go
281 lines (252 loc) · 12.3 KB
/
plugin.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package podtopologyspread
import (
"context"
"fmt"
"reflect"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
appslisters "k8s.io/client-go/listers/apps/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
"k8s.io/kubernetes/pkg/scheduler/util"
)
const (
// ErrReasonConstraintsNotMatch is used for PodTopologySpread filter error.
ErrReasonConstraintsNotMatch = "node(s) didn't match pod topology spread constraints"
// ErrReasonNodeLabelNotMatch is used when the node doesn't hold the required label.
ErrReasonNodeLabelNotMatch = ErrReasonConstraintsNotMatch + " (missing required label)"
)
var systemDefaultConstraints = []v1.TopologySpreadConstraint{
{
TopologyKey: v1.LabelHostname,
WhenUnsatisfiable: v1.ScheduleAnyway,
MaxSkew: 3,
},
{
TopologyKey: v1.LabelTopologyZone,
WhenUnsatisfiable: v1.ScheduleAnyway,
MaxSkew: 5,
},
}
// PodTopologySpread is a plugin that ensures pod's topologySpreadConstraints is satisfied.
type PodTopologySpread struct {
systemDefaulted bool
parallelizer parallelize.Parallelizer
defaultConstraints []v1.TopologySpreadConstraint
sharedLister framework.SharedLister
services corelisters.ServiceLister
replicationCtrls corelisters.ReplicationControllerLister
replicaSets appslisters.ReplicaSetLister
statefulSets appslisters.StatefulSetLister
enableMinDomainsInPodTopologySpread bool
enableNodeInclusionPolicyInPodTopologySpread bool
enableMatchLabelKeysInPodTopologySpread bool
}
var _ framework.PreFilterPlugin = &PodTopologySpread{}
var _ framework.FilterPlugin = &PodTopologySpread{}
var _ framework.PreScorePlugin = &PodTopologySpread{}
var _ framework.ScorePlugin = &PodTopologySpread{}
var _ framework.EnqueueExtensions = &PodTopologySpread{}
// Name is the name of the plugin used in the plugin registry and configurations.
const Name = names.PodTopologySpread
// Name returns name of the plugin. It is used in logs, etc.
func (pl *PodTopologySpread) Name() string {
return Name
}
// New initializes a new plugin and returns it.
func New(_ context.Context, plArgs runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
if h.SnapshotSharedLister() == nil {
return nil, fmt.Errorf("SnapshotSharedlister is nil")
}
args, err := getArgs(plArgs)
if err != nil {
return nil, err
}
if err := validation.ValidatePodTopologySpreadArgs(nil, &args); err != nil {
return nil, err
}
pl := &PodTopologySpread{
parallelizer: h.Parallelizer(),
sharedLister: h.SnapshotSharedLister(),
defaultConstraints: args.DefaultConstraints,
enableMinDomainsInPodTopologySpread: fts.EnableMinDomainsInPodTopologySpread,
enableNodeInclusionPolicyInPodTopologySpread: fts.EnableNodeInclusionPolicyInPodTopologySpread,
enableMatchLabelKeysInPodTopologySpread: fts.EnableMatchLabelKeysInPodTopologySpread,
}
if args.DefaultingType == config.SystemDefaulting {
pl.defaultConstraints = systemDefaultConstraints
pl.systemDefaulted = true
}
if len(pl.defaultConstraints) != 0 {
if h.SharedInformerFactory() == nil {
return nil, fmt.Errorf("SharedInformerFactory is nil")
}
pl.setListers(h.SharedInformerFactory())
}
return pl, nil
}
func getArgs(obj runtime.Object) (config.PodTopologySpreadArgs, error) {
ptr, ok := obj.(*config.PodTopologySpreadArgs)
if !ok {
return config.PodTopologySpreadArgs{}, fmt.Errorf("want args to be of type PodTopologySpreadArgs, got %T", obj)
}
return *ptr, nil
}
func (pl *PodTopologySpread) setListers(factory informers.SharedInformerFactory) {
pl.services = factory.Core().V1().Services().Lister()
pl.replicationCtrls = factory.Core().V1().ReplicationControllers().Lister()
pl.replicaSets = factory.Apps().V1().ReplicaSets().Lister()
pl.statefulSets = factory.Apps().V1().StatefulSets().Lister()
}
// EventsToRegister returns the possible events that may make a Pod
// failed by this plugin schedulable.
func (pl *PodTopologySpread) EventsToRegister() []framework.ClusterEventWithHint {
return []framework.ClusterEventWithHint{
// All ActionType includes the following events:
// - Add. An unschedulable Pod may fail due to violating topology spread constraints,
// adding an assigned Pod may make it schedulable.
// - Update. Updating on an existing Pod's labels (e.g., removal) may make
// an unschedulable Pod schedulable.
// - Delete. An unschedulable Pod may fail due to violating an existing Pod's topology spread constraints,
// deleting an existing Pod may make it schedulable.
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.All}, QueueingHintFn: pl.isSchedulableAfterPodChange},
// Node add|delete|update maybe lead an topology key changed,
// and make these pod in scheduling schedulable or unschedulable.
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.Delete | framework.UpdateNodeLabel | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
}
}
func involvedInTopologySpreading(incomingPod, podWithSpreading *v1.Pod) bool {
return incomingPod.Spec.NodeName != "" && incomingPod.Namespace == podWithSpreading.Namespace
}
func (pl *PodTopologySpread) isSchedulableAfterPodChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
originalPod, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
if err != nil {
return framework.Queue, err
}
if (modifiedPod != nil && !involvedInTopologySpreading(modifiedPod, pod)) || (originalPod != nil && !involvedInTopologySpreading(originalPod, pod)) {
logger.V(5).Info("the added/updated/deleted pod is unscheduled or has different namespace with target pod, so it doesn't make the target pod schedulable",
"pod", klog.KObj(pod), "originalPod", klog.KObj(originalPod))
return framework.QueueSkip, nil
}
constraints, err := pl.getConstraints(pod)
if err != nil {
return framework.Queue, err
}
// Pod is modified. Return Queue when the label(s) matching topologySpread's selector is added, changed, or deleted.
if modifiedPod != nil && originalPod != nil {
if reflect.DeepEqual(modifiedPod.Labels, originalPod.Labels) {
logger.V(5).Info("the updated pod is unscheduled or has no updated labels or has different namespace with target pod, so it doesn't make the target pod schedulable",
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
return framework.QueueSkip, nil
}
for _, c := range constraints {
if c.Selector.Matches(labels.Set(originalPod.Labels)) != c.Selector.Matches(labels.Set(modifiedPod.Labels)) {
// This modification makes this Pod match(or not match) with this constraint.
// Maybe now the scheduling result of topology spread gets changed by this change.
logger.V(5).Info("a scheduled pod's label was updated and it makes the updated pod match or unmatch the pod's topology spread constraints",
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
return framework.Queue, nil
}
}
// This modification of labels doesn't change whether this Pod would match selector or not in any constraints.
logger.V(5).Info("a scheduled pod's label was updated, but it's a change unrelated to the pod's topology spread constraints",
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
return framework.QueueSkip, nil
}
// Pod is added. Return Queue when the added Pod has a label that matches with topologySpread's selector.
if modifiedPod != nil {
if podLabelsMatchSpreadConstraints(constraints, modifiedPod.Labels) {
logger.V(5).Info("a scheduled pod was created and it matches with the pod's topology spread constraints",
"pod", klog.KObj(pod), "createdPod", klog.KObj(modifiedPod))
return framework.Queue, nil
}
logger.V(5).Info("a scheduled pod was created, but it doesn't matches with the pod's topology spread constraints",
"pod", klog.KObj(pod), "createdPod", klog.KObj(modifiedPod))
return framework.QueueSkip, nil
}
// Pod is deleted. Return Queue when the deleted Pod has a label that matches with topologySpread's selector.
if podLabelsMatchSpreadConstraints(constraints, originalPod.Labels) {
logger.V(5).Info("a scheduled pod which matches with the pod's topology spread constraints was deleted, and the pod may be schedulable now",
"pod", klog.KObj(pod), "deletedPod", klog.KObj(originalPod))
return framework.Queue, nil
}
logger.V(5).Info("a scheduled pod was deleted, but it's unrelated to the pod's topology spread constraints",
"pod", klog.KObj(pod), "deletedPod", klog.KObj(originalPod))
return framework.QueueSkip, nil
}
// getConstraints extracts topologySpreadConstraint(s) from the Pod spec.
// If the Pod doesn't have any topologySpreadConstraint, it returns default constraints.
func (pl *PodTopologySpread) getConstraints(pod *v1.Pod) ([]topologySpreadConstraint, error) {
var constraints []topologySpreadConstraint
var err error
if len(pod.Spec.TopologySpreadConstraints) > 0 {
// We have feature gating in APIServer to strip the spec
// so don't need to re-check feature gate, just check length of Constraints.
constraints, err = pl.filterTopologySpreadConstraints(
pod.Spec.TopologySpreadConstraints,
pod.Labels,
v1.DoNotSchedule,
)
if err != nil {
return nil, fmt.Errorf("obtaining pod's hard topology spread constraints: %w", err)
}
} else {
constraints, err = pl.buildDefaultConstraints(pod, v1.DoNotSchedule)
if err != nil {
return nil, fmt.Errorf("setting default hard topology spread constraints: %w", err)
}
}
return constraints, nil
}
func (pl *PodTopologySpread) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
if err != nil {
return framework.Queue, err
}
constraints, err := pl.getConstraints(pod)
if err != nil {
return framework.Queue, err
}
// framework.Add/framework.Update: return Queue when node has topologyKey in its labels, else return QueueSkip.
//
// TODO: we can filter out node update events in a more fine-grained way once preCheck is completely removed.
// See: https://github.com/kubernetes/kubernetes/issues/110175
if modifiedNode != nil {
if !nodeLabelsMatchSpreadConstraints(modifiedNode.Labels, constraints) {
logger.V(5).Info("the created/updated node doesn't match pod topology spread constraints",
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
return framework.QueueSkip, nil
}
logger.V(5).Info("node that match topology spread constraints was created/updated, and the pod may be schedulable now",
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
return framework.Queue, nil
}
// framework.Delete: return Queue when node has topologyKey in its labels, else return QueueSkip.
if !nodeLabelsMatchSpreadConstraints(originalNode.Labels, constraints) {
logger.V(5).Info("the deleted node doesn't match pod topology spread constraints", "pod", klog.KObj(pod), "node", klog.KObj(originalNode))
return framework.QueueSkip, nil
}
logger.V(5).Info("node that match topology spread constraints was deleted, and the pod may be schedulable now",
"pod", klog.KObj(pod), "node", klog.KObj(originalNode))
return framework.Queue, nil
}