Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scheduler_perf: add DRA structured parameters test with shared claims #124548

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 16 additions & 0 deletions test/integration/scheduler_perf/config/dra/pod-with-claim-ref.yaml
@@ -0,0 +1,16 @@
# Pod that consumes an already-existing ResourceClaim by name.
# Processed with text/template: .Index is the index of the pod
# being created (see createAny/createPods in create.go).
apiVersion: v1
kind: Pod
metadata:
  name: test-dra-{{.Index}}
spec:
  containers:
  - image: registry.k8s.io/pause:3.9
    name: pause
    resources:
      claims:
      - name: resource
  resourceClaims:
  - name: resource
    source:
      # Five pods share access to the same claim.
      resourceClaimName: test-claim-{{div .Index 5}}
@@ -0,0 +1,10 @@
# ResourceClaim with deterministic, index-based name so that pods
# can reference it directly via resourceClaimName.
# NOTE(review): presumably this is resourceclaim-structured.yaml
# (referenced by the performance config) — the file header was lost
# in this view; confirm.
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaim
metadata:
  name: test-claim-{{.Index}}
spec:
  resourceClassName: test-class
  parametersRef:
    apiGroup: resource.k8s.io
    kind: ResourceClaimParameters
    name: test-claim-parameters
6 changes: 6 additions & 0 deletions test/integration/scheduler_perf/config/dra/resourceclaim.yaml
@@ -0,0 +1,6 @@
# Minimal ResourceClaim. .Index gives each generated claim a
# deterministic name (test-claim-0, test-claim-1, ...).
apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaim
metadata:
  name: test-claim-{{.Index}}
spec:
  resourceClassName: test-class
Expand Up @@ -2,6 +2,7 @@ apiVersion: resource.k8s.io/v1alpha2
kind: ResourceClaimParameters
metadata:
name: test-claim-parameters
shareable: true
driverRequests:
- driverName: test-driver.cdi.k8s.io
requests:
Expand Down
102 changes: 97 additions & 5 deletions test/integration/scheduler_perf/config/performance-config.yaml
Expand Up @@ -719,6 +719,7 @@
- name: SchedulingWithResourceClaimTemplate
featureGates:
DynamicResourceAllocation: true
# SchedulerQueueingHints: true
workloadTemplate:
- opcode: createNodes
countParam: $nodesWithoutDRA
Expand Down Expand Up @@ -785,6 +786,7 @@
- name: SchedulingWithMultipleResourceClaims
featureGates:
DynamicResourceAllocation: true
# SchedulerQueueingHints: true
workloadTemplate:
- opcode: createNodes
countParam: $nodesWithoutDRA
Expand Down Expand Up @@ -860,6 +862,7 @@
- name: SchedulingWithResourceClaimTemplateStructured
featureGates:
DynamicResourceAllocation: true
# SchedulerQueueingHints: true
workloadTemplate:
- opcode: createNodes
countParam: $nodesWithoutDRA
Expand Down Expand Up @@ -908,20 +911,109 @@
- name: 2000pods_100nodes
labels: [performance, fast]
params:
# In this testcase, the number of nodes is smaller
# than the limit for the PodScheduling slices.
nodesWithDRA: 100
nodesWithoutDRA: 0
initPods: 1000
measurePods: 1000
maxClaimsPerNode: 20
- name: 2000pods_200nodes
params:
# In this testcase, the driver and scheduler must
# truncate the PotentialNodes and UnsuitableNodes
# slices.
nodesWithDRA: 200
nodesWithoutDRA: 0
initPods: 1000
measurePods: 1000
maxClaimsPerNode: 10
- name: 5000pods_500nodes
params:
nodesWithDRA: 500
nodesWithoutDRA: 0
initPods: 2500
measurePods: 2500
maxClaimsPerNode: 10

# SchedulingWithResourceClaimStructured uses ResourceClaims
# with deterministic names that are shared between pods.
# There is a fixed ratio of 1:5 between claims and pods.
#
# The driver uses structured parameters.
- name: SchedulingWithResourceClaimStructured
featureGates:
DynamicResourceAllocation: true
# SchedulerQueueingHints: true
workloadTemplate:
- opcode: createNodes
countParam: $nodesWithoutDRA
- opcode: createNodes
nodeTemplatePath: config/dra/node-with-dra-test-driver.yaml
countParam: $nodesWithDRA
- opcode: createResourceDriver
driverName: test-driver.cdi.k8s.io
nodes: scheduler-perf-dra-*
maxClaimsPerNodeParam: $maxClaimsPerNode
structuredParameters: true
- opcode: createAny
templatePath: config/dra/resourceclass-structured.yaml
- opcode: createAny
templatePath: config/dra/resourceclaimparameters.yaml
namespace: init
- opcode: createAny
templatePath: config/dra/resourceclaim-structured.yaml
namespace: init
countParam: $initClaims
- opcode: createPods
namespace: init
countParam: $initPods
podTemplatePath: config/dra/pod-with-claim-ref.yaml
- opcode: createAny
templatePath: config/dra/resourceclaimparameters.yaml
namespace: test
- opcode: createAny
templatePath: config/dra/resourceclaim-structured.yaml
namespace: test
countParam: $measureClaims
- opcode: createPods
namespace: test
countParam: $measurePods
podTemplatePath: config/dra/pod-with-claim-ref.yaml
collectMetrics: true
workloads:
- name: fast
labels: [integration-test, fast]
params:
# This testcase runs through all code paths without
# taking too long overall.
nodesWithDRA: 1
nodesWithoutDRA: 1
initPods: 0
initClaims: 0
measurePods: 10
measureClaims: 2 # must be measurePods / 5
maxClaimsPerNode: 2
- name: 2000pods_100nodes
labels: [performance, fast]
params:
nodesWithDRA: 100
nodesWithoutDRA: 0
initPods: 1000
initClaims: 200 # must be initPods / 5
measurePods: 1000
measureClaims: 200 # must be measurePods / 5
maxClaimsPerNode: 4
- name: 2000pods_200nodes
params:
nodesWithDRA: 200
nodesWithoutDRA: 0
initPods: 1000
initClaims: 200 # must be initPods / 5
measurePods: 1000
measureClaims: 200 # must be measurePods / 5
maxClaimsPerNode: 2
- name: 5000pods_500nodes
params:
nodesWithDRA: 500
nodesWithoutDRA: 0
initPods: 2500
initClaims: 500 # must be initPods / 5
measurePods: 2500
measureClaims: 500 # must be measurePods / 5
maxClaimsPerNode: 2
54 changes: 51 additions & 3 deletions test/integration/scheduler_perf/create.go
Expand Up @@ -17,8 +17,11 @@ limitations under the License.
package benchmark

import (
"bytes"
"context"
"fmt"
"html/template"
"os"
"time"

apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -30,6 +33,8 @@ import (
"k8s.io/client-go/restmapper"
"k8s.io/klog/v2"
"k8s.io/kubernetes/test/utils/ktesting"
"k8s.io/utils/ptr"
"sigs.k8s.io/yaml"
)

// createAny defines an op where some object gets created from a YAML file.
Expand All @@ -40,7 +45,13 @@ type createAny struct {
// Namespace the object should be created in. Must be empty for cluster-scoped objects.
Namespace string
// Path to spec file describing the object to create.
// This will be processed with text/template.
// .Index will be in the range [0, Count-1] when creating
// more than one object. .Count is the total number of objects.
TemplatePath string
// Count determines how often to create the object. Defaults to 1 if unset.
Count *int
CountParam string
}

var _ runnableOp = &createAny{}
Expand All @@ -61,8 +72,15 @@ func (c *createAny) collectsMetrics() bool {
return false
}

func (c *createAny) patchParams(w *workload) (realOp, error) {
return c, c.isValid(false)
func (c createAny) patchParams(w *workload) (realOp, error) {
if c.CountParam != "" {
count, err := w.Params.get(c.CountParam[1:])
if err != nil {
return nil, err
}
c.Count = ptr.To(count)
}
return &c, c.isValid(false)
}

func (c *createAny) requiredNamespaces() []string {
Expand All @@ -73,8 +91,18 @@ func (c *createAny) requiredNamespaces() []string {
}

// run creates the configured object Count times (defaulting to a
// single instance when Count is nil). Each creation receives the
// current index and the total count as template variables.
func (c *createAny) run(tCtx ktesting.TContext) {
	total := ptr.Deref(c.Count, 1)
	for i := 0; i < total; i++ {
		c.create(tCtx, map[string]any{"Index": i, "Count": total})
	}
}

func (c *createAny) create(tCtx ktesting.TContext, env map[string]any) {
var obj *unstructured.Unstructured
if err := getSpecFromFile(&c.TemplatePath, &obj); err != nil {
if err := getSpecFromTextTemplateFile(c.TemplatePath, env, &obj); err != nil {
tCtx.Fatalf("%s: parsing failed: %v", c.TemplatePath, err)
}

Expand Down Expand Up @@ -143,3 +171,23 @@ func (c *createAny) run(tCtx ktesting.TContext) {
}
}
}

func getSpecFromTextTemplateFile(path string, env map[string]any, spec interface{}) error {
content, err := os.ReadFile(path)
if err != nil {
return err
}
fm := template.FuncMap{"div": func(a, b int) int {
return a / b
}}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a pity that text/template hasn't standardized some common functions like this 😢

tmpl, err := template.New("object").Funcs(fm).Parse(string(content))
if err != nil {
return err
}
var buffer bytes.Buffer
if err := tmpl.Execute(&buffer, env); err != nil {
return err
}

return yaml.UnmarshalStrict(buffer.Bytes(), spec)
}
1 change: 1 addition & 0 deletions test/integration/scheduler_perf/dra.go
Expand Up @@ -177,6 +177,7 @@ func (op *createResourceDriverOp) run(tCtx ktesting.TContext) {
DriverName: op.DriverName,
NodeLocal: true,
MaxAllocations: op.MaxClaimsPerNode,
Shareable: true,
}

nodes, err := tCtx.Client().CoreV1().Nodes().List(tCtx, metav1.ListOptions{})
Expand Down
18 changes: 8 additions & 10 deletions test/integration/scheduler_perf/scheduler_perf.go
Expand Up @@ -1387,16 +1387,12 @@ func validateTestCases(testCases []*testCase) error {
}

func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error) {
basePod := makeBasePod()
podTemplate := testutils.StaticPodTemplate(makeBasePod())
if cpo.PodTemplatePath != nil {
var err error
basePod, err = getPodSpecFromFile(cpo.PodTemplatePath)
if err != nil {
return nil, err
}
podTemplate = podTemplateFromFile(*cpo.PodTemplatePath)
}
if cpo.PersistentVolumeClaimTemplatePath == nil {
return testutils.NewCustomCreatePodStrategy(basePod), nil
return testutils.NewCustomCreatePodStrategy(podTemplate), nil
}

pvTemplate, err := getPersistentVolumeSpecFromFile(cpo.PersistentVolumeTemplatePath)
Expand All @@ -1407,7 +1403,7 @@ func getPodStrategy(cpo *createPodsOp) (testutils.TestPodCreateStrategy, error)
if err != nil {
return nil, err
}
return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), basePod), nil
return testutils.NewCreatePodWithPersistentVolumeStrategy(pvcTemplate, getCustomVolumeFactory(pvTemplate), podTemplate), nil
}

func getNodeSpecFromFile(path *string) (*v1.Node, error) {
Expand All @@ -1418,9 +1414,11 @@ func getNodeSpecFromFile(path *string) (*v1.Node, error) {
return nodeSpec, nil
}

func getPodSpecFromFile(path *string) (*v1.Pod, error) {
type podTemplateFromFile string

func (f podTemplateFromFile) GetPodTemplate(index, count int) (*v1.Pod, error) {
podSpec := &v1.Pod{}
if err := getSpecFromFile(path, podSpec); err != nil {
if err := getSpecFromTextTemplateFile(string(f), map[string]any{"Index": index, "Count": count}, podSpec); err != nil {
return nil, fmt.Errorf("parsing Pod: %w", err)
}
return podSpec, nil
Expand Down