From fc2134c84cc1c84a1cc1278401f70fce93eb11c3 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Mon, 11 Mar 2024 13:51:29 +0000 Subject: [PATCH 1/2] dra kubelet: fix error log Previously we were returning the error string from 'err' (which is nil), when we should have been returning it from result.Error. Without this it is hard to debug issues with NodeUnprepareResources. Signed-off-by: Kevin Klues --- pkg/kubelet/cm/dra/manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/kubelet/cm/dra/manager.go b/pkg/kubelet/cm/dra/manager.go index 8a6ee2dd3202..100deaae6375 100644 --- a/pkg/kubelet/cm/dra/manager.go +++ b/pkg/kubelet/cm/dra/manager.go @@ -380,7 +380,7 @@ func (m *ManagerImpl) UnprepareResources(pod *v1.Pod) error { return fmt.Errorf("NodeUnprepareResources returned result for unknown claim UID %s", claimUID) } if result.Error != "" { - return fmt.Errorf("NodeUnprepareResources failed for claim %s/%s: %s", reqClaim.Namespace, reqClaim.Name, err) + return fmt.Errorf("NodeUnprepareResources failed for claim %s/%s: %s", reqClaim.Namespace, reqClaim.Name, result.Error) } // Delete last pod UID only if unprepare succeeds. From 21a0dd1d705a246049f0224b89a2a0d89a99656c Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Sat, 9 Mar 2024 00:26:33 +0100 Subject: [PATCH 2/2] dra scheduler: create default claim/class parameters instead of nil Without this, the scheduler was crashing in newClaimController() in pkg/scheduler/framework/plugins/dynamicresources/structuredparameters.go The code in newClaimController() assumes that the parameters are not nil. Furthermore it assumes that there is at least one DriverRequest populated in order to allocate any resources to a claim. This PR adds logic to define default claim/class parameters that will allow allocation to proceed even if an end user doesn't provide any class or claim parameters themselves. Signed-off-by: Kevin Klues --- .../dynamicresources/dynamicresources.go | 32 ++++++++++++++++--- .../dynamicresources/structuredparameters.go | 10 +++--- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go b/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go index 9fce37fdabed..449b24b5f56e 100644 --- a/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go +++ b/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go @@ -963,13 +963,15 @@ func (pl *dynamicResources) lookupParameters(logger klog.Logger, class *resource if status != nil { return } - claimParameters, status = pl.lookupClaimParameters(logger, claim) + claimParameters, status = pl.lookupClaimParameters(logger, class, claim) return } func (pl *dynamicResources) lookupClassParameters(logger klog.Logger, class *resourcev1alpha2.ResourceClass) (*resourcev1alpha2.ResourceClassParameters, *framework.Status) { + defaultClassParameters := resourcev1alpha2.ResourceClassParameters{} + if class.ParametersRef == nil { - return nil, nil + return &defaultClassParameters, nil } if class.ParametersRef.APIGroup == resourcev1alpha2.SchemeGroupVersion.Group && @@ -1004,9 +1006,31 @@ func (pl *dynamicResources) lookupClassParameters(logger klog.Logger, class *res return nil, statusUnschedulable(logger, fmt.Sprintf("generated class parameters for %s.%s %s not found", class.ParametersRef.Kind, class.ParametersRef.APIGroup, klog.KRef(class.Namespace, class.ParametersRef.Name))) } -func (pl *dynamicResources) lookupClaimParameters(logger klog.Logger, claim *resourcev1alpha2.ResourceClaim) (*resourcev1alpha2.ResourceClaimParameters, *framework.Status) { +func (pl *dynamicResources) lookupClaimParameters(logger klog.Logger, class *resourcev1alpha2.ResourceClass, claim *resourcev1alpha2.ResourceClaim) (*resourcev1alpha2.ResourceClaimParameters, *framework.Status) { + defaultClaimParameters := resourcev1alpha2.ResourceClaimParameters{ + Shareable: true, + DriverRequests: []resourcev1alpha2.DriverRequests{ + { + DriverName: class.DriverName, + Requests: []resourcev1alpha2.ResourceRequest{ + { + ResourceRequestModel: resourcev1alpha2.ResourceRequestModel{ + // TODO: This only works because NamedResources is + // the only model currently implemented. We need to + // match the default to how the resources of this + // class are being advertized in a ResourceSlice. + NamedResources: &resourcev1alpha2.NamedResourcesRequest{ + Selector: "true", + }, + }, + }, + }, + }, + }, + } + if claim.Spec.ParametersRef == nil { - return nil, nil + return &defaultClaimParameters, nil } if claim.Spec.ParametersRef.APIGroup == resourcev1alpha2.SchemeGroupVersion.Group && claim.Spec.ParametersRef.Kind == "ResourceClaimParameters" { diff --git a/pkg/scheduler/framework/plugins/dynamicresources/structuredparameters.go b/pkg/scheduler/framework/plugins/dynamicresources/structuredparameters.go index 44b9aee85b9b..d9b5a222369d 100644 --- a/pkg/scheduler/framework/plugins/dynamicresources/structuredparameters.go +++ b/pkg/scheduler/framework/plugins/dynamicresources/structuredparameters.go @@ -128,12 +128,10 @@ func newClaimController(logger klog.Logger, class *resourcev1alpha2.ResourceClas } for driverName, perDriver := range namedresourcesRequests { var filter *resourcev1alpha2.NamedResourcesFilter - if classParameters != nil { - for _, f := range classParameters.Filters { - if f.DriverName == driverName && f.ResourceFilterModel.NamedResources != nil { - filter = f.ResourceFilterModel.NamedResources - break - } + for _, f := range classParameters.Filters { + if f.DriverName == driverName && f.ResourceFilterModel.NamedResources != nil { + filter = f.ResourceFilterModel.NamedResources + break } } controller, err := namedresourcesmodel.NewClaimController(filter, perDriver.requests)