Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

e2e_node: DRA: test plugin failures #124617

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 28 additions & 8 deletions test/e2e/dra/test-driver/app/gomega.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,42 @@ var BeRegistered = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error
return false, nil
}).WithMessage("contain successful NotifyRegistrationStatus call")

// NodePrepareResouceCalled checks that NodePrepareResource API has been called
var NodePrepareResourceCalled = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error) {
// NodePrepareResourcesSucceeded checks that NodePrepareResources API has been called and succeeded
var NodePrepareResourcesSucceeded = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error) {
for _, call := range actualCalls {
if strings.HasSuffix(call.FullMethod, "/NodePrepareResource") && call.Err == nil {
if strings.HasSuffix(call.FullMethod, "/NodePrepareResources") && call.Response != nil && call.Err == nil {
return true, nil
}
}
return false, nil
}).WithMessage("contain NodePrepareResource call")
}).WithMessage("contain successful NodePrepareResources call")

// NodePrepareResoucesCalled checks that NodePrepareResources API has been called
var NodePrepareResourcesCalled = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error) {
// NodePrepareResourcesFailed checks that NodePrepareResources API has been called and returned an error
var NodePrepareResourcesFailed = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error) {
for _, call := range actualCalls {
if strings.HasSuffix(call.FullMethod, "/NodePrepareResources") && call.Err == nil {
if strings.HasSuffix(call.FullMethod, "/NodePrepareResources") && call.Err != nil {
return true, nil
}
}
return false, nil
}).WithMessage("contain NodePrepareResources call")
}).WithMessage("contain unsuccessful NodePrepareResources call")

// NodeUnprepareResourcesSucceeded is a matcher over a slice of recorded gRPC
// calls. It is satisfied when at least one call whose method name ends in
// "/NodeUnprepareResources" carries a non-nil response and no error.
var NodeUnprepareResourcesSucceeded = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error) {
	for i := range actualCalls {
		recorded := &actualCalls[i]
		// Skip anything that did not complete successfully.
		if recorded.Err != nil || recorded.Response == nil {
			continue
		}
		if strings.HasSuffix(recorded.FullMethod, "/NodeUnprepareResources") {
			return true, nil
		}
	}
	return false, nil
}).WithMessage("contain successful NodeUnprepareResources call")

// NodeUnprepareResourcesFailed is a matcher over a slice of recorded gRPC
// calls. It is satisfied when at least one call whose method name ends in
// "/NodeUnprepareResources" has a non-nil error.
var NodeUnprepareResourcesFailed = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error) {
	for i := range actualCalls {
		recorded := &actualCalls[i]
		// Only failed calls are of interest here.
		if recorded.Err == nil {
			continue
		}
		if strings.HasSuffix(recorded.FullMethod, "/NodeUnprepareResources") {
			return true, nil
		}
	}
	return false, nil
}).WithMessage("contain unsuccessful NodeUnprepareResources call")
108 changes: 90 additions & 18 deletions test/e2e/dra/test-driver/app/kubeletplugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"sync"

"github.com/google/go-cmp/cmp"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

v1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1alpha2"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
Expand All @@ -54,7 +56,14 @@ type ExamplePlugin struct {
prepared map[ClaimID]any
gRPCCalls []GRPCCall

block bool
blockPrepareResourcesMutex sync.Mutex
blockUnprepareResourcesMutex sync.Mutex

prepareResourcesFailure error
failPrepareResourcesMutex sync.Mutex

unprepareResourcesFailure error
failUnprepareResourcesMutex sync.Mutex
}

type GRPCCall struct {
Expand Down Expand Up @@ -162,10 +171,62 @@ func (ex *ExamplePlugin) IsRegistered() bool {
return status.PluginRegistered
}

// Block sets a flag to block Node[Un]PrepareResources
// WithBlockedNodePrepareResources runs f while holding
// blockPrepareResourcesMutex and returns whatever f returns. The prepare path
// acquires the same mutex, so it cannot proceed until f has finished; this
// emulates a time consuming or stuck call.
func (ex *ExamplePlugin) WithBlockedNodePrepareResources(f func() (*v1.Pod, func())) (*v1.Pod, func()) {
	blocker := &ex.blockPrepareResourcesMutex
	blocker.Lock()
	defer blocker.Unlock()

	pod, cleanup := f()
	return pod, cleanup
}

// WithBlockedNodeUnprepareResources locks blockUnprepareResourcesMutex to block NodeUnprepareResources
// to emulate time consuming or stuck calls
func (ex *ExamplePlugin) Block() {
ex.block = true
// WithBlockedNodeUnprepareResources runs f while holding
// blockUnprepareResourcesMutex and returns whatever f returns.
// NodeUnprepareResources acquires the same mutex, so it cannot proceed until
// f has finished; this emulates a time consuming or stuck call.
func (ex *ExamplePlugin) WithBlockedNodeUnprepareResources(f func() (*v1.Pod, func())) (*v1.Pod, func()) {
	blocker := &ex.blockUnprepareResourcesMutex
	blocker.Lock()
	defer blocker.Unlock()

	pod, cleanup := f()
	return pod, cleanup
}

// withLock runs f while holding mutex.
//
// The unlock is deferred so the mutex is released even when f panics.
// Without that, a panicking callback would leave the mutex locked and block
// every later caller.
func (ex *ExamplePlugin) withLock(mutex *sync.Mutex, f func()) {
	mutex.Lock()
	defer mutex.Unlock()
	f()
}

// WithNodePrepareResourcesFailure installs a simulated error in
// prepareResourcesFailure, runs f, and clears the error again once f has
// returned. Both writes happen under failPrepareResourcesMutex. The results of
// f are returned unchanged.
func (ex *ExamplePlugin) WithNodePrepareResourcesFailure(f func() (*v1.Pod, func())) (*v1.Pod, func()) {
	setFailure := func(failure error) {
		ex.withLock(&ex.failPrepareResourcesMutex, func() {
			ex.prepareResourcesFailure = failure
		})
	}

	setFailure(errors.New("simulated PrepareResources failure"))
	// Reset on the way out so later calls are not affected.
	defer setFailure(nil)

	return f()
}

// GetPrepareResourcesFailure returns the current value of
// prepareResourcesFailure, read under failPrepareResourcesMutex. The result is
// nil when no failure is set.
func (ex *ExamplePlugin) GetPrepareResourcesFailure() error {
	guard := &ex.failPrepareResourcesMutex
	guard.Lock()
	defer guard.Unlock()

	failure := ex.prepareResourcesFailure
	return failure
}

// WithNodeUnprepareResourcesFailure installs a simulated error in
// unprepareResourcesFailure, runs f, and clears the error again once f has
// returned. Both writes happen under failUnprepareResourcesMutex. The results
// of f are returned unchanged.
func (ex *ExamplePlugin) WithNodeUnprepareResourcesFailure(f func() (*v1.Pod, func())) (*v1.Pod, func()) {
	setFailure := func(failure error) {
		ex.withLock(&ex.failUnprepareResourcesMutex, func() {
			ex.unprepareResourcesFailure = failure
		})
	}

	setFailure(errors.New("simulated UnprepareResources failure"))
	// Reset on the way out so later calls are not affected.
	defer setFailure(nil)

	return f()
}

// GetUnprepareResourcesFailure returns the current value of
// unprepareResourcesFailure, read under failUnprepareResourcesMutex. The
// result is nil when no failure is set.
func (ex *ExamplePlugin) GetUnprepareResourcesFailure() error {
	guard := &ex.failUnprepareResourcesMutex
	guard.Lock()
	defer guard.Unlock()

	failure := ex.unprepareResourcesFailure
	return failure
}

// NodePrepareResource ensures that the CDI file for the claim exists. It uses
Expand All @@ -175,15 +236,10 @@ func (ex *ExamplePlugin) Block() {
func (ex *ExamplePlugin) nodePrepareResource(ctx context.Context, claimName string, claimUID string, resourceHandle string, structuredResourceHandle []*resourceapi.StructuredResourceHandle) ([]string, error) {
logger := klog.FromContext(ctx)

// Block to emulate plugin stuckness or slowness.
// By default the call will not be blocked as ex.block = false.
if ex.block {
<-ctx.Done()
return nil, ctx.Err()
}

ex.mutex.Lock()
defer ex.mutex.Unlock()
ex.blockPrepareResourcesMutex.Lock()
defer ex.blockPrepareResourcesMutex.Unlock()

deviceName := "claim-" + claimUID
vendor := ex.driverName
Expand Down Expand Up @@ -309,6 +365,11 @@ func (ex *ExamplePlugin) NodePrepareResources(ctx context.Context, req *drapbv1a
resp := &drapbv1alpha3.NodePrepareResourcesResponse{
Claims: make(map[string]*drapbv1alpha3.NodePrepareResourceResponse),
}

if failure := ex.GetPrepareResourcesFailure(); failure != nil {
return resp, failure
}

for _, claimReq := range req.Claims {
cdiDevices, err := ex.nodePrepareResource(ctx, claimReq.Name, claimReq.Uid, claimReq.ResourceHandle, claimReq.StructuredResourceHandle)
if err != nil {
Expand All @@ -330,13 +391,6 @@ func (ex *ExamplePlugin) NodePrepareResources(ctx context.Context, req *drapbv1a
func (ex *ExamplePlugin) nodeUnprepareResource(ctx context.Context, claimName string, claimUID string, resourceHandle string, structuredResourceHandle []*resourceapi.StructuredResourceHandle) error {
logger := klog.FromContext(ctx)

// Block to emulate plugin stuckness or slowness.
// By default the call will not be blocked as ex.block = false.
if ex.block {
<-ctx.Done()
return ctx.Err()
}

filePath := ex.getJSONFilePath(claimUID)
if err := ex.fileOps.Remove(filePath); err != nil {
return fmt.Errorf("error removing CDI file: %w", err)
Expand Down Expand Up @@ -378,9 +432,17 @@ func (ex *ExamplePlugin) nodeUnprepareResource(ctx context.Context, claimName st
}

func (ex *ExamplePlugin) NodeUnprepareResources(ctx context.Context, req *drapbv1alpha3.NodeUnprepareResourcesRequest) (*drapbv1alpha3.NodeUnprepareResourcesResponse, error) {
ex.blockUnprepareResourcesMutex.Lock()
defer ex.blockUnprepareResourcesMutex.Unlock()

resp := &drapbv1alpha3.NodeUnprepareResourcesResponse{
Claims: make(map[string]*drapbv1alpha3.NodeUnprepareResourceResponse),
}

if failure := ex.GetUnprepareResourcesFailure(); failure != nil {
return resp, failure
}

for _, claimReq := range req.Claims {
err := ex.nodeUnprepareResource(ctx, claimReq.Name, claimReq.Uid, claimReq.ResourceHandle, claimReq.StructuredResourceHandle)
if err != nil {
Expand Down Expand Up @@ -487,3 +549,13 @@ func (ex *ExamplePlugin) GetGRPCCalls() []GRPCCall {
calls = append(calls, ex.gRPCCalls...)
return calls
}

// CountCalls returns how many of the calls reported by GetGRPCCalls have a
// FullMethod that ends with methodSuffix.
func (ex *ExamplePlugin) CountCalls(methodSuffix string) int {
	var matches int
	recorded := ex.GetGRPCCalls()
	for i := range recorded {
		if !strings.HasSuffix(recorded[i].FullMethod, methodSuffix) {
			continue
		}
		matches++
	}
	return matches
}