Skip to content
This repository has been archived by the owner on Mar 28, 2023. It is now read-only.

Commit

Permalink
A CSI volume is de facto unmounted if the node that the volume is hos…
Browse files Browse the repository at this point in the history
…ted on goes down. In `controller.ControllerUnpublishVolume()`, when a volume has been unmounted and we cannot find the node that the volume should be mounted on, the volume is in fact unmounted and should reported as such without error.

This works around a bug in kubernetes/kubernetes#51835 which [appears to remain unresolved and administratively closed](kubernetes/kubernetes#51835 (comment)).

Resolves kubernetes-sigs#15

Signed-off-by: Brian Topping <brian.topping@sap.com>
  • Loading branch information
briantopping committed Mar 7, 2023
1 parent 1151123 commit 7627b48
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 18 deletions.
23 changes: 23 additions & 0 deletions pkg/common/cns-lib/vsphere/datacenter.go
Expand Up @@ -116,6 +116,29 @@ func (dc *Datacenter) GetVirtualMachineByUUID(ctx context.Context,
return vm, nil
}

// GetVirtualMachineByDNSName returns the VirtualMachine instance given its DNS name in a datacenter.
func (dc *Datacenter) GetVirtualMachineByDNSName(ctx context.Context, dnsName string) (*VirtualMachine, error) {
log := logger.GetLogger(ctx)
searchIndex := object.NewSearchIndex(dc.Datacenter.Client())
svm, err := searchIndex.FindByDnsName(ctx, dc.Datacenter, dnsName, true)
if err != nil {
log.Errorf("failed to find VM given DNS name %s with err: %v", dnsName, err)
return nil, err
} else if svm == nil {
log.Errorf("Couldn't find VM given DNS name %s", dnsName)
return nil, ErrVMNotFound
}
vmObject := object.NewVirtualMachine(dc.Datacenter.Client(), svm.Reference())
uuid := vmObject.UUID(ctx)
vm := &VirtualMachine{
VirtualCenterHost: dc.VirtualCenterHost,
UUID: uuid,
VirtualMachine: vmObject,
Datacenter: dc,
}
return vm, nil
}

// asyncGetAllDatacenters returns *Datacenter instances over the given
// channel. If an error occurs, it will be returned via the given error channel.
// If the given context is canceled, the processing will be stopped as soon as
Expand Down
43 changes: 30 additions & 13 deletions pkg/common/cns-lib/vsphere/virtualmachine.go
Expand Up @@ -147,6 +147,8 @@ func GetUUIDFromVMReference(ctx context.Context, vc *VirtualCenter, vmRef types.
return vm.Config.Uuid, nil
}

type getVirtualMachineFunc func(ctx context.Context, dc *Datacenter) (*VirtualMachine, error)

// GetVirtualMachineByUUID returns virtual machine given its UUID in entire VC.
// If instanceUuid is set to true, then UUID is an instance UUID.
// In this case, this function searches for virtual machines whose instance UUID
Expand All @@ -155,8 +157,23 @@ func GetUUIDFromVMReference(ctx context.Context, vc *VirtualCenter, vmRef types.
// In this case, this function searches for virtual machines whose BIOS UUID
// matches the given uuid.
func GetVirtualMachineByUUID(ctx context.Context, uuid string, instanceUUID bool) (*VirtualMachine, error) {
term := fmt.Sprintf("UUID %s", uuid)
return getVirtualMachineByFunc(ctx, term, func(ctx context.Context, dc *Datacenter) (*VirtualMachine, error) {
return dc.GetVirtualMachineByUUID(ctx, uuid, instanceUUID)
})
}

// GetVirtualMachineByDNSName returns virtual machine given its dnsName in entire VC.
func GetVirtualMachineByDNSName(ctx context.Context, dnsName string) (*VirtualMachine, error) {
term := fmt.Sprintf("DNS name %s", dnsName)
return getVirtualMachineByFunc(ctx, term, func(ctx context.Context, dc *Datacenter) (*VirtualMachine, error) {
return dc.GetVirtualMachineByDNSName(ctx, dnsName)
})
}

func getVirtualMachineByFunc(ctx context.Context, term string, f getVirtualMachineFunc) (*VirtualMachine, error) {
log := logger.GetLogger(ctx)
log.Infof("Initiating asynchronous datacenter listing with uuid %s", uuid)
log.Infof("Initiating asynchronous datacenter listing with %s", term)
dcsChan, errChan := AsyncGetAllDatacenters(ctx, dcBufferSize)

var wg sync.WaitGroup
Expand All @@ -172,42 +189,42 @@ func GetVirtualMachineByUUID(ctx context.Context, uuid string, instanceUUID bool
case err, ok := <-errChan:
if !ok {
// Async function finished.
log.Debugf("AsyncGetAllDatacenters finished with uuid %s", uuid)
log.Debugf("AsyncGetAllDatacenters finished with %s", term)
return
} else if err == context.Canceled {
// Canceled by another instance of this goroutine.
log.Debugf("AsyncGetAllDatacenters ctx was canceled with uuid %s", uuid)
log.Debugf("AsyncGetAllDatacenters ctx was canceled with %s", term)
return
} else {
// Some error occurred.
log.Errorf("AsyncGetAllDatacenters with uuid %s sent an error: %v", uuid, err)
log.Errorf("AsyncGetAllDatacenters with %s sent an error: %v", term, err)
poolErr = err
return
}

case dc, ok := <-dcsChan:
if !ok {
// Async function finished.
log.Debugf("AsyncGetAllDatacenters finished with uuid %s", uuid)
log.Debugf("AsyncGetAllDatacenters finished with %s", term)
return
}

// Found some Datacenter object.
log.Infof("AsyncGetAllDatacenters with uuid %s sent a dc %v", uuid, dc)
if vm, err := dc.GetVirtualMachineByUUID(ctx, uuid, instanceUUID); err != nil {
log.Infof("AsyncGetAllDatacenters with %s sent a dc %v", term, dc)
if vm, err := f(ctx, dc); err != nil {
if err == ErrVMNotFound {
// Didn't find VM on this DC, so, continue searching on other DCs.
log.Warnf("Couldn't find VM given uuid %s on DC %v with err: %v, continuing search", uuid, dc, err)
log.Warnf("Couldn't find VM given %s on DC %v with err: %v, continuing search", term, dc, err)
continue
} else {
// Some serious error occurred, so stop the async function.
log.Errorf("Failed finding VM given uuid %s on DC %v with err: %v", uuid, dc, err)
log.Errorf("Failed finding VM given %s on DC %v with err: %v", term, dc, err)
poolErr = err
return
}
} else {
// Virtual machine was found, so stop the async function.
log.Infof("Found VM %v given uuid %s on DC %v", vm, uuid, dc)
log.Infof("Found VM %v given %s on DC %v", vm, term, dc)
nodeVM = vm
return
}
Expand All @@ -218,13 +235,13 @@ func GetVirtualMachineByUUID(ctx context.Context, uuid string, instanceUUID bool
wg.Wait()

if nodeVM != nil {
log.Infof("Returning VM %v for UUID %s", nodeVM, uuid)
log.Infof("Returning VM %v for %s", nodeVM, term)
return nodeVM, nil
} else if poolErr != nil {
log.Errorf("Returning err: %v for UUID %s", poolErr, uuid)
log.Errorf("Returning err: %v for %s", poolErr, term)
return nil, poolErr
} else {
log.Errorf("Returning VM not found err for UUID %s", uuid)
log.Errorf("Returning VM not found err for %s", term)
return nil, ErrVMNotFound
}
}
Expand Down
15 changes: 10 additions & 5 deletions pkg/csi/service/vanilla/controller.go
Expand Up @@ -40,6 +40,7 @@ import (

"sigs.k8s.io/vsphere-csi-driver/v3/pkg/apis/migration"
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/common/cns-lib/node"
cnsnode "sigs.k8s.io/vsphere-csi-driver/v3/pkg/common/cns-lib/node"
cnsvolume "sigs.k8s.io/vsphere-csi-driver/v3/pkg/common/cns-lib/volume"
cnsvsphere "sigs.k8s.io/vsphere-csi-driver/v3/pkg/common/cns-lib/vsphere"
cnsconfig "sigs.k8s.io/vsphere-csi-driver/v3/pkg/common/config"
Expand Down Expand Up @@ -2265,11 +2266,15 @@ func (c *controller) ControllerUnpublishVolume(ctx context.Context, req *csi.Con
nodevm, err = c.nodeMgr.GetNodeByName(ctx, req.NodeId)
}
if err != nil {
if err == cnsvsphere.ErrVMNotFound {
log.Infof("Virtual Machine for Node ID: %v is not present in the VC Inventory. "+
"Marking ControllerUnpublishVolume for Volume: %q as successful.", req.NodeId, req.VolumeId)
return &csi.ControllerUnpublishVolumeResponse{}, "", nil
} else {
if err == cnsnode.ErrNodeNotFound {
// Node is not existing anymore, we need to check if its VM is still existing.
// As the VM UUID is not known anymore, need to search by using NodeID as DNS Name
_, err = cnsvsphere.GetVirtualMachineByDNSName(ctx, req.NodeId)
if err == cnsvsphere.ErrVMNotFound {
log.Infof("Virtual Machine for Node ID: %v is not present in the VC Inventory. "+
"Marking ControllerUnpublishVolume for Volume: %q as successful.", req.NodeId, req.VolumeId)
return &csi.ControllerUnpublishVolumeResponse{}, "", nil
}
return nil, csifault.CSIInternalFault, logger.LogNewErrorCodef(log, codes.Internal,
"failed to find VirtualMachine for node:%q. Error: %v", req.NodeId, err)
}
Expand Down

0 comments on commit 7627b48

Please sign in to comment.