Skip to content

Commit

Permalink
CA-388933: rework GC Active lock to ensure GC starts
Browse files Browse the repository at this point in the history
Commit b7b90c8 was addressing a race
condition between the Garbage Collector and the SR detach operation
where the GC could get started while the SR detach was proceeding due
to there not being any mutual exclusion between these operations. To
address this the code was changed to obtain the gc_active lock only
when within the SR lock, which is also held by the detach
operation. This prevents the starting GC process from acquiring the
active lock until the detach has completed, at which point the SR
would be detached and the GC would exit.

There was a problem with that commit: it used Lock.acquireNoblock to
acquire the SR lock and, if it failed to do so, assumed that this meant
the GC was already running. As the GC is typically kicked off as the
result of a VDI delete (or manually via an SR scan), the SR lock would
already be held. This makes the GC lock acquisition racy and dependent
on the time taken for the process to fork and daemonise. The outcome is
that, under some conditions, the GC process never starts and cleanup
does not occur, leading to an inability to take new snapshots once the
maximum chain length is exceeded. The code should instead use a
blocking acquire, which will wait until the current holder exits. This
commit makes that change.

Signed-off-by: Mark Syms <mark.syms@citrix.com>
  • Loading branch information
MarkSymsCtx committed Mar 14, 2024
1 parent 9ab89a5 commit fc6e4d3
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 74 deletions.
3 changes: 1 addition & 2 deletions drivers/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3103,8 +3103,7 @@ def __init__(self, srUuid):
self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid)

def acquireNoblock(self):
if not self._srLock.acquireNoblock():
return False
self._srLock.acquire()

try:
return self._lock.acquireNoblock()
Expand Down
112 changes: 40 additions & 72 deletions tests/test_cleanup.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import errno
import os
import unittest
import unittest.mock as mock

from tempfile import TemporaryDirectory
from uuid import uuid4

import cleanup
Expand Down Expand Up @@ -1787,81 +1785,52 @@ def test_not_plugged_retry(self):


class TestLockActive(unittest.TestCase):
# We mock flock.WriteLock (with MockWriteLock) so that we can easily
# fake a lock being held by another process.
class MockWriteLock: # pragma: no cover
test_case = None

def __init__(self, fd):
self.fd = fd
self._held = False

def is_externally_locked(self):
return self.test_case.is_externally_locked(self.fd)

def lock(self):
if self.is_externally_locked():
raise AssertionError("Failed attempt to take out lock")
self._held = True

def trylock(self):
if self._held:
return False
if self.is_externally_locked():
return False
self._held = True
return True

def held(self):
return self._held

def unlock(self):
self._held = False

def test(self):
"""Returns the PID of the process holding the lock or -1 if the lock
is not held."""
if self._held:
return os.getpid()
elif self.is_externally_locked():
return 1
else:
return -1

def setUp(self):
tmp_dir = TemporaryDirectory()
self.addCleanup(tmp_dir.cleanup)
self.tmp_dir = tmp_dir.name
self.addCleanup(mock.patch.stopall)

lock_dir_patcher = mock.patch("lock.Lock.BASE_DIR", self.tmp_dir)
lock_dir_patcher.start()
self.lock_patcher = mock.patch('cleanup.lock.Lock')
patched_lock = self.lock_patcher.start()
patched_lock.side_effect = self.create_lock
self.locks = {}

self.externally_locked_files = set()
self.files_by_fd = {}
self.sr_uuid = str(uuid4())

def mock_open(path, *args, **kwargs):
f = open(path, *args, **kwargs)
self.files_by_fd[f.fileno()] = path
return f
class DummyLock:
def __init__(self, name):
self.name = name
self.held = False
self.count = 0
self.can_acquire = True

open_patcher = mock.patch("lock.open", mock_open)
open_patcher.start()
def acquire(self):
if not self.held:
if self.can_acquire:
self.held = True
else:
# A real lock would block here; raise an error instead
raise BlockingIOError()

self.MockWriteLock.test_case = self
write_lock_patcher = mock.patch("flock.WriteLock", self.MockWriteLock)
write_lock_patcher.start()
self.count += 1

self.addCleanup(mock.patch.stopall)
def acquireNoblock(self):
if self.held or self.can_acquire:
self.held = True
return True

self.sr_uuid = str(uuid4())
return False

def release(self):
self.count -= 1
if not self.count:
self.held = False

def is_externally_locked(self, fd):
path = self.files_by_fd[fd]
return path in self.externally_locked_files
def create_lock(self, lock_name, sr_uuid):
lock_key = f'{lock_name}/{sr_uuid}'
if lock_key not in self.locks:
self.locks[lock_key] = self.DummyLock(lock_key)

def lock_externally(self, lock_type):
lockpath = os.path.join(self.tmp_dir, self.sr_uuid, lock_type)
self.externally_locked_files.add(lockpath)
return self.locks[lock_key]

def test_can_acquire(self):
# Given
Expand All @@ -1876,6 +1845,7 @@ def test_can_acquire(self):
def test_can_acquire_when_already_holding_sr_lock(self):
# Given
srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.sr_uuid)
srLock.held = True
gcLock = cleanup.LockActive(self.sr_uuid)

# When
Expand All @@ -1901,7 +1871,7 @@ def test_can_acquire_when_already_holding_sr_lock(self):
def test_cannot_acquire_if_other_process_holds_gc_lock(self):
# Given
gcLock = cleanup.LockActive(self.sr_uuid)
self.lock_externally(cleanup.LOCK_TYPE_GC_ACTIVE)
gcLock._lock.can_acquire = False

# When
acquired = gcLock.acquireNoblock()
Expand All @@ -1912,10 +1882,8 @@ def test_cannot_acquire_if_other_process_holds_gc_lock(self):
def test_cannot_acquire_if_other_process_holds_sr_lock(self):
# Given
gcLock = cleanup.LockActive(self.sr_uuid)
self.lock_externally(vhdutil.LOCK_TYPE_SR)
gcLock._srLock.can_acquire = False

# When
acquired = gcLock.acquireNoblock()

# Then
self.assertFalse(acquired)
with self.assertRaises(BlockingIOError):
gcLock.acquireNoblock()

0 comments on commit fc6e4d3

Please sign in to comment.