Skip to content

Commit

Permalink
Make diff snapshots transactional
Browse files Browse the repository at this point in the history
Store the KVM dirty bitmap inside Firecracker's internal bitmap so no data is
lost in case of an error

Signed-off-by: Jack Thomson <jackabt@amazon.com>
Co-authored-by: Roman Kovtyukh <HelloDearGrandma@gmail.com>
Co-authored-by: Pablo Barbáchano <pablob@amazon.com>
  • Loading branch information
3 people committed Apr 25, 2024
1 parent fd40204 commit 5bf4f93
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 9 deletions.
33 changes: 26 additions & 7 deletions src/vmm/src/vstate/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,25 @@ impl GuestMemoryExtension for GuestMemoryMmap {
let mut writer_offset = 0;
let page_size = get_page_size().map_err(MemoryError::PageSize)?;

self.iter()
.enumerate()
.for_each(|(slot, region)| {
let kvm_bitmap = dirty_bitmap.get(&slot).unwrap();
let firecracker_bitmap = region.bitmap();

for (i, v) in kvm_bitmap.iter().enumerate() {
for j in 0..64 {
let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64;

if is_kvm_page_dirty {
let page_offset = ((i * 64) + j) * page_size;

firecracker_bitmap.mark_dirty(page_offset, 1)
}
}
}
});

self.iter()
.enumerate()
.try_for_each(|(slot, region)| {
Expand All @@ -309,12 +328,11 @@ impl GuestMemoryExtension for GuestMemoryMmap {
let mut write_size = 0;
let mut dirty_batch_start: u64 = 0;

for (i, v) in kvm_bitmap.iter().enumerate() {
for i in 0..kvm_bitmap.len() {
for j in 0..64 {
let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64;
let page_offset = ((i * 64) + j) * page_size;
let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset);
if is_kvm_page_dirty || is_firecracker_page_dirty {
if is_firecracker_page_dirty {
// We are at the start of a new batch of dirty pages.
if write_size == 0 {
// Seek forward over the unmodified pages.
Expand Down Expand Up @@ -344,13 +362,14 @@ impl GuestMemoryExtension for GuestMemoryMmap {
)?;
}
writer_offset += region.len();
if let Some(bitmap) = firecracker_bitmap {
bitmap.reset();
}

Ok(())
})
.map_err(MemoryError::WriteMemory)
.map_err(MemoryError::WriteMemory)?;

self.reset_dirty();

Ok(())
}

/// Resets all the memory region bitmaps
Expand Down
10 changes: 8 additions & 2 deletions tests/framework/microvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ def __init__(
self.log_file = None
self.metrics_file = None
self._spawned = False
self._killed = False

# device dictionaries
self.iface = {}
Expand All @@ -236,6 +237,9 @@ def __repr__(self):
def kill(self):
"""All clean up associated with this microVM should go here."""
# pylint: disable=subprocess-run-check
# if it was already killed, return
if self._killed:
return

# We start with vhost-user backends,
# because if we stop Firecracker first, the backend will want
Expand Down Expand Up @@ -281,6 +285,7 @@ def kill(self):

# Mark the microVM as not spawned, so we avoid trying to kill twice.
self._spawned = False
self._killed = True

if self.time_api_requests:
self._validate_api_response_times()
Expand Down Expand Up @@ -1002,8 +1007,9 @@ def kill(self):
for vm in self.vms:
vm.kill()
vm.jailer.cleanup()
if len(vm.jailer.jailer_id) > 0:
shutil.rmtree(vm.jailer.chroot_base_with_id())
chroot_base_with_id = vm.jailer.chroot_base_with_id()
if len(vm.jailer.jailer_id) > 0 and chroot_base_with_id.exists():
shutil.rmtree(chroot_base_with_id)
vm.netns.cleanup()

self.vms.clear()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Test that the no dirty pages lost in case of error during snapshot creation."""

import pytest
import subprocess
from pathlib import Path


@pytest.fixture
def mount_tmpfs_small(worker_id):
    """Mount a small (512M) tmpfs under /mnt/<worker_id> and yield its path.

    On teardown the tmpfs is unmounted and the mount-point directory removed.
    If the mount itself fails, the freshly created directory is removed before
    the error propagates, so a later run does not fail in `mkdir`.
    """
    mnt_path = Path(f"/mnt/{worker_id}")
    mnt_path.mkdir(parents=True)
    try:
        subprocess.check_call(
            ["mount", "-o", "size=512M", "-t", "tmpfs", "none", str(mnt_path)]
        )
    except Exception:
        # Nothing is mounted here, so the directory is still empty and can
        # be removed safely.
        mnt_path.rmdir()
        raise

    try:
        yield mnt_path
    finally:
        subprocess.check_call(["umount", str(mnt_path)])
        mnt_path.rmdir()


def test_diff_snapshot_works_after_error(
    microvm_factory, guest_kernel_linux_5_10, rootfs_ubuntu_22, mount_tmpfs_small
):
    """
    Test that a diff snapshot still works after a previous attempt failed.

    The chroot lives on a small tmpfs that is filled up so the first diff
    snapshot fails with ENOSPC. Once the space is freed, a second diff
    snapshot is taken and a new microVM is restored from it.
    """

    uvm = microvm_factory.build(
        guest_kernel_linux_5_10,
        rootfs_ubuntu_22,
        jailer_kwargs={"chroot_base": mount_tmpfs_small},
    )

    vm_mem_size = 128
    uvm.spawn()
    uvm.basic_config(mem_size_mib=vm_mem_size, track_dirty_pages=True)
    uvm.add_net_iface()
    uvm.start()
    uvm.ssh.run("true")

    chroot = Path(uvm.chroot())

    # Create a large file, so we run out of space (ENOSPC) during the snapshot.
    # The 330M size was derived by trial and error.
    # NOTE(review): the original comment assumed a Docker /srv tmpfs of 1G,
    # while the mount_tmpfs_small fixture mounts 512M -- confirm which applies.
    fill = chroot / "fill"
    # Pass an argv list instead of a shell string so the path is never
    # re-parsed by a shell.
    subprocess.check_call(["fallocate", "-l", "330M", str(fill)])

    # The first snapshot must fail because the tmpfs is full.
    with pytest.raises(RuntimeError):
        uvm.snapshot_diff()
    uvm.check_log_message("No space left on device")

    fill.unlink()

    # Now there is enough space for it to work
    snap2 = uvm.snapshot_diff()

    vm2 = microvm_factory.build()
    vm2.spawn()
    vm2.restore_from_snapshot(snap2, resume=True)
    vm2.ssh.run("true")

    uvm.kill()

0 comments on commit 5bf4f93

Please sign in to comment.