Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make diff snapshots transactional #4580

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
150 changes: 105 additions & 45 deletions src/vmm/src/vstate/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@

/// Resets all the memory region bitmaps
fn reset_dirty(&self);

/// Store the dirty bitmap in the internal store
fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize);
}

/// State of a guest memory region saved to file/buffer.
Expand Down Expand Up @@ -301,56 +304,57 @@
let mut writer_offset = 0;
let page_size = get_page_size().map_err(MemoryError::PageSize)?;

self.iter()
.enumerate()
.try_for_each(|(slot, region)| {
let kvm_bitmap = dirty_bitmap.get(&slot).unwrap();
let firecracker_bitmap = region.bitmap();
let mut write_size = 0;
let mut dirty_batch_start: u64 = 0;

for (i, v) in kvm_bitmap.iter().enumerate() {
for j in 0..64 {
let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64;
let page_offset = ((i * 64) + j) * page_size;
let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset);
if is_kvm_page_dirty || is_firecracker_page_dirty {
// We are at the start of a new batch of dirty pages.
if write_size == 0 {
// Seek forward over the unmodified pages.
writer
.seek(SeekFrom::Start(writer_offset + page_offset as u64))
.unwrap();
dirty_batch_start = page_offset as u64;
}
write_size += page_size;
} else if write_size > 0 {
// We are at the end of a batch of dirty pages.
writer.write_all_volatile(
&region.get_slice(
MemoryRegionAddress(dirty_batch_start),
write_size,
)?,
)?;

write_size = 0;
let write_result = self.iter().enumerate().try_for_each(|(slot, region)| {
let kvm_bitmap = dirty_bitmap.get(&slot).unwrap();
let firecracker_bitmap = region.bitmap();
let mut write_size = 0;
let mut dirty_batch_start: u64 = 0;

for (i, v) in kvm_bitmap.iter().enumerate() {
for j in 0..64 {
let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64;
let page_offset = ((i * 64) + j) * page_size;
let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset);

if is_kvm_page_dirty || is_firecracker_page_dirty {
// We are at the start of a new batch of dirty pages.
if write_size == 0 {
// Seek forward over the unmodified pages.
writer
.seek(SeekFrom::Start(writer_offset + page_offset as u64))
.unwrap();
dirty_batch_start = page_offset as u64;
}
write_size += page_size;
} else if write_size > 0 {
// We are at the end of a batch of dirty pages.
writer.write_all_volatile(
&region
.get_slice(MemoryRegionAddress(dirty_batch_start), write_size)?,
)?;

Check warning on line 334 in src/vmm/src/vstate/memory.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/vstate/memory.rs#L334

Added line #L334 was not covered by tests

write_size = 0;
}
}
}

if write_size > 0 {
writer.write_all_volatile(
&region.get_slice(MemoryRegionAddress(dirty_batch_start), write_size)?,
)?;
}
writer_offset += region.len();
if let Some(bitmap) = firecracker_bitmap {
bitmap.reset();
}
if write_size > 0 {
writer.write_all_volatile(
&region.get_slice(MemoryRegionAddress(dirty_batch_start), write_size)?,
)?;

Check warning on line 344 in src/vmm/src/vstate/memory.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/vstate/memory.rs#L344

Added line #L344 was not covered by tests
}
writer_offset += region.len();

Ok(())
})
.map_err(MemoryError::WriteMemory)
Ok(())
});

if write_result.is_err() {
self.store_dirty_bitmap(dirty_bitmap, page_size);

Check warning on line 352 in src/vmm/src/vstate/memory.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/vstate/memory.rs#L352

Added line #L352 was not covered by tests
} else {
self.reset_dirty();
}

write_result.map_err(MemoryError::WriteMemory)
}

/// Resets all the memory region bitmaps
Expand All @@ -361,6 +365,26 @@
}
})
}

/// Merges the KVM dirty bitmap into Firecracker's internal per-region bitmaps,
/// so pages that KVM reported as dirty are also tracked internally.
fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize) {
    for (slot, region) in self.iter().enumerate() {
        let kvm_bitmap = dirty_bitmap.get(&slot).unwrap();
        let firecracker_bitmap = region.bitmap();

        // Each u64 word of the KVM bitmap covers 64 consecutive guest pages.
        for (word_index, word) in kvm_bitmap.iter().copied().enumerate() {
            for bit in 0..64 {
                let kvm_page_is_dirty = (word >> bit) & 1u64 != 0u64;

                if kvm_page_is_dirty {
                    let page_offset = (word_index * 64 + bit) * page_size;

                    firecracker_bitmap.mark_dirty(page_offset, 1)
                }
            }
        }
    }
}
}

fn create_memfd(
Expand Down Expand Up @@ -812,6 +836,42 @@
assert_eq!(expected_first_region, diff_file_content);
}

#[test]
fn test_store_dirty_bitmap() {
let page_size = get_page_size().unwrap();

// Two regions of three pages each, with a one page gap between them.
let region_1_address = GuestAddress(0);
let region_2_address = GuestAddress(page_size as u64 * 4);
let region_size = page_size * 3;
let mem_regions = [
(region_1_address, region_size),
(region_2_address, region_size),
];
let guest_memory =
GuestMemoryMmap::from_raw_regions(&mem_regions, true, HugePageConfig::None).unwrap();

// Check that the Firecracker bitmap starts out clean in every region.
guest_memory.iter().for_each(|r| {
assert!(!r.bitmap().dirty_at(0));
assert!(!r.bitmap().dirty_at(page_size));
assert!(!r.bitmap().dirty_at(page_size * 2));
});

// KVM-style bitmap, one word per slot: 0b101 marks pages 0 and 2 dirty.
let mut dirty_bitmap: DirtyBitmap = HashMap::new();
dirty_bitmap.insert(0, vec![0b101]);
dirty_bitmap.insert(1, vec![0b101]);

guest_memory.store_dirty_bitmap(&dirty_bitmap, page_size);

// Assert that each region's bitmap now reports pages 0 and 2 as dirty and
// page 1 as clean, matching the KVM dirty bitmap stored above.
guest_memory.iter().for_each(|r| {
assert!(r.bitmap().dirty_at(0));
assert!(!r.bitmap().dirty_at(page_size));
assert!(r.bitmap().dirty_at(page_size * 2));
});
}

#[test]
fn test_create_memfd() {
let size = 1;
Expand Down
10 changes: 8 additions & 2 deletions tests/framework/microvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ def __init__(
self.log_file = None
self.metrics_file = None
self._spawned = False
self._killed = False

# device dictionaries
self.iface = {}
Expand All @@ -238,6 +239,9 @@ def __repr__(self):
def kill(self):
"""All clean up associated with this microVM should go here."""
# pylint: disable=subprocess-run-check
# if it was already killed, return
if self._killed:
return

# Stop any registered monitors
for monitor in self.monitors:
Expand Down Expand Up @@ -287,6 +291,7 @@ def kill(self):

# Mark the microVM as not spawned, so we avoid trying to kill twice.
self._spawned = False
self._killed = True

if self.time_api_requests:
self._validate_api_response_times()
Expand Down Expand Up @@ -1014,8 +1019,9 @@ def kill(self):
for vm in self.vms:
vm.kill()
vm.jailer.cleanup()
if len(vm.jailer.jailer_id) > 0:
shutil.rmtree(vm.jailer.chroot_base_with_id())
chroot_base_with_id = vm.jailer.chroot_base_with_id()
if len(vm.jailer.jailer_id) > 0 and chroot_base_with_id.exists():
shutil.rmtree(chroot_base_with_id)
vm.netns.cleanup()

self.vms.clear()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Test that the no dirty pages lost in case of error during snapshot creation."""

import subprocess
from pathlib import Path

import psutil
import pytest


@pytest.fixture
def mount_tmpfs_small(worker_id):
    """Mount a small (512M) tmpfs, yield its path, and unmount it on teardown."""
    mnt_path = Path(f"/mnt/{worker_id}")
    mnt_path.mkdir(parents=True)
    mount_cmd = ["mount", "-o", "size=512M", "-t", "tmpfs", "none", str(mnt_path)]
    subprocess.check_call(mount_cmd)
    try:
        yield mnt_path
    finally:
        # Always unmount and remove the mount point, even if the test failed.
        subprocess.check_call(["umount", str(mnt_path)])
        mnt_path.rmdir()


def test_diff_snapshot_works_after_error(
    microvm_factory, guest_kernel_linux_5_10, rootfs_ubuntu_22, mount_tmpfs_small
):
    """
    Test that a diff snapshot succeeds after a failed attempt, without losing
    dirty-page data.

    The first snapshot is forced to fail with ENOSPC by nearly filling the
    tmpfs-backed chroot. After freeing the space, a second diff snapshot must
    succeed, and a VM restored from it must boot and respond.
    """
    uvm = microvm_factory.build(
        guest_kernel_linux_5_10,
        rootfs_ubuntu_22,
        jailer_kwargs={"chroot_base": mount_tmpfs_small},
    )

    vm_mem_size = 128
    uvm.spawn()
    uvm.basic_config(mem_size_mib=vm_mem_size, track_dirty_pages=True)
    uvm.add_net_iface()
    uvm.start()
    uvm.wait_for_up()

    chroot = Path(uvm.chroot())

    # Create a large file dynamically based on available space, so the
    # snapshot write runs out of disk space part-way through.
    fill = chroot / "fill"
    disk_usage = psutil.disk_usage(chroot)
    target_size = round(disk_usage.free * 0.9)  # Attempt to fill 90% of free space
    subprocess.check_call(f"fallocate -l {target_size} {fill}", shell=True)

    # The first snapshot attempt must fail with ENOSPC.
    with pytest.raises(RuntimeError):
        uvm.snapshot_diff()
    uvm.check_log_message("No space left on device")

    # Free the space; the retried diff snapshot must now succeed.
    fill.unlink()
    snap2 = uvm.snapshot_diff()

    # The snapshot must be complete: a VM restored from it comes up healthy.
    vm2 = microvm_factory.build()
    vm2.spawn()
    vm2.restore_from_snapshot(snap2, resume=True)
    vm2.wait_for_up()

    uvm.kill()