feat: Only copy bitmap if the snapshot fails
Only if writing the snapshot fails, store the KVM dirty bitmap in
Firecracker's internal bitmap. Also make the fill step in the test
dynamic so that it also works on ARM hosts.

Signed-off-by: Jack Thomson <jackabt@amazon.com>
JackThomson2 committed May 9, 2024
1 parent c366bb9 commit 963448b
Showing 2 changed files with 103 additions and 57 deletions.
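In short, `dump_dirty` now merges the KVM dirty bitmap and Firecracker's internal bitmap in a single pass over each region, copies KVM's report into the internal bitmap only when writing the snapshot fails, and resets the internal bitmap only on success. Below is a minimal, self-contained sketch of that error-handling pattern; it is an illustration, not the Firecracker implementation, and `DirtyTracker`, `write_page`, and the `HashSet` page representation are hypothetical stand-ins for the real region bitmaps and volatile writer.

```rust
use std::collections::HashSet;

/// Illustration only (hypothetical types, not Firecracker code): dirty-page
/// state is cleared only when the snapshot write succeeds; on failure KVM's
/// report is merged into the internal store so the next diff snapshot still
/// sees those pages.
struct DirtyTracker {
    /// Pages remembered as dirty between snapshot attempts.
    internal: HashSet<usize>,
}

impl DirtyTracker {
    fn dump_dirty(
        &mut self,
        kvm_dirty: &HashSet<usize>,
        mut write_page: impl FnMut(usize) -> Result<(), String>,
    ) -> Result<(), String> {
        // Write every page that KVM *or* the internal store marks dirty.
        let result = kvm_dirty
            .union(&self.internal)
            .copied()
            .try_for_each(&mut write_page);

        if result.is_err() {
            // Snapshot failed: keep KVM's dirty pages for the next attempt.
            self.internal.extend(kvm_dirty.iter().copied());
        } else {
            // Snapshot succeeded: start the next tracking window clean.
            self.internal.clear();
        }
        result
    }
}

fn main() {
    let mut tracker = DirtyTracker { internal: HashSet::new() };
    let kvm_dirty = HashSet::from([0usize, 2]);

    // Simulate ENOSPC on the first attempt; the dirty pages must survive it.
    let first = tracker.dump_dirty(&kvm_dirty, |_page| Err("ENOSPC".to_string()));
    assert!(first.is_err());
    assert!(tracker.internal.contains(&0) && tracker.internal.contains(&2));

    // A later, successful attempt clears the internal store again.
    let second = tracker.dump_dirty(&HashSet::new(), |_page| Ok(()));
    assert!(second.is_ok());
    assert!(tracker.internal.is_empty());
}
```

The point of the fallback is that a failed diff snapshot (for example ENOSPC, which the changed integration test provokes) must not throw away dirty-page information, otherwise the next diff snapshot would silently miss those pages.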
151 changes: 97 additions & 54 deletions src/vmm/src/vstate/memory.rs
@@ -107,6 +107,9 @@ where

/// Resets all the memory region bitmaps
fn reset_dirty(&self);

/// Stores the dirty bitmap in the internal store
fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize);
}

/// State of a guest memory region saved to file/buffer.
@@ -301,73 +304,57 @@ impl GuestMemoryExtension for GuestMemoryMmap {
let mut writer_offset = 0;
let page_size = get_page_size().map_err(MemoryError::PageSize)?;

self.iter().enumerate().for_each(|(slot, region)| {
let write_result = self.iter().enumerate().try_for_each(|(slot, region)| {
let kvm_bitmap = dirty_bitmap.get(&slot).unwrap();
let firecracker_bitmap = region.bitmap();
let mut write_size = 0;
let mut dirty_batch_start: u64 = 0;

for (i, v) in kvm_bitmap.iter().enumerate() {
for j in 0..64 {
let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64;

if is_kvm_page_dirty {
let page_offset = ((i * 64) + j) * page_size;

firecracker_bitmap.mark_dirty(page_offset, 1)
}
}
}
});

self.iter()
.enumerate()
.try_for_each(|(slot, region)| {
let kvm_bitmap = dirty_bitmap.get(&slot).unwrap();
let firecracker_bitmap = region.bitmap();
let mut write_size = 0;
let mut dirty_batch_start: u64 = 0;

for i in 0..kvm_bitmap.len() {
for j in 0..64 {
let page_offset = ((i * 64) + j) * page_size;
let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset);
if is_firecracker_page_dirty {
// We are at the start of a new batch of dirty pages.
if write_size == 0 {
// Seek forward over the unmodified pages.
writer
.seek(SeekFrom::Start(writer_offset + page_offset as u64))
.unwrap();
dirty_batch_start = page_offset as u64;
}
write_size += page_size;
} else if write_size > 0 {
// We are at the end of a batch of dirty pages.
writer.write_all_volatile(
&region.get_slice(
MemoryRegionAddress(dirty_batch_start),
write_size,
)?,
)?;

write_size = 0;
let page_offset = ((i * 64) + j) * page_size;
let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset);

if is_kvm_page_dirty || is_firecracker_page_dirty {
// We are at the start of a new batch of dirty pages.
if write_size == 0 {
// Seek forward over the unmodified pages.
writer
.seek(SeekFrom::Start(writer_offset + page_offset as u64))
.unwrap();
dirty_batch_start = page_offset as u64;
}
write_size += page_size;
} else if write_size > 0 {
// We are at the end of a batch of dirty pages.
writer.write_all_volatile(
&region
.get_slice(MemoryRegionAddress(dirty_batch_start), write_size)?,
)?;

write_size = 0;
}
}
}

if write_size > 0 {
writer.write_all_volatile(
&region.get_slice(MemoryRegionAddress(dirty_batch_start), write_size)?,
)?;
}
writer_offset += region.len();
if write_size > 0 {
writer.write_all_volatile(
&region.get_slice(MemoryRegionAddress(dirty_batch_start), write_size)?,
)?;
}
writer_offset += region.len();

Ok(())
})
.map_err(MemoryError::WriteMemory)?;
Ok(())
});

self.reset_dirty();
if write_result.is_err() {
self.store_dirty_bitmap(dirty_bitmap, page_size);
} else {
self.reset_dirty();
}

Ok(())
write_result.map_err(MemoryError::WriteMemory)
}
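The write loop above coalesces consecutive dirty pages into batches: the first dirty page of a batch seeks the writer to that offset, and the first clean page afterwards (or the end of the region) flushes the accumulated run with a single `write_all_volatile`. A small sketch of the same coalescing idea in isolation, using a hypothetical `dirty_runs` helper over a plain `bool` slice instead of the real bitmaps:

```rust
/// Hypothetical helper for illustration: coalesce per-page dirty flags into
/// (byte offset, byte length) runs, mirroring the batching in `dump_dirty`.
fn dirty_runs(page_dirty: &[bool], page_size: usize) -> Vec<(usize, usize)> {
    let mut runs = Vec::new();
    let mut run_start = 0;
    let mut run_len = 0;

    for (page, &dirty) in page_dirty.iter().enumerate() {
        if dirty {
            if run_len == 0 {
                // Start of a new batch: this is where the writer would seek to.
                run_start = page * page_size;
            }
            run_len += page_size;
        } else if run_len > 0 {
            // End of a batch: emit it and reset.
            runs.push((run_start, run_len));
            run_len = 0;
        }
    }
    if run_len > 0 {
        // Flush a batch that reaches the end of the region.
        runs.push((run_start, run_len));
    }
    runs
}

fn main() {
    // Pages 0-1 dirty, page 2 clean, page 3 dirty -> two runs.
    let runs = dirty_runs(&[true, true, false, true], 4096);
    assert_eq!(runs, vec![(0, 2 * 4096), (3 * 4096, 4096)]);
}
```

Batching this way keeps the number of seeks and writes proportional to the number of dirty runs rather than the number of dirty pages.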

/// Resets all the memory region bitmaps
@@ -378,6 +365,26 @@ impl GuestMemoryExtension for GuestMemoryMmap {
}
})
}

/// Stores the dirty bitmap into the internal bitmap
fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize) {
self.iter().enumerate().for_each(|(slot, region)| {
let kvm_bitmap = dirty_bitmap.get(&slot).unwrap();
let firecracker_bitmap = region.bitmap();

for (i, v) in kvm_bitmap.iter().enumerate() {
for j in 0..64 {
let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64;

if is_kvm_page_dirty {
let page_offset = ((i * 64) + j) * page_size;

firecracker_bitmap.mark_dirty(page_offset, 1)
}
}
}
});
}
}

fn create_memfd(
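For reference, each `u64` word in the KVM dirty bitmap covers 64 guest pages: bit `j` of word `i` marks page `i * 64 + j`, i.e. byte offset `(i * 64 + j) * page_size`, which is the mapping `store_dirty_bitmap` applies above. A tiny self-contained sketch of that decoding with a hypothetical `dirty_page_offsets` helper (the real code marks the region bitmap directly rather than collecting offsets):

```rust
/// Hypothetical helper for illustration: decode a KVM-style dirty bitmap
/// (one bit per page, 64 pages per u64 word) into dirty page byte offsets.
fn dirty_page_offsets(words: &[u64], page_size: usize) -> Vec<usize> {
    let mut offsets = Vec::new();
    for (i, &word) in words.iter().enumerate() {
        for j in 0..64 {
            let is_dirty = ((word >> j) & 1u64) != 0u64;
            if is_dirty {
                // Bit j of word i covers page i * 64 + j.
                offsets.push((i * 64 + j) * page_size);
            }
        }
    }
    offsets
}

fn main() {
    // 0b101 marks pages 0 and 2 of the first word.
    assert_eq!(dirty_page_offsets(&[0b101], 4096), vec![0, 2 * 4096]);
}
```

This is also why `vec![0b101]` in `test_store_dirty_bitmap` below is expected to mark pages 0 and 2 of each region dirty while page 1 stays clean.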
@@ -829,6 +836,42 @@ mod tests {
assert_eq!(expected_first_region, diff_file_content);
}

#[test]
fn test_store_dirty_bitmap() {
let page_size = get_page_size().unwrap();

// Two regions of three pages each, with a one page gap between them.
let region_1_address = GuestAddress(0);
let region_2_address = GuestAddress(page_size as u64 * 4);
let region_size = page_size * 3;
let mem_regions = [
(region_1_address, region_size),
(region_2_address, region_size),
];
let guest_memory =
GuestMemoryMmap::from_raw_regions(&mem_regions, true, HugePageConfig::None).unwrap();

// Check that the Firecracker bitmap is clean.
guest_memory.iter().for_each(|r| {
assert!(!r.bitmap().dirty_at(0));
assert!(!r.bitmap().dirty_at(page_size));
assert!(!r.bitmap().dirty_at(page_size * 2));
});

let mut dirty_bitmap: DirtyBitmap = HashMap::new();
dirty_bitmap.insert(0, vec![0b101]);
dirty_bitmap.insert(1, vec![0b101]);

guest_memory.store_dirty_bitmap(&dirty_bitmap, page_size);

// Assert that the Firecracker bitmap now reports these pages as dirty, matching the KVM dirty bitmap.
guest_memory.iter().for_each(|r| {
assert!(r.bitmap().dirty_at(0));
assert!(!r.bitmap().dirty_at(page_size));
assert!(r.bitmap().dirty_at(page_size * 2));
});
}

#[test]
fn test_create_memfd() {
let size = 1;
9 changes: 6 additions & 3 deletions (second changed file: the snapshot-error integration test)
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
"""Test that the no dirty pages lost in case of error during snapshot creation."""

import psutil
import subprocess
from pathlib import Path

@@ -44,10 +45,12 @@ def test_diff_snapshot_works_after_error(

chroot = Path(uvm.chroot())

# Create a large file, so we run out of space (ENOSPC) during the snapshot
# Assumes a Docker /srv tmpfs of 1G, derived by trial and error
# Create a large file dynamically based on available space
fill = chroot / "fill"
subprocess.check_call(f"fallocate -l 330M {fill}", shell=True)
disk_usage = psutil.disk_usage(chroot)
target_size = round(disk_usage.free * 0.9) # Attempt to fill 90% of free space

subprocess.check_call(f"fallocate -l {target_size} {fill}", shell=True)

try:
uvm.snapshot_diff()