Skip to content

Commit

Permalink
Optimize data section parsing and storage (#1024)
Browse files Browse the repository at this point in the history
* refactor the DataSegment type

* rename method

* refactor how data segments are parsed and stored

Active data segments now store all their bytes into a common buffer to avoid having many small allocations.

* avoid heap allocations for small passive data segments

* Revert "avoid heap allocations for small passive data segments"

This reverts commit 1ff9249.

* use len instead of end per active data segment

This leads to smaller indices, which makes it possible to store only `u32` instead of `usize`, decreasing `size_of::<DataSegment>()` by 8 bytes in total.

Also applied rustfmt and added missing docs.

* fix size_of test

* fix doc link

* add missing docs

* add more missing docs

* apply rustfmt

* use Vec<u8> instead of Box<[u8]> for bytes

* apply rustfmt
  • Loading branch information
Robbepop committed May 9, 2024
1 parent 302a24f commit 6214e51
Show file tree
Hide file tree
Showing 6 changed files with 257 additions and 95 deletions.
59 changes: 41 additions & 18 deletions crates/wasmi/src/memory/data.rs
@@ -1,5 +1,10 @@
use crate::{collections::arena::ArenaIndex, module, store::Stored, AsContextMut};
use std::sync::Arc;
use crate::{
collections::arena::ArenaIndex,
module::{self, PassiveDataSegmentBytes},
store::Stored,
AsContextMut,
};
use core::convert::AsRef;

/// A raw index to a data segment entity.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
Expand Down Expand Up @@ -34,14 +39,28 @@ impl DataSegment {
&self.0
}

/// Allocates a new [`DataSegment`] on the store.
/// Allocates a new active [`DataSegment`] on the store.
///
/// # Errors
///
/// If more than [`u32::MAX`] much linear memory is allocated.
pub fn new(mut ctx: impl AsContextMut, segment: &module::DataSegment) -> Self {
let entity = DataSegmentEntity::from(segment);
ctx.as_context_mut().store.inner.alloc_data_segment(entity)
pub fn new_active(mut ctx: impl AsContextMut) -> Self {
    // Active segments are allocated from `DataSegmentEntity::active()`,
    // i.e. an entity that carries no bytes of its own.
    let entity = DataSegmentEntity::active();
    ctx.as_context_mut().store.inner.alloc_data_segment(entity)
}

/// Allocates a new passive [`DataSegment`] on the store.
///
/// The given `bytes` are wrapped into a passive [`DataSegmentEntity`], which is
/// then handed to the store's data segment allocator.
///
/// # Errors
///
/// If more than [`u32::MAX`] much linear memory is allocated.
///
/// NOTE(review): this returns `Self` rather than a `Result`, so how the
/// condition above is reported is not visible here; confirm.
pub fn new_passive(mut ctx: impl AsContextMut, bytes: PassiveDataSegmentBytes) -> Self {
    let entity = DataSegmentEntity::passive(bytes);
    ctx.as_context_mut().store.inner.alloc_data_segment(entity)
}
}

Expand All @@ -61,31 +80,35 @@ pub struct DataSegmentEntity {
/// These bytes are just readable after instantiation.
/// Using Wasm `data.drop` simply replaces the instance
/// with an empty one.
bytes: Option<Arc<[u8]>>,
bytes: Option<PassiveDataSegmentBytes>,
}

impl DataSegmentEntity {
    /// Constructs the entity backing an active data segment.
    ///
    /// No bytes are kept on the entity: its `bytes` field is `None`.
    pub fn active() -> Self {
        let bytes = None;
        Self { bytes }
    }

    /// Constructs the entity backing a passive data segment.
    ///
    /// Takes ownership of `bytes` and stores them in the entity's `bytes` field.
    pub fn passive(bytes: PassiveDataSegmentBytes) -> Self {
        let bytes = Some(bytes);
        Self { bytes }
    }
}

impl From<&'_ module::DataSegment> for DataSegmentEntity {
fn from(segment: &'_ module::DataSegment) -> Self {
match segment.kind() {
module::DataSegmentKind::Passive => Self {
bytes: Some(segment.clone_bytes()),
},
module::DataSegmentKind::Active(_) => Self::empty(),
Self {
bytes: segment.passive_data_segment_bytes(),
}
}
}

impl DataSegmentEntity {
/// Create an empty [`DataSegmentEntity`] representing dropped data segments.
fn empty() -> Self {
Self { bytes: None }
}

/// Returns the bytes of the [`DataSegmentEntity`].
pub fn bytes(&self) -> &[u8] {
self.bytes
.as_ref()
.map(|bytes| &bytes[..])
.map(AsRef::as_ref)
.unwrap_or_else(|| &[])
}

Expand Down
36 changes: 13 additions & 23 deletions crates/wasmi/src/module/builder.rs
@@ -1,8 +1,9 @@
use super::{
data::DataSegmentsBuilder,
export::ExternIdx,
import::FuncTypeIdx,
ConstExpr,
DataSegment,
DataSegments,
ElementSegment,
ExternTypeIdx,
FuncIdx,
Expand Down Expand Up @@ -31,7 +32,7 @@ use std::{boxed::Box, collections::BTreeMap, sync::Arc, vec::Vec};
#[derive(Debug)]
pub struct ModuleBuilder {
pub header: ModuleHeader,
pub data_segments: Vec<DataSegment>,
pub data_segments: DataSegmentsBuilder,
}

/// A builder for a WebAssembly [`Module`] header.
Expand Down Expand Up @@ -134,7 +135,7 @@ impl ModuleBuilder {
pub fn new(header: ModuleHeader) -> Self {
Self {
header,
data_segments: Vec::new(),
data_segments: DataSegments::build(),
}
}
}
Expand Down Expand Up @@ -370,33 +371,22 @@ impl ModuleHeaderBuilder {
}

impl ModuleBuilder {
/// Pushes the given linear memory data segments to the [`Module`] under construction.
///
/// # Errors
///
/// If any of the linear memory data segments fail to validate.
///
/// # Panics
///
/// If this function has already been called on the same [`ModuleBuilder`].
pub fn push_data_segments<T>(&mut self, data: T) -> Result<(), Error>
where
T: IntoIterator<Item = Result<DataSegment, Error>>,
{
assert!(
self.data_segments.is_empty(),
"tried to initialize module linear memory data segments twice"
);
self.data_segments = data.into_iter().collect::<Result<Vec<_>, _>>()?;
Ok(())
/// Reserves room for at least `additional` more data segments.
///
/// Simply forwards `additional` to the `reserve` method of the
/// `data_segments` builder held by this [`ModuleBuilder`].
pub fn reserve_data_segments(&mut self, additional: usize) {
    let segments = &mut self.data_segments;
    segments.reserve(additional);
}

/// Appends one more parsed data segment to the [`ModuleBuilder`].
///
/// # Errors
///
/// Forwards whatever error the `data_segments` builder's
/// `push_data_segment` returns for `data`.
pub fn push_data_segment(&mut self, data: wasmparser::Data) -> Result<(), Error> {
    let segments = &mut self.data_segments;
    segments.push_data_segment(data)
}

/// Finishes construction of the WebAssembly [`Module`].
pub fn finish(self, engine: &Engine) -> Module {
Module {
engine: engine.clone(),
header: self.header,
data_segments: self.data_segments.into(),
data_segments: self.data_segments.finish(),
}
}
}

0 comments on commit 6214e51

Please sign in to comment.