Skip to content

Commit

Permalink
simd: split cursor advancing from value matching (#156)
Browse files Browse the repository at this point in the history
This refactors all SIMD modules in order to make the value-matching
logic self-contained. Thus, all bytes-cursor manipulations are now
grouped and performed once at the end, outside of SIMD logic.
  • Loading branch information
lucab committed Mar 6, 2024
1 parent 0beb74e commit b2625f3
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 234 deletions.
33 changes: 19 additions & 14 deletions src/lib.rs
Expand Up @@ -953,18 +953,20 @@ fn parse_token<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
#[allow(missing_docs)]
// WARNING: Exported for internal benchmarks, not fit for public consumption
pub fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
let start = bytes.pos();
simd::match_uri_vectored(bytes);
// URI must have at least one char
if bytes.pos() == start {
let uri_len = simd::match_uri_vectored(bytes.as_ref());
if uri_len == 0 {
return Err(Error::Token);
}
// SAFETY: these bytes have just been matched here above.
unsafe { bytes.advance(uri_len) };
let uri_slice = bytes.slice();

if next!(bytes) == b' ' {
return Ok(Status::Complete(
// SAFETY: all bytes up till `i` must have been `is_token` and therefore also utf-8.
unsafe { str::from_utf8_unchecked(bytes.slice_skip(1)) },
));
let space_delim = next!(bytes);
if space_delim == b' ' {
// SAFETY: all bytes within `uri_slice` must have been `is_token` and therefore also utf-8.
let uri = unsafe { str::from_utf8_unchecked(uri_slice) };
Ok(Status::Complete(uri))
} else {
Err(Error::Token)
}
Expand Down Expand Up @@ -1179,15 +1181,15 @@ fn parse_headers_iter_uninit<'a>(
#[allow(clippy::never_loop)]
// parse header name until colon
let header_name: &str = 'name: loop {
simd::match_header_name_vectored(bytes);
let mut b = next!(bytes);

// SAFETY: previously bumped by 1 with next! -> always safe.
let bslice = unsafe { bytes.slice_skip(1) };
let len = simd::match_header_name_vectored(bytes.as_ref());
// SAFETY: these bytes have just been matched here above.
unsafe { bytes.advance(len) };
let bslice = bytes.slice();
// SAFETY: previous call to match_header_name_vectored ensured all bytes are valid
// header name chars, and as such also valid utf-8.
let name = unsafe { str::from_utf8_unchecked(bslice) };

let mut b = next!(bytes);
if b == b':' {
break 'name name;
}
Expand All @@ -1213,6 +1215,7 @@ fn parse_headers_iter_uninit<'a>(
// eat white space between colon and value
'whitespace_after_colon: loop {
b = next!(bytes);

if b == b' ' || b == b'\t' {
bytes.slice();
continue 'whitespace_after_colon;
Expand All @@ -1239,7 +1242,9 @@ fn parse_headers_iter_uninit<'a>(
'value_lines: loop {
// parse value till EOL

simd::match_header_value_vectored(bytes);
let len = simd::match_header_value_vectored(bytes.as_ref());
// SAFETY: these bytes have just been matched here above.
unsafe { bytes.advance(len) };
let b = next!(bytes);

//found_ctl
Expand Down
48 changes: 27 additions & 21 deletions src/simd/avx2.rs
@@ -1,24 +1,27 @@
use crate::iter::Bytes;

#[cfg(target_arch = "x86")]
pub unsafe fn match_uri_vectored(_: &mut Bytes) {
pub(crate) unsafe fn match_uri_vectored(_: &[u8]) -> usize {
unreachable!("AVX2 detection should be disabled for x86");
}

#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2", enable = "sse4.2")]
pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_url_char_32_avx(bytes.as_ref());
bytes.advance(advance);
pub(crate) unsafe fn match_uri_vectored(bytes: &[u8]) -> usize {
let mut len = 0usize;
let mut remaining = bytes;
while remaining.len() >= 32 {
let advance = match_url_char_32_avx(remaining);
len = len.saturating_add(advance);
remaining = &bytes[len..];

if advance != 32 {
return;
return len;
}
}
// do both, since avx2 only works when bytes.len() >= 32
super::sse42::match_uri_vectored(bytes)
let advance = super::sse42::match_uri_vectored(remaining);
len = len.saturating_add(advance);
len
}

#[inline(always)]
Expand Down Expand Up @@ -64,23 +67,28 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
}

#[cfg(target_arch = "x86")]
pub unsafe fn match_header_value_vectored(_: &mut Bytes) {
pub(crate) unsafe fn match_header_value_vectored(_: &[u8]) -> usize {
unreachable!("AVX2 detection should be disabled for x86");
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2", enable = "sse4.2")]
pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_header_value_char_32_avx(bytes.as_ref());
bytes.advance(advance);
pub(crate) unsafe fn match_header_value_vectored(bytes: &[u8]) -> usize {
let mut len = 0usize;
let mut remaining = bytes;
while remaining.len() >= 32 {
let advance = match_header_value_char_32_avx(remaining);
len = len.saturating_add(advance);
remaining = &bytes[len..];

if advance != 32 {
return;
return len;
}
}
// do both, since avx2 only works when bytes.len() >= 32
super::sse42::match_header_value_vectored(bytes)
let advance = super::sse42::match_header_value_vectored(remaining);
len = len.saturating_add(advance);
len
}

#[inline(always)]
Expand Down Expand Up @@ -152,7 +160,7 @@ fn avx2_code_matches_header_value_chars_table() {
}

#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &[u8]) -> usize) -> bool {
let slice = [
b'_', b'_', b'_', b'_',
b'_', b'_', b'_', b'_',
Expand All @@ -163,11 +171,9 @@ unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool
b'_', b'_', byte, b'_',
b'_', b'_', b'_', b'_',
];
let mut bytes = Bytes::new(&slice);

f(&mut bytes);

match bytes.pos() {
let pos = f(&slice);
match pos {
32 => true,
26 => false,
_ => unreachable!(),
Expand Down
30 changes: 15 additions & 15 deletions src/simd/mod.rs
Expand Up @@ -11,7 +11,7 @@ mod swar;
)
),
)))]
pub use self::swar::*;
pub(crate) use self::swar::*;

#[cfg(all(
httparse_simd,
Expand Down Expand Up @@ -59,7 +59,7 @@ mod runtime;
target_arch = "x86_64",
),
))]
pub use self::runtime::*;
pub(crate) use self::runtime::*;

#[cfg(all(
httparse_simd,
Expand All @@ -72,18 +72,18 @@ pub use self::runtime::*;
))]
mod sse42_compile_time {
#[inline(always)]
pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) {
super::swar::match_header_name_vectored(b);
pub(crate) fn match_header_name_vectored(b: &[u8]) -> usize {
super::swar::match_header_name_vectored(b)
}

#[inline(always)]
pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
pub(crate) fn match_uri_vectored(b: &[u8]) -> usize {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::sse42::match_uri_vectored(b) }
}

#[inline(always)]
pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
pub(crate) fn match_header_value_vectored(b: &[u8]) -> usize {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::sse42::match_header_value_vectored(b) }
}
Expand All @@ -98,7 +98,7 @@ mod sse42_compile_time {
target_arch = "x86_64",
),
))]
pub use self::sse42_compile_time::*;
pub(crate) use self::sse42_compile_time::*;

#[cfg(all(
httparse_simd,
Expand All @@ -110,18 +110,18 @@ pub use self::sse42_compile_time::*;
))]
mod avx2_compile_time {
#[inline(always)]
pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) {
super::swar::match_header_name_vectored(b);
pub(crate) fn match_header_name_vectored(b: &[u8]) -> usize {
super::swar::match_header_name_vectored(b)
}

#[inline(always)]
pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
pub(crate) fn match_uri_vectored(b: &[u8]) -> usize {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::avx2::match_uri_vectored(b) }
}

#[inline(always)]
pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
pub(crate) fn match_header_value_vectored(b: &[u8]) -> usize {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::avx2::match_header_value_vectored(b) }
}
Expand All @@ -135,7 +135,7 @@ mod avx2_compile_time {
target_arch = "x86_64",
),
))]
pub use self::avx2_compile_time::*;
pub(crate) use self::avx2_compile_time::*;

#[cfg(all(
httparse_simd,
Expand All @@ -149,4 +149,4 @@ mod neon;
target_arch = "aarch64",
httparse_simd_neon_intrinsics,
))]
pub use self::neon::*;
pub(crate) use self::neon::*;

0 comments on commit b2625f3

Please sign in to comment.