Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simd: split cursor advancing from value matching #156

Merged
merged 1 commit into from Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
33 changes: 19 additions & 14 deletions src/lib.rs
Expand Up @@ -953,18 +953,20 @@ fn parse_token<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
#[allow(missing_docs)]
// WARNING: Exported for internal benchmarks, not fit for public consumption
pub fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
let start = bytes.pos();
simd::match_uri_vectored(bytes);
// URI must have at least one char
if bytes.pos() == start {
let uri_len = simd::match_uri_vectored(bytes.as_ref());
if uri_len == 0 {
return Err(Error::Token);
}
// SAFETY: `match_uri_vectored` just matched exactly `uri_len` bytes of `bytes.as_ref()`, so that many bytes are available to advance over.
unsafe { bytes.advance(uri_len) };
let uri_slice = bytes.slice();

if next!(bytes) == b' ' {
return Ok(Status::Complete(
// SAFETY: all bytes up till `i` must have been `is_token` and therefore also utf-8.
unsafe { str::from_utf8_unchecked(bytes.slice_skip(1)) },
));
let space_delim = next!(bytes);
if space_delim == b' ' {
// SAFETY: all bytes within `uri_slice` were matched as URI characters by `match_uri_vectored` and are therefore also utf-8.
let uri = unsafe { str::from_utf8_unchecked(uri_slice) };
Ok(Status::Complete(uri))
} else {
Err(Error::Token)
}
Expand Down Expand Up @@ -1179,15 +1181,15 @@ fn parse_headers_iter_uninit<'a>(
#[allow(clippy::never_loop)]
// parse header name until colon
let header_name: &str = 'name: loop {
simd::match_header_name_vectored(bytes);
let mut b = next!(bytes);

// SAFETY: previously bumped by 1 with next! -> always safe.
let bslice = unsafe { bytes.slice_skip(1) };
let len = simd::match_header_name_vectored(bytes.as_ref());
// SAFETY: `match_header_name_vectored` just matched exactly `len` bytes of `bytes.as_ref()`, so that many bytes are available to advance over.
unsafe { bytes.advance(len) };
let bslice = bytes.slice();
// SAFETY: previous call to match_header_name_vectored ensured all bytes are valid
// header name chars, and as such also valid utf-8.
let name = unsafe { str::from_utf8_unchecked(bslice) };

let mut b = next!(bytes);
if b == b':' {
break 'name name;
}
Expand All @@ -1213,6 +1215,7 @@ fn parse_headers_iter_uninit<'a>(
// eat white space between colon and value
'whitespace_after_colon: loop {
b = next!(bytes);

if b == b' ' || b == b'\t' {
bytes.slice();
continue 'whitespace_after_colon;
Expand All @@ -1239,7 +1242,9 @@ fn parse_headers_iter_uninit<'a>(
'value_lines: loop {
// parse value till EOL

simd::match_header_value_vectored(bytes);
let len = simd::match_header_value_vectored(bytes.as_ref());
// SAFETY: `match_header_value_vectored` just matched exactly `len` bytes of `bytes.as_ref()`, so that many bytes are available to advance over.
unsafe { bytes.advance(len) };
let b = next!(bytes);

//found_ctl
Expand Down
48 changes: 27 additions & 21 deletions src/simd/avx2.rs
@@ -1,24 +1,27 @@
use crate::iter::Bytes;

#[cfg(target_arch = "x86")]
pub unsafe fn match_uri_vectored(_: &mut Bytes) {
pub(crate) unsafe fn match_uri_vectored(_: &[u8]) -> usize {
unreachable!("AVX2 detection should be disabled for x86");
}

#[inline]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2", enable = "sse4.2")]
pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_url_char_32_avx(bytes.as_ref());
bytes.advance(advance);
pub(crate) unsafe fn match_uri_vectored(bytes: &[u8]) -> usize {
let mut len = 0usize;
let mut remaining = bytes;
while remaining.len() >= 32 {
let advance = match_url_char_32_avx(remaining);
len = len.saturating_add(advance);
remaining = &bytes[len..];

if advance != 32 {
return;
return len;
}
}
// do both, since avx2 only works when bytes.len() >= 32
super::sse42::match_uri_vectored(bytes)
let advance = super::sse42::match_uri_vectored(remaining);
len = len.saturating_add(advance);
len
}

#[inline(always)]
Expand Down Expand Up @@ -64,23 +67,28 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
}

#[cfg(target_arch = "x86")]
pub unsafe fn match_header_value_vectored(_: &mut Bytes) {
pub(crate) unsafe fn match_header_value_vectored(_: &[u8]) -> usize {
unreachable!("AVX2 detection should be disabled for x86");
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2", enable = "sse4.2")]
pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_header_value_char_32_avx(bytes.as_ref());
bytes.advance(advance);
pub(crate) unsafe fn match_header_value_vectored(bytes: &[u8]) -> usize {
let mut len = 0usize;
let mut remaining = bytes;
while remaining.len() >= 32 {
let advance = match_header_value_char_32_avx(remaining);
len = len.saturating_add(advance);
remaining = &bytes[len..];

if advance != 32 {
return;
return len;
}
}
// do both, since avx2 only works when bytes.len() >= 32
super::sse42::match_header_value_vectored(bytes)
let advance = super::sse42::match_header_value_vectored(remaining);
len = len.saturating_add(advance);
len
}

#[inline(always)]
Expand Down Expand Up @@ -152,7 +160,7 @@ fn avx2_code_matches_header_value_chars_table() {
}

#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &[u8]) -> usize) -> bool {
let slice = [
b'_', b'_', b'_', b'_',
b'_', b'_', b'_', b'_',
Expand All @@ -163,11 +171,9 @@ unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool
b'_', b'_', byte, b'_',
b'_', b'_', b'_', b'_',
];
let mut bytes = Bytes::new(&slice);

f(&mut bytes);

match bytes.pos() {
let pos = f(&slice);
match pos {
32 => true,
26 => false,
_ => unreachable!(),
Expand Down
30 changes: 15 additions & 15 deletions src/simd/mod.rs
Expand Up @@ -11,7 +11,7 @@ mod swar;
)
),
)))]
pub use self::swar::*;
pub(crate) use self::swar::*;

#[cfg(all(
httparse_simd,
Expand Down Expand Up @@ -59,7 +59,7 @@ mod runtime;
target_arch = "x86_64",
),
))]
pub use self::runtime::*;
pub(crate) use self::runtime::*;

#[cfg(all(
httparse_simd,
Expand All @@ -72,18 +72,18 @@ pub use self::runtime::*;
))]
mod sse42_compile_time {
#[inline(always)]
pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) {
super::swar::match_header_name_vectored(b);
pub(crate) fn match_header_name_vectored(b: &[u8]) -> usize {
super::swar::match_header_name_vectored(b)
}

#[inline(always)]
pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
pub(crate) fn match_uri_vectored(b: &[u8]) -> usize {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::sse42::match_uri_vectored(b) }
}

#[inline(always)]
pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
pub(crate) fn match_header_value_vectored(b: &[u8]) -> usize {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::sse42::match_header_value_vectored(b) }
}
Expand All @@ -98,7 +98,7 @@ mod sse42_compile_time {
target_arch = "x86_64",
),
))]
pub use self::sse42_compile_time::*;
pub(crate) use self::sse42_compile_time::*;

#[cfg(all(
httparse_simd,
Expand All @@ -110,18 +110,18 @@ pub use self::sse42_compile_time::*;
))]
mod avx2_compile_time {
#[inline(always)]
pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) {
super::swar::match_header_name_vectored(b);
pub(crate) fn match_header_name_vectored(b: &[u8]) -> usize {
super::swar::match_header_name_vectored(b)
}

#[inline(always)]
pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
pub(crate) fn match_uri_vectored(b: &[u8]) -> usize {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::avx2::match_uri_vectored(b) }
}

#[inline(always)]
pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
pub(crate) fn match_header_value_vectored(b: &[u8]) -> usize {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::avx2::match_header_value_vectored(b) }
}
Expand All @@ -135,7 +135,7 @@ mod avx2_compile_time {
target_arch = "x86_64",
),
))]
pub use self::avx2_compile_time::*;
pub(crate) use self::avx2_compile_time::*;

#[cfg(all(
httparse_simd,
Expand All @@ -149,4 +149,4 @@ mod neon;
target_arch = "aarch64",
httparse_simd_neon_intrinsics,
))]
pub use self::neon::*;
pub(crate) use self::neon::*;