From b2625f38cc3a200e840861f2a3371eb8f5afdd73 Mon Sep 17 00:00:00 2001
From: Luca Bruno
Date: Wed, 6 Mar 2024 20:56:10 +0100
Subject: [PATCH] simd: split cursor advancing from value matching (#156)

This refactors all SIMD modules in order to make the value-matching
logic self-contained. Thus, all bytes-cursor manipulations are now
grouped and performed once at the end, outside of SIMD logic.
---
 src/lib.rs          |  33 +++++----
 src/simd/avx2.rs    |  48 +++++++------
 src/simd/mod.rs     |  30 ++++----
 src/simd/neon.rs    | 165 ++++++++++++++++++++++----------------
 src/simd/runtime.rs |  10 +--
 src/simd/sse42.rs   |  54 +++++++++------
 src/simd/swar.rs    | 123 ++++++++++++++------------------
 7 files changed, 229 insertions(+), 234 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 846ecee..3232852 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -953,18 +953,20 @@ fn parse_token<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
 #[allow(missing_docs)]
 // WARNING: Exported for internal benchmarks, not fit for public consumption
 pub fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
-    let start = bytes.pos();
-    simd::match_uri_vectored(bytes);
     // URI must have at least one char
-    if bytes.pos() == start {
+    let uri_len = simd::match_uri_vectored(bytes.as_ref());
+    if uri_len == 0 {
         return Err(Error::Token);
     }
+    // SAFETY: these bytes have just been matched here above.
+    unsafe { bytes.advance(uri_len) };
+    let uri_slice = bytes.slice();
 
-    if next!(bytes) == b' ' {
-        return Ok(Status::Complete(
-            // SAFETY: all bytes up till `i` must have been `is_token` and therefore also utf-8.
-            unsafe { str::from_utf8_unchecked(bytes.slice_skip(1)) },
-        ));
+    let space_delim = next!(bytes);
+    if space_delim == b' ' {
+        // SAFETY: all bytes within `uri_slice` must have been `is_token` and therefore also utf-8.
+        let uri = unsafe { str::from_utf8_unchecked(uri_slice) };
+        Ok(Status::Complete(uri))
     } else {
         Err(Error::Token)
     }
@@ -1179,15 +1181,15 @@ fn parse_headers_iter_uninit<'a>(
             #[allow(clippy::never_loop)]
             // parse header name until colon
             let header_name: &str = 'name: loop {
-                simd::match_header_name_vectored(bytes);
-                let mut b = next!(bytes);
-
-                // SAFETY: previously bumped by 1 with next! -> always safe.
-                let bslice = unsafe { bytes.slice_skip(1) };
+                let len = simd::match_header_name_vectored(bytes.as_ref());
+                // SAFETY: these bytes have just been matched here above.
+                unsafe { bytes.advance(len) };
+                let bslice = bytes.slice();
 
                 // SAFETY: previous call to match_header_name_vectored ensured all bytes are valid
                 // header name chars, and as such also valid utf-8.
                 let name = unsafe { str::from_utf8_unchecked(bslice) };
 
+                let mut b = next!(bytes);
                 if b == b':' {
                     break 'name name;
                 }
@@ -1213,6 +1215,7 @@ fn parse_headers_iter_uninit<'a>(
             // eat white space between colon and value
             'whitespace_after_colon: loop {
                 b = next!(bytes);
+
                 if b == b' ' || b == b'\t' {
                     bytes.slice();
                     continue 'whitespace_after_colon;
@@ -1239,7 +1242,9 @@ fn parse_headers_iter_uninit<'a>(
             'value_lines: loop {
                 // parse value till EOL
 
-                simd::match_header_value_vectored(bytes);
+                let len = simd::match_header_value_vectored(bytes.as_ref());
+                // SAFETY: these bytes have just been matched here above.
+                unsafe { bytes.advance(len) };
                 let b = next!(bytes);
 
                 //found_ctl
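
NOTE: the calling convention the whole patch converges on, as a minimal
sketch; `Cursor` and `match_digits` here are hypothetical stand-ins for
the crate's `Bytes` cursor and its SIMD matchers, not httparse API:

    // Matchers become pure functions over `&[u8]`: they report how many
    // leading bytes matched and never touch the cursor themselves.
    fn match_digits(bytes: &[u8]) -> usize {
        bytes.iter().take_while(|b| b.is_ascii_digit()).count()
    }

    struct Cursor<'a> {
        buf: &'a [u8],
        pos: usize,
    }

    fn main() {
        let mut cur = Cursor { buf: b"123 rest", pos: 0 };
        // Value matching: side-effect free.
        let n = match_digits(&cur.buf[cur.pos..]);
        // Cursor manipulation: grouped in one place, at the call site.
        cur.pos += n;
        assert_eq!(cur.pos, 3);
    }
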
diff --git a/src/simd/avx2.rs b/src/simd/avx2.rs
index 556620c..4f7d6fa 100644
--- a/src/simd/avx2.rs
+++ b/src/simd/avx2.rs
@@ -1,24 +1,27 @@
-use crate::iter::Bytes;
-
 #[cfg(target_arch = "x86")]
-pub unsafe fn match_uri_vectored(_: &mut Bytes) {
+pub(crate) unsafe fn match_uri_vectored(_: &[u8]) -> usize {
     unreachable!("AVX2 detection should be disabled for x86");
 }
 
 #[inline]
 #[cfg(target_arch = "x86_64")]
 #[target_feature(enable = "avx2", enable = "sse4.2")]
-pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
-    while bytes.as_ref().len() >= 32 {
-        let advance = match_url_char_32_avx(bytes.as_ref());
-        bytes.advance(advance);
+pub(crate) unsafe fn match_uri_vectored(bytes: &[u8]) -> usize {
+    let mut len = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() >= 32 {
+        let advance = match_url_char_32_avx(remaining);
+        len = len.saturating_add(advance);
+        remaining = &bytes[len..];
 
         if advance != 32 {
-            return;
+            return len;
         }
     }
     // do both, since avx2 only works when bytes.len() >= 32
-    super::sse42::match_uri_vectored(bytes)
+    let advance = super::sse42::match_uri_vectored(remaining);
+    len = len.saturating_add(advance);
+    len
 }
 
 #[inline(always)]
@@ -64,23 +67,28 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
 }
 
 #[cfg(target_arch = "x86")]
-pub unsafe fn match_header_value_vectored(_: &mut Bytes) {
+pub(crate) unsafe fn match_header_value_vectored(_: &[u8]) -> usize {
     unreachable!("AVX2 detection should be disabled for x86");
 }
 
 #[cfg(target_arch = "x86_64")]
 #[target_feature(enable = "avx2", enable = "sse4.2")]
-pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
-    while bytes.as_ref().len() >= 32 {
-        let advance = match_header_value_char_32_avx(bytes.as_ref());
-        bytes.advance(advance);
+pub(crate) unsafe fn match_header_value_vectored(bytes: &[u8]) -> usize {
+    let mut len = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() >= 32 {
+        let advance = match_header_value_char_32_avx(remaining);
+        len = len.saturating_add(advance);
+        remaining = &bytes[len..];
 
         if advance != 32 {
-            return;
+            return len;
         }
     }
     // do both, since avx2 only works when bytes.len() >= 32
-    super::sse42::match_header_value_vectored(bytes)
+    let advance = super::sse42::match_header_value_vectored(remaining);
+    len = len.saturating_add(advance);
+    len
 }
 
 #[inline(always)]
@@ -152,7 +160,7 @@ fn avx2_code_matches_header_value_chars_table() {
 }
 
 #[cfg(test)]
-unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
+unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &[u8]) -> usize) -> bool {
     let slice = [
         b'_', b'_', b'_', b'_', b'_', b'_', b'_', b'_',
         b'_', b'_', b'_', b'_', b'_', b'_', b'_', b'_',
         b'_', b'_', b'_', b'_', b'_', b'_', b'_', b'_',
         b'_', b'_', byte, b'_', b'_', b'_', b'_', b'_',
     ];
 
-    let mut bytes = Bytes::new(&slice);
-
-    f(&mut bytes);
-
-    match bytes.pos() {
+    let pos = f(&slice);
+    match pos {
         32 => true,
         26 => false,
         _ => unreachable!(),
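
NOTE: every backend in this patch shares the same driver shape; a scalar
model of it (the closures are hypothetical stand-ins for the intrinsics,
and `match_chunk` is assumed to return at most `width`):

    // Consume full-width chunks while they match completely, then hand the
    // remainder to the next-narrower backend (AVX2 -> SSE4.2 -> SWAR).
    fn drive(
        bytes: &[u8],
        width: usize,
        match_chunk: impl Fn(&[u8]) -> usize,
        narrower: impl Fn(&[u8]) -> usize,
    ) -> usize {
        let mut len = 0;
        while bytes.len() - len >= width {
            let advance = match_chunk(&bytes[len..]);
            len += advance;
            if advance != width {
                // A chunk matched only partially: the answer is final here.
                return len;
            }
        }
        // Too few bytes left for this width: defer to the narrower backend.
        len + narrower(&bytes[len..])
    }
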
diff --git a/src/simd/mod.rs b/src/simd/mod.rs
index 63464b4..a2da053 100644
--- a/src/simd/mod.rs
+++ b/src/simd/mod.rs
@@ -11,7 +11,7 @@ mod swar;
         )
     ),
 )))]
-pub use self::swar::*;
+pub(crate) use self::swar::*;
 
 #[cfg(all(
     httparse_simd,
@@ -59,7 +59,7 @@ mod runtime;
         target_arch = "x86_64",
     ),
 ))]
-pub use self::runtime::*;
+pub(crate) use self::runtime::*;
 
 #[cfg(all(
     httparse_simd,
@@ -72,18 +72,18 @@ pub use self::runtime::*;
 ))]
 mod sse42_compile_time {
     #[inline(always)]
-    pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) {
-        super::swar::match_header_name_vectored(b);
+    pub(crate) fn match_header_name_vectored(b: &[u8]) -> usize {
+        super::swar::match_header_name_vectored(b)
     }
 
     #[inline(always)]
-    pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
+    pub(crate) fn match_uri_vectored(b: &[u8]) -> usize {
         // SAFETY: calls are guarded by a compile time feature check
         unsafe { crate::simd::sse42::match_uri_vectored(b) }
     }
-    
+
     #[inline(always)]
-    pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
+    pub(crate) fn match_header_value_vectored(b: &[u8]) -> usize {
         // SAFETY: calls are guarded by a compile time feature check
         unsafe { crate::simd::sse42::match_header_value_vectored(b) }
     }
@@ -98,7 +98,7 @@ mod sse42_compile_time {
         target_arch = "x86_64",
     ),
 ))]
-pub use self::sse42_compile_time::*;
+pub(crate) use self::sse42_compile_time::*;
 
 #[cfg(all(
     httparse_simd,
@@ -110,18 +110,18 @@ pub use self::sse42_compile_time::*;
 ))]
 mod avx2_compile_time {
     #[inline(always)]
-    pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) {
-        super::swar::match_header_name_vectored(b);
+    pub(crate) fn match_header_name_vectored(b: &[u8]) -> usize {
+        super::swar::match_header_name_vectored(b)
    }
 
     #[inline(always)]
-    pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
+    pub(crate) fn match_uri_vectored(b: &[u8]) -> usize {
         // SAFETY: calls are guarded by a compile time feature check
         unsafe { crate::simd::avx2::match_uri_vectored(b) }
     }
-    
+
     #[inline(always)]
-    pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
+    pub(crate) fn match_header_value_vectored(b: &[u8]) -> usize {
         // SAFETY: calls are guarded by a compile time feature check
         unsafe { crate::simd::avx2::match_header_value_vectored(b) }
     }
@@ -135,7 +135,7 @@ mod avx2_compile_time {
         target_arch = "x86_64",
    ),
 ))]
-pub use self::avx2_compile_time::*;
+pub(crate) use self::avx2_compile_time::*;
 
 #[cfg(all(
     httparse_simd,
@@ -149,4 +149,4 @@ mod neon;
     target_arch = "aarch64",
     httparse_simd_neon_intrinsics,
 ))]
-pub use self::neon::*;
+pub(crate) use self::neon::*;
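
NOTE: the `*_compile_time` wrappers above lean on a standard Rust pattern;
a self-contained sketch, with a stand-in body (x86_64 only, names assumed):

    // A `#[target_feature]` fn is unsafe to call: the CPU must support the
    // feature. When a cfg gate also guarantees the feature at compile time,
    // a safe wrapper can discharge that precondition once.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "sse4.2")]
    unsafe fn kernel(bytes: &[u8]) -> usize {
        // stand-in for the real SIMD body
        bytes.len()
    }

    #[cfg(all(target_arch = "x86_64", target_feature = "sse4.2"))]
    fn kernel_safe(bytes: &[u8]) -> usize {
        // SAFETY: sse4.2 is statically enabled for this target, so the
        // feature precondition holds on every CPU this binary may run on.
        unsafe { kernel(bytes) }
    }
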
diff --git a/src/simd/neon.rs b/src/simd/neon.rs
index c6b86a8..1e85589 100644
--- a/src/simd/neon.rs
+++ b/src/simd/neon.rs
@@ -1,52 +1,60 @@
-use crate::iter::Bytes;
 use core::arch::aarch64::*;
 
 #[inline]
-pub fn match_header_name_vectored(bytes: &mut Bytes) {
-    while bytes.as_ref().len() >= 16 {
-        // SAFETY: ensured that there are at least 16 bytes remaining
-        unsafe {
-            let advance = match_header_name_char_16_neon(bytes.as_ref().as_ptr());
-            bytes.advance(advance);
-
-            if advance != 16 {
-                return;
-            }
+pub(crate) fn match_header_name_vectored(bytes: &[u8]) -> usize {
+    let mut len = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() >= 16 {
+        // SAFETY: ensured that there are at least 16 bytes remaining.
+        let advance = unsafe { match_header_name_char_16_neon(remaining) };
+        len = len.saturating_add(advance);
+        remaining = &bytes[len..];
+
+        if advance != 16 {
+            return len;
         }
     }
-    super::swar::match_header_name_vectored(bytes);
+    let advance = super::swar::match_header_name_vectored(remaining);
+    len = len.saturating_add(advance);
+    len
 }
 
 #[inline]
-pub fn match_header_value_vectored(bytes: &mut Bytes) {
-    while bytes.as_ref().len() >= 16 {
-        // SAFETY: ensured that there are at least 16 bytes remaining
-        unsafe {
-            let advance = match_header_value_char_16_neon(bytes.as_ref().as_ptr());
-            bytes.advance(advance);
-
-            if advance != 16 {
-                return;
-            }
+pub(crate) fn match_header_value_vectored(bytes: &[u8]) -> usize {
+    let mut len = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() >= 16 {
+        // SAFETY: ensured that there are at least 16 bytes remaining.
+        let advance = unsafe { match_header_value_char_16_neon(remaining) };
+        len = len.saturating_add(advance);
+        remaining = &bytes[len..];
+
+        if advance != 16 {
+            return len;
        }
     }
-    super::swar::match_header_value_vectored(bytes);
+    let advance = super::swar::match_header_value_vectored(remaining);
+    len = len.saturating_add(advance);
+    len
 }
 
 #[inline]
-pub fn match_uri_vectored(bytes: &mut Bytes) {
-    while bytes.as_ref().len() >= 16 {
-        // SAFETY: ensured that there are at least 16 bytes remaining
-        unsafe {
-            let advance = match_url_char_16_neon(bytes.as_ref().as_ptr());
-            bytes.advance(advance);
-
-            if advance != 16 {
-                return;
-            }
+pub(crate) fn match_uri_vectored(bytes: &[u8]) -> usize {
+    let mut len = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() >= 16 {
+        // SAFETY: ensured that there are at least 16 bytes remaining.
+        let advance = unsafe { match_url_char_16_neon(remaining) };
+        len = len.saturating_add(advance);
+        remaining = &bytes[len..];
+
+        if advance != 16 {
+            return len;
        }
     }
-    super::swar::match_uri_vectored(bytes);
+    let advance = super::swar::match_uri_vectored(remaining);
+    len = len.saturating_add(advance);
+    len
 }
 
 const fn bit_set(x: u8) -> bool {
@@ -81,7 +89,7 @@ const BITMAPS: ([u8; 16], [u8; 16]) = build_bitmap();
 
 // NOTE: adapted from 256-bit version, with upper 128-bit ops commented out
 #[inline]
-unsafe fn match_header_name_char_16_neon(ptr: *const u8) -> usize {
+unsafe fn match_header_name_char_16_neon(bytes: &[u8]) -> usize {
     let bitmaps = BITMAPS;
     // NOTE: ideally compile-time constants
     let (bitmap_0_7, _bitmap_8_15) = bitmaps;
@@ -94,7 +102,7 @@ unsafe fn match_header_name_char_16_neon(bytes: &[u8]) -> usize {
     let bitmask_lookup = vld1q_u8(BITMASK_LOOKUP_DATA.as_ptr());
 
     // Load 16 input bytes.
-    let input = vld1q_u8(ptr);
+    let input = vld1q_u8(bytes.as_ptr());
 
     // Extract indices for row_0_7.
     let indices_0_7 = vandq_u8(input, vdupq_n_u8(0x8F)); // 0b1000_1111;
@@ -122,8 +130,8 @@
 }
 
 #[inline]
-unsafe fn match_url_char_16_neon(ptr: *const u8) -> usize {
-    let input = vld1q_u8(ptr);
+unsafe fn match_url_char_16_neon(bytes: &[u8]) -> usize {
+    let input = vld1q_u8(bytes.as_ptr());
 
     // Check that b'!' <= input <= b'~'
     let result = vandq_u8(
@@ -141,8 +149,8 @@
 }
 
 #[inline]
-unsafe fn match_header_value_char_16_neon(ptr: *const u8) -> usize {
-    let input = vld1q_u8(ptr);
+unsafe fn match_header_value_char_16_neon(bytes: &[u8]) -> usize {
+    let input = vld1q_u8(bytes.as_ptr());
 
     // Check that b' ' <= and b != 127 or b == 9
     let result = vcleq_u8(vdupq_n_u8(b' '), input);
@@ -195,67 +203,56 @@ unsafe fn offsetnz(x: uint8x16_t) -> u32 {
 
 #[test]
 fn neon_code_matches_uri_chars_table() {
-    #[allow(clippy::undocumented_unsafe_blocks)]
-    unsafe {
-        assert!(byte_is_allowed(b'_', match_uri_vectored));
-
-        for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() {
-            assert_eq!(
-                byte_is_allowed(b as u8, match_uri_vectored),
-                allowed,
-                "byte_is_allowed({:?}) should be {:?}",
-                b,
-                allowed,
-            );
-        }
+    assert!(byte_is_allowed(b'_', match_uri_vectored));
+
+    for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() {
+        assert_eq!(
+            byte_is_allowed(b as u8, match_uri_vectored),
+            allowed,
+            "byte_is_allowed({:?}) should be {:?}",
+            b,
+            allowed,
+        );
     }
 }
 
 #[test]
 fn neon_code_matches_header_value_chars_table() {
-    #[allow(clippy::undocumented_unsafe_blocks)]
-    unsafe {
-        assert!(byte_is_allowed(b'_', match_header_value_vectored));
-
-        for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() {
-            assert_eq!(
-                byte_is_allowed(b as u8, match_header_value_vectored),
-                allowed,
-                "byte_is_allowed({:?}) should be {:?}",
-                b,
-                allowed,
-            );
-        }
+    assert!(byte_is_allowed(b'_', match_header_value_vectored));
+
+    for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() {
+        assert_eq!(
+            byte_is_allowed(b as u8, match_header_value_vectored),
+            allowed,
+            "byte_is_allowed({:?}) should be {:?}",
+            b,
+            allowed,
+        );
     }
 }
 
 #[test]
 fn neon_code_matches_header_name_chars_table() {
-    #[allow(clippy::undocumented_unsafe_blocks)]
-    unsafe {
-        assert!(byte_is_allowed(b'_', match_header_name_vectored));
-
-        for (b, allowed) in crate::HEADER_NAME_MAP.iter().cloned().enumerate() {
-            assert_eq!(
-                byte_is_allowed(b as u8, match_header_name_vectored),
-                allowed,
-                "byte_is_allowed({:?}) should be {:?}",
-                b,
-                allowed,
-            );
-        }
+    assert!(byte_is_allowed(b'_', match_header_name_vectored));
+
+    for (b, allowed) in crate::HEADER_NAME_MAP.iter().cloned().enumerate() {
+        assert_eq!(
+            byte_is_allowed(b as u8, match_header_name_vectored),
+            allowed,
+            "byte_is_allowed({:?}) should be {:?}",
+            b,
+            allowed,
+        );
     }
 }
 
 #[cfg(test)]
-unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
+fn byte_is_allowed(byte: u8, f: fn(bytes: &[u8]) -> usize) -> bool {
     let mut slice = [b'_'; 16];
     slice[10] = byte;
-    let mut bytes = Bytes::new(&slice);
 
-    f(&mut bytes);
-
-    match bytes.pos() {
+    let pos = f(&slice);
+    match pos {
         16 => true,
         10 => false,
         x => panic!("unexpected pos: {}", x),
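
NOTE: each `*_char_16_neon` kernel implements the same contract; a scalar
model of what the vector code (bitmap lookup plus `offsetnz`) computes:

    // Return how many leading bytes of the 16-byte block satisfy the
    // predicate: 16 means "whole block matched, keep looping"; anything
    // less is the offset of the first rejected byte.
    fn match_16(block: &[u8; 16], allowed: impl Fn(u8) -> bool) -> usize {
        block.iter().position(|&b| !allowed(b)).unwrap_or(16)
    }
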
diff --git a/src/simd/runtime.rs b/src/simd/runtime.rs
index c523a92..c51e317 100644
--- a/src/simd/runtime.rs
+++ b/src/simd/runtime.rs
@@ -1,5 +1,5 @@
 use std::sync::atomic::{AtomicU8, Ordering};
-use crate::iter::Bytes;
+
 use super::avx2;
 use super::sse42;
 
@@ -30,11 +30,11 @@ fn get_runtime_feature() -> u8 {
     feature
 }
 
-pub fn match_header_name_vectored(bytes: &mut Bytes) {
-    super::swar::match_header_name_vectored(bytes);
+pub(crate) fn match_header_name_vectored(bytes: &[u8]) -> usize {
+    super::swar::match_header_name_vectored(bytes)
 }
 
-pub fn match_uri_vectored(bytes: &mut Bytes) {
+pub(crate) fn match_uri_vectored(bytes: &[u8]) -> usize {
     // SAFETY: calls are guarded by a feature check
     unsafe {
         match get_runtime_feature() {
@@ -45,7 +45,7 @@ pub fn match_uri_vectored(bytes: &mut Bytes) {
     }
 }
 
-pub fn match_header_value_vectored(bytes: &mut Bytes) {
+pub(crate) fn match_header_value_vectored(bytes: &[u8]) -> usize {
     // SAFETY: calls are guarded by a feature check
     unsafe {
         match get_runtime_feature() {
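
NOTE: the dispatch above hinges on `get_runtime_feature()`, which this
patch leaves unchanged; a sketch of that detect-once pattern (the constant
names and values here are assumptions, not the crate's):

    use std::sync::atomic::{AtomicU8, Ordering};

    const UNKNOWN: u8 = 0;
    const SWAR: u8 = 1;
    const SSE42: u8 = 2;
    const AVX2: u8 = 3;

    static FEATURE: AtomicU8 = AtomicU8::new(UNKNOWN);

    #[cfg(target_arch = "x86_64")]
    fn feature() -> u8 {
        let mut f = FEATURE.load(Ordering::Relaxed);
        if f == UNKNOWN {
            // Detect once and cache; a benign race at worst repeats the
            // cheap detection on another thread.
            f = if is_x86_feature_detected!("avx2") {
                AVX2
            } else if is_x86_feature_detected!("sse4.2") {
                SSE42
            } else {
                SWAR
            };
            FEATURE.store(f, Ordering::Relaxed);
        }
        f
    }
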
diff --git a/src/simd/sse42.rs b/src/simd/sse42.rs
index d6fbf02..d835f19 100644
--- a/src/simd/sse42.rs
+++ b/src/simd/sse42.rs
@@ -1,16 +1,20 @@
-use crate::iter::Bytes;
-
 #[target_feature(enable = "sse4.2")]
-pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
-    while bytes.as_ref().len() >= 16 {
-        let advance = match_url_char_16_sse(bytes.as_ref());
-        bytes.advance(advance);
+pub(crate) unsafe fn match_uri_vectored(bytes: &[u8]) -> usize {
+    let mut len = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() >= 16 {
+        // SAFETY: ensured that there are at least 16 bytes remaining.
+        let advance = match_url_char_16_sse(remaining);
+        len = len.saturating_add(advance);
+        remaining = &bytes[len..];
 
         if advance != 16 {
-            return;
+            return len;
         }
     }
-    super::swar::match_uri_vectored(bytes);
+    let advance = super::swar::match_uri_vectored(remaining);
+    len = len.saturating_add(advance);
+    len
 }
 
 #[inline(always)]
@@ -62,16 +66,22 @@ unsafe fn match_url_char_16_sse(buf: &[u8]) -> usize {
 }
 
 #[target_feature(enable = "sse4.2")]
-pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
-    while bytes.as_ref().len() >= 16 {
-        let advance = match_header_value_char_16_sse(bytes.as_ref());
-        bytes.advance(advance);
-
-        if advance != 16 {
-            return;
-        }
+pub(crate) unsafe fn match_header_value_vectored(bytes: &[u8]) -> usize {
+    let mut len = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() >= 16 {
+        // SAFETY: ensured that there are at least 16 bytes remaining.
+        let advance = match_header_value_char_16_sse(remaining);
+        len = len.saturating_add(advance);
+        remaining = &bytes[len..];
+
+        if advance != 16 {
+            return len;
+        }
     }
-    super::swar::match_header_value_vectored(bytes);
+    let advance = super::swar::match_header_value_vectored(remaining);
+    len = len.saturating_add(advance);
+    len
 }
 
 #[inline(always)]
@@ -143,18 +153,16 @@ fn sse_code_matches_header_value_chars_table() {
 }
 
 #[allow(clippy::missing_safety_doc)]
 #[cfg(test)]
-unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
-    let slice = [
+unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &[u8]) -> usize) -> bool {
+    let slice = &[
         b'_', b'_', b'_', b'_', b'_', b'_', b'_', b'_',
         b'_', b'_', byte, b'_', b'_', b'_', b'_', b'_',
     ];
 
-    let mut bytes = Bytes::new(&slice);
-
-    f(&mut bytes);
-
-    match bytes.pos() {
+    let pos = f(slice);
+    match pos {
         16 => true,
         10 => false,
         _ => unreachable!(),
diff --git a/src/simd/swar.rs b/src/simd/swar.rs
index 4b352ba..940f500 100644
--- a/src/simd/swar.rs
+++ b/src/simd/swar.rs
@@ -1,83 +1,73 @@
 /// SWAR: SIMD Within A Register
 /// SIMD validator backend that validates register-sized chunks of data at a time.
-use crate::{is_header_name_token, is_header_value_token, is_uri_token, Bytes};
+use crate::{is_header_name_token, is_header_value_token, is_uri_token};
+use core::convert::TryInto;
 
 // Adapt block-size to match native register size, i.e: 32bit => 4, 64bit => 8
 const BLOCK_SIZE: usize = core::mem::size_of::<usize>();
 type ByteBlock = [u8; BLOCK_SIZE];
 
 #[inline]
-pub fn match_uri_vectored(bytes: &mut Bytes) {
-    loop {
-        if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
-            let n = match_uri_char_8_swar(bytes8);
-            // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
-            // in `bytes`, so calling `advance(n)` is safe.
-            unsafe {
-                bytes.advance(n);
-            }
-            if n == BLOCK_SIZE {
-                continue;
-            }
+pub(crate) fn match_uri_vectored(bytes: &[u8]) -> usize {
+    let mut len = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() >= BLOCK_SIZE {
+        let block = &remaining[..BLOCK_SIZE];
+        let advance = match_uri_char_8_swar(block.try_into().unwrap());
+        len = len.saturating_add(advance);
+        remaining = &bytes[len..];
+        if advance != BLOCK_SIZE {
+            // NOTE: must continue to tail-matching logic below, due to known
+            // false-negatives that need to be individually checked.
+            break;
         }
-        if let Some(b) = bytes.peek() {
-            if is_uri_token(b) {
-                // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte
-                // in bytes, so calling advance is safe.
-                unsafe {
-                    bytes.advance(1);
-                }
-                continue;
-            }
-        }
-        break;
     }
+    let tail_len = match_tail(is_uri_token, remaining);
+    len = len.saturating_add(tail_len);
+    len
 }
 
 #[inline]
-pub fn match_header_value_vectored(bytes: &mut Bytes) {
-    loop {
-        if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
-            let n = match_header_value_char_8_swar(bytes8);
-            // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
-            // in `bytes`, so calling `advance(n)` is safe.
-            unsafe {
-                bytes.advance(n);
-            }
-            if n == BLOCK_SIZE {
-                continue;
-            }
+pub(crate) fn match_header_value_vectored(bytes: &[u8]) -> usize {
+    let mut len = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() >= BLOCK_SIZE {
+        let block = &remaining[..BLOCK_SIZE];
+        let advance = match_header_value_char_8_swar(block.try_into().unwrap());
+        len = len.saturating_add(advance);
+        remaining = &bytes[len..];
+        if advance != BLOCK_SIZE {
+            // NOTE: must continue to tail-matching logic below, due to known
+            // false-negatives that need to be individually checked.
+            break;
        }
-        if let Some(b) = bytes.peek() {
-            if is_header_value_token(b) {
-                // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte
-                // in bytes, so calling advance is safe.
-                unsafe {
-                    bytes.advance(1);
-                }
-                continue;
-            }
-        }
-        break;
     }
+    let tail_len = match_tail(is_header_value_token, remaining);
+    len = len.saturating_add(tail_len);
+    len
 }
 
 #[inline]
-pub fn match_header_name_vectored(bytes: &mut Bytes) {
-    while let Some(block) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
-        let n = match_block(is_header_name_token, block);
-        // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
-        // in `bytes`, so calling `advance(n)` is safe.
-        unsafe {
-            bytes.advance(n);
-        }
-        if n != BLOCK_SIZE {
-            return;
-        }
+pub(crate) fn match_header_name_vectored(bytes: &[u8]) -> usize {
+    let mut len = 0usize;
+    let mut remaining = bytes;
+    while remaining.len() >= BLOCK_SIZE {
+        let block = &remaining[..BLOCK_SIZE];
+        let advance = block.iter().position(|b| !is_header_name_token(*b));
+        match advance {
+            None => {
+                len = len.saturating_add(BLOCK_SIZE);
+                remaining = &bytes[len..];
+            }
+            Some(v) => {
+                len = len.saturating_add(v);
+                return len;
+            }
+        };
     }
-    // SAFETY: match_tail processes at most the remaining data in `bytes`. advances `bytes` to the
-    // end, but no further.
-    unsafe { bytes.advance(match_tail(is_header_name_token, bytes.as_ref())) };
+    let tail_len = match_tail(is_header_name_token, remaining);
+    len = len.saturating_add(tail_len);
+    len
 }
 
 // Matches "tail", i.e: when we have <BLOCK_SIZE bytes remaining.
@@ ... @@ fn match_tail(f: impl Fn(u8) -> bool, bytes: &[u8]) -> usize {
     bytes.len()
 }
 
-// Naive fallback block matcher
-#[inline(always)]
-fn match_block(f: impl Fn(u8) -> bool, block: ByteBlock) -> usize {
-    for (i, &b) in block.iter().enumerate() {
-        if !f(b) {
-            return i;
-        }
-    }
-    BLOCK_SIZE
-}
-
 // A const alternative to u64::from_ne_bytes to avoid bumping MSRV (1.36 => 1.44)
 // creates a u64 whose bytes are each equal to b
 const fn uniform_block(b: u8) -> usize {
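
NOTE: on the swar "false-negatives" comments above: the register-wide
arithmetic test may under-report (stop before a byte that is actually
valid) but never over-reports, so a partial block result means "stop
vectoring", not "stop matching". A contract model of the driver, with
hypothetical closures standing in for the block matcher and token table:

    use std::convert::TryInto;

    // `block8` may return n < 8 even when bytes[n] is in fact a valid
    // token byte; everything before n is always valid. The driver thus
    // re-checks the remainder byte-by-byte (the role of `match_tail`)
    // instead of trusting n as the final answer.
    fn drive_swar(
        bytes: &[u8],
        block8: impl Fn(&[u8; 8]) -> usize,
        is_token: impl Fn(u8) -> bool,
    ) -> usize {
        let mut len = 0;
        while bytes.len() - len >= 8 {
            let block: &[u8; 8] = bytes[len..len + 8].try_into().unwrap();
            let n = block8(block);
            len += n;
            if n != 8 {
                break;
            }
        }
        len + bytes[len..].iter().take_while(|&&b| is_token(b)).count()
    }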