diff options
Diffstat (limited to 'vendor/httparse/src/simd')
| -rw-r--r-- | vendor/httparse/src/simd/avx2.rs | 206 | ||||
| -rw-r--r-- | vendor/httparse/src/simd/mod.rs | 153 | ||||
| -rw-r--r-- | vendor/httparse/src/simd/neon.rs | 258 | ||||
| -rw-r--r-- | vendor/httparse/src/simd/runtime.rs | 57 | ||||
| -rw-r--r-- | vendor/httparse/src/simd/sse42.rs | 142 | ||||
| -rw-r--r-- | vendor/httparse/src/simd/swar.rs | 235 |
6 files changed, 0 insertions, 1051 deletions
diff --git a/vendor/httparse/src/simd/avx2.rs b/vendor/httparse/src/simd/avx2.rs deleted file mode 100644 index 2adf0a28..00000000 --- a/vendor/httparse/src/simd/avx2.rs +++ /dev/null @@ -1,206 +0,0 @@ -use crate::iter::Bytes; - -#[inline] -#[target_feature(enable = "avx2")] -pub unsafe fn match_uri_vectored(bytes: &mut Bytes) { - while bytes.as_ref().len() >= 32 { - - let advance = match_url_char_32_avx(bytes.as_ref()); - - bytes.advance(advance); - - if advance != 32 { - return; - } - } - // NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2 - super::swar::match_uri_vectored(bytes) -} - -#[inline(always)] -#[allow(non_snake_case, overflowing_literals)] -#[allow(unused)] -unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize { - // NOTE: This check might be not necessary since this function is only used in - // `match_uri_vectored` where buffer overflow is taken care of. - debug_assert!(buf.len() >= 32); - - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - // pointer to buffer - let ptr = buf.as_ptr(); - - // %x21-%x7e %x80-%xff - // - // Character ranges allowed by this function, can also be interpreted as: - // 33 =< (x != 127) =< 255 - // - // Create a vector full of DEL (0x7f) characters. - let DEL: __m256i = _mm256_set1_epi8(0x7f); - // Create a vector full of exclamation mark (!) (0x21) characters. - // Used as lower threshold, characters in URLs cannot be smaller than this. - let LOW: __m256i = _mm256_set1_epi8(0x21); - - // Load a chunk of 32 bytes from `ptr` as a vector. - // We can check 32 bytes in parallel at most with AVX2 since - // YMM registers can only have 256 bits most. - let dat = _mm256_lddqu_si256(ptr as *const _); - - // unsigned comparison dat >= LOW - // - // `_mm256_max_epu8` creates a new vector by comparing vectors `dat` and `LOW` - // and picks the max. values from each for all indices. - // So if a byte in `dat` is <= 32, it'll be represented as 33 - // which is the smallest valid character. - // - // Then, we compare the new vector with `dat` for equality. - // - // `_mm256_cmpeq_epi8` returns a new vector where; - // * matching bytes are set to 0xFF (all bits set), - // * nonmatching bytes are set to 0 (no bits set). - let low = _mm256_cmpeq_epi8(_mm256_max_epu8(dat, LOW), dat); - // Similar to what we did before, but now invalid characters are set to 0xFF. - let del = _mm256_cmpeq_epi8(dat, DEL); - - // We glue the both comparisons via `_mm256_andnot_si256`. - // - // Since the representation of truthiness differ in these comparisons, - // we are in need of bitwise NOT to convert valid characters of `del`. - let bit = _mm256_andnot_si256(del, low); - // This creates a bitmask from the most significant bit of each byte. - // Simply, we're converting a vector value to scalar value here. - let res = _mm256_movemask_epi8(bit) as u32; - - // Count trailing zeros to find the first encountered invalid character. - // Bitwise NOT is required once again to flip truthiness. - // TODO: use .trailing_ones() once MSRV >= 1.46 - (!res).trailing_zeros() as usize -} - -#[target_feature(enable = "avx2")] -pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) { - while bytes.as_ref().len() >= 32 { - let advance = match_header_value_char_32_avx(bytes.as_ref()); - bytes.advance(advance); - - if advance != 32 { - return; - } - } - // NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2 - super::swar::match_header_value_vectored(bytes) -} - -#[inline(always)] -#[allow(non_snake_case)] -#[allow(unused)] -unsafe fn match_header_value_char_32_avx(buf: &[u8]) -> usize { - debug_assert!(buf.len() >= 32); - - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - let ptr = buf.as_ptr(); - - // %x09 %x20-%x7e %x80-%xff - // Create a vector full of horizontal tab (\t) (0x09) characters. - let TAB: __m256i = _mm256_set1_epi8(0x09); - // Create a vector full of DEL (0x7f) characters. - let DEL: __m256i = _mm256_set1_epi8(0x7f); - // Create a vector full of space (0x20) characters. - let LOW: __m256i = _mm256_set1_epi8(0x20); - - // Load a chunk of 32 bytes from `ptr` as a vector. - let dat = _mm256_lddqu_si256(ptr as *const _); - - // unsigned comparison dat >= LOW - // - // Same as what we do in `match_url_char_32_avx`. - // This time the lower threshold is set to space character though. - let low = _mm256_cmpeq_epi8(_mm256_max_epu8(dat, LOW), dat); - // Check if `dat` includes `TAB` characters. - let tab = _mm256_cmpeq_epi8(dat, TAB); - // Check if `dat` includes `DEL` characters. - let del = _mm256_cmpeq_epi8(dat, DEL); - - // Combine all comparisons together, notice that we're also using OR - // to connect `low` and `tab` but flip bits of `del`. - // - // In the end, this is simply: - // ~del & (low | tab) - let bit = _mm256_andnot_si256(del, _mm256_or_si256(low, tab)); - // This creates a bitmask from the most significant bit of each byte. - // Creates a scalar value from vector value. - let res = _mm256_movemask_epi8(bit) as u32; - - // Count trailing zeros to find the first encountered invalid character. - // Bitwise NOT is required once again to flip truthiness. - // TODO: use .trailing_ones() once MSRV >= 1.46 - (!res).trailing_zeros() as usize -} - -#[test] -fn avx2_code_matches_uri_chars_table() { - if !is_x86_feature_detected!("avx2") { - return; - } - - #[allow(clippy::undocumented_unsafe_blocks)] - unsafe { - assert!(byte_is_allowed(b'_', match_uri_vectored)); - - for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() { - assert_eq!( - byte_is_allowed(b as u8, match_uri_vectored), allowed, - "byte_is_allowed({:?}) should be {:?}", b, allowed, - ); - } - } -} - -#[test] -fn avx2_code_matches_header_value_chars_table() { - if !is_x86_feature_detected!("avx2") { - return; - } - - #[allow(clippy::undocumented_unsafe_blocks)] - unsafe { - assert!(byte_is_allowed(b'_', match_header_value_vectored)); - - for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() { - assert_eq!( - byte_is_allowed(b as u8, match_header_value_vectored), allowed, - "byte_is_allowed({:?}) should be {:?}", b, allowed, - ); - } - } -} - -#[cfg(test)] -unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool { - let slice = [ - b'_', b'_', b'_', b'_', - b'_', b'_', b'_', b'_', - b'_', b'_', b'_', b'_', - b'_', b'_', b'_', b'_', - b'_', b'_', b'_', b'_', - b'_', b'_', b'_', b'_', - b'_', b'_', byte, b'_', - b'_', b'_', b'_', b'_', - ]; - let mut bytes = Bytes::new(&slice); - - f(&mut bytes); - - match bytes.pos() { - 32 => true, - 26 => false, - _ => unreachable!(), - } -} diff --git a/vendor/httparse/src/simd/mod.rs b/vendor/httparse/src/simd/mod.rs deleted file mode 100644 index 0e4493a5..00000000 --- a/vendor/httparse/src/simd/mod.rs +++ /dev/null @@ -1,153 +0,0 @@ -mod swar; - -#[cfg(not(all( - httparse_simd, - any( - target_arch = "x86", - target_arch = "x86_64", - all( - target_arch = "aarch64", - httparse_simd_neon_intrinsics, - ) - ), -)))] -pub use self::swar::*; - -#[cfg(all( - httparse_simd, - not(httparse_simd_target_feature_avx2), - any( - target_arch = "x86", - target_arch = "x86_64", - ), -))] -mod sse42; - -#[cfg(all( - httparse_simd, - any( - httparse_simd_target_feature_avx2, - not(httparse_simd_target_feature_sse42), - ), - any( - target_arch = "x86", - target_arch = "x86_64", - ), -))] -mod avx2; - -#[cfg(all( - httparse_simd, - not(any( - httparse_simd_target_feature_sse42, - httparse_simd_target_feature_avx2, - )), - any( - target_arch = "x86", - target_arch = "x86_64", - ), -))] -mod runtime; - -#[cfg(all( - httparse_simd, - not(any( - httparse_simd_target_feature_sse42, - httparse_simd_target_feature_avx2, - )), - any( - target_arch = "x86", - target_arch = "x86_64", - ), -))] -pub use self::runtime::*; - -#[cfg(all( - httparse_simd, - httparse_simd_target_feature_sse42, - not(httparse_simd_target_feature_avx2), - any( - target_arch = "x86", - target_arch = "x86_64", - ), -))] -mod sse42_compile_time { - #[inline(always)] - pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) { - super::swar::match_header_name_vectored(b); - } - - #[inline(always)] - pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) { - // SAFETY: calls are guarded by a compile time feature check - unsafe { crate::simd::sse42::match_uri_vectored(b) } - } - - #[inline(always)] - pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) { - // SAFETY: calls are guarded by a compile time feature check - unsafe { crate::simd::sse42::match_header_value_vectored(b) } - } -} - -#[cfg(all( - httparse_simd, - httparse_simd_target_feature_sse42, - not(httparse_simd_target_feature_avx2), - any( - target_arch = "x86", - target_arch = "x86_64", - ), -))] -pub use self::sse42_compile_time::*; - -#[cfg(all( - httparse_simd, - httparse_simd_target_feature_avx2, - any( - target_arch = "x86", - target_arch = "x86_64", - ), -))] -mod avx2_compile_time { - #[inline(always)] - pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) { - super::swar::match_header_name_vectored(b); - } - - #[inline(always)] - pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) { - // SAFETY: calls are guarded by a compile time feature check - unsafe { crate::simd::avx2::match_uri_vectored(b) } - } - - #[inline(always)] - pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) { - // SAFETY: calls are guarded by a compile time feature check - unsafe { crate::simd::avx2::match_header_value_vectored(b) } - } -} - -#[cfg(all( - httparse_simd, - httparse_simd_target_feature_avx2, - any( - target_arch = "x86", - target_arch = "x86_64", - ), -))] -pub use self::avx2_compile_time::*; - -#[cfg(all( - httparse_simd, - target_arch = "aarch64", - httparse_simd_neon_intrinsics, -))] -mod neon; - -#[cfg(all( - httparse_simd, - target_arch = "aarch64", - httparse_simd_neon_intrinsics, -))] -pub use self::neon::*; diff --git a/vendor/httparse/src/simd/neon.rs b/vendor/httparse/src/simd/neon.rs deleted file mode 100644 index b059efb2..00000000 --- a/vendor/httparse/src/simd/neon.rs +++ /dev/null @@ -1,258 +0,0 @@ -use crate::iter::Bytes; -use core::arch::aarch64::*; - -#[inline] -pub fn match_header_name_vectored(bytes: &mut Bytes) { - while bytes.as_ref().len() >= 16 { - // SAFETY: ensured that there are at least 16 bytes remaining - unsafe { - let advance = match_header_name_char_16_neon(bytes.as_ref().as_ptr()); - bytes.advance(advance); - - if advance != 16 { - return; - } - } - } - super::swar::match_header_name_vectored(bytes); -} - -#[inline] -pub fn match_header_value_vectored(bytes: &mut Bytes) { - while bytes.as_ref().len() >= 16 { - // SAFETY: ensured that there are at least 16 bytes remaining - unsafe { - let advance = match_header_value_char_16_neon(bytes.as_ref().as_ptr()); - bytes.advance(advance); - - if advance != 16 { - return; - } - } - } - super::swar::match_header_value_vectored(bytes); -} - -#[inline] -pub fn match_uri_vectored(bytes: &mut Bytes) { - while bytes.as_ref().len() >= 16 { - // SAFETY: ensured that there are at least 16 bytes remaining - unsafe { - let advance = match_url_char_16_neon(bytes.as_ref().as_ptr()); - bytes.advance(advance); - - if advance != 16 { - return; - } - } - } - super::swar::match_uri_vectored(bytes); -} - -const fn bit_set(x: u8) -> bool { - // Validates if a byte is a valid header name character - // https://tools.ietf.org/html/rfc7230#section-3.2.6 - matches!(x, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'!' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'*' | b'+' | b'-' | b'.' | b'^' | b'_' | b'`' | b'|' | b'~') -} - -// A 256-bit bitmap, split into two halves -// lower half contains bits whose higher nibble is <= 7 -// higher half contains bits whose higher nibble is >= 8 -const fn build_bitmap() -> ([u8; 16], [u8; 16]) { - let mut bitmap_0_7 = [0u8; 16]; // 0x00..0x7F - let mut bitmap_8_15 = [0u8; 16]; // 0x80..0xFF - let mut i = 0; - while i < 256 { - if bit_set(i as u8) { - // Nibbles - let (lo, hi) = (i & 0x0F, i >> 4); - if i < 128 { - bitmap_0_7[lo] |= 1 << hi; - } else { - bitmap_8_15[lo] |= 1 << hi; - } - } - i += 1; - } - (bitmap_0_7, bitmap_8_15) -} - -const BITMAPS: ([u8; 16], [u8; 16]) = build_bitmap(); - -// NOTE: adapted from 256-bit version, with upper 128-bit ops commented out -#[inline] -unsafe fn match_header_name_char_16_neon(ptr: *const u8) -> usize { - let bitmaps = BITMAPS; - // NOTE: ideally compile-time constants - let (bitmap_0_7, _bitmap_8_15) = bitmaps; - let bitmap_0_7 = vld1q_u8(bitmap_0_7.as_ptr()); - // let bitmap_8_15 = vld1q_u8(bitmap_8_15.as_ptr()); - - // Initialize the bitmask_lookup. - const BITMASK_LOOKUP_DATA: [u8; 16] = - [1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128]; - let bitmask_lookup = vld1q_u8(BITMASK_LOOKUP_DATA.as_ptr()); - - // Load 16 input bytes. - let input = vld1q_u8(ptr); - - // Extract indices for row_0_7. - let indices_0_7 = vandq_u8(input, vdupq_n_u8(0x8F)); // 0b1000_1111; - - // Extract indices for row_8_15. - // let msb = vandq_u8(input, vdupq_n_u8(0x80)); - // let indices_8_15 = veorq_u8(indices_0_7, msb); - - // Fetch row_0_7 and row_8_15. - let row_0_7 = vqtbl1q_u8(bitmap_0_7, indices_0_7); - // let row_8_15 = vqtbl1q_u8(bitmap_8_15, indices_8_15); - - // Calculate a bitmask, i.e. (1 << hi_nibble % 8). - let bitmask = vqtbl1q_u8(bitmask_lookup, vshrq_n_u8(input, 4)); - - // Choose rows halves depending on higher nibbles. - // let bitsets = vorrq_u8(row_0_7, row_8_15); - let bitsets = row_0_7; - - // Finally check which bytes belong to the set. - let tmp = vandq_u8(bitsets, bitmask); - let result = vceqq_u8(tmp, bitmask); - - offsetz(result) as usize -} - -#[inline] -unsafe fn match_url_char_16_neon(ptr: *const u8) -> usize { - let input = vld1q_u8(ptr); - - // Check that b'!' <= and b != 127 - let result = vcleq_u8(vdupq_n_u8(b'!'), input); - - // Disallow del - let del = vceqq_u8(input, vdupq_n_u8(0x7F)); - let result = vbicq_u8(result, del); - - offsetz(result) as usize -} - -#[inline] -unsafe fn match_header_value_char_16_neon(ptr: *const u8) -> usize { - let input = vld1q_u8(ptr); - - // Check that b' ' <= and b != 127 or b == 9 - let result = vcleq_u8(vdupq_n_u8(b' '), input); - - // Allow tab - let tab = vceqq_u8(input, vdupq_n_u8(0x09)); - let result = vorrq_u8(result, tab); - - // Disallow del - let del = vceqq_u8(input, vdupq_n_u8(0x7F)); - let result = vbicq_u8(result, del); - - offsetz(result) as usize -} - -#[inline] -unsafe fn offsetz(x: uint8x16_t) -> u32 { - // NOT the vector since it's faster to operate with zeros instead - offsetnz(vmvnq_u8(x)) -} - -#[inline] -unsafe fn offsetnz(x: uint8x16_t) -> u32 { - // Extract two u64 - let x = vreinterpretq_u64_u8(x); - // Extract to general purpose registers to perform clz - let low: u64 = vgetq_lane_u64::<0>(x); - let high: u64 = vgetq_lane_u64::<1>(x); - - #[inline] - fn clz(x: u64) -> u32 { - // perf: rust will unroll this loop - // and it's much faster than rbit + clz so voila - for (i, b) in x.to_ne_bytes().iter().copied().enumerate() { - if b != 0 { - return i as u32; - } - } - 8 // Technically not reachable since zero-guarded - } - - if low != 0 { - clz(low) - } else if high != 0 { - return 8 + clz(high); - } else { - return 16; - } -} - -#[test] -fn neon_code_matches_uri_chars_table() { - #[allow(clippy::undocumented_unsafe_blocks)] - unsafe { - assert!(byte_is_allowed(b'_', match_uri_vectored)); - - for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() { - assert_eq!( - byte_is_allowed(b as u8, match_uri_vectored), - allowed, - "byte_is_allowed({:?}) should be {:?}", - b, - allowed, - ); - } - } -} - -#[test] -fn neon_code_matches_header_value_chars_table() { - #[allow(clippy::undocumented_unsafe_blocks)] - unsafe { - assert!(byte_is_allowed(b'_', match_header_value_vectored)); - - for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() { - assert_eq!( - byte_is_allowed(b as u8, match_header_value_vectored), - allowed, - "byte_is_allowed({:?}) should be {:?}", - b, - allowed, - ); - } - } -} - -#[test] -fn neon_code_matches_header_name_chars_table() { - #[allow(clippy::undocumented_unsafe_blocks)] - unsafe { - assert!(byte_is_allowed(b'_', match_header_name_vectored)); - - for (b, allowed) in crate::TOKEN_MAP.iter().cloned().enumerate() { - assert_eq!( - byte_is_allowed(b as u8, match_header_name_vectored), - allowed, - "byte_is_allowed({:?}) should be {:?}", - b, - allowed, - ); - } - } -} - -#[cfg(test)] -unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool { - let mut slice = [b'_'; 16]; - slice[10] = byte; - let mut bytes = Bytes::new(&slice); - - f(&mut bytes); - - match bytes.pos() { - 16 => true, - 10 => false, - x => panic!("unexpected pos: {}", x), - } -} diff --git a/vendor/httparse/src/simd/runtime.rs b/vendor/httparse/src/simd/runtime.rs deleted file mode 100644 index c523a921..00000000 --- a/vendor/httparse/src/simd/runtime.rs +++ /dev/null @@ -1,57 +0,0 @@ -use std::sync::atomic::{AtomicU8, Ordering}; -use crate::iter::Bytes; -use super::avx2; -use super::sse42; - -const AVX2: u8 = 1; -const SSE42: u8 = 2; -const NOP: u8 = 3; - -fn detect_runtime_feature() -> u8 { - if is_x86_feature_detected!("avx2") { - AVX2 - } else if is_x86_feature_detected!("sse4.2") { - SSE42 - } else { - NOP - } -} - -static RUNTIME_FEATURE: AtomicU8 = AtomicU8::new(0); - -#[inline] -fn get_runtime_feature() -> u8 { - let mut feature = RUNTIME_FEATURE.load(Ordering::Relaxed); - if feature == 0 { - feature = detect_runtime_feature(); - RUNTIME_FEATURE.store(feature, Ordering::Relaxed); - } - - feature -} - -pub fn match_header_name_vectored(bytes: &mut Bytes) { - super::swar::match_header_name_vectored(bytes); -} - -pub fn match_uri_vectored(bytes: &mut Bytes) { - // SAFETY: calls are guarded by a feature check - unsafe { - match get_runtime_feature() { - AVX2 => avx2::match_uri_vectored(bytes), - SSE42 => sse42::match_uri_vectored(bytes), - _ /* NOP */ => super::swar::match_uri_vectored(bytes), - } - } -} - -pub fn match_header_value_vectored(bytes: &mut Bytes) { - // SAFETY: calls are guarded by a feature check - unsafe { - match get_runtime_feature() { - AVX2 => avx2::match_header_value_vectored(bytes), - SSE42 => sse42::match_header_value_vectored(bytes), - _ /* NOP */ => super::swar::match_header_value_vectored(bytes), - } - } -} diff --git a/vendor/httparse/src/simd/sse42.rs b/vendor/httparse/src/simd/sse42.rs deleted file mode 100644 index 0fabdfeb..00000000 --- a/vendor/httparse/src/simd/sse42.rs +++ /dev/null @@ -1,142 +0,0 @@ -use crate::iter::Bytes; - -#[target_feature(enable = "sse4.2")] -pub unsafe fn match_uri_vectored(bytes: &mut Bytes) { - while bytes.as_ref().len() >= 16 { - let advance = match_url_char_16_sse(bytes.as_ref()); - - bytes.advance(advance); - - if advance != 16 { - return; - } - } - super::swar::match_uri_vectored(bytes); -} - -#[inline(always)] -#[allow(non_snake_case)] -unsafe fn match_url_char_16_sse(buf: &[u8]) -> usize { - debug_assert!(buf.len() >= 16); - - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - let ptr = buf.as_ptr(); - - // %x21-%x7e %x80-%xff - let DEL: __m128i = _mm_set1_epi8(0x7f); - let LOW: __m128i = _mm_set1_epi8(0x21); - - let dat = _mm_lddqu_si128(ptr as *const _); - // unsigned comparison dat >= LOW - let low = _mm_cmpeq_epi8(_mm_max_epu8(dat, LOW), dat); - let del = _mm_cmpeq_epi8(dat, DEL); - let bit = _mm_andnot_si128(del, low); - let res = _mm_movemask_epi8(bit) as u16; - - // TODO: use .trailing_ones() once MSRV >= 1.46 - (!res).trailing_zeros() as usize -} - -#[target_feature(enable = "sse4.2")] -pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) { - while bytes.as_ref().len() >= 16 { - let advance = match_header_value_char_16_sse(bytes.as_ref()); - bytes.advance(advance); - - if advance != 16 { - return; - } - } - super::swar::match_header_value_vectored(bytes); -} - -#[inline(always)] -#[allow(non_snake_case)] -unsafe fn match_header_value_char_16_sse(buf: &[u8]) -> usize { - debug_assert!(buf.len() >= 16); - - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - - let ptr = buf.as_ptr(); - - // %x09 %x20-%x7e %x80-%xff - let TAB: __m128i = _mm_set1_epi8(0x09); - let DEL: __m128i = _mm_set1_epi8(0x7f); - let LOW: __m128i = _mm_set1_epi8(0x20); - - let dat = _mm_lddqu_si128(ptr as *const _); - // unsigned comparison dat >= LOW - let low = _mm_cmpeq_epi8(_mm_max_epu8(dat, LOW), dat); - let tab = _mm_cmpeq_epi8(dat, TAB); - let del = _mm_cmpeq_epi8(dat, DEL); - let bit = _mm_andnot_si128(del, _mm_or_si128(low, tab)); - let res = _mm_movemask_epi8(bit) as u16; - - // TODO: use .trailing_ones() once MSRV >= 1.46 - (!res).trailing_zeros() as usize -} - -#[test] -fn sse_code_matches_uri_chars_table() { - if !is_x86_feature_detected!("sse4.2") { - return; - } - - #[allow(clippy::undocumented_unsafe_blocks)] - unsafe { - assert!(byte_is_allowed(b'_', match_uri_vectored)); - - for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() { - assert_eq!( - byte_is_allowed(b as u8, match_uri_vectored), allowed, - "byte_is_allowed({:?}) should be {:?}", b, allowed, - ); - } - } -} - -#[test] -fn sse_code_matches_header_value_chars_table() { - if !is_x86_feature_detected!("sse4.2") { - return; - } - - #[allow(clippy::undocumented_unsafe_blocks)] - unsafe { - assert!(byte_is_allowed(b'_', match_header_value_vectored)); - - for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() { - assert_eq!( - byte_is_allowed(b as u8, match_header_value_vectored), allowed, - "byte_is_allowed({:?}) should be {:?}", b, allowed, - ); - } - } -} - -#[allow(clippy::missing_safety_doc)] -#[cfg(test)] -unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool { - let slice = [ - b'_', b'_', b'_', b'_', - b'_', b'_', b'_', b'_', - b'_', b'_', byte, b'_', - b'_', b'_', b'_', b'_', - ]; - let mut bytes = Bytes::new(&slice); - - f(&mut bytes); - - match bytes.pos() { - 16 => true, - 10 => false, - _ => unreachable!(), - } -} diff --git a/vendor/httparse/src/simd/swar.rs b/vendor/httparse/src/simd/swar.rs deleted file mode 100644 index e1028d05..00000000 --- a/vendor/httparse/src/simd/swar.rs +++ /dev/null @@ -1,235 +0,0 @@ -/// SWAR: SIMD Within A Register -/// SIMD validator backend that validates register-sized chunks of data at a time. -use crate::{is_header_name_token, is_header_value_token, is_uri_token, Bytes}; - -// Adapt block-size to match native register size, i.e: 32bit => 4, 64bit => 8 -const BLOCK_SIZE: usize = core::mem::size_of::<usize>(); -type ByteBlock = [u8; BLOCK_SIZE]; - -#[inline] -pub fn match_uri_vectored(bytes: &mut Bytes) { - loop { - if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) { - let n = match_uri_char_8_swar(bytes8); - // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes - // in `bytes`, so calling `advance(n)` is safe. - unsafe { - bytes.advance(n); - } - if n == BLOCK_SIZE { - continue; - } - } - if let Some(b) = bytes.peek() { - if is_uri_token(b) { - // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte - // in bytes, so calling advance is safe. - unsafe { - bytes.advance(1); - } - continue; - } - } - break; - } -} - -#[inline] -pub fn match_header_value_vectored(bytes: &mut Bytes) { - loop { - if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) { - let n = match_header_value_char_8_swar(bytes8); - // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes - // in `bytes`, so calling `advance(n)` is safe. - unsafe { - bytes.advance(n); - } - if n == BLOCK_SIZE { - continue; - } - } - if let Some(b) = bytes.peek() { - if is_header_value_token(b) { - // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte - // in bytes, so calling advance is safe. - unsafe { - bytes.advance(1); - } - continue; - } - } - break; - } -} - -#[inline] -pub fn match_header_name_vectored(bytes: &mut Bytes) { - while let Some(block) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) { - let n = match_block(is_header_name_token, block); - // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes - // in `bytes`, so calling `advance(n)` is safe. - unsafe { - bytes.advance(n); - } - if n != BLOCK_SIZE { - return; - } - } - // SAFETY: match_tail processes at most the remaining data in `bytes`. advances `bytes` to the - // end, but no further. - unsafe { bytes.advance(match_tail(is_header_name_token, bytes.as_ref())) }; -} - -// Matches "tail", i.e: when we have <BLOCK_SIZE bytes in the buffer, should be uncommon -#[cold] -#[inline] -fn match_tail(f: impl Fn(u8) -> bool, bytes: &[u8]) -> usize { - for (i, &b) in bytes.iter().enumerate() { - if !f(b) { - return i; - } - } - bytes.len() -} - -// Naive fallback block matcher -#[inline(always)] -fn match_block(f: impl Fn(u8) -> bool, block: ByteBlock) -> usize { - for (i, &b) in block.iter().enumerate() { - if !f(b) { - return i; - } - } - BLOCK_SIZE -} - -// A const alternative to u64::from_ne_bytes to avoid bumping MSRV (1.36 => 1.44) -// creates a u64 whose bytes are each equal to b -const fn uniform_block(b: u8) -> usize { - (b as u64 * 0x01_01_01_01_01_01_01_01 /* [1_u8; 8] */) as usize -} - -// A byte-wise range-check on an entire word/block, -// ensuring all bytes in the word satisfy `33 <= (x != 127) <= 255` -#[inline] -fn match_uri_char_8_swar(block: ByteBlock) -> usize { - // 33 <= (x != 127) <= 255 - const M: u8 = 0x21; - // uniform block full of exclamation mark (!) (33). - const BM: usize = uniform_block(M); - // uniform block full of 1. - const ONE: usize = uniform_block(0x01); - // uniform block full of DEL (127). - const DEL: usize = uniform_block(0x7f); - // uniform block full of 128. - const M128: usize = uniform_block(128); - - let x = usize::from_ne_bytes(block); // Really just a transmute - let lt = x.wrapping_sub(BM) & !x; // <= m - - let xor_del = x ^ DEL; - let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL - - offsetnz((lt | eq_del) & M128) -} - -// A byte-wise range-check on an entire word/block, -// ensuring all bytes in the word satisfy `32 <= (x != 127) <= 255` -#[inline] -fn match_header_value_char_8_swar(block: ByteBlock) -> usize { - // 32 <= (x != 127) <= 255 - const M: u8 = 0x20; - // uniform block full of exclamation mark (!) (33). - const BM: usize = uniform_block(M); - // uniform block full of 1. - const ONE: usize = uniform_block(0x01); - // uniform block full of DEL (127). - const DEL: usize = uniform_block(0x7f); - // uniform block full of 128. - const M128: usize = uniform_block(128); - - let x = usize::from_ne_bytes(block); // Really just a transmute - let lt = x.wrapping_sub(BM) & !x; // <= m - - let xor_del = x ^ DEL; - let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL - - offsetnz((lt | eq_del) & M128) -} - -/// Check block to find offset of first non-zero byte -// NOTE: Curiously `block.trailing_zeros() >> 3` appears to be slower, maybe revisit -#[inline] -fn offsetnz(block: usize) -> usize { - // fast path optimistic case (common for long valid sequences) - if block == 0 { - return BLOCK_SIZE; - } - - // perf: rust will unroll this loop - for (i, b) in block.to_ne_bytes().iter().copied().enumerate() { - if b != 0 { - return i; - } - } - unreachable!() -} - -#[test] -fn test_is_header_value_block() { - let is_header_value_block = |b| match_header_value_char_8_swar(b) == BLOCK_SIZE; - - // 0..32 => false - for b in 0..32_u8 { - assert!(!is_header_value_block([b; BLOCK_SIZE]), "b={}", b); - } - // 32..=126 => true - for b in 32..=126_u8 { - assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b); - } - // 127 => false - assert!(!is_header_value_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F'); - // 128..=255 => true - for b in 128..=255_u8 { - assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b); - } - - - #[cfg(target_pointer_width = "64")] - { - // A few sanity checks on non-uniform bytes for safe-measure - assert!(!is_header_value_block(*b"foo.com\n")); - assert!(!is_header_value_block(*b"o.com\r\nU")); - } -} - -#[test] -fn test_is_uri_block() { - let is_uri_block = |b| match_uri_char_8_swar(b) == BLOCK_SIZE; - - // 0..33 => false - for b in 0..33_u8 { - assert!(!is_uri_block([b; BLOCK_SIZE]), "b={}", b); - } - // 33..=126 => true - for b in 33..=126_u8 { - assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b); - } - // 127 => false - assert!(!is_uri_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F'); - // 128..=255 => true - for b in 128..=255_u8 { - assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b); - } -} - -#[test] -fn test_offsetnz() { - let seq = [0_u8; BLOCK_SIZE]; - for i in 0..BLOCK_SIZE { - let mut seq = seq; - seq[i] = 1; - let x = usize::from_ne_bytes(seq); - assert_eq!(offsetnz(x), i); - } -} |
