//! Functions for common string operations. pub(crate) use self::maybe_pct_encoded::{ process_percent_encoded_best_effort, PctEncodedFragments, }; mod maybe_pct_encoded; /// Returns the inner string if wrapped. #[must_use] pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> { let (prefix, suffix) = match s.as_bytes() { [prefix, suffix] | [prefix, .., suffix] => (*prefix, *suffix), _ => return None, }; if (prefix == open) && (suffix == close) { Some(&s[1..(s.len() - 1)]) } else { None } } /// Returns the byte that appears first. #[cfg(not(feature = "memchr"))] #[inline] #[must_use] pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option { haystack .iter() .copied() .find(|&b| b == needle1 || b == needle2) } /// Returns the byte that appears first. #[cfg(feature = "memchr")] #[inline] #[must_use] pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option { memchr::memchr2(needle1, needle2, haystack).map(|pos| haystack[pos]) } /// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character. #[cfg(not(feature = "memchr"))] #[inline] #[must_use] pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option { haystack.iter().rposition(|&b| b == needle) } /// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character. #[cfg(feature = "memchr")] #[inline] #[must_use] pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option { memchr::memrchr(needle, haystack) } /// Finds the first needle, and returns the string before it and the rest. /// /// If `needle` is not found, returns `None`. #[cfg(not(feature = "memchr"))] #[must_use] pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> { haystack .bytes() .position(|b| b == needle) .map(|pos| haystack.split_at(pos)) } /// Finds the first needle, and returns the string before it and the rest. /// /// If `needle` is not found, returns `None`. 
/// Finds the first needle, and returns the string before it and the rest.
///
/// The rest starts with the needle itself.
///
/// If `needle` is not found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    memchr::memchr(needle, haystack.as_bytes()).map(|pos| haystack.split_at(pos))
}

/// Finds the last needle, and returns the string before it and the rest.
///
/// The search runs from the end of `haystack`; the rest starts with whichever needle was
/// matched.
///
/// If no needles are found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    haystack
        .bytes()
        .rposition(|b| b == needle1 || b == needle2)
        .map(|pos| haystack.split_at(pos))
}

/// Finds the last needle, and returns the string before it and the rest.
///
/// The search runs from the end of `haystack`; the rest starts with whichever needle was
/// matched.
///
/// If no needles are found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    memchr::memrchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos))
}

/// Finds the first needle, and returns the string before it and the rest.
///
/// The rest starts with whichever needle was matched.
///
/// If no needles are found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    haystack
        .bytes()
        .position(|b| b == needle1 || b == needle2)
        .map(|pos| haystack.split_at(pos))
}

/// Finds the first needle, and returns the string before it and the rest.
///
/// The rest starts with whichever needle was matched.
///
/// If no needles are found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos))
}
/// Finds the first needle, and returns the string before it and the rest.
///
/// The rest starts with whichever needle was matched.
///
/// If no needles are found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    haystack
        .bytes()
        .position(|b| b == needle1 || b == needle2 || b == needle3)
        .map(|pos| haystack.split_at(pos))
}

/// Finds the first needle, and returns the string before it and the rest.
///
/// The rest starts with whichever needle was matched.
///
/// If no needles are found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes())
        .map(|pos| haystack.split_at(pos))
}

/// Finds the first needle, and returns the string before it and after it.
///
/// The needle itself is not part of either returned string.
///
/// If `needle` is not found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    haystack
        .bytes()
        .position(|b| b == needle)
        .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
}

/// Finds the first needle, and returns the string before it and after it.
///
/// The needle itself is not part of either returned string.
///
/// If `needle` is not found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    memchr::memchr(needle, haystack.as_bytes())
        .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
}

/// Finds the first needle, and returns the string before it, the needle, and the string after it.
///
/// If no needles are found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    haystack
        .bytes()
        .position(|b| b == needle1 || b == needle2)
        .map(|pos| {
            (
                &haystack[..pos],
                haystack.as_bytes()[pos],
                &haystack[(pos + 1)..],
            )
        })
}

/// Finds the first needle, and returns the string before it, the needle, and the string after it.
///
/// If no needles are found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| {
        (
            &haystack[..pos],
            haystack.as_bytes()[pos],
            &haystack[(pos + 1)..],
        )
    })
}

/// Finds the first needle, and returns the string before it, the needle, and the string after it.
///
/// If no needles are found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    haystack
        .bytes()
        .position(|b| b == needle1 || b == needle2 || b == needle3 || b == needle4)
        .map(|pos| {
            (
                &haystack[..pos],
                haystack.as_bytes()[pos],
                &haystack[(pos + 1)..],
            )
        })
}

/// Finds the first needle, and returns the string before it, the needle, and the string after it.
///
/// If no needles are found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    // Search the first three needles with `memchr3`, then look for the fourth needle only in
    // the part before that hit: an earlier `needle4` wins, otherwise the `memchr3` hit stands.
    let pos = match memchr::memchr3(needle1, needle2, needle3, bytes) {
        Some(prefix_len) => memchr::memchr(needle4, &bytes[..prefix_len]).or(Some(prefix_len)),
        None => memchr::memchr(needle4, bytes),
    };
    pos.map(|pos| (&haystack[..pos], bytes[pos], &haystack[(pos + 1)..]))
}

/// Finds the last needle, and returns the string before it and after it.
///
/// The needle itself is not part of either returned string.
///
/// If `needle` is not found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    haystack
        .bytes()
        .rposition(|b| b == needle)
        .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
}

/// Finds the last needle, and returns the string before it and after it.
///
/// The needle itself is not part of either returned string.
///
/// If `needle` is not found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    memchr::memrchr(needle, haystack.as_bytes())
        .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
}

/// Returns `true` if the string only contains the allowed characters.
///
/// ASCII bytes are checked with `pred_ascii` and non-ASCII characters with `pred_nonascii`.
/// The string is processed as alternating ASCII and non-ASCII runs so that ASCII runs can be
/// checked bytewise. An empty string is accepted.
#[must_use]
fn satisfy_chars<F, G>(mut s: &str, pred_ascii: F, pred_nonascii: G) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    while !s.is_empty() {
        match s.bytes().position(|b| !b.is_ascii()) {
            Some(nonascii_pos) => {
                // Validate ASCII prefix.
                if nonascii_pos != 0 {
                    let (prefix, rest) = s.split_at(nonascii_pos);
                    if !prefix.bytes().all(pred_ascii) {
                        return false;
                    }
                    s = rest;
                }

                // Extract non-ASCII part and validate it.
                let (prefix, rest) = match s.bytes().position(|b| b.is_ascii()) {
                    Some(ascii_pos) => s.split_at(ascii_pos),
                    None => (s, ""),
                };
                if !prefix.chars().all(pred_nonascii) {
                    return false;
                }
                s = rest;
            }
            None => {
                // All chars are ASCII.
                return s.bytes().all(pred_ascii);
            }
        }
    }

    true
}

/// Returns `true` if the string only contains the allowed characters and percent-encoded char.
///
/// Every `%` must be followed by two hexadecimal digits; everything else is checked with
/// `pred_ascii` (ASCII bytes) and `pred_nonascii` (non-ASCII characters). The predicates are
/// never called for `%` or for the two digits of a percent-encoded triplet.
#[must_use]
pub(crate) fn satisfy_chars_with_pct_encoded<F, G>(
    mut s: &str,
    pred_ascii: F,
    pred_nonascii: G,
) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    while let Some((prefix, suffix)) = find_split_hole(s, b'%') {
        // Verify strings before the percent-encoded char.
        if !prefix.is_empty() && !satisfy_chars(prefix, pred_ascii, pred_nonascii) {
            return false;
        }

        // Verify the percent-encoded char.
        if !starts_with_double_hexdigits(suffix.as_bytes()) {
            return false;
        }

        // Advance the cursor. The two skipped bytes are ASCII hex digits, so the index is on
        // a character boundary.
        s = &suffix[2..];
    }

    // Verify the rest.
    satisfy_chars(s, pred_ascii, pred_nonascii)
}

/// Returns `true` if the given string starts with two hexadecimal digits.
///
/// Both uppercase and lowercase hexadecimal digits are accepted. Inputs shorter than two
/// bytes yield `false`.
#[must_use]
pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool {
    match s {
        // `[x, y, ..]` matches every slice of length two or more.
        [x, y, ..] => x.is_ascii_hexdigit() && y.is_ascii_hexdigit(),
        _ => false,
    }
}

/// Strips the first character if it is the given ASCII character, and returns the rest.
///
/// Returns `None` if `s` is empty or does not start with `prefix`.
///
/// # Precondition
///
/// `prefix` must be an ASCII character. This is checked only in debug builds.
#[must_use]
pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> {
    debug_assert!(prefix.is_ascii());
    if s.as_bytes().first().copied() == Some(prefix) {
        Some(&s[1..])
    } else {
        None
    }
}

/// Splits the given string into the first character and the rest.
///
/// Returns `(first_char, rest_str)`, or `None` if the string is empty.
#[must_use]
pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> {
    let mut chars = s.chars();
    let c = chars.next()?;
    let rest = chars.as_str();
    Some((c, rest))
}