diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-10 13:11:11 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-10 13:11:11 -0600 |
| commit | 01959b16a21b22b5df5f16569c2a8e8f92beecef (patch) | |
| tree | 32afa5d747c5466345c59ec52161a7cba3d6d755 /vendor/iri-string/src/parser/str.rs | |
| parent | ff30574117a996df332e23d1fb6f65259b316b5b (diff) | |
chore: vendor dependencies
Diffstat (limited to 'vendor/iri-string/src/parser/str.rs')
| -rw-r--r-- | vendor/iri-string/src/parser/str.rs | 390 |
1 files changed, 390 insertions, 0 deletions
diff --git a/vendor/iri-string/src/parser/str.rs b/vendor/iri-string/src/parser/str.rs new file mode 100644 index 00000000..0f564bfa --- /dev/null +++ b/vendor/iri-string/src/parser/str.rs @@ -0,0 +1,390 @@ +//! Functions for common string operations. + +pub(crate) use self::maybe_pct_encoded::{ + process_percent_encoded_best_effort, PctEncodedFragments, +}; + +mod maybe_pct_encoded; + +/// Returns the inner string if wrapped. +#[must_use] +pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> { + let (prefix, suffix) = match s.as_bytes() { + [prefix, suffix] | [prefix, .., suffix] => (*prefix, *suffix), + _ => return None, + }; + if (prefix == open) && (suffix == close) { + Some(&s[1..(s.len() - 1)]) + } else { + None + } +} + +/// Returns the byte that appears first. +#[cfg(not(feature = "memchr"))] +#[inline] +#[must_use] +pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> { + haystack + .iter() + .copied() + .find(|&b| b == needle1 || b == needle2) +} + +/// Returns the byte that appears first. +#[cfg(feature = "memchr")] +#[inline] +#[must_use] +pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> { + memchr::memchr2(needle1, needle2, haystack).map(|pos| haystack[pos]) +} + +/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character. +#[cfg(not(feature = "memchr"))] +#[inline] +#[must_use] +pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> { + haystack.iter().rposition(|&b| b == needle) +} + +/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character. +#[cfg(feature = "memchr")] +#[inline] +#[must_use] +pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> { + memchr::memrchr(needle, haystack) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If `needle` is not found, returns `None`. 
/// Splits `haystack` immediately before the first occurrence of `needle`.
///
/// The second element of the returned pair starts with the needle itself.
/// Returns `None` when `needle` does not occur. `needle` is expected to be an
/// ASCII byte so that the split position is a character boundary.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.as_bytes().iter().position(|&b| b == needle)?;
    Some(haystack.split_at(pos))
}

/// Splits `haystack` immediately before the first occurrence of `needle`.
///
/// The second element of the returned pair starts with the needle itself.
/// Returns `None` when `needle` does not occur. `needle` is expected to be an
/// ASCII byte so that the split position is a character boundary.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr(needle, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}

/// Splits `haystack` immediately before the last occurrence of either needle.
///
/// The second element of the returned pair starts with the matched needle.
/// Returns `None` when neither needle occurs.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = haystack
        .as_bytes()
        .iter()
        .rposition(|&b| b == needle1 || b == needle2)?;
    Some(haystack.split_at(pos))
}

/// Splits `haystack` immediately before the last occurrence of either needle.
///
/// The second element of the returned pair starts with the matched needle.
/// Returns `None` when neither needle occurs.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = memchr::memrchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}

/// Splits `haystack` immediately before the first occurrence of either needle.
///
/// The second element of the returned pair starts with the matched needle.
/// Returns `None` when neither needle occurs.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = haystack
        .as_bytes()
        .iter()
        .position(|&b| b == needle1 || b == needle2)?;
    Some(haystack.split_at(pos))
}
/// Splits `haystack` immediately before the first occurrence of either needle.
///
/// The second element of the returned pair starts with the matched needle.
/// Returns `None` when neither needle occurs.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}

/// Splits `haystack` immediately before the first occurrence of any of the three needles.
///
/// The second element of the returned pair starts with the matched needle.
/// Returns `None` when none of the needles occurs.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let needles = [needle1, needle2, needle3];
    let pos = haystack.bytes().position(|b| needles.contains(&b))?;
    Some(haystack.split_at(pos))
}

/// Splits `haystack` immediately before the first occurrence of any of the three needles.
///
/// The second element of the returned pair starts with the matched needle.
/// Returns `None` when none of the needles occurs.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let pos = memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}

/// Splits `haystack` around the first occurrence of `needle`, dropping the needle.
///
/// Returns `(before, after)`, or `None` when `needle` does not occur.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.bytes().position(|b| b == needle)?;
    let before = &haystack[..pos];
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}

/// Splits `haystack` around the first occurrence of `needle`, dropping the needle.
///
/// Returns `(before, after)`, or `None` when `needle` does not occur.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr(needle, haystack.as_bytes())?;
    let before = &haystack[..pos];
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}
/// Splits `haystack` around the first occurrence of either needle.
///
/// Returns `(before, matched_needle, after)`, or `None` when neither needle occurs.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    // Track the index together with the byte so the matched needle can be reported.
    let (pos, found) = haystack
        .bytes()
        .enumerate()
        .find(|&(_, b)| b == needle1 || b == needle2)?;
    Some((&haystack[..pos], found, &haystack[(pos + 1)..]))
}

/// Splits `haystack` around the first occurrence of either needle.
///
/// Returns `(before, matched_needle, after)`, or `None` when neither needle occurs.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let pos = memchr::memchr2(needle1, needle2, bytes)?;
    Some((&haystack[..pos], bytes[pos], &haystack[(pos + 1)..]))
}

/// Splits `haystack` around the first occurrence of any of the four needles.
///
/// Returns `(before, matched_needle, after)`, or `None` when none of the needles occurs.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let needles = [needle1, needle2, needle3, needle4];
    let (pos, found) = haystack
        .bytes()
        .enumerate()
        .find(|(_, b)| needles.contains(b))?;
    Some((&haystack[..pos], found, &haystack[(pos + 1)..]))
}
/// Splits `haystack` around the first occurrence of any of the four needles.
///
/// Returns `(before, matched_needle, after)`, or `None` when none of the needles occurs.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    // Search for the first three needles in one pass, then look for the fourth
    // only in the part of the input that precedes that hit (if there is one).
    let pos = match memchr::memchr3(needle1, needle2, needle3, bytes) {
        Some(first_of_three) => {
            memchr::memchr(needle4, &bytes[..first_of_three]).unwrap_or(first_of_three)
        }
        None => memchr::memchr(needle4, bytes)?,
    };
    Some((&haystack[..pos], bytes[pos], &haystack[(pos + 1)..]))
}

/// Splits `haystack` around the last occurrence of `needle`, dropping the needle.
///
/// Returns `(before, after)`, or `None` when `needle` does not occur.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.bytes().rposition(|b| b == needle)?;
    let before = &haystack[..pos];
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}

/// Splits `haystack` around the last occurrence of `needle`, dropping the needle.
///
/// Returns `(before, after)`, or `None` when `needle` does not occur.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memrchr(needle, haystack.as_bytes())?;
    let before = &haystack[..pos];
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}

/// Checks that every character of `s` is accepted by the matching predicate.
///
/// ASCII bytes are passed to `pred_ascii` and non-ASCII characters to
/// `pred_nonascii`. An empty string is accepted.
#[must_use]
fn satisfy_chars<F, G>(mut s: &str, pred_ascii: F, pred_nonascii: G) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    loop {
        // Validate the leading run of ASCII bytes (possibly empty).
        let ascii_len = s.bytes().position(|b| !b.is_ascii()).unwrap_or(s.len());
        let (ascii_run, tail) = s.split_at(ascii_len);
        if !ascii_run.bytes().all(pred_ascii) {
            return false;
        }
        if tail.is_empty() {
            // Nothing but (already validated) ASCII was left.
            return true;
        }

        // Validate the run of non-ASCII characters that follows.
        let nonascii_len = tail
            .bytes()
            .position(|b| b.is_ascii())
            .unwrap_or(tail.len());
        let (nonascii_run, rest) = tail.split_at(nonascii_len);
        if !nonascii_run.chars().all(pred_nonascii) {
            return false;
        }
        s = rest;
    }
}

/// Checks that `s` consists only of allowed characters and percent-encoded triplets.
///
/// Every `%` must be followed by two hexadecimal digits; all other characters
/// are checked with `pred_ascii` (ASCII bytes) or `pred_nonascii` (the rest).
#[must_use]
pub(crate) fn satisfy_chars_with_pct_encoded<F, G>(
    mut s: &str,
    pred_ascii: F,
    pred_nonascii: G,
) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    while let Some((plain, after_pct)) = s.split_once('%') {
        // The text in front of the `%` must consist of allowed characters only.
        let plain_ok = plain.is_empty() || satisfy_chars(plain, pred_ascii, pred_nonascii);
        // The `%` must introduce a two-hex-digit escape.
        if !(plain_ok && starts_with_double_hexdigits(after_pct.as_bytes())) {
            return false;
        }

        // Skip the two hex digits and continue with what follows.
        s = &after_pct[2..];
    }

    // No `%` is left; validate the remainder as plain characters.
    satisfy_chars(s, pred_ascii, pred_nonascii)
}

/// Tells whether the first two bytes of `s` are both ASCII hexadecimal digits.
///
/// Returns `false` when `s` is shorter than two bytes.
#[must_use]
pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool {
    matches!(s, [hi, lo, ..] if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit())
}

/// Removes the leading `prefix` byte from `s` and returns the remainder.
///
/// Returns `None` when `s` is empty or does not start with `prefix`.
///
/// # Precondition
///
/// `prefix` should be an ASCII character (checked with a debug assertion).
#[must_use]
pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> {
    debug_assert!(prefix.is_ascii());
    match s.as_bytes().first() {
        Some(&first) if first == prefix => Some(&s[1..]),
        _ => None,
    }
}
/// Separates the leading character of `s` from the remainder.
///
/// Returns `(first_char, rest_str)`, or `None` when `s` is empty.
#[must_use]
pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> {
    let first = s.chars().next()?;
    // The remainder starts right after the UTF-8 encoding of the first character.
    Some((first, &s[first.len_utf8()..]))
}
