diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-10 13:11:11 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-10 13:11:11 -0600 |
| commit | 01959b16a21b22b5df5f16569c2a8e8f92beecef (patch) | |
| tree | 32afa5d747c5466345c59ec52161a7cba3d6d755 /vendor/iri-string/src/parser | |
| parent | ff30574117a996df332e23d1fb6f65259b316b5b (diff) | |
chore: vendor dependencies
Diffstat (limited to 'vendor/iri-string/src/parser')
| -rw-r--r-- | vendor/iri-string/src/parser/char.rs | 323 | ||||
| -rw-r--r-- | vendor/iri-string/src/parser/str.rs | 390 | ||||
| -rw-r--r-- | vendor/iri-string/src/parser/str/maybe_pct_encoded.rs | 369 | ||||
| -rw-r--r-- | vendor/iri-string/src/parser/trusted.rs | 476 | ||||
| -rw-r--r-- | vendor/iri-string/src/parser/trusted/authority.rs | 32 | ||||
| -rw-r--r-- | vendor/iri-string/src/parser/validate.rs | 225 | ||||
| -rw-r--r-- | vendor/iri-string/src/parser/validate/authority.rs | 296 | ||||
| -rw-r--r-- | vendor/iri-string/src/parser/validate/path.rs | 91 |
8 files changed, 2202 insertions, 0 deletions
diff --git a/vendor/iri-string/src/parser/char.rs b/vendor/iri-string/src/parser/char.rs new file mode 100644 index 00000000..2455498e --- /dev/null +++ b/vendor/iri-string/src/parser/char.rs @@ -0,0 +1,323 @@
//! Characters.
//!
//! Every ASCII character is classified by a one-byte bit set; each `MASK_*`
//! constant below selects one bit of that set.

use crate::spec::Spec;

/// A mask to test whether the character is continue character of `scheme` (bit 0).
// `ALPHA / DIGIT / "+" / "-" / "."`
const MASK_SCHEME_CONTINUE: u8 = 1 << 0;

/// A mask to test whether the character matches `unreserved` (bit 1).
// `unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"`
const MASK_UNRESERVED: u8 = 1 << 1;

/// A mask to test whether the character matches `gen-delims` (bit 2).
// `gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"`
const MASK_GEN_DELIMS: u8 = 1 << 2;

/// A mask to test whether the character matches `sub-delims` (bit 3).
// `sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="`
const MASK_SUB_DELIMS: u8 = 1 << 3;

/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes) (bit 4).
// `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"`
const MASK_PCHAR: u8 = 1 << 4;

/// A mask to test whether the character can appear in `query` and `fragment` (bit 5).
// `query = *( pchar / "/" / "?" )`
// `fragment = *( pchar / "/" / "?" )`
const MASK_FRAG_QUERY: u8 = 1 << 5;

/// A mask to test whether the character can appear in `userinfo` and address of `IPvFuture`
/// (bit 6).
// `userinfo = *( unreserved / pct-encoded / sub-delims / ":" )`
const MASK_USERINFO_IPVFUTUREADDR: u8 = 1 << 6;

/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes) or slash
/// (bit 7).
const MASK_PCHAR_SLASH: u8 = 1 << 7;

/// ASCII characters' properties.
+const TABLE: [u8; 128] = [ + 0b_0000_0000, // NUL + 0b_0000_0000, // SOH + 0b_0000_0000, // STX + 0b_0000_0000, // ETX + 0b_0000_0000, // EOT + 0b_0000_0000, // ENQ + 0b_0000_0000, // ACK + 0b_0000_0000, // BEL + 0b_0000_0000, // BS + 0b_0000_0000, // HT + 0b_0000_0000, // LF + 0b_0000_0000, // VT + 0b_0000_0000, // FF + 0b_0000_0000, // CR + 0b_0000_0000, // SO + 0b_0000_0000, // SI + 0b_0000_0000, // DLE + 0b_0000_0000, // DC1 + 0b_0000_0000, // DC2 + 0b_0000_0000, // DC3 + 0b_0000_0000, // DC4 + 0b_0000_0000, // NAK + 0b_0000_0000, // SYN + 0b_0000_0000, // ETB + 0b_0000_0000, // CAN + 0b_0000_0000, // EM + 0b_0000_0000, // SUB + 0b_0000_0000, // ESC + 0b_0000_0000, // FS + 0b_0000_0000, // GS + 0b_0000_0000, // RS + 0b_0000_0000, // US + 0b_0000_0000, // SPACE + 0b_1111_1000, // ! + 0b_0000_0000, // " + 0b_0000_0100, // # + 0b_1111_1000, // $ + 0b_0000_0000, // % + 0b_1111_1000, // & + 0b_1111_1000, // ' + 0b_1111_1000, // ( + 0b_1111_1000, // ) + 0b_1111_1000, // * + 0b_1111_1001, // + + 0b_1111_1000, // , + 0b_1111_0011, // - + 0b_1111_0011, // . + 0b_1010_0100, // / + 0b_1111_0011, // 0 + 0b_1111_0011, // 1 + 0b_1111_0011, // 2 + 0b_1111_0011, // 3 + 0b_1111_0011, // 4 + 0b_1111_0011, // 5 + 0b_1111_0011, // 6 + 0b_1111_0011, // 7 + 0b_1111_0011, // 8 + 0b_1111_0011, // 9 + 0b_1111_0100, // : + 0b_1111_1000, // ; + 0b_0000_0000, // < + 0b_1111_1000, // = + 0b_0000_0000, // > + 0b_0010_0100, // ? 
+ 0b_1011_0100, // @ + 0b_1111_0011, // A + 0b_1111_0011, // B + 0b_1111_0011, // C + 0b_1111_0011, // D + 0b_1111_0011, // E + 0b_1111_0011, // F + 0b_1111_0011, // G + 0b_1111_0011, // H + 0b_1111_0011, // I + 0b_1111_0011, // J + 0b_1111_0011, // K + 0b_1111_0011, // L + 0b_1111_0011, // M + 0b_1111_0011, // N + 0b_1111_0011, // O + 0b_1111_0011, // P + 0b_1111_0011, // Q + 0b_1111_0011, // R + 0b_1111_0011, // S + 0b_1111_0011, // T + 0b_1111_0011, // U + 0b_1111_0011, // V + 0b_1111_0011, // W + 0b_1111_0011, // X + 0b_1111_0011, // Y + 0b_1111_0011, // Z + 0b_0000_0100, // [ + 0b_0000_0000, // \ + 0b_0000_0100, // ] + 0b_0000_0000, // ^ + 0b_1111_0010, // _ + 0b_0000_0000, // ` + 0b_1111_0011, // a + 0b_1111_0011, // b + 0b_1111_0011, // c + 0b_1111_0011, // d + 0b_1111_0011, // e + 0b_1111_0011, // f + 0b_1111_0011, // g + 0b_1111_0011, // h + 0b_1111_0011, // i + 0b_1111_0011, // j + 0b_1111_0011, // k + 0b_1111_0011, // l + 0b_1111_0011, // m + 0b_1111_0011, // n + 0b_1111_0011, // o + 0b_1111_0011, // p + 0b_1111_0011, // q + 0b_1111_0011, // r + 0b_1111_0011, // s + 0b_1111_0011, // t + 0b_1111_0011, // u + 0b_1111_0011, // v + 0b_1111_0011, // w + 0b_1111_0011, // x + 0b_1111_0011, // y + 0b_1111_0011, // z + 0b_0000_0000, // { + 0b_0000_0000, // | + 0b_0000_0000, // } + 0b_1111_0010, // ~ + 0b_0000_0000, // DEL +]; + +/// Returns `true` if the given ASCII character is allowed as continue character of `scheme` part. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_scheme_continue(c: u8) -> bool { + (TABLE[c as usize] & MASK_SCHEME_CONTINUE) != 0 +} + +/// Returns `true` if the given ASCII character matches `unreserved`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_unreserved(c: u8) -> bool { + (TABLE[c as usize] & MASK_UNRESERVED) != 0 +} + +/// Returns true if the character is unreserved. 
+#[inline] +#[must_use] +pub(crate) fn is_unreserved<S: Spec>(c: char) -> bool { + if c.is_ascii() { + is_ascii_unreserved(c as u8) + } else { + S::is_nonascii_char_unreserved(c) + } +} + +///// Returns `true` if the given ASCII character matches `gen-delims`. +//#[inline] +//#[must_use] +//pub(crate) const fn is_ascii_gen_delims(c: u8) -> bool { +// (TABLE[c as usize] & MASK_GEN_DELIMS) != 0 +//} + +///// Returns `true` if the given ASCII character matches `sub-delims`. +//#[inline] +//#[must_use] +//pub(crate) const fn is_ascii_sub_delims(c: u8) -> bool { +// (TABLE[c as usize] & MASK_SUB_DELIMS) != 0 +//} + +///// Returns `true` if the given ASCII character matches `reserved`. +//#[inline] +//#[must_use] +//pub(crate) const fn is_ascii_reserved(c: u8) -> bool { +// (TABLE[c as usize] & (MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0 +//} + +/// Returns `true` if the given ASCII character matches `pchar` modulo `pct-encoded`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_pchar(c: u8) -> bool { + (TABLE[c as usize] & MASK_PCHAR) != 0 +} + +/// Returns `true` if the given ASCII character is allowed to appear in `query` and `fragment`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_frag_query(c: u8) -> bool { + (TABLE[c as usize] & MASK_FRAG_QUERY) != 0 +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `iquery`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_query<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c) +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `ifragment`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_fragment<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) +} + +/// Returns `true` if the given ASCII character is allowed to appear in `userinfo` and `IPvFuture`. 
+#[inline] +#[must_use] +pub(crate) const fn is_ascii_userinfo_ipvfutureaddr(c: u8) -> bool { + (TABLE[c as usize] & MASK_USERINFO_IPVFUTUREADDR) != 0 +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `iuserinfo`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_userinfo<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) +} + +/// Returns `true` if the given ASCII character is allowed to appear in `reg-name` +#[inline] +#[must_use] +pub(crate) const fn is_ascii_regname(c: u8) -> bool { + (TABLE[c as usize] & (MASK_UNRESERVED | MASK_SUB_DELIMS)) != 0 +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `ireg-name`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_regname<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) +} + +/// Returns `true` if the given ASCII character matches `pchar` modulo `pct-encoded` or a slash. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_pchar_slash(c: u8) -> bool { + (TABLE[c as usize] & MASK_PCHAR_SLASH) != 0 +} + +/// Checks if the given character matches `ucschar` rule. +#[must_use] +pub(crate) fn is_ucschar(c: char) -> bool { + matches!( + u32::from(c), + 0xA0..=0xD7FF | + 0xF900..=0xFDCF | + 0xFDF0..=0xFFEF | + 0x1_0000..=0x1_FFFD | + 0x2_0000..=0x2_FFFD | + 0x3_0000..=0x3_FFFD | + 0x4_0000..=0x4_FFFD | + 0x5_0000..=0x5_FFFD | + 0x6_0000..=0x6_FFFD | + 0x7_0000..=0x7_FFFD | + 0x8_0000..=0x8_FFFD | + 0x9_0000..=0x9_FFFD | + 0xA_0000..=0xA_FFFD | + 0xB_0000..=0xB_FFFD | + 0xC_0000..=0xC_FFFD | + 0xD_0000..=0xD_FFFD | + 0xE_1000..=0xE_FFFD + ) +} + +/// Returns true if the given value is a continue byte of UTF-8. +#[inline(always)] +#[must_use] +pub(crate) fn is_utf8_byte_continue(byte: u8) -> bool { + // `0x80..=0xbf` (i.e. `0b_1000_0000..=0b_1011_1111`) is not the first byte, + // and `0xc0..=0xc1` (i.e. `0b_1100_0000..=0b_1100_0001` shouldn't appear + // anywhere in UTF-8 byte sequence. 
+ // `0x80 as i8` is -128, and `0xc0 as i8` is -96. + // + // The first byte of the UTF-8 character is not `0b10xx_xxxx`, and + // the continue bytes is `0b10xx_xxxx`. + // `0b1011_1111 as i8` is -65, and `0b1000_0000 as i8` is -128. + (byte as i8) < -64 +} + +/// Returns true if the given ASCII character is `unreserved` or `reserved`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_unreserved_or_reserved(c: u8) -> bool { + (TABLE[c as usize] & (MASK_UNRESERVED | MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0 +} diff --git a/vendor/iri-string/src/parser/str.rs b/vendor/iri-string/src/parser/str.rs new file mode 100644 index 00000000..0f564bfa --- /dev/null +++ b/vendor/iri-string/src/parser/str.rs @@ -0,0 +1,390 @@ +//! Functions for common string operations. + +pub(crate) use self::maybe_pct_encoded::{ + process_percent_encoded_best_effort, PctEncodedFragments, +}; + +mod maybe_pct_encoded; + +/// Returns the inner string if wrapped. +#[must_use] +pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> { + let (prefix, suffix) = match s.as_bytes() { + [prefix, suffix] | [prefix, .., suffix] => (*prefix, *suffix), + _ => return None, + }; + if (prefix == open) && (suffix == close) { + Some(&s[1..(s.len() - 1)]) + } else { + None + } +} + +/// Returns the byte that appears first. +#[cfg(not(feature = "memchr"))] +#[inline] +#[must_use] +pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> { + haystack + .iter() + .copied() + .find(|&b| b == needle1 || b == needle2) +} + +/// Returns the byte that appears first. +#[cfg(feature = "memchr")] +#[inline] +#[must_use] +pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> { + memchr::memchr2(needle1, needle2, haystack).map(|pos| haystack[pos]) +} + +/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character. 
+#[cfg(not(feature = "memchr"))] +#[inline] +#[must_use] +pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> { + haystack.iter().rposition(|&b| b == needle) +} + +/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character. +#[cfg(feature = "memchr")] +#[inline] +#[must_use] +pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> { + memchr::memrchr(needle, haystack) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If `needle` is not found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If `needle` is not found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> { + memchr::memchr(needle, haystack.as_bytes()).map(|pos| haystack.split_at(pos)) +} + +/// Finds the last needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .rposition(|b| b == needle1 || b == needle2) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the last needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + memchr::memrchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. 
+#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split3( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, +) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2 || b == needle3) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split3( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, +) -> Option<(&str, &str)> { + memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes()) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Finds the first needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. 
+#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + memchr::memchr(needle, haystack.as_bytes()) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split2_hole( + haystack: &str, + needle1: u8, + needle2: u8, +) -> Option<(&str, u8, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2) + .map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split2_hole( + haystack: &str, + needle1: u8, + needle2: u8, +) -> Option<(&str, u8, &str)> { + memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split4_hole( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, + needle4: u8, +) -> Option<(&str, u8, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2 || b == needle3 || b == needle4) + .map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. 
+#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split4_hole( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, + needle4: u8, +) -> Option<(&str, u8, &str)> { + let bytes = haystack.as_bytes(); + let pos = match memchr::memchr3(needle1, needle2, needle3, bytes) { + Some(prefix_len) => memchr::memchr(needle4, &bytes[..prefix_len]).or(Some(prefix_len)), + None => memchr::memchr(needle4, bytes), + }; + pos.map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the last needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .rposition(|b| b == needle) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Finds the last needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + memchr::memrchr(needle, haystack.as_bytes()) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Returns `true` if the string only contains the allowed characters. +#[must_use] +fn satisfy_chars<F, G>(mut s: &str, pred_ascii: F, pred_nonascii: G) -> bool +where + F: Copy + Fn(u8) -> bool, + G: Copy + Fn(char) -> bool, +{ + while !s.is_empty() { + match s.bytes().position(|b| !b.is_ascii()) { + Some(nonascii_pos) => { + // Valdiate ASCII prefix. + if nonascii_pos != 0 { + let (prefix, rest) = s.split_at(nonascii_pos); + if !prefix.bytes().all(pred_ascii) { + return false; + } + s = rest; + } + + // Extract non-ASCII part and validate it. 
+ let (prefix, rest) = match s.bytes().position(|b| b.is_ascii()) { + Some(ascii_pos) => s.split_at(ascii_pos), + None => (s, ""), + }; + if !prefix.chars().all(pred_nonascii) { + return false; + } + s = rest; + } + None => { + // All chars are ASCII. + return s.bytes().all(pred_ascii); + } + } + } + + true +} + +/// Returns `true` if the string only contains the allowed characters and percent-encoded char. +#[must_use] +pub(crate) fn satisfy_chars_with_pct_encoded<F, G>( + mut s: &str, + pred_ascii: F, + pred_nonascii: G, +) -> bool +where + F: Copy + Fn(u8) -> bool, + G: Copy + Fn(char) -> bool, +{ + while let Some((prefix, suffix)) = find_split_hole(s, b'%') { + // Verify strings before the percent-encoded char. + if !prefix.is_empty() && !satisfy_chars(prefix, pred_ascii, pred_nonascii) { + return false; + } + + // Verify the percent-encoded char. + if !starts_with_double_hexdigits(suffix.as_bytes()) { + return false; + } + + // Advance the cursor. + s = &suffix[2..]; + } + + // Verify the rest. + satisfy_chars(s, pred_ascii, pred_nonascii) +} + +/// Returns `true` if the given string starts with two hexadecimal digits. +#[must_use] +pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool { + match s { + [x, y] | [x, y, ..] => x.is_ascii_hexdigit() && y.is_ascii_hexdigit(), + _ => false, + } +} + +/// Strips the first character if it is the given ASCII character, and returns the rest. +/// +/// # Precondition +/// +/// The given ASCII character (`prefix`) should be an ASCII character. +#[must_use] +pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> { + debug_assert!(prefix.is_ascii()); + if s.as_bytes().first().copied() == Some(prefix) { + Some(&s[1..]) + } else { + None + } +} + +/// Splits the given string into the first character and the rest. +/// +/// Returns `(first_char, rest_str)`. 
+#[must_use] +pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> { + let mut chars = s.chars(); + let c = chars.next()?; + let rest = chars.as_str(); + Some((c, rest)) +} diff --git a/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs b/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs new file mode 100644 index 00000000..617f006a --- /dev/null +++ b/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs @@ -0,0 +1,369 @@ +//! Processor for possibly- or invalidly-percent-encoded strings. + +use core::fmt::{self, Write as _}; +use core::marker::PhantomData; +use core::num::NonZeroU8; +use core::ops::ControlFlow; + +use crate::parser::str::find_split; +use crate::parser::trusted::hexdigits_to_byte; + +/// Fragment in a possibly percent-encoded (and possibly broken) string. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum PctEncodedFragments<'a> { + /// String fragment without percent-encoded triplets. + NoPctStr(&'a str), + /// Stray `%` (percent) character. + StrayPercent, + /// Valid percent-encoded triplets for a character. + Char(&'a str, char), + /// Percent-encoded triplets that does not consists of a valid UTF-8 sequence. + InvalidUtf8PctTriplets(&'a str), +} + +/// Processes characters in a string which may contain (possibly invalid) percent-encoded triplets. +pub(crate) fn process_percent_encoded_best_effort<T, F, B>( + v: T, + mut f: F, +) -> Result<ControlFlow<B>, fmt::Error> +where + T: fmt::Display, + F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>, +{ + let mut buf = [0_u8; 12]; + let mut writer = DecomposeWriter { + f: &mut f, + decoder: Default::default(), + buf: &mut buf, + result: ControlFlow::Continue(()), + _r: PhantomData, + }; + + if write!(writer, "{v}").is_err() { + match writer.result { + ControlFlow::Continue(_) => return Err(fmt::Error), + ControlFlow::Break(v) => return Ok(ControlFlow::Break(v)), + } + } + + // Flush the internal buffer of the decoder. 
+ if let Some(len) = writer.decoder.flush(&mut buf).map(|v| usize::from(v.get())) { + let len_suffix = len % 3; + let triplets_end = len - len_suffix; + let triplets = core::str::from_utf8(&buf[..triplets_end]) + .expect("[validity] percent-encoded triplets consist of ASCII characters"); + if let ControlFlow::Break(v) = f(PctEncodedFragments::InvalidUtf8PctTriplets(triplets)) { + return Ok(ControlFlow::Break(v)); + } + + if len_suffix > 0 { + if let ControlFlow::Break(v) = f(PctEncodedFragments::StrayPercent) { + return Ok(ControlFlow::Break(v)); + } + } + if len_suffix > 1 { + let after_percent = core::str::from_utf8( + &buf[(triplets_end + 1)..(triplets_end + len_suffix)], + ) + .expect("[consistency] percent-encoded triplets contains only ASCII characters"); + if let ControlFlow::Break(v) = f(PctEncodedFragments::NoPctStr(after_percent)) { + return Ok(ControlFlow::Break(v)); + } + } + } + + Ok(ControlFlow::Continue(())) +} + +/// Writer to decompose the input into fragments. +struct DecomposeWriter<'a, F, B> { + /// Output function. + f: &'a mut F, + /// Decoder. + decoder: DecoderBuffer, + /// Buffer. + buf: &'a mut [u8], + /// Result of the last output function call. + result: ControlFlow<B>, + /// Dummy field for the type parameter of the return type of the function `f`. + _r: PhantomData<fn() -> B>, +} +impl<F, B> DecomposeWriter<'_, F, B> +where + F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>, +{ + /// Returns `Ok(_)` if the stored result is `Continue`, and `Err(_)` otherwise. + #[inline(always)] + fn result_continue_or_err(&self) -> fmt::Result { + if self.result.is_break() { + return Err(fmt::Error); + } + Ok(()) + } + + /// Calls the output functions with the undecodable fragments. 
+ fn output_as_undecodable(&mut self, len_undecodable: u8) -> fmt::Result { + let len_written = usize::from(len_undecodable); + let frag = core::str::from_utf8(&self.buf[..len_written]) + .expect("[validity] `DecoderBuffer` writes a valid ASCII string"); + let len_incomplete = len_written % 3; + let len_complete = len_written - len_incomplete; + self.result = (self.f)(PctEncodedFragments::InvalidUtf8PctTriplets( + &frag[..len_complete], + )); + self.result_continue_or_err()?; + if len_incomplete > 0 { + // At least the first `%` exists. + self.result = (self.f)(PctEncodedFragments::StrayPercent); + if self.result.is_break() { + return Err(fmt::Error); + } + if len_incomplete > 1 { + // A following hexdigit is available. + debug_assert_eq!( + len_incomplete, 2, + "[consistency] the length of incomplete percent-encoded \ + triplet must be less than 2 bytes" + ); + self.result = (self.f)(PctEncodedFragments::NoPctStr( + &frag[(len_complete + 1)..len_written], + )); + self.result_continue_or_err()?; + } + } + Ok(()) + } +} + +impl<F, B> fmt::Write for DecomposeWriter<'_, F, B> +where + F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>, +{ + fn write_str(&mut self, s: &str) -> fmt::Result { + self.result_continue_or_err()?; + let mut rest = s; + while !rest.is_empty() { + let (len_consumed, result) = self.decoder.push_encoded(self.buf, rest); + if len_consumed == 0 { + // `rest` does not start with the percent-encoded triplets. + // Flush the decoder before attempting to decode more data. + if let Some(len_written) = self.decoder.flush(self.buf).map(NonZeroU8::get) { + self.output_as_undecodable(len_written)?; + rest = &rest[usize::from(len_written)..]; + } + + // Write plain string prefix (if found). 
+ let (plain_prefix, suffix) = find_split(rest, b'%').unwrap_or((rest, "")); + debug_assert!( + !plain_prefix.is_empty(), + "[consistency] `len_consumed == 0` indicates non-empty \ + `rest` not starting with `%`" + ); + self.result = (self.f)(PctEncodedFragments::NoPctStr(plain_prefix)); + self.result_continue_or_err()?; + rest = suffix; + continue; + } + + // Process decoding result. + match result { + PushResult::Decoded(len_written, c) => { + let len_written = usize::from(len_written.get()); + let frag = core::str::from_utf8(&self.buf[..len_written]) + .expect("[validity] `DecoderBuffer` writes a valid ASCII string"); + self.result = (self.f)(PctEncodedFragments::Char(frag, c)); + self.result_continue_or_err()?; + } + PushResult::Undecodable(len_written) => { + self.output_as_undecodable(len_written)?; + } + PushResult::NeedMoreBytes => { + // Nothing to write at this time. + } + } + rest = &rest[len_consumed..]; + } + Ok(()) + } +} + +/// A type for result of feeding data to [`DecoderBuffer`]. +#[derive(Debug, Clone, Copy)] +enum PushResult { + /// Input is still incomplete, needs more bytes to get the decoding result. + NeedMoreBytes, + /// Bytes decodable to valid UTF-8 sequence. + // `.0`: Length of decodable fragment. + // `.1`: Decoded character. + Decoded(NonZeroU8, char), + /// Valid percent-encoded triplets but not decodable to valid UTF-8 sequence. + // `.0`: Length of undecodable fragment. + Undecodable(u8), +} + +/// Buffer to contain (and to decode) incomplete percent-encoded triplets. +#[derive(Default, Debug, Clone, Copy)] +struct DecoderBuffer { + /// Percent-encoded triplets that possibly consists a valid UTF-8 sequence after decoded. + // + // `3 * 4`: 3 ASCII characters for single percent-encoded triplet, and + // 4 triplets at most for single Unicode codepoint in UTF-8. + encoded: [u8; 12], + /// Decoded bytes. + decoded: [u8; 4], + /// Number of bytes available in `buf_encoded` buffer. 
+ /// + /// `buf_encoded_len / 3` also indicates the length of data in `decoded`. + len_encoded: u8, +} + +impl DecoderBuffer { + /// Writes the data of the given length to the destination, and remove that part from buffer. + fn write_and_pop(&mut self, dest: &mut [u8], remove_len: u8) { + let new_len = self.len_encoded - remove_len; + let remove_len = usize::from(remove_len); + let src_range = remove_len..usize::from(self.len_encoded); + dest[..remove_len].copy_from_slice(&self.encoded[..remove_len]); + + if new_len == 0 { + *self = Self::default(); + return; + } + self.encoded.copy_within(src_range, 0); + self.decoded + .copy_within((remove_len / 3)..usize::from(self.len_encoded / 3), 0); + self.len_encoded = new_len; + } + + /// Pushes a byte of a (possible) percent-encoded tripet to the buffer. + fn push_single_encoded_byte(&mut self, byte: u8) { + debug_assert!( + self.len_encoded < 12, + "[consistency] four percent-encoded triplets are enough for a unicode code point" + ); + let pos_enc = usize::from(self.len_encoded); + self.len_encoded += 1; + self.encoded[pos_enc] = byte; + if self.len_encoded % 3 == 0 { + // A new percent-encoded triplet is read. Decode and remember. + let pos_dec = usize::from(self.len_encoded / 3 - 1); + let upper = self.encoded[pos_enc - 1]; + let lower = byte; + debug_assert!( + upper.is_ascii_hexdigit() && lower.is_ascii_hexdigit(), + "[consistency] the `encoded` buffer should contain valid percent-encoded triplets" + ); + self.decoded[pos_dec] = hexdigits_to_byte([upper, lower]); + } + } + + /// Pushes the (possibly) encoded string to the buffer. + /// + /// When the push result is not `PctTripletPushResult::NeedMoreBytes`, the + /// caller should call `Self::clear()` before pushing more bytes. + /// + /// # Preconditions + /// + /// * `buf` should be more than 12 bytes. If not, this method may panic. 
+ #[must_use] + pub(crate) fn push_encoded(&mut self, buf: &mut [u8], s: &str) -> (usize, PushResult) { + debug_assert!( + buf.len() >= 12, + "[internal precondition] destination buffer should be at least 12 bytes" + ); + let mut chars = s.chars(); + let mut len_triplet_incomplete = self.len_encoded % 3; + for c in &mut chars { + if len_triplet_incomplete == 0 { + // Expect `%`. + if c != '%' { + // Undecodable. + // `-1`: the last byte is peeked but not consumed. + let len_consumed = s.len() - chars.as_str().len() - 1; + let len_result = self.len_encoded; + self.write_and_pop(buf, len_result); + return (len_consumed, PushResult::Undecodable(len_result)); + } + self.push_single_encoded_byte(b'%'); + len_triplet_incomplete = 1; + continue; + } + + // Expect a nibble. + if !c.is_ascii_hexdigit() { + // Undecodable. + // `-1`: the last byte is peeked but not consumed. + let len_consumed = s.len() - chars.as_str().len() - 1; + let len_result = self.len_encoded; + self.write_and_pop(buf, len_result); + return (len_consumed, PushResult::Undecodable(len_result)); + } + self.push_single_encoded_byte(c as u8); + if len_triplet_incomplete == 1 { + len_triplet_incomplete = 2; + continue; + } else { + // Now a new percent-encoded triplet is read! + debug_assert_eq!(len_triplet_incomplete, 2); + len_triplet_incomplete = 0; + } + + // Now a new percent-encoded triplet is read. + // Check if the buffer contains a valid decodable content. + let len_decoded = usize::from(self.len_encoded) / 3; + match core::str::from_utf8(&self.decoded[..len_decoded]) { + Ok(decoded_str) => { + // Successfully decoded. 
+ let len_consumed = s.len() - chars.as_str().len(); + let c = decoded_str + .chars() + .next() + .expect("[validity] `decoded` buffer is nonempty"); + let len_result = NonZeroU8::new(self.len_encoded).expect( + "[consistency] `encoded` buffer is nonempty since \ + `push_single_encoded_byte()` was called", + ); + self.write_and_pop(buf, len_result.get()); + return (len_consumed, PushResult::Decoded(len_result, c)); + } + Err(e) => { + // Undecodable. + assert_eq!( + e.valid_up_to(), + 0, + "[consistency] `decoded` buffer contains at most one character" + ); + let skip_len_decoded = match e.error_len() { + // Unexpected EOF. Wait for remaining input. + None => continue, + // Skip invalid bytes. + Some(v) => v, + }; + let len_consumed = s.len() - chars.as_str().len(); + let len_result = skip_len_decoded as u8 * 3; + assert_ne!( + skip_len_decoded, 0, + "[consistency] empty bytes cannot be invalid" + ); + self.write_and_pop(buf, len_result); + return (len_consumed, PushResult::Undecodable(len_result)); + } + }; + } + let len_consumed = s.len() - chars.as_str().len(); + (len_consumed, PushResult::NeedMoreBytes) + } + + /// Writes the incomplete data completely to the destination, and clears the internal buffer. + #[must_use] + pub(crate) fn flush(&mut self, buf: &mut [u8]) -> Option<NonZeroU8> { + let len_result = NonZeroU8::new(self.len_encoded)?; + // Emit the current (undecodable) buffer as is. + self.write_and_pop(buf, len_result.get()); + debug_assert_eq!( + self.len_encoded, 0, + "[consistency] the buffer should be cleared after flushed" + ); + Some(len_result) + } +} diff --git a/vendor/iri-string/src/parser/trusted.rs b/vendor/iri-string/src/parser/trusted.rs new file mode 100644 index 00000000..f15c075e --- /dev/null +++ b/vendor/iri-string/src/parser/trusted.rs @@ -0,0 +1,476 @@ +//! Fast parsers for trusted (already validated) input. +//! +//! Using this in wrong way will lead to unexpected wrong result. 
+ +pub(crate) mod authority; + +use core::cmp::Ordering; +use core::num::NonZeroUsize; + +use crate::components::{RiReferenceComponents, Splitter}; +use crate::format::eq_str_display; +use crate::normalize::{is_pct_case_normalized, NormalizedAsciiOnlyHost, NormalizednessCheckMode}; +use crate::parser::str::{find_split2, find_split3, find_split4_hole, find_split_hole}; +use crate::spec::Spec; +use crate::types::RiReferenceStr; + +/// Eats a `scheme` and a following colon, and returns the rest and the scheme. +/// +/// Returns `(rest, scheme)`. +/// +/// This should be called at the head of an absolute IRIs/URIs. +#[must_use] +fn scheme_colon(i: &str) -> (&str, &str) { + let (scheme, rest) = + find_split_hole(i, b':').expect("[precondition] absolute IRIs must have `scheme` part"); + (rest, scheme) +} + +/// Eats a `scheme` and a following colon if available, and returns the rest and the scheme. +/// +/// This should be called at the head of an `IRI-reference` or similar. +#[must_use] +fn scheme_colon_opt(i: &str) -> (&str, Option<&str>) { + match find_split4_hole(i, b':', b'/', b'?', b'#') { + Some((scheme, b':', rest)) => (rest, Some(scheme)), + _ => (i, None), + } +} + +/// Eats double slash and the following authority if available, and returns the authority. +/// +/// This should be called at the head of an `IRI-reference`, or at the result of `scheme_colon`. +#[must_use] +fn slash_slash_authority_opt(i: &str) -> (&str, Option<&str>) { + let s = match i.strip_prefix("//") { + Some(rest) => rest, + None => return (i, None), + }; + // `i` might match `path-abempty` (which can start with `//`), but it is not + // allowed as `relative-part`, so no need to care `path-abempty` rule here. + // A slash, question mark, and hash character won't appear in `authority`. 
+ match find_split3(s, b'/', b'?', b'#') { + Some((authority, rest)) => (rest, Some(authority)), + None => ("", Some(s)), + } +} + +/// Eats a string until the query, and returns that part (excluding `?` for the query). +#[must_use] +fn until_query(i: &str) -> (&str, &str) { + // `?` won't appear before the query part. + match find_split2(i, b'?', b'#') { + Some((before_query, rest)) => (rest, before_query), + None => ("", i), + } +} + +/// Decomposes query and fragment, if available. +/// +/// The string must starts with `?`, or `#`, or be empty. +#[must_use] +fn decompose_query_and_fragment(i: &str) -> (Option<&str>, Option<&str>) { + match i.as_bytes().first().copied() { + None => (None, None), + Some(b'?') => { + let rest = &i[1..]; + match find_split_hole(rest, b'#') { + Some((query, fragment)) => (Some(query), Some(fragment)), + None => (Some(rest), None), + } + } + Some(c) => { + debug_assert_eq!(c, b'#'); + (None, Some(&i[1..])) + } + } +} + +/// Decomposes the given valid `IRI-reference`. +#[must_use] +pub(crate) fn decompose_iri_reference<S: Spec>( + i: &RiReferenceStr<S>, +) -> RiReferenceComponents<'_, S> { + /// Inner function to avoid unnecessary monomorphizations on `S`. + fn decompose(i: &str) -> Splitter { + let len = i.len(); + + let (i, scheme_end) = { + let (i, scheme) = scheme_colon_opt(i); + let end = scheme.and_then(|s| NonZeroUsize::new(s.len())); + (i, end) + }; + let (i, authority_end) = { + // 2: "//".len() + let start = len - i.len() + 2; + // `authority` does not contain the two slashes of `://'. + let (i, authority) = slash_slash_authority_opt(i); + let end = authority.and_then(|s| NonZeroUsize::new(start + s.len())); + (i, end) + }; + let (i, _path) = until_query(i); + + let (query_start, fragment_start) = { + // This could theoretically be zero if `len` is `usize::MAX` and + // `i` has neither a query nor a fragment. However, this is + // practically impossible. 
+ let after_first_prefix = NonZeroUsize::new((len - i.len()).wrapping_add(1)); + + let (query, fragment) = decompose_query_and_fragment(i); + match (query.is_some(), fragment) { + (true, Some(fragment)) => { + (after_first_prefix, NonZeroUsize::new(len - fragment.len())) + } + (true, None) => (after_first_prefix, None), + (false, Some(_fragment)) => (None, after_first_prefix), + (false, None) => (None, None), + } + }; + + Splitter::new(scheme_end, authority_end, query_start, fragment_start) + } + + RiReferenceComponents { + iri: i, + splitter: decompose(i.as_str()), + } +} + +/// Extracts `scheme` part from an IRI reference. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. +#[inline] +#[must_use] +pub(crate) fn extract_scheme(i: &str) -> Option<&str> { + scheme_colon_opt(i).1 +} + +/// Extracts `scheme` part from an absolute IRI. +/// +/// # Precondition +/// +/// The given string must be a valid absolute IRI. +#[inline] +#[must_use] +pub(crate) fn extract_scheme_absolute(i: &str) -> &str { + scheme_colon(i).1 +} + +/// Extracts `authority` part from an IRI reference. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. +#[inline] +#[must_use] +pub(crate) fn extract_authority(i: &str) -> Option<&str> { + let (i, _scheme) = scheme_colon_opt(i); + slash_slash_authority_opt(i).1 +} + +/// Extracts `authority` part from an absolute IRI. +/// +/// # Precondition +/// +/// The given string must be a valid absolute IRI. +#[inline] +#[must_use] +pub(crate) fn extract_authority_absolute(i: &str) -> Option<&str> { + let (i, _scheme) = scheme_colon(i); + slash_slash_authority_opt(i).1 +} + +/// Extracts `authority` part from a relative IRI. +/// +/// # Precondition +/// +/// The given string must be a valid relative IRI. +#[inline] +#[must_use] +pub(crate) fn extract_authority_relative(i: &str) -> Option<&str> { + slash_slash_authority_opt(i).1 +} + +/// Extracts `path` part from an IRI reference. 
+/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. +#[inline] +#[must_use] +pub(crate) fn extract_path(i: &str) -> &str { + let (i, _scheme) = scheme_colon_opt(i); + let (i, _authority) = slash_slash_authority_opt(i); + until_query(i).1 +} + +/// Extracts `path` part from an absolute IRI. +/// +/// # Precondition +/// +/// The given string must be a valid absolute IRI. +#[inline] +#[must_use] +pub(crate) fn extract_path_absolute(i: &str) -> &str { + let (i, _scheme) = scheme_colon(i); + let (i, _authority) = slash_slash_authority_opt(i); + until_query(i).1 +} + +/// Extracts `path` part from a relative IRI. +/// +/// # Precondition +/// +/// The given string must be a valid relative IRI. +#[inline] +#[must_use] +pub(crate) fn extract_path_relative(i: &str) -> &str { + let (i, _authority) = slash_slash_authority_opt(i); + until_query(i).1 +} + +/// Extracts `query` part from an IRI reference. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. +#[inline] +#[must_use] +pub(crate) fn extract_query(i: &str) -> Option<&str> { + let (i, _before_query) = until_query(i); + decompose_query_and_fragment(i).0 +} + +/// Extracts `query` part from an `absolute-IRI` string. +/// +/// # Precondition +/// +/// The given string must be a valid `absolute-IRI` string. +#[must_use] +pub(crate) fn extract_query_absolute_iri(i: &str) -> Option<&str> { + let (i, _before_query) = until_query(i); + if i.is_empty() { + None + } else { + debug_assert_eq!( + i.as_bytes().first(), + Some(&b'?'), + "`absolute-IRI` string must not have `fragment part" + ); + Some(&i[1..]) + } +} + +/// Splits an IRI string into the prefix and the fragment part. +/// +/// A leading `#` character is truncated if the fragment part exists. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. 
+#[inline] +#[must_use] +pub(crate) fn split_fragment(iri: &str) -> (&str, Option<&str>) { + // It is completely OK to find the first `#` character from valid IRI to get fragment part, + // because the spec says that there are no `#` characters before the fragment part. + // + // > ``` + // > scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + // > ``` + // > + // > --- [RFC 3986, section 3.1. Scheme](https://tools.ietf.org/html/rfc3986#section-3.1) + // + // > The authority component is preceded by a double slash ("//") and is terminated by the + // > next slash ("/"), question mark ("?"), or number sign ("#") character, or by the end + // > of the URI. + // > + // > --- [RFC 3986, section 3.2. Authority](https://tools.ietf.org/html/rfc3986#section-3.2) + // + // > The path is terminated by the first question mark ("?") or number sign ("#") + // > character, or by the end of the URI. + // > + // > --- [RFC 3986, section 3.3. Path](https://tools.ietf.org/html/rfc3986#section-3.3) + // + // > The query component is indicated by the first question mark ("?") character and + // > terminated by a number sign ("#") character or by the end of the URI. + // > + // > --- [RFC 3986, section 3.4. Query](https://tools.ietf.org/html/rfc3986#section-3.4) + match find_split_hole(iri, b'#') { + Some((prefix, fragment)) => (prefix, Some(fragment)), + None => (iri, None), + } +} + +/// Returns the fragment part of the given IRI. +/// +/// A leading `#` character of the fragment is truncated. +#[inline] +#[must_use] +pub(crate) fn extract_fragment(iri: &str) -> Option<&str> { + split_fragment(iri).1 +} + +/// Returns `Ok(_)` if the string is normalized. +/// +/// If this function returns `true`, normalization input and output will be identical. 
+/// +/// In this function, "normalized" means that any of the normalization below +/// won't change the input on normalization: +/// +/// * syntax-based normalization, +/// * case normalization, +/// * percent-encoding normalization, and +/// * path segment normalizaiton. +/// +/// Note that scheme-based normalization is not considered. +#[must_use] +pub(crate) fn is_normalized<S: Spec>(i: &str, mode: NormalizednessCheckMode) -> bool { + let (i, scheme) = scheme_colon(i); + let (after_authority, authority) = slash_slash_authority_opt(i); + let (_after_path, path) = until_query(after_authority); + + // Syntax-based normalization: uppercase chars in `scheme` should be + // converted to lowercase. + if scheme.bytes().any(|b| b.is_ascii_uppercase()) { + return false; + } + + // Case normalization: ASCII alphabets in US-ASCII only `host` should be + // normalized to lowercase. + // Case normalization: ASCII alphabets in percent-encoding triplet should be + // normalized to uppercase. + // Percent-encoding normalization: unresreved characters should be decoded + // in `userinfo`, `host`, `path`, `query`, and `fragments`. + // Path segment normalization: the path should not have dot segments (`.` + // and/or `..`). + // + // Note that `authority` can have percent-encoded `userinfo`. + if let Some(authority) = authority { + let authority_components = authority::decompose_authority(authority); + + // Check `host`. + let host = authority_components.host(); + let host_is_normalized = if is_ascii_only_host(host) { + eq_str_display(host, &NormalizedAsciiOnlyHost::new(host)) + } else { + // If the host is not ASCII-only, conversion to lowercase is not performed. + is_pct_case_normalized::<S>(host) + }; + if !host_is_normalized { + return false; + } + + // Check pencent encodings in `userinfo`. + if let Some(userinfo) = authority_components.userinfo() { + if !is_pct_case_normalized::<S>(userinfo) { + return false; + } + } + } + + // Check `path`. 
+ // + // Syntax-based normalization: Dot segments might be removed. + // Note that we don't have to care `%2e` and `%2E` since `.` is unreserved + // and they will be decoded if not normalized. + // Also note that WHATWG serialization will use `/.//` as a path prefix if + // the path is absolute and won't modify the path if the path is relative. + // + // Percent-encoding normalization: unresreved characters should be decoded + // in `path`, `query`, and `fragments`. + let path_span_no_dot_segments = if authority.is_some() { + Some(path) + } else { + match mode { + NormalizednessCheckMode::Default => Some(path.strip_prefix("/.//").unwrap_or(path)), + NormalizednessCheckMode::Rfc3986 => Some(path), + NormalizednessCheckMode::PreserveAuthoritylessRelativePath => { + if path.starts_with('/') { + // Absolute. + Some(path.strip_prefix("/.//").unwrap_or(path)) + } else { + // Relative. Treat the path as "opaque". No span to check. + None + } + } + } + }; + if let Some(path_span_no_dot_segments) = path_span_no_dot_segments { + if path_span_no_dot_segments + .split('/') + .any(|segment| matches!(segment, "." | "..")) + { + return false; + } + } + is_pct_case_normalized::<S>(after_authority) +} + +/// Decodes two hexdigits into a byte. +/// +/// # Preconditions +/// +/// The parameters `upper` and `lower` should be an ASCII hexadecimal digit. +#[must_use] +pub(super) fn hexdigits_to_byte([upper, lower]: [u8; 2]) -> u8 { + let i_upper = match (upper & 0xf0).cmp(&0x40) { + Ordering::Less => upper - b'0', + Ordering::Equal => upper - (b'A' - 10), + Ordering::Greater => upper - (b'a' - 10), + }; + let i_lower = match (lower & 0xf0).cmp(&0x40) { + Ordering::Less => lower - b'0', + Ordering::Equal => lower - (b'A' - 10), + Ordering::Greater => lower - (b'a' - 10), + }; + (i_upper << 4) + i_lower +} + +/// Converts the first two hexdigit bytes in the buffer into a byte. +/// +/// # Panics +/// +/// Panics if the string does not start with two hexdigits. 
+#[must_use] +pub(crate) fn take_xdigits2(s: &str) -> (u8, &str) { + let mut bytes = s.bytes(); + let upper_xdigit = bytes + .next() + .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference"); + let lower_xdigit = bytes + .next() + .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference"); + let v = hexdigits_to_byte([upper_xdigit, lower_xdigit]); + (v, &s[2..]) +} + +/// Returns true if the given `host`/`ihost` string consists of only US-ASCII characters. +/// +/// # Precondition +/// +/// The given string should be valid `host` or `host ":" port` string. +#[must_use] +pub(crate) fn is_ascii_only_host(mut host: &str) -> bool { + while let Some((i, c)) = host + .char_indices() + .find(|(_i, c)| !c.is_ascii() || *c == '%') + { + if c != '%' { + // Non-ASCII character found. + debug_assert!(!c.is_ascii()); + return false; + } + // Percent-encoded character found. + let after_pct = &host[(i + 1)..]; + let (byte, rest) = take_xdigits2(after_pct); + if !byte.is_ascii() { + return false; + } + host = rest; + } + + // Neither non-ASCII characters nor percent-encoded characters found. + true +} diff --git a/vendor/iri-string/src/parser/trusted/authority.rs b/vendor/iri-string/src/parser/trusted/authority.rs new file mode 100644 index 00000000..83e41298 --- /dev/null +++ b/vendor/iri-string/src/parser/trusted/authority.rs @@ -0,0 +1,32 @@ +//! Parsers for trusted `authority` string. + +use crate::components::AuthorityComponents; +use crate::parser::str::{find_split_hole, rfind_split2}; + +/// Decomposes the authority into `(userinfo, host, port)`. +/// +/// The leading `:` is truncated. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. 
+#[inline] +#[must_use] +pub(crate) fn decompose_authority(authority: &str) -> AuthorityComponents<'_> { + let i = authority; + let (i, host_start) = match find_split_hole(i, b'@') { + Some((userinfo, rest)) => (rest, userinfo.len() + 1), + None => (authority, 0), + }; + let colon_port_len = match rfind_split2(i, b':', b']') { + Some((_, suffix)) if suffix.starts_with(':') => suffix.len(), + _ => 0, + }; + let host_end = authority.len() - colon_port_len; + + AuthorityComponents { + authority, + host_start, + host_end, + } +} diff --git a/vendor/iri-string/src/parser/validate.rs b/vendor/iri-string/src/parser/validate.rs new file mode 100644 index 00000000..59625394 --- /dev/null +++ b/vendor/iri-string/src/parser/validate.rs @@ -0,0 +1,225 @@ +//! Validating parsers for non-trusted (possibly invalid) input. + +mod authority; +mod path; + +use crate::parser::char; +use crate::parser::str::{ + find_split, find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, +}; +use crate::spec::Spec; +use crate::validate::Error; + +use self::authority::validate_authority; +pub(crate) use self::authority::{validate_host, validate_userinfo}; +pub(crate) use self::path::validate_path; +use self::path::{ + validate_path_abempty, validate_path_absolute_authority_absent, + validate_path_relative_authority_absent, +}; + +/// Returns `Ok(_)` if the string matches `scheme`. +pub(crate) fn validate_scheme(i: &str) -> Result<(), Error> { + debug_assert!(!i.is_empty()); + let bytes = i.as_bytes(); + if bytes[0].is_ascii_alphabetic() + && bytes[1..] + .iter() + .all(|&b| b.is_ascii() && char::is_ascii_scheme_continue(b)) + { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `query` or `iquery`. 
+pub(crate) fn validate_query<S: Spec>(i: &str) -> Result<(), Error> { + let is_valid = + satisfy_chars_with_pct_encoded(i, char::is_ascii_frag_query, char::is_nonascii_query::<S>); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `authority path-abempty` rule sequence. +fn validate_authority_path_abempty<S: Spec>(i: &str) -> Result<(), Error> { + let (maybe_authority, maybe_path) = match find_split(i, b'/') { + Some(v) => v, + None => (i, ""), + }; + validate_authority::<S>(maybe_authority)?; + validate_path_abempty::<S>(maybe_path) +} + +/// Returns `Ok(_)` if the string matches `URI`/`IRI` rules. +#[inline] +pub(crate) fn validate_uri<S: Spec>(i: &str) -> Result<(), Error> { + validate_uri_reference_common::<S>(i, UriReferenceRule::Absolute) +} + +/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules. +#[inline] +pub(crate) fn validate_uri_reference<S: Spec>(i: &str) -> Result<(), Error> { + validate_uri_reference_common::<S>(i, UriReferenceRule::Any) +} + +/// Returns `Ok(_)` if the string matches `absolute-URI`/`absolute-IRI` rules. +#[inline] +pub(crate) fn validate_absolute_uri<S: Spec>(i: &str) -> Result<(), Error> { + validate_uri_reference_common::<S>(i, UriReferenceRule::AbsoluteWithoutFragment) +} + +/// Syntax rule for URI/IRI references. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +enum UriReferenceRule { + /// `URI` and `IRI`. + /// + /// This can have a fragment. + Absolute, + /// `absolute-URI` and `absolute-IRI`. + /// + /// This cannot have a fragment. + AbsoluteWithoutFragment, + /// `URI-reference` and `IRI-reference`. + /// + /// This can be relative. + Any, +} + +impl UriReferenceRule { + /// Returns `true` is the relative reference is allowed. + #[inline] + #[must_use] + fn is_relative_allowed(self) -> bool { + self == Self::Any + } + + /// Returns `true` is the fragment part is allowed. 
+ #[inline] + #[must_use] + fn is_fragment_allowed(self) -> bool { + matches!(self, Self::Absolute | Self::Any) + } +} + +/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules. +fn validate_uri_reference_common<S: Spec>( + i: &str, + ref_rule: UriReferenceRule, +) -> Result<(), Error> { + // Validate `scheme ":"`. + let (i, _scheme) = match find_split_hole(i, b':') { + None => { + if ref_rule.is_relative_allowed() { + return validate_relative_ref::<S>(i); + } else { + return Err(Error::new()); + } + } + Some(("", _)) => return Err(Error::new()), + Some((maybe_scheme, rest)) => { + if validate_scheme(maybe_scheme).is_err() { + // The string before the first colon is not a scheme. + // Falling back to `relative-ref` parsing. + if ref_rule.is_relative_allowed() { + return validate_relative_ref::<S>(i); + } else { + return Err(Error::new()); + } + } + (rest, maybe_scheme) + } + }; + + // Validate `hier-part`. + let after_path = match i.strip_prefix("//") { + Some(i) => { + let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))), + None => (i, None), + }; + validate_authority_path_abempty::<S>(maybe_authority_path)?; + after_path + } + None => { + let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))), + None => (i, None), + }; + // Authority is absent. + validate_path_absolute_authority_absent::<S>(maybe_path)?; + after_path + } + }; + + // Validate `[ "?" query ] [ "#" fragment ]`. + if let Some((first, rest)) = after_path { + validate_after_path::<S>(first, rest, ref_rule.is_fragment_allowed())?; + } + Ok(()) +} + +/// Returns `Ok(_)` if the string matches `relative-ref`/`irelative-ref` rules. +pub(crate) fn validate_relative_ref<S: Spec>(i: &str) -> Result<(), Error> { + // Validate `relative-part`. 
+ let after_path = match i.strip_prefix("//") { + Some(i) => { + let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))), + None => (i, None), + }; + validate_authority_path_abempty::<S>(maybe_authority_path)?; + after_path + } + None => { + let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))), + None => (i, None), + }; + // Authority is absent. + validate_path_relative_authority_absent::<S>(maybe_path)?; + after_path + } + }; + + // Validate `[ "?" query ] [ "#" fragment ]`. + if let Some((first, rest)) = after_path { + validate_after_path::<S>(first, rest, true)?; + } + Ok(()) +} + +/// Returns `Ok(_)` if the string matches `[ "?" query ] [ "#" fragment ]` (or IRI version). +fn validate_after_path<S: Spec>(first: u8, rest: &str, accept_fragment: bool) -> Result<(), Error> { + let (maybe_query, maybe_fragment) = if first == b'?' { + match find_split_hole(rest, b'#') { + Some(v) => v, + None => (rest, ""), + } + } else { + debug_assert_eq!(first, b'#'); + ("", rest) + }; + validate_query::<S>(maybe_query)?; + if !accept_fragment && !maybe_fragment.is_empty() { + return Err(Error::new()); + } + validate_fragment::<S>(maybe_fragment) +} + +/// Returns `Ok(_)` if the string matches `fragment`/`ifragment` rules. +pub(crate) fn validate_fragment<S: Spec>(i: &str) -> Result<(), Error> { + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_frag_query, + char::is_nonascii_fragment::<S>, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} diff --git a/vendor/iri-string/src/parser/validate/authority.rs b/vendor/iri-string/src/parser/validate/authority.rs new file mode 100644 index 00000000..fb41085e --- /dev/null +++ b/vendor/iri-string/src/parser/validate/authority.rs @@ -0,0 +1,296 @@ +//! Parsers for authority. 
+ +use core::mem; + +use crate::parser::char; +use crate::parser::str::{ + find_split_hole, get_wrapped_inner, rfind_split_hole, satisfy_chars_with_pct_encoded, + strip_ascii_char_prefix, +}; +use crate::spec::Spec; +use crate::validate::Error; + +/// Returns `Ok(_)` if the string matches `userinfo` or `iuserinfo`. +pub(crate) fn validate_userinfo<S: Spec>(i: &str) -> Result<(), Error> { + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_userinfo_ipvfutureaddr, + char::is_nonascii_userinfo::<S>, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `true` if the string matches `dec-octet`. +/// +/// In other words, this tests whether the string is decimal "0" to "255". +#[must_use] +fn is_dec_octet(i: &str) -> bool { + matches!( + i.as_bytes(), + [b'0'..=b'9'] + | [b'1'..=b'9', b'0'..=b'9'] + | [b'1', b'0'..=b'9', b'0'..=b'9'] + | [b'2', b'0'..=b'4', b'0'..=b'9'] + | [b'2', b'5', b'0'..=b'5'] + ) +} + +/// Returns `Ok(_)` if the string matches `IPv4address`. +fn validate_ipv4address(i: &str) -> Result<(), Error> { + let (first, rest) = find_split_hole(i, b'.').ok_or_else(Error::new)?; + if !is_dec_octet(first) { + return Err(Error::new()); + } + let (second, rest) = find_split_hole(rest, b'.').ok_or_else(Error::new)?; + if !is_dec_octet(second) { + return Err(Error::new()); + } + let (third, fourth) = find_split_hole(rest, b'.').ok_or_else(Error::new)?; + if is_dec_octet(third) && is_dec_octet(fourth) { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// A part of IPv6 addr. +#[derive(Clone, Copy)] +enum V6AddrPart { + /// `[0-9a-fA-F]{1,4}::`. + H16Omit, + /// `[0-9a-fA-F]{1,4}:`. + H16Cont, + /// `[0-9a-fA-F]{1,4}`. + H16End, + /// IPv4 address. + V4, + /// `::`. + Omit, +} + +/// Splits the IPv6 address string into the next component and the rest substring. 
+fn split_v6_addr_part(i: &str) -> Result<(&str, V6AddrPart), Error> { + debug_assert!(!i.is_empty()); + match find_split_hole(i, b':') { + Some((prefix, rest)) => { + if prefix.len() >= 5 { + return Err(Error::new()); + } + + if prefix.is_empty() { + return match strip_ascii_char_prefix(rest, b':') { + Some(rest) => Ok((rest, V6AddrPart::Omit)), + None => Err(Error::new()), + }; + } + + // Should be `h16`. + debug_assert!((1..=4).contains(&prefix.len())); + if !prefix.bytes().all(|b| b.is_ascii_hexdigit()) { + return Err(Error::new()); + } + match strip_ascii_char_prefix(rest, b':') { + Some(rest) => Ok((rest, V6AddrPart::H16Omit)), + None => Ok((rest, V6AddrPart::H16Cont)), + } + } + None => { + if i.len() >= 5 { + // Possibly `IPv4address`. + validate_ipv4address(i)?; + return Ok(("", V6AddrPart::V4)); + } + if i.bytes().all(|b| b.is_ascii_hexdigit()) { + Ok(("", V6AddrPart::H16End)) + } else { + Err(Error::new()) + } + } + } +} + +/// Returns `Ok(_)` if the string matches `IPv6address`. +fn validate_ipv6address(mut i: &str) -> Result<(), Error> { + let mut h16_count = 0; + let mut is_omitted = false; + while !i.is_empty() { + let (rest, part) = split_v6_addr_part(i)?; + match part { + V6AddrPart::H16Omit => { + h16_count += 1; + if mem::replace(&mut is_omitted, true) { + // Omitted twice. + return Err(Error::new()); + } + } + V6AddrPart::H16Cont => { + h16_count += 1; + if rest.is_empty() { + // `H16Cont` cannot be the last part of an IPv6 address. + return Err(Error::new()); + } + } + V6AddrPart::H16End => { + h16_count += 1; + break; + } + V6AddrPart::V4 => { + debug_assert!(rest.is_empty()); + h16_count += 2; + break; + } + V6AddrPart::Omit => { + if mem::replace(&mut is_omitted, true) { + // Omitted twice. 
+ return Err(Error::new()); + } + } + } + if h16_count > 8 { + return Err(Error::new()); + } + i = rest; + } + let is_valid = if is_omitted { + h16_count < 8 + } else { + h16_count == 8 + }; + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `authority` or `iauthority`. +pub(super) fn validate_authority<S: Spec>(i: &str) -> Result<(), Error> { + // Strip and validate `userinfo`. + let (i, _userinfo) = match find_split_hole(i, b'@') { + Some((maybe_userinfo, i)) => { + validate_userinfo::<S>(maybe_userinfo)?; + (i, Some(maybe_userinfo)) + } + None => (i, None), + }; + // `host` can contain colons, but `port` cannot. + // Strip and validate `port`. + let (maybe_host, _port) = match rfind_split_hole(i, b':') { + Some((maybe_host, maybe_port)) => { + if maybe_port.bytes().all(|b| b.is_ascii_digit()) { + (maybe_host, Some(maybe_port)) + } else { + (i, None) + } + } + None => (i, None), + }; + // Validate `host`. + validate_host::<S>(maybe_host) +} + +/// Validates `host`. +pub(crate) fn validate_host<S: Spec>(i: &str) -> Result<(), Error> { + match get_wrapped_inner(i, b'[', b']') { + Some(maybe_addr) => { + // `IP-literal`. + // Note that `v` here is case insensitive. See RFC 3987 section 3.2.2. + if let Some(maybe_addr_rest) = strip_ascii_char_prefix(maybe_addr, b'v') + .or_else(|| strip_ascii_char_prefix(maybe_addr, b'V')) + { + // `IPvFuture`. + let (maybe_ver, maybe_addr) = + find_split_hole(maybe_addr_rest, b'.').ok_or_else(Error::new)?; + // Validate version. + if maybe_ver.is_empty() || !maybe_ver.bytes().all(|b| b.is_ascii_hexdigit()) { + return Err(Error::new()); + } + // Validate address. + if !maybe_addr.is_empty() + && maybe_addr.is_ascii() + && maybe_addr + .bytes() + .all(char::is_ascii_userinfo_ipvfutureaddr) + { + Ok(()) + } else { + Err(Error::new()) + } + } else { + // `IPv6address`. + validate_ipv6address(maybe_addr) + } + } + None => { + // `IPv4address` or `reg-name`. 
No need to distinguish them here. + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_regname, + char::is_nonascii_regname::<S>, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } + } + } +} + +#[cfg(test)] +#[cfg(feature = "alloc")] +mod tests { + use super::*; + + use alloc::format; + + macro_rules! assert_validate { + ($parser:expr, $($input:expr),* $(,)?) => {{ + $({ + let input = $input; + let input: &str = input.as_ref(); + assert!($parser(input).is_ok(), "input={:?}", input); + })* + }}; + } + + #[test] + fn test_ipv6address() { + use core::cmp::Ordering; + + assert_validate!(validate_ipv6address, "a:bB:cCc:dDdD:e:F:a:B"); + assert_validate!(validate_ipv6address, "1:1:1:1:1:1:1:1"); + assert_validate!(validate_ipv6address, "1:1:1:1:1:1:1.1.1.1"); + assert_validate!(validate_ipv6address, "2001:db8::7"); + + // Generate IPv6 addresses with `::`. + let make_sub = |n: usize| { + let mut s = "1:".repeat(n); + s.pop(); + s + }; + for len_pref in 0..=7 { + let prefix = make_sub(len_pref); + for len_suf in 1..=(7 - len_pref) { + assert_validate!( + validate_ipv6address, + &format!("{}::{}", prefix, make_sub(len_suf)) + ); + match len_suf.cmp(&2) { + Ordering::Greater => assert_validate!( + validate_ipv6address, + &format!("{}::{}:1.1.1.1", prefix, make_sub(len_suf - 2)) + ), + Ordering::Equal => { + assert_validate!(validate_ipv6address, &format!("{}::1.1.1.1", prefix)) + } + Ordering::Less => {} + } + } + } + } +} diff --git a/vendor/iri-string/src/parser/validate/path.rs b/vendor/iri-string/src/parser/validate/path.rs new file mode 100644 index 00000000..1b09c84b --- /dev/null +++ b/vendor/iri-string/src/parser/validate/path.rs @@ -0,0 +1,91 @@ +//! Parsers for path. + +use crate::parser::char; +use crate::parser::str::{find_split2_hole, satisfy_chars_with_pct_encoded}; +use crate::spec::Spec; +use crate::validate::Error; + +/// Returns `Ok(_)` if the string matches `path-abempty` or `ipath-abempty`. 
+pub(super) fn validate_path_abempty<S: Spec>(i: &str) -> Result<(), Error> { + if i.is_empty() { + return Ok(()); + } + let i = match i.strip_prefix('/') { + Some(rest) => rest, + None => return Err(Error::new()), + }; + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `hier-part` or `ihier-part` modulo +/// `"//" authority path-abempty`. +pub(super) fn validate_path_absolute_authority_absent<S: Spec>(i: &str) -> Result<(), Error> { + if i.is_empty() { + // `path-empty`. + return Ok(()); + } + if i.starts_with("//") { + unreachable!("this case should be handled by the caller"); + } + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `relative-part` or `irelative-part` modulo +/// `"//" authority path-abempty`. +pub(super) fn validate_path_relative_authority_absent<S: Spec>(i: &str) -> Result<(), Error> { + if i.starts_with("//") { + unreachable!("this case should be handled by the caller"); + } + let is_valid = match find_split2_hole(i, b'/', b':') { + Some((_, b'/', _)) | None => satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ), + Some((_, c, _)) => { + debug_assert_eq!(c, b':'); + // `foo:bar`-style. This does not match `path-noscheme`. + return Err(Error::new()); + } + }; + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `path`/`ipath` rules. 
+pub(crate) fn validate_path<S: Spec>(i: &str) -> Result<(), Error> { + if i.starts_with("//") { + return Err(Error::new()); + } + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} |
