summaryrefslogtreecommitdiff
path: root/vendor/iri-string/src/parser
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2025-07-10 13:11:11 -0600
committermo khan <mo@mokhan.ca>2025-07-10 13:11:11 -0600
commit01959b16a21b22b5df5f16569c2a8e8f92beecef (patch)
tree32afa5d747c5466345c59ec52161a7cba3d6d755 /vendor/iri-string/src/parser
parentff30574117a996df332e23d1fb6f65259b316b5b (diff)
chore: vendor dependencies
Diffstat (limited to 'vendor/iri-string/src/parser')
-rw-r--r--vendor/iri-string/src/parser/char.rs323
-rw-r--r--vendor/iri-string/src/parser/str.rs390
-rw-r--r--vendor/iri-string/src/parser/str/maybe_pct_encoded.rs369
-rw-r--r--vendor/iri-string/src/parser/trusted.rs476
-rw-r--r--vendor/iri-string/src/parser/trusted/authority.rs32
-rw-r--r--vendor/iri-string/src/parser/validate.rs225
-rw-r--r--vendor/iri-string/src/parser/validate/authority.rs296
-rw-r--r--vendor/iri-string/src/parser/validate/path.rs91
8 files changed, 2202 insertions, 0 deletions
diff --git a/vendor/iri-string/src/parser/char.rs b/vendor/iri-string/src/parser/char.rs
new file mode 100644
index 00000000..2455498e
--- /dev/null
+++ b/vendor/iri-string/src/parser/char.rs
@@ -0,0 +1,323 @@
+//! Characters.
+
+use crate::spec::Spec;
+
+/// A mask to test whether the character is a continue character of `scheme`.
+// `ALPHA / DIGIT / "+" / "-" / "."`
+const MASK_SCHEME_CONTINUE: u8 = 1 << 0;
+
+/// A mask to test whether the character matches `unreserved`.
+// `unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"`
+const MASK_UNRESERVED: u8 = 1 << 1;
+
+/// A mask to test whether the character matches `gen-delims`.
+// `gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"`
+const MASK_GEN_DELIMS: u8 = 1 << 2;
+
+/// A mask to test whether the character matches `sub-delims`.
+// `sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="`
+const MASK_SUB_DELIMS: u8 = 1 << 3;
+
+/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes).
+// `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"`
+const MASK_PCHAR: u8 = 1 << 4;
+
+/// A mask to test whether the character can appear in `query` and `fragment` (modulo percent-encoded bytes).
+// `query = *( pchar / "/" / "?" )`
+// `fragment = *( pchar / "/" / "?" )`
+const MASK_FRAG_QUERY: u8 = 1 << 5;
+
+/// A mask to test whether the character can appear in `userinfo` and in the address of `IPvFuture`.
+// `userinfo = *( unreserved / pct-encoded / sub-delims / ":" )`
+const MASK_USERINFO_IPVFUTUREADDR: u8 = 1 << 6;
+
+/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes) or is a slash (`/`).
+const MASK_PCHAR_SLASH: u8 = 1 << 7;
+
+/// ASCII characters' properties: entry `i` is the bitwise OR of the `MASK_*` flags that apply to the ASCII character `i`.
+const TABLE: [u8; 128] = [
+ 0b_0000_0000, // NUL
+ 0b_0000_0000, // SOH
+ 0b_0000_0000, // STX
+ 0b_0000_0000, // ETX
+ 0b_0000_0000, // EOT
+ 0b_0000_0000, // ENQ
+ 0b_0000_0000, // ACK
+ 0b_0000_0000, // BEL
+ 0b_0000_0000, // BS
+ 0b_0000_0000, // HT
+ 0b_0000_0000, // LF
+ 0b_0000_0000, // VT
+ 0b_0000_0000, // FF
+ 0b_0000_0000, // CR
+ 0b_0000_0000, // SO
+ 0b_0000_0000, // SI
+ 0b_0000_0000, // DLE
+ 0b_0000_0000, // DC1
+ 0b_0000_0000, // DC2
+ 0b_0000_0000, // DC3
+ 0b_0000_0000, // DC4
+ 0b_0000_0000, // NAK
+ 0b_0000_0000, // SYN
+ 0b_0000_0000, // ETB
+ 0b_0000_0000, // CAN
+ 0b_0000_0000, // EM
+ 0b_0000_0000, // SUB
+ 0b_0000_0000, // ESC
+ 0b_0000_0000, // FS
+ 0b_0000_0000, // GS
+ 0b_0000_0000, // RS
+ 0b_0000_0000, // US
+ 0b_0000_0000, // SPACE
+ 0b_1111_1000, // !
+ 0b_0000_0000, // "
+ 0b_0000_0100, // #
+ 0b_1111_1000, // $
+ 0b_0000_0000, // %
+ 0b_1111_1000, // &
+ 0b_1111_1000, // '
+ 0b_1111_1000, // (
+ 0b_1111_1000, // )
+ 0b_1111_1000, // *
+ 0b_1111_1001, // +
+ 0b_1111_1000, // ,
+ 0b_1111_0011, // -
+ 0b_1111_0011, // .
+ 0b_1010_0100, // /
+ 0b_1111_0011, // 0
+ 0b_1111_0011, // 1
+ 0b_1111_0011, // 2
+ 0b_1111_0011, // 3
+ 0b_1111_0011, // 4
+ 0b_1111_0011, // 5
+ 0b_1111_0011, // 6
+ 0b_1111_0011, // 7
+ 0b_1111_0011, // 8
+ 0b_1111_0011, // 9
+ 0b_1111_0100, // :
+ 0b_1111_1000, // ;
+ 0b_0000_0000, // <
+ 0b_1111_1000, // =
+ 0b_0000_0000, // >
+ 0b_0010_0100, // ?
+ 0b_1011_0100, // @
+ 0b_1111_0011, // A
+ 0b_1111_0011, // B
+ 0b_1111_0011, // C
+ 0b_1111_0011, // D
+ 0b_1111_0011, // E
+ 0b_1111_0011, // F
+ 0b_1111_0011, // G
+ 0b_1111_0011, // H
+ 0b_1111_0011, // I
+ 0b_1111_0011, // J
+ 0b_1111_0011, // K
+ 0b_1111_0011, // L
+ 0b_1111_0011, // M
+ 0b_1111_0011, // N
+ 0b_1111_0011, // O
+ 0b_1111_0011, // P
+ 0b_1111_0011, // Q
+ 0b_1111_0011, // R
+ 0b_1111_0011, // S
+ 0b_1111_0011, // T
+ 0b_1111_0011, // U
+ 0b_1111_0011, // V
+ 0b_1111_0011, // W
+ 0b_1111_0011, // X
+ 0b_1111_0011, // Y
+ 0b_1111_0011, // Z
+ 0b_0000_0100, // [
+ 0b_0000_0000, // \
+ 0b_0000_0100, // ]
+ 0b_0000_0000, // ^
+ 0b_1111_0010, // _
+ 0b_0000_0000, // `
+ 0b_1111_0011, // a
+ 0b_1111_0011, // b
+ 0b_1111_0011, // c
+ 0b_1111_0011, // d
+ 0b_1111_0011, // e
+ 0b_1111_0011, // f
+ 0b_1111_0011, // g
+ 0b_1111_0011, // h
+ 0b_1111_0011, // i
+ 0b_1111_0011, // j
+ 0b_1111_0011, // k
+ 0b_1111_0011, // l
+ 0b_1111_0011, // m
+ 0b_1111_0011, // n
+ 0b_1111_0011, // o
+ 0b_1111_0011, // p
+ 0b_1111_0011, // q
+ 0b_1111_0011, // r
+ 0b_1111_0011, // s
+ 0b_1111_0011, // t
+ 0b_1111_0011, // u
+ 0b_1111_0011, // v
+ 0b_1111_0011, // w
+ 0b_1111_0011, // x
+ 0b_1111_0011, // y
+ 0b_1111_0011, // z
+ 0b_0000_0000, // {
+ 0b_0000_0000, // |
+ 0b_0000_0000, // }
+ 0b_1111_0010, // ~
+ 0b_0000_0000, // DEL
+];
+
+/// Returns `true` if the ASCII byte `c` is a continue character of `scheme`; panics if `c >= 128` (out of `TABLE`).
+#[inline]
+#[must_use]
+pub(crate) const fn is_ascii_scheme_continue(c: u8) -> bool {
+ (TABLE[c as usize] & MASK_SCHEME_CONTINUE) != 0
+}
+
+/// Returns `true` if the ASCII byte `c` matches `unreserved`; panics if `c >= 128` (out of `TABLE`).
+#[inline]
+#[must_use]
+pub(crate) const fn is_ascii_unreserved(c: u8) -> bool {
+ (TABLE[c as usize] & MASK_UNRESERVED) != 0
+}
+
+/// Returns `true` if `c` is unreserved: ASCII goes through the table, the rest through `S::is_nonascii_char_unreserved`.
+#[inline]
+#[must_use]
+pub(crate) fn is_unreserved<S: Spec>(c: char) -> bool {
+ if c.is_ascii() {
+ is_ascii_unreserved(c as u8)
+ } else {
+ S::is_nonascii_char_unreserved(c)
+ }
+}
+
+///// Returns `true` if the given ASCII character matches `gen-delims`.
+//#[inline]
+//#[must_use]
+//pub(crate) const fn is_ascii_gen_delims(c: u8) -> bool {
+// (TABLE[c as usize] & MASK_GEN_DELIMS) != 0
+//}
+
+///// Returns `true` if the given ASCII character matches `sub-delims`.
+//#[inline]
+//#[must_use]
+//pub(crate) const fn is_ascii_sub_delims(c: u8) -> bool {
+// (TABLE[c as usize] & MASK_SUB_DELIMS) != 0
+//}
+
+///// Returns `true` if the given ASCII character matches `reserved`.
+//#[inline]
+//#[must_use]
+//pub(crate) const fn is_ascii_reserved(c: u8) -> bool {
+// (TABLE[c as usize] & (MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0
+//}
+
+/// Returns `true` if the ASCII byte `c` matches `pchar` modulo `pct-encoded`; panics if `c >= 128` (out of `TABLE`).
+#[inline]
+#[must_use]
+pub(crate) const fn is_ascii_pchar(c: u8) -> bool {
+ (TABLE[c as usize] & MASK_PCHAR) != 0
+}
+
+/// Returns `true` if the ASCII byte `c` may appear in `query` and `fragment`; panics if `c >= 128` (out of `TABLE`).
+#[inline]
+#[must_use]
+pub(crate) const fn is_ascii_frag_query(c: u8) -> bool {
+ (TABLE[c as usize] & MASK_FRAG_QUERY) != 0
+}
+
+/// Returns `true` if the non-ASCII character is allowed in `iquery`, i.e. `S` treats it as unreserved or private.
+#[inline]
+#[must_use]
+pub(crate) fn is_nonascii_query<S: Spec>(c: char) -> bool {
+ S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c)
+}
+
+/// Returns `true` if the non-ASCII character is allowed in `ifragment`, i.e. `S` treats it as unreserved.
+#[inline]
+#[must_use]
+pub(crate) fn is_nonascii_fragment<S: Spec>(c: char) -> bool {
+ S::is_nonascii_char_unreserved(c)
+}
+
+/// Returns `true` if the ASCII byte `c` may appear in `userinfo` and `IPvFuture`; panics if `c >= 128` (out of `TABLE`).
+#[inline]
+#[must_use]
+pub(crate) const fn is_ascii_userinfo_ipvfutureaddr(c: u8) -> bool {
+ (TABLE[c as usize] & MASK_USERINFO_IPVFUTUREADDR) != 0
+}
+
+/// Returns `true` if the non-ASCII character is allowed in `iuserinfo`, i.e. `S` treats it as unreserved.
+#[inline]
+#[must_use]
+pub(crate) fn is_nonascii_userinfo<S: Spec>(c: char) -> bool {
+ S::is_nonascii_char_unreserved(c)
+}
+
+/// Returns `true` if the ASCII byte `c` may appear in `reg-name` (`unreserved` or `sub-delims`); panics if `c >= 128`.
+#[inline]
+#[must_use]
+pub(crate) const fn is_ascii_regname(c: u8) -> bool {
+ (TABLE[c as usize] & (MASK_UNRESERVED | MASK_SUB_DELIMS)) != 0
+}
+
+/// Returns `true` if the non-ASCII character is allowed in `ireg-name`, i.e. `S` treats it as unreserved.
+#[inline]
+#[must_use]
+pub(crate) fn is_nonascii_regname<S: Spec>(c: char) -> bool {
+ S::is_nonascii_char_unreserved(c)
+}
+
+/// Returns `true` if the ASCII byte `c` matches `pchar` modulo `pct-encoded`, or is a slash; panics if `c >= 128`.
+#[inline]
+#[must_use]
+pub(crate) const fn is_ascii_pchar_slash(c: u8) -> bool {
+ (TABLE[c as usize] & MASK_PCHAR_SLASH) != 0
+}
+
+/// Checks if the given character matches the `ucschar` rule, i.e. its code point lies in one of the ranges below.
+#[must_use]
+pub(crate) fn is_ucschar(c: char) -> bool {
+ matches!(
+ u32::from(c),
+ 0xA0..=0xD7FF |
+ 0xF900..=0xFDCF |
+ 0xFDF0..=0xFFEF |
+ 0x1_0000..=0x1_FFFD |
+ 0x2_0000..=0x2_FFFD |
+ 0x3_0000..=0x3_FFFD |
+ 0x4_0000..=0x4_FFFD |
+ 0x5_0000..=0x5_FFFD |
+ 0x6_0000..=0x6_FFFD |
+ 0x7_0000..=0x7_FFFD |
+ 0x8_0000..=0x8_FFFD |
+ 0x9_0000..=0x9_FFFD |
+ 0xA_0000..=0xA_FFFD |
+ 0xB_0000..=0xB_FFFD |
+ 0xC_0000..=0xC_FFFD |
+ 0xD_0000..=0xD_FFFD |
+ 0xE_1000..=0xE_FFFD
+ )
+}
+
+/// Returns `true` if the given value is a continue byte of UTF-8 (`0x80..=0xbf`).
+#[inline(always)]
+#[must_use]
+pub(crate) fn is_utf8_byte_continue(byte: u8) -> bool {
+ // `0x80..=0xbf` (i.e. `0b_1000_0000..=0b_1011_1111`) is not the first byte,
+ // and `0xc0..=0xc1` (i.e. `0b_1100_0000..=0b_1100_0001`) shouldn't appear
+ // anywhere in a UTF-8 byte sequence.
+ // `0x80 as i8` is -128, and `0xc0 as i8` is -64.
+ //
+ // The first byte of a UTF-8 character is not `0b10xx_xxxx`, and
+ // the continue bytes are `0b10xx_xxxx`.
+ // `0b1011_1111 as i8` is -65, and `0b1000_0000 as i8` is -128.
+ (byte as i8) < -64
+}
+
+/// Returns `true` if the ASCII byte `c` is `unreserved`, `gen-delims`, or `sub-delims`; panics if `c >= 128`.
+#[inline]
+#[must_use]
+pub(crate) const fn is_ascii_unreserved_or_reserved(c: u8) -> bool {
+ (TABLE[c as usize] & (MASK_UNRESERVED | MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0
+}
diff --git a/vendor/iri-string/src/parser/str.rs b/vendor/iri-string/src/parser/str.rs
new file mode 100644
index 00000000..0f564bfa
--- /dev/null
+++ b/vendor/iri-string/src/parser/str.rs
@@ -0,0 +1,390 @@
+//! Functions for common string operations.
+
+pub(crate) use self::maybe_pct_encoded::{
+ process_percent_encoded_best_effort, PctEncodedFragments,
+};
+
+mod maybe_pct_encoded;
+
+/// Returns the string between the first and last bytes if they are `open` and `close` (needs two bytes or more).
+#[must_use]
+pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> {
+ let (prefix, suffix) = match s.as_bytes() {
+ [prefix, suffix] | [prefix, .., suffix] => (*prefix, *suffix),
+ _ => return None,
+ };
+ if (prefix == open) && (suffix == close) {
+ Some(&s[1..(s.len() - 1)])
+ } else {
+ None
+ }
+}
+
+/// Returns whichever of `needle1` and `needle2` occurs first in `haystack`, or `None` if neither occurs.
+#[cfg(not(feature = "memchr"))]
+#[inline]
+#[must_use]
+pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
+ haystack
+ .iter()
+ .copied()
+ .find(|&b| b == needle1 || b == needle2)
+}
+
+/// Returns whichever of `needle1` and `needle2` occurs first in `haystack`, or `None` if neither occurs.
+#[cfg(feature = "memchr")]
+#[inline]
+#[must_use]
+pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
+ memchr::memchr2(needle1, needle2, haystack).map(|pos| haystack[pos])
+}
+
+/// Returns the index of the last `needle` byte in `haystack` (a possibly faster `rfind` for an ASCII needle).
+#[cfg(not(feature = "memchr"))]
+#[inline]
+#[must_use]
+pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
+ haystack.iter().rposition(|&b| b == needle)
+}
+
+/// Returns the index of the last `needle` byte in `haystack` (a possibly faster `rfind` for an ASCII needle).
+#[cfg(feature = "memchr")]
+#[inline]
+#[must_use]
+pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
+ memchr::memrchr(needle, haystack)
+}
+
+/// Finds the first needle, and returns the string before it and the rest (which starts with the needle).
+///
+/// If `needle` is not found, returns `None`.
+#[cfg(not(feature = "memchr"))]
+#[must_use]
+pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
+ haystack
+ .bytes()
+ .position(|b| b == needle)
+ .map(|pos| haystack.split_at(pos))
+}
+
+/// Finds the first needle, and returns the string before it and the rest (which starts with the needle).
+///
+/// If `needle` is not found, returns `None`.
+#[cfg(feature = "memchr")]
+#[must_use]
+pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
+ memchr::memchr(needle, haystack.as_bytes()).map(|pos| haystack.split_at(pos))
+}
+
+/// Finds the last occurrence of either needle, and returns the string before it and the rest (starting with it).
+///
+/// If no needles are found, returns `None`.
+#[cfg(not(feature = "memchr"))]
+#[must_use]
+pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
+ haystack
+ .bytes()
+ .rposition(|b| b == needle1 || b == needle2)
+ .map(|pos| haystack.split_at(pos))
+}
+
+/// Finds the last occurrence of either needle, and returns the string before it and the rest (starting with it).
+///
+/// If no needles are found, returns `None`.
+#[cfg(feature = "memchr")]
+#[must_use]
+pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
+ memchr::memrchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos))
+}
+
+/// Finds the first occurrence of either needle, and returns the string before it and the rest (starting with it).
+///
+/// If no needles are found, returns `None`.
+#[cfg(not(feature = "memchr"))]
+#[must_use]
+pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
+ haystack
+ .bytes()
+ .position(|b| b == needle1 || b == needle2)
+ .map(|pos| haystack.split_at(pos))
+}
+
+/// Finds the first occurrence of either needle, and returns the string before it and the rest (starting with it).
+///
+/// If no needles are found, returns `None`.
+#[cfg(feature = "memchr")]
+#[must_use]
+pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
+ memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos))
+}
+
+/// Finds the first occurrence of any of the three needles, and returns the string before it and the rest (starting with it).
+///
+/// If no needles are found, returns `None`.
+#[cfg(not(feature = "memchr"))]
+#[must_use]
+pub(crate) fn find_split3(
+ haystack: &str,
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+) -> Option<(&str, &str)> {
+ haystack
+ .bytes()
+ .position(|b| b == needle1 || b == needle2 || b == needle3)
+ .map(|pos| haystack.split_at(pos))
+}
+
+/// Finds the first occurrence of any of the three needles, and returns the string before it and the rest (starting with it).
+///
+/// If no needles are found, returns `None`.
+#[cfg(feature = "memchr")]
+#[must_use]
+pub(crate) fn find_split3(
+ haystack: &str,
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+) -> Option<(&str, &str)> {
+ memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes())
+ .map(|pos| haystack.split_at(pos))
+}
+
+/// Finds the first needle, and returns the strings before and after it (the needle byte itself is dropped).
+///
+/// If `needle` is not found, returns `None`.
+#[cfg(not(feature = "memchr"))]
+#[must_use]
+pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
+ haystack
+ .bytes()
+ .position(|b| b == needle)
+ .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
+}
+
+/// Finds the first needle, and returns the strings before and after it (the needle byte itself is dropped).
+///
+/// If `needle` is not found, returns `None`.
+#[cfg(feature = "memchr")]
+#[must_use]
+pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
+ memchr::memchr(needle, haystack.as_bytes())
+ .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
+}
+
+/// Finds the first occurrence of either needle, and returns the string before it, the byte found, and the string after it.
+///
+/// If no needles are found, returns `None`.
+#[cfg(not(feature = "memchr"))]
+#[must_use]
+pub(crate) fn find_split2_hole(
+ haystack: &str,
+ needle1: u8,
+ needle2: u8,
+) -> Option<(&str, u8, &str)> {
+ haystack
+ .bytes()
+ .position(|b| b == needle1 || b == needle2)
+ .map(|pos| {
+ (
+ &haystack[..pos],
+ haystack.as_bytes()[pos],
+ &haystack[(pos + 1)..],
+ )
+ })
+}
+
+/// Finds the first occurrence of either needle, and returns the string before it, the byte found, and the string after it.
+///
+/// If no needles are found, returns `None`.
+#[cfg(feature = "memchr")]
+#[must_use]
+pub(crate) fn find_split2_hole(
+ haystack: &str,
+ needle1: u8,
+ needle2: u8,
+) -> Option<(&str, u8, &str)> {
+ memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| {
+ (
+ &haystack[..pos],
+ haystack.as_bytes()[pos],
+ &haystack[(pos + 1)..],
+ )
+ })
+}
+
+/// Finds the first occurrence of any of the four needles, and returns the string before it, the byte found, and the string after it.
+///
+/// If no needles are found, returns `None`.
+#[cfg(not(feature = "memchr"))]
+#[must_use]
+pub(crate) fn find_split4_hole(
+ haystack: &str,
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ needle4: u8,
+) -> Option<(&str, u8, &str)> {
+ haystack
+ .bytes()
+ .position(|b| b == needle1 || b == needle2 || b == needle3 || b == needle4)
+ .map(|pos| {
+ (
+ &haystack[..pos],
+ haystack.as_bytes()[pos],
+ &haystack[(pos + 1)..],
+ )
+ })
+}
+
+/// Finds the first occurrence of any of the four needles, and returns the string before it, the byte found, and the string after it.
+///
+/// If no needles are found, returns `None`. (`needle4` is only searched for before the first hit of the other three.)
+#[cfg(feature = "memchr")]
+#[must_use]
+pub(crate) fn find_split4_hole(
+ haystack: &str,
+ needle1: u8,
+ needle2: u8,
+ needle3: u8,
+ needle4: u8,
+) -> Option<(&str, u8, &str)> {
+ let bytes = haystack.as_bytes();
+ let pos = match memchr::memchr3(needle1, needle2, needle3, bytes) {
+ Some(prefix_len) => memchr::memchr(needle4, &bytes[..prefix_len]).or(Some(prefix_len)),
+ None => memchr::memchr(needle4, bytes),
+ };
+ pos.map(|pos| {
+ (
+ &haystack[..pos],
+ haystack.as_bytes()[pos],
+ &haystack[(pos + 1)..],
+ )
+ })
+}
+
+/// Finds the last needle, and returns the strings before and after it (the needle byte itself is dropped).
+///
+/// If `needle` is not found, returns `None`.
+#[cfg(not(feature = "memchr"))]
+#[must_use]
+pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
+ haystack
+ .bytes()
+ .rposition(|b| b == needle)
+ .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
+}
+
+/// Finds the last needle, and returns the strings before and after it (the needle byte itself is dropped).
+///
+/// If `needle` is not found, returns `None`.
+#[cfg(feature = "memchr")]
+#[must_use]
+pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
+ memchr::memrchr(needle, haystack.as_bytes())
+ .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
+}
+
+/// Returns `true` if every ASCII byte of `s` satisfies `pred_ascii` and every non-ASCII char satisfies `pred_nonascii`.
+#[must_use]
+fn satisfy_chars<F, G>(mut s: &str, pred_ascii: F, pred_nonascii: G) -> bool
+where
+ F: Copy + Fn(u8) -> bool,
+ G: Copy + Fn(char) -> bool,
+{
+ while !s.is_empty() {
+ match s.bytes().position(|b| !b.is_ascii()) {
+ Some(nonascii_pos) => {
+ // Validate the ASCII prefix.
+ if nonascii_pos != 0 {
+ let (prefix, rest) = s.split_at(nonascii_pos);
+ if !prefix.bytes().all(pred_ascii) {
+ return false;
+ }
+ s = rest;
+ }
+
+ // Extract the leading run of non-ASCII characters and validate it.
+ let (prefix, rest) = match s.bytes().position(|b| b.is_ascii()) {
+ Some(ascii_pos) => s.split_at(ascii_pos),
+ None => (s, ""),
+ };
+ if !prefix.chars().all(pred_nonascii) {
+ return false;
+ }
+ s = rest;
+ }
+ None => {
+ // All remaining chars are ASCII.
+ return s.bytes().all(pred_ascii);
+ }
+ }
+ }
+
+ true
+}
+
+/// Returns `true` if the string consists only of allowed characters and `%` followed by two hexadecimal digits.
+#[must_use]
+pub(crate) fn satisfy_chars_with_pct_encoded<F, G>(
+ mut s: &str,
+ pred_ascii: F,
+ pred_nonascii: G,
+) -> bool
+where
+ F: Copy + Fn(u8) -> bool,
+ G: Copy + Fn(char) -> bool,
+{
+ while let Some((prefix, suffix)) = find_split_hole(s, b'%') {
+ // Verify the string before the percent sign.
+ if !prefix.is_empty() && !satisfy_chars(prefix, pred_ascii, pred_nonascii) {
+ return false;
+ }
+
+ // Verify that two hexadecimal digits follow the percent sign.
+ if !starts_with_double_hexdigits(suffix.as_bytes()) {
+ return false;
+ }
+
+ // Advance the cursor past the two hexadecimal digits.
+ s = &suffix[2..];
+ }
+
+ // Verify the rest, which contains no percent sign.
+ satisfy_chars(s, pred_ascii, pred_nonascii)
+}
+
+/// Returns `true` if the given bytes start with two ASCII hexadecimal digits (`false` if shorter than two bytes).
+#[must_use]
+pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool {
+ match s {
+ [x, y] | [x, y, ..] => x.is_ascii_hexdigit() && y.is_ascii_hexdigit(),
+ _ => false,
+ }
+}
+
+/// Strips the first character if it is the given ASCII character, and returns the rest (`None` if it is not).
+///
+/// # Precondition
+///
+/// The given byte (`prefix`) must be an ASCII character; this is checked only by a debug assertion.
+#[must_use]
+pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> {
+ debug_assert!(prefix.is_ascii());
+ if s.as_bytes().first().copied() == Some(prefix) {
+ Some(&s[1..])
+ } else {
+ None
+ }
+}
+
+/// Splits the given string into the first character and the rest.
+///
+/// Returns `(first_char, rest_str)`, or `None` if the string is empty.
+#[must_use]
+pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> {
+ let mut chars = s.chars();
+ let c = chars.next()?;
+ let rest = chars.as_str();
+ Some((c, rest))
+}
diff --git a/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs b/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs
new file mode 100644
index 00000000..617f006a
--- /dev/null
+++ b/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs
@@ -0,0 +1,369 @@
+//! Processor for possibly- or invalidly-percent-encoded strings.
+
+use core::fmt::{self, Write as _};
+use core::marker::PhantomData;
+use core::num::NonZeroU8;
+use core::ops::ControlFlow;
+
+use crate::parser::str::find_split;
+use crate::parser::trusted::hexdigits_to_byte;
+
+/// Fragment in a possibly percent-encoded (and possibly broken) string.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) enum PctEncodedFragments<'a> {
+ /// String fragment without percent-encoded triplets.
+ NoPctStr(&'a str),
+ /// Stray `%` (percent) character.
+ StrayPercent,
+ /// Valid percent-encoded triplets for a character: the encoded text and the decoded character.
+ Char(&'a str, char),
+ /// Percent-encoded triplets that do not form a valid UTF-8 sequence.
+ InvalidUtf8PctTriplets(&'a str),
+}
+
+/// Feeds the `Display` output of `v` to `f` fragment by fragment, decoding percent-encoded triplets where possible; stops once `f` breaks.
+pub(crate) fn process_percent_encoded_best_effort<T, F, B>(
+ v: T,
+ mut f: F,
+) -> Result<ControlFlow<B>, fmt::Error>
+where
+ T: fmt::Display,
+ F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
+{
+ let mut buf = [0_u8; 12];
+ let mut writer = DecomposeWriter {
+ f: &mut f,
+ decoder: Default::default(),
+ buf: &mut buf,
+ result: ControlFlow::Continue(()),
+ _r: PhantomData,
+ };
+
+ if write!(writer, "{v}").is_err() {
+ match writer.result {
+ ControlFlow::Continue(_) => return Err(fmt::Error),
+ ControlFlow::Break(v) => return Ok(ControlFlow::Break(v)),
+ }
+ }
+
+ // Flush what is still pending in the decoder: complete triplets first, then an incomplete one.
+ if let Some(len) = writer.decoder.flush(&mut buf).map(|v| usize::from(v.get())) {
+ let len_suffix = len % 3;
+ let triplets_end = len - len_suffix;
+ let triplets = core::str::from_utf8(&buf[..triplets_end])
+ .expect("[validity] percent-encoded triplets consist of ASCII characters");
+ if let ControlFlow::Break(v) = f(PctEncodedFragments::InvalidUtf8PctTriplets(triplets)) {
+ return Ok(ControlFlow::Break(v));
+ }
+
+ if len_suffix > 0 {
+ if let ControlFlow::Break(v) = f(PctEncodedFragments::StrayPercent) {
+ return Ok(ControlFlow::Break(v));
+ }
+ }
+ if len_suffix > 1 {
+ let after_percent = core::str::from_utf8(
+ &buf[(triplets_end + 1)..(triplets_end + len_suffix)],
+ )
+ .expect("[consistency] percent-encoded triplets contains only ASCII characters");
+ if let ControlFlow::Break(v) = f(PctEncodedFragments::NoPctStr(after_percent)) {
+ return Ok(ControlFlow::Break(v));
+ }
+ }
+ }
+
+ Ok(ControlFlow::Continue(()))
+}
+
+/// Writer that decomposes the written text into `PctEncodedFragments` and passes them to `f`.
+struct DecomposeWriter<'a, F, B> {
+ /// Output function, called once per fragment.
+ f: &'a mut F,
+ /// Decoder holding percent-encoded bytes that are not emitted yet.
+ decoder: DecoderBuffer,
+ /// Buffer the decoder writes emitted fragments into.
+ buf: &'a mut [u8],
+ /// Result of the last output function call.
+ result: ControlFlow<B>,
+ /// Dummy field for the type parameter of the return type of the function `f`.
+ _r: PhantomData<fn() -> B>,
+}
+impl<F, B> DecomposeWriter<'_, F, B>
+where
+ F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
+{
+ /// Returns `Ok(_)` if the stored result is `Continue`, and `Err(_)` otherwise.
+ #[inline(always)]
+ fn result_continue_or_err(&self) -> fmt::Result {
+ if self.result.is_break() {
+ return Err(fmt::Error);
+ }
+ Ok(())
+ }
+
+ /// Emits the first `len_undecodable` bytes of `buf`: complete triplets as invalid, then a stray `%` and any hexdigit after it.
+ fn output_as_undecodable(&mut self, len_undecodable: u8) -> fmt::Result {
+ let len_written = usize::from(len_undecodable);
+ let frag = core::str::from_utf8(&self.buf[..len_written])
+ .expect("[validity] `DecoderBuffer` writes a valid ASCII string");
+ let len_incomplete = len_written % 3;
+ let len_complete = len_written - len_incomplete;
+ self.result = (self.f)(PctEncodedFragments::InvalidUtf8PctTriplets(
+ &frag[..len_complete],
+ ));
+ self.result_continue_or_err()?;
+ if len_incomplete > 0 {
+ // At least the first `%` of an incomplete triplet exists.
+ self.result = (self.f)(PctEncodedFragments::StrayPercent);
+ if self.result.is_break() {
+ return Err(fmt::Error);
+ }
+ if len_incomplete > 1 {
+ // A single hexdigit follows the stray `%`; emit it as a plain string.
+ debug_assert_eq!(
+ len_incomplete, 2,
+ "[consistency] the length of incomplete percent-encoded \
+ triplet must be less than 2 bytes"
+ );
+ self.result = (self.f)(PctEncodedFragments::NoPctStr(
+ &frag[(len_complete + 1)..len_written],
+ ));
+ self.result_continue_or_err()?;
+ }
+ }
+ Ok(())
+ }
+}
+
+impl<F, B> fmt::Write for DecomposeWriter<'_, F, B>
+where
+ F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
+{
+ /// Decomposes `s` into fragments and passes each of them to the output function.
+ ///
+ /// Returns `Err(_)` once the output function has returned `ControlFlow::Break(_)`.
+ fn write_str(&mut self, s: &str) -> fmt::Result {
+ self.result_continue_or_err()?;
+ let mut rest = s;
+ while !rest.is_empty() {
+ let (len_consumed, result) = self.decoder.push_encoded(self.buf, rest);
+ if len_consumed == 0 {
+ // No byte of `rest` was consumed. `push_encoded` may still have moved
+ // bytes carried over from earlier `write_str` calls into `buf`; emit
+ // them here, otherwise they would be silently dropped.
+ if let PushResult::Undecodable(len_written @ 1..=u8::MAX) = result {
+ self.output_as_undecodable(len_written)?;
+ if rest.starts_with('%') {
+ // The carried-over bytes were an incomplete triplet and `rest`
+ // starts a new one: retry with the now-empty decoder.
+ continue;
+ }
+ }
+
+ // Write plain string prefix (if found).
+ let (plain_prefix, suffix) = find_split(rest, b'%').unwrap_or((rest, ""));
+ debug_assert!(
+ !plain_prefix.is_empty(),
+ "[consistency] `len_consumed == 0` indicates non-empty \
+ `rest` not starting with `%`"
+ );
+ self.result = (self.f)(PctEncodedFragments::NoPctStr(plain_prefix));
+ self.result_continue_or_err()?;
+ rest = suffix;
+ continue;
+ }
+
+ // Process decoding result.
+ match result {
+ PushResult::Decoded(len_written, c) => {
+ let len_written = usize::from(len_written.get());
+ let frag = core::str::from_utf8(&self.buf[..len_written])
+ .expect("[validity] `DecoderBuffer` writes a valid ASCII string");
+ self.result = (self.f)(PctEncodedFragments::Char(frag, c));
+ self.result_continue_or_err()?;
+ }
+ PushResult::Undecodable(len_written) => {
+ self.output_as_undecodable(len_written)?;
+ }
+ PushResult::NeedMoreBytes => {
+ // Nothing to write at this time.
+ }
+ }
+ rest = &rest[len_consumed..];
+ }
+ Ok(())
+ }
+}
+
+/// The result of feeding data to [`DecoderBuffer`].
+#[derive(Debug, Clone, Copy)]
+enum PushResult {
+ /// Input is still incomplete, needs more bytes to get the decoding result.
+ NeedMoreBytes,
+ /// Bytes decodable to valid UTF-8 sequence.
+ // `.0`: Length (in bytes) of the encoded fragment that was decoded.
+ // `.1`: Decoded character.
+ Decoded(NonZeroU8, char),
+ /// Bytes that cannot be decoded: triplets forming invalid UTF-8, possibly followed by an incomplete triplet.
+ // `.0`: Length (in bytes) of the undecodable fragment; it can be zero.
+ Undecodable(u8),
+}
+
+/// Buffer to contain (and to decode) incomplete percent-encoded triplets.
+#[derive(Default, Debug, Clone, Copy)]
+struct DecoderBuffer {
+ /// Percent-encoded triplets that possibly form a valid UTF-8 sequence once decoded.
+ //
+ // `3 * 4`: 3 ASCII characters for single percent-encoded triplet, and
+ // 4 triplets at most for single Unicode codepoint in UTF-8.
+ encoded: [u8; 12],
+ /// Decoded bytes.
+ decoded: [u8; 4],
+ /// Number of bytes available in the `encoded` buffer.
+ ///
+ /// `len_encoded / 3` also indicates the length of data in `decoded`.
+ len_encoded: u8,
+}
+
+impl DecoderBuffer {
+ /// Copies the first `remove_len` encoded bytes to `dest`, and removes them from the buffer.
+ ///
+ /// Panics if `remove_len` exceeds the buffered length or the length of `dest`.
+ fn write_and_pop(&mut self, dest: &mut [u8], remove_len: u8) {
+ let new_len = self.len_encoded - remove_len;
+ let remove_len = usize::from(remove_len);
+ let src_range = remove_len..usize::from(self.len_encoded);
+ dest[..remove_len].copy_from_slice(&self.encoded[..remove_len]);
+
+ if new_len == 0 {
+ *self = Self::default();
+ return;
+ }
+ // Shift the remaining encoded bytes (and their decoded counterparts) to the front.
+ self.encoded.copy_within(src_range, 0);
+ self.decoded
+ .copy_within((remove_len / 3)..usize::from(self.len_encoded / 3), 0);
+ self.len_encoded = new_len;
+ }
+
+ /// Pushes a byte of a (possible) percent-encoded triplet to the buffer.
+ ///
+ /// When the byte completes a triplet, the decoded byte is stored as well.
+ fn push_single_encoded_byte(&mut self, byte: u8) {
+ debug_assert!(
+ self.len_encoded < 12,
+ "[consistency] four percent-encoded triplets are enough for a unicode code point"
+ );
+ let pos_enc = usize::from(self.len_encoded);
+ self.len_encoded += 1;
+ self.encoded[pos_enc] = byte;
+ if self.len_encoded % 3 == 0 {
+ // A new percent-encoded triplet is read. Decode and remember.
+ let pos_dec = usize::from(self.len_encoded / 3 - 1);
+ let upper = self.encoded[pos_enc - 1];
+ let lower = byte;
+ debug_assert!(
+ upper.is_ascii_hexdigit() && lower.is_ascii_hexdigit(),
+ "[consistency] the `encoded` buffer should contain valid percent-encoded triplets"
+ );
+ self.decoded[pos_dec] = hexdigits_to_byte([upper, lower]);
+ }
+ }
+
+ /// Pushes the (possibly) encoded string to the buffer.
+ ///
+ /// Returns `(len_consumed, result)`, where `len_consumed` is the number of
+ /// bytes of `s` that were consumed. Whenever the result is not
+ /// `PushResult::NeedMoreBytes`, the reported fragment has already been
+ /// copied to the head of `buf` and removed from the internal buffer.
+ ///
+ /// # Preconditions
+ ///
+ /// * `buf` should be at least 12 bytes. If not, this method may panic.
+ #[must_use]
+ pub(crate) fn push_encoded(&mut self, buf: &mut [u8], s: &str) -> (usize, PushResult) {
+ debug_assert!(
+ buf.len() >= 12,
+ "[internal precondition] destination buffer should be at least 12 bytes"
+ );
+ let mut chars = s.chars();
+ let mut len_triplet_incomplete = self.len_encoded % 3;
+ for c in &mut chars {
+ if len_triplet_incomplete == 0 {
+ // Expect `%`.
+ if c != '%' {
+ // Undecodable.
+ // `- c.len_utf8()`: the last character is peeked but not consumed.
+ // It may be a multi-byte character, so subtracting `1` could leave
+ // `len_consumed` in the middle of the character.
+ let len_consumed = s.len() - chars.as_str().len() - c.len_utf8();
+ let len_result = self.len_encoded;
+ self.write_and_pop(buf, len_result);
+ return (len_consumed, PushResult::Undecodable(len_result));
+ }
+ self.push_single_encoded_byte(b'%');
+ len_triplet_incomplete = 1;
+ continue;
+ }
+
+ // Expect a nibble.
+ if !c.is_ascii_hexdigit() {
+ // Undecodable.
+ // `- c.len_utf8()`: the last (possibly multi-byte) character is
+ // peeked but not consumed.
+ let len_consumed = s.len() - chars.as_str().len() - c.len_utf8();
+ let len_result = self.len_encoded;
+ self.write_and_pop(buf, len_result);
+ return (len_consumed, PushResult::Undecodable(len_result));
+ }
+ self.push_single_encoded_byte(c as u8);
+ if len_triplet_incomplete == 1 {
+ len_triplet_incomplete = 2;
+ continue;
+ } else {
+ // Now a new percent-encoded triplet is read!
+ debug_assert_eq!(len_triplet_incomplete, 2);
+ len_triplet_incomplete = 0;
+ }
+
+ // Now a new percent-encoded triplet is read.
+ // Check if the buffer contains a valid decodable content.
+ let len_decoded = usize::from(self.len_encoded) / 3;
+ match core::str::from_utf8(&self.decoded[..len_decoded]) {
+ Ok(decoded_str) => {
+ // Successfully decoded.
+ let len_consumed = s.len() - chars.as_str().len();
+ let c = decoded_str
+ .chars()
+ .next()
+ .expect("[validity] `decoded` buffer is nonempty");
+ let len_result = NonZeroU8::new(self.len_encoded).expect(
+ "[consistency] `encoded` buffer is nonempty since \
+ `push_single_encoded_byte()` was called",
+ );
+ self.write_and_pop(buf, len_result.get());
+ return (len_consumed, PushResult::Decoded(len_result, c));
+ }
+ Err(e) => {
+ // Undecodable.
+ assert_eq!(
+ e.valid_up_to(),
+ 0,
+ "[consistency] `decoded` buffer contains at most one character"
+ );
+ let skip_len_decoded = match e.error_len() {
+ // Unexpected EOF. Wait for remaining input.
+ None => continue,
+ // Skip invalid bytes.
+ Some(v) => v,
+ };
+ let len_consumed = s.len() - chars.as_str().len();
+ // Each invalid decoded byte corresponds to one 3-byte triplet.
+ let len_result = skip_len_decoded as u8 * 3;
+ assert_ne!(
+ skip_len_decoded, 0,
+ "[consistency] empty bytes cannot be invalid"
+ );
+ self.write_and_pop(buf, len_result);
+ return (len_consumed, PushResult::Undecodable(len_result));
+ }
+ };
+ }
+ let len_consumed = s.len() - chars.as_str().len();
+ (len_consumed, PushResult::NeedMoreBytes)
+ }
+
+ /// Writes the incomplete data completely to the destination, and clears the internal buffer.
+ ///
+ /// Returns the number of bytes written, or `None` if the buffer was empty.
+ #[must_use]
+ pub(crate) fn flush(&mut self, buf: &mut [u8]) -> Option<NonZeroU8> {
+ let len_result = NonZeroU8::new(self.len_encoded)?;
+ // Emit the current (undecodable) buffer as is.
+ self.write_and_pop(buf, len_result.get());
+ debug_assert_eq!(
+ self.len_encoded, 0,
+ "[consistency] the buffer should be cleared after flushed"
+ );
+ Some(len_result)
+ }
+}
diff --git a/vendor/iri-string/src/parser/trusted.rs b/vendor/iri-string/src/parser/trusted.rs
new file mode 100644
index 00000000..f15c075e
--- /dev/null
+++ b/vendor/iri-string/src/parser/trusted.rs
@@ -0,0 +1,476 @@
+//! Fast parsers for trusted (already validated) input.
+//!
+//! Using this in wrong way will lead to unexpected wrong result.
+
+pub(crate) mod authority;
+
+use core::cmp::Ordering;
+use core::num::NonZeroUsize;
+
+use crate::components::{RiReferenceComponents, Splitter};
+use crate::format::eq_str_display;
+use crate::normalize::{is_pct_case_normalized, NormalizedAsciiOnlyHost, NormalizednessCheckMode};
+use crate::parser::str::{find_split2, find_split3, find_split4_hole, find_split_hole};
+use crate::spec::Spec;
+use crate::types::RiReferenceStr;
+
+ /// Consumes the leading `scheme ":"` of an absolute IRI/URI.
+ ///
+ /// Returns `(rest, scheme)`, where `scheme` is the text before the first
+ /// colon and `rest` is everything after that colon.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `find_split_hole` finds no `:` in `i`, which cannot happen for
+ /// a valid absolute IRI/URI.
+ #[must_use]
+ fn scheme_colon(i: &str) -> (&str, &str) {
+     find_split_hole(i, b':')
+         .map(|(scheme, rest)| (rest, scheme))
+         .expect("[precondition] absolute IRIs must have `scheme` part")
+ }
+
+ /// Consumes a leading `scheme ":"` when the input has one.
+ ///
+ /// Returns `(rest, Some(scheme))` when the delimiter reported by
+ /// `find_split4_hole` (searching for `:`, `/`, `?` and `#`) is a colon, and
+ /// `(i, None)` with the input untouched otherwise.
+ ///
+ /// Intended for the head of an `IRI-reference` or similar.
+ #[must_use]
+ fn scheme_colon_opt(i: &str) -> (&str, Option<&str>) {
+     if let Some((scheme, b':', rest)) = find_split4_hole(i, b':', b'/', b'?', b'#') {
+         return (rest, Some(scheme));
+     }
+     // No delimiter at all, or a non-colon delimiter was reported: no scheme.
+     (i, None)
+ }
+
+ /// Consumes a leading `"//" authority` when the input starts with `//`.
+ ///
+ /// Returns `(rest, Some(authority))` if the input starts with `//`, and
+ /// `(i, None)` otherwise. `rest` is empty when the authority runs to the end
+ /// of the input.
+ ///
+ /// Intended for the head of an `IRI-reference`, or for the remainder returned
+ /// by `scheme_colon`.
+ #[must_use]
+ fn slash_slash_authority_opt(i: &str) -> (&str, Option<&str>) {
+     let after_slashes = match i.strip_prefix("//") {
+         None => return (i, None),
+         Some(after_slashes) => after_slashes,
+     };
+     // A leading `//` could also be read as `path-abempty`, but that is not
+     // allowed as `relative-part`, so it is always treated as an authority here.
+     // `/`, `?` and `#` never appear inside `authority`, so the first of them
+     // marks its end.
+     if let Some((authority, rest)) = find_split3(after_slashes, b'/', b'?', b'#') {
+         (rest, Some(authority))
+     } else {
+         ("", Some(after_slashes))
+     }
+ }
+
+ /// Consumes everything that precedes the query/fragment part.
+ ///
+ /// Returns `(rest, before_query)`. The input is split with `find_split2` on
+ /// `?` and `#`; when neither is found, `rest` is empty and `before_query` is
+ /// the whole input.
+ #[must_use]
+ fn until_query(i: &str) -> (&str, &str) {
+     // `?` cannot appear before the query part.
+     if let Some((before_query, rest)) = find_split2(i, b'?', b'#') {
+         (rest, before_query)
+     } else {
+         ("", i)
+     }
+ }
+
+ /// Splits a `[ "?" query ] [ "#" fragment ]` tail into its two parts.
+ ///
+ /// Returns `(query, fragment)`; the leading `?` and `#` are not included in
+ /// the returned slices.
+ ///
+ /// The input must be empty or start with `?` or `#`.
+ #[must_use]
+ fn decompose_query_and_fragment(i: &str) -> (Option<&str>, Option<&str>) {
+     let first = match i.as_bytes().first() {
+         Some(&first) => first,
+         // Empty tail: neither a query nor a fragment.
+         None => return (None, None),
+     };
+     if first != b'?' {
+         // Only a fragment is present.
+         debug_assert_eq!(first, b'#');
+         return (None, Some(&i[1..]));
+     }
+     // A query is present; a fragment may follow it.
+     let after_mark = &i[1..];
+     if let Some((query, fragment)) = find_split_hole(after_mark, b'#') {
+         (Some(query), Some(fragment))
+     } else {
+         (Some(after_mark), None)
+     }
+ }
+
+ /// Decomposes the given valid `IRI-reference`.
+ ///
+ /// The returned value pairs the input with a `Splitter` built from four
+ /// optional byte offsets into the input: the end of the scheme, the end of
+ /// the authority, the start of the query and the start of the fragment.
+ #[must_use]
+ pub(crate) fn decompose_iri_reference<S: Spec>(
+ i: &RiReferenceStr<S>,
+ ) -> RiReferenceComponents<'_, S> {
+ /// Inner function to avoid unnecessary monomorphizations on `S`.
+ fn decompose(i: &str) -> Splitter {
+ // Total length; offsets below are derived as `len - remaining.len()`.
+ let len = i.len();
+
+ let (i, scheme_end) = {
+ let (i, scheme) = scheme_colon_opt(i);
+ // The scheme starts at offset 0, so its length is also its end offset.
+ let end = scheme.and_then(|s| NonZeroUsize::new(s.len()));
+ (i, end)
+ };
+ let (i, authority_end) = {
+ // 2: "//".len()
+ // Only meaningful when an authority is actually found below.
+ let start = len - i.len() + 2;
+ // `authority` does not contain the two slashes of `://`.
+ let (i, authority) = slash_slash_authority_opt(i);
+ let end = authority.and_then(|s| NonZeroUsize::new(start + s.len()));
+ (i, end)
+ };
+ let (i, _path) = until_query(i);
+
+ let (query_start, fragment_start) = {
+ // Offset just after the first `?` or `#` of the remaining input.
+ // This could theoretically be zero if `len` is `usize::MAX` and
+ // `i` has neither a query nor a fragment. However, this is
+ // practically impossible.
+ let after_first_prefix = NonZeroUsize::new((len - i.len()).wrapping_add(1));
+
+ let (query, fragment) = decompose_query_and_fragment(i);
+ match (query.is_some(), fragment) {
+ // Query and fragment: the fragment start is measured from the end.
+ (true, Some(fragment)) => {
+ (after_first_prefix, NonZeroUsize::new(len - fragment.len()))
+ }
+ (true, None) => (after_first_prefix, None),
+ // Fragment only: the first prefix character is the `#`.
+ (false, Some(_fragment)) => (None, after_first_prefix),
+ (false, None) => (None, None),
+ }
+ };
+
+ Splitter::new(scheme_end, authority_end, query_start, fragment_start)
+ }
+
+ RiReferenceComponents {
+ iri: i,
+ splitter: decompose(i.as_str()),
+ }
+ }
+
+ /// Returns the `scheme` of an IRI reference, or `None` if it has none.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid IRI reference.
+ #[inline]
+ #[must_use]
+ pub(crate) fn extract_scheme(i: &str) -> Option<&str> {
+     let (_rest, scheme) = scheme_colon_opt(i);
+     scheme
+ }
+
+ /// Returns the `scheme` of an absolute IRI.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid absolute IRI.
+ #[inline]
+ #[must_use]
+ pub(crate) fn extract_scheme_absolute(i: &str) -> &str {
+     let (_rest, scheme) = scheme_colon(i);
+     scheme
+ }
+
+ /// Returns the `authority` of an IRI reference, or `None` if it has none.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid IRI reference.
+ #[inline]
+ #[must_use]
+ pub(crate) fn extract_authority(i: &str) -> Option<&str> {
+     let (after_scheme, _scheme) = scheme_colon_opt(i);
+     let (_rest, authority) = slash_slash_authority_opt(after_scheme);
+     authority
+ }
+
+ /// Returns the `authority` of an absolute IRI, or `None` if it has none.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid absolute IRI.
+ #[inline]
+ #[must_use]
+ pub(crate) fn extract_authority_absolute(i: &str) -> Option<&str> {
+     let (after_scheme, _scheme) = scheme_colon(i);
+     let (_rest, authority) = slash_slash_authority_opt(after_scheme);
+     authority
+ }
+
+ /// Returns the `authority` of a relative IRI, or `None` if it has none.
+ ///
+ /// No scheme is skipped: the authority is looked for at the very start.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid relative IRI.
+ #[inline]
+ #[must_use]
+ pub(crate) fn extract_authority_relative(i: &str) -> Option<&str> {
+     let (_rest, authority) = slash_slash_authority_opt(i);
+     authority
+ }
+
+ /// Returns the `path` of an IRI reference.
+ ///
+ /// The optional scheme and authority are skipped, then everything up to the
+ /// query/fragment part is returned.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid IRI reference.
+ #[inline]
+ #[must_use]
+ pub(crate) fn extract_path(i: &str) -> &str {
+     let (after_scheme, _scheme) = scheme_colon_opt(i);
+     let (after_authority, _authority) = slash_slash_authority_opt(after_scheme);
+     let (_rest, path) = until_query(after_authority);
+     path
+ }
+
+ /// Returns the `path` of an absolute IRI.
+ ///
+ /// The scheme and the optional authority are skipped, then everything up to
+ /// the query/fragment part is returned.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid absolute IRI.
+ #[inline]
+ #[must_use]
+ pub(crate) fn extract_path_absolute(i: &str) -> &str {
+     let (after_scheme, _scheme) = scheme_colon(i);
+     let (after_authority, _authority) = slash_slash_authority_opt(after_scheme);
+     let (_rest, path) = until_query(after_authority);
+     path
+ }
+
+ /// Returns the `path` of a relative IRI.
+ ///
+ /// The optional authority is skipped, then everything up to the
+ /// query/fragment part is returned.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid relative IRI.
+ #[inline]
+ #[must_use]
+ pub(crate) fn extract_path_relative(i: &str) -> &str {
+     let (after_authority, _authority) = slash_slash_authority_opt(i);
+     let (_rest, path) = until_query(after_authority);
+     path
+ }
+
+ /// Returns the `query` of an IRI reference, or `None` if it has none.
+ ///
+ /// The leading `?` is not part of the returned string.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid IRI reference.
+ #[inline]
+ #[must_use]
+ pub(crate) fn extract_query(i: &str) -> Option<&str> {
+     let (after_path, _before_query) = until_query(i);
+     let (query, _fragment) = decompose_query_and_fragment(after_path);
+     query
+ }
+
+ /// Returns the `query` of an `absolute-IRI` string, or `None` if it has none.
+ ///
+ /// The leading `?` is not part of the returned string. Because an
+ /// `absolute-IRI` has no fragment, everything after the `?` is the query.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid `absolute-IRI` string.
+ #[must_use]
+ pub(crate) fn extract_query_absolute_iri(i: &str) -> Option<&str> {
+     let (after_path, _before_query) = until_query(i);
+     if after_path.is_empty() {
+         return None;
+     }
+     // The only delimiter that may follow the path here is `?`.
+     debug_assert_eq!(
+         after_path.as_bytes().first(),
+         Some(&b'?'),
+         "`absolute-IRI` string must not have `fragment part"
+     );
+     Some(&after_path[1..])
+ }
+
+ /// Splits an IRI string at its fragment.
+ ///
+ /// Returns `(prefix, fragment)`. The `#` that introduces the fragment belongs
+ /// to neither part; `fragment` is `None` when the input has no `#`.
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid IRI reference.
+ #[inline]
+ #[must_use]
+ pub(crate) fn split_fragment(iri: &str) -> (&str, Option<&str>) {
+     // In a valid IRI the first `#` always starts the fragment, because none of
+     // the preceding components can contain a `#`:
+     //
+     // > ```
+     // > scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+     // > ```
+     // >
+     // > --- [RFC 3986, section 3.1. Scheme](https://tools.ietf.org/html/rfc3986#section-3.1)
+     //
+     // > The authority component is preceded by a double slash ("//") and is terminated by the
+     // > next slash ("/"), question mark ("?"), or number sign ("#") character, or by the end
+     // > of the URI.
+     // >
+     // > --- [RFC 3986, section 3.2. Authority](https://tools.ietf.org/html/rfc3986#section-3.2)
+     //
+     // > The path is terminated by the first question mark ("?") or number sign ("#")
+     // > character, or by the end of the URI.
+     // >
+     // > --- [RFC 3986, section 3.3. Path](https://tools.ietf.org/html/rfc3986#section-3.3)
+     //
+     // > The query component is indicated by the first question mark ("?") character and
+     // > terminated by a number sign ("#") character or by the end of the URI.
+     // >
+     // > --- [RFC 3986, section 3.4. Query](https://tools.ietf.org/html/rfc3986#section-3.4)
+     find_split_hole(iri, b'#').map_or((iri, None), |(prefix, fragment)| {
+         (prefix, Some(fragment))
+     })
+ }
+
+ /// Returns the fragment of the given IRI, or `None` if it has none.
+ ///
+ /// The `#` that introduces the fragment is not part of the returned string.
+ #[inline]
+ #[must_use]
+ pub(crate) fn extract_fragment(iri: &str) -> Option<&str> {
+     let (_prefix, fragment) = split_fragment(iri);
+     fragment
+ }
+
+ /// Returns `true` if the string is normalized.
+ ///
+ /// If this function returns `true`, normalization input and output will be identical.
+ ///
+ /// In this function, "normalized" means that any of the normalization below
+ /// won't change the input on normalization:
+ ///
+ /// * syntax-based normalization,
+ /// * case normalization,
+ /// * percent-encoding normalization, and
+ /// * path segment normalization.
+ ///
+ /// Note that scheme-based normalization is not considered.
+ ///
+ /// The input is split with `scheme_colon`, so it must have a `scheme` part.
+ #[must_use]
+ pub(crate) fn is_normalized<S: Spec>(i: &str, mode: NormalizednessCheckMode) -> bool {
+ let (i, scheme) = scheme_colon(i);
+ let (after_authority, authority) = slash_slash_authority_opt(i);
+ let (_after_path, path) = until_query(after_authority);
+
+ // Syntax-based normalization: uppercase chars in `scheme` should be
+ // converted to lowercase.
+ if scheme.bytes().any(|b| b.is_ascii_uppercase()) {
+ return false;
+ }
+
+ // Case normalization: ASCII alphabets in US-ASCII only `host` should be
+ // normalized to lowercase.
+ // Case normalization: ASCII alphabets in percent-encoding triplet should be
+ // normalized to uppercase.
+ // Percent-encoding normalization: unreserved characters should be decoded
+ // in `userinfo`, `host`, `path`, `query`, and `fragments`.
+ // Path segment normalization: the path should not have dot segments (`.`
+ // and/or `..`).
+ //
+ // Note that `authority` can have percent-encoded `userinfo`.
+ if let Some(authority) = authority {
+ let authority_components = authority::decompose_authority(authority);
+
+ // Check `host`.
+ let host = authority_components.host();
+ let host_is_normalized = if is_ascii_only_host(host) {
+ eq_str_display(host, &NormalizedAsciiOnlyHost::new(host))
+ } else {
+ // If the host is not ASCII-only, conversion to lowercase is not performed.
+ is_pct_case_normalized::<S>(host)
+ };
+ if !host_is_normalized {
+ return false;
+ }
+
+ // Check percent encodings in `userinfo`.
+ if let Some(userinfo) = authority_components.userinfo() {
+ if !is_pct_case_normalized::<S>(userinfo) {
+ return false;
+ }
+ }
+ }
+
+ // Check `path`.
+ //
+ // Syntax-based normalization: Dot segments might be removed.
+ // Note that we don't have to care `%2e` and `%2E` since `.` is unreserved
+ // and they will be decoded if not normalized.
+ // Also note that WHATWG serialization will use `/.//` as a path prefix if
+ // the path is absolute and won't modify the path if the path is relative.
+ //
+ // Percent-encoding normalization: unreserved characters should be decoded
+ // in `path`, `query`, and `fragments`.
+ //
+ // `path_span_no_dot_segments` is the part of the path that must be free of
+ // dot segments, or `None` when no such check applies.
+ let path_span_no_dot_segments = if authority.is_some() {
+ Some(path)
+ } else {
+ match mode {
+ NormalizednessCheckMode::Default => Some(path.strip_prefix("/.//").unwrap_or(path)),
+ NormalizednessCheckMode::Rfc3986 => Some(path),
+ NormalizednessCheckMode::PreserveAuthoritylessRelativePath => {
+ if path.starts_with('/') {
+ // Absolute.
+ Some(path.strip_prefix("/.//").unwrap_or(path))
+ } else {
+ // Relative. Treat the path as "opaque". No span to check.
+ None
+ }
+ }
+ }
+ };
+ if let Some(path_span_no_dot_segments) = path_span_no_dot_segments {
+ if path_span_no_dot_segments
+ .split('/')
+ .any(|segment| matches!(segment, "." | ".."))
+ {
+ return false;
+ }
+ }
+ // Everything after the authority (path, query and fragment) is checked
+ // for percent-encoding normalization in one go.
+ is_pct_case_normalized::<S>(after_authority)
+ }
+
+ /// Combines two ASCII hexadecimal digits into the byte they encode.
+ ///
+ /// `upper` supplies the high four bits and `lower` the low four bits.
+ ///
+ /// # Preconditions
+ ///
+ /// The parameters `upper` and `lower` should be an ASCII hexadecimal digit.
+ #[must_use]
+ pub(super) fn hexdigits_to_byte([upper, lower]: [u8; 2]) -> u8 {
+     /// Converts a single ASCII hexadecimal digit into its value (`0..=15`).
+     fn nibble(digit: u8) -> u8 {
+         // The high four bits of the ASCII code tell the digit classes apart:
+         // `0x3_` for `0`-`9`, `0x4_` for `A`-`F` and `0x6_` for `a`-`f`.
+         let offset = match (digit & 0xf0).cmp(&0x40) {
+             Ordering::Less => b'0',
+             Ordering::Equal => b'A' - 10,
+             Ordering::Greater => b'a' - 10,
+         };
+         digit - offset
+     }
+
+     (nibble(upper) << 4) + nibble(lower)
+ }
+
+ /// Decodes the two leading hexdigit bytes of `s` into one byte.
+ ///
+ /// Returns `(byte, rest)`, where `rest` is `s` without its first two bytes.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `s` is shorter than two bytes, or if `s[2..]` is not a valid
+ /// string slice. The two bytes are expected to be ASCII hexadecimal digits,
+ /// as required by `hexdigits_to_byte`.
+ #[must_use]
+ pub(crate) fn take_xdigits2(s: &str) -> (u8, &str) {
+     let raw = s.as_bytes();
+     let upper_xdigit = *raw
+         .first()
+         .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference");
+     let lower_xdigit = *raw
+         .get(1)
+         .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference");
+     let decoded = hexdigits_to_byte([upper_xdigit, lower_xdigit]);
+     (decoded, &s[2..])
+ }
+
+ /// Tells whether a `host`/`ihost` string is made of US-ASCII characters only.
+ ///
+ /// Percent-encoded triplets are decoded before the check, so a triplet that
+ /// encodes a non-ASCII byte makes this return `false` as well.
+ ///
+ /// # Precondition
+ ///
+ /// The given string should be valid `host` or `host ":" port` string.
+ #[must_use]
+ pub(crate) fn is_ascii_only_host(mut host: &str) -> bool {
+     loop {
+         // Jump to the next character that needs a closer look.
+         let hit = host
+             .char_indices()
+             .find(|&(_, c)| c == '%' || !c.is_ascii());
+         let (pos, found) = match hit {
+             Some(hit) => hit,
+             // Neither non-ASCII characters nor percent-encoded characters left.
+             None => return true,
+         };
+         if found != '%' {
+             // Non-ASCII character found.
+             debug_assert!(!found.is_ascii());
+             return false;
+         }
+         // Percent-encoded character found: inspect the byte it encodes.
+         let (byte, rest) = take_xdigits2(&host[(pos + 1)..]);
+         if !byte.is_ascii() {
+             return false;
+         }
+         host = rest;
+     }
+ }
diff --git a/vendor/iri-string/src/parser/trusted/authority.rs b/vendor/iri-string/src/parser/trusted/authority.rs
new file mode 100644
index 00000000..83e41298
--- /dev/null
+++ b/vendor/iri-string/src/parser/trusted/authority.rs
@@ -0,0 +1,32 @@
+//! Parsers for trusted `authority` string.
+
+use crate::components::AuthorityComponents;
+use crate::parser::str::{find_split_hole, rfind_split2};
+
+ /// Locates the `host` inside a trusted `authority` string.
+ ///
+ /// The result stores the authority together with the byte offsets where the
+ /// host starts (just after `userinfo "@"`, or 0) and ends (just before
+ /// `":" port`, or the end of the authority).
+ ///
+ /// # Precondition
+ ///
+ /// The given string must be a valid `authority` taken from a valid IRI
+ /// reference.
+ #[inline]
+ #[must_use]
+ pub(crate) fn decompose_authority(authority: &str) -> AuthorityComponents<'_> {
+     // Skip `userinfo "@"` when present.
+     let (host_and_port, host_start) =
+         if let Some((userinfo, rest)) = find_split_hole(authority, b'@') {
+             (rest, userinfo.len() + 1)
+         } else {
+             (authority, 0)
+         };
+     // Searching backwards for `:` or `]`: only a suffix that starts with `:`
+     // is counted as a `":" port` part.
+     let colon_port_len = match rfind_split2(host_and_port, b':', b']') {
+         Some((_, suffix)) => {
+             if suffix.starts_with(':') {
+                 suffix.len()
+             } else {
+                 0
+             }
+         }
+         None => 0,
+     };
+
+     AuthorityComponents {
+         authority,
+         host_start,
+         host_end: authority.len() - colon_port_len,
+     }
+ }
diff --git a/vendor/iri-string/src/parser/validate.rs b/vendor/iri-string/src/parser/validate.rs
new file mode 100644
index 00000000..59625394
--- /dev/null
+++ b/vendor/iri-string/src/parser/validate.rs
@@ -0,0 +1,225 @@
+//! Validating parsers for non-trusted (possibly invalid) input.
+
+mod authority;
+mod path;
+
+use crate::parser::char;
+use crate::parser::str::{
+ find_split, find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded,
+};
+use crate::spec::Spec;
+use crate::validate::Error;
+
+use self::authority::validate_authority;
+pub(crate) use self::authority::{validate_host, validate_userinfo};
+pub(crate) use self::path::validate_path;
+use self::path::{
+ validate_path_abempty, validate_path_absolute_authority_absent,
+ validate_path_relative_authority_absent,
+};
+
+ /// Returns `Ok(_)` if the string matches `scheme`.
+ ///
+ /// A scheme is an ASCII letter followed by any number of scheme-continue
+ /// characters (`ALPHA / DIGIT / "+" / "-" / "."`).
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the string is empty, does not start with an ASCII
+ /// letter, or contains a byte that is not a scheme-continue character.
+ pub(crate) fn validate_scheme(i: &str) -> Result<(), Error> {
+     // `split_first` makes the empty string an ordinary invalid input instead
+     // of an out-of-bounds index.
+     let is_valid = match i.as_bytes().split_first() {
+         Some((first, rest)) => {
+             first.is_ascii_alphabetic()
+                 && rest
+                     .iter()
+                     .all(|&b| b.is_ascii() && char::is_ascii_scheme_continue(b))
+         }
+         None => false,
+     };
+     if is_valid {
+         Ok(())
+     } else {
+         Err(Error::new())
+     }
+ }
+
+ /// Checks that the string matches `query` or `iquery`.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if `satisfy_chars_with_pct_encoded` rejects the string
+ /// for the query character classes.
+ pub(crate) fn validate_query<S: Spec>(i: &str) -> Result<(), Error> {
+     if !satisfy_chars_with_pct_encoded(i, char::is_ascii_frag_query, char::is_nonascii_query::<S>)
+     {
+         return Err(Error::new());
+     }
+     Ok(())
+ }
+
+ /// Checks that the string matches the `authority path-abempty` rule sequence.
+ ///
+ /// The input is split with `find_split` on `/`: the first part is validated
+ /// as an authority and the second as `path-abempty`. Without any slash the
+ /// whole input is the authority and the path is empty.
+ fn validate_authority_path_abempty<S: Spec>(i: &str) -> Result<(), Error> {
+     let (maybe_authority, maybe_path) = find_split(i, b'/').unwrap_or((i, ""));
+     validate_authority::<S>(maybe_authority)
+         .and_then(|()| validate_path_abempty::<S>(maybe_path))
+ }
+
+ /// Checks that the string matches the `URI`/`IRI` rules.
+ ///
+ /// A scheme is required; a fragment is allowed.
+ #[inline]
+ pub(crate) fn validate_uri<S: Spec>(i: &str) -> Result<(), Error> {
+     let rule = UriReferenceRule::Absolute;
+     validate_uri_reference_common::<S>(i, rule)
+ }
+
+ /// Checks that the string matches the `URI-reference`/`IRI-reference` rules.
+ ///
+ /// Both absolute and relative references are accepted.
+ #[inline]
+ pub(crate) fn validate_uri_reference<S: Spec>(i: &str) -> Result<(), Error> {
+     let rule = UriReferenceRule::Any;
+     validate_uri_reference_common::<S>(i, rule)
+ }
+
+ /// Checks that the string matches the `absolute-URI`/`absolute-IRI` rules.
+ ///
+ /// A scheme is required and a fragment is not allowed.
+ #[inline]
+ pub(crate) fn validate_absolute_uri<S: Spec>(i: &str) -> Result<(), Error> {
+     let rule = UriReferenceRule::AbsoluteWithoutFragment;
+     validate_uri_reference_common::<S>(i, rule)
+ }
+
+ /// Syntax rule for URI/IRI references.
+ ///
+ /// Selects which grammar production `validate_uri_reference_common` checks.
+ #[derive(Clone, Copy, PartialEq, Eq, Hash)]
+ enum UriReferenceRule {
+ /// `URI` and `IRI`.
+ ///
+ /// This can have a fragment, but cannot be relative.
+ Absolute,
+ /// `absolute-URI` and `absolute-IRI`.
+ ///
+ /// This cannot have a fragment, and cannot be relative.
+ AbsoluteWithoutFragment,
+ /// `URI-reference` and `IRI-reference`.
+ ///
+ /// This can be relative, and can have a fragment.
+ Any,
+ }
+
+ impl UriReferenceRule {
+     /// Returns `true` if a relative reference is allowed by this rule.
+     #[inline]
+     #[must_use]
+     fn is_relative_allowed(self) -> bool {
+         matches!(self, Self::Any)
+     }
+
+     /// Returns `true` if a fragment part is allowed by this rule.
+     #[inline]
+     #[must_use]
+     fn is_fragment_allowed(self) -> bool {
+         match self {
+             Self::Absolute | Self::Any => true,
+             Self::AbsoluteWithoutFragment => false,
+         }
+     }
+ }
+
+ /// Shared validator behind the `URI`, `absolute-URI` and `URI-reference`
+ /// entry points (and their IRI counterparts).
+ ///
+ /// `ref_rule` decides whether a relative reference and/or a fragment is
+ /// acceptable.
+ fn validate_uri_reference_common<S: Spec>(
+     i: &str,
+     ref_rule: UriReferenceRule,
+ ) -> Result<(), Error> {
+     // Validate `scheme ":"`.
+     let after_scheme = match find_split_hole(i, b':') {
+         // An empty scheme is rejected outright.
+         Some(("", _)) => return Err(Error::new()),
+         Some((maybe_scheme, rest)) if validate_scheme(maybe_scheme).is_ok() => rest,
+         // Either there is no colon at all, or the string before the first
+         // colon is not a scheme. Falling back to `relative-ref` parsing.
+         _ => {
+             return if ref_rule.is_relative_allowed() {
+                 validate_relative_ref::<S>(i)
+             } else {
+                 Err(Error::new())
+             };
+         }
+     };
+
+     // Validate `hier-part`.
+     let (authority_present, hier_part) = match after_scheme.strip_prefix("//") {
+         Some(stripped) => (true, stripped),
+         None => (false, after_scheme),
+     };
+     let (before_query, after_path) = match find_split2_hole(hier_part, b'?', b'#') {
+         Some((head, delim, tail)) => (head, Some((delim, tail))),
+         None => (hier_part, None),
+     };
+     if authority_present {
+         validate_authority_path_abempty::<S>(before_query)?;
+     } else {
+         // Authority is absent.
+         validate_path_absolute_authority_absent::<S>(before_query)?;
+     }
+
+     // Validate `[ "?" query ] [ "#" fragment ]`.
+     match after_path {
+         Some((first, rest)) => {
+             validate_after_path::<S>(first, rest, ref_rule.is_fragment_allowed())
+         }
+         None => Ok(()),
+     }
+ }
+
+ /// Checks that the string matches the `relative-ref`/`irelative-ref` rules.
+ ///
+ /// The `relative-part` is validated first, then the optional query and
+ /// fragment (a fragment is always allowed here).
+ pub(crate) fn validate_relative_ref<S: Spec>(i: &str) -> Result<(), Error> {
+     // Validate `relative-part`.
+     let (authority_present, relative_part) = match i.strip_prefix("//") {
+         Some(stripped) => (true, stripped),
+         None => (false, i),
+     };
+     let (before_query, after_path) = match find_split2_hole(relative_part, b'?', b'#') {
+         Some((head, delim, tail)) => (head, Some((delim, tail))),
+         None => (relative_part, None),
+     };
+     if authority_present {
+         validate_authority_path_abempty::<S>(before_query)?;
+     } else {
+         // Authority is absent.
+         validate_path_relative_authority_absent::<S>(before_query)?;
+     }
+
+     // Validate `[ "?" query ] [ "#" fragment ]`.
+     match after_path {
+         Some((first, rest)) => validate_after_path::<S>(first, rest, true),
+         None => Ok(()),
+     }
+ }
+
+ /// Returns `Ok(_)` if the string matches `[ "?" query ] [ "#" fragment ]` (or IRI version).
+ ///
+ /// `first` is the delimiter that ended the path (`?` or `#`) and `rest` is
+ /// everything after it.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the query or the fragment is invalid, or if a `#` is
+ /// present while `accept_fragment` is `false`. The latter holds even when the
+ /// fragment after the `#` is empty, because rules without a fragment do not
+ /// allow the `#` delimiter at all.
+ fn validate_after_path<S: Spec>(first: u8, rest: &str, accept_fragment: bool) -> Result<(), Error> {
+     // `maybe_fragment` is `Some(_)` exactly when a `#` delimiter is present,
+     // so that an empty fragment is not confused with an absent one.
+     let (maybe_query, maybe_fragment) = if first == b'?' {
+         match find_split_hole(rest, b'#') {
+             Some((query, fragment)) => (query, Some(fragment)),
+             None => (rest, None),
+         }
+     } else {
+         debug_assert_eq!(first, b'#');
+         ("", Some(rest))
+     };
+     validate_query::<S>(maybe_query)?;
+     match maybe_fragment {
+         None => Ok(()),
+         Some(_) if !accept_fragment => Err(Error::new()),
+         Some(fragment) => validate_fragment::<S>(fragment),
+     }
+ }
+
+ /// Checks that the string matches the `fragment`/`ifragment` rules.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if `satisfy_chars_with_pct_encoded` rejects the string
+ /// for the fragment character classes.
+ pub(crate) fn validate_fragment<S: Spec>(i: &str) -> Result<(), Error> {
+     let accepted = satisfy_chars_with_pct_encoded(
+         i,
+         char::is_ascii_frag_query,
+         char::is_nonascii_fragment::<S>,
+     );
+     if !accepted {
+         return Err(Error::new());
+     }
+     Ok(())
+ }
diff --git a/vendor/iri-string/src/parser/validate/authority.rs b/vendor/iri-string/src/parser/validate/authority.rs
new file mode 100644
index 00000000..fb41085e
--- /dev/null
+++ b/vendor/iri-string/src/parser/validate/authority.rs
@@ -0,0 +1,296 @@
+//! Parsers for authority.
+
+use core::mem;
+
+use crate::parser::char;
+use crate::parser::str::{
+ find_split_hole, get_wrapped_inner, rfind_split_hole, satisfy_chars_with_pct_encoded,
+ strip_ascii_char_prefix,
+};
+use crate::spec::Spec;
+use crate::validate::Error;
+
+ /// Checks that the string matches `userinfo` or `iuserinfo`.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if `satisfy_chars_with_pct_encoded` rejects the string
+ /// for the userinfo character classes.
+ pub(crate) fn validate_userinfo<S: Spec>(i: &str) -> Result<(), Error> {
+     let accepted = satisfy_chars_with_pct_encoded(
+         i,
+         char::is_ascii_userinfo_ipvfutureaddr,
+         char::is_nonascii_userinfo::<S>,
+     );
+     if !accepted {
+         return Err(Error::new());
+     }
+     Ok(())
+ }
+
+ /// Returns `true` if the string matches `dec-octet`.
+ ///
+ /// That is, the string must be a decimal number from "0" to "255" written
+ /// without leading zeros.
+ #[must_use]
+ fn is_dec_octet(i: &str) -> bool {
+     match i.as_bytes() {
+         // 0-9
+         [ones] => ones.is_ascii_digit(),
+         // 10-99
+         [tens, ones] => (b'1'..=b'9').contains(tens) && ones.is_ascii_digit(),
+         // 100-199
+         [b'1', tens, ones] => tens.is_ascii_digit() && ones.is_ascii_digit(),
+         // 250-255
+         [b'2', b'5', ones] => (b'0'..=b'5').contains(ones),
+         // 200-249
+         [b'2', tens, ones] => (b'0'..=b'4').contains(tens) && ones.is_ascii_digit(),
+         _ => false,
+     }
+ }
+
+ /// Checks that the string matches `IPv4address`.
+ ///
+ /// The string must consist of four `dec-octet`s separated by single dots.
+ fn validate_ipv4address(i: &str) -> Result<(), Error> {
+     let mut remaining = i;
+     // The first three octets are each followed by a dot.
+     for _ in 0..3 {
+         let (octet, tail) = find_split_hole(remaining, b'.').ok_or_else(Error::new)?;
+         if !is_dec_octet(octet) {
+             return Err(Error::new());
+         }
+         remaining = tail;
+     }
+     // Whatever is left must be exactly the fourth octet.
+     if is_dec_octet(remaining) {
+         Ok(())
+     } else {
+         Err(Error::new())
+     }
+ }
+
+ /// A part of IPv6 addr.
+ ///
+ /// Produced by `split_v6_addr_part` for each step through an address.
+ #[derive(Clone, Copy)]
+ enum V6AddrPart {
+ /// `[0-9a-fA-F]{1,4}::`.
+ H16Omit,
+ /// `[0-9a-fA-F]{1,4}:`.
+ H16Cont,
+ /// `[0-9a-fA-F]{1,4}`.
+ ///
+ /// No colon follows, so this is the last part of the address.
+ H16End,
+ /// IPv4 address.
+ ///
+ /// No colon follows, so this is the last part of the address.
+ V4,
+ /// `::`.
+ Omit,
+ }
+
+ /// Takes the next component off the front of an IPv6 address string.
+ ///
+ /// Returns `(rest, part)`: the unparsed remainder and the kind of component
+ /// that was consumed.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the front of the string is not a valid component.
+ fn split_v6_addr_part(i: &str) -> Result<(&str, V6AddrPart), Error> {
+     debug_assert!(!i.is_empty());
+     let (prefix, rest) = match find_split_hole(i, b':') {
+         Some(parts) => parts,
+         None => {
+             // No colon left: this is the final component.
+             if i.len() >= 5 {
+                 // Possibly `IPv4address`.
+                 validate_ipv4address(i)?;
+                 return Ok(("", V6AddrPart::V4));
+             }
+             return if i.bytes().all(|b| b.is_ascii_hexdigit()) {
+                 Ok(("", V6AddrPart::H16End))
+             } else {
+                 Err(Error::new())
+             };
+         }
+     };
+
+     // Too long to be an `h16`.
+     if prefix.len() >= 5 {
+         return Err(Error::new());
+     }
+
+     if prefix.is_empty() {
+         // A leading colon is only valid as the first half of `::`.
+         return strip_ascii_char_prefix(rest, b':')
+             .map(|rest| (rest, V6AddrPart::Omit))
+             .ok_or_else(Error::new);
+     }
+
+     // Should be `h16`.
+     debug_assert!((1..=4).contains(&prefix.len()));
+     if !prefix.bytes().all(|b| b.is_ascii_hexdigit()) {
+         return Err(Error::new());
+     }
+     // A second colon right after the first one means `h16 "::"`.
+     Ok(match strip_ascii_char_prefix(rest, b':') {
+         Some(rest) => (rest, V6AddrPart::H16Omit),
+         None => (rest, V6AddrPart::H16Cont),
+     })
+ }
+
+ /// Returns `Ok(_)` if the string matches `IPv6address`.
+ ///
+ /// The address is consumed part by part with `split_v6_addr_part`, counting
+ /// 16-bit groups and remembering whether a `::` was seen. The address is
+ /// accepted if it has exactly 8 groups without `::`, or fewer than 8 groups
+ /// with a single `::`.
+ fn validate_ipv6address(mut i: &str) -> Result<(), Error> {
+ // Number of 16-bit groups seen so far.
+ let mut h16_count = 0;
+ // Whether `::` has already been seen.
+ let mut is_omitted = false;
+ while !i.is_empty() {
+ let (rest, part) = split_v6_addr_part(i)?;
+ match part {
+ V6AddrPart::H16Omit => {
+ h16_count += 1;
+ if mem::replace(&mut is_omitted, true) {
+ // Omitted twice.
+ return Err(Error::new());
+ }
+ }
+ V6AddrPart::H16Cont => {
+ h16_count += 1;
+ if rest.is_empty() {
+ // `H16Cont` cannot be the last part of an IPv6 address.
+ return Err(Error::new());
+ }
+ }
+ V6AddrPart::H16End => {
+ h16_count += 1;
+ break;
+ }
+ V6AddrPart::V4 => {
+ debug_assert!(rest.is_empty());
+ // A trailing IPv4 address is counted as two 16-bit groups.
+ h16_count += 2;
+ break;
+ }
+ V6AddrPart::Omit => {
+ if mem::replace(&mut is_omitted, true) {
+ // Omitted twice.
+ return Err(Error::new());
+ }
+ }
+ }
+ // Bail out early once there are too many groups. The final parts
+ // (`H16End`, `V4`) skip this via `break` and are covered by the check
+ // after the loop.
+ if h16_count > 8 {
+ return Err(Error::new());
+ }
+ i = rest;
+ }
+ // With `::` there must be fewer than 8 groups; without it, exactly 8.
+ let is_valid = if is_omitted {
+ h16_count < 8
+ } else {
+ h16_count == 8
+ };
+ if is_valid {
+ Ok(())
+ } else {
+ Err(Error::new())
+ }
+ }
+
+ /// Checks that the string matches `authority` or `iauthority`.
+ ///
+ /// The optional `userinfo "@"` prefix is validated and stripped, an optional
+ /// all-digit `":" port` suffix is stripped, and the remainder is validated
+ /// as a host.
+ pub(super) fn validate_authority<S: Spec>(i: &str) -> Result<(), Error> {
+     // Strip and validate `userinfo`.
+     let host_port = match find_split_hole(i, b'@') {
+         Some((maybe_userinfo, rest)) => {
+             validate_userinfo::<S>(maybe_userinfo)?;
+             rest
+         }
+         None => i,
+     };
+     // `host` can contain colons, but `port` cannot, so the port is searched
+     // for from the end. If the text after the last colon is not all digits,
+     // the whole remainder is treated as the host.
+     let maybe_host = match rfind_split_hole(host_port, b':') {
+         Some((before_port, maybe_port)) if maybe_port.bytes().all(|b| b.is_ascii_digit()) => {
+             before_port
+         }
+         _ => host_port,
+     };
+     // Validate `host`.
+     validate_host::<S>(maybe_host)
+ }
+
+ /// Validates `host`.
+ ///
+ /// A host wrapped in `[` and `]` is validated as an `IP-literal` (either
+ /// `IPvFuture` or `IPv6address`); anything else is validated against the
+ /// `reg-name` character classes.
+ pub(crate) fn validate_host<S: Spec>(i: &str) -> Result<(), Error> {
+     let maybe_addr = match get_wrapped_inner(i, b'[', b']') {
+         Some(inner) => inner,
+         None => {
+             // `IPv4address` or `reg-name`. No need to distinguish them here.
+             let accepted = satisfy_chars_with_pct_encoded(
+                 i,
+                 char::is_ascii_regname,
+                 char::is_nonascii_regname::<S>,
+             );
+             return if accepted { Ok(()) } else { Err(Error::new()) };
+         }
+     };
+
+     // `IP-literal`.
+     // Note that `v` here is case insensitive. See RFC 3987 section 3.2.2.
+     let future_rest = strip_ascii_char_prefix(maybe_addr, b'v')
+         .or_else(|| strip_ascii_char_prefix(maybe_addr, b'V'));
+     let maybe_addr_rest = match future_rest {
+         Some(rest) => rest,
+         // `IPv6address`.
+         None => return validate_ipv6address(maybe_addr),
+     };
+
+     // `IPvFuture`.
+     let (maybe_ver, maybe_future_addr) =
+         find_split_hole(maybe_addr_rest, b'.').ok_or_else(Error::new)?;
+     // Validate version: one or more hexadecimal digits.
+     let version_ok = !maybe_ver.is_empty() && maybe_ver.bytes().all(|b| b.is_ascii_hexdigit());
+     if !version_ok {
+         return Err(Error::new());
+     }
+     // Validate address: one or more allowed ASCII characters.
+     let addr_ok = !maybe_future_addr.is_empty()
+         && maybe_future_addr.is_ascii()
+         && maybe_future_addr
+             .bytes()
+             .all(char::is_ascii_userinfo_ipvfutureaddr);
+     if addr_ok {
+         Ok(())
+     } else {
+         Err(Error::new())
+     }
+ }
+
+ #[cfg(test)]
+ #[cfg(feature = "alloc")]
+ mod tests {
+     use super::*;
+
+     use alloc::format;
+
+     /// Asserts that `$parser` accepts every listed input.
+     macro_rules! assert_validate {
+         ($parser:expr, $($input:expr),* $(,)?) => {{
+             $({
+                 let input = $input;
+                 let input: &str = input.as_ref();
+                 assert!($parser(input).is_ok(), "input={:?}", input);
+             })*
+         }};
+     }
+
+     #[test]
+     fn test_ipv6address() {
+         // Hand-written addresses: full forms, a trailing IPv4 part, and `::`.
+         assert_validate!(
+             validate_ipv6address,
+             "a:bB:cCc:dDdD:e:F:a:B",
+             "1:1:1:1:1:1:1:1",
+             "1:1:1:1:1:1:1.1.1.1",
+             "2001:db8::7",
+         );
+
+         // Builds `n` groups of `1` joined by colons (empty for `n == 0`).
+         let ones = |n: usize| {
+             let mut groups = "1:".repeat(n);
+             groups.pop();
+             groups
+         };
+         // Generate IPv6 addresses with `::` for every prefix/suffix length.
+         for len_pref in 0..=7 {
+             let head = ones(len_pref);
+             for len_suf in 1..=(7 - len_pref) {
+                 let compressed = format!("{}::{}", head, ones(len_suf));
+                 assert_validate!(validate_ipv6address, &compressed);
+
+                 // When the suffix has room, also try an IPv4 address in place
+                 // of its last two groups.
+                 if len_suf > 2 {
+                     let mixed = format!("{}::{}:1.1.1.1", head, ones(len_suf - 2));
+                     assert_validate!(validate_ipv6address, &mixed);
+                 } else if len_suf == 2 {
+                     let mixed = format!("{}::1.1.1.1", head);
+                     assert_validate!(validate_ipv6address, &mixed);
+                 }
+             }
+         }
+     }
+ }
diff --git a/vendor/iri-string/src/parser/validate/path.rs b/vendor/iri-string/src/parser/validate/path.rs
new file mode 100644
index 00000000..1b09c84b
--- /dev/null
+++ b/vendor/iri-string/src/parser/validate/path.rs
@@ -0,0 +1,91 @@
+//! Parsers for path.
+
+use crate::parser::char;
+use crate::parser::str::{find_split2_hole, satisfy_chars_with_pct_encoded};
+use crate::spec::Spec;
+use crate::validate::Error;
+
+ /// Returns `Ok(_)` if the string matches `path-abempty` or `ipath-abempty`.
+ ///
+ /// An empty string is accepted as is. Any other input must start with `/`;
+ /// the text after that first slash is then checked with
+ /// `satisfy_chars_with_pct_encoded`, using `char::is_ascii_pchar_slash` for
+ /// ASCII characters and the spec's `is_nonascii_char_unreserved` for the rest.
+ ///
+ /// # Errors
+ ///
+ /// Returns `Error` when a non-empty input lacks the leading `/`, or when the
+ /// character check fails.
+ pub(super) fn validate_path_abempty<S: Spec>(i: &str) -> Result<(), Error> {
+     if i.is_empty() {
+         return Ok(());
+     }
+     // A non-empty `path-abempty` always begins with a slash.
+     let after_slash = i.strip_prefix('/').ok_or_else(Error::new)?;
+     if !satisfy_chars_with_pct_encoded(
+         after_slash,
+         char::is_ascii_pchar_slash,
+         S::is_nonascii_char_unreserved,
+     ) {
+         return Err(Error::new());
+     }
+     Ok(())
+ }
+
+ /// Returns `Ok(_)` if the string matches `hier-part` or `ihier-part` modulo
+ /// `"//" authority path-abempty`.
+ ///
+ /// An empty string (`path-empty`) is accepted without further checks. Any
+ /// other input is checked with `satisfy_chars_with_pct_encoded`, using
+ /// `char::is_ascii_pchar_slash` for ASCII characters and the spec's
+ /// `is_nonascii_char_unreserved` for the rest.
+ ///
+ /// # Errors
+ ///
+ /// Returns `Error` when the character check fails.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `i` starts with `//`; the caller is expected to have handled
+ /// that form before calling this function.
+ pub(super) fn validate_path_absolute_authority_absent<S: Spec>(i: &str) -> Result<(), Error> {
+     if i.starts_with("//") {
+         unreachable!("this case should be handled by the caller");
+     }
+     // The emptiness test short-circuits, so `path-empty` never reaches the
+     // character check.
+     let accepted = i.is_empty()
+         || satisfy_chars_with_pct_encoded(
+             i,
+             char::is_ascii_pchar_slash,
+             S::is_nonascii_char_unreserved,
+         );
+     if !accepted {
+         return Err(Error::new());
+     }
+     Ok(())
+ }
+
+ /// Returns `Ok(_)` if the string matches `relative-part` or `irelative-part` modulo
+ /// `"//" authority path-abempty`.
+ ///
+ /// The input is first searched for `/` and `:` with `find_split2_hole`. If
+ /// the delimiter it reports is `:`, the input is rejected as a
+ /// `foo:bar`-style string that does not match `path-noscheme`. Otherwise the
+ /// whole input is checked with `satisfy_chars_with_pct_encoded`.
+ ///
+ /// # Errors
+ ///
+ /// Returns `Error` for `foo:bar`-style input or when the character check fails.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `i` starts with `//`; the caller is expected to have handled
+ /// that form before calling this function.
+ pub(super) fn validate_path_relative_authority_absent<S: Spec>(i: &str) -> Result<(), Error> {
+     if i.starts_with("//") {
+         unreachable!("this case should be handled by the caller");
+     }
+     if let Some((_, delim, _)) = find_split2_hole(i, b'/', b':') {
+         if delim != b'/' {
+             debug_assert_eq!(delim, b':');
+             // `foo:bar`-style. This does not match `path-noscheme`.
+             return Err(Error::new());
+         }
+     }
+     // Either `/` was the reported delimiter or neither delimiter was found.
+     if !satisfy_chars_with_pct_encoded(
+         i,
+         char::is_ascii_pchar_slash,
+         S::is_nonascii_char_unreserved,
+     ) {
+         return Err(Error::new());
+     }
+     Ok(())
+ }
+
+ /// Returns `Ok(_)` if the string matches `path`/`ipath` rules.
+ ///
+ /// Input that starts with `//` is rejected outright. Everything else is
+ /// checked with `satisfy_chars_with_pct_encoded`, using
+ /// `char::is_ascii_pchar_slash` for ASCII characters and the spec's
+ /// `is_nonascii_char_unreserved` for the rest.
+ ///
+ /// # Errors
+ ///
+ /// Returns `Error` when the input starts with `//` or the character check fails.
+ pub(crate) fn validate_path<S: Spec>(i: &str) -> Result<(), Error> {
+     // Short-circuiting keeps the character check from running on `//...` input.
+     let rejected = i.starts_with("//")
+         || !satisfy_chars_with_pct_encoded(
+             i,
+             char::is_ascii_pchar_slash,
+             S::is_nonascii_char_unreserved,
+         );
+     if rejected {
+         Err(Error::new())
+     } else {
+         Ok(())
+     }
+ }