diff options
Diffstat (limited to 'vendor/iri-string/src')
49 files changed, 16363 insertions, 0 deletions
diff --git a/vendor/iri-string/src/build.rs b/vendor/iri-string/src/build.rs new file mode 100644 index 00000000..39a57017 --- /dev/null +++ b/vendor/iri-string/src/build.rs @@ -0,0 +1,1234 @@ +//! URI/IRI builder. +//! +//! See the documentation of [`Builder`] type. + +use core::fmt::{self, Display as _, Write as _}; +use core::marker::PhantomData; + +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::ToString; + +use crate::format::Censored; +#[cfg(feature = "alloc")] +use crate::format::{ToDedicatedString, ToStringFallible}; +use crate::normalize::{self, NormalizationMode, PathCharacteristic, PctCaseNormalized}; +use crate::parser::str::{find_split, prior_byte2}; +use crate::parser::validate as parser; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiReferenceStr, RiRelativeStr, RiStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiAbsoluteString, RiReferenceString, RiRelativeString, RiString}; +use crate::validate::Error; + +/// Port builder. +/// +/// This type is intended to be created by `From` trait implementations, and +/// to be passed to [`Builder::port`] method. 
+#[derive(Debug, Clone)] +pub struct PortBuilder<'a>(PortBuilderRepr<'a>); + +impl Default for PortBuilder<'_> { + #[inline] + fn default() -> Self { + Self(PortBuilderRepr::Empty) + } +} + +impl From<u8> for PortBuilder<'_> { + #[inline] + fn from(v: u8) -> Self { + Self(PortBuilderRepr::Integer(v.into())) + } +} + +impl From<u16> for PortBuilder<'_> { + #[inline] + fn from(v: u16) -> Self { + Self(PortBuilderRepr::Integer(v)) + } +} + +impl<'a> From<&'a str> for PortBuilder<'a> { + #[inline] + fn from(v: &'a str) -> Self { + Self(PortBuilderRepr::String(v)) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a alloc::string::String> for PortBuilder<'a> { + #[inline] + fn from(v: &'a alloc::string::String) -> Self { + Self(PortBuilderRepr::String(v.as_str())) + } +} + +/// Internal representation of a port builder. +#[derive(Debug, Clone, Copy)] +#[non_exhaustive] +enum PortBuilderRepr<'a> { + /// Empty port. + Empty, + /// Port as an integer. + /// + /// Note that RFC 3986 accepts any number of digits as a port, but + /// practically (at least in TCP/IP) `u16` is enough. + Integer(u16), + /// Port as a string. + String(&'a str), +} + +/// Userinfo builder. +/// +/// This type is intended to be created by `From` trait implementations, and +/// to be passed to [`Builder::userinfo`] method. +#[derive(Clone)] +pub struct UserinfoBuilder<'a>(UserinfoRepr<'a>); + +impl Default for UserinfoBuilder<'_> { + #[inline] + fn default() -> Self { + Self(UserinfoRepr::None) + } +} + +impl fmt::Debug for UserinfoBuilder<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut debug = f.debug_struct("UserinfoBuilder"); + if let Some((user, password)) = self.to_user_password() { + debug.field("user", &user); + // > Applications should not render as clear text any data after + // > the first colon (":") character found within a userinfo + // > subcomponent unless the data after the colon is the empty + // > string (indicating no password). 
+ if matches!(password, None | Some("")) { + debug.field("password", &password); + } else { + debug.field("password", &Some(Censored)); + } + } + debug.finish() + } +} + +impl<'a> UserinfoBuilder<'a> { + /// Decomposes the userinfo into `user` and `password`. + #[must_use] + fn to_user_password(&self) -> Option<(&'a str, Option<&'a str>)> { + match &self.0 { + UserinfoRepr::None => None, + UserinfoRepr::Direct(s) => match find_split(s, b':') { + None => Some((s, None)), + Some((user, password)) => Some((user, Some(password))), + }, + UserinfoRepr::UserPass(user, password) => Some((*user, *password)), + } + } +} + +impl<'a> From<&'a str> for UserinfoBuilder<'a> { + #[inline] + fn from(direct: &'a str) -> Self { + Self(UserinfoRepr::Direct(direct)) + } +} + +impl<'a> From<(&'a str, &'a str)> for UserinfoBuilder<'a> { + #[inline] + fn from((user, password): (&'a str, &'a str)) -> Self { + Self(UserinfoRepr::UserPass(user, Some(password))) + } +} + +impl<'a> From<(&'a str, Option<&'a str>)> for UserinfoBuilder<'a> { + #[inline] + fn from((user, password): (&'a str, Option<&'a str>)) -> Self { + Self(UserinfoRepr::UserPass(user, password)) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a alloc::string::String> for UserinfoBuilder<'a> { + #[inline] + fn from(v: &'a alloc::string::String) -> Self { + Self::from(v.as_str()) + } +} + +/// Internal representation of a userinfo builder. +#[derive(Clone, Copy)] +enum UserinfoRepr<'a> { + /// Not specified (absent). + None, + /// Direct `userinfo` content. + Direct(&'a str), + /// User name and password. + UserPass(&'a str, Option<&'a str>), +} + +/// URI/IRI authority builder. +#[derive(Default, Debug, Clone)] +struct AuthorityBuilder<'a> { + /// Host. + host: HostRepr<'a>, + /// Port. + port: PortBuilder<'a>, + /// Userinfo. + userinfo: UserinfoBuilder<'a>, +} + +impl AuthorityBuilder<'_> { + /// Writes the authority to the given formatter. 
+ fn fmt_write_to<S: Spec>(&self, f: &mut fmt::Formatter<'_>, normalize: bool) -> fmt::Result { + match &self.userinfo.0 { + UserinfoRepr::None => {} + UserinfoRepr::Direct(userinfo) => { + if normalize { + PctCaseNormalized::<S>::new(userinfo).fmt(f)?; + } else { + userinfo.fmt(f)?; + } + f.write_char('@')?; + } + UserinfoRepr::UserPass(user, password) => { + if normalize { + PctCaseNormalized::<S>::new(user).fmt(f)?; + } else { + f.write_str(user)?; + } + if let Some(password) = password { + f.write_char(':')?; + if normalize { + PctCaseNormalized::<S>::new(password).fmt(f)?; + } else { + password.fmt(f)?; + } + } + f.write_char('@')?; + } + } + + match self.host { + HostRepr::String(host) => { + if normalize { + normalize::normalize_host_port::<S>(f, host)?; + } else { + f.write_str(host)?; + } + } + #[cfg(feature = "std")] + HostRepr::IpAddr(ipaddr) => match ipaddr { + std::net::IpAddr::V4(v) => v.fmt(f)?, + std::net::IpAddr::V6(v) => write!(f, "[{v}]")?, + }, + } + + match self.port.0 { + PortBuilderRepr::Empty => {} + PortBuilderRepr::Integer(v) => write!(f, ":{v}")?, + PortBuilderRepr::String(v) => { + // Omit empty port if the normalization is enabled. + if !(v.is_empty() && normalize) { + write!(f, ":{v}")?; + } + } + } + + Ok(()) + } +} + +/// Host representation. +#[derive(Debug, Clone, Copy)] +enum HostRepr<'a> { + /// Direct string representation. + String(&'a str), + #[cfg(feature = "std")] + /// Dedicated IP address type. + IpAddr(std::net::IpAddr), +} + +impl Default for HostRepr<'_> { + #[inline] + fn default() -> Self { + Self::String("") + } +} + +/// URI/IRI reference builder. +/// +/// # Usage +/// +/// 1. Create builder by [`Builder::new()`][`Self::new`]. +/// 2. Set (or unset) components and set normalization mode as you wish. +/// 3. Validate by [`Builder::build()`][`Self::build`] and get [`Built`] value. +/// 4. 
Use [`core::fmt::Display`] trait to serialize the resulting [`Built`], +/// or use [`From`]/[`Into`] traits to convert into an allocated string type. +/// +/// ``` +/// # use iri_string::validate::Error; +/// use iri_string::build::Builder; +/// # #[cfg(not(feature = "alloc"))] +/// # use iri_string::types::IriStr; +/// # #[cfg(feature = "alloc")] +/// use iri_string::types::{IriStr, IriString}; +/// +/// // 1. Create builder. +/// let mut builder = Builder::new(); +/// +/// // 2. Set (or unset) component and normalization mode. +/// builder.scheme("http"); +/// builder.host("example.com"); +/// builder.path("/foo/../"); +/// builder.normalize(); +/// +/// // 3. Validate and create the result. +/// let built = builder.build::<IriStr>()?; +/// +/// # #[cfg(feature = "alloc")] { +/// // 4a. Serialize by `Display` trait (or `ToString`). +/// let s = built.to_string(); +/// assert_eq!(s, "http://example.com/"); +/// # } +/// +/// # #[cfg(feature = "alloc")] { +/// // 4b. Convert into an allocated string type. +/// // Thanks to pre-validation by `.build::<IriStr>()`, this conversion is infallible! +/// let s: IriString = built.into(); +/// assert_eq!(s, "http://example.com/"); +/// # } +/// +/// # Ok::<_, Error>(()) +/// ``` +#[derive(Default, Debug, Clone)] +pub struct Builder<'a> { + /// Scheme. + scheme: Option<&'a str>, + /// Authority. + authority: Option<AuthorityBuilder<'a>>, + /// Path. + path: &'a str, + /// Query (without the leading `?`). + query: Option<&'a str>, + /// Fragment (without the leading `#`). + fragment: Option<&'a str>, + /// Whether to normalize the result when it is serialized. + normalize: bool, +} + +impl<'a> Builder<'a> { + /// Creates a builder with empty data. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let builder = Builder::new(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), ""); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Writes the IRI reference to the given formatter. + /// + /// Don't expose this as public, since this method does not validate. + /// + /// # Preconditions + /// + /// The IRI string to be built should be a valid IRI reference. + /// Callers are responsible for validating the component values before calling + /// this method. + fn fmt_write_to<S: Spec>( + &self, + f: &mut fmt::Formatter<'_>, + path_is_absolute: bool, + ) -> fmt::Result { + if let Some(scheme) = self.scheme { + // Write the scheme. + if self.normalize { + normalize::normalize_scheme(f, scheme)?; + } else { + f.write_str(scheme)?; + } + f.write_char(':')?; + } + + if let Some(authority) = &self.authority { + f.write_str("//")?; + authority.fmt_write_to::<S>(f, self.normalize)?; + } + + if !self.normalize { + // No normalization. + f.write_str(self.path)?; + } else if self.scheme.is_some() || self.authority.is_some() || path_is_absolute { + // Apply full syntax-based normalization. + let op = normalize::NormalizationOp { + mode: NormalizationMode::Default, + }; + normalize::PathToNormalize::from_single_path(self.path).fmt_write_normalize::<S, _>( + f, + op, + self.authority.is_some(), + )?; + } else { + // The IRI reference starts with `path` component, and the path is relative. + // Skip path segment normalization. 
+ PctCaseNormalized::<S>::new(self.path).fmt(f)?; + } + + if let Some(query) = self.query { + f.write_char('?')?; + if self.normalize { + normalize::normalize_query::<S>(f, query)?; + } else { + f.write_str(query)?; + } + } + + if let Some(fragment) = self.fragment { + f.write_char('#')?; + if self.normalize { + normalize::normalize_fragment::<S>(f, fragment)?; + } else { + f.write_str(fragment)?; + } + } + + Ok(()) + } + + /// Builds the proxy object that can be converted to the desired IRI string type. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriStr; + /// # #[cfg(feature = "alloc")] + /// use iri_string::types::IriString; + /// + /// let mut builder = Builder::new(); + /// + /// builder.scheme("http"); + /// builder.host("example.com"); + /// builder.path("/foo/bar"); + /// + /// let built = builder.build::<IriStr>()?; + /// + /// # #[cfg(feature = "alloc")] { + /// // The returned value implements `core::fmt::Display` and + /// // `alloc::string::ToString`. + /// assert_eq!(built.to_string(), "http://example.com/foo/bar"); + /// + /// // The returned value implements `Into<{iri_owned_string_type}>`. + /// let iri = IriString::from(built); + /// // `let iri: IriString = built.into();` is also OK. + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn build<T>(self) -> Result<Built<'a, T>, Error> + where + T: ?Sized + Buildable<'a>, + { + <T as private::Sealed<'a>>::validate_builder(self) + } +} + +// Setters do not return `&mut Self` or `Self` since that would introduce needless +// ambiguity for users. +// For example, if setters return something and allow method chaining, can you +// correctly explain what happens with the code below without reading the documentation? +// +// ```text +// let mut builder = Builder::new().foo("foo").bar("bar"); +// let baz = builder.baz("baz").clone().build(); +// // Should the result be foo+bar+qux, or foo+bar+baz+qux? 
+// let qux = builder.qux("qux").build(); +// ``` +impl<'a> Builder<'a> { + /// Sets the scheme. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.scheme("foo"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "foo:"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn scheme(&mut self, v: &'a str) { + self.scheme = Some(v); + } + + /// Unsets the scheme. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.scheme("foo"); + /// builder.unset_scheme(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), ""); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_scheme(&mut self) { + self.scheme = None; + } + + /// Sets the path. + /// + /// Note that no method is provided to "unset" the path since every IRI + /// reference has a path component (although it can be empty). + /// If you want to "unset" the path, just set the empty string. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.path("foo/bar"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "foo/bar"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn path(&mut self, v: &'a str) { + self.path = v; + } + + /// Returns the authority builder, initializing it with the default value if it is absent. 
+ #[inline] + fn authority_builder(&mut self) -> &mut AuthorityBuilder<'a> { + self.authority.get_or_insert_with(AuthorityBuilder::default) + } + + /// Unsets the authority. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.host("example.com"); + /// builder.unset_authority(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), ""); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_authority(&mut self) { + self.authority = None; + } + + /// Sets the userinfo. + /// + /// The `userinfo` component always has a `user` part (but it can be empty). + /// + /// Note that `("", None)` is considered as an empty userinfo, rather than + /// unset userinfo. + /// Also note that the user part cannot have colon characters. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.userinfo("user:pass"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//user:pass@"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// You can specify `(user, password)` pair. + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// + /// builder.userinfo(("user", Some("pass"))); + /// # #[cfg(feature = "alloc")] { + /// assert_eq!( + /// builder.clone().build::<IriReferenceStr>()?.to_string(), + /// "//user:pass@" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// `("", None)` is considered as an empty userinfo. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.userinfo(("", None)); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//@"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn userinfo<T: Into<UserinfoBuilder<'a>>>(&mut self, v: T) { + self.authority_builder().userinfo = v.into(); + } + + /// Unsets the userinfo. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.userinfo("user:pass"); + /// // Note that this does not unset the entire authority. + /// // Now empty authority is set. + /// builder.unset_userinfo(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_userinfo(&mut self) { + self.authority_builder().userinfo = UserinfoBuilder::default(); + } + + /// Sets the reg-name or IP address (i.e. host) without port. + /// + /// Note that no methods are provided to "unset" host. + /// Depending on your situation, set empty string as a reg-name, or unset + /// the authority entirely by [`unset_authority`][`Self::unset_authority`] + /// method. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.host("example.com"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//example.com"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn host(&mut self, v: &'a str) { + self.authority_builder().host = HostRepr::String(v); + } + + /// Sets the IP address as a host. + /// + /// Note that no methods are provided to "unset" host. + /// Depending on your situation, set empty string as a reg-name, or unset + /// the authority entirely by [`unset_authority`][`Self::unset_authority`] + /// method. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "std")] { + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.ip_address(std::net::Ipv4Addr::new(192, 0, 2, 0)); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//192.0.2.0"); + /// # } + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[cfg(feature = "std")] + #[inline] + pub fn ip_address<T: Into<std::net::IpAddr>>(&mut self, addr: T) { + self.authority_builder().host = HostRepr::IpAddr(addr.into()); + } + + /// Sets the port. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.port(80_u16); + /// // Accepts other types that implement `Into<PortBuilder<'a>>`. 
+ /// //builder.port(80_u8); + /// //builder.port("80"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//:80"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn port<T: Into<PortBuilder<'a>>>(&mut self, v: T) { + self.authority_builder().port = v.into(); + } + + /// Unsets the port. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.port(80_u16); + /// // Note that this does not unset the entire authority. + /// // Now empty authority is set. + /// builder.unset_port(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_port(&mut self) { + self.authority_builder().port = PortBuilder::default(); + } + + /// Sets the query. + /// + /// The string after `?` should be specified. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.query("q=example"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "?q=example"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn query(&mut self, v: &'a str) { + self.query = Some(v); + } + + /// Unsets the query. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.query("q=example"); + /// builder.unset_query(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), ""); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_query(&mut self) { + self.query = None; + } + + /// Sets the fragment. + /// + /// The string after `#` should be specified. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.fragment("anchor"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "#anchor"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn fragment(&mut self, v: &'a str) { + self.fragment = Some(v); + } + + /// Unsets the fragment. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.fragment("anchor"); + /// builder.unset_fragment(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), ""); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_fragment(&mut self) { + self.fragment = None; + } + + /// Stop normalizing the result. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.scheme("http"); + /// // `%75%73%65%72` is "user". 
+ /// builder.userinfo("%75%73%65%72"); + /// builder.host("EXAMPLE.COM"); + /// builder.port(""); + /// builder.path("/foo/../%2e%2e/bar/%2e/baz/."); + /// + /// builder.unset_normalize(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!( + /// iri.to_string(), + /// "http://%75%73%65%72@EXAMPLE.COM:/foo/../%2e%2e/bar/%2e/baz/." + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_normalize(&mut self) { + self.normalize = false; + } + + /// Normalizes the result using RFC 3986 syntax-based normalization and + /// WHATWG URL Standard algorithm. + /// + /// # Normalization + /// + /// If `scheme` or `authority` component is present or the path is absolute, + /// the build result will be fully normalized using full syntax-based normalization: + /// + /// * case normalization ([RFC 3986 6.2.2.1]), + /// * percent-encoding normalization ([RFC 3986 6.2.2.2]), and + /// * path segment normalization ([RFC 3986 6.2.2.3]). + /// + /// However, if both `scheme` and `authority` are absent and the path is relative + /// (including empty), i.e. the IRI reference to be built starts with the + /// relative `path` component, path segment normalization will be omitted. + /// This is because the path segment normalization depends on presence or + /// absence of the `authority` component, and will remove extra `..` + /// segments which should not be ignored. + /// + /// Note that `path` must already be empty or start with a slash **before + /// the normalization** if `authority` is present. + /// + /// # WHATWG URL Standard + /// + /// If you need to avoid WHATWG URL Standard serialization, use + /// [`Built::ensure_rfc3986_normalizable`] method to test if the result is + /// normalizable without WHATWG spec. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.scheme("http"); + /// // `%75%73%65%72` is "user". + /// builder.userinfo("%75%73%65%72"); + /// builder.host("EXAMPLE.COM"); + /// builder.port(""); + /// builder.path("/foo/../%2e%2e/bar/%2e/baz/."); + /// + /// builder.normalize(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "http://user@example.com/bar/baz/"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn normalize(&mut self) { + self.normalize = true; + } +} + +/// [`Display`]-able IRI build result. +/// +/// The value of this type can generate an IRI using [`From`]/[`Into`] traits or +/// [`Display`] trait. +/// +/// # Security consideration +/// +/// This can be stringified or directly printed by `std::fmt::Display`, but note +/// that this `Display` **does not hide the password part**. Be careful **not to +/// print the value using `Display for Built<_>` in public context**. +/// +/// [`From`]: `core::convert::From` +/// [`Into`]: `core::convert::Into` +/// [`Display`]: `core::fmt::Display` +#[derive(Debug)] +pub struct Built<'a, T: ?Sized> { + /// Builder with the validated content. + builder: Builder<'a>, + /// Whether the path is absolute. + path_is_absolute: bool, + /// String type. + _ty_str: PhantomData<fn() -> T>, +} + +impl<T: ?Sized> Clone for Built<'_, T> { + #[inline] + fn clone(&self) -> Self { + Self { + builder: self.builder.clone(), + path_is_absolute: self.path_is_absolute, + _ty_str: PhantomData, + } + } +} + +/// Implements conversions to a string. +macro_rules! impl_stringifiers { + ($borrowed:ident, $owned:ident) => { + impl<S: Spec> Built<'_, $borrowed<S>> { + /// Returns `Ok(())` if the IRI is normalizable by the RFC 3986 algorithm. 
+ #[inline] + pub fn ensure_rfc3986_normalizable(&self) -> Result<(), normalize::Error> { + if self.builder.authority.is_none() { + let path = normalize::PathToNormalize::from_single_path(self.builder.path); + path.ensure_rfc3986_normalizable_with_authority_absent()?; + } + Ok(()) + } + } + + impl<S: Spec> fmt::Display for Built<'_, $borrowed<S>> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.builder.fmt_write_to::<S>(f, self.path_is_absolute) + } + } + + #[cfg(feature = "alloc")] + impl<S: Spec> ToDedicatedString for Built<'_, $borrowed<S>> { + type Target = $owned<S>; + + #[inline] + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { + let s = self.try_to_string()?; + Ok(TryFrom::try_from(s) + .expect("[validity] the IRI to be built is already validated")) + } + } + + #[cfg(feature = "alloc")] + impl<S: Spec> From<Built<'_, $borrowed<S>>> for $owned<S> { + #[inline] + fn from(builder: Built<'_, $borrowed<S>>) -> Self { + (&builder).into() + } + } + + #[cfg(feature = "alloc")] + impl<S: Spec> From<&Built<'_, $borrowed<S>>> for $owned<S> { + #[inline] + fn from(builder: &Built<'_, $borrowed<S>>) -> Self { + let s = builder.to_string(); + Self::try_from(s).expect("[validity] the IRI to be built is already validated") + } + } + }; +} + +impl_stringifiers!(RiReferenceStr, RiReferenceString); +impl_stringifiers!(RiStr, RiString); +impl_stringifiers!(RiAbsoluteStr, RiAbsoluteString); +impl_stringifiers!(RiRelativeStr, RiRelativeString); + +/// A trait for borrowed IRI string types buildable by the [`Builder`]. 
+pub trait Buildable<'a>: private::Sealed<'a> {} + +impl<'a, S: Spec> private::Sealed<'a> for RiReferenceStr<S> { + fn validate_builder(builder: Builder<'a>) -> Result<Built<'a, Self>, Error> { + let path_is_absolute = validate_builder_for_iri_reference::<S>(&builder)?; + + Ok(Built { + builder, + path_is_absolute, + _ty_str: PhantomData, + }) + } +} +impl<S: Spec> Buildable<'_> for RiReferenceStr<S> {} + +impl<'a, S: Spec> private::Sealed<'a> for RiStr<S> { + fn validate_builder(builder: Builder<'a>) -> Result<Built<'a, Self>, Error> { + if builder.scheme.is_none() { + return Err(Error::new()); + } + let path_is_absolute = validate_builder_for_iri_reference::<S>(&builder)?; + + Ok(Built { + builder, + path_is_absolute, + _ty_str: PhantomData, + }) + } +} +impl<S: Spec> Buildable<'_> for RiStr<S> {} + +impl<'a, S: Spec> private::Sealed<'a> for RiAbsoluteStr<S> { + fn validate_builder(builder: Builder<'a>) -> Result<Built<'a, Self>, Error> { + if builder.scheme.is_none() { + return Err(Error::new()); + } + if builder.fragment.is_some() { + return Err(Error::new()); + } + let path_is_absolute = validate_builder_for_iri_reference::<S>(&builder)?; + + Ok(Built { + builder, + path_is_absolute, + _ty_str: PhantomData, + }) + } +} +impl<S: Spec> Buildable<'_> for RiAbsoluteStr<S> {} + +impl<'a, S: Spec> private::Sealed<'a> for RiRelativeStr<S> { + fn validate_builder(builder: Builder<'a>) -> Result<Built<'a, Self>, Error> { + if builder.scheme.is_some() { + return Err(Error::new()); + } + let path_is_absolute = validate_builder_for_iri_reference::<S>(&builder)?; + + Ok(Built { + builder, + path_is_absolute, + _ty_str: PhantomData, + }) + } +} +impl<S: Spec> Buildable<'_> for RiRelativeStr<S> {} + +/// Checks whether the builder output is valid IRI reference. +/// +/// Returns whether the path is absolute. 
+fn validate_builder_for_iri_reference<S: Spec>(builder: &Builder<'_>) -> Result<bool, Error> { + if let Some(scheme) = builder.scheme { + parser::validate_scheme(scheme)?; + } + + if let Some(authority) = &builder.authority { + match &authority.userinfo.0 { + UserinfoRepr::None => {} + UserinfoRepr::Direct(userinfo) => { + parser::validate_userinfo::<S>(userinfo)?; + } + UserinfoRepr::UserPass(user, password) => { + // `user` is not allowed to have a colon, since the characters + // after the colon is parsed as the password. + if user.contains(':') { + return Err(Error::new()); + } + + // Note that the syntax of components inside `authority` + // (`user` and `password`) is not specified by RFC 3986. + parser::validate_userinfo::<S>(user)?; + if let Some(password) = password { + parser::validate_userinfo::<S>(password)?; + } + } + } + + match authority.host { + HostRepr::String(s) => parser::validate_host::<S>(s)?, + #[cfg(feature = "std")] + HostRepr::IpAddr(_) => {} + } + + if let PortBuilderRepr::String(s) = authority.port.0 { + if !s.bytes().all(|b| b.is_ascii_digit()) { + return Err(Error::new()); + } + } + } + + let path_is_absolute: bool; + let mut is_path_acceptable; + if builder.normalize { + if builder.scheme.is_some() || builder.authority.is_some() || builder.path.starts_with('/') + { + if builder.authority.is_some() { + // Note that the path should already be in an absolute form before normalization. 
+ is_path_acceptable = builder.path.is_empty() || builder.path.starts_with('/'); + } else { + is_path_acceptable = true; + } + let op = normalize::NormalizationOp { + mode: NormalizationMode::Default, + }; + let path_characteristic = PathCharacteristic::from_path_to_display::<S>( + &normalize::PathToNormalize::from_single_path(builder.path), + op, + builder.authority.is_some(), + ); + path_is_absolute = path_characteristic.is_absolute(); + is_path_acceptable = is_path_acceptable + && match path_characteristic { + PathCharacteristic::CommonAbsolute | PathCharacteristic::CommonRelative => true, + PathCharacteristic::StartsWithDoubleSlash + | PathCharacteristic::RelativeFirstSegmentHasColon => { + builder.scheme.is_some() || builder.authority.is_some() + } + }; + } else { + path_is_absolute = false; + // If the path is relative (where neither scheme nor authority is + // available), the first segment should not contain a colon. + is_path_acceptable = prior_byte2(builder.path.as_bytes(), b'/', b':') != Some(b':'); + } + } else { + path_is_absolute = builder.path.starts_with('/'); + is_path_acceptable = if builder.authority.is_some() { + // The path should be absolute or empty. + path_is_absolute || builder.path.is_empty() + } else if builder.scheme.is_some() || path_is_absolute { + // The path should not start with '//'. + !builder.path.starts_with("//") + } else { + // If the path is relative (where neither scheme nor authority is + // available), the first segment should not contain a colon. + prior_byte2(builder.path.as_bytes(), b'/', b':') != Some(b':') + }; + } + if !is_path_acceptable { + return Err(Error::new()); + } + + if let Some(query) = builder.query { + parser::validate_query::<S>(query)?; + } + + if let Some(fragment) = builder.fragment { + parser::validate_fragment::<S>(fragment)?; + } + + Ok(path_is_absolute) +} + +/// Private module to put the trait to seal. 
mod private {
    use super::{Builder, Built, Error};

    /// A trait for types buildable by the [`Builder`].
    ///
    /// It is a supertrait of the public `Buildable` trait and is declared in
    /// this private module.
    pub trait Sealed<'a> {
        /// Validates the content of the builder and returns the validated type if possible.
        fn validate_builder(builder: Builder<'a>) -> Result<Built<'a, Self>, Error>;
    }
}
diff --git a/vendor/iri-string/src/components.rs b/vendor/iri-string/src/components.rs new file mode 100644 index 00000000..33e7ac45 --- /dev/null +++ b/vendor/iri-string/src/components.rs @@ -0,0 +1,267 @@
//! Components of IRIs.

mod authority;

use core::num::NonZeroUsize;
use core::ops::{Range, RangeFrom, RangeTo};

use crate::parser::trusted as trusted_parser;
use crate::spec::Spec;
use crate::types::RiReferenceStr;

pub use self::authority::AuthorityComponents;

/// Positions to split an IRI into components.
///
/// All positions are byte offsets into the IRI string they were computed for.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Splitter {
    /// Scheme end.
    ///
    /// This is the offset of the `:` that follows the scheme.
    scheme_end: Option<NonZeroUsize>,
    /// Authority end.
    ///
    /// Note that absence of the authority and the empty authority is
    /// distinguished.
    authority_end: Option<NonZeroUsize>,
    /// Query start (after the leading `?`).
    query_start: Option<NonZeroUsize>,
    /// Fragment start (after the leading `#`).
    fragment_start: Option<NonZeroUsize>,
}

impl Splitter {
    /// Creates a new splitter.
    ///
    /// The positions are stored as given, without any check.
    #[inline]
    #[must_use]
    pub(crate) fn new(
        scheme_end: Option<NonZeroUsize>,
        authority_end: Option<NonZeroUsize>,
        query_start: Option<NonZeroUsize>,
        fragment_start: Option<NonZeroUsize>,
    ) -> Self {
        Self {
            scheme_end,
            authority_end,
            query_start,
            fragment_start,
        }
    }

    /// Decomposes an IRI into five major components: scheme, authority, path, query, and fragment.
+ #[must_use] + fn split_into_major( + self, + s: &str, + ) -> (Option<&str>, Option<&str>, &str, Option<&str>, Option<&str>) { + let (scheme, next_of_scheme) = match self.scheme_end { + // +1: ":".len() + Some(end) => (Some(&s[..end.get()]), end.get() + 1), + None => (None, 0), + }; + let (authority, next_of_authority) = match self.authority_end { + // +2: "//".len() + Some(end) => (Some(&s[(next_of_scheme + 2)..end.get()]), end.get()), + None => (None, next_of_scheme), + }; + let (fragment, end_of_prev_of_fragment) = match self.fragment_start { + // -1: "#".len() + Some(start) => (Some(&s[start.get()..]), start.get() - 1), + None => (None, s.len()), + }; + let (query, end_of_path) = match self.query_start { + Some(start) => ( + Some(&s[start.get()..end_of_prev_of_fragment]), + // -1: "?".len() + start.get() - 1, + ), + None => (None, end_of_prev_of_fragment), + }; + let path = &s[next_of_authority..end_of_path]; + (scheme, authority, path, query, fragment) + } + + /// Returns the range for the scheme part. + #[inline] + #[must_use] + fn scheme_range(self) -> Option<RangeTo<usize>> { + self.scheme_end.map(|end| ..end.get()) + } + + /// Returns the scheme as a string. + #[inline] + #[must_use] + pub(crate) fn scheme_str<'a>(&self, s: &'a str) -> Option<&'a str> { + self.scheme_range().map(|range| &s[range]) + } + + /// Returns true if the IRI has a scheme part, false otherwise. + #[inline] + #[must_use] + pub(crate) fn has_scheme(&self) -> bool { + self.scheme_end.is_some() + } + + /// Returns the range for the authority part. + #[inline] + #[must_use] + fn authority_range(self) -> Option<Range<usize>> { + let end = self.authority_end?.get(); + // 2: "//".len() + // +3: "://".len() + let start = self.scheme_end.map_or(2, |v| v.get() + 3); + Some(start..end) + } + + /// Returns the authority as a string. 
#[inline]
#[must_use]
pub(crate) fn authority_str<'a>(&self, s: &'a str) -> Option<&'a str> {
    let range = self.authority_range()?;
    Some(&s[range])
}

/// Returns true if the IRI has an authority part, false otherwise.
#[inline]
#[must_use]
pub(crate) fn has_authority(&self) -> bool {
    self.authority_end.is_some()
}

/// Returns the range for the path part.
///
/// `full_len` is the length of the whole IRI in bytes.
#[inline]
#[must_use]
fn path_range(self, full_len: usize) -> Range<usize> {
    // The path ends right before the `?` (or, without a query, the `#`).
    let end = match self.query_start.or(self.fragment_start) {
        // -1: "?".len() and "#".len()
        Some(after_delimiter) => after_delimiter.get() - 1,
        None => full_len,
    };
    // The path starts right after the authority, or else after the scheme.
    let start = match (self.authority_end, self.scheme_end) {
        (Some(authority_end), _) => authority_end.get(),
        // +1: ":".len()
        (None, Some(scheme_end)) => scheme_end.get() + 1,
        (None, None) => 0,
    };

    start..end
}

/// Returns the path as a string.
#[inline]
#[must_use]
pub(crate) fn path_str<'a>(&self, s: &'a str) -> &'a str {
    let range = self.path_range(s.len());
    &s[range]
}

/// Returns true if the path part of the IRI is empty.
#[inline]
#[must_use]
pub(crate) fn is_path_empty(&self, full_len: usize) -> bool {
    let range = self.path_range(full_len);
    range.is_empty()
}

/// Returns the range for the query part excluding a prefix `?`.
#[inline]
#[must_use]
fn query_range(self, full_len: usize) -> Option<Range<usize>> {
    self.query_start.map(|start| {
        // -1: "#".len()
        let end = self.fragment_start.map_or(full_len, |v| v.get() - 1);
        start.get()..end
    })
}

/// Returns the query as a string.
#[inline]
#[must_use]
pub(crate) fn query_str<'a>(&self, s: &'a str) -> Option<&'a str> {
    let range = self.query_range(s.len())?;
    Some(&s[range])
}

/// Returns true if the IRI has a query part, false otherwise.
#[inline]
#[must_use]
pub(crate) fn has_query(&self) -> bool {
    self.query_start.is_some()
}

/// Returns the range for the fragment part excluding a prefix `#`.
#[inline]
#[must_use]
pub(crate) fn fragment_range(self) -> Option<RangeFrom<usize>> {
    let start = self.fragment_start?.get();
    Some(start..)
}

/// Returns the fragment as a string.
#[inline]
#[must_use]
pub(crate) fn fragment_str<'a>(&self, s: &'a str) -> Option<&'a str> {
    let range = self.fragment_range()?;
    Some(&s[range])
}
}

/// Components of an IRI reference.
///
/// See <https://tools.ietf.org/html/rfc3986#section-5.2.2>.
#[derive(Debug, Clone, Copy)]
pub(crate) struct RiReferenceComponents<'a, S: Spec> {
    /// Original complete string.
    pub(crate) iri: &'a RiReferenceStr<S>,
    /// Positions to split the IRI into components.
    pub(crate) splitter: Splitter,
}

impl<'a, S: Spec> RiReferenceComponents<'a, S> {
    /// Returns five major components: scheme, authority, path, query, and fragment.
    #[inline]
    #[must_use]
    pub(crate) fn to_major(
        self,
    ) -> (
        Option<&'a str>,
        Option<&'a str>,
        &'a str,
        Option<&'a str>,
        Option<&'a str>,
    ) {
        let whole = self.iri.as_str();
        self.splitter.split_into_major(whole)
    }

    /// Returns the IRI reference.
    #[inline]
    #[must_use]
    pub(crate) fn iri(&self) -> &'a RiReferenceStr<S> {
        self.iri
    }

    /// Returns the scheme as a string.
    #[inline]
    #[must_use]
    pub(crate) fn scheme_str(&self) -> Option<&str> {
        let whole = self.iri.as_str();
        self.splitter.scheme_str(whole)
    }

    /// Returns the authority as a string.
    #[inline]
    #[must_use]
    pub(crate) fn authority_str(&self) -> Option<&str> {
        let whole = self.iri.as_str();
        self.splitter.authority_str(whole)
    }

    /// Returns the path as a string.
    #[inline]
    #[must_use]
    pub(crate) fn path_str(&self) -> &str {
        let whole = self.iri.as_str();
        self.splitter.path_str(whole)
    }

    /// Returns the query as a string.
#[inline]
#[must_use]
pub(crate) fn query_str(&self) -> Option<&str> {
    let whole = self.iri.as_str();
    self.splitter.query_str(whole)
}
}

impl<'a, S: Spec> From<&'a RiReferenceStr<S>> for RiReferenceComponents<'a, S> {
    /// Decomposes the (already validated) IRI reference into components.
    #[inline]
    fn from(s: &'a RiReferenceStr<S>) -> Self {
        trusted_parser::decompose_iri_reference(s)
    }
}
diff --git a/vendor/iri-string/src/components/authority.rs b/vendor/iri-string/src/components/authority.rs new file mode 100644 index 00000000..a7f17744 --- /dev/null +++ b/vendor/iri-string/src/components/authority.rs @@ -0,0 +1,121 @@
//! Subcomponents of authority.

use crate::parser::trusted as trusted_parser;
use crate::spec::Spec;
use crate::types::RiReferenceStr;

/// Subcomponents of authority.
///
/// This is a return type of the `authority_components` method of the string
/// types (for example [`RiStr::authority_components`]).
///
/// [`RiStr::authority_components`]: `crate::types::RiStr::authority_components`
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct AuthorityComponents<'a> {
    /// Authority string, excluding the leading `//`.
    pub(crate) authority: &'a str,
    /// Start position of the `host`.
    pub(crate) host_start: usize,
    /// End position of the `host`.
    pub(crate) host_end: usize,
}

impl<'a> AuthorityComponents<'a> {
    /// Creates a new `AuthorityComponents` from the IRI.
    ///
    /// Returns `None` if the IRI has no authority.
    pub fn from_iri<S: Spec>(iri: &'a RiReferenceStr<S>) -> Option<Self> {
        let authority = iri.authority_str()?;
        Some(trusted_parser::authority::decompose_authority(authority))
    }

    /// Returns the `userinfo` part, excluding the following `@`.
    #[must_use]
    pub fn userinfo(&self) -> Option<&'a str> {
        // When the host starts at offset zero, no `@` (and thus no userinfo)
        // can precede it.
        let at_pos = self.host_start.checked_sub(1)?;
        debug_assert_eq!(self.authority.as_bytes()[at_pos], b'@');
        let userinfo = &self.authority[..at_pos];
        Some(userinfo)
    }

    /// Returns the `host` part.
    #[inline]
    #[must_use]
    pub fn host(&self) -> &'a str {
        // NOTE: RFC 6874 support may need the internal logic to change.
        let host_range = self.host_start..self.host_end;
        &self.authority[host_range]
    }

    /// Returns the `port` part, excluding the preceding `:`.
    #[must_use]
    pub fn port(&self) -> Option<&'a str> {
        let colon_pos = self.host_end;
        if colon_pos == self.authority.len() {
            // The host extends to the end of the authority: no port delimiter.
            return None;
        }
        debug_assert_eq!(self.authority.as_bytes()[colon_pos], b':');
        let port = &self.authority[(colon_pos + 1)..];
        Some(port)
    }
}

#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use super::*;

    #[cfg(all(feature = "alloc", not(feature = "std")))]
    use alloc::string::String;

    use crate::types::IriReferenceStr;

    const USERINFO: &[&str] = &["", "user:password", "user"];

    const PORT: &[&str] = &[
        "",
        "0",
        "0000",
        "80",
        "1234567890123456789012345678901234567890",
    ];

    const HOST: &[&str] = &[
        "",
        "localhost",
        "example.com",
        "192.0.2.0",
        "[2001:db8::1]",
        "[2001:0db8:0:0:0:0:0:1]",
        "[2001:0db8::192.0.2.255]",
        "[v9999.this-is-futuristic-ip-address]",
    ];

    fn compose_to_relative_iri(userinfo: Option<&str>, host: &str, port: Option<&str>) -> String {
        let mut buf = String::from("//");
        if let Some(userinfo) = userinfo {
            buf.push_str(userinfo);
            buf.push('@');
        }
        buf.push_str(host);
        if let Some(port) = port {
            buf.push(':');
            buf.push_str(port);
        }
        buf
    }

    #[test]
    fn test_decompose_authority() {
        for host in HOST.iter().copied() {
            for userinfo in USERINFO.iter().map(|s| Some(*s)).chain(None) {
                for port in PORT.iter().map(|s| Some(*s)).chain(None) {
                    let authority = compose_to_relative_iri(userinfo, host, port);
                    let authority =
                        IriReferenceStr::new(&authority).expect("test case should be valid");
                    let components = AuthorityComponents::from_iri(authority)
                        .expect("relative path composed for this test should contain authority");

                    assert_eq!(components.host(), host);
                    assert_eq!(components.userinfo(), userinfo);
                    assert_eq!(components.port(), port);
                }
            }
        }
    }
}
diff --git a/vendor/iri-string/src/convert.rs
b/vendor/iri-string/src/convert.rs new file mode 100644 index 00000000..5b4ec9a4 --- /dev/null +++ b/vendor/iri-string/src/convert.rs @@ -0,0 +1,291 @@
//! Conversion between URI/IRI types.

use core::fmt;

#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;

#[cfg(feature = "alloc")]
use crate::format::{ToDedicatedString, ToStringFallible};
use crate::spec::Spec;
use crate::types::{
    RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr,
};
#[cfg(feature = "alloc")]
use crate::types::{
    RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, RiRelativeString,
    RiString,
};
#[cfg(feature = "alloc")]
use crate::types::{
    UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, UriRelativeString,
    UriString,
};

/// Uppercase hexadecimal digits, indexed by the value of a nibble.
const HEXDIGITS: [u8; 16] = *b"0123456789ABCDEF";

/// A resource identifier mapped to a URI of some kind.
///
/// Supported `Src` types are:
///
/// * IRIs:
///     + [`IriAbsoluteStr`] (alias of `RiAbsoluteStr<IriSpec>`)
///     + [`IriReferenceStr`] (alias of `RiReferenceStr<IriSpec>`)
///     + [`IriRelativeStr`] (alias of `RiRelativeStr<IriSpec>`)
///     + [`IriStr`] (alias of `RiStr<IriSpec>`)
/// * URIs:
///     + [`UriAbsoluteStr`] (alias of `RiAbsoluteStr<UriSpec>`)
///     + [`UriReferenceStr`] (alias of `RiReferenceStr<UriSpec>`)
///     + [`UriRelativeStr`] (alias of `RiRelativeStr<UriSpec>`)
///     + [`UriStr`] (alias of `RiStr<UriSpec>`)
///
/// # Examples
///
/// ```
/// use iri_string::convert::MappedToUri;
/// use iri_string::types::{IriStr, UriStr};
///
/// let src = IriStr::new("http://example.com/?alpha=\u{03B1}")?;
/// // The type is `MappedToUri<IriStr>`, but you usually don't need to specify.
/// let mapped = MappedToUri::from(src).to_string();
/// assert_eq!(mapped, "http://example.com/?alpha=%CE%B1");
/// # Ok::<_, iri_string::validate::Error>(())
/// ```
///
/// [`IriAbsoluteStr`]: crate::types::IriAbsoluteStr
/// [`IriReferenceStr`]: crate::types::IriReferenceStr
/// [`IriRelativeStr`]: crate::types::IriRelativeStr
/// [`IriStr`]: crate::types::IriStr
/// [`UriAbsoluteStr`]: crate::types::UriAbsoluteStr
/// [`UriReferenceStr`]: crate::types::UriReferenceStr
/// [`UriRelativeStr`]: crate::types::UriRelativeStr
/// [`UriStr`]: crate::types::UriStr
#[derive(Debug, Clone, Copy)]
pub struct MappedToUri<'a, Src: ?Sized>(&'a Src);

/// Implements the conversions for one IRI string type.
///
/// The arguments are the borrowed type, the owned type, and the owned URI
/// type that the percent-encoded result is converted into.
macro_rules! impl_for_iri {
    ($borrowed:ident, $owned:ident, $owned_uri:ident) => {
        impl<S: Spec> fmt::Display for MappedToUri<'_, $borrowed<S>> {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let source = self.0.as_str();
                write_percent_encoded(f, source)
            }
        }

        #[cfg(feature = "alloc")]
        impl<S: Spec> ToDedicatedString for MappedToUri<'_, $borrowed<S>> {
            type Target = $owned_uri;

            fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> {
                let encoded = self.try_to_string()?;
                Ok(TryFrom::try_from(encoded)
                    .expect("[validity] the IRI must be encoded into a valid URI"))
            }
        }

        impl<'a, S: Spec> From<&'a $borrowed<S>> for MappedToUri<'a, $borrowed<S>> {
            #[inline]
            fn from(iri: &'a $borrowed<S>) -> Self {
                Self(iri)
            }
        }

        #[cfg(feature = "alloc")]
        impl<'a, S: Spec> From<&'a $owned<S>> for MappedToUri<'a, $borrowed<S>> {
            #[inline]
            fn from(iri: &'a $owned<S>) -> Self {
                Self(iri.as_slice())
            }
        }
    };
}

impl_for_iri!(RiReferenceStr, RiReferenceString, UriReferenceString);
impl_for_iri!(RiStr, RiString, UriString);
impl_for_iri!(RiAbsoluteStr, RiAbsoluteString, UriAbsoluteString);
impl_for_iri!(RiRelativeStr, RiRelativeString, UriRelativeString);
impl_for_iri!(RiQueryStr, RiQueryString, UriQueryString);
+impl_for_iri!(RiFragmentStr, RiFragmentString, UriFragmentString); + +/// Percent-encodes and writes the IRI string using the given buffer. +fn write_percent_encoded(f: &mut fmt::Formatter<'_>, mut s: &str) -> fmt::Result { + while !s.is_empty() { + // Skip ASCII characters. + let non_ascii_pos = s.bytes().position(|b| !b.is_ascii()).unwrap_or(s.len()); + let (ascii, rest) = s.split_at(non_ascii_pos); + if !ascii.is_empty() { + f.write_str(ascii)?; + s = rest; + } + + if s.is_empty() { + return Ok(()); + } + + // Search for the next ASCII character. + let nonascii_end = s.bytes().position(|b| b.is_ascii()).unwrap_or(s.len()); + let (nonasciis, rest) = s.split_at(nonascii_end); + debug_assert!( + !nonasciis.is_empty(), + "string without non-ASCII characters should have caused early return" + ); + s = rest; + + // Escape non-ASCII characters as percent-encoded bytes. + // + // RFC 3987 (section 3.1 step 2) says "for each character in + // 'ucschar' or 'iprivate'", but this simply means "for each + // non-ASCII characters" since any non-ASCII characters that can + // appear in an IRI match `ucschar` or `iprivate`. + /// Number of source bytes to encode at once. + const NUM_BYTES_AT_ONCE: usize = 21; + percent_encode_bytes(f, nonasciis, &mut [0_u8; NUM_BYTES_AT_ONCE * 3])?; + } + + Ok(()) +} + +/// Percent-encode the string and pass the encoded chunks to the given function. +/// +/// `buf` is used as a temporary working buffer. It is initialized by this +/// function, so users can pass any mutable byte slice with enough size. +/// +/// # Precondition +/// +/// The length of `buf` must be 3 bytes or more. +fn percent_encode_bytes(f: &mut fmt::Formatter<'_>, s: &str, buf: &mut [u8]) -> fmt::Result { + /// Fill the buffer by percent-encoded bytes. + /// + /// Note that this function applies percent-encoding to every characters, + /// even if it is ASCII alphabet. + /// + /// # Precondition + /// + /// * The length of `buf` must be 3 bytes or more. 
+ /// * All of the `buf[i * 3]` elements should already be set to `b'%'`. + // This function have many preconditions and I don't want checks for them + // to be mandatory, so make this nested inner function. + fn fill_by_percent_encoded<'a>(buf: &'a mut [u8], bytes: &mut core::str::Bytes<'_>) -> &'a str { + let src_len = bytes.len(); + // `<[u8; N]>::array_chunks_mut` is unstable as of Rust 1.58.1. + for (dest, byte) in buf.chunks_exact_mut(3).zip(bytes.by_ref()) { + debug_assert_eq!( + dest.len(), + 3, + "[validity] `chunks_exact()` must return a slice with the exact length" + ); + debug_assert_eq!( + dest[0], b'%', + "[precondition] the buffer must be properly initialized" + ); + + let upper = byte >> 4; + let lower = byte & 0b1111; + dest[1] = HEXDIGITS[usize::from(upper)]; + dest[2] = HEXDIGITS[usize::from(lower)]; + } + let num_dest_written = (src_len - bytes.len()) * 3; + let buf_filled = &buf[..num_dest_written]; + // SAFETY: `b'%'` and `HEXDIGITS[_]` are all ASCII characters, so + // `buf_filled` is filled with ASCII characters and is valid UTF-8 bytes. + unsafe { + debug_assert!(core::str::from_utf8(buf_filled).is_ok()); + core::str::from_utf8_unchecked(buf_filled) + } + } + + assert!( + buf.len() >= 3, + "[precondition] length of `buf` must be 3 bytes or more" + ); + + // Drop the elements that will never be used. + // The length to be used is always a multiple of three. + let buf_len = buf.len() / 3 * 3; + let buf = &mut buf[..buf_len]; + + // Fill some bytes with `%`. + // This will be vectorized by optimization (especially for long buffers), + // so no need to selectively set `buf[i * 3]`. + buf.fill(b'%'); + + let mut bytes = s.bytes(); + // `<core::str::Bytes as ExactSizeIterator>::is_empty` is unstable as of Rust 1.58.1. + while bytes.len() != 0 { + let encoded = fill_by_percent_encoded(buf, &mut bytes); + f.write_str(encoded)?; + } + + Ok(()) +} + +/// Percent-encodes the given IRI using the given buffer. 
+#[cfg(feature = "alloc")] +pub(crate) fn try_percent_encode_iri_inline( + iri: &mut String, +) -> Result<(), alloc::collections::TryReserveError> { + // Calculate the result length and extend the buffer. + let num_nonascii = count_nonascii(iri); + if num_nonascii == 0 { + // No need to escape. + return Ok(()); + } + let additional = num_nonascii * 2; + iri.try_reserve(additional)?; + let src_len = iri.len(); + + // Temporarily take the ownership of the internal buffer. + let mut buf = core::mem::take(iri).into_bytes(); + // `b'\0'` cannot appear in a valid IRI, so this default value would be + // useful in case of debugging. + buf.extend(core::iter::repeat(b'\0').take(additional)); + + // Fill the buffer from the tail to the head. + let mut dest_end = buf.len(); + let mut src_end = src_len; + let mut rest_nonascii = num_nonascii; + while rest_nonascii > 0 { + debug_assert!( + src_end > 0, + "[validity] the source position should not overrun" + ); + debug_assert!( + dest_end > 0, + "[validity] the destination position should not overrun" + ); + src_end -= 1; + dest_end -= 1; + let byte = buf[src_end]; + if byte.is_ascii() { + buf[dest_end] = byte; + // Use the ASCII character directly. + } else { + // Percent-encode the byte. + dest_end -= 2; + buf[dest_end] = b'%'; + let upper = byte >> 4; + let lower = byte & 0b1111; + buf[dest_end + 1] = HEXDIGITS[usize::from(upper)]; + buf[dest_end + 2] = HEXDIGITS[usize::from(lower)]; + rest_nonascii -= 1; + } + } + + // Move the result from the temporary buffer to the destination. + let s = String::from_utf8(buf).expect("[consistency] the encoding result is an ASCII string"); + *iri = s; + Ok(()) +} + +/// Returns the number of non-ASCII characters. 
+#[cfg(feature = "alloc")] +#[inline] +#[must_use] +fn count_nonascii(s: &str) -> usize { + s.bytes().filter(|b| !b.is_ascii()).count() +} diff --git a/vendor/iri-string/src/format.rs b/vendor/iri-string/src/format.rs new file mode 100644 index 00000000..ecc038d0 --- /dev/null +++ b/vendor/iri-string/src/format.rs @@ -0,0 +1,209 @@ +//! Utilities for formatting (especially `Display` trait). +//! +//! This module contains utilities for [`Display`][`core::fmt::Display`]-able +//! types. + +use core::fmt::{self, Write as _}; + +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +/// Output buffer capacity overflow error. +#[derive(Debug, Clone, Copy)] +pub struct CapacityOverflowError; + +impl fmt::Display for CapacityOverflowError { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("buffer capacity overflow") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for CapacityOverflowError {} + +/// Writer to the bytes buffer. +struct ByteBufWriter<'b> { + /// Destination buffer. + buffer: &'b mut [u8], + /// Position to write the next string fragment. + cursor: usize, +} + +impl fmt::Write for ByteBufWriter<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + let dest = &mut self.buffer[self.cursor..]; + if dest.len() < s.len() { + return Err(fmt::Error); + } + dest[..s.len()].copy_from_slice(s.as_bytes()); + self.cursor += s.len(); + Ok(()) + } +} + +/// Writes to the bytes buffer. 
+pub fn write_to_slice<'a, T: fmt::Display>( + buf: &'a mut [u8], + value: &T, +) -> Result<&'a str, CapacityOverflowError> { + let mut writer = ByteBufWriter { + buffer: buf, + cursor: 0, + }; + if write!(writer, "{}", value).is_err() { + return Err(CapacityOverflowError); + } + let len = writer.cursor; + let result = core::str::from_utf8(&buf[..len]) + .expect("[validity] fmt::Display writes valid UTF-8 byte sequence"); + Ok(result) +} + +/// Writer that fails (not panics) on OOM. +#[cfg(feature = "alloc")] +struct StringWriter<'a> { + /// Destination buffer. + buffer: &'a mut String, + /// Memory allocation error. + error: Option<TryReserveError>, +} + +#[cfg(feature = "alloc")] +impl fmt::Write for StringWriter<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + if self.error.is_some() { + return Err(fmt::Error); + } + if let Err(e) = self.buffer.try_reserve(s.len()) { + self.error = Some(e); + return Err(fmt::Error); + } + // This should never fail since `.try_reserve(s.len())` succeeded. + self.buffer.push_str(s); + Ok(()) + } +} + +/// Appends the data to the string. +/// +/// When allocation failure happens, incompletely appended strings won't be +/// stripped. Callers are responsible to clean up the destination if necessary. +#[cfg(feature = "alloc")] +pub fn try_append_to_string<T: fmt::Display>( + dest: &mut String, + value: &T, +) -> Result<(), TryReserveError> { + let mut writer = StringWriter { + buffer: dest, + error: None, + }; + if write!(writer, "{}", value).is_err() { + let e = writer + .error + .expect("[consistency] allocation error should be set on formatting failure"); + return Err(e); + } + Ok(()) +} + +/// Returns true if the two equals after they are converted to strings. +pub(crate) fn eq_str_display<T>(s: &str, d: &T) -> bool +where + T: ?Sized + fmt::Display, +{ + /// Dummy writer to compare the formatted object to the given string. 
+ struct CmpWriter<'a>(&'a str); + impl fmt::Write for CmpWriter<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + if self.0.len() < s.len() { + return Err(fmt::Error); + } + let (prefix, rest) = self.0.split_at(s.len()); + self.0 = rest; + if prefix == s { + Ok(()) + } else { + Err(fmt::Error) + } + } + } + + let mut writer = CmpWriter(s); + let succeeded = write!(writer, "{}", d).is_ok(); + succeeded && writer.0.is_empty() +} + +/// A debug-printable type to hide the sensitive information. +#[derive(Clone, Copy)] +pub(crate) struct Censored; + +impl core::fmt::Debug for Censored { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("{censored}") + } +} + +/// [`ToString`][`alloc::string::ToString`], but without panic. +#[cfg(feature = "alloc")] +pub trait ToStringFallible: alloc::string::ToString { + /// [`ToString::to_string`][`alloc::string::ToString::to_string`], but without panic on OOM. + fn try_to_string(&self) -> Result<String, TryReserveError>; +} + +#[cfg(feature = "alloc")] +impl<T: fmt::Display> ToStringFallible for T { + /// [`ToString::to_string`][`alloc::string::ToString::to_string`], but without panic on OOM. + #[inline] + fn try_to_string(&self) -> Result<String, TryReserveError> { + let mut buf = String::new(); + try_append_to_string(&mut buf, self)?; + Ok(buf) + } +} + +/// A trait for types that can be converted to a dedicated allocated string types. +#[cfg(feature = "alloc")] +pub trait ToDedicatedString { + /// Conversion target type. + type Target; + + /// Converts the value to the allocated string. + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError>; + + /// Converts the value to the allocated string. + /// + /// # Panics + /// + /// Panics if memory allocation error occured. 
+ #[inline] + #[must_use] + fn to_dedicated_string(&self) -> Self::Target { + self.try_to_dedicated_string() + .expect("failed to allocate enough memory") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn eq_str_display_1() { + assert!(eq_str_display("hello", "hello")); + assert!(eq_str_display("42", &42)); + + assert!(eq_str_display( + r#"\x00\t\r\n\xff\\"#, + &b"\x00\t\r\n\xff\\".escape_ascii() + )); + + assert!(!eq_str_display("hello", "world")); + assert!(!eq_str_display("hello world", "hello")); + assert!(!eq_str_display("hello", "hello world")); + assert!(!eq_str_display("42", &4)); + assert!(!eq_str_display("4", &42)); + } +} diff --git a/vendor/iri-string/src/lib.rs b/vendor/iri-string/src/lib.rs new file mode 100644 index 00000000..9be41a9b --- /dev/null +++ b/vendor/iri-string/src/lib.rs @@ -0,0 +1,159 @@ +//! String types for [RFC 3987 Internationalized Resource Identifiers (IRIs)][RFC 3987] and +//! [RFC 3986 Uniform Resource Identifiers (URIs)][RFC 3986]. +//! +//! Note that this crate does not have any extra knowledge about protocols. +//! Comparisons between IRI strings by `PartialEq` and `Eq` is implemented as [simple string +//! comparison](https://tools.ietf.org/html/rfc3986#section-6.2.1). +//! You should implement by yourself or use another crate to use such extra knowledge to compare +//! IRIs / URIs. +//! +//! # Capability +//! +//! This crate provides many features for IRIs / URIs. +//! +//! ## String types +//! +//! [`types` module][`types`] module provides various string types for IRIs and URIs. +//! The borrowed string types are unsized slice types (such as `[u8]` and `str`) +//! and not a sized struct, so they are highly interoperable with for example +//! `Cow` and `Rc`. Conversions between `&str` and borrwed IRI string types are easy. +//! +//! ## Resolvers +//! +//! [`resolve` module][`resolve`] provides IRI / URI references resolver. +//! However, you are recommended to use methods of string types such as +//! 
[`RiReferenceStr::resolve_against()`] or [`RiRelativeStr::resolve_against()`] +//! if you don't intend to resolve multiple IRIs against the same base. +//! +//! ## Validators +//! +//! Validator functions are provided from [`validate` module][`validate`]. +//! +//! ## Percent encoding +//! +//! [`percent_encode` module][`percent_encode`] provides a converter to encode +//! user-provided string into percent-encoded one (if syntax requires so). +//! +//! ## IRI builder +//! +//! [`build` module][`build`] provides IRI builder. +//! +//! ## URI template (RFC 6570) +//! +//! [`template` module][`template`] provides an RFC 6570 URI Template processor. +//! +//! # Feature flags +//! +//! ## `std` and `alloc` support +//! +//! This crate supports `no_std` usage. +//! +//! * `alloc` feature: +//! + Std library or `alloc` crate is required. +//! + This feature enables types and functions which require memory allocation, +//! e.g. `types::IriString` and `types::IriRelativeStr::resolve_against()`. +//! * `std` feature (**enabled by default**): +//! + Std library is required. +//! + This automatically enables `alloc` feature. +//! + The feature lets the crate utilize std-specific stuff, such as `std::error::Error` trait. +//! * With neither of them: +//! + The crate can be used in `no_std` environment. +//! +//! ## Other features +//! +//! * `serde` +//! + Enables serde support. +//! + Implements `Serialize` and `Deserialize` traits for IRI / URI types. +//! * `memchr` +//! + Enables faster internal character search. +//! +//! # Rationale +//! +//! ## `foo:`, `foo:/`, `foo://`, `foo:///`, `foo:////`, ... are valid IRIs +//! +//! All of these are valid IRIs. +//! (On the other hand, all of them are invalid as relative IRI reference, because they don't +//! match `relative-part` rule, especially `path-noscheme`, as the first path component of the +//! relative path contains a colon.) +//! +//! * `foo:` +//! + Decomposed to `<scheme="foo">:<path-empty="">`. +//! * `foo:/` +//! 
+ Decomposed to `<scheme="foo">:<path-absolute="/">`. +//! * `foo://` +//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="">`. +//! * `foo:///` +//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="/">`. +//! * `foo:////` +//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="//">`. +//! * `foo://///` +//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="///">`. +//! +//! RFC 3986 says that "if authority is absent, path cannot start with `//`". +//! +//! > When authority is present, the path must either be empty or begin with a slash ("/") +//! > character. When authority is not present, the path cannot begin with two slash characters +//! > ("//"). +//! > +//! > --- [RFC 3986, section 3. Syntax Components](https://tools.ietf.org/html/rfc3986#section-3). +//! +//! > If a URI contains an authority component, then the path component must either be empty or +//! > begin with a slash ("/") character. If a URI does not contain an authority component, then the +//! > path cannot begin with two slash characters ("//"). +//! > +//! > --- [RFC 3986, section 3.3. Path](https://tools.ietf.org/html/rfc3986#section-3.3) +//! +//! We should interpret them as "if `authority` rule is completely unused (i.e. does not match any +//! strings **including empty string**), path cannot start with `//`". +//! In other words, we should consider this as **explaining the ABNF of `hier-part` rule** +//! (especially why it does not use `path` rule), but **not adding extra restriction to the rule +//! written in ABNF**. +//! +//! This restriction is necessary to remove ambiguity in decomposition of some strings. +//! For example, it is natural to decompose `foo://` to `<scheme="foo">:<path="//">` or +//! `<scheme="foo">://<authority=""><path="">`. +//! The restriction, **which is already encoded to the ABNF rule**, tells us to always decompose to +//! the latter form, rather than the former one. +//! +//! 
Readers of the spec might be confused by "when authority is **present**" and "if a URI +//! **contains** an authority component", which is unclear. +//! However, based on the interpretation above, we should consider authority part with empty string +//! as satisfying the condition "authority is **present**". +//! +//! ## IRI resolution can fail +//! +//! For some inputs, the resulting string of IRI normalization and resolution can be syntactically +//! correct but semantically wrong. In such cases, the normalizer and resolver provided by this +//! crate do not silently "fix" the IRI by non-standard processing, but just +//! fail by returning `Err(_)`. +//! +//! For details, see the documentation of [`normalize`] module. +//! +//! [RFC 3986]: https://tools.ietf.org/html/rfc3986 +//! [RFC 3987]: https://tools.ietf.org/html/rfc3987 +//! [`RiReferenceStr::resolve_against()`]: `types::RiReferenceStr::resolve_against` +//! [`RiRelativeStr::resolve_against()`]: `types::RiRelativeStr::resolve_against` +#![warn(missing_docs)] +#![warn(unsafe_op_in_unsafe_fn)] +#![warn(clippy::missing_docs_in_private_items)] +#![warn(clippy::undocumented_unsafe_blocks)] +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +#[cfg(feature = "alloc")] +extern crate alloc; + +pub mod build; +pub mod components; +pub mod convert; +pub mod format; +pub mod mask_password; +pub mod normalize; +pub(crate) mod parser; +pub mod percent_encode; +pub(crate) mod raw; +pub mod resolve; +pub mod spec; +pub mod template; +pub mod types; +pub mod validate; diff --git a/vendor/iri-string/src/mask_password.rs b/vendor/iri-string/src/mask_password.rs new file mode 100644 index 00000000..ea3fda3b --- /dev/null +++ b/vendor/iri-string/src/mask_password.rs @@ -0,0 +1,298 @@ +//! Password masker.
+ +use core::fmt::{self, Write as _}; +use core::ops::Range; + +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::borrow::ToOwned; +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +use crate::components::AuthorityComponents; +#[cfg(feature = "alloc")] +use crate::format::ToDedicatedString; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiReferenceStr, RiRelativeStr, RiStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiAbsoluteString, RiReferenceString, RiRelativeString, RiString}; + +/// Returns the range of the password to hide. +/// +/// The returned range is a byte range into the whole IRI string and covers the +/// part of the `userinfo` after its first colon. If the `userinfo` contains no +/// colon, the range is empty and sits at the end of the `userinfo`. +/// +/// Returns `None` if `AuthorityComponents::from_iri()` or its `userinfo()` +/// returns `None`. +pub(crate) fn password_range_to_hide<S: Spec>(iri: &RiReferenceStr<S>) -> Option<Range<usize>> { + /// Spec-agnostic internal implementation of `password_range_to_hide`. + fn inner(iri: &str, userinfo: &str) -> Option<Range<usize>> { + // Length (including `//`) before the `authority` component. + // 2: `"//".len()`. + let authority_start = 2 + iri + .find("//") + .expect("[validity] `authority` component must be prefixed with `//`"); + // The offsets below treat `userinfo` as starting right at `authority_start`. + let end = authority_start + userinfo.len(); + // Hide everything after the first colon; without a colon, `start == end`. + let start = authority_start + userinfo.find(':').map_or_else(|| userinfo.len(), |v| v + 1); + Some(start..end) + } + + let authority_components = AuthorityComponents::from_iri(iri)?; + let userinfo = authority_components.userinfo()?; + inner(iri.as_str(), userinfo) +} + +/// Writes the URI with the password part replaced. +/// +/// Writes `s[..pw_range.start]`, then `alt`, then `s[pw_range.end..]` to `f`. +/// In debug builds, asserts that `pw_range.end` does not exceed `s.len()`. +fn write_with_masked_password<D>( + f: &mut fmt::Formatter<'_>, + s: &str, + pw_range: Range<usize>, + alt: &D, +) -> fmt::Result +where + D: ?Sized + fmt::Display, +{ + debug_assert!( + s.len() >= pw_range.end, + "[consistency] password range must be inside the IRI" + ); + + f.write_str(&s[..pw_range.start])?; + alt.fmt(f)?; + f.write_str(&s[pw_range.end..])?; + Ok(()) +} + +/// Writes an IRI with the password part trimmed.
+fn write_trim_password(f: &mut fmt::Formatter<'_>, s: &str, pw_range: Range<usize>) -> fmt::Result { + write_with_masked_password(f, s, pw_range, "") +} + +/// A wrapper of an IRI string that masks the non-empty password when `Display`ed. +/// +/// This is the return type of the `mask_password` method of IRI string types (such as +/// [`RiStr::mask_password`]). +/// +/// # Examples +/// +/// ``` +/// # use iri_string::validate::Error; +/// # #[cfg(feature = "alloc")] { +/// use iri_string::types::UriReferenceStr; +/// +/// let iri = UriReferenceStr::new("http://user:password@example.com/path?query")?; +/// let masked = iri.mask_password(); +/// assert_eq!(masked.to_string(), "http://user:@example.com/path?query"); +/// +/// assert_eq!( +/// masked.replace_password("${password}").to_string(), +/// "http://user:${password}@example.com/path?query" +/// ); +/// # } +/// # Ok::<_, Error>(()) +/// ``` +/// +/// [`RiStr::mask_password`]: `crate::types::RiStr::mask_password` +#[derive(Clone, Copy)] +pub struct PasswordMasked<'a, T: ?Sized> { + /// IRI reference. + iri_ref: &'a T, +} + +impl<'a, T: ?Sized> PasswordMasked<'a, T> { + /// Creates a new `PasswordMasked` object. + #[inline] + #[must_use] + pub(crate) fn new(iri_ref: &'a T) -> Self { + Self { iri_ref } + } +} + +/// Implements methods and traits for `PasswordMasked`. +macro_rules! impl_mask { + ($borrowed:ident, $owned:ident) => { + impl<'a, S: Spec> PasswordMasked<'a, $borrowed<S>> { + /// Replaces the password with the given arbitrary content. + /// + /// Note that the result might be invalid as an IRI since arbitrary string + /// can go to the place of the password.
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// + /// assert_eq!( + /// masked.replace_password("${password}").to_string(), + /// "http://user:${password}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn replace_password<D>(&self, alt: D) -> PasswordReplaced<'a, $borrowed<S>, D> + where + D: fmt::Display, + { + PasswordReplaced::with_replacer(self.iri_ref, move |_| alt) + } + + /// Replaces the password with the content returned by the given closure. + /// + /// The closure receives the original password string. + /// + /// Note that the result might be invalid as an IRI since arbitrary string + /// can go to the place of the password. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// + /// let replaced = masked + /// .replace_password_with(|password| format!("{{{} chars}}", password.len())); + /// assert_eq!( + /// replaced.to_string(), + /// "http://user:{8 chars}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn replace_password_with<F, D>( + &self, + replace: F, + ) -> PasswordReplaced<'a, $borrowed<S>, D> + where + F: FnOnce(&str) -> D, + D: fmt::Display, + { + PasswordReplaced::with_replacer(self.iri_ref, replace) + } + } + + impl<S: Spec> fmt::Display for PasswordMasked<'_, $borrowed<S>> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match password_range_to_hide(self.iri_ref.as_ref()) {
Some(pw_range) => write_trim_password(f, self.iri_ref.as_str(), pw_range), + None => self.iri_ref.fmt(f), + } + } + } + + impl<S: Spec> fmt::Debug for PasswordMasked<'_, $borrowed<S>> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_char('<')?; + fmt::Display::fmt(self, f)?; + f.write_char('>') + } + } + + #[cfg(feature = "alloc")] + impl<S: Spec> ToDedicatedString for PasswordMasked<'_, $borrowed<S>> { + type Target = $owned<S>; + + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { + let pw_range = match password_range_to_hide(self.iri_ref.as_ref()) { + Some(pw_range) => pw_range, + None => return Ok(self.iri_ref.to_owned()), + }; + let mut s = String::new(); + let iri_ref = self.iri_ref.as_str(); + s.try_reserve(iri_ref.len() - (pw_range.end - pw_range.start))?; + s.push_str(&iri_ref[..pw_range.start]); + s.push_str(&iri_ref[pw_range.end..]); + // SAFETY: IRI remains valid and type does not change if + // the password is trimmed. + let iri = unsafe { <$owned<S>>::new_maybe_unchecked(s) }; + Ok(iri) + } + } + }; +} + +impl_mask!(RiReferenceStr, RiReferenceString); +impl_mask!(RiStr, RiString); +impl_mask!(RiAbsoluteStr, RiAbsoluteString); +impl_mask!(RiRelativeStr, RiRelativeString); + +/// A wrapper of an IRI string that replaces the non-empty password when `Display`ed. +/// +/// This is the return type of [`PasswordMasked::replace_password`] and +/// `PasswordMasked::replace_password_with`. +/// +/// Note that the result might be invalid as an IRI since arbitrary string can +/// go to the place of the password. +#[cfg_attr( + feature = "alloc", + doc = "Because of this, [`ToDedicatedString`] trait is not implemented for this type." +)] +/// +/// [`PasswordMasked::replace_password`]: `PasswordMasked::replace_password` +pub struct PasswordReplaced<'a, T: ?Sized, D> { + /// IRI reference. + iri_ref: &'a T, + /// Password range and alternative content.
+ password: Option<(Range<usize>, D)>, +} + +impl<'a, T, D> PasswordReplaced<'a, T, D> +where + T: ?Sized, + D: fmt::Display, +{ + /// Creates a new `PasswordReplaced` object. + /// + /// `replace` is called with the original password only when + /// `password_range_to_hide()` returns a range for the given IRI. + /// + /// # Precondition + /// + /// The given string must be a valid IRI reference. + #[inline] + #[must_use] + pub(crate) fn with_replacer<S, F>(iri_ref: &'a T, replace: F) -> Self + where + S: Spec, + T: AsRef<RiReferenceStr<S>>, + F: FnOnce(&str) -> D, + { + let iri_ref_asref = iri_ref.as_ref(); + let password = password_range_to_hide(iri_ref_asref) + .map(move |pw_range| (pw_range.clone(), replace(&iri_ref_asref.as_str()[pw_range]))); + Self { iri_ref, password } + } +} + +/// Implements traits for `PasswordReplaced`. +macro_rules! impl_replace { + ($borrowed:ident, $owned:ident) => { + impl<S: Spec, D: fmt::Display> fmt::Display for PasswordReplaced<'_, $borrowed<S>, D> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.password { + Some((pw_range, alt)) => { + write_with_masked_password(f, self.iri_ref.as_str(), pw_range.clone(), alt) + } + None => self.iri_ref.fmt(f), + } + } + } + + impl<S: Spec, D: fmt::Display> fmt::Debug for PasswordReplaced<'_, $borrowed<S>, D> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_char('<')?; + fmt::Display::fmt(self, f)?; + f.write_char('>') + } + } + }; +} + +impl_replace!(RiReferenceStr, RiReferenceString); +impl_replace!(RiStr, RiString); +impl_replace!(RiAbsoluteStr, RiAbsoluteString); +impl_replace!(RiRelativeStr, RiRelativeString); diff --git a/vendor/iri-string/src/normalize.rs b/vendor/iri-string/src/normalize.rs new file mode 100644 index 00000000..a00fa44a --- /dev/null +++ b/vendor/iri-string/src/normalize.rs @@ -0,0 +1,691 @@ +//! Normalization. +//! +//! # IRI normalization (and resolution) can fail +//! +//! Though this is not explicitly stated in RFC 3986, IRI normalization can fail. +//! For example, `foo:.///bar`, `foo:./..//bar`, and `foo:/..//bar` are all +//!
normalized to `foo://bar` as a string. However, an IRI without authority (note +//! that this is different from "with empty authority") cannot have a path +//! starting with `//`, since it is ambiguous and can be interpreted as an IRI +//! with authority. So, `foo://bar` is decomposed as scheme `foo`, authority +//! `bar`, and empty path. The expected result is the combination of scheme +//! `foo`, no authority, and path `//bar` (though this is not possible to +//! serialize), so the algorithm fails as it cannot return the intended result. +//! +//! IRI resolution can also fail since it (conditionally) invokes normalization +//! during the resolution process. For example, resolving a reference `.///bar` +//! or `/..//bar` against the base `foo:` fails. +//! +//! Thus, IRI resolution can fail for some abnormal cases. +//! +//! Note that this kind of failure can happen only when the base IRI has no +//! authority and empty path. This would be rare in the wild, since many people +//! would use an IRI with authority part, such as `http://`. +//! +//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the +//! failure. Currently no cases are known to fail when at least one of the base +//! IRI or the relative IRI contains authorities. +//! +//! To know what will happen on resolution failure, see the module documentation +//! for [`resolve`][`crate::resolve`]. +//! +//! ## Examples +//! +//! ### Normalization failure +//! +//! ``` +//! # #[cfg(feature = "alloc")] { +//! use iri_string::normalize::Error; +//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; +//! +//! let base = IriAbsoluteStr::new("foo:.///bar")?; +//! assert!( +//! base.normalize().ensure_rfc3986_normalizable().is_err(), +//! "this normalization should fail without WHATWG URL Standard serialization" +//! ); +//! # } +//! # Ok::<_, iri_string::validate::Error>(()) +//! ``` +//! +//! ### Resolution failure +//! +//! ``` +//! # #[cfg(feature = "alloc")] { +//!
use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; +//! +//! let base = IriAbsoluteStr::new("scheme:")?; +//! { +//! let reference = IriReferenceStr::new(".///bar")?; +//! let result = reference.resolve_against(base) +//! .ensure_rfc3986_normalizable(); +//! assert!(result.is_err()); +//! } +//! +//! { +//! let reference2 = IriReferenceStr::new("/..//bar")?; +//! // Resulting string will be `scheme://bar`, but `bar` should be a path +//! // segment, not a host. So, the semantically correct target IRI cannot +//! // be represented. +//! let result2 = reference2.resolve_against(base) +//! .ensure_rfc3986_normalizable(); +//! assert!(result2.is_err()); +//! } +//! # } +//! # Ok::<_, iri_string::validate::Error>(()) +//! ``` + +mod error; +mod path; +mod pct_case; + +use core::fmt::{self, Display as _, Write as _}; +use core::marker::PhantomData; + +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; + +use crate::components::{RiReferenceComponents, Splitter}; +#[cfg(feature = "alloc")] +use crate::format::{ToDedicatedString, ToStringFallible}; +use crate::parser::str::rfind_split_hole; +use crate::parser::trusted::is_ascii_only_host; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiReferenceStr, RiStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiAbsoluteString, RiString}; + +pub use self::error::Error; +pub(crate) use self::path::{Path, PathCharacteristic, PathToNormalize}; +pub(crate) use self::pct_case::{ + is_pct_case_normalized, NormalizedAsciiOnlyHost, PctCaseNormalized, +}; + +/// Normalization algorithm. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum NormalizationMode { + /// No normalization. + None, + /// Default normalization mode. + /// + /// Applies RFC 3986 normalization whenever possible. When not possible, + /// applies serialization algorithm defined in WHATWG URL standard. + Default, + /// WHATWG-like normalization mode. 
+ /// + /// Preserves relative path as is (modulo case/pct normalization) when the + /// authority component is absent. + PreserveAuthoritylessRelativePath, +} + +impl NormalizationMode { + /// Returns true if case normalization and percent-encoding normalization should be applied. + /// + /// Note that even when this option is `true`, plain US-ASCII characters + /// won't be automatically lowered. Users should apply case normalization + /// for US-ASCII only `host` component by themselves. + #[inline] + #[must_use] + fn case_pct_normalization(self) -> bool { + match self { + Self::None => false, + Self::Default | Self::PreserveAuthoritylessRelativePath => true, + } + } +} + +/// Normalizedness check algorithm. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum NormalizednessCheckMode { + /// Default algorithm (corresponding to [`NormalizationMode::Default`]). + Default, + /// Strict RFC 3986 normalization. + Rfc3986, + /// WHATWG-like normalization algorithm (corresponding to + /// [`NormalizationMode::PreserveAuthoritylessRelativePath`]). + PreserveAuthoritylessRelativePath, +} + +/// Normalization operation. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct NormalizationOp { + /// Normalization mode. + pub(crate) mode: NormalizationMode, +} + +/// Spec-agnostic IRI normalization/resolution input. +#[derive(Debug, Clone, Copy)] +pub(crate) struct NormalizationInput<'a> { + /// Target scheme. + scheme: &'a str, + /// Target authority. + authority: Option<&'a str>, + /// Target path without dot-removal. + path: Path<'a>, + /// Target query. + query: Option<&'a str>, + /// Target fragment. + fragment: Option<&'a str>, + /// Normalization type. + op: NormalizationOp, +} + +impl<'a> NormalizationInput<'a> { + /// Creates a `NormalizedInput` from IRIs to resolve. 
+ #[inline] + #[must_use] + pub(crate) fn with_resolution_params<S: Spec>( + base_components: &RiReferenceComponents<'a, S>, + reference: &'a RiReferenceStr<S>, + ) -> Self { + let r = RiReferenceComponents::from(reference); + + Self::create_normalization_input( + r.iri.as_str(), + &r.splitter, + base_components.iri.as_str(), + &base_components.splitter, + ) + } + + /// Creates a `NormalizationInput` from components to resolve an IRI. + #[must_use] + fn create_normalization_input( + r_iri: &'a str, + r: &Splitter, + b_iri: &'a str, + b: &Splitter, + ) -> Self { + /// The toplevel component the reference has. + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] + enum RefToplevel { + /// Scheme. + Scheme, + /// Authority. + Authority, + /// Path. + Path, + /// Query. + Query, + /// Reference is empty or has only fragment. + None, + } + + impl RefToplevel { + /// Choose a component from either of the reference or the base, + /// based on the toplevel component of the reference. + #[inline] + #[must_use] + fn choose_then<T, F, G>(self, component: RefToplevel, reference: F, base: G) -> T + where + F: FnOnce() -> T, + G: FnOnce() -> T, + { + if self <= component { + reference() + } else { + base() + } + } + } + + let ref_toplevel = if r.has_scheme() { + RefToplevel::Scheme + } else if r.has_authority() { + RefToplevel::Authority + } else if !r.is_path_empty(r_iri.len()) { + RefToplevel::Path + } else if r.has_query() { + RefToplevel::Query + } else { + RefToplevel::None + }; + + let path = match ref_toplevel { + RefToplevel::Scheme | RefToplevel::Authority => { + Path::NeedsProcessing(PathToNormalize::from_single_path(r.path_str(r_iri))) + } + RefToplevel::Path => { + let r_path = r.path_str(r_iri); + if r_path.starts_with('/') { + Path::NeedsProcessing(PathToNormalize::from_single_path(r_path)) + } else { + // About this branch, see + // <https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.3>. 
+ // + // > o If the base URI has a defined authority component and an empty + // > path, then return a string consisting of "/" concatenated with the + // > reference's path; otherwise, + let b_path = b.path_str(b_iri); + let b_path = if b.has_authority() && b_path.is_empty() { + "/" + } else { + b_path + }; + Path::NeedsProcessing(PathToNormalize::from_paths_to_be_resolved( + b_path, r_path, + )) + } + } + RefToplevel::Query | RefToplevel::None => Path::Done(b.path_str(b_iri)), + }; + + Self { + scheme: r.scheme_str(r_iri).unwrap_or_else(|| { + b.scheme_str(b_iri) + .expect("[validity] non-relative IRI must have a scheme") + }), + authority: ref_toplevel.choose_then( + RefToplevel::Authority, + || r.authority_str(r_iri), + || b.authority_str(b_iri), + ), + path, + query: ref_toplevel.choose_then( + RefToplevel::Query, + || r.query_str(r_iri), + || b.query_str(b_iri), + ), + fragment: r.fragment_str(r_iri), + op: NormalizationOp { + mode: NormalizationMode::None, + }, + } + } +} + +impl<'a, S: Spec> From<&'a RiStr<S>> for NormalizationInput<'a> { + fn from(iri: &'a RiStr<S>) -> Self { + let components = RiReferenceComponents::<S>::from(iri.as_ref()); + let (scheme, authority, path, query, fragment) = components.to_major(); + let scheme = scheme.expect("[validity] `absolute IRI must have `scheme`"); + let path = Path::NeedsProcessing(PathToNormalize::from_single_path(path)); + + NormalizationInput { + scheme, + authority, + path, + query, + fragment, + op: NormalizationOp { + mode: NormalizationMode::None, + }, + } + } +} + +#[cfg(feature = "alloc")] +impl<'a, S: Spec> From<&'a RiString<S>> for NormalizationInput<'a> { + #[inline] + fn from(iri: &'a RiString<S>) -> Self { + Self::from(iri.as_slice()) + } +} + +impl<'a, S: Spec> From<&'a RiAbsoluteStr<S>> for NormalizationInput<'a> { + fn from(iri: &'a RiAbsoluteStr<S>) -> Self { + let components = RiReferenceComponents::<S>::from(iri.as_ref()); + let (scheme, authority, path, query, fragment) = 
components.to_major(); + let scheme = scheme.expect("[validity] `absolute IRI must have `scheme`"); + let path = Path::NeedsProcessing(PathToNormalize::from_single_path(path)); + + NormalizationInput { + scheme, + authority, + path, + query, + fragment, + op: NormalizationOp { + mode: NormalizationMode::None, + }, + } + } +} + +#[cfg(feature = "alloc")] +impl<'a, S: Spec> From<&'a RiAbsoluteString<S>> for NormalizationInput<'a> { + #[inline] + fn from(iri: &'a RiAbsoluteString<S>) -> Self { + Self::from(iri.as_slice()) + } +} + +impl NormalizationInput<'_> { + /// Checks if the path is normalizable by RFC 3986 algorithm. + /// + /// Returns `Ok(())` when normalizable, returns `Err(_)` if not. + pub(crate) fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> { + if self.authority.is_some() { + return Ok(()); + } + match self.path { + Path::Done(_) => Ok(()), + Path::NeedsProcessing(path) => path.ensure_rfc3986_normalizable_with_authority_absent(), + } + } +} + +/// Writable as a normalized IRI. +/// +/// Note that this implicitly apply serialization rule defined by WHATWG URL +/// Standard (to handle normalization impossible by RFC 3986) because `Display` +/// should not fail by reasons other than backend I/O failure. If you make the +/// normalization fail in such cases, check if the path starts with `/./`. +/// When the normalization succeeds by RFC 3986 algorithm, the path never starts +/// with `/./`. +struct NormalizedInner<'a, S> { + /// Spec-agnostic normalization input. + input: NormalizationInput<'a>, + /// Spec. + _spec: PhantomData<fn() -> S>, +} + +impl<S: Spec> fmt::Debug for NormalizedInner<'_, S> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Normalized") + .field("input", &self.input) + .finish() + } +} + +impl<'a, S: Spec> NormalizedInner<'a, S> { + /// Creates a new `Normalized` object from the given input. 
+ #[inline] + #[must_use] + fn from_input(input: NormalizationInput<'a>) -> Self { + Self { + input, + _spec: PhantomData, + } + } +} + +impl<S: Spec> fmt::Display for NormalizedInner<'_, S> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Write the scheme. + if self.input.op.mode.case_pct_normalization() { + normalize_scheme(f, self.input.scheme)?; + } else { + f.write_str(self.input.scheme)?; + } + f.write_str(":")?; + + // Write the authority if available. + if let Some(authority) = self.input.authority { + f.write_str("//")?; + if self.input.op.mode.case_pct_normalization() { + normalize_authority::<S>(f, authority)?; + } else { + // No case/pct normalization. + f.write_str(authority)?; + } + } + + // Process and write the path. + match self.input.path { + Path::Done(s) => { + if self.input.op.mode.case_pct_normalization() { + // Normalize the path. + PathToNormalize::from_single_path(s).fmt_write_normalize::<S, _>( + f, + self.input.op, + self.input.authority.is_some(), + )? + } else { + // No normalization. + f.write_str(s)? + } + } + Path::NeedsProcessing(path) => { + path.fmt_write_normalize::<S, _>(f, self.input.op, self.input.authority.is_some())? + } + } + + // Write the query if available. + if let Some(query) = self.input.query { + f.write_char('?')?; + if self.input.op.mode.case_pct_normalization() { + normalize_query::<S>(f, query)?; + } else { + f.write_str(query)?; + } + } + + // Write the fragment if available. + if let Some(fragment) = self.input.fragment { + f.write_char('#')?; + if self.input.op.mode.case_pct_normalization() { + normalize_fragment::<S>(f, fragment)?; + } else { + f.write_str(fragment)?; + } + } + + Ok(()) + } +} + +/// Writes the normalized scheme. +pub(crate) fn normalize_scheme(f: &mut fmt::Formatter<'_>, scheme: &str) -> fmt::Result { + // Apply case normalization. 
+ // + // > namely, that the scheme and US-ASCII only host are case + // > insensitive and therefore should be normalized to lowercase. + // > + // > --- <https://datatracker.ietf.org/doc/html/rfc3987#section-5.3.2.1>. + // + // Note that `scheme` consists of only ASCII characters and contains + // no percent-encoded characters. + scheme + .chars() + .map(|c| c.to_ascii_lowercase()) + .try_for_each(|c| f.write_char(c)) +} + +/// Writes the normalized authority. +fn normalize_authority<S: Spec>(f: &mut fmt::Formatter<'_>, authority: &str) -> fmt::Result { + let host_port = match rfind_split_hole(authority, b'@') { + Some((userinfo, host_port)) => { + // Don't lowercase `userinfo` even if it is ASCII only. `userinfo` + // is not a part of `host`. + PctCaseNormalized::<S>::new(userinfo).fmt(f)?; + f.write_char('@')?; + host_port + } + None => authority, + }; + normalize_host_port::<S>(f, host_port) +} + +/// Writes the normalized host and port. +pub(crate) fn normalize_host_port<S: Spec>( + f: &mut fmt::Formatter<'_>, + host_port: &str, +) -> fmt::Result { + // If the suffix is a colon, it is a delimiter between the host and empty + // port. An empty port should be removed during normalization (see RFC 3986 + // section 3.2.3), so strip it. + // + // > URI producers and normalizers should omit the port component and its + // > ":" delimiter if port is empty or if its value would be the same as + // > that of the scheme's default. + // > + // > --- [RFC 3986 section 3.2.3. Port](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.3) + let host_port = host_port.strip_suffix(':').unwrap_or(host_port); + + // Apply case normalization and percent-encoding normalization to `host`. + // Optional `":" port` part only consists of an ASCII colon and ASCII + // digits, so this won't affect to the test result. + if is_ascii_only_host(host_port) { + // If the host is ASCII characters only, make plain alphabets lower case. 
NormalizedAsciiOnlyHost::new(host_port).fmt(f) + } else { + PctCaseNormalized::<S>::new(host_port).fmt(f) + } +} + +/// Writes the normalized query without the '?' prefix. +pub(crate) fn normalize_query<S: Spec>(f: &mut fmt::Formatter<'_>, query: &str) -> fmt::Result { + // Apply percent-encoding normalization. + PctCaseNormalized::<S>::new(query).fmt(f) +} + +/// Writes the normalized fragment without the '#' prefix. +pub(crate) fn normalize_fragment<S: Spec>( + f: &mut fmt::Formatter<'_>, + fragment: &str, +) -> fmt::Result { + // Apply percent-encoding normalization. + PctCaseNormalized::<S>::new(fragment).fmt(f) +} + +/// Normalized OR resolved IRI. +/// +/// Resolved IRI can be represented by this type. In that case, the result might +/// not be normalized. If you want the IRI resolution result to be normalized, +/// use [`enable_normalization`][`Self::enable_normalization`] method. +/// +/// [`Display`]: `core::fmt::Display` +pub struct Normalized<'a, T: ?Sized> { + /// Spec-agnostic normalization input. + input: NormalizationInput<'a>, + /// Expected result type. + _ty_str: PhantomData<fn() -> T>, +} + +impl<T: ?Sized> fmt::Debug for Normalized<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Normalized") + .field("input", &self.input) + .finish() + } +} + +impl<'a, T: ?Sized> Normalized<'a, T> { + /// Creates a new `Normalized` object from the given input. + #[inline] + #[must_use] + pub(crate) fn from_input(input: NormalizationInput<'a>) -> Self { + Self { + input, + _ty_str: PhantomData, + } + } + + /// Enables the normalization. + /// + /// This lets the normalizer apply the case normalization, percent-encoding + /// normalization, and dot segments removal. + #[inline] + pub fn enable_normalization(&mut self) { + self.input.op.mode = NormalizationMode::Default; + } + + /// Enables the normalization that preserves relative path under some condition.
+ /// + /// Note that this normalization algorithm is not compatible with RFC 3986 + /// algorithm for some inputs. + /// + /// See [`RiStr::normalize_but_preserve_authorityless_relative_path()`] + /// for detail. + #[inline] + pub fn enable_normalization_preserving_authorityless_relative_path(&mut self) { + self.input.op.mode = NormalizationMode::PreserveAuthoritylessRelativePath; + } + + /// Returns `Self` with normalization enabled. + #[inline] + #[must_use] + pub fn and_normalize(mut self) -> Self { + self.enable_normalization(); + self + } + + /// Returns `Self` with special normalization enabled. + /// + /// Note that this normalization algorithm is not compatible with RFC 3986 + /// algorithm for some inputs. + /// + /// See [`RiStr::normalize_but_preserve_authorityless_relative_path()`] + /// for detail. + #[inline] + #[must_use] + pub fn and_normalize_but_preserve_authorityless_relative_path(mut self) -> Self { + self.enable_normalization_preserving_authorityless_relative_path(); + self + } + + /// Checks if the path is normalizable by RFC 3986 algorithm. + /// + /// Returns `Ok(())` when normalizable, returns `Err(_)` if not. 
+ #[inline] + pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> { + self.input.ensure_rfc3986_normalizable() + } +} + +impl<S: Spec> fmt::Display for Normalized<'_, RiStr<S>> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + NormalizedInner::<S>::from_input(self.input).fmt(f) + } +} + +impl<S: Spec> fmt::Display for Normalized<'_, RiAbsoluteStr<S>> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + NormalizedInner::<S>::from_input(self.input).fmt(f) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> ToDedicatedString for Normalized<'_, RiStr<S>> { + type Target = RiString<S>; + + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { + let s = self.try_to_string()?; + Ok(TryFrom::try_from(s).expect("[validity] the normalization result must be a valid IRI")) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> From<Normalized<'_, RiStr<S>>> for RiString<S> { + #[inline] + fn from(v: Normalized<'_, RiStr<S>>) -> Self { + v.to_dedicated_string() + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> From<&Normalized<'_, RiStr<S>>> for RiString<S> { + #[inline] + fn from(v: &Normalized<'_, RiStr<S>>) -> Self { + v.to_dedicated_string() + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> ToDedicatedString for Normalized<'_, RiAbsoluteStr<S>> { + type Target = RiAbsoluteString<S>; + + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { + let s = self.try_to_string()?; + Ok(TryFrom::try_from(s).expect("[validity] the normalization result must be a valid IRI")) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> From<Normalized<'_, RiAbsoluteStr<S>>> for RiAbsoluteString<S> { + #[inline] + fn from(v: Normalized<'_, RiAbsoluteStr<S>>) -> Self { + v.to_dedicated_string() + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> From<&Normalized<'_, RiAbsoluteStr<S>>> for RiAbsoluteString<S> { + #[inline] + fn from(v: &Normalized<'_, RiAbsoluteStr<S>>) -> Self { + 
v.to_dedicated_string() + } +} diff --git a/vendor/iri-string/src/normalize/error.rs b/vendor/iri-string/src/normalize/error.rs new file mode 100644 index 00000000..a5c5c895 --- /dev/null +++ b/vendor/iri-string/src/normalize/error.rs @@ -0,0 +1,26 @@ +//! Normalization and resolution error. + +use core::fmt; + +/// IRI normalization and resolution error. +/// +/// For detail about resolution failure, see [the module documentation][`crate::resolve`]. +#[derive(Debug, Clone)] +pub struct Error(()); + +impl Error { + /// Creates a new error. + pub(crate) fn new() -> Self { + Self(()) + } +} + +impl fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("unresolvable IRI") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error {} diff --git a/vendor/iri-string/src/normalize/path.rs b/vendor/iri-string/src/normalize/path.rs new file mode 100644 index 00000000..4f3e3397 --- /dev/null +++ b/vendor/iri-string/src/normalize/path.rs @@ -0,0 +1,620 @@ +//! Path normalization. + +use core::fmt; +use core::ops::Range; + +use crate::parser::str::{find_split_hole, rfind}; +use crate::spec::{Spec, UriSpec}; + +use super::pct_case::PctCaseNormalized; +use super::{Error, NormalizationMode, NormalizationOp}; + +/// Path that is (possibly) not yet processed or being processed. +#[derive(Debug, Clone, Copy)] +pub(crate) enum Path<'a> { + /// The result. No more processing is needed. + Done(&'a str), + /// Not yet completely processed path. + NeedsProcessing(PathToNormalize<'a>), +} + +/// Path that needs merge and/or dot segment removal. +/// +/// # Invariants +/// +/// If the first field (prefix field) is not `None`, it must end with a slash. +#[derive(Debug, Clone, Copy)] +pub(crate) struct PathToNormalize<'a>(Option<&'a str>, &'a str); + +impl<'a> PathToNormalize<'a> { + /// Creates a `PathToNormalize` from the given single path. 
+ #[inline] + #[must_use] + pub(crate) fn from_single_path(path: &'a str) -> Self { + Self(None, path) + } + + /// Creates a `PathToNormalize` from the given base and reference paths to be resolved. + #[must_use] + pub(crate) fn from_paths_to_be_resolved(base: &'a str, reference: &'a str) -> Self { + if reference.starts_with('/') { + return Self(None, reference); + } + + match rfind(base.as_bytes(), b'/') { + Some(last_slash_pos) => Self(Some(&base[..=last_slash_pos]), reference), + None => Self(None, reference), + } + } + + /// Returns true if the path is empty string. + #[inline] + #[must_use] + fn is_empty(&self) -> bool { + // If `self.0` is `Some(_)`, it ends with a slash, i.e. it is not empty. + self.0.is_none() && self.1.is_empty() + } + + /// Returns the length of the not yet normalized path. + #[inline] + #[must_use] + pub(super) fn len(&self) -> usize { + self.len_prefix() + self.1.len() + } + + /// Returns the length of the prefix part. + /// + /// Returns 0 if the prefix part is empty. + #[inline] + #[must_use] + fn len_prefix(&self) -> usize { + self.0.map_or(0, |s| s.len()) + } + + /// Returns a byte at the given position. + #[must_use] + fn byte_at(&self, mut i: usize) -> Option<u8> { + if let Some(prefix) = self.0 { + if i < prefix.len() { + return Some(prefix.as_bytes()[i]); + } + i -= prefix.len(); + } + self.1.as_bytes().get(i).copied() + } + + /// Returns the position of the next slash of the byte at the given position. + #[must_use] + fn find_next_slash(&self, scan_start: usize) -> Option<usize> { + if let Some(prefix) = self.0 { + let prefix_len = prefix.len(); + if scan_start < prefix_len { + prefix[scan_start..].find('/').map(|rel| rel + scan_start) + } else { + let local_i = scan_start - prefix_len; + self.1[local_i..].find('/').map(|rel| rel + scan_start) + } + } else { + self.1[scan_start..].find('/').map(|rel| rel + scan_start) + } + } + + /// Removes the `len` characters from the beginning of `self`. 
+ fn remove_start(&mut self, len: usize) { + if let Some(prefix) = self.0 { + if let Some(suffix_trim_len) = len.checked_sub(prefix.len()) { + self.0 = None; + self.1 = &self.1[suffix_trim_len..]; + } else { + self.0 = Some(&prefix[len..]); + } + } else { + self.1 = &self.1[len..]; + } + } + + /// Removes the prefix that are ignorable on normalization. + // Skips the prefix dot segments without leading slashes (such as `./`, + // `../`, and `../.././`). + // This is necessary because such segments should be removed with the + // FOLLOWING slashes, not leading slashes. + fn remove_ignorable_prefix(&mut self) { + while let Some(seg) = PathSegmentsIter::new(self).next() { + if seg.has_leading_slash { + // The first segment starting with a slash is not target. + break; + } + match seg.kind(self) { + SegmentKind::Dot | SegmentKind::DotDot => { + // Attempt to skip the following slash by `+ 1`. + let skip = self.len().min(seg.range.end + 1); + self.remove_start(skip); + } + SegmentKind::Normal => break, + } + } + } +} + +impl PathToNormalize<'_> { + /// Writes the normalized path. + pub(crate) fn fmt_write_normalize<S: Spec, W: fmt::Write>( + &self, + f: &mut W, + op: NormalizationOp, + authority_is_present: bool, + ) -> fmt::Result { + debug_assert!( + self.0.map_or(true, |s| s.ends_with('/')), + "[validity] the prefix field of `PathToNormalize` should end with a slash" + ); + + if self.is_empty() { + return Ok(()); + } + + if (op.mode == NormalizationMode::PreserveAuthoritylessRelativePath) + && !authority_is_present + && self.byte_at(0) != Some(b'/') + { + // Treat the path as "opaque", i.e. do not apply dot segments removal. + // See <https://github.com/lo48576/iri-string/issues/29>. 
+ debug_assert!( + op.mode.case_pct_normalization(), + "[consistency] case/pct normalization should still be applied" + ); + if let Some(prefix) = self.0 { + write!(f, "{}", PctCaseNormalized::<S>::new(prefix))?; + } + write!(f, "{}", PctCaseNormalized::<S>::new(self.1))?; + return Ok(()); + } + + let mut rest = *self; + + // Skip the prefix dot segments without leading slashes (such as `./`, + // `../`, and `../.././`). + // This is necessary because such segments should be removed with the + // FOLLOWING slashes, not leading slashes. + rest.remove_ignorable_prefix(); + if rest.is_empty() { + // Path consists of only `/.`s and `/..`s. + // In this case, if the authority component is present, the result + // should be `/`, not empty. + if authority_is_present { + f.write_char('/')?; + } + return Ok(()); + } + + // None: No segments are written yet. + // Some(false): Something other than `/` is already written as the path. + // Some(true): Only a `/` is written as the path. + let mut only_a_slash_is_written = None; + let mut too_deep_area_may_have_dot_segments = true; + while !rest.is_empty() && too_deep_area_may_have_dot_segments { + /// The size of the queue to track the path segments. + /// + /// This should be nonzero. + const QUEUE_SIZE: usize = 8; + + { + // Skip `/.` and `/..` segments at the head. + let mut skipped_len = 0; + for seg in PathSegmentsIter::new(&rest) { + match seg.kind(&rest) { + SegmentKind::Dot | SegmentKind::DotDot => { + debug_assert!( + seg.has_leading_slash, + "[consistency] `.` or `..` segments without a + leading slash have already been skipped" + ); + skipped_len = seg.range.end; + } + _ => break, + } + } + rest.remove_start(skipped_len); + if rest.is_empty() { + // Finished with a dot segment. + // The last `/.` or `/..` should be replaced to `/`. + if !authority_is_present && (only_a_slash_is_written == Some(true)) { + // Insert a dot segment to break the prefix `//`. 
+ // Without this, the path starts with `//` and it may + // be confused with the prefix of an authority. + f.write_str(".//")?; + } else { + f.write_char('/')?; + } + break; + } + } + + let mut queue: [Option<&'_ str>; QUEUE_SIZE] = Default::default(); + let mut level: usize = 0; + let mut first_segment_has_leading_slash = false; + + // Find higher path segments. + let mut end = 0; + for seg in PathSegmentsIter::new(&rest) { + let kind = seg.kind(&rest); + match kind { + SegmentKind::Dot => { + too_deep_area_may_have_dot_segments = true; + } + SegmentKind::DotDot => { + level = level.saturating_sub(1); + too_deep_area_may_have_dot_segments = true; + if level < queue.len() { + queue[level] = None; + } + } + SegmentKind::Normal => { + if level < queue.len() { + queue[level] = Some(seg.segment(&rest)); + too_deep_area_may_have_dot_segments = false; + end = seg.range.end; + if level == 0 { + first_segment_has_leading_slash = seg.has_leading_slash; + } + } + level += 1; + } + } + } + + // Write the path segments as possible, and update the internal state. + for segname in queue.iter().flatten() { + Self::emit_segment::<S, _>( + f, + &mut only_a_slash_is_written, + first_segment_has_leading_slash, + segname, + authority_is_present, + op, + )?; + } + + rest.remove_start(end); + } + + if !rest.is_empty() { + // No need of searching dot segments anymore. + assert!( + !too_deep_area_may_have_dot_segments, + "[consistency] loop condition of the previous loop" + ); + // Apply only normalization (if needed). + for seg in PathSegmentsIter::new(&rest) { + assert_eq!( + seg.kind(&rest), + SegmentKind::Normal, + "[consistency] already confirmed that there are no more dot segments" + ); + let segname = seg.segment(&rest); + Self::emit_segment::<S, _>( + f, + &mut only_a_slash_is_written, + seg.has_leading_slash, + segname, + authority_is_present, + op, + )?; + } + } + + Ok(()) + } + + /// Emits a non-dot segment and update the current state. 
+ // + // `first_segment_has_leading_slash` can be any value if the segment is not the first one. + fn emit_segment<S: Spec, W: fmt::Write>( + f: &mut W, + only_a_slash_is_written: &mut Option<bool>, + first_segment_has_leading_slash: bool, + segname: &str, + authority_is_present: bool, + op: NormalizationOp, + ) -> fmt::Result { + // Omit the leading slash of the segment only if the segment is + // the first one and marked as not having a leading slash. + match *only_a_slash_is_written { + None => { + // First segment. + // This pass can be possible if `./` is repeated `QUEUE_SIZE` + // times at the beginning. + if first_segment_has_leading_slash { + f.write_char('/')?; + } + *only_a_slash_is_written = + Some(first_segment_has_leading_slash && segname.is_empty()); + } + Some(only_a_slash) => { + if only_a_slash && !authority_is_present { + // Apply serialization like WHATWG URL Standard. + // This prevents `<scheme=foo>:<path=//bar>` from written as + // `foo://bar`, which is interpreted as + // `<scheme=foo>://<authority=bar>`. Prepending `./`, the + // serialization result would be `foo:/.//bar`, which is safe. + f.write_str("./")?; + *only_a_slash_is_written = Some(false); + } + f.write_char('/')?; + } + } + + // Write the segment name. + if op.mode.case_pct_normalization() { + write!(f, "{}", PctCaseNormalized::<S>::new(segname)) + } else { + f.write_str(segname) + } + } + + /// Checks if the path is normalizable by RFC 3986 algorithm when the authority is absent. + /// + /// Returns `Ok(())` when normalizable, returns `Err(_)` if not. + pub(crate) fn ensure_rfc3986_normalizable_with_authority_absent(&self) -> Result<(), Error> { + /// A sink to get the prefix of the input. + #[derive(Default)] + struct PrefixRetriever { + /// The buffer to remember the prefix of the input. + buf: [u8; 3], + /// The next write position in the buffer. + cursor: usize, + } + impl PrefixRetriever { + /// Returns the read prefix data. 
+ #[inline] + #[must_use] + fn as_bytes(&self) -> &[u8] { + &self.buf[..self.cursor] + } + } + impl fmt::Write for PrefixRetriever { + fn write_str(&mut self, s: &str) -> fmt::Result { + if !s.is_empty() && (self.cursor >= self.buf.len()) { + // Enough bytes are read. + return Err(fmt::Error); + } + self.buf[self.cursor..] + .iter_mut() + .zip(s.bytes()) + .for_each(|(dest, src)| *dest = src); + self.cursor = self.cursor.saturating_add(s.len()).min(self.buf.len()); + Ok(()) + } + } + + let mut prefix = PrefixRetriever::default(); + // The failure of this write indicates more than 3 characters are read. + // This is safe to ignore since the check needs only 3 characters. + let _ = self.fmt_write_normalize::<UriSpec, _>( + &mut prefix, + NormalizationOp { + mode: NormalizationMode::None, + }, + // Assume the authority is absent. + false, + ); + + if prefix.as_bytes() == b"/./" { + Err(Error::new()) + } else { + Ok(()) + } + } +} + +/// Characteristic of a path. +#[derive(Debug, Clone, Copy)] +pub(crate) enum PathCharacteristic { + /// Absolute path, not special. + CommonAbsolute, + /// Relative path, not special. + CommonRelative, + /// The first path segment of the relative path has one or more colon characters. + RelativeFirstSegmentHasColon, + /// The path starts with the double slash. + StartsWithDoubleSlash, +} + +impl PathCharacteristic { + /// Returns true if the path is absolute. + #[inline] + #[must_use] + pub(crate) fn is_absolute(self) -> bool { + matches!(self, Self::CommonAbsolute | Self::StartsWithDoubleSlash) + } + + /// Returns the characteristic of the path. + pub(crate) fn from_path_to_display<S: Spec>( + path: &PathToNormalize<'_>, + op: NormalizationOp, + authority_is_present: bool, + ) -> Self { + /// Dummy writer to get necessary values. + #[derive(Default, Clone, Copy)] + struct Writer { + /// Result. + result: Option<PathCharacteristic>, + /// Whether the normalized path is absolute.
+ is_absolute: Option<bool>, + } + impl fmt::Write for Writer { + fn write_str(&mut self, mut s: &str) -> fmt::Result { + if self.result.is_some() { + // Nothing more to do. + return Err(fmt::Error); + } + while !s.is_empty() { + if self.is_absolute.is_none() { + // The first input. + match s.strip_prefix('/') { + Some(rest) => { + self.is_absolute = Some(true); + s = rest; + } + None => { + self.is_absolute = Some(false); + } + } + continue; + } + if self.is_absolute == Some(true) { + let result = if s.starts_with('/') { + PathCharacteristic::StartsWithDoubleSlash + } else { + PathCharacteristic::CommonAbsolute + }; + self.result = Some(result); + return Err(fmt::Error); + } + // Processing the first segment of the relative path. + match find_split_hole(s, b'/') { + Some((first_seg, _rest)) => { + let result = if first_seg.contains(':') { + PathCharacteristic::RelativeFirstSegmentHasColon + } else { + PathCharacteristic::CommonRelative + }; + self.result = Some(result); + return Err(fmt::Error); + } + None => { + // `s` might not be the complete first segment. + if s.contains(':') { + self.result = + Some(PathCharacteristic::RelativeFirstSegmentHasColon); + return Err(fmt::Error); + } + break; + } + } + } + Ok(()) + } + } + + let mut writer = Writer::default(); + match path.fmt_write_normalize::<S, _>(&mut writer, op, authority_is_present) { + // The writer reached no decision: the path is empty, or only an + // undecided prefix was written (a single colon-free relative segment, + // or a lone `/`). + // NOTE(review): a lone `/` is absolute but is reported as + // `CommonRelative` here; confirm that this is intended. + Ok(_) => PathCharacteristic::CommonRelative, + Err(_) => writer + .result + .expect("[consistency] the formatting quits early by `Err` when the check is done"), + } + } +} + +/// Path segment kind. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum SegmentKind { + /// `.` or the equivalents. + Dot, + /// `..` or the equivalents. + DotDot, + /// Other normal (not special) segments. + Normal, +} + +impl SegmentKind { + /// Creates a new `SegmentKind` from the given segment name. + #[must_use] + fn from_segment(s: &str) -> Self { + match s { + "." | "%2E" | "%2e" => SegmentKind::Dot, + ".."
| ".%2E" | ".%2e" | "%2E." | "%2E%2E" | "%2E%2e" | "%2e." | "%2e%2E" + | "%2e%2e" => SegmentKind::DotDot, + _ => SegmentKind::Normal, + } + } +} + +/// A segment with optional leading slash. +#[derive(Debug, Clone)] +struct PathSegment { + /// Presence of a leading slash. + has_leading_slash: bool, + /// Range of the segment name (without any slashes). + range: Range<usize>, +} + +impl PathSegment { + /// Returns the segment without any slashes. + #[inline] + #[must_use] + fn segment<'a>(&self, path: &PathToNormalize<'a>) -> &'a str { + if let Some(prefix) = path.0 { + let prefix_len = prefix.len(); + if self.range.end <= prefix_len { + &prefix[self.range.clone()] + } else { + let range = (self.range.start - prefix_len)..(self.range.end - prefix_len); + &path.1[range] + } + } else { + &path.1[self.range.clone()] + } + } + + /// Returns the segment kind. + #[inline] + #[must_use] + fn kind(&self, path: &PathToNormalize<'_>) -> SegmentKind { + SegmentKind::from_segment(self.segment(path)) + } +} + +/// Iterator of path segments. +struct PathSegmentsIter<'a> { + /// Path. + path: &'a PathToNormalize<'a>, + /// Current cursor position. + cursor: usize, +} + +impl<'a> PathSegmentsIter<'a> { + /// Creates a new iterator of path segments. 
+ #[inline] + #[must_use] + fn new(path: &'a PathToNormalize<'a>) -> Self { + Self { path, cursor: 0 } + } +} + +impl Iterator for PathSegmentsIter<'_> { + type Item = PathSegment; + + fn next(&mut self) -> Option<Self::Item> { + let path_len = self.path.len(); + if self.cursor >= path_len { + return None; + } + let has_leading_slash = self.path.byte_at(self.cursor) == Some(b'/'); + + let prefix_len = self.path.len_prefix(); + if (prefix_len != 0) && (self.cursor == prefix_len - 1) { + debug_assert!(has_leading_slash); + let end = self.path.1.find('/').unwrap_or(self.path.1.len()) + prefix_len; + self.cursor = end; + return Some(PathSegment { + has_leading_slash, + range: prefix_len..end, + }); + } + + if has_leading_slash { + // Skip the leading slash. + self.cursor += 1; + }; + let start = self.cursor; + self.cursor = self.path.find_next_slash(self.cursor).unwrap_or(path_len); + + Some(PathSegment { + has_leading_slash, + range: start..self.cursor, + }) + } +} diff --git a/vendor/iri-string/src/normalize/pct_case.rs b/vendor/iri-string/src/normalize/pct_case.rs new file mode 100644 index 00000000..75e0a777 --- /dev/null +++ b/vendor/iri-string/src/normalize/pct_case.rs @@ -0,0 +1,358 @@ +//! Percent-encoding normalization and case normalization. + +use core::cmp::Ordering; +use core::fmt::{self, Write as _}; +use core::marker::PhantomData; + +use crate::format::eq_str_display; +use crate::parser::char::{is_ascii_unreserved, is_unreserved, is_utf8_byte_continue}; +use crate::parser::str::{find_split_hole, take_first_char}; +use crate::parser::trusted::take_xdigits2; +use crate::spec::Spec; + +/// Returns true if the given string is percent-encoding normalized and case +/// normalized. +/// +/// Note that normalization of ASCII-only host requires additional case +/// normalization, so checking by this function is not sufficient for that case. 
+pub(crate) fn is_pct_case_normalized<S: Spec>(s: &str) -> bool { + eq_str_display(s, &PctCaseNormalized::<S>::new(s)) +} + +/// Returns a character for the slice. +/// +/// Roughly equivalent to +/// `core::str::from_utf8(bytes).ok().and_then(|s| s.chars().next()).ok_or(())`, +/// but this function trusts that the input has the byte layout of exactly one +/// multi-byte UTF-8 character (a leading byte followed by continue bytes) and +/// does not re-check that layout. +/// +/// Returns `Err(())` if the bytes are a redundant (overlong) encoding or if +/// the decoded value is not a valid Unicode code point. +/// +/// # Panics +/// +/// Panics if the length of `bytes` is not 2, 3, or 4. +fn into_char_trusted(bytes: &[u8]) -> Result<char, ()> { + /// The bit mask to get the content part in a continue byte. + const CONTINUE_BYTE_MASK: u8 = 0b_0011_1111; + /// Minimum valid values for a code point in a UTF-8 sequence of 2, 3, and 4 bytes. + const MIN: [u32; 3] = [0x80, 0x800, 0x1_0000]; + + let len = bytes.len(); + let c: u32 = match len { + 2 => (u32::from(bytes[0] & 0b_0001_1111) << 6) | u32::from(bytes[1] & CONTINUE_BYTE_MASK), + 3 => { + (u32::from(bytes[0] & 0b_0000_1111) << 12) + | (u32::from(bytes[1] & CONTINUE_BYTE_MASK) << 6) + | u32::from(bytes[2] & CONTINUE_BYTE_MASK) + } + 4 => { + (u32::from(bytes[0] & 0b_0000_0111) << 18) + | (u32::from(bytes[1] & CONTINUE_BYTE_MASK) << 12) + | (u32::from(bytes[2] & CONTINUE_BYTE_MASK) << 6) + | u32::from(bytes[3] & CONTINUE_BYTE_MASK) + } + len => unreachable!( + "[consistency] expected 2, 3, or 4 bytes for a character, but got {len} as the length" + ), + }; + if c < MIN[len - 2] { + // Redundant UTF-8 encoding. + return Err(()); + } + // Can be an invalid Unicode code point. + char::from_u32(c).ok_or(()) +} + +/// Writable as a normalized path segment percent-encoding IRI. +/// +/// This wrapper does the things below when being formatted: +/// +/// * Decode unnecessarily percent-encoded characters. +/// * Convert alphabetic characters uppercase in percent-encoded triplets. +/// +/// Note that this does not newly encode raw characters. +/// +/// # Preconditions +/// +/// The given string should be the valid path segment. +#[derive(Debug, Clone, Copy)] +pub(crate) struct PctCaseNormalized<'a, S> { + /// Valid segment name to normalize.
+ segname: &'a str, + /// Spec. + _spec: PhantomData<fn() -> S>, +} + +impl<'a, S: Spec> PctCaseNormalized<'a, S> { + /// Creates a new `PctCaseNormalized` value. + #[inline] + #[must_use] + pub(crate) fn new(source: &'a str) -> Self { + Self { + segname: source, + _spec: PhantomData, + } + } +} + +impl<S: Spec> fmt::Display for PctCaseNormalized<'_, S> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut rest = self.segname; + + 'outer_loop: while !rest.is_empty() { + // Scan the next percent-encoded triplet. + let (prefix, after_percent) = match find_split_hole(rest, b'%') { + Some(v) => v, + None => return f.write_str(rest), + }; + // Write the string before the percent-encoded triplet. + f.write_str(prefix)?; + // Decode the percent-encoded triplet. + let (first_decoded, after_first_triplet) = take_xdigits2(after_percent); + rest = after_first_triplet; + + if first_decoded.is_ascii() { + if is_ascii_unreserved(first_decoded) { + // Unreserved. Print the decoded. + f.write_char(char::from(first_decoded))?; + } else { + write!(f, "%{:02X}", first_decoded)?; + } + continue 'outer_loop; + } + + // Continue byte cannot be the first byte of a character. + if is_utf8_byte_continue(first_decoded) { + write!(f, "%{:02X}", first_decoded)?; + continue 'outer_loop; + } + + // Get the expected length of decoded char. + let expected_char_len = match (first_decoded & 0xf0).cmp(&0b1110_0000) { + Ordering::Less => 2, + Ordering::Equal => 3, + Ordering::Greater => 4, + }; + + // Get continue bytes. + let c_buf = &mut [first_decoded, 0, 0, 0][..expected_char_len]; + for (i, buf_dest) in c_buf[1..].iter_mut().enumerate() { + match take_first_char(rest) { + Some(('%', after_percent)) => { + let (byte, after_triplet) = take_xdigits2(after_percent); + if !is_utf8_byte_continue(byte) { + // Note that `byte` can start the new string. + // Leave the byte in the `rest` for next try (i.e. + // don't update `rest` in this case). 
+ c_buf[..=i] + .iter() + .try_for_each(|b| write!(f, "%{:02X}", b))?; + continue 'outer_loop; + } + *buf_dest = byte; + rest = after_triplet; + } + // If the next character is not `%`, decoded bytes so far + // won't be valid UTF-8 byte sequence. + // Write the read percent-encoded triplets without decoding. + // Note that all characters in `&c_buf[1..]` (if available) + // will be decoded to "continue byte" of UTF-8, so they + // cannot be the start of a valid UTF-8 byte sequence if + // decoded. + Some((c, after_percent)) => { + c_buf[..=i] + .iter() + .try_for_each(|b| write!(f, "%{:02X}", b))?; + f.write_char(c)?; + rest = after_percent; + continue 'outer_loop; + } + None => { + c_buf[..=i] + .iter() + .try_for_each(|b| write!(f, "%{:02X}", b))?; + // Reached the end of the string. + break 'outer_loop; + } + } + } + + // Decode the bytes into a character. + match into_char_trusted(&c_buf[..expected_char_len]) { + Ok(decoded_c) => { + if is_unreserved::<S>(decoded_c) { + // Unreserved. Print the decoded. + f.write_char(decoded_c)?; + } else { + c_buf[0..expected_char_len] + .iter() + .try_for_each(|b| write!(f, "%{:02X}", b))?; + } + } + Err(_) => { + // Skip decoding of the entire sequence of pct-encoded triplets loaded + // in `c_buf`. This is valid from the reasons below. + // + // * The first byte in `c_buf` is valid as the first byte, and it tells the + // expected number of bytes for a code unit. The cases the bytes being too + // short and the sequence being incomplete have already been handled, and + // the execution does not reach here then. + // * All of the non-first bytes are checked if they are valid as UTF8 continue + // bytes by `is_utf8_byte_continue()`. If they're not, the decoding of + // that codepoint is aborted and the bytes in the buffer are immediately + // emitted as pct-encoded, and the execution does not reach here. This + // means that the bytes in the current `c_buf` have passed these tests. 
+ // * Since all of the non-first bytes are UTF8 continue bytes, none of + // them can start a new valid UTF-8 byte sequence. This means that + // if the bytes in the buffer do not constitute a valid UTF-8 byte + // sequence, the whole buffer can immediately be emitted as pct-encoded. + + debug_assert!( + c_buf[1..expected_char_len] + .iter() + .copied() + .all(is_utf8_byte_continue), + "[consistency] all non-first bytes have been \ + confirmed that they are UTF-8 continue bytes" + ); + // Note that the first pct-encoded triplet is stripped from + // `after_first_triplet`. + rest = &after_first_triplet[((expected_char_len - 1) * 3)..]; + c_buf[0..expected_char_len] + .iter() + .try_for_each(|b| write!(f, "%{:02X}", b))?; + } + } + } + + Ok(()) + } +} + +/// Writable as a normalized ASCII-only `host` (and optionally `port` followed). +#[derive(Debug, Clone, Copy)] +pub(crate) struct NormalizedAsciiOnlyHost<'a> { + /// Valid host (and optionally port) to normalize. + host_port: &'a str, +} + +impl<'a> NormalizedAsciiOnlyHost<'a> { + /// Creates a new `NormalizedAsciiOnlyHost` value. + /// + /// # Preconditions + /// + /// The given string should be the valid ASCII-only `host` or + /// `host ":" port` after percent-encoding normalization. + /// In other words, [`parser::trusted::is_ascii_only_host`] should return + /// true for the given value. + /// + /// [`parser::trusted::is_ascii_only_host`]: `crate::parser::trusted::is_ascii_only_host` + #[inline] + #[must_use] + pub(crate) fn new(host_port: &'a str) -> Self { + Self { host_port } + } +} + +impl fmt::Display for NormalizedAsciiOnlyHost<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut rest = self.host_port; + + while !rest.is_empty() { + // Scan the next percent-encoded triplet.
+ let (prefix, after_percent) = match find_split_hole(rest, b'%') { + Some(v) => v, + None => { + return rest + .chars() + .try_for_each(|c| f.write_char(c.to_ascii_lowercase())); + } + }; + // Write the string before the percent-encoded triplet. + prefix + .chars() + .try_for_each(|c| f.write_char(c.to_ascii_lowercase()))?; + // Decode the percent-encoded triplet. + let (first_decoded, after_triplet) = take_xdigits2(after_percent); + rest = after_triplet; + + assert!( + first_decoded.is_ascii(), + "[consistency] this function requires ASCII-only host as an argument" + ); + + if is_ascii_unreserved(first_decoded) { + // Unreserved. Convert to lowercase and print. + f.write_char(char::from(first_decoded.to_ascii_lowercase()))?; + } else { + write!(f, "%{:02X}", first_decoded)?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +#[cfg(feature = "alloc")] +mod tests { + use super::*; + + #[cfg(all(feature = "alloc", not(feature = "std")))] + use alloc::string::ToString; + + use crate::spec::{IriSpec, UriSpec}; + + #[test] + fn invalid_utf8() { + assert_eq!( + PctCaseNormalized::<UriSpec>::new("%80%cc%cc%cc").to_string(), + "%80%CC%CC%CC" + ); + assert_eq!( + PctCaseNormalized::<IriSpec>::new("%80%cc%cc%cc").to_string(), + "%80%CC%CC%CC" + ); + } + + #[test] + fn iri_unreserved() { + assert_eq!( + PctCaseNormalized::<UriSpec>::new("%ce%b1").to_string(), + "%CE%B1" + ); + assert_eq!( + PctCaseNormalized::<IriSpec>::new("%ce%b1").to_string(), + "\u{03B1}" + ); + } + + #[test] + fn iri_middle_decode() { + assert_eq!( + PctCaseNormalized::<UriSpec>::new("%ce%ce%b1%b1").to_string(), + "%CE%CE%B1%B1" + ); + assert_eq!( + PctCaseNormalized::<IriSpec>::new("%ce%ce%b1%b1").to_string(), + "%CE\u{03B1}%B1" + ); + } + + #[test] + fn ascii_reserved() { + assert_eq!(PctCaseNormalized::<UriSpec>::new("%3f").to_string(), "%3F"); + assert_eq!(PctCaseNormalized::<IriSpec>::new("%3f").to_string(), "%3F"); + } + + #[test] + fn ascii_forbidden() { + assert_eq!( + 
PctCaseNormalized::<UriSpec>::new("%3c%3e").to_string(), + "%3C%3E" + ); + assert_eq!( + PctCaseNormalized::<IriSpec>::new("%3c%3e").to_string(), + "%3C%3E" + ); + } + + #[test] + fn ascii_unreserved() { + assert_eq!(PctCaseNormalized::<UriSpec>::new("%7ea").to_string(), "~a"); + assert_eq!(PctCaseNormalized::<IriSpec>::new("%7ea").to_string(), "~a"); + } +} diff --git a/vendor/iri-string/src/parser.rs b/vendor/iri-string/src/parser.rs new file mode 100644 index 00000000..35a4d475 --- /dev/null +++ b/vendor/iri-string/src/parser.rs @@ -0,0 +1,6 @@ +//! Common stuff for parsing. + +pub(crate) mod char; +pub(crate) mod str; +pub(crate) mod trusted; +pub(crate) mod validate; diff --git a/vendor/iri-string/src/parser/char.rs b/vendor/iri-string/src/parser/char.rs new file mode 100644 index 00000000..2455498e --- /dev/null +++ b/vendor/iri-string/src/parser/char.rs @@ -0,0 +1,323 @@ +//! Characters. + +use crate::spec::Spec; + +/// A mask to test whether the character is continue character of `scheme`. +// `ALPHA / DIGIT / "+" / "-" / "."` +const MASK_SCHEME_CONTINUE: u8 = 1 << 0; + +/// A mask to test whether the character matches `unreserved`. +// `unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"` +const MASK_UNRESERVED: u8 = 1 << 1; + +/// A mask to test whether the character matches `gen-delims`. +// `gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"` +const MASK_GEN_DELIMS: u8 = 1 << 2; + +/// A mask to test whether the character matches `sub-delims`. +// `sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="` +const MASK_SUB_DELIMS: u8 = 1 << 3; + +/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes). +// `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"` +const MASK_PCHAR: u8 = 1 << 4; + +/// A mask to test whether the character can appear in `query` and `fragment`. +// `query = *( pchar / "/" / "?" )` +// `fragment = *( pchar / "/" / "?" 
)` +const MASK_FRAG_QUERY: u8 = 1 << 5; + +/// A mask to test whether the character can appear in `userinfo` and address of `IPvFuture`. +// `userinfo = *( unreserved / pct-encoded / sub-delims / ":" )` +const MASK_USERINFO_IPVFUTUREADDR: u8 = 1 << 6; + +/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes) or slash. +const MASK_PCHAR_SLASH: u8 = 1 << 7; + +/// ASCII characters' properties. +const TABLE: [u8; 128] = [ + 0b_0000_0000, // NUL + 0b_0000_0000, // SOH + 0b_0000_0000, // STX + 0b_0000_0000, // ETX + 0b_0000_0000, // EOT + 0b_0000_0000, // ENQ + 0b_0000_0000, // ACK + 0b_0000_0000, // BEL + 0b_0000_0000, // BS + 0b_0000_0000, // HT + 0b_0000_0000, // LF + 0b_0000_0000, // VT + 0b_0000_0000, // FF + 0b_0000_0000, // CR + 0b_0000_0000, // SO + 0b_0000_0000, // SI + 0b_0000_0000, // DLE + 0b_0000_0000, // DC1 + 0b_0000_0000, // DC2 + 0b_0000_0000, // DC3 + 0b_0000_0000, // DC4 + 0b_0000_0000, // NAK + 0b_0000_0000, // SYN + 0b_0000_0000, // ETB + 0b_0000_0000, // CAN + 0b_0000_0000, // EM + 0b_0000_0000, // SUB + 0b_0000_0000, // ESC + 0b_0000_0000, // FS + 0b_0000_0000, // GS + 0b_0000_0000, // RS + 0b_0000_0000, // US + 0b_0000_0000, // SPACE + 0b_1111_1000, // ! + 0b_0000_0000, // " + 0b_0000_0100, // # + 0b_1111_1000, // $ + 0b_0000_0000, // % + 0b_1111_1000, // & + 0b_1111_1000, // ' + 0b_1111_1000, // ( + 0b_1111_1000, // ) + 0b_1111_1000, // * + 0b_1111_1001, // + + 0b_1111_1000, // , + 0b_1111_0011, // - + 0b_1111_0011, // . + 0b_1010_0100, // / + 0b_1111_0011, // 0 + 0b_1111_0011, // 1 + 0b_1111_0011, // 2 + 0b_1111_0011, // 3 + 0b_1111_0011, // 4 + 0b_1111_0011, // 5 + 0b_1111_0011, // 6 + 0b_1111_0011, // 7 + 0b_1111_0011, // 8 + 0b_1111_0011, // 9 + 0b_1111_0100, // : + 0b_1111_1000, // ; + 0b_0000_0000, // < + 0b_1111_1000, // = + 0b_0000_0000, // > + 0b_0010_0100, // ? 
+ 0b_1011_0100, // @ + 0b_1111_0011, // A + 0b_1111_0011, // B + 0b_1111_0011, // C + 0b_1111_0011, // D + 0b_1111_0011, // E + 0b_1111_0011, // F + 0b_1111_0011, // G + 0b_1111_0011, // H + 0b_1111_0011, // I + 0b_1111_0011, // J + 0b_1111_0011, // K + 0b_1111_0011, // L + 0b_1111_0011, // M + 0b_1111_0011, // N + 0b_1111_0011, // O + 0b_1111_0011, // P + 0b_1111_0011, // Q + 0b_1111_0011, // R + 0b_1111_0011, // S + 0b_1111_0011, // T + 0b_1111_0011, // U + 0b_1111_0011, // V + 0b_1111_0011, // W + 0b_1111_0011, // X + 0b_1111_0011, // Y + 0b_1111_0011, // Z + 0b_0000_0100, // [ + 0b_0000_0000, // \ + 0b_0000_0100, // ] + 0b_0000_0000, // ^ + 0b_1111_0010, // _ + 0b_0000_0000, // ` + 0b_1111_0011, // a + 0b_1111_0011, // b + 0b_1111_0011, // c + 0b_1111_0011, // d + 0b_1111_0011, // e + 0b_1111_0011, // f + 0b_1111_0011, // g + 0b_1111_0011, // h + 0b_1111_0011, // i + 0b_1111_0011, // j + 0b_1111_0011, // k + 0b_1111_0011, // l + 0b_1111_0011, // m + 0b_1111_0011, // n + 0b_1111_0011, // o + 0b_1111_0011, // p + 0b_1111_0011, // q + 0b_1111_0011, // r + 0b_1111_0011, // s + 0b_1111_0011, // t + 0b_1111_0011, // u + 0b_1111_0011, // v + 0b_1111_0011, // w + 0b_1111_0011, // x + 0b_1111_0011, // y + 0b_1111_0011, // z + 0b_0000_0000, // { + 0b_0000_0000, // | + 0b_0000_0000, // } + 0b_1111_0010, // ~ + 0b_0000_0000, // DEL +]; + +/// Returns `true` if the given ASCII character is allowed as continue character of `scheme` part. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_scheme_continue(c: u8) -> bool { + (TABLE[c as usize] & MASK_SCHEME_CONTINUE) != 0 +} + +/// Returns `true` if the given ASCII character matches `unreserved`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_unreserved(c: u8) -> bool { + (TABLE[c as usize] & MASK_UNRESERVED) != 0 +} + +/// Returns true if the character is unreserved. 
+#[inline] +#[must_use] +pub(crate) fn is_unreserved<S: Spec>(c: char) -> bool { + if c.is_ascii() { + is_ascii_unreserved(c as u8) + } else { + S::is_nonascii_char_unreserved(c) + } +} + +///// Returns `true` if the given ASCII character matches `gen-delims`. +//#[inline] +//#[must_use] +//pub(crate) const fn is_ascii_gen_delims(c: u8) -> bool { +// (TABLE[c as usize] & MASK_GEN_DELIMS) != 0 +//} + +///// Returns `true` if the given ASCII character matches `sub-delims`. +//#[inline] +//#[must_use] +//pub(crate) const fn is_ascii_sub_delims(c: u8) -> bool { +// (TABLE[c as usize] & MASK_SUB_DELIMS) != 0 +//} + +///// Returns `true` if the given ASCII character matches `reserved`. +//#[inline] +//#[must_use] +//pub(crate) const fn is_ascii_reserved(c: u8) -> bool { +// (TABLE[c as usize] & (MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0 +//} + +/// Returns `true` if the given ASCII character matches `pchar` modulo `pct-encoded`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_pchar(c: u8) -> bool { + (TABLE[c as usize] & MASK_PCHAR) != 0 +} + +/// Returns `true` if the given ASCII character is allowed to appear in `query` and `fragment`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_frag_query(c: u8) -> bool { + (TABLE[c as usize] & MASK_FRAG_QUERY) != 0 +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `iquery`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_query<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c) +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `ifragment`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_fragment<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) +} + +/// Returns `true` if the given ASCII character is allowed to appear in `userinfo` and `IPvFuture`. 
+#[inline] +#[must_use] +pub(crate) const fn is_ascii_userinfo_ipvfutureaddr(c: u8) -> bool { + (TABLE[c as usize] & MASK_USERINFO_IPVFUTUREADDR) != 0 +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `iuserinfo`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_userinfo<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) +} + +/// Returns `true` if the given ASCII character is allowed to appear in `reg-name` +#[inline] +#[must_use] +pub(crate) const fn is_ascii_regname(c: u8) -> bool { + (TABLE[c as usize] & (MASK_UNRESERVED | MASK_SUB_DELIMS)) != 0 +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `ireg-name`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_regname<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) +} + +/// Returns `true` if the given ASCII character matches `pchar` modulo `pct-encoded` or a slash. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_pchar_slash(c: u8) -> bool { + (TABLE[c as usize] & MASK_PCHAR_SLASH) != 0 +} + +/// Checks if the given character matches `ucschar` rule. +#[must_use] +pub(crate) fn is_ucschar(c: char) -> bool { + matches!( + u32::from(c), + 0xA0..=0xD7FF | + 0xF900..=0xFDCF | + 0xFDF0..=0xFFEF | + 0x1_0000..=0x1_FFFD | + 0x2_0000..=0x2_FFFD | + 0x3_0000..=0x3_FFFD | + 0x4_0000..=0x4_FFFD | + 0x5_0000..=0x5_FFFD | + 0x6_0000..=0x6_FFFD | + 0x7_0000..=0x7_FFFD | + 0x8_0000..=0x8_FFFD | + 0x9_0000..=0x9_FFFD | + 0xA_0000..=0xA_FFFD | + 0xB_0000..=0xB_FFFD | + 0xC_0000..=0xC_FFFD | + 0xD_0000..=0xD_FFFD | + 0xE_1000..=0xE_FFFD + ) +} + +/// Returns true if the given value is a continue byte of UTF-8. +#[inline(always)] +#[must_use] +pub(crate) fn is_utf8_byte_continue(byte: u8) -> bool { + // `0x80..=0xbf` (i.e. `0b_1000_0000..=0b_1011_1111`) is not the first byte, + // and `0xc0..=0xc1` (i.e. `0b_1100_0000..=0b_1100_0001` shouldn't appear + // anywhere in UTF-8 byte sequence. 
+ // `0x80 as i8` is -128, and `0xc0 as i8` is -96. + // + // The first byte of the UTF-8 character is not `0b10xx_xxxx`, and + // the continue bytes is `0b10xx_xxxx`. + // `0b1011_1111 as i8` is -65, and `0b1000_0000 as i8` is -128. + (byte as i8) < -64 +} + +/// Returns true if the given ASCII character is `unreserved` or `reserved`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_unreserved_or_reserved(c: u8) -> bool { + (TABLE[c as usize] & (MASK_UNRESERVED | MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0 +} diff --git a/vendor/iri-string/src/parser/str.rs b/vendor/iri-string/src/parser/str.rs new file mode 100644 index 00000000..0f564bfa --- /dev/null +++ b/vendor/iri-string/src/parser/str.rs @@ -0,0 +1,390 @@ +//! Functions for common string operations. + +pub(crate) use self::maybe_pct_encoded::{ + process_percent_encoded_best_effort, PctEncodedFragments, +}; + +mod maybe_pct_encoded; + +/// Returns the inner string if wrapped. +#[must_use] +pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> { + let (prefix, suffix) = match s.as_bytes() { + [prefix, suffix] | [prefix, .., suffix] => (*prefix, *suffix), + _ => return None, + }; + if (prefix == open) && (suffix == close) { + Some(&s[1..(s.len() - 1)]) + } else { + None + } +} + +/// Returns the byte that appears first. +#[cfg(not(feature = "memchr"))] +#[inline] +#[must_use] +pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> { + haystack + .iter() + .copied() + .find(|&b| b == needle1 || b == needle2) +} + +/// Returns the byte that appears first. +#[cfg(feature = "memchr")] +#[inline] +#[must_use] +pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> { + memchr::memchr2(needle1, needle2, haystack).map(|pos| haystack[pos]) +} + +/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character. 
+#[cfg(not(feature = "memchr"))] +#[inline] +#[must_use] +pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> { + haystack.iter().rposition(|&b| b == needle) +} + +/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character. +#[cfg(feature = "memchr")] +#[inline] +#[must_use] +pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> { + memchr::memrchr(needle, haystack) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If `needle` is not found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If `needle` is not found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> { + memchr::memchr(needle, haystack.as_bytes()).map(|pos| haystack.split_at(pos)) +} + +/// Finds the last needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .rposition(|b| b == needle1 || b == needle2) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the last needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + memchr::memrchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. 
+#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split3( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, +) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2 || b == needle3) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split3( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, +) -> Option<(&str, &str)> { + memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes()) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Finds the first needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. 
+#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + memchr::memchr(needle, haystack.as_bytes()) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split2_hole( + haystack: &str, + needle1: u8, + needle2: u8, +) -> Option<(&str, u8, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2) + .map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split2_hole( + haystack: &str, + needle1: u8, + needle2: u8, +) -> Option<(&str, u8, &str)> { + memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split4_hole( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, + needle4: u8, +) -> Option<(&str, u8, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2 || b == needle3 || b == needle4) + .map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. 
+#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split4_hole( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, + needle4: u8, +) -> Option<(&str, u8, &str)> { + let bytes = haystack.as_bytes(); + let pos = match memchr::memchr3(needle1, needle2, needle3, bytes) { + Some(prefix_len) => memchr::memchr(needle4, &bytes[..prefix_len]).or(Some(prefix_len)), + None => memchr::memchr(needle4, bytes), + }; + pos.map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the last needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .rposition(|b| b == needle) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Finds the last needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + memchr::memrchr(needle, haystack.as_bytes()) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Returns `true` if the string only contains the allowed characters. +#[must_use] +fn satisfy_chars<F, G>(mut s: &str, pred_ascii: F, pred_nonascii: G) -> bool +where + F: Copy + Fn(u8) -> bool, + G: Copy + Fn(char) -> bool, +{ + while !s.is_empty() { + match s.bytes().position(|b| !b.is_ascii()) { + Some(nonascii_pos) => { + // Valdiate ASCII prefix. + if nonascii_pos != 0 { + let (prefix, rest) = s.split_at(nonascii_pos); + if !prefix.bytes().all(pred_ascii) { + return false; + } + s = rest; + } + + // Extract non-ASCII part and validate it. 
+ let (prefix, rest) = match s.bytes().position(|b| b.is_ascii()) { + Some(ascii_pos) => s.split_at(ascii_pos), + None => (s, ""), + }; + if !prefix.chars().all(pred_nonascii) { + return false; + } + s = rest; + } + None => { + // All chars are ASCII. + return s.bytes().all(pred_ascii); + } + } + } + + true +} + +/// Returns `true` if the string only contains the allowed characters and percent-encoded char. +#[must_use] +pub(crate) fn satisfy_chars_with_pct_encoded<F, G>( + mut s: &str, + pred_ascii: F, + pred_nonascii: G, +) -> bool +where + F: Copy + Fn(u8) -> bool, + G: Copy + Fn(char) -> bool, +{ + while let Some((prefix, suffix)) = find_split_hole(s, b'%') { + // Verify strings before the percent-encoded char. + if !prefix.is_empty() && !satisfy_chars(prefix, pred_ascii, pred_nonascii) { + return false; + } + + // Verify the percent-encoded char. + if !starts_with_double_hexdigits(suffix.as_bytes()) { + return false; + } + + // Advance the cursor. + s = &suffix[2..]; + } + + // Verify the rest. + satisfy_chars(s, pred_ascii, pred_nonascii) +} + +/// Returns `true` if the given string starts with two hexadecimal digits. +#[must_use] +pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool { + match s { + [x, y] | [x, y, ..] => x.is_ascii_hexdigit() && y.is_ascii_hexdigit(), + _ => false, + } +} + +/// Strips the first character if it is the given ASCII character, and returns the rest. +/// +/// # Precondition +/// +/// The given ASCII character (`prefix`) should be an ASCII character. +#[must_use] +pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> { + debug_assert!(prefix.is_ascii()); + if s.as_bytes().first().copied() == Some(prefix) { + Some(&s[1..]) + } else { + None + } +} + +/// Splits the given string into the first character and the rest. +/// +/// Returns `(first_char, rest_str)`. 
+#[must_use] +pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> { + let mut chars = s.chars(); + let c = chars.next()?; + let rest = chars.as_str(); + Some((c, rest)) +} diff --git a/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs b/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs new file mode 100644 index 00000000..617f006a --- /dev/null +++ b/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs @@ -0,0 +1,369 @@ +//! Processor for possibly- or invalidly-percent-encoded strings. + +use core::fmt::{self, Write as _}; +use core::marker::PhantomData; +use core::num::NonZeroU8; +use core::ops::ControlFlow; + +use crate::parser::str::find_split; +use crate::parser::trusted::hexdigits_to_byte; + +/// Fragment in a possibly percent-encoded (and possibly broken) string. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum PctEncodedFragments<'a> { + /// String fragment without percent-encoded triplets. + NoPctStr(&'a str), + /// Stray `%` (percent) character. + StrayPercent, + /// Valid percent-encoded triplets for a character. + Char(&'a str, char), + /// Percent-encoded triplets that does not consists of a valid UTF-8 sequence. + InvalidUtf8PctTriplets(&'a str), +} + +/// Processes characters in a string which may contain (possibly invalid) percent-encoded triplets. +pub(crate) fn process_percent_encoded_best_effort<T, F, B>( + v: T, + mut f: F, +) -> Result<ControlFlow<B>, fmt::Error> +where + T: fmt::Display, + F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>, +{ + let mut buf = [0_u8; 12]; + let mut writer = DecomposeWriter { + f: &mut f, + decoder: Default::default(), + buf: &mut buf, + result: ControlFlow::Continue(()), + _r: PhantomData, + }; + + if write!(writer, "{v}").is_err() { + match writer.result { + ControlFlow::Continue(_) => return Err(fmt::Error), + ControlFlow::Break(v) => return Ok(ControlFlow::Break(v)), + } + } + + // Flush the internal buffer of the decoder. 
+ if let Some(len) = writer.decoder.flush(&mut buf).map(|v| usize::from(v.get())) { + let len_suffix = len % 3; + let triplets_end = len - len_suffix; + let triplets = core::str::from_utf8(&buf[..triplets_end]) + .expect("[validity] percent-encoded triplets consist of ASCII characters"); + if let ControlFlow::Break(v) = f(PctEncodedFragments::InvalidUtf8PctTriplets(triplets)) { + return Ok(ControlFlow::Break(v)); + } + + if len_suffix > 0 { + if let ControlFlow::Break(v) = f(PctEncodedFragments::StrayPercent) { + return Ok(ControlFlow::Break(v)); + } + } + if len_suffix > 1 { + let after_percent = core::str::from_utf8( + &buf[(triplets_end + 1)..(triplets_end + len_suffix)], + ) + .expect("[consistency] percent-encoded triplets contains only ASCII characters"); + if let ControlFlow::Break(v) = f(PctEncodedFragments::NoPctStr(after_percent)) { + return Ok(ControlFlow::Break(v)); + } + } + } + + Ok(ControlFlow::Continue(())) +} + +/// Writer to decompose the input into fragments. +struct DecomposeWriter<'a, F, B> { + /// Output function. + f: &'a mut F, + /// Decoder. + decoder: DecoderBuffer, + /// Buffer. + buf: &'a mut [u8], + /// Result of the last output function call. + result: ControlFlow<B>, + /// Dummy field for the type parameter of the return type of the function `f`. + _r: PhantomData<fn() -> B>, +} +impl<F, B> DecomposeWriter<'_, F, B> +where + F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>, +{ + /// Returns `Ok(_)` if the stored result is `Continue`, and `Err(_)` otherwise. + #[inline(always)] + fn result_continue_or_err(&self) -> fmt::Result { + if self.result.is_break() { + return Err(fmt::Error); + } + Ok(()) + } + + /// Calls the output functions with the undecodable fragments. 
+ fn output_as_undecodable(&mut self, len_undecodable: u8) -> fmt::Result { + let len_written = usize::from(len_undecodable); + let frag = core::str::from_utf8(&self.buf[..len_written]) + .expect("[validity] `DecoderBuffer` writes a valid ASCII string"); + let len_incomplete = len_written % 3; + let len_complete = len_written - len_incomplete; + self.result = (self.f)(PctEncodedFragments::InvalidUtf8PctTriplets( + &frag[..len_complete], + )); + self.result_continue_or_err()?; + if len_incomplete > 0 { + // At least the first `%` exists. + self.result = (self.f)(PctEncodedFragments::StrayPercent); + if self.result.is_break() { + return Err(fmt::Error); + } + if len_incomplete > 1 { + // A following hexdigit is available. + debug_assert_eq!( + len_incomplete, 2, + "[consistency] the length of incomplete percent-encoded \ + triplet must be less than 2 bytes" + ); + self.result = (self.f)(PctEncodedFragments::NoPctStr( + &frag[(len_complete + 1)..len_written], + )); + self.result_continue_or_err()?; + } + } + Ok(()) + } +} + +impl<F, B> fmt::Write for DecomposeWriter<'_, F, B> +where + F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>, +{ + fn write_str(&mut self, s: &str) -> fmt::Result { + self.result_continue_or_err()?; + let mut rest = s; + while !rest.is_empty() { + let (len_consumed, result) = self.decoder.push_encoded(self.buf, rest); + if len_consumed == 0 { + // `rest` does not start with the percent-encoded triplets. + // Flush the decoder before attempting to decode more data. + if let Some(len_written) = self.decoder.flush(self.buf).map(NonZeroU8::get) { + self.output_as_undecodable(len_written)?; + rest = &rest[usize::from(len_written)..]; + } + + // Write plain string prefix (if found). 
+ let (plain_prefix, suffix) = find_split(rest, b'%').unwrap_or((rest, "")); + debug_assert!( + !plain_prefix.is_empty(), + "[consistency] `len_consumed == 0` indicates non-empty \ + `rest` not starting with `%`" + ); + self.result = (self.f)(PctEncodedFragments::NoPctStr(plain_prefix)); + self.result_continue_or_err()?; + rest = suffix; + continue; + } + + // Process decoding result. + match result { + PushResult::Decoded(len_written, c) => { + let len_written = usize::from(len_written.get()); + let frag = core::str::from_utf8(&self.buf[..len_written]) + .expect("[validity] `DecoderBuffer` writes a valid ASCII string"); + self.result = (self.f)(PctEncodedFragments::Char(frag, c)); + self.result_continue_or_err()?; + } + PushResult::Undecodable(len_written) => { + self.output_as_undecodable(len_written)?; + } + PushResult::NeedMoreBytes => { + // Nothing to write at this time. + } + } + rest = &rest[len_consumed..]; + } + Ok(()) + } +} + +/// A type for result of feeding data to [`DecoderBuffer`]. +#[derive(Debug, Clone, Copy)] +enum PushResult { + /// Input is still incomplete, needs more bytes to get the decoding result. + NeedMoreBytes, + /// Bytes decodable to valid UTF-8 sequence. + // `.0`: Length of decodable fragment. + // `.1`: Decoded character. + Decoded(NonZeroU8, char), + /// Valid percent-encoded triplets but not decodable to valid UTF-8 sequence. + // `.0`: Length of undecodable fragment. + Undecodable(u8), +} + +/// Buffer to contain (and to decode) incomplete percent-encoded triplets. +#[derive(Default, Debug, Clone, Copy)] +struct DecoderBuffer { + /// Percent-encoded triplets that possibly consists a valid UTF-8 sequence after decoded. + // + // `3 * 4`: 3 ASCII characters for single percent-encoded triplet, and + // 4 triplets at most for single Unicode codepoint in UTF-8. + encoded: [u8; 12], + /// Decoded bytes. + decoded: [u8; 4], + /// Number of bytes available in `buf_encoded` buffer. 
+ /// + /// `buf_encoded_len / 3` also indicates the length of data in `decoded`. + len_encoded: u8, +} + +impl DecoderBuffer { + /// Writes the data of the given length to the destination, and remove that part from buffer. + fn write_and_pop(&mut self, dest: &mut [u8], remove_len: u8) { + let new_len = self.len_encoded - remove_len; + let remove_len = usize::from(remove_len); + let src_range = remove_len..usize::from(self.len_encoded); + dest[..remove_len].copy_from_slice(&self.encoded[..remove_len]); + + if new_len == 0 { + *self = Self::default(); + return; + } + self.encoded.copy_within(src_range, 0); + self.decoded + .copy_within((remove_len / 3)..usize::from(self.len_encoded / 3), 0); + self.len_encoded = new_len; + } + + /// Pushes a byte of a (possible) percent-encoded tripet to the buffer. + fn push_single_encoded_byte(&mut self, byte: u8) { + debug_assert!( + self.len_encoded < 12, + "[consistency] four percent-encoded triplets are enough for a unicode code point" + ); + let pos_enc = usize::from(self.len_encoded); + self.len_encoded += 1; + self.encoded[pos_enc] = byte; + if self.len_encoded % 3 == 0 { + // A new percent-encoded triplet is read. Decode and remember. + let pos_dec = usize::from(self.len_encoded / 3 - 1); + let upper = self.encoded[pos_enc - 1]; + let lower = byte; + debug_assert!( + upper.is_ascii_hexdigit() && lower.is_ascii_hexdigit(), + "[consistency] the `encoded` buffer should contain valid percent-encoded triplets" + ); + self.decoded[pos_dec] = hexdigits_to_byte([upper, lower]); + } + } + + /// Pushes the (possibly) encoded string to the buffer. + /// + /// When the push result is not `PctTripletPushResult::NeedMoreBytes`, the + /// caller should call `Self::clear()` before pushing more bytes. + /// + /// # Preconditions + /// + /// * `buf` should be more than 12 bytes. If not, this method may panic. 
+ #[must_use] + pub(crate) fn push_encoded(&mut self, buf: &mut [u8], s: &str) -> (usize, PushResult) { + debug_assert!( + buf.len() >= 12, + "[internal precondition] destination buffer should be at least 12 bytes" + ); + let mut chars = s.chars(); + let mut len_triplet_incomplete = self.len_encoded % 3; + for c in &mut chars { + if len_triplet_incomplete == 0 { + // Expect `%`. + if c != '%' { + // Undecodable. + // `-1`: the last byte is peeked but not consumed. + let len_consumed = s.len() - chars.as_str().len() - 1; + let len_result = self.len_encoded; + self.write_and_pop(buf, len_result); + return (len_consumed, PushResult::Undecodable(len_result)); + } + self.push_single_encoded_byte(b'%'); + len_triplet_incomplete = 1; + continue; + } + + // Expect a nibble. + if !c.is_ascii_hexdigit() { + // Undecodable. + // `-1`: the last byte is peeked but not consumed. + let len_consumed = s.len() - chars.as_str().len() - 1; + let len_result = self.len_encoded; + self.write_and_pop(buf, len_result); + return (len_consumed, PushResult::Undecodable(len_result)); + } + self.push_single_encoded_byte(c as u8); + if len_triplet_incomplete == 1 { + len_triplet_incomplete = 2; + continue; + } else { + // Now a new percent-encoded triplet is read! + debug_assert_eq!(len_triplet_incomplete, 2); + len_triplet_incomplete = 0; + } + + // Now a new percent-encoded triplet is read. + // Check if the buffer contains a valid decodable content. + let len_decoded = usize::from(self.len_encoded) / 3; + match core::str::from_utf8(&self.decoded[..len_decoded]) { + Ok(decoded_str) => { + // Successfully decoded. 
+ let len_consumed = s.len() - chars.as_str().len(); + let c = decoded_str + .chars() + .next() + .expect("[validity] `decoded` buffer is nonempty"); + let len_result = NonZeroU8::new(self.len_encoded).expect( + "[consistency] `encoded` buffer is nonempty since \ + `push_single_encoded_byte()` was called", + ); + self.write_and_pop(buf, len_result.get()); + return (len_consumed, PushResult::Decoded(len_result, c)); + } + Err(e) => { + // Undecodable. + assert_eq!( + e.valid_up_to(), + 0, + "[consistency] `decoded` buffer contains at most one character" + ); + let skip_len_decoded = match e.error_len() { + // Unexpected EOF. Wait for remaining input. + None => continue, + // Skip invalid bytes. + Some(v) => v, + }; + let len_consumed = s.len() - chars.as_str().len(); + let len_result = skip_len_decoded as u8 * 3; + assert_ne!( + skip_len_decoded, 0, + "[consistency] empty bytes cannot be invalid" + ); + self.write_and_pop(buf, len_result); + return (len_consumed, PushResult::Undecodable(len_result)); + } + }; + } + let len_consumed = s.len() - chars.as_str().len(); + (len_consumed, PushResult::NeedMoreBytes) + } + + /// Writes the incomplete data completely to the destination, and clears the internal buffer. + #[must_use] + pub(crate) fn flush(&mut self, buf: &mut [u8]) -> Option<NonZeroU8> { + let len_result = NonZeroU8::new(self.len_encoded)?; + // Emit the current (undecodable) buffer as is. + self.write_and_pop(buf, len_result.get()); + debug_assert_eq!( + self.len_encoded, 0, + "[consistency] the buffer should be cleared after flushed" + ); + Some(len_result) + } +} diff --git a/vendor/iri-string/src/parser/trusted.rs b/vendor/iri-string/src/parser/trusted.rs new file mode 100644 index 00000000..f15c075e --- /dev/null +++ b/vendor/iri-string/src/parser/trusted.rs @@ -0,0 +1,476 @@ +//! Fast parsers for trusted (already validated) input. +//! +//! Using this in wrong way will lead to unexpected wrong result. 
pub(crate) mod authority;

use core::cmp::Ordering;
use core::num::NonZeroUsize;

use crate::components::{RiReferenceComponents, Splitter};
use crate::format::eq_str_display;
use crate::normalize::{is_pct_case_normalized, NormalizedAsciiOnlyHost, NormalizednessCheckMode};
use crate::parser::str::{find_split2, find_split3, find_split4_hole, find_split_hole};
use crate::spec::Spec;
use crate::types::RiReferenceStr;

/// Eats a `scheme` and a following colon, and returns the rest and the scheme.
///
/// Returns `(rest, scheme)`. The colon is included in neither of them.
///
/// This should be called at the head of an absolute IRI/URI.
///
/// # Panics
///
/// Panics if the string does not contain a colon.
#[must_use]
fn scheme_colon(i: &str) -> (&str, &str) {
    let (scheme, rest) =
        find_split_hole(i, b':').expect("[precondition] absolute IRIs must have `scheme` part");
    (rest, scheme)
}

/// Eats a `scheme` and a following colon if available, and returns the rest and the scheme.
///
/// Returns `(rest, scheme)`. If a slash, a question mark, or a hash appears
/// before the first colon (or no colon is found), the string is considered to
/// have no scheme, and the input is returned as is with `None`.
///
/// This should be called at the head of an `IRI-reference` or similar.
#[must_use]
fn scheme_colon_opt(i: &str) -> (&str, Option<&str>) {
    match find_split4_hole(i, b':', b'/', b'?', b'#') {
        Some((scheme, b':', rest)) => (rest, Some(scheme)),
        _ => (i, None),
    }
}

/// Eats double slash and the following authority if available, and returns the authority.
///
/// Returns `(rest, authority)`. The authority does not include the leading
/// double slash. If the string does not start with a double slash, the input
/// is returned as is with `None`.
///
/// This should be called at the head of an `IRI-reference`, or at the result of `scheme_colon`.
#[must_use]
fn slash_slash_authority_opt(i: &str) -> (&str, Option<&str>) {
    let s = match i.strip_prefix("//") {
        Some(rest) => rest,
        None => return (i, None),
    };
    // `i` might match `path-abempty` (which can start with `//`), but it is not
    // allowed as `relative-part`, so no need to care `path-abempty` rule here.
    // A slash, question mark, and hash character won't appear in `authority`.
    match find_split3(s, b'/', b'?', b'#') {
        Some((authority, rest)) => (rest, Some(authority)),
        // Everything after the double slash is the authority.
        None => ("", Some(s)),
    }
}

/// Eats a string until the query, and returns that part (excluding `?` for the query).
///
/// Returns `(rest, before_query)`. The rest is empty, or starts with `?` or `#`.
#[must_use]
fn until_query(i: &str) -> (&str, &str) {
    // `?` won't appear before the query part.
    match find_split2(i, b'?', b'#') {
        Some((before_query, rest)) => (rest, before_query),
        None => ("", i),
    }
}

/// Decomposes query and fragment, if available.
///
/// Returns `(query, fragment)`. Neither of them includes the leading `?` or `#`.
///
/// The string must start with `?` or `#`, or be empty.
#[must_use]
fn decompose_query_and_fragment(i: &str) -> (Option<&str>, Option<&str>) {
    match i.as_bytes().first().copied() {
        None => (None, None),
        Some(b'?') => {
            let rest = &i[1..];
            match find_split_hole(rest, b'#') {
                Some((query, fragment)) => (Some(query), Some(fragment)),
                None => (Some(rest), None),
            }
        }
        Some(c) => {
            // Only a fragment follows, per the precondition.
            debug_assert_eq!(c, b'#');
            (None, Some(&i[1..]))
        }
    }
}

/// Decomposes the given valid `IRI-reference`.
///
/// The positions of the components are calculated as byte offsets in the
/// whole string, and are stored in a `Splitter`.
#[must_use]
pub(crate) fn decompose_iri_reference<S: Spec>(
    i: &RiReferenceStr<S>,
) -> RiReferenceComponents<'_, S> {
    /// Inner function to avoid unnecessary monomorphizations on `S`.
    fn decompose(i: &str) -> Splitter {
        // Length of the whole string. `i` is shadowed by the unprocessed
        // suffix below, so `len - i.len()` is the offset of that suffix.
        let len = i.len();

        let (i, scheme_end) = {
            let (i, scheme) = scheme_colon_opt(i);
            let end = scheme.and_then(|s| NonZeroUsize::new(s.len()));
            (i, end)
        };
        let (i, authority_end) = {
            // 2: "//".len()
            // This is meaningful (and used) only when the authority exists.
            let start = len - i.len() + 2;
            // `authority` does not contain the two slashes of `://`.
            let (i, authority) = slash_slash_authority_opt(i);
            let end = authority.and_then(|s| NonZeroUsize::new(start + s.len()));
            (i, end)
        };
        let (i, _path) = until_query(i);

        let (query_start, fragment_start) = {
            // Position just after the first `?` or `#`, if either exists.
            //
            // This could theoretically be zero if `len` is `usize::MAX` and
            // `i` has neither a query nor a fragment. However, this is
            // practically impossible.
            let after_first_prefix = NonZeroUsize::new((len - i.len()).wrapping_add(1));

            let (query, fragment) = decompose_query_and_fragment(i);
            match (query.is_some(), fragment) {
                (true, Some(fragment)) => {
                    // The fragment is the tail of the whole string.
                    (after_first_prefix, NonZeroUsize::new(len - fragment.len()))
                }
                (true, None) => (after_first_prefix, None),
                (false, Some(_fragment)) => (None, after_first_prefix),
                (false, None) => (None, None),
            }
        };

        Splitter::new(scheme_end, authority_end, query_start, fragment_start)
    }

    RiReferenceComponents {
        iri: i,
        splitter: decompose(i.as_str()),
    }
}

/// Extracts `scheme` part from an IRI reference.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn extract_scheme(i: &str) -> Option<&str> {
    scheme_colon_opt(i).1
}

/// Extracts `scheme` part from an absolute IRI.
///
/// # Precondition
///
/// The given string must be a valid absolute IRI.
#[inline]
#[must_use]
pub(crate) fn extract_scheme_absolute(i: &str) -> &str {
    scheme_colon(i).1
}

/// Extracts `authority` part from an IRI reference.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn extract_authority(i: &str) -> Option<&str> {
    let (i, _scheme) = scheme_colon_opt(i);
    slash_slash_authority_opt(i).1
}

/// Extracts `authority` part from an absolute IRI.
///
/// # Precondition
///
/// The given string must be a valid absolute IRI.
#[inline]
#[must_use]
pub(crate) fn extract_authority_absolute(i: &str) -> Option<&str> {
    let (i, _scheme) = scheme_colon(i);
    slash_slash_authority_opt(i).1
}

/// Extracts `authority` part from a relative IRI.
///
/// # Precondition
///
/// The given string must be a valid relative IRI.
#[inline]
#[must_use]
pub(crate) fn extract_authority_relative(i: &str) -> Option<&str> {
    slash_slash_authority_opt(i).1
}

/// Extracts `path` part from an IRI reference.
+/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. +#[inline] +#[must_use] +pub(crate) fn extract_path(i: &str) -> &str { + let (i, _scheme) = scheme_colon_opt(i); + let (i, _authority) = slash_slash_authority_opt(i); + until_query(i).1 +} + +/// Extracts `path` part from an absolute IRI. +/// +/// # Precondition +/// +/// The given string must be a valid absolute IRI. +#[inline] +#[must_use] +pub(crate) fn extract_path_absolute(i: &str) -> &str { + let (i, _scheme) = scheme_colon(i); + let (i, _authority) = slash_slash_authority_opt(i); + until_query(i).1 +} + +/// Extracts `path` part from a relative IRI. +/// +/// # Precondition +/// +/// The given string must be a valid relative IRI. +#[inline] +#[must_use] +pub(crate) fn extract_path_relative(i: &str) -> &str { + let (i, _authority) = slash_slash_authority_opt(i); + until_query(i).1 +} + +/// Extracts `query` part from an IRI reference. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. +#[inline] +#[must_use] +pub(crate) fn extract_query(i: &str) -> Option<&str> { + let (i, _before_query) = until_query(i); + decompose_query_and_fragment(i).0 +} + +/// Extracts `query` part from an `absolute-IRI` string. +/// +/// # Precondition +/// +/// The given string must be a valid `absolute-IRI` string. +#[must_use] +pub(crate) fn extract_query_absolute_iri(i: &str) -> Option<&str> { + let (i, _before_query) = until_query(i); + if i.is_empty() { + None + } else { + debug_assert_eq!( + i.as_bytes().first(), + Some(&b'?'), + "`absolute-IRI` string must not have `fragment part" + ); + Some(&i[1..]) + } +} + +/// Splits an IRI string into the prefix and the fragment part. +/// +/// A leading `#` character is truncated if the fragment part exists. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. 
/// Returns `true` if the string is normalized.
///
/// If this function returns `true`, normalization input and output will be identical.
///
/// In this function, "normalized" means that none of the normalizations below
/// would change the input:
///
/// * syntax-based normalization,
/// * case normalization,
/// * percent-encoding normalization, and
/// * path segment normalization.
///
/// Note that scheme-based normalization is not considered.
///
/// `mode` is only consulted when the input has no authority: it selects
/// which part of the path (if any) is checked for dot segments.
#[must_use]
pub(crate) fn is_normalized<S: Spec>(i: &str, mode: NormalizednessCheckMode) -> bool {
    let (i, scheme) = scheme_colon(i);
    let (after_authority, authority) = slash_slash_authority_opt(i);
    let (_after_path, path) = until_query(after_authority);

    // Syntax-based normalization: uppercase chars in `scheme` should be
    // converted to lowercase.
    if scheme.bytes().any(|b| b.is_ascii_uppercase()) {
        return false;
    }

    // Case normalization: ASCII alphabets in US-ASCII only `host` should be
    // normalized to lowercase.
    // Case normalization: ASCII alphabets in percent-encoding triplet should be
    // normalized to uppercase.
    // Percent-encoding normalization: unreserved characters should be decoded
    // in `userinfo`, `host`, `path`, `query`, and `fragments`.
    // Path segment normalization: the path should not have dot segments (`.`
    // and/or `..`).
    //
    // Note that `authority` can have percent-encoded `userinfo`.
    if let Some(authority) = authority {
        let authority_components = authority::decompose_authority(authority);

        // Check `host`.
        let host = authority_components.host();
        let host_is_normalized = if is_ascii_only_host(host) {
            eq_str_display(host, &NormalizedAsciiOnlyHost::new(host))
        } else {
            // If the host is not ASCII-only, conversion to lowercase is not performed.
            is_pct_case_normalized::<S>(host)
        };
        if !host_is_normalized {
            return false;
        }

        // Check percent encodings in `userinfo`.
        if let Some(userinfo) = authority_components.userinfo() {
            if !is_pct_case_normalized::<S>(userinfo) {
                return false;
            }
        }
    }

    // Check `path`.
    //
    // Syntax-based normalization: Dot segments might be removed.
    // Note that we don't have to care about `%2e` and `%2E` since `.` is
    // unreserved and they will be decoded if not normalized.
    // Also note that WHATWG serialization will use `/.//` as a path prefix if
    // the path is absolute and won't modify the path if the path is relative.
    //
    // Percent-encoding normalization: unreserved characters should be decoded
    // in `path`, `query`, and `fragments`.
    let path_span_no_dot_segments = if authority.is_some() {
        Some(path)
    } else {
        match mode {
            NormalizednessCheckMode::Default => Some(path.strip_prefix("/.//").unwrap_or(path)),
            NormalizednessCheckMode::Rfc3986 => Some(path),
            NormalizednessCheckMode::PreserveAuthoritylessRelativePath => {
                if path.starts_with('/') {
                    // Absolute.
                    Some(path.strip_prefix("/.//").unwrap_or(path))
                } else {
                    // Relative. Treat the path as "opaque". No span to check.
                    None
                }
            }
        }
    };
    if let Some(path_span_no_dot_segments) = path_span_no_dot_segments {
        if path_span_no_dot_segments
            .split('/')
            .any(|segment| matches!(segment, "." | ".."))
        {
            return false;
        }
    }
    // The percent-encoding check covers everything after the authority
    // (path, query and fragment) in one pass.
    is_pct_case_normalized::<S>(after_authority)
}
+#[must_use] +pub(crate) fn take_xdigits2(s: &str) -> (u8, &str) { + let mut bytes = s.bytes(); + let upper_xdigit = bytes + .next() + .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference"); + let lower_xdigit = bytes + .next() + .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference"); + let v = hexdigits_to_byte([upper_xdigit, lower_xdigit]); + (v, &s[2..]) +} + +/// Returns true if the given `host`/`ihost` string consists of only US-ASCII characters. +/// +/// # Precondition +/// +/// The given string should be valid `host` or `host ":" port` string. +#[must_use] +pub(crate) fn is_ascii_only_host(mut host: &str) -> bool { + while let Some((i, c)) = host + .char_indices() + .find(|(_i, c)| !c.is_ascii() || *c == '%') + { + if c != '%' { + // Non-ASCII character found. + debug_assert!(!c.is_ascii()); + return false; + } + // Percent-encoded character found. + let after_pct = &host[(i + 1)..]; + let (byte, rest) = take_xdigits2(after_pct); + if !byte.is_ascii() { + return false; + } + host = rest; + } + + // Neither non-ASCII characters nor percent-encoded characters found. + true +} diff --git a/vendor/iri-string/src/parser/trusted/authority.rs b/vendor/iri-string/src/parser/trusted/authority.rs new file mode 100644 index 00000000..83e41298 --- /dev/null +++ b/vendor/iri-string/src/parser/trusted/authority.rs @@ -0,0 +1,32 @@ +//! Parsers for trusted `authority` string. + +use crate::components::AuthorityComponents; +use crate::parser::str::{find_split_hole, rfind_split2}; + +/// Decomposes the authority into `(userinfo, host, port)`. +/// +/// The leading `:` is truncated. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. 
+#[inline] +#[must_use] +pub(crate) fn decompose_authority(authority: &str) -> AuthorityComponents<'_> { + let i = authority; + let (i, host_start) = match find_split_hole(i, b'@') { + Some((userinfo, rest)) => (rest, userinfo.len() + 1), + None => (authority, 0), + }; + let colon_port_len = match rfind_split2(i, b':', b']') { + Some((_, suffix)) if suffix.starts_with(':') => suffix.len(), + _ => 0, + }; + let host_end = authority.len() - colon_port_len; + + AuthorityComponents { + authority, + host_start, + host_end, + } +} diff --git a/vendor/iri-string/src/parser/validate.rs b/vendor/iri-string/src/parser/validate.rs new file mode 100644 index 00000000..59625394 --- /dev/null +++ b/vendor/iri-string/src/parser/validate.rs @@ -0,0 +1,225 @@ +//! Validating parsers for non-trusted (possibly invalid) input. + +mod authority; +mod path; + +use crate::parser::char; +use crate::parser::str::{ + find_split, find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, +}; +use crate::spec::Spec; +use crate::validate::Error; + +use self::authority::validate_authority; +pub(crate) use self::authority::{validate_host, validate_userinfo}; +pub(crate) use self::path::validate_path; +use self::path::{ + validate_path_abempty, validate_path_absolute_authority_absent, + validate_path_relative_authority_absent, +}; + +/// Returns `Ok(_)` if the string matches `scheme`. +pub(crate) fn validate_scheme(i: &str) -> Result<(), Error> { + debug_assert!(!i.is_empty()); + let bytes = i.as_bytes(); + if bytes[0].is_ascii_alphabetic() + && bytes[1..] + .iter() + .all(|&b| b.is_ascii() && char::is_ascii_scheme_continue(b)) + { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `query` or `iquery`. 
+pub(crate) fn validate_query<S: Spec>(i: &str) -> Result<(), Error> { + let is_valid = + satisfy_chars_with_pct_encoded(i, char::is_ascii_frag_query, char::is_nonascii_query::<S>); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `authority path-abempty` rule sequence. +fn validate_authority_path_abempty<S: Spec>(i: &str) -> Result<(), Error> { + let (maybe_authority, maybe_path) = match find_split(i, b'/') { + Some(v) => v, + None => (i, ""), + }; + validate_authority::<S>(maybe_authority)?; + validate_path_abempty::<S>(maybe_path) +} + +/// Returns `Ok(_)` if the string matches `URI`/`IRI` rules. +#[inline] +pub(crate) fn validate_uri<S: Spec>(i: &str) -> Result<(), Error> { + validate_uri_reference_common::<S>(i, UriReferenceRule::Absolute) +} + +/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules. +#[inline] +pub(crate) fn validate_uri_reference<S: Spec>(i: &str) -> Result<(), Error> { + validate_uri_reference_common::<S>(i, UriReferenceRule::Any) +} + +/// Returns `Ok(_)` if the string matches `absolute-URI`/`absolute-IRI` rules. +#[inline] +pub(crate) fn validate_absolute_uri<S: Spec>(i: &str) -> Result<(), Error> { + validate_uri_reference_common::<S>(i, UriReferenceRule::AbsoluteWithoutFragment) +} + +/// Syntax rule for URI/IRI references. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +enum UriReferenceRule { + /// `URI` and `IRI`. + /// + /// This can have a fragment. + Absolute, + /// `absolute-URI` and `absolute-IRI`. + /// + /// This cannot have a fragment. + AbsoluteWithoutFragment, + /// `URI-reference` and `IRI-reference`. + /// + /// This can be relative. + Any, +} + +impl UriReferenceRule { + /// Returns `true` is the relative reference is allowed. + #[inline] + #[must_use] + fn is_relative_allowed(self) -> bool { + self == Self::Any + } + + /// Returns `true` is the fragment part is allowed. 
+ #[inline] + #[must_use] + fn is_fragment_allowed(self) -> bool { + matches!(self, Self::Absolute | Self::Any) + } +} + +/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules. +fn validate_uri_reference_common<S: Spec>( + i: &str, + ref_rule: UriReferenceRule, +) -> Result<(), Error> { + // Validate `scheme ":"`. + let (i, _scheme) = match find_split_hole(i, b':') { + None => { + if ref_rule.is_relative_allowed() { + return validate_relative_ref::<S>(i); + } else { + return Err(Error::new()); + } + } + Some(("", _)) => return Err(Error::new()), + Some((maybe_scheme, rest)) => { + if validate_scheme(maybe_scheme).is_err() { + // The string before the first colon is not a scheme. + // Falling back to `relative-ref` parsing. + if ref_rule.is_relative_allowed() { + return validate_relative_ref::<S>(i); + } else { + return Err(Error::new()); + } + } + (rest, maybe_scheme) + } + }; + + // Validate `hier-part`. + let after_path = match i.strip_prefix("//") { + Some(i) => { + let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))), + None => (i, None), + }; + validate_authority_path_abempty::<S>(maybe_authority_path)?; + after_path + } + None => { + let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))), + None => (i, None), + }; + // Authority is absent. + validate_path_absolute_authority_absent::<S>(maybe_path)?; + after_path + } + }; + + // Validate `[ "?" query ] [ "#" fragment ]`. + if let Some((first, rest)) = after_path { + validate_after_path::<S>(first, rest, ref_rule.is_fragment_allowed())?; + } + Ok(()) +} + +/// Returns `Ok(_)` if the string matches `relative-ref`/`irelative-ref` rules. +pub(crate) fn validate_relative_ref<S: Spec>(i: &str) -> Result<(), Error> { + // Validate `relative-part`. 
+ let after_path = match i.strip_prefix("//") { + Some(i) => { + let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))), + None => (i, None), + }; + validate_authority_path_abempty::<S>(maybe_authority_path)?; + after_path + } + None => { + let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))), + None => (i, None), + }; + // Authority is absent. + validate_path_relative_authority_absent::<S>(maybe_path)?; + after_path + } + }; + + // Validate `[ "?" query ] [ "#" fragment ]`. + if let Some((first, rest)) = after_path { + validate_after_path::<S>(first, rest, true)?; + } + Ok(()) +} + +/// Returns `Ok(_)` if the string matches `[ "?" query ] [ "#" fragment ]` (or IRI version). +fn validate_after_path<S: Spec>(first: u8, rest: &str, accept_fragment: bool) -> Result<(), Error> { + let (maybe_query, maybe_fragment) = if first == b'?' { + match find_split_hole(rest, b'#') { + Some(v) => v, + None => (rest, ""), + } + } else { + debug_assert_eq!(first, b'#'); + ("", rest) + }; + validate_query::<S>(maybe_query)?; + if !accept_fragment && !maybe_fragment.is_empty() { + return Err(Error::new()); + } + validate_fragment::<S>(maybe_fragment) +} + +/// Returns `Ok(_)` if the string matches `fragment`/`ifragment` rules. +pub(crate) fn validate_fragment<S: Spec>(i: &str) -> Result<(), Error> { + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_frag_query, + char::is_nonascii_fragment::<S>, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} diff --git a/vendor/iri-string/src/parser/validate/authority.rs b/vendor/iri-string/src/parser/validate/authority.rs new file mode 100644 index 00000000..fb41085e --- /dev/null +++ b/vendor/iri-string/src/parser/validate/authority.rs @@ -0,0 +1,296 @@ +//! Parsers for authority. 
/// Returns `true` if the string matches `dec-octet`.
///
/// In other words, this tests whether the string is a decimal number from
/// "0" to "255" written without superfluous leading zeros.
#[must_use]
fn is_dec_octet(i: &str) -> bool {
    let digits = i.as_bytes();
    // One to three ASCII digits only.
    if digits.is_empty() || digits.len() > 3 || !digits.iter().all(u8::is_ascii_digit) {
        return false;
    }
    // Multi-digit numbers must not start with `0`.
    if digits.len() > 1 && digits[0] == b'0' {
        return false;
    }
    // At most three digits, so the value fits in `u16` comfortably.
    let value = digits
        .iter()
        .fold(0_u16, |acc, &d| acc * 10 + u16::from(d - b'0'));
    value <= 255
}
+fn split_v6_addr_part(i: &str) -> Result<(&str, V6AddrPart), Error> { + debug_assert!(!i.is_empty()); + match find_split_hole(i, b':') { + Some((prefix, rest)) => { + if prefix.len() >= 5 { + return Err(Error::new()); + } + + if prefix.is_empty() { + return match strip_ascii_char_prefix(rest, b':') { + Some(rest) => Ok((rest, V6AddrPart::Omit)), + None => Err(Error::new()), + }; + } + + // Should be `h16`. + debug_assert!((1..=4).contains(&prefix.len())); + if !prefix.bytes().all(|b| b.is_ascii_hexdigit()) { + return Err(Error::new()); + } + match strip_ascii_char_prefix(rest, b':') { + Some(rest) => Ok((rest, V6AddrPart::H16Omit)), + None => Ok((rest, V6AddrPart::H16Cont)), + } + } + None => { + if i.len() >= 5 { + // Possibly `IPv4address`. + validate_ipv4address(i)?; + return Ok(("", V6AddrPart::V4)); + } + if i.bytes().all(|b| b.is_ascii_hexdigit()) { + Ok(("", V6AddrPart::H16End)) + } else { + Err(Error::new()) + } + } + } +} + +/// Returns `Ok(_)` if the string matches `IPv6address`. +fn validate_ipv6address(mut i: &str) -> Result<(), Error> { + let mut h16_count = 0; + let mut is_omitted = false; + while !i.is_empty() { + let (rest, part) = split_v6_addr_part(i)?; + match part { + V6AddrPart::H16Omit => { + h16_count += 1; + if mem::replace(&mut is_omitted, true) { + // Omitted twice. + return Err(Error::new()); + } + } + V6AddrPart::H16Cont => { + h16_count += 1; + if rest.is_empty() { + // `H16Cont` cannot be the last part of an IPv6 address. + return Err(Error::new()); + } + } + V6AddrPart::H16End => { + h16_count += 1; + break; + } + V6AddrPart::V4 => { + debug_assert!(rest.is_empty()); + h16_count += 2; + break; + } + V6AddrPart::Omit => { + if mem::replace(&mut is_omitted, true) { + // Omitted twice. 
+ return Err(Error::new()); + } + } + } + if h16_count > 8 { + return Err(Error::new()); + } + i = rest; + } + let is_valid = if is_omitted { + h16_count < 8 + } else { + h16_count == 8 + }; + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `authority` or `iauthority`. +pub(super) fn validate_authority<S: Spec>(i: &str) -> Result<(), Error> { + // Strip and validate `userinfo`. + let (i, _userinfo) = match find_split_hole(i, b'@') { + Some((maybe_userinfo, i)) => { + validate_userinfo::<S>(maybe_userinfo)?; + (i, Some(maybe_userinfo)) + } + None => (i, None), + }; + // `host` can contain colons, but `port` cannot. + // Strip and validate `port`. + let (maybe_host, _port) = match rfind_split_hole(i, b':') { + Some((maybe_host, maybe_port)) => { + if maybe_port.bytes().all(|b| b.is_ascii_digit()) { + (maybe_host, Some(maybe_port)) + } else { + (i, None) + } + } + None => (i, None), + }; + // Validate `host`. + validate_host::<S>(maybe_host) +} + +/// Validates `host`. +pub(crate) fn validate_host<S: Spec>(i: &str) -> Result<(), Error> { + match get_wrapped_inner(i, b'[', b']') { + Some(maybe_addr) => { + // `IP-literal`. + // Note that `v` here is case insensitive. See RFC 3987 section 3.2.2. + if let Some(maybe_addr_rest) = strip_ascii_char_prefix(maybe_addr, b'v') + .or_else(|| strip_ascii_char_prefix(maybe_addr, b'V')) + { + // `IPvFuture`. + let (maybe_ver, maybe_addr) = + find_split_hole(maybe_addr_rest, b'.').ok_or_else(Error::new)?; + // Validate version. + if maybe_ver.is_empty() || !maybe_ver.bytes().all(|b| b.is_ascii_hexdigit()) { + return Err(Error::new()); + } + // Validate address. + if !maybe_addr.is_empty() + && maybe_addr.is_ascii() + && maybe_addr + .bytes() + .all(char::is_ascii_userinfo_ipvfutureaddr) + { + Ok(()) + } else { + Err(Error::new()) + } + } else { + // `IPv6address`. + validate_ipv6address(maybe_addr) + } + } + None => { + // `IPv4address` or `reg-name`. 
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use super::*;

    use alloc::format;

    /// Asserts that `$parser` accepts every listed input.
    macro_rules! assert_validate {
        ($parser:expr, $($input:expr),* $(,)?) => {{
            $({
                let input = $input;
                let input: &str = input.as_ref();
                assert!($parser(input).is_ok(), "input={:?}", input);
            })*
        }};
    }

    #[test]
    fn test_ipv6address() {
        assert_validate!(
            validate_ipv6address,
            "a:bB:cCc:dDdD:e:F:a:B",
            "1:1:1:1:1:1:1:1",
            "1:1:1:1:1:1:1.1.1.1",
            "2001:db8::7",
        );

        // Generate IPv6 addresses with `::`.
        // `groups(n)` is `n` copies of `1` joined by colons.
        let groups = |n: usize| {
            let mut s = "1:".repeat(n);
            s.pop();
            s
        };
        for prefix_len in 0..=7 {
            let prefix = groups(prefix_len);
            for suffix_len in 1..=(7 - prefix_len) {
                assert_validate!(
                    validate_ipv6address,
                    &format!("{}::{}", prefix, groups(suffix_len))
                );
                // Same address with the last two groups replaced by an IPv4 address.
                if suffix_len > 2 {
                    assert_validate!(
                        validate_ipv6address,
                        &format!("{}::{}:1.1.1.1", prefix, groups(suffix_len - 2))
                    );
                } else if suffix_len == 2 {
                    assert_validate!(validate_ipv6address, &format!("{}::1.1.1.1", prefix));
                }
            }
        }
    }
}
+pub(super) fn validate_path_abempty<S: Spec>(i: &str) -> Result<(), Error> { + if i.is_empty() { + return Ok(()); + } + let i = match i.strip_prefix('/') { + Some(rest) => rest, + None => return Err(Error::new()), + }; + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `hier-part` or `ihier-part` modulo +/// `"//" authority path-abempty`. +pub(super) fn validate_path_absolute_authority_absent<S: Spec>(i: &str) -> Result<(), Error> { + if i.is_empty() { + // `path-empty`. + return Ok(()); + } + if i.starts_with("//") { + unreachable!("this case should be handled by the caller"); + } + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `relative-part` or `irelative-part` modulo +/// `"//" authority path-abempty`. +pub(super) fn validate_path_relative_authority_absent<S: Spec>(i: &str) -> Result<(), Error> { + if i.starts_with("//") { + unreachable!("this case should be handled by the caller"); + } + let is_valid = match find_split2_hole(i, b'/', b':') { + Some((_, b'/', _)) | None => satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ), + Some((_, c, _)) => { + debug_assert_eq!(c, b':'); + // `foo:bar`-style. This does not match `path-noscheme`. + return Err(Error::new()); + } + }; + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `path`/`ipath` rules. 
/// Context for percent encoding.
///
/// The context selects which characters are written as is; every other
/// character is percent-encoded.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
enum Context {
    /// Encode the string as a reg-name (commonly called a "hostname").
    RegName,
    /// Encode the string as a user name or a password (inside the `userinfo` component).
    ///
    /// A colon (`:`) will be encoded.
    UserOrPassword,
    /// Encode the string as a path segment.
    ///
    /// A slash (`/`) will be encoded to `%2F`.
    PathSegment,
    /// Encode the string as path segments joined with `/`.
    ///
    /// A slash (`/`) will be used as is.
    Path,
    /// Encode the string as a query string (without the `?` prefix).
    Query,
    /// Encode the string as a fragment string (without the `#` prefix).
    Fragment,
    /// Encode all characters except for `unreserved` characters.
    Unreserve,
    /// Encode characters only if they cannot appear anywhere in an IRI reference.
    ///
    /// The `%` character will always be encoded.
    Character,
}
/// Creates an encoded string from a raw password (inside `userinfo` component).
///
/// Colons are percent-encoded, the same as for a user name.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "password:\u{03B1}";
/// // The first `:` will be interpreted as a delimiter, and the colon
/// // inside the password will be the first one if the user name is empty,
/// // so colons will be escaped.
/// let encoded = "password%3A%CE%B1";
/// assert_eq!(
///     PercentEncoded::<_, UriSpec>::from_password(raw).to_string(),
///     encoded
/// );
/// # }
/// ```
pub fn from_password(raw: T) -> Self {
    Self {
        context: Context::UserOrPassword,
        raw,
        _spec: PhantomData,
    }
}
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let raw = "alpha/\u{03B1}?#"; + /// let encoded = "alpha/%CE%B1?%23"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::from_query(raw).to_string(), + /// encoded + /// ); + /// # } + /// ``` + pub fn from_query(raw: T) -> Self { + Self { + context: Context::Query, + raw, + _spec: PhantomData, + } + } + + /// Creates an encoded string from a raw fragment. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let raw = "alpha/\u{03B1}?#"; + /// let encoded = "alpha/%CE%B1?%23"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::from_fragment(raw).to_string(), + /// encoded + /// ); + /// # } + /// ``` + pub fn from_fragment(raw: T) -> Self { + Self { + context: Context::Fragment, + raw, + _spec: PhantomData, + } + } + + /// Creates a string consists of only `unreserved` string and percent-encoded triplets. 
/// Percent-encodes characters only if they cannot appear anywhere in an IRI reference.
///
/// The `%` character will always be encoded. In other words, this conversion
/// is not aware of percent-encoded triplets.
///
/// Note that this encoding process does not guarantee that the resulting
/// string is a valid IRI reference.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let unreserved = "%a0-._~\u{03B1}";
/// let unreserved_encoded = "%25a0-._~%CE%B1";
/// assert_eq!(
///     PercentEncoded::<_, UriSpec>::characters(unreserved).to_string(),
///     unreserved_encoded
/// );
///
/// let reserved = ":/?#[]@ !$&'()*+,;=";
/// // Note that a space cannot appear directly in an IRI reference, so it
/// // is the only character here that gets encoded (to `%20`).
/// let expected = ":/?#[]@%20!$&'()*+,;=";
/// assert_eq!(
///     PercentEncoded::<_, UriSpec>::characters(reserved).to_string(),
///     expected
/// );
/// # }
/// ```
#[inline]
#[must_use]
pub fn characters(raw: T) -> Self {
    Self {
        context: Context::Character,
        raw,
        _spec: PhantomData,
    }
}
/// Writes `c` in percent-encoded form.
///
/// The character is encoded as UTF-8 and every resulting byte is emitted as
/// one `%XX` triplet with uppercase hexadecimal digits. The first write
/// error, if any, is returned immediately.
#[inline]
fn write_pct_encoded_char<W: fmt::Write>(writer: &mut W, c: char) -> fmt::Result {
    // A `char` occupies at most four bytes in UTF-8.
    let mut utf8 = [0_u8; 4];
    for byte in c.encode_utf8(&mut utf8).as_bytes() {
        write!(writer, "%{:02X}", byte)?;
    }
    Ok(())
}
+#[cfg(feature = "alloc")] +pub(crate) fn split_fragment_owned(mut s: String) -> (String, Option<String>) { + let prefix_len = match trusted_parser::split_fragment(&s) { + (_, None) => return (s, None), + (prefix, Some(_fragment)) => prefix.len(), + }; + + // `+ 1` is for leading `#` character. + let fragment = s.split_off(prefix_len + 1); + // Current `s` contains a trailing `#` character, which should be removed. + { + // Remove a trailing `#`. + let hash = s.pop(); + assert_eq!(hash, Some('#')); + } + assert_eq!(s.len(), prefix_len); + + (s, Some(fragment)) +} diff --git a/vendor/iri-string/src/resolve.rs b/vendor/iri-string/src/resolve.rs new file mode 100644 index 00000000..d29c6874 --- /dev/null +++ b/vendor/iri-string/src/resolve.rs @@ -0,0 +1,344 @@ +//! URI and IRI resolvers. +//! +//! # IRI resolution can fail without WHATWG URL Standard serialization +//! +//! ## Pure RFC 3986 algorithm +//! +//! Though this is not explicitly stated in RFC 3986, IRI resolution can fail. +//! Below are examples: +//! +//! * base=`scheme:`, ref=`.///bar`. +//! + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority. +//! * base=`scheme:foo`, ref=`.///bar`. +//! + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority. +//! * base=`scheme:`, ref=`/..//baz`. +//! + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority. +//! * base=`scheme:foo/bar`, ref=`..//baz`. +//! + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority. +//! +//! IRI without authority (note that this is different from "with empty authority") +//! cannot have a path starting with `//`, since it is ambiguous and can be +//! interpreted as an IRI with authority. For the above examples, `scheme://bar` +//! is not valid output, as `bar` in `scheme://bar` will be interpreted as an +//! authority, not a path. +//! +//!
Thus, IRI resolution by pure RFC 3986 algorithm can fail for some abnormal +//! cases. +//! +//! Note that this kind of failure can happen only when the base IRI has no +//! authority and empty path. This would be rare in the wild, since many people +//! would use an IRI with authority part, such as `http://`. +//! +//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the +//! failure. Currently no cases are known to fail when at least one of the base +//! IRI or the relative IRI contains authorities. +//! +//! If you want this kind of abnormal IRI resolution to succeed and to be +//! idempotent, check the resolution result using +//! [`Normalized::ensure_rfc3986_normalizable`] (see the section below). +//! +//! ## WHATWG serialization +//! +//! To handle IRI resolution failure, WHATWG URL Standard defines serialization +//! algorithm for this kind of result, and it makes IRI resolution (and even +//! normalization) infallible and idempotent. +//! +//! IRI resolution and normalization provided by this crate automatically +//! applies this special rule if necessary, so they are infallible. If you want +//! to detect resolution/normalization failure, use +//! [`Normalized::ensure_rfc3986_normalizable`] method. +//! +//! ## Examples +//! +//! ``` +//! # #[cfg(feature = "alloc")] { +//! use iri_string::format::ToDedicatedString; +//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; +//! +//! let base = IriAbsoluteStr::new("scheme:")?; +//! { +//! let reference = IriReferenceStr::new(".///not-a-host")?; +//! let result = reference.resolve_against(base); +//! assert!(result.ensure_rfc3986_normalizable().is_err()); +//! assert_eq!(result.to_dedicated_string(), "scheme:/.//not-a-host"); +//! } +//! +//! { +//! let reference2 = IriReferenceStr::new("/..//not-a-host")?; +//! // Resulting string will be `scheme://not-a-host`, but `not-a-host` +//! // should be a path segment, not a host. So, the semantically correct +//! 
// target IRI cannot be represented by RFC 3986 IRI resolution. +//! let result2 = reference2.resolve_against(base); +//! assert!(result2.ensure_rfc3986_normalizable().is_err()); +//! +//! // Algorithm defined in WHATWG URL Standard addresses this case. +//! assert_eq!(result2.to_dedicated_string(), "scheme:/.//not-a-host"); +//! } +//! # } +//! # Ok::<_, iri_string::validate::Error>(()) +//! ``` + +use crate::components::RiReferenceComponents; +use crate::normalize::{NormalizationInput, Normalized}; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiQueryStr, RiReferenceStr, RiStr}; + +/// A resolver against the fixed base. +#[derive(Debug, Clone, Copy)] +pub struct FixedBaseResolver<'a, S: Spec> { + /// Components of the base IRI. + base_components: RiReferenceComponents<'a, S>, +} + +impl<'a, S: Spec> FixedBaseResolver<'a, S> { + /// Creates a new resolver with the given base. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # // `ToDedicatedString` is available only when + /// # // `alloc` feature is enabled. + /// #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; + /// + /// let base = IriAbsoluteStr::new("http://example.com/base/")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// let reference = IriReferenceStr::new("../there")?; + /// let resolved = resolver.resolve(reference); + /// + /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn new(base: &'a RiAbsoluteStr<S>) -> Self { + Self { + base_components: RiReferenceComponents::from(base.as_ref()), + } + } + + /// Returns the base. 
+ /// + /// # Examples + /// + /// ``` + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; + /// + /// let base = IriAbsoluteStr::new("http://example.com/base/")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// assert_eq!(resolver.base(), base); + /// # Ok::<_, iri_string::validate::Error>(()) + /// ``` + #[must_use] + pub fn base(&self) -> &'a RiAbsoluteStr<S> { + // SAFETY: `base_components` can only be created from `&RiAbsoluteStr<S>`, + // and the type of `base_components` does not allow modification of the + // content after it is created. + unsafe { RiAbsoluteStr::new_maybe_unchecked(self.base_components.iri().as_str()) } + } +} + +/// Components getters. +/// +/// These getters are more efficient than calling through the result of `.base()`. +impl<S: Spec> FixedBaseResolver<'_, S> { + /// Returns the scheme. + /// + /// The following colon is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let base = IriAbsoluteStr::new("http://example.com/base/?query")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// assert_eq!(resolver.scheme_str(), "http"); + /// assert_eq!(base.scheme_str(), "http"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn scheme_str(&self) -> &str { + self.base_components + .scheme_str() + .expect("[validity] absolute IRI should have the scheme part") + } + + /// Returns the authority. + /// + /// The leading `//` is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// assert_eq!(resolver.authority_str(), Some("user:pass@example.com")); + /// assert_eq!(base.authority_str(), Some("user:pass@example.com")); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_str(&self) -> Option<&str> { + self.base_components.authority_str() + } + + /// Returns the path. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// assert_eq!(resolver.path_str(), "/base/"); + /// assert_eq!(base.path_str(), "/base/"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn path_str(&self) -> &str { + self.base_components.path_str() + } + + /// Returns the query. + /// + /// The leading question mark (`?`) is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::{IriAbsoluteStr, IriQueryStr}; + /// + /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; + /// let resolver = FixedBaseResolver::new(base); + /// let query = IriQueryStr::new("query")?; + /// + /// assert_eq!(resolver.query(), Some(query)); + /// assert_eq!(base.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query(&self) -> Option<&RiQueryStr<S>> { + let query_raw = self.query_str()?; + let query = RiQueryStr::new(query_raw) + .expect("[validity] must be valid query if present in an absolute-IRI"); + Some(query) + } + + /// Returns the query in a raw string slice. + /// + /// The leading question mark (`?`) is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// assert_eq!(resolver.query_str(), Some("query")); + /// assert_eq!(base.query_str(), Some("query")); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query_str(&self) -> Option<&str> { + self.base_components.query_str() + } +} + +impl<'a, S: Spec> FixedBaseResolver<'a, S> { + /// Resolves the given reference against the fixed base. + /// + /// The task returned by this method does **not** normalize the resolution + /// result. However, `..` and `.` are recognized even when they are + /// percent-encoded. + /// + /// # Failures + /// + /// This function itself does not fail, but resolution algorithm defined by + /// RFC 3986 can fail. In that case, serialization algorithm defined by + /// WHATWG URL Standard would be automatically applied. 
+ /// + /// See the documentation of [`Normalized`]. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # // `ToDedicatedString` is available only when + /// # // `alloc` feature is enabled. + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; + /// + /// let base = IriAbsoluteStr::new("http://example.com/base/")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// let reference = IriReferenceStr::new("../there")?; + /// let resolved = resolver.resolve(reference); + /// + /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Note that `..` and `.` path segments are recognized even when they are + /// percent-encoded. + /// + /// ``` + /// # use iri_string::validate::Error; + /// # // `ToDedicatedString` is available only when + /// # // `alloc` feature is enabled. + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; + /// + /// let base = IriAbsoluteStr::new("HTTP://example.COM/base/base2/")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// // `%2e%2e` is recognized as `..`. + /// // However, `dot%2edot` is NOT normalized into `dot.dot`. + /// let reference = IriReferenceStr::new("%2e%2e/../dot%2edot")?; + /// let resolved = resolver.resolve(reference); + /// + /// // Resolved but not normalized. 
+ /// assert_eq!(resolved.to_dedicated_string(), "HTTP://example.COM/dot%2edot"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn resolve(&self, reference: &'a RiReferenceStr<S>) -> Normalized<'a, RiStr<S>> { + let input = NormalizationInput::with_resolution_params(&self.base_components, reference); + Normalized::from_input(input) + } +} diff --git a/vendor/iri-string/src/spec.rs b/vendor/iri-string/src/spec.rs new file mode 100644 index 00000000..21e8315e --- /dev/null +++ b/vendor/iri-string/src/spec.rs @@ -0,0 +1,34 @@ +//! IRI specs. + +use core::fmt; + +// Note that this MUST be private module. +// See <https://rust-lang.github.io/api-guidelines/future-proofing.html> about +// sealed trait. +mod internal; + +/// A trait for spec types. +/// +/// This trait is not intended to be implemented by crate users. +// Note that all types which implement `Spec` also implement `SpecInternal`. +pub trait Spec: internal::Sealed + Copy + fmt::Debug {} + +/// A type that represents specification of IRI. +/// +/// About IRI, see [RFC 3987]. +/// +/// [RFC 3987]: https://tools.ietf.org/html/rfc3987 +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum IriSpec {} + +impl Spec for IriSpec {} + +/// A type that represents specification of URI. +/// +/// About URI, see [RFC 3986]. +/// +/// [RFC 3986]: https://tools.ietf.org/html/rfc3986 +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum UriSpec {} + +impl Spec for UriSpec {} diff --git a/vendor/iri-string/src/spec/internal.rs b/vendor/iri-string/src/spec/internal.rs new file mode 100644 index 00000000..3ad2ee34 --- /dev/null +++ b/vendor/iri-string/src/spec/internal.rs @@ -0,0 +1,58 @@ +//! A private module for sealed trait and internal implementations. +//! +//! Note that this MUST be a private module. +//! See [Rust API Guidelines][sealed-trait] about the necessity of being private. +//! +//! [sealed-trait]: +//! 
https://rust-lang.github.io/api-guidelines/future-proofing.html#sealed-traits-protect-against-downstream-implementations-c-sealed + +use crate::parser::char::is_ucschar; +use crate::spec::{IriSpec, UriSpec}; + +/// A trait to prohibit user-defined types from implementing `Spec`. +/// +/// About sealed trait, see [Rust API Guidelines][future-proofing]. +/// +/// [future-proofing]: https://rust-lang.github.io/api-guidelines/future-proofing.html +pub trait Sealed: SpecInternal {} + +impl Sealed for IriSpec {} +impl Sealed for UriSpec {} + +/// Internal implementations for spec types. +pub trait SpecInternal: Sized { + /// Checks if the given non-ASCII character matches `unreserved` or `iunreserved` rule. + #[must_use] + fn is_nonascii_char_unreserved(c: char) -> bool; + /// Checks if the given character matches `iprivate` rule. + #[must_use] + fn is_nonascii_char_private(c: char) -> bool; +} + +impl SpecInternal for IriSpec { + #[inline] + fn is_nonascii_char_unreserved(c: char) -> bool { + is_ucschar(c) + } + + fn is_nonascii_char_private(c: char) -> bool { + matches!( + u32::from(c), + 0xE000..=0xF8FF | + 0xF_0000..=0xF_FFFD | + 0x10_0000..=0x10_FFFD + ) + } +} + +impl SpecInternal for UriSpec { + #[inline] + fn is_nonascii_char_unreserved(_: char) -> bool { + false + } + + #[inline] + fn is_nonascii_char_private(_: char) -> bool { + false + } +} diff --git a/vendor/iri-string/src/template.rs b/vendor/iri-string/src/template.rs new file mode 100644 index 00000000..3c647ff2 --- /dev/null +++ b/vendor/iri-string/src/template.rs @@ -0,0 +1,200 @@ +//! Processor for [RFC 6570] URI Template. +//! +//! [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html +//! +//! # Usage +//! +//! 1. Prepare a template. +//! * You can create a template as [`UriTemplateStr`] +#![cfg_attr( + feature = "alloc", + doc = " type (borrowed) or [`UriTemplateString`] type (owned)." +)] +#![cfg_attr(not(feature = "alloc"), doc = " type.")] +//! 2. Prepare a context. +//! 
* Create a value of type that implements [`Context`] trait. +#![cfg_attr( + feature = "alloc", + doc = " * Or, if you use [`SimpleContext`], insert key-value pairs into it." +)] +//! 3. Expand. +//! * Pass the context to [`UriTemplateStr::expand`] method of the template. +//! 4. Use the result. +//! * Returned [`Expanded`] object can be directly printed since it +//! implements [`Display`][`core::fmt::Display`] trait. Or, you can call +//! `.to_string()` method of the `alloc::string::ToString` trait to +//! convert it to a `String`. +//! +//! # Examples +//! +//! ## Custom context type +//! +//! For details, see [the documentation of `context` module][`context`]. +//! +//! ``` +//! # use iri_string::template::Error; +//! use core::fmt; +//! use iri_string::spec::{IriSpec, Spec, UriSpec}; +//! use iri_string::template::UriTemplateStr; +//! use iri_string::template::context::{Context, VarName, Visitor}; +//! +//! struct UserInfo { +//! username: &'static str, +//! utf8_available: bool, +//! } +//! +//! impl Context for UserInfo { +//! fn visit<V: Visitor>( +//! &self, +//! visitor: V, +//! ) -> V::Result { +//! match visitor.var_name().as_str() { +//! "username" => visitor.visit_string(self.username), +//! "utf8" => { +//! if self.utf8_available { +//! // U+2713 CHECK MARK +//! visitor.visit_string("\u{2713}") +//! } else { +//! visitor.visit_undefined() +//! } +//! } +//! _ => visitor.visit_undefined() +//! } +//! } +//! } +//! +//! let context = UserInfo { +//! username: "foo", +//! utf8_available: true, +//! }; +//! +//! let template = UriTemplateStr::new("/users/{username}{?utf8}")?; +//! +//! # #[cfg(feature = "alloc")] { +//! assert_eq!( +//! template.expand::<UriSpec, _>(&context)?.to_string(), +//! "/users/foo?utf8=%E2%9C%93" +//! ); +//! assert_eq!( +//! template.expand::<IriSpec, _>(&context)?.to_string(), +//! "/users/foo?utf8=\u{2713}" +//! ); +//! # } +//! # Ok::<_, Error>(()) +//! ``` +//! +//! 
/// Kind of value held by a template variable.
#[derive(Debug, Clone, Copy)]
enum ValueType {
    /// No value (i.e. null).
    Undefined,
    /// A single string.
    String,
    /// A list with at least one item.
    List,
    /// An associative array with at least one entry.
    Assoc,
}

impl ValueType {
    /// Value type of a variable that is not defined.
    #[inline]
    #[must_use]
    pub const fn undefined() -> Self {
        Self::Undefined
    }

    /// Value type of a string variable.
    #[inline]
    #[must_use]
    pub const fn string() -> Self {
        Self::String
    }

    /// Value type of a list without items.
    ///
    /// An empty list is reported as [`ValueType::Undefined`].
    #[inline]
    #[must_use]
    pub const fn empty_list() -> Self {
        Self::Undefined
    }

    /// Value type of a list that has items.
    #[inline]
    #[must_use]
    pub const fn nonempty_list() -> Self {
        Self::List
    }

    /// Value type of an associative array without entries.
    ///
    /// An empty associative array is reported as [`ValueType::Undefined`].
    #[inline]
    #[must_use]
    pub const fn empty_assoc() -> Self {
        Self::Undefined
    }

    /// Value type of an associative array that has entries.
    #[inline]
    #[must_use]
    pub const fn nonempty_assoc() -> Self {
        Self::Assoc
    }
}
+ #[must_use] + pub(super) fn decompose(&self) -> (Operator, VarListStr<'a>) { + debug_assert!( + !self.0.is_empty(), + "[precondition] valid expression body is not empty" + ); + let first = self.0.as_bytes()[0]; + if first.is_ascii_alphanumeric() || (first == b'_') || (first == b'%') { + // The first byte is a part of the variable list. + (Operator::String, VarListStr::new(self.0)) + } else { + let op = Operator::from_byte(first).unwrap_or_else(|| { + unreachable!( + "[precondition] valid expression has (optional) \ + valid operator, but got a byte {first:#02x?}" + ) + }); + (op, VarListStr::new(&self.0[1..])) + } + } + + /// Returns the raw expression in a string slice. + #[inline] + #[must_use] + pub(super) fn as_str(&self) -> &'a str { + self.0 + } +} + +/// Variable name. +// QUESTION: Should hexdigits in percent-encoded triplets be compared case sensitively? +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct VarName<'a>(&'a str); + +impl<'a> VarName<'a> { + /// Creates a `VarName` from the trusted string. + /// + /// # Precondition + /// + /// The given string should be a valid variable name. + #[inline] + #[must_use] + pub(super) fn from_trusted(s: &'a str) -> Self { + Self(s) + } + + /// Creates a `VarName` from the string. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::context::VarName; + /// + /// let name = VarName::new("hello")?; + /// assert_eq!(name.as_str(), "hello"); + /// + /// assert!(VarName::new("0+non-variable-name").is_err()); + /// + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn new(s: &'a str) -> Result<Self, Error> { + match validate_parser::validate_varname(s, 0) { + Ok(_) => Ok(Self::from_trusted(s)), + Err(e) => Err(e), + } + } + + /// Returns the varibale name. + #[inline] + #[must_use] + pub fn as_str(&self) -> &'a str { + self.0 + } +} + +/// Variable specifier. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct VarSpec<'a> { + /// Variable name. + name: VarName<'a>, + /// Variable modifier. + modifier: Modifier, +} + +impl<'a> VarSpec<'a> { + /// Returns the varibale name. + #[inline] + #[must_use] + pub(super) fn name(&self) -> VarName<'a> { + self.name + } + + /// Returns the modifier. + #[inline] + #[must_use] + pub(super) fn modifier(&self) -> Modifier { + self.modifier + } + + /// Parses the trusted varspec string. + /// + /// # Panics + /// + /// May panic if the input is invalid. + #[must_use] + pub(super) fn parse_trusted(s: &'a str) -> Self { + if let Some(varname) = s.strip_suffix('*') { + // `varname "*"`. + return Self { + name: VarName::from_trusted(varname), + modifier: Modifier::Explode, + }; + } + // `varname ":" max-length` or `varname`. + match find_split_hole(s, b':') { + Some((varname, max_len)) => { + let max_len: u16 = max_len + .parse() + .expect("[precondition] the input should be valid `varspec`"); + Self { + name: VarName::from_trusted(varname), + modifier: Modifier::MaxLen(max_len), + } + } + None => Self { + name: VarName(s), + modifier: Modifier::None, + }, + } + } +} + +/// Variable list. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) struct VarListStr<'a>(&'a str); + +impl<'a> VarListStr<'a> { + /// Creates a new variable list. + /// + /// # Precondition + /// + /// The given string should be a valid variable list. + #[inline] + #[must_use] + pub(super) fn new(s: &'a str) -> Self { + Self(s) + } +} + +impl<'a> IntoIterator for VarListStr<'a> { + type IntoIter = VarListIter<'a>; + type Item = (usize, VarSpec<'a>); + + #[inline] + fn into_iter(self) -> Self::IntoIter { + VarListIter { rest: self.0 } + } +} + +/// Iterator of variable specs. +#[derive(Debug, Clone)] +pub(super) struct VarListIter<'a> { + /// Remaining input. + rest: &'a str, +} + +impl<'a> Iterator for VarListIter<'a> { + /// A pair of the length of the varspec and the varspec itself. 
+ type Item = (usize, VarSpec<'a>); + + fn next(&mut self) -> Option<Self::Item> { + match find_split_hole(self.rest, b',') { + Some((prefix, new_rest)) => { + self.rest = new_rest; + Some((prefix.len(), VarSpec::parse_trusted(prefix))) + } + None => { + if self.rest.is_empty() { + None + } else { + Some(( + self.rest.len(), + VarSpec::parse_trusted(mem::take(&mut self.rest)), + )) + } + } + } + } +} + +/// Variable modifier. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(super) enum Modifier { + /// No modifiers. + None, + /// Max length, greater than 0 and less than 10000. + MaxLen(u16), + /// Explode the variable, e.g. the var spec has `*`. + Explode, +} + +/// Operator that is possibly reserved for future extension. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(super) enum MaybeOperator { + /// Working operator. + Operator(Operator), + /// Reserved for future extensions. + Reserved(OperatorReservedForFuture), +} + +impl MaybeOperator { + /// Returns the operator for the given character. + pub(super) fn from_byte(b: u8) -> Option<Self> { + match b { + b'+' => Some(Self::Operator(Operator::Reserved)), + b'#' => Some(Self::Operator(Operator::Fragment)), + b'.' => Some(Self::Operator(Operator::Label)), + b'/' => Some(Self::Operator(Operator::PathSegments)), + b';' => Some(Self::Operator(Operator::PathParams)), + b'?' => Some(Self::Operator(Operator::FormQuery)), + b'&' => Some(Self::Operator(Operator::FormQueryCont)), + b'=' => Some(Self::Reserved(OperatorReservedForFuture::Equals)), + b',' => Some(Self::Reserved(OperatorReservedForFuture::Comma)), + b'!' => Some(Self::Reserved(OperatorReservedForFuture::Exclamation)), + b'@' => Some(Self::Reserved(OperatorReservedForFuture::AtSign)), + b'|' => Some(Self::Reserved(OperatorReservedForFuture::Pipe)), + _ => None, + } + } +} + +/// Working operator. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(super) enum Operator { + /// No operator. String expansion. 
+ String, + /// Reserved expansion by `+`. + Reserved, + /// Fragment expansion by `#`. + Fragment, + /// Label expansion by `.`. + Label, + /// Path segments by `/`. + PathSegments, + /// Path-style parameters by `;`. + PathParams, + /// Form-style query by `?`. + FormQuery, + /// Form-style query continuation by `&`. + FormQueryCont, +} + +impl Operator { + /// Returns the operator for the given character. + #[must_use] + pub(super) fn from_byte(b: u8) -> Option<Self> { + match b { + b'+' => Some(Self::Reserved), + b'#' => Some(Self::Fragment), + b'.' => Some(Self::Label), + b'/' => Some(Self::PathSegments), + b';' => Some(Self::PathParams), + b'?' => Some(Self::FormQuery), + b'&' => Some(Self::FormQueryCont), + _ => None, + } + } + + /// Returns the string length of the operator. + #[inline] + #[must_use] + pub(super) const fn len(self) -> usize { + if matches!(self, Self::String) { + 0 + } else { + 1 + } + } +} + +/// Operator reserved for future extension. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(super) enum OperatorReservedForFuture { + /// Reserved `=` operator. + Equals, + /// Reserved `,` operator. + Comma, + /// Reserved `!` operator. + Exclamation, + /// Reserved `@` operator. + AtSign, + /// Reserved `|` operator. + Pipe, +} diff --git a/vendor/iri-string/src/template/context.rs b/vendor/iri-string/src/template/context.rs new file mode 100644 index 00000000..ea3f14bb --- /dev/null +++ b/vendor/iri-string/src/template/context.rs @@ -0,0 +1,339 @@ +//! Template expansion context. +//! +//! # Examples +//! +//! 1. Define your context type. +//! 2. Implement [`Context`] trait (and [`Context::visit`] method) for the type. +//! 1. Get variable name by [`Visitor::var_name`] method. +//! 2. Feed the corresponding value(s) by one of `Visitor::visit_*` methods. +//! +//! Note that contexts should return consistent result across multiple visits for +//! the same variable. In other words, `Context::visit` should return the same +//! 
result for the same `Visitor::var_name()` while the context is borrowed. +//! If this condition is violated, the URI template processor can return +//! invalid result or panic at worst. +//! +//! ``` +//! use iri_string::template::context::{Context, Visitor, ListVisitor, AssocVisitor}; +//! +//! struct MyContext { +//! name: &'static str, +//! id: u64, +//! tags: &'static [&'static str], +//! children: &'static [(&'static str, usize)], +//! } +//! +//! impl Context for MyContext { +//! fn visit<V: Visitor>(&self, visitor: V) -> V::Result { +//! let name = visitor.var_name().as_str(); +//! match name { +//! "name" => visitor.visit_string(self.name), +//! "id" => visitor.visit_string(self.id), +//! "tags" => visitor.visit_list().visit_items_and_finish(self.tags), +//! "children" => visitor +//! .visit_assoc() +//! .visit_entries_and_finish(self.children.iter().copied()), +//! _ => visitor.visit_undefined(), +//! } +//! } +//! } +//! ``` +// +// # Developers note +// +// Visitor types **should not** be cloneable in order to enforce just one +// visitor is used to visit a variable. If visitors are cloneable, it can make +// the wrong usage to be available, i.e. storing cloned visitors somewhere and +// using the wrong one. +// +// However, if visitors are made cloneable by any chance, it does not indicate +// the whole implementation will be broken. Users can only use the visitors +// through visitor traits (and their API does not allow cloning), so the logic +// would work as expected if the internal usage of the visitors is correct. +// Making visitors noncloneable is an optional safety guard (with no overhead). + +use core::fmt; +use core::ops::ControlFlow; + +pub use crate::template::components::VarName; + +/// A trait for types that can behave as a static URI template expansion context. +/// +/// This type is for use with [`UriTemplateStr::expand`] method. +/// +/// See [the module documentation][`crate::template`] for usage.
+/// +/// [`UriTemplateStr::expand`]: `crate::template::UriTemplateStr::expand` +pub trait Context: Sized { + /// Visits a variable. + /// + /// To get variable name, use [`Visitor::var_name()`]. + #[must_use] + fn visit<V: Visitor>(&self, visitor: V) -> V::Result; +} + +/// A trait for types that can behave as a dynamic (mutable) URI template expansion context. +/// +/// This type is for use with [`UriTemplateStr::expand_dynamic`] method and its +/// family. +/// +/// Note that "dynamic" here does not mean that the value of variables can +/// change during a template expansion. The value should be fixed and consistent +/// during each expansion, but the context is allowed to mutate itself if it +/// does not break this rule. +/// +/// # Examples +/// +/// ``` +/// # #[cfg(feature = "alloc")] +/// # extern crate alloc; +/// # use iri_string::template::Error; +/// # #[cfg(feature = "alloc")] { +/// # use alloc::string::String; +/// use iri_string::template::UriTemplateStr; +/// use iri_string::template::context::{DynamicContext, Visitor, VisitPurpose}; +/// use iri_string::spec::UriSpec; +/// +/// struct MyContext<'a> { +/// /// Target path. +/// target: &'a str, +/// /// Username. +/// username: Option<&'a str>, +/// /// A flag to remember whether the URI template +/// /// attempted to use `username` variable. +/// username_visited: bool, +/// } +/// +/// impl DynamicContext for MyContext<'_> { +/// fn on_expansion_start(&mut self) { +/// // Reset the state. +/// self.username_visited = false; +/// } +/// fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result { +/// match visitor.var_name().as_str() { +/// "target" => visitor.visit_string(self.target), +/// "username" => { +/// if visitor.purpose() == VisitPurpose::Expand { +/// // The variable `username` is being used +/// // on the template expansion. +/// // Don't care whether `username` is defined or not.
+/// self.username_visited = true; +/// } +/// if let Some(username) = &self.username { +/// visitor.visit_string(username) +/// } else { +/// visitor.visit_undefined() +/// } +/// } +/// _ => visitor.visit_undefined(), +/// } +/// } +/// } +/// +/// let mut context = MyContext { +/// target: "/posts/1", +/// username: Some("the_admin"), +/// username_visited: false, +/// }; +/// let mut buf = String::new(); +/// +/// // No access to the variable `username`. +/// let template1 = UriTemplateStr::new("{+target}")?; +/// template1.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?; +/// assert_eq!(buf, "/posts/1"); +/// assert!(!context.username_visited); +/// +/// buf.clear(); +/// // Will access to the variable `username`. +/// let template2 = UriTemplateStr::new("{+target}{?username}")?; +/// template2.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?; +/// assert_eq!(buf, "/posts/1?username=the_admin"); +/// assert!(context.username_visited); +/// +/// buf.clear(); +/// context.username = None; +/// // Will access to the variable `username` but it is undefined. +/// template2.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?; +/// assert_eq!(buf, "/posts/1"); +/// assert!( +/// context.username_visited, +/// "`MyContext` can know and remember whether `visit_dynamic()` is called +/// for `username`, even if its value is undefined" +/// ); +/// # } +/// # Ok::<_, Error>(()) +/// ``` +/// +/// [`UriTemplateStr::expand_dynamic`]: `crate::template::UriTemplateStr::expand_dynamic` +pub trait DynamicContext: Sized { + /// Visits a variable. + /// + /// To get variable name, use [`Visitor::var_name()`]. + /// + /// # Restriction + /// + /// The visit results should be consistent and unchanged between the last + /// time [`on_expansion_start`][`Self::on_expansion_start`] was called and + /// the next time [`on_expansion_end`][`Self::on_expansion_end`] will be + /// called. 
If this condition is violated, template expansion will produce + /// wrong result or may panic at worst. + #[must_use] + fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result; + + /// A callback that is called before the expansion of a URI template. + #[inline] + fn on_expansion_start(&mut self) {} + + /// A callback that is called after the expansion of a URI template. + #[inline] + fn on_expansion_end(&mut self) {} +} + +impl<C: Context> DynamicContext for C { + #[inline] + fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result { + self.visit(visitor) + } +} + +/// A purpose of a visit. +/// +/// This enum is nonexhaustive since this partially exposes the internal +/// implementation of the template expansion, and thus this is subject to +/// change. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum VisitPurpose { + /// A visit for type checking. + Typecheck, + /// A visit for template expansion to retrieve the value. + Expand, +} + +/// Variable visitor. +/// +/// See [the module documentation][self] for usage. +// NOTE (internal): Visitor types **should not** be cloneable. +pub trait Visitor: Sized + private::Sealed { + /// Result of the visit. + type Result; + /// List visitor. + type ListVisitor: ListVisitor<Result = Self::Result>; + /// Associative array visitor. + type AssocVisitor: AssocVisitor<Result = Self::Result>; + + /// Returns the name of the variable to visit. + #[must_use] + fn var_name(&self) -> VarName<'_>; + /// Returns the purpose of the visit. + /// + /// The template expansion algorithm checks the types for some variables + /// depending on its usage. To get the usage count correctly, you should + /// only count visits with [`VisitPurpose::Expand`]. + /// + /// If you need to know whether the variable is accessed and does not + /// need dynamic context generation or access counts, consider using + /// [`UriTemplateStr::variables`] method to iterate the variables in the + /// URI template. 
+ /// + /// [`UriTemplateStr::variables`]: `crate::template::UriTemplateStr::variables` + #[must_use] + fn purpose(&self) -> VisitPurpose; + /// Visits an undefined variable, i.e. indicates that the requested variable is unavailable. + #[must_use] + fn visit_undefined(self) -> Self::Result; + /// Visits a string variable. + #[must_use] + fn visit_string<T: fmt::Display>(self, v: T) -> Self::Result; + /// Visits a list variable. + #[must_use] + fn visit_list(self) -> Self::ListVisitor; + /// Visits an associative array variable. + #[must_use] + fn visit_assoc(self) -> Self::AssocVisitor; +} + +/// List visitor. +/// +/// See [the module documentation][self] for usage. +// NOTE (internal): Visitor types **should not** be cloneable. +pub trait ListVisitor: Sized + private::Sealed { + /// Result of the visit. + type Result; + + /// Visits an item. + /// + /// If this returned `ControlFlow::Break(v)`, [`Context::visit`] should also + /// return this `v`. + /// + /// To feed multiple items at once, do + /// `items.into_iter().try_for_each(|item| self.visit_item(item))` for example. + #[must_use] + fn visit_item<T: fmt::Display>(&mut self, item: T) -> ControlFlow<Self::Result>; + /// Finishes visiting the list. + #[must_use] + fn finish(self) -> Self::Result; + + /// Visits items and finish. + #[must_use] + fn visit_items_and_finish<T, I>(mut self, items: I) -> Self::Result + where + T: fmt::Display, + I: IntoIterator<Item = T>, + { + match items.into_iter().try_for_each(|item| self.visit_item(item)) { + ControlFlow::Break(v) => v, + ControlFlow::Continue(()) => self.finish(), + } + } +} + +/// Associative array visitor. +/// +/// See [the module documentation][self] for usage. +// NOTE (internal): Visitor types **should not** be cloneable. +pub trait AssocVisitor: Sized + private::Sealed { + /// Result of the visit. + type Result; + + /// Visits an entry. + /// + /// If this returned `ControlFlow::Break(v)`, [`Context::visit`] should also + /// return this `v`. 
+ /// + /// To feed multiple items at once, do + /// `entries.into_iter().try_for_each(|(key, value)| self.visit_entry(key, value))` + /// for example. + #[must_use] + fn visit_entry<K: fmt::Display, V: fmt::Display>( + &mut self, + key: K, + value: V, + ) -> ControlFlow<Self::Result>; + /// Finishes visiting the associative array. + #[must_use] + fn finish(self) -> Self::Result; + + /// Visits entries and finish. + #[must_use] + fn visit_entries_and_finish<K, V, I>(mut self, entries: I) -> Self::Result + where + K: fmt::Display, + V: fmt::Display, + I: IntoIterator<Item = (K, V)>, + { + match entries + .into_iter() + .try_for_each(|(key, value)| self.visit_entry(key, value)) + { + ControlFlow::Break(v) => v, + ControlFlow::Continue(()) => self.finish(), + } + } +} + +/// Private module to put the trait to seal. +pub(super) mod private { + /// A trait for visitor types of variables in a context. + pub trait Sealed {} +} diff --git a/vendor/iri-string/src/template/error.rs b/vendor/iri-string/src/template/error.rs new file mode 100644 index 00000000..f5206a4b --- /dev/null +++ b/vendor/iri-string/src/template/error.rs @@ -0,0 +1,154 @@ +//! Errors related to URI templates. + +use core::fmt; + +#[cfg(feature = "std")] +use std::error; + +/// Template construction and expansion error kind. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum ErrorKind { + /// Cannot write to the backend. + WriteFailed, + /// Expression is not closed. + ExpressionNotClosed, + /// Invalid character. + InvalidCharacter, + /// Invalid expression. + InvalidExpression, + /// Invalid percent-encoded triplets. + InvalidPercentEncoding, + /// Invalid UTF-8 bytes. + InvalidUtf8, + /// Unexpected value type for the variable. + UnexpectedValueType, + /// Unsupported operator, including operators reserved for future. + UnsupportedOperator, +} + +impl ErrorKind { + /// Returns the error message. 
+ #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::WriteFailed => "failed to write to the backend writer", + Self::ExpressionNotClosed => "expression not closed", + Self::InvalidCharacter => "invalid character", + Self::InvalidExpression => "invalid expression", + Self::InvalidPercentEncoding => "invalid percent-encoded triplets", + Self::InvalidUtf8 => "invalid utf-8 byte sequence", + Self::UnexpectedValueType => "unexpected value type for the variable", + Self::UnsupportedOperator => "unsupported operator", + } + } +} + +/// Template construction and expansion error. +/// +// Note that this type should implement `Copy` trait. +// To return additional non-`Copy` data as an error, use wrapper type +// (as `std::string::FromUtf8Error` contains `std::str::Utf8Error`). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Error { + /// Error kind. + kind: ErrorKind, + /// Location (byte position of the error). + location: usize, +} + +impl Error { + /// Creates a new `Error`. + /// + /// For internal use. + #[inline] + #[must_use] + pub(super) fn new(kind: ErrorKind, location: usize) -> Self { + Self { kind, location } + } + + /// Returns the byte position at which the error is detected. + /// + /// NOTE: This is not a part of the public API since the value to be + /// returned (i.e., the definition of the "position" of an error) is not + /// guaranteed to be stable. + #[cfg(test)] + pub(super) fn location(&self) -> usize { + self.location + } +} + +impl fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "invalid URI template: {} (at {}-th byte)", + self.kind.as_str(), + self.location + ) + } +} + +#[cfg(feature = "std")] +impl error::Error for Error {} + +/// Error on conversion into a URI template type. +// TODO: Unifiable to `types::CreationError`? +#[cfg(feature = "alloc")] +pub struct CreationError<T> { + /// Source data. + source: T, + /// Validation error.
+ error: Error, +} + +#[cfg(feature = "alloc")] +impl<T> CreationError<T> { + /// Returns the source data. + #[must_use] + pub fn into_source(self) -> T { + self.source + } + + /// Returns the validation error. + #[must_use] + pub fn validation_error(&self) -> Error { + self.error + } + + /// Creates a new `CreationError`. + #[must_use] + pub(crate) fn new(error: Error, source: T) -> Self { + Self { source, error } + } +} + +#[cfg(feature = "alloc")] +impl<T: fmt::Debug> fmt::Debug for CreationError<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CreationError") + .field("source", &self.source) + .field("error", &self.error) + .finish() + } +} + +#[cfg(feature = "alloc")] +impl<T: Clone> Clone for CreationError<T> { + fn clone(&self) -> Self { + Self { + source: self.source.clone(), + error: self.error, + } + } +} + +#[cfg(feature = "alloc")] +impl<T> fmt::Display for CreationError<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.error.fmt(f) + } +} + +#[cfg(feature = "std")] +impl<T: fmt::Debug> error::Error for CreationError<T> {} diff --git a/vendor/iri-string/src/template/expand.rs b/vendor/iri-string/src/template/expand.rs new file mode 100644 index 00000000..605043ab --- /dev/null +++ b/vendor/iri-string/src/template/expand.rs @@ -0,0 +1,1039 @@ +//! Expansion. 
+ +use core::fmt::{self, Write as _}; +use core::marker::PhantomData; +use core::mem; +use core::ops::ControlFlow; + +#[cfg(feature = "alloc")] +use alloc::string::{String, ToString}; + +use crate::parser::str::{find_split, find_split_hole}; +use crate::parser::str::{process_percent_encoded_best_effort, PctEncodedFragments}; +use crate::percent_encode::PercentEncoded; +use crate::spec::Spec; +use crate::template::components::{ExprBody, Modifier, Operator, VarName, VarSpec}; +use crate::template::context::{ + private::Sealed as VisitorSealed, AssocVisitor, Context, DynamicContext, ListVisitor, + VisitPurpose, Visitor, +}; +use crate::template::error::{Error, ErrorKind}; +use crate::template::{UriTemplateStr, ValueType}; +#[cfg(feature = "alloc")] +use crate::types; + +/// A chunk in a template string. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum Chunk<'a> { + /// Literal. + Literal(&'a str), + /// Expression excluding the wrapping braces. + Expr(ExprBody<'a>), +} + +/// Iterator of template chunks. +#[derive(Debug, Clone)] +pub(super) struct Chunks<'a> { + /// Template. + template: &'a str, +} + +impl<'a> Chunks<'a> { + /// Creates a new iterator. + #[inline] + #[must_use] + pub(super) fn new(template: &'a UriTemplateStr) -> Self { + Self { + template: template.as_str(), + } + } +} + +impl<'a> Iterator for Chunks<'a> { + type Item = Chunk<'a>; + + fn next(&mut self) -> Option<Self::Item> { + if self.template.is_empty() { + return None; + } + match find_split(self.template, b'{') { + Some(("", _)) => { + let (expr_body, rest) = find_split_hole(&self.template[1..], b'}') + .expect("[validity] expression inside a template must be closed"); + self.template = rest; + Some(Chunk::Expr(ExprBody::new(expr_body))) + } + Some((lit, rest)) => { + self.template = rest; + Some(Chunk::Literal(lit)) + } + None => Some(Chunk::Literal(mem::take(&mut self.template))), + } + } +} + +/// Template expansion result. 
+#[derive(Debug, Clone, Copy)] +pub struct Expanded<'a, S, C> { + /// Compiled template. + template: &'a UriTemplateStr, + /// Context. + context: &'a C, + /// Spec. + _spec: PhantomData<fn() -> S>, +} + +impl<'a, S: Spec, C: Context> Expanded<'a, S, C> { + /// Creates a new `Expanded` object. + #[inline] + pub(super) fn new(template: &'a UriTemplateStr, context: &'a C) -> Result<Self, Error> { + Self::typecheck_context(template, context)?; + Ok(Self { + template, + context, + _spec: PhantomData, + }) + } + + /// Checks if the types of variables are allowed for the corresponding expressions in the template. + fn typecheck_context(template: &UriTemplateStr, context: &C) -> Result<(), Error> { + let mut pos = 0; + for chunk in Chunks::new(template) { + let (expr_len, (op, varlist)) = match chunk { + Chunk::Expr(expr_body) => (expr_body.as_str().len(), expr_body.decompose()), + Chunk::Literal(lit) => { + pos += lit.len(); + continue; + } + }; + // +2: wrapping braces (`{` and `}`). + let chunk_end_pos = pos + expr_len + 2; + // +1: opening brace `{`. + pos += op.len() + 1; + for (varspec_len, varspec) in varlist { + let ty = context.visit(TypeVisitor::new(varspec.name())); + let modifier = varspec.modifier(); + + if matches!(modifier, Modifier::MaxLen(_)) + && matches!(ty, ValueType::List | ValueType::Assoc) + { + // > Prefix modifiers are not applicable to variables that + // > have composite values. + // + // --- [RFC 6570 Section 2.4.1. Prefix](https://www.rfc-editor.org/rfc/rfc6570.html#section-2.4.1) + return Err(Error::new(ErrorKind::UnexpectedValueType, pos)); + } + + // +1: A trailing comma (`,`) or a closing brace (`}`).
+ pos += varspec_len + 1; + } + assert_eq!(pos, chunk_end_pos); + } + Ok(()) + } +} + +impl<S: Spec, C: Context> fmt::Display for Expanded<'_, S, C> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for chunk in Chunks::new(self.template) { + let expr = match chunk { + Chunk::Literal(lit) => { + f.write_str(lit)?; + continue; + } + Chunk::Expr(body) => body, + }; + expand::<S, _>(f, expr, self.context)?; + } + + Ok(()) + } +} + +/// Implement `TryFrom<Expanded<...>> for SomeUriStringType`. +macro_rules! impl_try_from_expanded { + ($ty_outer:ident) => { + #[cfg(feature = "alloc")] + impl<S: Spec, C: Context> TryFrom<Expanded<'_, S, C>> for types::$ty_outer<S> { + type Error = types::CreationError<String>; + + #[inline] + fn try_from(v: Expanded<'_, S, C>) -> Result<Self, Self::Error> { + Self::try_from(v.to_string()) + } + } + }; +} + +// Not implementing `TryFrom<Expand<...>>` for query and fragment strings +// since they cannot behave as a query or a fragment only by themselves. +// Query strings in practical starts with `?` prefix but `RiQueryStr{,ing}` +// strips that, and so do fragment strings (but `#` instead of `?`). +// Because of this, query and fragment string types won't be used to represent +// a relative IRIs without combining the prefix. +// +// In contrast, RFC 6570 URI Template expects that the users are constructing a +// "working" IRIs, including the necessary prefixes for syntax components. +// For example, fragment expansion `{#var}`, where `var` is "hello", expands to +// `#hello`, including the prefix `#`. This means that a URI template will be +// used to generate neither `RiQueryStr{,ing}` nor `RiFragmentStr{,ing}` strings. +impl_try_from_expanded!(RiAbsoluteString); +impl_try_from_expanded!(RiReferenceString); +impl_try_from_expanded!(RiRelativeString); +impl_try_from_expanded!(RiString); + +/// Expands the whole template with the dynamic context. 
+pub(super) fn expand_whole_dynamic<S: Spec, W: fmt::Write, C: DynamicContext>( + template: &UriTemplateStr, + writer: &mut W, + context: &mut C, +) -> Result<(), Error> { + context.on_expansion_start(); + let result = expand_whole_dynamic_impl::<S, W, C>(template, writer, context); + context.on_expansion_end(); + result +} + +/// Expands the whole template with the dynamic context. +/// +/// Note that the caller is responsible to set up or finalize the `context`. +fn expand_whole_dynamic_impl<S: Spec, W: fmt::Write, C: DynamicContext>( + template: &UriTemplateStr, + writer: &mut W, + context: &mut C, +) -> Result<(), Error> { + let mut pos = 0; + for chunk in Chunks::new(template) { + let expr = match chunk { + Chunk::Literal(lit) => { + writer + .write_str(lit) + .map_err(|_| Error::new(ErrorKind::WriteFailed, pos))?; + pos += lit.len(); + continue; + } + Chunk::Expr(body) => body, + }; + expand_expr_mut::<S, _, _>(writer, &mut pos, expr, context)?; + } + + Ok(()) +} + +/// Expands the expression using the given operator and the dynamic context. +fn expand_expr_mut<S: Spec, W: fmt::Write, C: DynamicContext>( + writer: &mut W, + pos: &mut usize, + expr: ExprBody<'_>, + context: &mut C, +) -> Result<(), Error> { + let (op, varlist) = expr.decompose(); + + let mut is_first_varspec = true; + // +2: wrapping braces (`{` and `}`). + let chunk_end_pos = *pos + expr.as_str().len() + 2; + // +1: opening brace `{`. + *pos += op.len() + 1; + for (varspec_len, varspec) in varlist { + // Check the type before the actual expansion. + let ty = context.visit_dynamic(TypeVisitor::new(varspec.name())); + let modifier = varspec.modifier(); + + if matches!(modifier, Modifier::MaxLen(_)) + && matches!(ty, ValueType::List | ValueType::Assoc) + { + // > Prefix modifiers are not applicable to variables that + // > have composite values. + // + // --- [RFC 6570 Section 2.4.1. 
Prefix](https://www.rfc-editor.org/rfc/rfc6570.html#section-2.4.1) + return Err(Error::new(ErrorKind::UnexpectedValueType, *pos)); + } + + // Typecheck passed. Expand. + let visitor = ValueVisitor::<S, _>::new(writer, varspec, op, &mut is_first_varspec); + let token = context + .visit_dynamic(visitor) + .map_err(|_| Error::new(ErrorKind::WriteFailed, *pos))?; + let writer_ptr = token.writer_ptr(); + if writer_ptr != writer as *mut _ { + // Invalid `VisitDoneToken` was returned. This cannot usually happen + // without intentional unnatural usage. + panic!("invalid `VisitDoneToken` was returned"); + } + + // +1: A trailing comma (`,`) or a closing brace (`}`). + *pos += varspec_len + 1; + } + assert_eq!(*pos, chunk_end_pos); + + Ok(()) +} + +/// Properties of an operator. +/// +/// See [RFC 6570 Appendix A](https://www.rfc-editor.org/rfc/rfc6570#appendix-A). +#[derive(Debug, Clone, Copy)] +struct OpProps { + /// Prefix for the first element. + first: &'static str, + /// Separator. + sep: &'static str, + /// Whether or not the expansion includes the variable or key name. + named: bool, + /// Result string if the variable is empty. + ifemp: &'static str, + /// Whether or not the reserved values can be written without being encoded. + allow_reserved: bool, +} + +impl OpProps { + /// Properties for all known operators.
+ const PROPS: [Self; 8] = [ + // String + Self { + first: "", + sep: ",", + named: false, + ifemp: "", + allow_reserved: false, + }, + // Reserved + Self { + first: "", + sep: ",", + named: false, + ifemp: "", + allow_reserved: true, + }, + // Fragment + Self { + first: "#", + sep: ",", + named: false, + ifemp: "", + allow_reserved: true, + }, + // Label + Self { + first: ".", + sep: ".", + named: false, + ifemp: "", + allow_reserved: false, + }, + // PathSegments + Self { + first: "/", + sep: "/", + named: false, + ifemp: "", + allow_reserved: false, + }, + // PathParams + Self { + first: ";", + sep: ";", + named: true, + ifemp: "", + allow_reserved: false, + }, + // FormQuery + Self { + first: "?", + sep: "&", + named: true, + ifemp: "=", + allow_reserved: false, + }, + // FormQueryCont + Self { + first: "&", + sep: "&", + named: true, + ifemp: "=", + allow_reserved: false, + }, + ]; + + /// Returns the properties for the operator. + #[must_use] + #[inline] + pub(super) fn from_op(op: Operator) -> &'static Self { + let index = match op { + Operator::String => 0, + Operator::Reserved => 1, + Operator::Fragment => 2, + Operator::Label => 3, + Operator::PathSegments => 4, + Operator::PathParams => 5, + Operator::FormQuery => 6, + Operator::FormQueryCont => 7, + }; + &Self::PROPS[index] + } +} + +/// Expands the expression using the given operator. +fn expand<S: Spec, C: Context>( + f: &mut fmt::Formatter<'_>, + expr: ExprBody<'_>, + context: &C, +) -> fmt::Result { + let (op, varlist) = expr.decompose(); + + let mut is_first_varspec = true; + for (_varspec_len, varspec) in varlist { + let visitor = ValueVisitor::<S, _>::new(f, varspec, op, &mut is_first_varspec); + let token = context.visit(visitor)?; + let writer_ptr = token.writer_ptr(); + if writer_ptr != f as *mut _ { + // Invalid `VisitDoneToken` was returned. This cannot usually happen + // without intentional unnatural usage. 
+ panic!("invalid `VisitDoneToken` was returned"); + } + } + + Ok(()) +} + +/// Escapes the given value and writes it. +#[inline] +fn escape_write<S: Spec, T: fmt::Display, W: fmt::Write>( + f: &mut W, + v: T, + allow_reserved: bool, +) -> fmt::Result { + if allow_reserved { + let result = process_percent_encoded_best_effort(v, |frag| { + let result = match frag { + PctEncodedFragments::Char(s, _) => f.write_str(s), + PctEncodedFragments::NoPctStr(s) => { + write!(f, "{}", PercentEncoded::<_, S>::characters(s)) + } + PctEncodedFragments::StrayPercent => f.write_str("%25"), + PctEncodedFragments::InvalidUtf8PctTriplets(s) => f.write_str(s), + }; + if result.is_err() { + return ControlFlow::Break(result); + } + ControlFlow::Continue(()) + }); + match result { + Ok(ControlFlow::Break(Ok(_)) | ControlFlow::Continue(_)) => Ok(()), + Ok(ControlFlow::Break(Err(e))) | Err(e) => Err(e), + } + } else { + /// Writer that escapes the unreserved characters and writes them. + struct UnreservePercentEncodeWriter<'a, S, W> { + /// Inner writer. + writer: &'a mut W, + /// Spec. + _spec: PhantomData<fn() -> S>, + } + impl<S: Spec, W: fmt::Write> fmt::Write for UnreservePercentEncodeWriter<'_, S, W> { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + write!(self.writer, "{}", PercentEncoded::<_, S>::unreserve(s)) + } + } + let mut writer = UnreservePercentEncodeWriter::<S, W> { + writer: f, + _spec: PhantomData, + }; + write!(writer, "{v}") + } +} + +/// Truncates the given value as a string, escapes the value, and writes it. 
+fn escape_write_with_maxlen<S: Spec, T: fmt::Display, W: fmt::Write>( + writer: &mut PrefixOnceWriter<'_, W>, + v: T, + allow_reserved: bool, + max_len: Option<u16>, +) -> fmt::Result { + if allow_reserved { + let mut max_len = max_len.map_or(usize::MAX, usize::from); + let result = process_percent_encoded_best_effort(v, |frag| { + if max_len == 0 { + return ControlFlow::Break(Ok(())); + } + let result = + match frag { + PctEncodedFragments::Char(s, _) => { + max_len -= 1; + writer.write_str(s) + } + PctEncodedFragments::NoPctStr(s) => { + let mut chars = s.char_indices(); + let count = + chars.by_ref().take(max_len).last().map(|(i, _)| i).expect( + "[consistency] decomposed string fragment must not be empty", + ); + let sub_len = s.len() - chars.as_str().len(); + max_len -= count; + write!( + writer, + "{}", + PercentEncoded::<_, S>::characters(&s[..sub_len]) + ) + } + PctEncodedFragments::StrayPercent => { + max_len -= 1; + writer.write_str("%25") + } + PctEncodedFragments::InvalidUtf8PctTriplets(s) => { + let count = max_len.min(s.len() / 3); + let sub_len = count * 3; + max_len -= count; + writer.write_str(&s[..sub_len]) + } + }; + if result.is_err() { + return ControlFlow::Break(result); + } + ControlFlow::Continue(()) + }); + match result { + Ok(ControlFlow::Break(Ok(_)) | ControlFlow::Continue(_)) => Ok(()), + Ok(ControlFlow::Break(Err(e))) | Err(e) => Err(e), + } + } else { + match max_len { + Some(max_len) => { + let mut writer = TruncatePercentEncodeWriter::<S, _> { + inner: writer, + rest_num_chars: usize::from(max_len), + _spec: PhantomData, + }; + write!(writer, "{v}") + } + None => write!(writer, "{}", PercentEncoded::<_, S>::unreserve(v)), + } + } +} + +/// A writer that truncates the input to the given length and writes to the backend. +struct TruncatePercentEncodeWriter<'a, S, W> { + /// Inner writer. + inner: &'a mut W, + /// Maximum number of characters to be written. + rest_num_chars: usize, + /// Spec. 
+ _spec: PhantomData<fn() -> S>, +} + +impl<S: Spec, W: fmt::Write> fmt::Write for TruncatePercentEncodeWriter<'_, S, W> { + fn write_str(&mut self, s: &str) -> fmt::Result { + if self.rest_num_chars == 0 { + return Ok(()); + } + let mut chars = s.char_indices(); + let skip_count = chars + .by_ref() + .take(self.rest_num_chars) + .last() + .map_or(0, |(i, _)| i + 1); + let len = s.len() - chars.as_str().len(); + let truncated = &s[..len]; + write!( + self.inner, + "{}", + PercentEncoded::<_, S>::unreserve(truncated) + )?; + self.rest_num_chars -= skip_count; + Ok(()) + } +} + +/// A writer that writes a prefix only once if and only if some value is written. +struct PrefixOnceWriter<'a, W> { + /// Inner writer. + inner: &'a mut W, + /// Prefix to write. + prefix: Option<&'a str>, +} + +impl<'a, W: fmt::Write> PrefixOnceWriter<'a, W> { + /// Creates a new writer with no prefix. + #[inline] + #[must_use] + fn new(inner: &'a mut W) -> Self { + Self { + inner, + prefix: None, + } + } + + /// Creates a new writer with a prefix. + #[inline] + #[must_use] + fn with_prefix(inner: &'a mut W, prefix: &'a str) -> Self { + Self { + inner, + prefix: Some(prefix), + } + } + + /// Returns true if the writer have not yet written the prefix. + #[inline] + #[must_use] + fn has_unwritten_prefix(&self) -> bool { + self.prefix.is_some() + } +} + +impl<W: fmt::Write> fmt::Write for PrefixOnceWriter<'_, W> { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + if let Some(prefix) = self.prefix.take() { + self.inner.write_str(prefix)?; + } + self.inner.write_str(s) + } +} + +/// An opaque token value that proves some variable is visited. +// This should not be able to be created by any means other than `VarVisitor::visit_foo()`. +// Do not derive any traits that allows the value to be generated or cloned. +struct VisitDoneToken<'a, S, W>(ValueVisitor<'a, S, W>); + +impl<'a, S: Spec, W: fmt::Write> VisitDoneToken<'a, S, W> { + /// Creates a new token. 
+ #[inline] + #[must_use] + fn new(visitor: ValueVisitor<'a, S, W>) -> Self { + Self(visitor) + } + + /// Returns the raw pointer to the backend formatter. + #[inline] + #[must_use] + fn writer_ptr(&self) -> *const W { + self.0.writer_ptr() + } +} + +impl<S: Spec, W: fmt::Write> fmt::Debug for VisitDoneToken<'_, S, W> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("VisitDoneToken") + } +} + +/// Visitor to retrieve a variable value. +// Single `ValueVisitor` should be used for single expansion. +// Do not derive any traits that allows the value to be generated or cloned. +struct ValueVisitor<'a, S, W> { + /// Formatter. + writer: &'a mut W, + /// Varspec. + varspec: VarSpec<'a>, + /// Operator. + op: Operator, + /// Whether the variable to visit is the first one in an expression. + is_first_varspec: &'a mut bool, + /// Spec. + _spec: PhantomData<fn() -> S>, +} + +impl<'a, S: Spec, W: fmt::Write> ValueVisitor<'a, S, W> { + /// Creates a visitor. + #[inline] + #[must_use] + fn new( + f: &'a mut W, + varspec: VarSpec<'a>, + op: Operator, + is_first_varspec: &'a mut bool, + ) -> Self { + Self { + writer: f, + varspec, + op, + is_first_varspec, + _spec: PhantomData, + } + } + + /// Returns the raw pointer to the backend formatter. + #[inline] + #[must_use] + fn writer_ptr(&self) -> *const W { + self.writer as &_ as *const _ + } +} + +impl<S: Spec, W: fmt::Write> VisitorSealed for ValueVisitor<'_, S, W> {} + +impl<'a, S: Spec, W: fmt::Write> Visitor for ValueVisitor<'a, S, W> { + type Result = Result<VisitDoneToken<'a, S, W>, fmt::Error>; + type ListVisitor = ListValueVisitor<'a, S, W>; + type AssocVisitor = AssocValueVisitor<'a, S, W>; + + /// Returns the name of the variable to visit. + #[inline] + #[must_use] + fn var_name(&self) -> VarName<'a> { + self.varspec.name() + } + + #[inline] + fn purpose(&self) -> VisitPurpose { + VisitPurpose::Expand + } + + /// Visits an undefined variable, i.e. 
indicates that the requested variable is unavailable. + #[inline] + fn visit_undefined(self) -> Self::Result { + Ok(VisitDoneToken::new(self)) + } + + /// Visits a string variable. + #[inline] + fn visit_string<T: fmt::Display>(self, v: T) -> Self::Result { + let oppr = OpProps::from_op(self.op); + + if mem::replace(self.is_first_varspec, false) { + self.writer.write_str(oppr.first)?; + } else { + self.writer.write_str(oppr.sep)?; + } + let mut writer = if oppr.named { + self.writer.write_str(self.varspec.name().as_str())?; + PrefixOnceWriter::with_prefix(self.writer, "=") + } else { + PrefixOnceWriter::new(self.writer) + }; + + let max_len = match self.varspec.modifier() { + Modifier::None | Modifier::Explode => None, + Modifier::MaxLen(max_len) => Some(max_len), + }; + escape_write_with_maxlen::<S, T, W>(&mut writer, v, oppr.allow_reserved, max_len)?; + if writer.has_unwritten_prefix() { + self.writer.write_str(oppr.ifemp)?; + } + Ok(VisitDoneToken::new(self)) + } + + /// Visits a list variable. + #[inline] + #[must_use] + fn visit_list(self) -> Self::ListVisitor { + let oppr = OpProps::from_op(self.op); + ListValueVisitor { + visitor: self, + num_elems: 0, + oppr, + } + } + + /// Visits an associative array variable. + #[inline] + #[must_use] + fn visit_assoc(self) -> Self::AssocVisitor { + let oppr = OpProps::from_op(self.op); + AssocValueVisitor { + visitor: self, + num_elems: 0, + oppr, + } + } +} + +/// Visitor to retrieve value of a list variable. +// RFC 6570 section 2.3: +// +// > A variable defined as a list value is considered undefined if the +// > list contains zero members. A variable defined as an associative +// > array of (name, value) pairs is considered undefined if the array +// > contains zero members or if all member names in the array are +// > associated with undefined values. +// +// Single variable visitor should be used for single expansion. +// Do not derive any traits that allows the value to be generated or cloned. 
+struct ListValueVisitor<'a, S, W> { + /// Visitor. + visitor: ValueVisitor<'a, S, W>, + /// Number of already emitted elements. + num_elems: usize, + /// Operator props. + oppr: &'static OpProps, +} + +impl<S: Spec, W: fmt::Write> ListValueVisitor<'_, S, W> { + /// Visits an item. + fn visit_item_impl<T: fmt::Display>(&mut self, item: T) -> fmt::Result { + let modifier = self.visitor.varspec.modifier(); + let is_explode = match modifier { + Modifier::MaxLen(_) => panic!( + "value type changed since `UriTemplateStr::expand()`: \ + prefix modifier is not applicable to a list" + ), + Modifier::None => false, + Modifier::Explode => true, + }; + + // Write prefix for each variable. + if self.num_elems == 0 { + if mem::replace(self.visitor.is_first_varspec, false) { + self.visitor.writer.write_str(self.oppr.first)?; + } else { + self.visitor.writer.write_str(self.oppr.sep)?; + } + if self.oppr.named { + self.visitor + .writer + .write_str(self.visitor.varspec.name().as_str())?; + self.visitor.writer.write_char('=')?; + } + } else { + // Write prefix for the non-first item. + match (self.oppr.named, is_explode) { + (_, false) => self.visitor.writer.write_char(',')?, + (false, true) => self.visitor.writer.write_str(self.oppr.sep)?, + (true, true) => { + self.visitor.writer.write_str(self.oppr.sep)?; + escape_write::<S, _, _>( + self.visitor.writer, + self.visitor.varspec.name().as_str(), + self.oppr.allow_reserved, + )?; + self.visitor.writer.write_char('=')?; + } + } + } + + escape_write::<S, _, _>(self.visitor.writer, item, self.oppr.allow_reserved)?; + + self.num_elems += 1; + Ok(()) + } +} + +impl<S: Spec, W: fmt::Write> VisitorSealed for ListValueVisitor<'_, S, W> {} + +impl<'a, S: Spec, W: fmt::Write> ListVisitor for ListValueVisitor<'a, S, W> { + type Result = Result<VisitDoneToken<'a, S, W>, fmt::Error>; + + /// Visits an item. 
+ #[inline] + fn visit_item<T: fmt::Display>(&mut self, item: T) -> ControlFlow<Self::Result> { + match self.visit_item_impl(item) { + Ok(_) => ControlFlow::Continue(()), + Err(e) => ControlFlow::Break(Err(e)), + } + } + + /// Finishes visiting the list. + #[inline] + fn finish(self) -> Self::Result { + Ok(VisitDoneToken::new(self.visitor)) + } +} + +/// Visitor to retrieve entries of an associative array variable. +// RFC 6570 section 2.3: +// +// > A variable defined as a list value is considered undefined if the +// > list contains zero members. A variable defined as an associative +// > array of (name, value) pairs is considered undefined if the array +// > contains zero members or if all member names in the array are +// > associated with undefined values. +// +// Single variable visitor should be used for single expansion. +// Do not derive any traits that allows the value to be generated or cloned. +struct AssocValueVisitor<'a, S, W> { + /// Visitor. + visitor: ValueVisitor<'a, S, W>, + /// Number of already emitted elements. + num_elems: usize, + /// Operator props. + oppr: &'static OpProps, +} + +impl<S: Spec, W: fmt::Write> AssocValueVisitor<'_, S, W> { + /// Visits an entry. + fn visit_entry_impl<K: fmt::Display, V: fmt::Display>( + &mut self, + key: K, + value: V, + ) -> fmt::Result { + let modifier = self.visitor.varspec.modifier(); + let is_explode = match modifier { + Modifier::MaxLen(_) => panic!( + "value type changed since `UriTemplateStr::expand()`: \ + prefix modifier is not applicable to an associative array" + ), + Modifier::None => false, + Modifier::Explode => true, + }; + + // Write prefix for each variable. 
+ if self.num_elems == 0 { + if mem::replace(self.visitor.is_first_varspec, false) { + self.visitor.writer.write_str(self.oppr.first)?; + } else { + self.visitor.writer.write_str(self.oppr.sep)?; + } + if is_explode { + escape_write::<S, _, _>(self.visitor.writer, key, self.oppr.allow_reserved)?; + self.visitor.writer.write_char('=')?; + } else { + if self.oppr.named { + escape_write::<S, _, _>( + self.visitor.writer, + self.visitor.varspec.name().as_str(), + self.oppr.allow_reserved, + )?; + self.visitor.writer.write_char('=')?; + } + escape_write::<S, _, _>(self.visitor.writer, key, self.oppr.allow_reserved)?; + self.visitor.writer.write_char(',')?; + } + } else { + // Write prefix for the non-first item. + match (self.oppr.named, is_explode) { + (_, false) => { + self.visitor.writer.write_char(',')?; + escape_write::<S, _, _>(self.visitor.writer, key, self.oppr.allow_reserved)?; + self.visitor.writer.write_char(',')?; + } + (false, true) => { + self.visitor.writer.write_str(self.oppr.sep)?; + escape_write::<S, _, _>(self.visitor.writer, key, self.oppr.allow_reserved)?; + self.visitor.writer.write_char('=')?; + } + (true, true) => { + self.visitor.writer.write_str(self.oppr.sep)?; + escape_write::<S, _, _>(self.visitor.writer, key, self.oppr.allow_reserved)?; + self.visitor.writer.write_char('=')?; + } + } + } + + escape_write::<S, _, _>(self.visitor.writer, value, self.oppr.allow_reserved)?; + + self.num_elems += 1; + Ok(()) + } +} + +impl<S: Spec, W: fmt::Write> VisitorSealed for AssocValueVisitor<'_, S, W> {} + +impl<'a, S: Spec, W: fmt::Write> AssocVisitor for AssocValueVisitor<'a, S, W> { + type Result = Result<VisitDoneToken<'a, S, W>, fmt::Error>; + + /// Visits an entry. 
+ #[inline] + fn visit_entry<K: fmt::Display, V: fmt::Display>( + &mut self, + key: K, + value: V, + ) -> ControlFlow<Self::Result> { + match self.visit_entry_impl(key, value) { + Ok(_) => ControlFlow::Continue(()), + Err(e) => ControlFlow::Break(Err(e)), + } + } + + /// Finishes visiting the associative array. + #[inline] + fn finish(self) -> Self::Result { + Ok(VisitDoneToken::new(self.visitor)) + } +} + +/// Visitor to retrieve effective type of a variable. +struct TypeVisitor<'a> { + /// Variable name. + var_name: VarName<'a>, +} + +impl<'a> TypeVisitor<'a> { + /// Creates a new type visitor. + #[inline] + #[must_use] + fn new(var_name: VarName<'a>) -> Self { + Self { var_name } + } +} + +impl VisitorSealed for TypeVisitor<'_> {} + +impl<'a> Visitor for TypeVisitor<'a> { + type Result = ValueType; + type ListVisitor = ListTypeVisitor; + type AssocVisitor = AssocTypeVisitor; + + #[inline] + fn var_name(&self) -> VarName<'a> { + self.var_name + } + #[inline] + fn purpose(&self) -> VisitPurpose { + VisitPurpose::Typecheck + } + #[inline] + fn visit_undefined(self) -> Self::Result { + ValueType::undefined() + } + #[inline] + fn visit_string<T: fmt::Display>(self, _: T) -> Self::Result { + ValueType::string() + } + #[inline] + fn visit_list(self) -> Self::ListVisitor { + ListTypeVisitor + } + #[inline] + fn visit_assoc(self) -> Self::AssocVisitor { + AssocTypeVisitor + } +} + +/// Visitor to retrieve effective type of a list variable. +struct ListTypeVisitor; + +impl VisitorSealed for ListTypeVisitor {} + +impl ListVisitor for ListTypeVisitor { + type Result = ValueType; + + /// Visits an item. + #[inline] + fn visit_item<T: fmt::Display>(&mut self, _item: T) -> ControlFlow<Self::Result> { + ControlFlow::Break(ValueType::nonempty_list()) + } + + /// Finishes visiting the list. + #[inline] + fn finish(self) -> Self::Result { + ValueType::empty_list() + } +} + +/// Visitor to retrieve effective type of an associative array variable. 
+struct AssocTypeVisitor; + +impl VisitorSealed for AssocTypeVisitor {} + +impl AssocVisitor for AssocTypeVisitor { + type Result = ValueType; + + /// Visits an item. + #[inline] + fn visit_entry<K: fmt::Display, V: fmt::Display>( + &mut self, + _key: K, + _value: V, + ) -> ControlFlow<Self::Result> { + ControlFlow::Break(ValueType::nonempty_assoc()) + } + + /// Finishes visiting the list. + #[inline] + fn finish(self) -> Self::Result { + ValueType::empty_assoc() + } +} diff --git a/vendor/iri-string/src/template/parser.rs b/vendor/iri-string/src/template/parser.rs new file mode 100644 index 00000000..6d5443a8 --- /dev/null +++ b/vendor/iri-string/src/template/parser.rs @@ -0,0 +1,6 @@ +//! URI Template parser. + +pub(super) mod char; +pub(super) mod validate; + +pub(super) use self::validate::validate_template_str; diff --git a/vendor/iri-string/src/template/parser/char.rs b/vendor/iri-string/src/template/parser/char.rs new file mode 100644 index 00000000..9ad4a6d8 --- /dev/null +++ b/vendor/iri-string/src/template/parser/char.rs @@ -0,0 +1,190 @@ +//! Characters. + +/// Properties of ASCII characters. +/// +/// About `'` (single quote) being considered as a literal: see +/// [Errata ID 6937](https://www.rfc-editor.org/errata/eid6937). 
+const CHARS_TABLE: [u8; 128] = [ + 0b_0000_0000, // NUL + 0b_0000_0000, // SOH + 0b_0000_0000, // STX + 0b_0000_0000, // ETX + 0b_0000_0000, // EOT + 0b_0000_0000, // ENQ + 0b_0000_0000, // ACK + 0b_0000_0000, // BEL + 0b_0000_0000, // BS + 0b_0000_0000, // HT + 0b_0000_0000, // LF + 0b_0000_0000, // VT + 0b_0000_0000, // FF + 0b_0000_0000, // CR + 0b_0000_0000, // SO + 0b_0000_0000, // SI + 0b_0000_0000, // DLE + 0b_0000_0000, // DC1 + 0b_0000_0000, // DC2 + 0b_0000_0000, // DC3 + 0b_0000_0000, // DC4 + 0b_0000_0000, // NAK + 0b_0000_0000, // SYN + 0b_0000_0000, // ETB + 0b_0000_0000, // CAN + 0b_0000_0000, // EM + 0b_0000_0000, // SUB + 0b_0000_0000, // ESC + 0b_0000_0000, // FS + 0b_0000_0000, // GS + 0b_0000_0000, // RS + 0b_0000_0000, // US + 0b_0000_0000, // SPACE + 0b_0000_0001, // ! + 0b_0000_0000, // " + 0b_0000_0001, // # + 0b_0000_0001, // $ + 0b_0000_0000, // % + 0b_0000_0001, // & + 0b_0000_0001, // ' + 0b_0000_0001, // ( + 0b_0000_0001, // ) + 0b_0000_0001, // * + 0b_0000_0001, // + + 0b_0000_0001, // , + 0b_0000_0001, // - + 0b_0000_0101, // . + 0b_0000_0001, // / + 0b_0000_0111, // 0 + 0b_0000_0111, // 1 + 0b_0000_0111, // 2 + 0b_0000_0111, // 3 + 0b_0000_0111, // 4 + 0b_0000_0111, // 5 + 0b_0000_0111, // 6 + 0b_0000_0111, // 7 + 0b_0000_0111, // 8 + 0b_0000_0111, // 9 + 0b_0000_0001, // : + 0b_0000_0001, // ; + 0b_0000_0000, // < + 0b_0000_0001, // = + 0b_0000_0000, // > + 0b_0000_0001, // ? 
+ 0b_0000_0001, // @ + 0b_0000_0111, // A + 0b_0000_0111, // B + 0b_0000_0111, // C + 0b_0000_0111, // D + 0b_0000_0111, // E + 0b_0000_0111, // F + 0b_0000_0111, // G + 0b_0000_0111, // H + 0b_0000_0111, // I + 0b_0000_0111, // J + 0b_0000_0111, // K + 0b_0000_0111, // L + 0b_0000_0111, // M + 0b_0000_0111, // N + 0b_0000_0111, // O + 0b_0000_0111, // P + 0b_0000_0111, // Q + 0b_0000_0111, // R + 0b_0000_0111, // S + 0b_0000_0111, // T + 0b_0000_0111, // U + 0b_0000_0111, // V + 0b_0000_0111, // W + 0b_0000_0111, // X + 0b_0000_0111, // Y + 0b_0000_0111, // Z + 0b_0000_0001, // [ + 0b_0000_0000, // \ + 0b_0000_0001, // ] + 0b_0000_0000, // ^ + 0b_0000_0111, // _ + 0b_0000_0000, // ` + 0b_0000_0111, // a + 0b_0000_0111, // b + 0b_0000_0111, // c + 0b_0000_0111, // d + 0b_0000_0111, // e + 0b_0000_0111, // f + 0b_0000_0111, // g + 0b_0000_0111, // h + 0b_0000_0111, // i + 0b_0000_0111, // j + 0b_0000_0111, // k + 0b_0000_0111, // l + 0b_0000_0111, // m + 0b_0000_0111, // n + 0b_0000_0111, // o + 0b_0000_0111, // p + 0b_0000_0111, // q + 0b_0000_0111, // r + 0b_0000_0111, // s + 0b_0000_0111, // t + 0b_0000_0111, // u + 0b_0000_0111, // v + 0b_0000_0111, // w + 0b_0000_0111, // x + 0b_0000_0111, // y + 0b_0000_0111, // z + 0b_0000_0000, // { + 0b_0000_0000, // | + 0b_0000_0000, // } + 0b_0000_0001, // ~ + 0b_0000_0000, // DEL +]; + +/// A mask to test whether the character matches `literals` rule defined in [RFC 6570]. +/// +/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.1 +const CHARS_TABLE_MASK_LITERAL: u8 = 1 << 0; + +/// A mask to test whether the character matches `varchar` rule defined in [RFC 6570]. +/// +/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.3 +const CHARS_TABLE_MASK_VARCHAR_START: u8 = 1 << 1; + +/// A mask to test whether the character matches `varchar` rule defined in [RFC 6570] or a period. 
+/// +/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.3 +const CHARS_TABLE_MASK_VARCHAR_CONTINUE: u8 = 1 << 2; + +/// Returns true if the given ASCII character is allowed in a literal string. +/// +/// # Precondition +/// +/// The given byte should be an ASCII character, i.e. should be less than 128. +#[inline] +#[must_use] +pub(super) const fn is_ascii_literal_char(c: u8) -> bool { + (CHARS_TABLE[c as usize] & CHARS_TABLE_MASK_LITERAL) != 0 +} + +/// Returns true if the given ASCII character is allowed as the beginning of the `varname`. +/// +/// Note that this does not return true for `%` character. It is caller's +/// responsibility to test validity of percent-encoded triplets. +/// +/// # Precondition +/// +/// The given byte should be an ASCII character, i.e. should be less than 128. +#[inline] +#[must_use] +pub(super) const fn is_ascii_varchar_start(c: u8) -> bool { + (CHARS_TABLE[c as usize] & CHARS_TABLE_MASK_VARCHAR_START) != 0 +} + +/// Returns true if the given ASCII character is allowed as the non-beginning of the `varname`. +/// +/// Note that this does not return true for `%` character. It is caller's +/// responsibility to test validity of percent-encoded triplets. +/// +/// # Precondition +/// +/// The given byte should be an ASCII character, i.e. should be less than 128. +#[inline] +#[must_use] +pub(super) const fn is_ascii_varchar_continue(c: u8) -> bool { + (CHARS_TABLE[c as usize] & CHARS_TABLE_MASK_VARCHAR_CONTINUE) != 0 +} diff --git a/vendor/iri-string/src/template/parser/validate.rs b/vendor/iri-string/src/template/parser/validate.rs new file mode 100644 index 00000000..67ab6c01 --- /dev/null +++ b/vendor/iri-string/src/template/parser/validate.rs @@ -0,0 +1,161 @@ +//! Validating parsers. 
+ +use crate::parser::str::{ + find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, starts_with_double_hexdigits, +}; +use crate::template::components::MaybeOperator; +use crate::template::error::{Error, ErrorKind}; + +use crate::template::parser::char::{ + is_ascii_literal_char, is_ascii_varchar_continue, is_ascii_varchar_start, +}; + +/// Returns `Ok(())` if the given string is a valid literal. +fn validate_literal(s: &str, offset: usize) -> Result<(), Error> { + match s + .chars() + .position(|c| !c.is_ascii() || !is_ascii_literal_char(c as u8)) + { + Some(pos) => Err(Error::new(ErrorKind::InvalidCharacter, offset + pos)), + None => Ok(()), + } +} + +/// Returns `Ok(())` if the given string is a valid varspec. +fn validate_varspec(s: &str, offset: usize) -> Result<(), Error> { + match find_split2_hole(s, b':', b'*') { + Some((maybe_varname, b':', maybe_len)) => { + validate_varname(maybe_varname, offset)?; + if !(1..=5).contains(&maybe_len.len()) { + return Err(Error::new( + ErrorKind::InvalidExpression, + offset + maybe_varname.len() + 2, + )); + } + if let Some(pos) = maybe_len.bytes().position(|b| !b.is_ascii_digit()) { + return Err(Error::new( + ErrorKind::InvalidExpression, + offset + maybe_varname.len() + 2 + pos, + )); + } + } + Some((maybe_varname, b'*', extra)) => { + validate_varname(maybe_varname, offset)?; + if !extra.is_empty() { + return Err(Error::new( + ErrorKind::InvalidExpression, + offset + maybe_varname.len() + 1, + )); + } + } + Some((_, sep, _)) => unreachable!("[consistency] the byte {sep:#02x} is not searched"), + None => validate_varname(s, offset)?, + } + Ok(()) +} + +/// Returns `Ok(())` if the given string is a valid varname. 
+pub(crate) fn validate_varname(s: &str, offset: usize) -> Result<(), Error> { + let rest = match s.as_bytes().first() { + Some(b'%') if starts_with_double_hexdigits(&s.as_bytes()[1..]) => &s[3..], + Some(b) if b.is_ascii() && is_ascii_varchar_start(*b) => &s[1..], + _ => return Err(Error::new(ErrorKind::InvalidExpression, offset)), + }; + let is_valid = satisfy_chars_with_pct_encoded(rest, is_ascii_varchar_continue, |_| false); + if !is_valid { + return Err(Error::new(ErrorKind::InvalidExpression, offset)); + } + Ok(()) +} + +/// Returns `Ok(())` if the given string is a valid expression. +/// +/// "Expression" here is the expression body inside `{` and `}`, but not including braces. +fn validate_expr_body(s: &str, mut offset: usize) -> Result<(), Error> { + if s.is_empty() { + return Err(Error::new(ErrorKind::InvalidExpression, offset)); + } + + // Skip the operator. + let maybe_variable_list = match MaybeOperator::from_byte(s.as_bytes()[0]) { + Some(MaybeOperator::Operator(_)) => { + offset += 1; + &s[1..] + } + Some(MaybeOperator::Reserved(_)) => { + return Err(Error::new(ErrorKind::UnsupportedOperator, offset)); + } + None => s, + }; + + // Validate varspecs. + for (spec_i, maybe_varspec) in maybe_variable_list.split(',').enumerate() { + if spec_i != 0 { + // Add the length of the leading separator `,`. + offset += 1; + } + validate_varspec(maybe_varspec, offset)?; + offset += maybe_varspec.len(); + } + + Ok(()) +} + +/// Validates whether the given string is valid as a URI template. +/// +/// Returns `Ok(())` if the given string is a valid URI template. 
+pub(in crate::template) fn validate_template_str(s: &str) -> Result<(), Error> { + let mut rest = s; + let mut offset = 0; + while !rest.is_empty() { + rest = match find_split2_hole(rest, b'%', b'{') { + Some((literal, b'%', xdigits2_and_rest)) => { + validate_literal(literal, offset)?; + + if xdigits2_and_rest.len() < 2 { + return Err(Error::new( + ErrorKind::InvalidPercentEncoding, + offset + literal.len(), + )); + } + let (xdigits2, new_rest) = xdigits2_and_rest.split_at(2); + if !xdigits2.as_bytes()[0].is_ascii_hexdigit() { + return Err(Error::new( + ErrorKind::InvalidPercentEncoding, + offset + literal.len() + 1, + )); + } + if !xdigits2.as_bytes()[1].is_ascii_hexdigit() { + return Err(Error::new( + ErrorKind::InvalidPercentEncoding, + offset + literal.len() + 2, + )); + } + new_rest + } + Some((literal, b'{', expr_and_rest)) => { + validate_literal(literal, offset)?; + + let (expr, new_rest) = match find_split_hole(expr_and_rest, b'}') { + Some(v) => v, + None => { + return Err(Error::new( + ErrorKind::ExpressionNotClosed, + offset + literal.len(), + )) + } + }; + + // +1 is `+ "{".len()`. + validate_expr_body(expr, offset + literal.len() + 1)?; + + new_rest + } + Some(_) => unreachable!("[consistency] searching only `%` and `{{`"), + None => return validate_literal(rest, offset), + }; + offset = s.len() - rest.len(); + } + + Ok(()) +} diff --git a/vendor/iri-string/src/template/simple_context.rs b/vendor/iri-string/src/template/simple_context.rs new file mode 100644 index 00000000..5c19dc79 --- /dev/null +++ b/vendor/iri-string/src/template/simple_context.rs @@ -0,0 +1,218 @@ +//! Simple general-purpose context type. + +use core::ops::ControlFlow; + +use alloc::collections::BTreeMap; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::vec::Vec; + +use crate::template::context::{Context, VarName, Visitor}; + +/// Value. 
+#[derive(Debug, Clone)] +pub enum Value { + /// Undefined (i.e. null). + Undefined, + /// String value. + String(String), + /// List. + List(Vec<String>), + /// Associative array. + Assoc(Vec<(String, String)>), +} + +impl From<&str> for Value { + #[inline] + fn from(v: &str) -> Self { + Self::String(v.into()) + } +} + +impl From<String> for Value { + #[inline] + fn from(v: String) -> Self { + Self::String(v) + } +} + +/// Simple template expansion context. +#[derive(Default, Debug, Clone)] +pub struct SimpleContext { + /// Variable values. + // Any map types (including `HashMap`) is ok, but the hash map is not provided by `alloc`. + // + // QUESTION: Should hexdigits in percent-encoded triplets in varnames be + // compared case sensitively? + variables: BTreeMap<String, Value>, +} + +impl SimpleContext { + /// Creates a new empty context. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::spec::UriSpec; + /// use iri_string::template::UriTemplateStr; + /// use iri_string::template::simple_context::SimpleContext; + /// + /// let empty_ctx = SimpleContext::new(); + /// let template = UriTemplateStr::new("{no_such_variable}")?; + /// let expanded = template.expand::<UriSpec, _>(&empty_ctx)?; + /// + /// assert_eq!( + /// expanded.to_string(), + /// "" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Inserts a variable. + /// + /// Passing [`Value::Undefined`] removes the value from the context. + /// + /// The entry will be inserted or removed even if the key is invalid as a + /// variable name. Such entries will be simply ignored on expansion. 
///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::spec::UriSpec;
    /// use iri_string::template::UriTemplateStr;
    /// use iri_string::template::simple_context::SimpleContext;
    ///
    /// let mut context = SimpleContext::new();
    /// context.insert("username", "foo");
    ///
    /// let template = UriTemplateStr::new("/users/{username}")?;
    /// let expanded = template.expand::<UriSpec, _>(&context)?;
    ///
    /// assert_eq!(
    ///     expanded.to_string(),
    ///     "/users/foo"
    /// );
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// Passing [`Value::Undefined`] removes the value from the context.
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::spec::UriSpec;
    /// use iri_string::template::UriTemplateStr;
    /// use iri_string::template::simple_context::{SimpleContext, Value};
    ///
    /// let mut context = SimpleContext::new();
    /// context.insert("username", "foo");
    /// context.insert("username", Value::Undefined);
    ///
    /// let template = UriTemplateStr::new("/users/{username}")?;
    /// let expanded = template.expand::<UriSpec, _>(&context)?;
    ///
    /// assert_eq!(
    ///     expanded.to_string(),
    ///     "/users/"
    /// );
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    pub fn insert<K, V>(&mut self, key: K, value: V) -> Option<Value>
    where
        K: Into<String>,
        V: Into<Value>,
    {
        let key = key.into();
        // Both arms return what the underlying map returns, i.e. the value
        // previously stored for the key (if any).
        match value.into() {
            // An undefined value is never stored: the entry is removed instead.
            Value::Undefined => self.variables.remove(&key),
            value => self.variables.insert(key, value),
        }
    }

    /// Removes all entries in the context.
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::spec::UriSpec; + /// use iri_string::template::UriTemplateStr; + /// use iri_string::template::simple_context::SimpleContext; + /// + /// let template = UriTemplateStr::new("{foo,bar}")?; + /// let mut context = SimpleContext::new(); + /// + /// context.insert("foo", "FOO"); + /// context.insert("bar", "BAR"); + /// assert_eq!( + /// template.expand::<UriSpec, _>(&context)?.to_string(), + /// "FOO,BAR" + /// ); + /// + /// context.clear(); + /// assert_eq!( + /// template.expand::<UriSpec, _>(&context)?.to_string(), + /// "" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn clear(&mut self) { + self.variables.clear(); + } + + /// Returns a reference to the value for the key. + // + // QUESTION: Should hexdigits in percent-encoded triplets in varnames be + // compared case sensitively? + #[inline] + #[must_use] + pub fn get(&self, key: VarName<'_>) -> Option<&Value> { + self.variables.get(key.as_str()) + } +} + +impl Context for SimpleContext { + fn visit<V: Visitor>(&self, visitor: V) -> V::Result { + use crate::template::context::{AssocVisitor, ListVisitor}; + + let name = visitor.var_name().as_str(); + match self.variables.get(name) { + None | Some(Value::Undefined) => visitor.visit_undefined(), + Some(Value::String(s)) => visitor.visit_string(s), + Some(Value::List(list)) => { + let mut visitor = visitor.visit_list(); + if let ControlFlow::Break(res) = + list.iter().try_for_each(|item| visitor.visit_item(item)) + { + return res; + } + visitor.finish() + } + Some(Value::Assoc(list)) => { + let mut visitor = visitor.visit_assoc(); + if let ControlFlow::Break(res) = + list.iter().try_for_each(|(k, v)| visitor.visit_entry(k, v)) + { + return res; + } + visitor.finish() + } + } + } +} diff --git a/vendor/iri-string/src/template/string.rs b/vendor/iri-string/src/template/string.rs new file mode 100644 
index 00000000..9ba53a75 --- /dev/null +++ b/vendor/iri-string/src/template/string.rs @@ -0,0 +1,647 @@ +//! Template string types. + +use core::fmt; + +#[cfg(feature = "alloc")] +use alloc::borrow::Cow; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::boxed::Box; +#[cfg(feature = "alloc")] +use alloc::rc::Rc; +#[cfg(feature = "alloc")] +use alloc::string::String; +#[cfg(feature = "alloc")] +use alloc::sync::Arc; + +use crate::spec::Spec; +use crate::template::components::{VarListIter, VarName}; +use crate::template::context::{Context, DynamicContext}; +use crate::template::error::{Error, ErrorKind}; +use crate::template::expand::{expand_whole_dynamic, Chunk, Chunks, Expanded}; +use crate::template::parser::validate_template_str; + +#[cfg(feature = "alloc")] +pub use self::owned::UriTemplateString; + +/// Implements `PartialEq` and `PartialOrd`. +macro_rules! impl_cmp { + ($ty_common:ty, $ty_lhs:ty, $ty_rhs:ty) => { + impl PartialEq<$ty_rhs> for $ty_lhs { + #[inline] + fn eq(&self, o: &$ty_rhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl PartialEq<$ty_lhs> for $ty_rhs { + #[inline] + fn eq(&self, o: &$ty_lhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl PartialOrd<$ty_rhs> for $ty_lhs { + #[inline] + fn partial_cmp(&self, o: &$ty_rhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + impl PartialOrd<$ty_lhs> for $ty_rhs { + #[inline] + fn partial_cmp(&self, o: &$ty_lhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + }; +} + +#[cfg(feature = "alloc")] +mod owned; + +/// A borrowed slice of a URI template. +/// +/// URI Template is defined by [RFC 6570]. +/// +/// Note that "URI Template" can also be used for IRI. 
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html
///
/// # Valid values
///
/// This type can have a URI template string.
///
/// # Applied errata
///
/// [Errata ID 6937](https://www.rfc-editor.org/errata/eid6937) is applied, so
/// single quotes are allowed to appear in a URI template.
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("'quoted'")?;
/// # Ok::<_, Error>(())
/// ```
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(transparent))]
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct UriTemplateStr {
    /// The raw string.
    inner: str,
}

impl UriTemplateStr {
    /// Creates a new string.
    ///
    /// The conversion is delegated to the `TryFrom<&str>` implementation.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// use iri_string::template::UriTemplateStr;
    ///
    /// let template = UriTemplateStr::new("/users/{username}")?;
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    pub fn new(s: &str) -> Result<&Self, Error> {
        TryFrom::try_from(s)
    }

    /// Creates a new string without validation.
    ///
    /// This does not validate the given string, so it is caller's
    /// responsibility to ensure the given string is valid.
    ///
    /// # Safety
    ///
    /// The given string must be syntactically valid as `Self` type.
    /// If not, any use of the returned value or the call of this
    /// function itself may result in undefined behavior.
    #[inline]
    #[must_use]
    pub unsafe fn new_unchecked(s: &str) -> &Self {
        // SAFETY: `new_always_unchecked` requires the same precondition
        // as `new_unchecked`.
        unsafe { Self::new_always_unchecked(s) }
    }

    /// Creates a new string without any validation.
    ///
    /// This does not validate the given string at any time.
    ///
    /// Intended for internal use.
    ///
    /// # Safety
    ///
    /// The given string must be valid.
+ #[inline] + #[must_use] + unsafe fn new_always_unchecked(s: &str) -> &Self { + // SAFETY: the cast is safe since `Self` type has `repr(transparent)` + // attribute and the content is guaranteed as valid by the + // precondition of the function. + unsafe { &*(s as *const str as *const Self) } + } + + /// Returns the template as a plain `&str`. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::UriTemplateStr; + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// assert_eq!(template.as_str(), "/users/{username}"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn as_str(&self) -> &str { + self.as_ref() + } + + /// Returns the template string length. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::UriTemplateStr; + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// assert_eq!(template.len(), "/users/{username}".len()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.as_str().len() + } + + /// Returns whether the string is empty. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::UriTemplateStr; + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// assert!(!template.is_empty()); + /// + /// let empty = UriTemplateStr::new("")?; + /// assert!(empty.is_empty()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.as_str().is_empty() + } +} + +impl UriTemplateStr { + /// Expands the template with the given context. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::spec::UriSpec; + /// use iri_string::template::UriTemplateStr; + /// use iri_string::template::simple_context::SimpleContext; + /// + /// let mut context = SimpleContext::new(); + /// context.insert("username", "foo"); + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// let expanded = template.expand::<UriSpec, _>(&context)?; + /// + /// assert_eq!( + /// expanded.to_string(), + /// "/users/foo" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// You can control allowed characters in the output by changing spec type. + /// + /// ``` + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::spec::{IriSpec, UriSpec}; + /// use iri_string::template::UriTemplateStr; + /// use iri_string::template::simple_context::SimpleContext; + /// + /// let mut context = SimpleContext::new(); + /// context.insert("alpha", "\u{03B1}"); + /// + /// let template = UriTemplateStr::new("{?alpha}")?; + /// + /// assert_eq!( + /// template.expand::<UriSpec, _>(&context)?.to_string(), + /// "?alpha=%CE%B1", + /// "a URI cannot contain Unicode alpha (U+03B1), so it should be escaped" + /// ); + /// assert_eq!( + /// template.expand::<IriSpec, _>(&context)?.to_string(), + /// "?alpha=\u{03B1}", + /// "an IRI can contain Unicode alpha (U+03B1), so it written as is" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn expand<'a, S: Spec, C: Context>( + &'a self, + context: &'a C, + ) -> Result<Expanded<'a, S, C>, Error> { + Expanded::new(self, context) + } + + /// Expands the template with the given dynamic context. + /// + #[cfg_attr( + feature = "alloc", + doc = concat!( + "If you need the allocated [`String`], use", + "[`expand_dynamic_to_string`][`Self::expand_dynamic_to_string`]." 
+ ) + )] + /// + /// See the documentation for [`DynamicContext`] for usage. + pub fn expand_dynamic<S: Spec, W: fmt::Write, C: DynamicContext>( + &self, + writer: &mut W, + context: &mut C, + ) -> Result<(), Error> { + expand_whole_dynamic::<S, _, _>(self, writer, context) + } + + /// Expands the template into a string, with the given dynamic context. + /// + /// This is basically [`expand_dynamic`][`Self::expand_dynamic`] method + /// that returns an owned string instead of writing to the given writer. + /// + /// See the documentation for [`DynamicContext`] for usage. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] + /// # extern crate alloc; + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// # use alloc::string::String; + /// use iri_string::template::UriTemplateStr; + /// # use iri_string::template::context::{DynamicContext, Visitor, VisitPurpose}; + /// use iri_string::spec::UriSpec; + /// + /// struct MyContext<'a> { + /// // See the documentation for `DynamicContext`. + /// # /// Target path. + /// # target: &'a str, + /// # /// Username. + /// # username: Option<&'a str>, + /// # /// A flag to remember whether the URI template + /// # /// attempted to use `username` variable. + /// # username_visited: bool, + /// } + /// # + /// # impl DynamicContext for MyContext<'_> { + /// # fn on_expansion_start(&mut self) { + /// # // Reset the state. + /// # self.username_visited = false; + /// # } + /// # fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result { + /// # match visitor.var_name().as_str() { + /// # "target" => visitor.visit_string(self.target), + /// # "username" => { + /// # if visitor.purpose() == VisitPurpose::Expand { + /// # // The variable `username` is being used + /// # // on the template expansion. + /// # // Don't care whether `username` is defined or not. 
+ /// # self.username_visited = true; + /// # } + /// # if let Some(username) = &self.username { + /// # visitor.visit_string(username) + /// # } else { + /// # visitor.visit_undefined() + /// # } + /// # } + /// # _ => visitor.visit_undefined(), + /// # } + /// # } + /// # } + /// + /// let mut context = MyContext { + /// target: "/posts/1", + /// username: Some("the_admin"), + /// username_visited: false, + /// }; + /// + /// // The template uses the variable `username`, so the visit is recorded. + /// let template = UriTemplateStr::new("{+target}{?username}")?; + /// let s = template.expand_dynamic_to_string::<UriSpec, _>(&mut context)?; + /// assert_eq!(s, "/posts/1?username=the_admin"); + /// assert!(context.username_visited); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[cfg(feature = "alloc")] + pub fn expand_dynamic_to_string<S: Spec, C: DynamicContext>( + &self, + context: &mut C, + ) -> Result<String, Error> { + let mut buf = String::new(); + expand_whole_dynamic::<S, _, _>(self, &mut buf, context)?; + Ok(buf) + } + + /// Returns an iterator of variables in the template.
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::UriTemplateStr; + /// + /// let template = UriTemplateStr::new("foo{/bar*,baz:4}{?qux}{&bar*}")?; + /// let mut vars = template.variables(); + /// assert_eq!(vars.next().map(|var| var.as_str()), Some("bar")); + /// assert_eq!(vars.next().map(|var| var.as_str()), Some("baz")); + /// assert_eq!(vars.next().map(|var| var.as_str()), Some("qux")); + /// assert_eq!(vars.next().map(|var| var.as_str()), Some("bar")); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn variables(&self) -> UriTemplateVariables<'_> { + UriTemplateVariables::new(self) + } +} + +impl fmt::Debug for UriTemplateStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("UriTemplateStr").field(&&self.inner).finish() + } +} + +impl AsRef<str> for UriTemplateStr { + #[inline] + fn as_ref(&self) -> &str { + &self.inner + } +} + +impl AsRef<UriTemplateStr> for UriTemplateStr { + #[inline] + fn as_ref(&self) -> &UriTemplateStr { + self + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a UriTemplateStr> for Cow<'a, UriTemplateStr> { + #[inline] + fn from(s: &'a UriTemplateStr) -> Self { + Cow::Borrowed(s) + } +} + +#[cfg(feature = "alloc")] +impl From<&UriTemplateStr> for Arc<UriTemplateStr> { + fn from(s: &UriTemplateStr) -> Self { + let inner: &str = s.as_str(); + let buf = Arc::<str>::from(inner); + // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so + // the memory layouts of `Arc<str>` and `Arc<UriTemplateStr>` are + // compatible. 
+ unsafe { + let raw: *const str = Arc::into_raw(buf); + Self::from_raw(raw as *const UriTemplateStr) + } + } +} + +#[cfg(feature = "alloc")] +impl From<&UriTemplateStr> for Box<UriTemplateStr> { + fn from(s: &UriTemplateStr) -> Self { + let inner: &str = s.as_str(); + let buf = Box::<str>::from(inner); + // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so + // the memory layouts of `Box<str>` and `Box<UriTemplateStr>` are + // compatible. + unsafe { + let raw: *mut str = Box::into_raw(buf); + Self::from_raw(raw as *mut UriTemplateStr) + } + } +} + +#[cfg(feature = "alloc")] +impl From<&UriTemplateStr> for Rc<UriTemplateStr> { + fn from(s: &UriTemplateStr) -> Self { + let inner: &str = s.as_str(); + let buf = Rc::<str>::from(inner); + // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so + // the memory layouts of `Rc<str>` and `Rc<UriTemplateStr>` are + // compatible. + unsafe { + let raw: *const str = Rc::into_raw(buf); + Self::from_raw(raw as *const UriTemplateStr) + } + } +} + +impl<'a> From<&'a UriTemplateStr> for &'a str { + #[inline] + fn from(s: &'a UriTemplateStr) -> &'a str { + s.as_ref() + } +} + +impl<'a> TryFrom<&'a str> for &'a UriTemplateStr { + type Error = Error; + + #[inline] + fn try_from(s: &'a str) -> Result<Self, Self::Error> { + match validate_template_str(s) { + // SAFETY: just checked the string is valid. + Ok(()) => Ok(unsafe { UriTemplateStr::new_always_unchecked(s) }), + Err(e) => Err(e), + } + } +} + +impl<'a> TryFrom<&'a [u8]> for &'a UriTemplateStr { + type Error = Error; + + #[inline] + fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> { + let s = core::str::from_utf8(bytes) + .map_err(|e| Error::new(ErrorKind::InvalidUtf8, e.valid_up_to()))?; + match validate_template_str(s) { + // SAFETY: just checked the string is valid. 
+ Ok(()) => Ok(unsafe { UriTemplateStr::new_always_unchecked(s) }), + Err(e) => Err(e), + } + } +} + +impl_cmp!(str, str, UriTemplateStr); +impl_cmp!(str, &str, UriTemplateStr); +impl_cmp!(str, str, &UriTemplateStr); + +impl fmt::Display for UriTemplateStr { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +/// Serde deserializer implementation. +#[cfg(feature = "serde")] +mod __serde_slice { + use super::UriTemplateStr; + + use core::fmt; + + use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, + }; + + /// Custom borrowed string visitor. + #[derive(Debug, Clone, Copy)] + struct CustomStrVisitor; + + impl<'de> Visitor<'de> for CustomStrVisitor { + type Value = &'de UriTemplateStr; + + #[inline] + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("URI template string") + } + + #[inline] + fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E> + where + E: de::Error, + { + <&'de UriTemplateStr as TryFrom<&'de str>>::try_from(v).map_err(E::custom) + } + } + + // About `'de` and `'a`, see + // <https://serde.rs/lifetimes.html#the-deserializede-lifetime>. + impl<'a, 'de: 'a> Deserialize<'de> for &'a UriTemplateStr { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_string(CustomStrVisitor) + } + } +} + +/// An iterator of variables in a URI template. +#[derive(Debug, Clone)] +pub struct UriTemplateVariables<'a> { + /// Chunks iterator. + chunks: Chunks<'a>, + /// Variables in the last chunk. + vars_in_chunk: Option<VarListIter<'a>>, +} + +impl<'a> UriTemplateVariables<'a> { + /// Creates a variables iterator from the URI template. 
+ #[inline] + #[must_use] + fn new(template: &'a UriTemplateStr) -> Self { + Self { + chunks: Chunks::new(template), + vars_in_chunk: None, + } + } +} + +impl<'a> Iterator for UriTemplateVariables<'a> { + type Item = VarName<'a>; + + fn next(&mut self) -> Option<Self::Item> { + loop { + if let Some(vars) = &mut self.vars_in_chunk { + match vars.next() { + Some((_len, spec)) => return Some(spec.name()), + None => self.vars_in_chunk = None, + } + } + let expr = self.chunks.find_map(|chunk| match chunk { + Chunk::Literal(_) => None, + Chunk::Expr(v) => Some(v), + }); + self.vars_in_chunk = match expr { + Some(expr) => Some(expr.decompose().1.into_iter()), + None => return None, + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::spec::IriSpec; + use crate::template::context::{AssocVisitor, ListVisitor, Visitor}; + + struct TestContext; + impl Context for TestContext { + fn visit<V: Visitor>(&self, visitor: V) -> V::Result { + match visitor.var_name().as_str() { + "str" => visitor.visit_string("string"), + "list" => visitor + .visit_list() + .visit_items_and_finish(["item0", "item1", "item2"]), + "assoc" => visitor + .visit_assoc() + .visit_entries_and_finish([("key0", "value0"), ("key1", "value1")]), + _ => visitor.visit_undefined(), + } + } + } + + #[test] + fn expand_error_pos() { + { + let e = UriTemplateStr::new("foo{list:4}") + .unwrap() + .expand::<IriSpec, _>(&TestContext) + .err() + .map(|e| e.location()); + assert_eq!(e, Some("foo{".len())); + } + + { + let e = UriTemplateStr::new("foo{/list*,list:4}") + .unwrap() + .expand::<IriSpec, _>(&TestContext) + .err() + .map(|e| e.location()); + assert_eq!(e, Some("foo{/list*,".len())); + } + + { + let e = UriTemplateStr::new("foo{/str:3,list*,assoc:4}") + .unwrap() + .expand::<IriSpec, _>(&TestContext) + .err() + .map(|e| e.location()); + assert_eq!(e, Some("foo{/str:3,list*,".len())); + } + } +} diff --git a/vendor/iri-string/src/template/string/owned.rs 
b/vendor/iri-string/src/template/string/owned.rs new file mode 100644 index 00000000..afd201b3 --- /dev/null +++ b/vendor/iri-string/src/template/string/owned.rs @@ -0,0 +1,296 @@ +//! Owned `UriTemplateString`. + +use core::fmt; + +use alloc::borrow::Cow; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::borrow::ToOwned; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::boxed::Box; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +use crate::template::error::{CreationError, Error, ErrorKind}; +use crate::template::parser::validate_template_str; +use crate::template::string::UriTemplateStr; + +/// An owned slice of a URI template. +/// +/// URI Template is defined by [RFC 6570]. +/// +/// Note that "URI Template" can also be used for IRI. +/// +/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html +/// +/// # Valid values +/// +/// This type can have a URI template string. +// Note that `From<$ty> for {Arc,Rc}<$slice>` is currently not implemented since +// this won't reuse allocated memory and hides internal memory reallocation. See +// <https://github.com/lo48576/iri-string/issues/20#issuecomment-1105207849>. +// However, this is not decided with firm belief or opinion, so there would be +// a chance that they are implemented in future. +#[cfg_attr(feature = "serde", derive(serde::Serialize))] +#[cfg_attr(feature = "serde", serde(transparent))] +#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct UriTemplateString { + /// Inner data. + inner: String, +} + +impl UriTemplateString { + /// Creates a new string without validation. + /// + /// This does not validate the given string, so it is caller's + /// responsibility to ensure the given string is valid. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. 
+ /// If not, any use of the returned value or the call of this + /// function itself may result in undefined behavior. + #[inline] + #[must_use] + pub unsafe fn new_unchecked(s: alloc::string::String) -> Self { + // The construction itself can be written in safe Rust, but + // every other place including unsafe functions expects + // `self.inner` to be syntactically valid as `Self`. In order to + // make them safe, the construction should validate the value + // or at least should require users to validate the value by + // making the function `unsafe`. + Self { inner: s } + } + + /// Shrinks the capacity of the inner buffer to match its length. + #[inline] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + /// Returns the internal buffer capacity in bytes. + #[inline] + #[must_use] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + /// Returns the borrowed URI template string slice. + /// + /// This is equivalent to `&*self`. + #[inline] + #[must_use] + pub fn as_slice(&self) -> &UriTemplateStr { + self.as_ref() + } + + /// Appends the template string. + #[inline] + pub fn append(&mut self, other: &UriTemplateStr) { + self.inner.push_str(other.as_str()); + debug_assert!(validate_template_str(self.as_str()).is_ok()); + } +} + +impl AsRef<str> for UriTemplateString { + #[inline] + fn as_ref(&self) -> &str { + &self.inner + } +} + +impl AsRef<UriTemplateStr> for UriTemplateString { + #[inline] + fn as_ref(&self) -> &UriTemplateStr { + // SAFETY: `UriTemplateString` and `UriTemplateStr` require the same validation, + // so the content of `self: &UriTemplateString` must be valid as `UriTemplateStr`.
+ unsafe { UriTemplateStr::new_always_unchecked(AsRef::<str>::as_ref(self)) } + } +} + +impl core::borrow::Borrow<str> for UriTemplateString { + #[inline] + fn borrow(&self) -> &str { + self.as_ref() + } +} + +impl core::borrow::Borrow<UriTemplateStr> for UriTemplateString { + #[inline] + fn borrow(&self) -> &UriTemplateStr { + self.as_ref() + } +} + +impl ToOwned for UriTemplateStr { + type Owned = UriTemplateString; + + #[inline] + fn to_owned(&self) -> Self::Owned { + self.into() + } +} + +impl From<&'_ UriTemplateStr> for UriTemplateString { + #[inline] + fn from(s: &UriTemplateStr) -> Self { + // This is safe because `s` must be valid. + Self { + inner: alloc::string::String::from(s.as_str()), + } + } +} + +impl From<UriTemplateString> for alloc::string::String { + #[inline] + fn from(s: UriTemplateString) -> Self { + s.inner + } +} + +impl<'a> From<UriTemplateString> for Cow<'a, UriTemplateStr> { + #[inline] + fn from(s: UriTemplateString) -> Cow<'a, UriTemplateStr> { + Cow::Owned(s) + } +} + +impl From<UriTemplateString> for Box<UriTemplateStr> { + #[inline] + fn from(s: UriTemplateString) -> Box<UriTemplateStr> { + let inner: String = s.into(); + let buf = Box::<str>::from(inner); + // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so + // the memory layouts of `Box<str>` and `Box<UriTemplateStr>` are + // compatible. Additionally, `UriTemplateString` and `UriTemplateStr` + // require the same syntax. 
+ unsafe { + let raw: *mut str = Box::into_raw(buf); + Box::<UriTemplateStr>::from_raw(raw as *mut UriTemplateStr) + } + } +} + +impl TryFrom<&'_ str> for UriTemplateString { + type Error = Error; + + #[inline] + fn try_from(s: &str) -> Result<Self, Self::Error> { + <&UriTemplateStr>::try_from(s).map(Into::into) + } +} + +impl TryFrom<&'_ [u8]> for UriTemplateString { + type Error = Error; + + #[inline] + fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> { + let s = core::str::from_utf8(bytes) + .map_err(|e| Error::new(ErrorKind::InvalidUtf8, e.valid_up_to()))?; + <&UriTemplateStr>::try_from(s).map(Into::into) + } +} + +impl core::convert::TryFrom<alloc::string::String> for UriTemplateString { + type Error = CreationError<String>; + + #[inline] + fn try_from(s: alloc::string::String) -> Result<Self, Self::Error> { + match <&UriTemplateStr>::try_from(s.as_str()) { + Ok(_) => { + // This is safe because `<&UriTemplateStr>::try_from(s)?` ensures + // that the string `s` is valid. + Ok(Self { inner: s }) + } + Err(e) => Err(CreationError::new(e, s)), + } + } +} + +impl alloc::str::FromStr for UriTemplateString { + type Err = Error; + + #[inline] + fn from_str(s: &str) -> Result<Self, Self::Err> { + TryFrom::try_from(s) + } +} + +impl core::ops::Deref for UriTemplateString { + type Target = UriTemplateStr; + + #[inline] + fn deref(&self) -> &UriTemplateStr { + self.as_ref() + } +} + +impl_cmp!(str, UriTemplateStr, Cow<'_, str>); +impl_cmp!(str, &UriTemplateStr, Cow<'_, str>); + +impl_cmp!(str, str, UriTemplateString); +impl_cmp!(str, &str, UriTemplateString); +impl_cmp!(str, Cow<'_, str>, UriTemplateString); +impl_cmp!(str, String, UriTemplateString); + +impl fmt::Display for UriTemplateString { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +/// Serde deserializer implementation. 
+#[cfg(feature = "serde")] +mod __serde_owned { + use super::UriTemplateString; + + use core::fmt; + + #[cfg(all(feature = "alloc", feature = "serde", not(feature = "std")))] + use alloc::string::String; + + use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, + }; + + /// Custom owned string visitor. + #[derive(Debug, Clone, Copy)] + struct CustomStringVisitor; + + impl Visitor<'_> for CustomStringVisitor { + type Value = UriTemplateString; + + #[inline] + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("URI template string") + } + + #[inline] + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: de::Error, + { + <UriTemplateString as TryFrom<&str>>::try_from(v).map_err(E::custom) + } + + #[cfg(feature = "serde")] + #[inline] + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: de::Error, + { + <UriTemplateString as TryFrom<String>>::try_from(v).map_err(E::custom) + } + } + + impl<'de> Deserialize<'de> for UriTemplateString { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(CustomStringVisitor) + } + } +} diff --git a/vendor/iri-string/src/types.rs b/vendor/iri-string/src/types.rs new file mode 100644 index 00000000..38e734c9 --- /dev/null +++ b/vendor/iri-string/src/types.rs @@ -0,0 +1,224 @@ +//! URI and IRI types. +//! +//! # URI and IRI +//! +//! IRIs (Internationalized Resource Identifiers) are defined in [RFC 3987], +//! and URIs (Uniform Resource Identifiers) are defined in [RFC 3986]. +//! +//! URI consists of only ASCII characters, and is a subset of IRI. +//! +//! IRIs are defined as below: +//! +//! ```text +//! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] +//! IRI-reference = IRI / irelative-ref +//! absolute-IRI = scheme ":" ihier-part [ "?" iquery ] +//! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] +//! 
(`irelative-part` is roughly same as `ihier-part`.) +//! ``` +//! +//! Definitions for URIs are almost the same, but they cannot have non-ASCII characters. +//! +//! # Types +//! +//! Types can be categorized by: +//! +//! * syntax, +//! * spec, and +//! * ownership. +//! +//! ## Syntax +//! +//! Since URIs and IRIs have almost the same syntax and share algorithms, they are implemented by +//! generic types. +//! +//! * [`RiStr`] and [`RiString`] +//! + String types for `IRI` and `URI` rules. +//! * [`RiAbsoluteStr`] and [`RiAbsoluteString`] +//! + String types for `absolute-IRI` and `absolute-URI` rules. +//! * [`RiReferenceStr`] and [`RiReferenceString`] +//! + String types for `IRI-reference` and `URI-reference` rules. +//! * [`RiRelativeStr`] and [`RiRelativeString`] +//! + String types for `irelative-ref` and `relative-ref` rules. +//! * [`RiFragmentStr`] and [`RiFragmentString`] +//! + String types for `ifragment` and `fragment` rules. +//! + Note that these types represent a substring of an IRI / URI reference. +//! They are not intended to be used directly as IRI / URI references. +//! +//! "Ri" stands for "Resource Identifier". +//! +//! ## Spec +//! +//! These types have a type parameter, which represents the RFC specification. +//! [`IriSpec`] represents [RFC 3987] spec, and [`UriSpec`] represents [RFC 3986] spec. +//! For example, `RiAbsoluteStr<IriSpec>` can have `absolute-IRI` string value, +//! and `RiReferenceStr<UriSpec>` can have `URI-reference` string value. +//! +//! ## Ownership +//! +//! String-like types usually have two variations, borrowed and owned. +//! +//! Borrowed types (such as `str`, `Path`, `OsStr`) are unsized, and are used by reference. +//! Owned types (such as `String`, `PathBuf`, `OsString`) are sized, and require heap allocation. +//! Owned types can be coerced to a borrowed type (for example, `&String` is automatically coerced +//! to `&str` in many contexts). +//! +//!
IRI / URI types have the same variations, `RiFooStr` and `RiFooString` +//! (`Foo` part represents syntax). +//! They are very similar to `&str` and `String`. +//! `Deref` is implemented, `RiFooStr::len()` is available, `&RiFooString` can be coerced to +//! `&RiFooStr`, `Cow<'_, RiFooStr>` and `Box<RiFooStr>` are available, and so on. +//! +//! # Hierarchy and safe conversion +//! +//! IRI syntaxes have the hierarchy below. +//! +//! ```text +//! RiReferenceStr +//! |-- RiStr +//! | `-- RiAbsoluteStr +//! `-- RiRelativeStr +//! ``` +//! +//! Therefore, the conversions below are safe and cheap: +//! +//! * `RiStr -> RiReferenceStr` +//! * `RiAbsoluteStr -> RiStr` +//! * `RiAbsoluteStr -> RiReferenceStr` +//! * `RiRelativeStr -> RiReferenceStr` +//! +//! For safely convertible types (consider `FooStr -> BarStr` is safe), traits +//! below are implemented: +//! +//! * `AsRef<BarStr> for FooStr` +//! * `AsRef<BarStr> for FooString` +//! * `From<FooString> for BarString` +//! * `PartialEq<FooStr> for BarStr`, and lots of impls like that +//! + `PartialEq` and `PartialOrd`. +//! + Slice, owned, `Cow`, reference, etc... +//! +//! ## Fallible conversions +//! +//! Fallible conversions are implemented from plain strings into IRI strings. +//! +//! * `TryFrom<&str> for &FooStr` +//! * `TryFrom<&str> for FooString` +//! * `TryFrom<String> for FooString` +//! * `FromStr for FooString` +//! +//! Some IRI string types provide more convenient methods to convert between IRI types. +//! For example, [`RiReferenceString::into_iri()`] tries to convert an IRI reference into an IRI, +//! and returns `Result<IriString, IriRelativeString>`. +//! This is because an IRI reference is valid as an IRI or a relative IRI reference. +//! Such methods are usually more efficient than using `TryFrom` for plain strings, because they +//! prevent you from losing ownership of a string, and do a conversion without extra memory +//! allocation. +//! +//! # Aliases +//! +//!
This module contains type aliases for RFC 3986 URI types and RFC 3987 IRI types. +//! +//! `IriFooStr{,ing}` are aliases of `RiFooStr{,ing}<IriSpec>`, and `UriFooStr{,ing}` are aliases +//! of `RiFooStr{,ing}<UriSpec>`. +//! +//! # Wrapped string types +//! +//! Similar to string types in std (such as `str`, `std::path::Path`, and `std::ffi::OsStr`), +//! IRI string types in this crate provide convenient conversions to: +//! +//! * `std::boxed::Box`, +//! * `std::borrow::Cow`, +//! * `std::rc::Rc`, and +//! * `std::sync::Arc`. +//! +//! ``` +//! # use iri_string::validate::Error; +//! # #[cfg(feature = "std")] { +//! use std::borrow::Cow; +//! use std::rc::Rc; +//! use std::sync::Arc; +//! +//! use iri_string::types::IriStr; +//! +//! let iri = IriStr::new("http://example.com/")?; +//! let iri_owned = iri.to_owned(); +//! +//! // From slice. +//! let cow_1_1: Cow<'_, IriStr> = iri.into(); +//! let cow_1_2 = Cow::<'_, IriStr>::from(iri); +//! assert!(matches!(cow_1_1, Cow::Borrowed(_))); +//! assert!(matches!(cow_1_2, Cow::Borrowed(_))); +//! // From owned. +//! let cow_2_1: Cow<'_, IriStr> = iri_owned.clone().into(); +//! let cow_2_2 = Cow::<'_, IriStr>::from(iri_owned.clone()); +//! assert!(matches!(cow_2_1, Cow::Owned(_))); +//! assert!(matches!(cow_2_2, Cow::Owned(_))); +//! +//! // From slice. +//! let box_1_1: Box<IriStr> = iri.into(); +//! let box_1_2 = Box::<IriStr>::from(iri); +//! // From owned. +//! let box_2_1: Box<IriStr> = iri_owned.clone().into(); +//! let box_2_2 = Box::<IriStr>::from(iri_owned.clone()); +//! +//! // From slice. +//! let rc_1_1: Rc<IriStr> = iri.into(); +//! let rc_1_2 = Rc::<IriStr>::from(iri); +//! // From owned. +//! // Note that `From<owned> for Rc<borrowed>` is not implemented for now. +//! // Get borrowed string by `.as_slice()` and convert it. +//! let rc_2_1: Rc<IriStr> = iri_owned.clone().as_slice().into(); +//! let rc_2_2 = Rc::<IriStr>::from(iri_owned.clone().as_slice()); +//! +//! // From slice. +//!
let arc_1_1: Arc<IriStr> = iri.into(); +//! let arc_1_2 = Arc::<IriStr>::from(iri); +//! // From owned. +//! // Note that `From<owned> for Arc<borrowed>` is not implemented for now. +//! // Get borrowed string by `.as_slice()` and convert it. +//! let arc_2_1: Arc<IriStr> = iri_owned.clone().as_slice().into(); +//! let arc_2_2 = Arc::<IriStr>::from(iri_owned.clone().as_slice()); +//! # } +//! # Ok::<_, Error>(()) +//! ``` +//! +//! [RFC 3986]: https://tools.ietf.org/html/rfc3986 +//! [RFC 3987]: https://tools.ietf.org/html/rfc3987 +//! [`RiStr`]: struct.RiStr.html +//! [`RiString`]: struct.RiString.html +//! [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html +//! [`RiAbsoluteString`]: struct.RiAbsoluteString.html +//! [`RiFragmentStr`]: struct.RiFragmentStr.html +//! [`RiFragmentString`]: struct.RiFragmentString.html +//! [`RiReferenceStr`]: struct.RiReferenceStr.html +//! [`RiReferenceString`]: struct.RiReferenceString.html +//! [`RiReferenceString::into_iri()`]: struct.RiReferenceString.html#method.into_iri +//! [`RiRelativeStr`]: struct.RiRelativeStr.html +//! [`RiRelativeString`]: struct.RiRelativeString.html +//! [`IriSpec`]: ../spec/enum.IriSpec.html +//! 
[`UriSpec`]: ../spec/enum.UriSpec.html + +#[cfg(feature = "alloc")] +pub use self::{ + generic::{ + CreationError, RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, + RiRelativeString, RiString, + }, + iri::{ + IriAbsoluteString, IriFragmentString, IriQueryString, IriReferenceString, + IriRelativeString, IriString, + }, + uri::{ + UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, + UriRelativeString, UriString, + }, +}; +pub use self::{ + generic::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr}, + iri::{IriAbsoluteStr, IriFragmentStr, IriQueryStr, IriReferenceStr, IriRelativeStr, IriStr}, + uri::{UriAbsoluteStr, UriFragmentStr, UriQueryStr, UriReferenceStr, UriRelativeStr, UriStr}, +}; + +pub(crate) mod generic; +mod iri; +mod uri; diff --git a/vendor/iri-string/src/types/generic.rs b/vendor/iri-string/src/types/generic.rs new file mode 100644 index 00000000..9e631069 --- /dev/null +++ b/vendor/iri-string/src/types/generic.rs @@ -0,0 +1,57 @@ +//! Generic resource identifier types. +//! +//! ```text +//! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] +//! IRI-reference = IRI / irelative-ref +//! absolute-IRI = scheme ":" ihier-part [ "?" iquery ] +//! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] +//! (`irelative-part` is roughly same as `ihier-part`.) +//! ``` +//! +//! Hierarchy: +//! +//! ```text +//! RiReferenceStr +//! |-- RiStr +//! | `-- RiAbsoluteStr +//! `-- RiRelativeStr +//! ``` +//! +//! Therefore, the conversions below are safe and cheap: +//! +//! * `RiStr -> RiReferenceStr` +//! * `RiAbsoluteStr -> RiStr` +//! * `RiAbsoluteStr -> RiReferenceStr` +//! * `RiRelativeStr -> RiReferenceStr` +//! +//! For safely convertible types (consider `FooStr -> BarStr` is safe), traits +//! below are implemented: +//! +//! * `AsRef<BarStr> for FooStr` +//! * `AsRef<BarStr> for FooString` +//! * `From<FooString> for BarString` +//! 
* `PartialEq<FooStr> for BarStr` and lots of impls like that +//! + `PartialEq` and `PartialOrd`. +//! + Slice, owned, `Cow`, reference, etc... + +pub use self::{ + absolute::RiAbsoluteStr, fragment::RiFragmentStr, normal::RiStr, query::RiQueryStr, + reference::RiReferenceStr, relative::RiRelativeStr, +}; +#[cfg(feature = "alloc")] +pub use self::{ + absolute::RiAbsoluteString, error::CreationError, fragment::RiFragmentString, normal::RiString, + query::RiQueryString, reference::RiReferenceString, relative::RiRelativeString, +}; + +#[macro_use] +mod macros; + +mod absolute; +#[cfg(feature = "alloc")] +mod error; +mod fragment; +mod normal; +mod query; +mod reference; +mod relative; diff --git a/vendor/iri-string/src/types/generic/absolute.rs b/vendor/iri-string/src/types/generic/absolute.rs new file mode 100644 index 00000000..8e588ede --- /dev/null +++ b/vendor/iri-string/src/types/generic/absolute.rs @@ -0,0 +1,728 @@ +//! Absolute IRI (without fragment part). + +use crate::components::AuthorityComponents; +#[cfg(feature = "alloc")] +use crate::mask_password::password_range_to_hide; +use crate::mask_password::PasswordMasked; +use crate::normalize::{Error, NormalizationInput, Normalized, NormalizednessCheckMode}; +use crate::parser::trusted as trusted_parser; +use crate::spec::Spec; +use crate::types::{RiQueryStr, RiReferenceStr, RiStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiReferenceString, RiString}; +use crate::validate::absolute_iri; + +define_custom_string_slice! { + /// A borrowed slice of an absolute IRI without fragment part. + /// + /// This corresponds to [`absolute-IRI` rule] in [RFC 3987] + /// (and [`absolute-URI` rule] in [RFC 3986]). + /// In other words, this is [`RiStr`] without fragment part. + /// + /// If you want to accept fragment part, use [`RiStr`]. + /// + /// # Valid values + /// + /// This type can have an absolute IRI without fragment part.
+ /// + /// ``` + /// # use iri_string::types::IriAbsoluteStr; + /// assert!(IriAbsoluteStr::new("https://example.com/foo?bar=baz").is_ok()); + /// assert!(IriAbsoluteStr::new("foo:bar").is_ok()); + /// // Scheme `foo` and empty path. + /// assert!(IriAbsoluteStr::new("foo:").is_ok()); + /// // `foo://.../` below are all allowed. See the crate documentation for detail. + /// assert!(IriAbsoluteStr::new("foo:/").is_ok()); + /// assert!(IriAbsoluteStr::new("foo://").is_ok()); + /// assert!(IriAbsoluteStr::new("foo:///").is_ok()); + /// assert!(IriAbsoluteStr::new("foo:////").is_ok()); + /// assert!(IriAbsoluteStr::new("foo://///").is_ok()); + /// + /// ``` + /// + /// Relative IRI is not allowed. + /// + /// ``` + /// # use iri_string::types::IriAbsoluteStr; + /// // This is relative path. + /// assert!(IriAbsoluteStr::new("foo/bar").is_err()); + /// // `/foo/bar` is an absolute path, but it is authority-relative. + /// assert!(IriAbsoluteStr::new("/foo/bar").is_err()); + /// // `//foo/bar` is termed "network-path reference", + /// // or usually called "protocol-relative reference". + /// assert!(IriAbsoluteStr::new("//foo/bar").is_err()); + /// // Empty string is not a valid absolute IRI. + /// assert!(IriAbsoluteStr::new("").is_err()); + /// ``` + /// + /// Fragment part (such as trailing `#foo`) is not allowed. + /// + /// ``` + /// # use iri_string::types::IriAbsoluteStr; + /// // Fragment part is not allowed. + /// assert!(IriAbsoluteStr::new("https://example.com/foo?bar=baz#qux").is_err()); + /// ``` + /// + /// Some characters and sequences cannot used in an absolute IRI. + /// + /// ``` + /// # use iri_string::types::IriAbsoluteStr; + /// // `<` and `>` cannot directly appear in an absolute IRI. + /// assert!(IriAbsoluteStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an absolute IRI. 
+ /// assert!(IriAbsoluteStr::new("%").is_err()); + /// assert!(IriAbsoluteStr::new("%GG").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`absolute-IRI` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`absolute-URI` rule]: https://tools.ietf.org/html/rfc3986#section-4.3 + /// [`RiStr`]: struct.RiStr.html + struct RiAbsoluteStr { + validator = absolute_iri, + expecting_msg = "Absolute IRI string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of an absolute IRI without fragment part. + /// + /// This corresponds to [`absolute-IRI` rule] in [RFC 3987] + /// (and [`absolute-URI` rule] in [RFC 3986]). + /// The rule for `absolute-IRI` is `scheme ":" ihier-part [ "?" iquery ]`. + /// In other words, this is [`RiString`] without fragment part. + /// + /// If you want to accept fragment part, use [`RiString`]. + /// + /// For details, see the document for [`RiAbsoluteStr`]. + /// + /// Enabled by `alloc` or `std` feature. + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`absolute-IRI` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`absolute-URI` rule]: https://tools.ietf.org/html/rfc3986#section-4.3 + /// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html + /// [`RiString`]: struct.RiString.html + struct RiAbsoluteString { + validator = absolute_iri, + slice = RiAbsoluteStr, + expecting_msg = "Absolute IRI string", + } +} + +impl<S: Spec> RiAbsoluteStr<S> { + /// Returns `Ok(())` if the IRI is normalizable by the RFC 3986 algorithm.
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/%2e/bar/..")?; + /// assert!(iri.ensure_rfc3986_normalizable().is_ok()); + /// + /// let iri2 = IriAbsoluteStr::new("scheme:/..//bar")?; + /// // The normalization result would be `scheme://bar` according to RFC + /// // 3986, but it is unintended and should be treated as a failure. + /// // This crate automatically handles this case so that `.normalize()` won't fail. + /// assert!(iri2.ensure_rfc3986_normalizable().is_err()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> { + NormalizationInput::from(self).ensure_rfc3986_normalizable() + } + + /// Returns `true` if the IRI is already normalized. + /// + /// This returns the same result as `self.normalize().to_string() == self`, + /// but does this more efficiently without heap allocation. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?; + /// assert!(!iri.is_normalized()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query"); + /// assert!(normalized.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:/.///foo")?; + /// // Already normalized.
+ /// assert!(iri.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?; + /// // Default normalization algorithm assumes the path part to be NOT opaque. + /// assert!(!iri.is_normalized()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "scheme:/.//not-a-host"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn is_normalized(&self) -> bool { + trusted_parser::is_normalized::<S>(self.as_str(), NormalizednessCheckMode::Default) + } + + /// Returns `true` if the IRI is already normalized in the sense of RFC 3986. + /// + /// This returns the same result as + /// `self.ensure_rfc3986_normalizable().is_ok() && (self.normalize().to_string() == self)`, + /// but does this more efficiently without heap allocation. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?; + /// assert!(!iri.is_normalized_rfc3986()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query"); + /// assert!(normalized.is_normalized_rfc3986()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:/.///foo")?; + /// // Not normalized in the sense of RFC 3986.
+ /// assert!(!iri.is_normalized_rfc3986()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?; + /// // RFC 3986 normalization algorithm assumes the path part to be NOT opaque. + /// assert!(!iri.is_normalized_rfc3986()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "scheme:/.//not-a-host"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn is_normalized_rfc3986(&self) -> bool { + trusted_parser::is_normalized::<S>(self.as_str(), NormalizednessCheckMode::Rfc3986) + } + + /// Returns `true` if the IRI is already normalized in the sense of + /// [`normalize_but_preserve_authorityless_relative_path`] method. + /// + /// This returns the same result as + /// `self.normalize_but_preserve_authorityless_relative_path().to_string() == self`, + /// but does this more efficiently without heap allocation. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?; + /// assert!(!iri.is_normalized_but_authorityless_relative_path_preserved()); + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query"); + /// assert!(normalized.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:/.///foo")?; + /// // Already normalized in the sense of + /// // `normalize_but_preserve_authorityless_relative_path()` method. + /// assert!(iri.is_normalized_but_authorityless_relative_path_preserved()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?; + /// // Relative path is treated as opaque since the authority component is absent.
+ /// assert!(iri.is_normalized_but_authorityless_relative_path_preserved()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [`normalize_but_preserve_authorityless_relative_path`]: + /// `Self::normalize_but_preserve_authorityless_relative_path` + #[inline] + #[must_use] + pub fn is_normalized_but_authorityless_relative_path_preserved(&self) -> bool { + trusted_parser::is_normalized::<S>( + self.as_str(), + NormalizednessCheckMode::PreserveAuthoritylessRelativePath, + ) + } + + /// Returns the normalized IRI. + /// + /// # Notes + /// + /// For some abnormal IRIs, the normalization can produce a semantically + /// incorrect string that looks syntactically valid. To avoid security + /// issues by this trap, the normalization algorithm by this crate + /// automatically applies the workaround. + /// + /// If you worry about this, test by + /// [`RiAbsoluteStr::ensure_rfc3986_normalizable`] method or + /// [`Normalized::ensure_rfc3986_normalizable`] before using the result + /// string. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?; + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn normalize(&self) -> Normalized<'_, Self> { + Normalized::from_input(NormalizationInput::from(self)).and_normalize() + } + + /// Returns the normalized IRI, but preserving dot segments in relative path + /// if the authority component is absent. + /// + /// This normalization would be similar to that of [WHATWG URL Standard] + /// while this implementation is not guaranteed to strictly follow the spec.
+ /// + /// Note that this normalization algorithm is not compatible with RFC 3986 + /// algorithm for some inputs. + /// + /// Note that case normalization and percent-encoding normalization will + /// still be applied to any path. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?; + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:relative/../f%6f%6f")?; + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "scheme:relative/../foo"); + /// // `.normalize()` would normalize this to `scheme:/foo`. + /// # assert_eq!(iri.normalize().to_dedicated_string(), "scheme:/foo"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [WHATWG URL Standard]: https://url.spec.whatwg.org/ + #[inline] + #[must_use] + pub fn normalize_but_preserve_authorityless_relative_path(&self) -> Normalized<'_, Self> { + Normalized::from_input(NormalizationInput::from(self)) + .and_normalize_but_preserve_authorityless_relative_path() + } + + /// Returns the proxy to the IRI with password masking feature. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query"); + /// + /// assert_eq!( + /// masked.replace_password("${password}").to_string(), + /// "http://user:${password}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn mask_password(&self) -> PasswordMasked<'_, Self> { + PasswordMasked::new(self) + } +} + +/// Components getters. +impl<S: Spec> RiAbsoluteStr<S> { + /// Returns the scheme. + /// + /// The following colon is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?; + /// assert_eq!(iri.scheme_str(), "http"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn scheme_str(&self) -> &str { + trusted_parser::extract_scheme_absolute(self.as_str()) + } + + /// Returns the authority. + /// + /// The leading `//` is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?; + /// assert_eq!(iri.authority_str(), Some("example.com")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_str(&self) -> Option<&str> { + trusted_parser::extract_authority_absolute(self.as_str()) + } + + /// Returns the path. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?; + /// assert_eq!(iri.path_str(), "/pathpath"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn path_str(&self) -> &str { + trusted_parser::extract_path_absolute(self.as_str()) + } + + /// Returns the query. + /// + /// The leading question mark (`?`) is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriAbsoluteStr, IriQueryStr}; + /// + /// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?; + /// let query = IriQueryStr::new("queryquery")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query(&self) -> Option<&RiQueryStr<S>> { + trusted_parser::extract_query_absolute_iri(self.as_str()).map(|query| { + // SAFETY: `trusted_parser::extract_query_absolute_iri()` must return + // the query part of an IRI (excluding the leading `?` character), + // and the returned string consists of allowed characters since it + // is a substring of the source IRI. + unsafe { RiQueryStr::new_maybe_unchecked(query) } + }) + } + + /// Returns the query in a raw string slice. + /// + /// The leading question mark (`?`) is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?; + /// assert_eq!(iri.query_str(), Some("queryquery")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query_str(&self) -> Option<&str> { + trusted_parser::extract_query_absolute_iri(self.as_str()) + } + + /// Returns the authority components.
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?; + /// let authority = iri.authority_components() + /// .expect("authority is available"); + /// assert_eq!(authority.userinfo(), Some("user:pass")); + /// assert_eq!(authority.host(), "example.com"); + /// assert_eq!(authority.port(), Some("8080")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> { + AuthorityComponents::from_iri(self.as_ref()) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> RiAbsoluteString<S> { + /// Removes the password completely (including separator colon) from `self` even if it is empty. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriAbsoluteString; + /// + /// let mut iri = IriAbsoluteString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Even if the password is empty, the password and separator will be removed. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriAbsoluteString; + /// + /// let mut iri = IriAbsoluteString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) => v, + None => return, + }; + let separator_colon = pw_range.start - 1; + // SAFETY: the IRI must still be valid after the password component and + // the leading separator colon is removed. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(separator_colon..pw_range.end); + debug_assert!( + RiAbsoluteStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } + + /// Replaces the non-empty password in `self` to the empty password. + /// + /// This leaves the separator colon if the password part was available. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriAbsoluteString; + /// + /// let mut iri = IriAbsoluteString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the password is empty, it is left as is. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriAbsoluteString; + /// + /// let mut iri = IriAbsoluteString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_nonempty_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) if !v.is_empty() => v, + _ => return, + }; + debug_assert_eq!( + self.as_str().as_bytes().get(pw_range.start - 1).copied(), + Some(b':'), + "[validity] the password component must be prefixed with a separator colon" + ); + // SAFETY: the IRI must be valid after the password is replaced with empty string. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(pw_range); + debug_assert!( + RiAbsoluteStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } +} + +impl_trivial_conv_between_iri! { + from_slice: RiAbsoluteStr, + from_owned: RiAbsoluteString, + to_slice: RiStr, + to_owned: RiString, +} + +impl_trivial_conv_between_iri! { + from_slice: RiAbsoluteStr, + from_owned: RiAbsoluteString, + to_slice: RiReferenceStr, + to_owned: RiReferenceString, +} diff --git a/vendor/iri-string/src/types/generic/error.rs b/vendor/iri-string/src/types/generic/error.rs new file mode 100644 index 00000000..b11f58b2 --- /dev/null +++ b/vendor/iri-string/src/types/generic/error.rs @@ -0,0 +1,70 @@ +//! Resource identifier creation error. + +use core::fmt; + +#[cfg(feature = "std")] +use std::error; + +use crate::validate::Error; + +/// Error on conversion into an IRI type. +/// +/// Enabled by `alloc` or `std` feature. +// This type itself does not require `alloc` or `std, but the type is used only when `alloc` +// feature is enabled. 
To avoid exporting unused stuff, the type (and the `types::generic::error` +// module) is available only when necessary. +// +// Note that all types which implement `Spec` also implement `SpecInternal`. +pub struct CreationError<T> { + /// Source data. + source: T, + /// Validation error. + error: Error, +} + +impl<T> CreationError<T> { + /// Returns the source data. + #[must_use] + pub fn into_source(self) -> T { + self.source + } + + /// Returns the validation error. + #[must_use] + pub fn validation_error(&self) -> Error { + self.error + } + + /// Creates a new `CreationError`. + #[must_use] + pub(crate) fn new(error: Error, source: T) -> Self { + Self { source, error } + } +} + +impl<T: fmt::Debug> fmt::Debug for CreationError<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CreationError") + .field("source", &self.source) + .field("error", &self.error) + .finish() + } +} + +impl<T: Clone> Clone for CreationError<T> { + fn clone(&self) -> Self { + Self { + source: self.source.clone(), + error: self.error, + } + } +} + +impl<T> fmt::Display for CreationError<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.error.fmt(f) + } +} + +#[cfg(feature = "std")] +impl<T: fmt::Debug> error::Error for CreationError<T> {} diff --git a/vendor/iri-string/src/types/generic/fragment.rs b/vendor/iri-string/src/types/generic/fragment.rs new file mode 100644 index 00000000..2fdc1613 --- /dev/null +++ b/vendor/iri-string/src/types/generic/fragment.rs @@ -0,0 +1,108 @@ +//! Fragment string. + +use crate::{ + spec::Spec, + validate::{fragment, Error}, +}; + +define_custom_string_slice! { + /// A borrowed slice of an IRI fragment (i.e. after the first `#` character). + /// + /// This corresponds to [`ifragment` rule] in [RFC 3987] (and [`fragment` rule] in [RFC 3986]). + /// The rule for `ifragment` is `*( ipchar / "/" / "?" )`. + /// + /// # Valid values + /// + /// This type can have an IRI fragment.
+ /// Note that the IRI `foo://bar/baz#qux` has the fragment `qux`, **not** `#qux`. + /// + /// ``` + /// # use iri_string::types::IriFragmentStr; + /// assert!(IriFragmentStr::new("").is_ok()); + /// assert!(IriFragmentStr::new("foo").is_ok()); + /// assert!(IriFragmentStr::new("foo/bar").is_ok()); + /// assert!(IriFragmentStr::new("/foo/bar").is_ok()); + /// assert!(IriFragmentStr::new("//foo/bar").is_ok()); + /// assert!(IriFragmentStr::new("https://user:pass@example.com:8080").is_ok()); + /// assert!(IriFragmentStr::new("https://example.com/").is_ok()); + /// ``` + /// + /// Some characters and sequences cannot be used in a fragment. + /// + /// ``` + /// # use iri_string::types::IriFragmentStr; + /// // `<` and `>` cannot directly appear in an IRI reference. + /// assert!(IriFragmentStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI reference. + /// assert!(IriFragmentStr::new("%").is_err()); + /// assert!(IriFragmentStr::new("%GG").is_err()); + /// // Hash sign `#` cannot appear in an IRI fragment. + /// assert!(IriFragmentStr::new("#hash").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`fragment` rule]: https://tools.ietf.org/html/rfc3986#section-3.5 + /// [`ifragment` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + struct RiFragmentStr { + validator = fragment, + expecting_msg = "IRI fragment string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of an IRI fragment (i.e. after the first `#` character). + /// + /// This corresponds to [`ifragment` rule] in [RFC 3987] (and [`fragment` rule] in [RFC 3986]). + /// The rule for `ifragment` is `*( ipchar / "/" / "?" )`. + /// + /// For details, see the documentation for [`RiFragmentStr`]. + /// + /// Enabled by `alloc` or `std` feature.
+ /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`fragment` rule]: https://tools.ietf.org/html/rfc3986#section-3.5 + /// [`ifragment` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`RiFragmentStr`]: struct.RiFragmentStr.html + struct RiFragmentString { + validator = fragment, + slice = RiFragmentStr, + expecting_msg = "IRI fragment string", + } +} + +impl<S: Spec> RiFragmentStr<S> { + /// Creates a new `&RiFragmentStr` from the fragment part prefixed by `#`. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::types::IriFragmentStr; + /// assert!(IriFragmentStr::from_prefixed("#").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#foo").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#foo/bar").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#/foo/bar").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#//foo/bar").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#https://user:pass@example.com:8080").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#https://example.com/").is_ok()); + /// + /// // `<` and `>` cannot directly appear in an IRI. + /// assert!(IriFragmentStr::from_prefixed("#<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI. + /// assert!(IriFragmentStr::from_prefixed("#%").is_err()); + /// assert!(IriFragmentStr::from_prefixed("#%GG").is_err()); + /// // `#` prefix is expected. + /// assert!(IriFragmentStr::from_prefixed("").is_err()); + /// assert!(IriFragmentStr::from_prefixed("foo").is_err()); + /// // Hash sign `#` cannot appear in an IRI fragment.
+ /// assert!(IriFragmentStr::from_prefixed("##hash").is_err()); + /// ``` + pub fn from_prefixed(s: &str) -> Result<&Self, Error> { + if !s.starts_with('#') { + return Err(Error::new()); + } + TryFrom::try_from(&s[1..]) + } +} diff --git a/vendor/iri-string/src/types/generic/macros.rs b/vendor/iri-string/src/types/generic/macros.rs new file mode 100644 index 00000000..7aaa82df --- /dev/null +++ b/vendor/iri-string/src/types/generic/macros.rs @@ -0,0 +1,1041 @@ +//! Macros to define resource identifier types. + +/// Implements type conversion from slice into smart pointer. +macro_rules! impl_from_slice_into_smartptr { + ( + // Generic slice type. + ty: $ty:ident, + // Smart pointer item path (without type parameter). + smartptr: $($smartptr:ident)::*, + // Pointer mutability for `into_raw` and `from_raw`. + // Use `mut` for `Box`, and `const` for `Arc` and `Rc`. + mutability: $mut:ident, + ) => { + #[cfg(feature = "alloc")] + impl<S: crate::spec::Spec> From<&$ty<S>> for $($smartptr)::* <$ty<S>> { + fn from(s: &$ty<S>) -> Self { + let inner: &str = s.as_str(); + let buf = $($smartptr)::* ::<str>::from(inner); + // SAFETY: `$ty<S>` has `repr(transparent)` attribute, so the + // memory layouts of `$smartptr<str>` and `$smartptr<$ty<S>>` + // are compatible. + unsafe { + let raw: *$mut str = $($smartptr)::* ::into_raw(buf); + $($smartptr)::* ::<$ty<S>>::from_raw(raw as *$mut $ty<S>) + } + } + } + }; +} + +/// Implements `PartialEq` and `PartialOrd`. +macro_rules! 
impl_cmp { + ($ty_common:ty, $ty_lhs:ty, $ty_rhs:ty) => { + impl<S: crate::spec::Spec> PartialEq<$ty_rhs> for $ty_lhs { + #[inline] + fn eq(&self, o: &$ty_rhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec> PartialEq<$ty_lhs> for $ty_rhs { + #[inline] + fn eq(&self, o: &$ty_lhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec> PartialOrd<$ty_rhs> for $ty_lhs { + #[inline] + fn partial_cmp(&self, o: &$ty_rhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec> PartialOrd<$ty_lhs> for $ty_rhs { + #[inline] + fn partial_cmp(&self, o: &$ty_lhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + }; +} + +/// Implements `PartialEq` and `PartialOrd` with two independent spec type parameter. +macro_rules! 
impl_cmp2 { + ($ty_common:ty, $ty_lhs:ty, $ty_rhs:ty) => { + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialEq<$ty_rhs> for $ty_lhs { + #[inline] + fn eq(&self, o: &$ty_rhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialEq<$ty_lhs> for $ty_rhs { + #[inline] + fn eq(&self, o: &$ty_lhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialOrd<$ty_rhs> for $ty_lhs { + #[inline] + fn partial_cmp(&self, o: &$ty_rhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialOrd<$ty_lhs> for $ty_rhs { + #[inline] + fn partial_cmp(&self, o: &$ty_lhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + }; +} + +/// Implements `PartialEq` and `PartialOrd` with two independent spec type parameter. +macro_rules! 
impl_cmp2_as_str { + ($ty_lhs:ty, $ty_rhs:ty) => { + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialEq<$ty_rhs> for $ty_lhs { + #[inline] + fn eq(&self, o: &$ty_rhs) -> bool { + PartialEq::eq(self.as_str(), o.as_str()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialEq<$ty_lhs> for $ty_rhs { + #[inline] + fn eq(&self, o: &$ty_lhs) -> bool { + PartialEq::eq(self.as_str(), o.as_str()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialOrd<$ty_rhs> for $ty_lhs { + #[inline] + fn partial_cmp(&self, o: &$ty_rhs) -> Option<core::cmp::Ordering> { + PartialOrd::partial_cmp(self.as_str(), o.as_str()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialOrd<$ty_lhs> for $ty_rhs { + #[inline] + fn partial_cmp(&self, o: &$ty_lhs) -> Option<core::cmp::Ordering> { + PartialOrd::partial_cmp(self.as_str(), o.as_str()) + } + } + }; +} + +/// Define the custom string slice type, and implements methods and traits. +/// +/// Methods to be implemented: +/// +/// * `pub fn new()` +/// * `pub fn new_unchecked()` +/// * `pub(crate) fn new_maybe_unchecked()` +/// * `fn new_always_unchecked()` +/// * `pub fn as_str()` +/// * `pub fn len()` +/// * `pub fn is_empty()` +/// +/// Traits to be implemented: +/// +/// * fundamental +/// + `Debug for $ty` +/// + `Eq for $ty` +/// + `Ord for $ty` +/// + `Hash for $ty` +/// * type conversion +/// + `AsRef<&str> for $ty` +/// + `AsRef<&$ty> for $ty` +/// + `From<&$ty>` for Cow<$ty>` +/// + `From<&$ty>` for Arc<$ty>` +/// + `From<&$ty>` for Box<$ty>` +/// + `From<&$ty>` for Rc<$ty>` +/// + `From<&$ty> for &str` +/// + `TryFrom<&str> for &$ty` +/// + `TryFrom<&[u8]> for &$ty` +/// * comparison (only `PartialEq` impls are listed, but `PartialOrd` is also implemented). 
+/// + `PartialEq<$ty> for $ty` +/// + `str` and `$ty` +/// - `PartialEq<str> for $ty` +/// - `PartialEq<$ty> for str` +/// - `PartialEq<&str> for $ty` +/// - `PartialEq<$ty> for &str` +/// - `PartialEq<str> for &$ty` +/// - `PartialEq<&$ty> for str` +/// + `$ty` and `$ty` +/// - `PartialEq<&$ty> for $ty` +/// - `PartialEq<$ty> for &$ty` +/// * other +/// + `Display for $ty` +/// * serde +/// + `serde::Serialize` +/// + `serde::Deserialize` +macro_rules! define_custom_string_slice { + ( + $(#[$meta:meta])* + struct $ty:ident { + validator = $validate:ident, + expecting_msg = $expecting:expr, + } + ) => { + $(#[$meta])* + // `#[derive(..)]` cannot be used here, because it adds `S: DerivedTrait` bounds automatically. + #[repr(transparent)] + #[cfg_attr(feature = "serde", derive(serde::Serialize))] + #[cfg_attr(feature = "serde", serde(bound = "S: crate::spec::Spec"))] + #[cfg_attr(feature = "serde", serde(transparent))] + pub struct $ty<S> { + /// Spec. + #[cfg_attr(feature = "serde", serde(skip))] + _spec: core::marker::PhantomData<fn() -> S>, + /// Inner data. + inner: str, + } + + impl<S: crate::spec::Spec> $ty<S> { + /// Creates a new string. + #[inline] + pub fn new(s: &str) -> Result<&Self, crate::validate::Error> { + core::convert::TryFrom::try_from(s) + } + + /// Creates a new string without validation. + /// + /// This does not validate the given string, so it is the caller's + /// responsibility to ensure the given string is valid. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + /// If not, any use of the returned value or the call of this + /// function itself may result in undefined behavior. + #[inline] + #[must_use] + pub unsafe fn new_unchecked(s: &str) -> &Self { + // SAFETY: `new_always_unchecked` requires the same precondition + // as `new_unchecked`. + unsafe { Self::new_always_unchecked(s) } + } + + /// Creates a new string maybe without validation. + /// + /// This does validation on debug build.
+ /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + #[must_use] + pub(crate) unsafe fn new_maybe_unchecked(s: &str) -> &Self { + debug_assert_eq!($validate::<S>(s), Ok(())); + // SAFETY: `new_always_unchecked` requires the same precondition + // as `new_always_unchecked`. Additionally in debug build, just + // checked the content is actually valid by `$validate::<S>(s)`. + unsafe { Self::new_always_unchecked(s) } + } + + /// Creates a new string without any validation. + /// + /// This does not validate the given string at any time. + /// + /// Intended for internal use. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + #[inline] + #[must_use] + unsafe fn new_always_unchecked(s: &str) -> &Self { + // SAFETY: the cast is safe since `Self` type has `repr(transparent)` + // attribute and the content is guaranteed as valid by the + // precondition of the function. + unsafe { &*(s as *const str as *const Self) } + } + + /// Returns `&str`. + #[inline] + #[must_use] + pub fn as_str(&self) -> &str { + self.as_ref() + } + + /// Returns the string length. + #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.as_str().len() + } + + /// Returns whether the string is empty. 
+ #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.as_str().is_empty() + } + } + + impl<S: crate::spec::Spec> core::fmt::Debug for $ty<S> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple(stringify!($ty)).field(&&self.inner).finish() + } + } + + impl<S: crate::spec::Spec> PartialEq for $ty<S> { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.inner == other.inner + } + } + + impl<S: crate::spec::Spec> Eq for $ty<S> {} + + impl<S: crate::spec::Spec> PartialOrd for $ty<S> { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + Some(self.inner.cmp(&other.inner)) + } + } + + impl<S: crate::spec::Spec> Ord for $ty<S> { + #[inline] + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.inner.cmp(&other.inner) + } + } + + impl<S: crate::spec::Spec> core::hash::Hash for $ty<S> { + #[inline] + fn hash<H: core::hash::Hasher>(&self, state: &mut H) { + self.inner.hash(state); + } + } + + impl<S: crate::spec::Spec> AsRef<str> for $ty<S> { + #[inline] + fn as_ref(&self) -> &str { + &self.inner + } + } + + impl<S: crate::spec::Spec> AsRef<$ty<S>> for $ty<S> { + #[inline] + fn as_ref(&self) -> &$ty<S> { + self + } + } + + #[cfg(feature = "alloc")] + impl<'a, S: crate::spec::Spec> From<&'a $ty<S>> for alloc::borrow::Cow<'a, $ty<S>> { + #[inline] + fn from(s: &'a $ty<S>) -> Self { + alloc::borrow::Cow::Borrowed(s) + } + } + + impl_from_slice_into_smartptr! { + ty: $ty, + smartptr: alloc::sync::Arc, + mutability: const, + } + + impl_from_slice_into_smartptr! { + ty: $ty, + smartptr: alloc::boxed::Box, + mutability: mut, + } + + impl_from_slice_into_smartptr! 
{ + ty: $ty, + smartptr: alloc::rc::Rc, + mutability: const, + } + + impl<'a, S: crate::spec::Spec> From<&'a $ty<S>> for &'a str { + #[inline] + fn from(s: &'a $ty<S>) -> &'a str { + s.as_ref() + } + } + + impl<'a, S: crate::spec::Spec> core::convert::TryFrom<&'a str> for &'a $ty<S> { + type Error = crate::validate::Error; + + #[inline] + fn try_from(s: &'a str) -> Result<Self, Self::Error> { + match $validate::<S>(s) { + // SAFETY: just checked `s` is valid as `$ty`. + Ok(()) => Ok(unsafe { $ty::new_always_unchecked(s) }), + Err(e) => Err(e), + } + } + } + + impl<'a, S: crate::spec::Spec> core::convert::TryFrom<&'a [u8]> for &'a $ty<S> { + type Error = crate::validate::Error; + + #[inline] + fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> { + let s = core::str::from_utf8(bytes).map_err(|_| crate::validate::Error::new())?; + match $validate::<S>(s) { + // SAFETY: just checked `s` is valid as `$ty`. + Ok(()) => Ok(unsafe { $ty::new_always_unchecked(s) }), + Err(e) => Err(e), + } + } + } + + impl_cmp!(str, str, $ty<S>); + impl_cmp!(str, &str, $ty<S>); + impl_cmp!(str, str, &$ty<S>); + impl_cmp2!(str, &$ty<S>, $ty<T>); + + impl<S: crate::spec::Spec> core::fmt::Display for $ty<S> { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str(self.as_str()) + } + } + + /// Serde deserializer implementation. + #[cfg(feature = "serde")] + mod __serde_slice { + use super::$ty; + + use core::{convert::TryFrom, fmt, marker::PhantomData}; + + use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, + }; + + /// Custom borrowed string visitor. 
+ #[derive(Debug, Clone, Copy)] + struct CustomStrVisitor<S>(PhantomData<fn() -> S>); + + impl<'de, S: 'de + crate::spec::Spec> Visitor<'de> for CustomStrVisitor<S> { + type Value = &'de $ty<S>; + + #[inline] + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str($expecting) + } + + #[inline] + fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E> + where + E: de::Error, + { + <&'de $ty<S> as TryFrom<&'de str>>::try_from(v).map_err(E::custom) + } + } + + // About `'de` and `'a`, see + // <https://serde.rs/lifetimes.html#the-deserializede-lifetime>. + impl<'de: 'a, 'a, S: 'de + crate::spec::Spec> Deserialize<'de> for &'a $ty<S> { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_string(CustomStrVisitor::<S>(PhantomData)) + } + } + } + }; +} + +/// Define the custom owned string type, and implements methods and traits. +/// +/// Methods to be implemented: +/// +/// * `pub fn new_unchecked()` +/// * `pub(crate) fn new_maybe_unchecked()` +/// * `pub(crate) fn new_always_unchecked()` +/// * `pub fn shrink_to_fit()` +/// +/// Traits to be implemented: +/// +/// * fundamental +/// + `Debug for $ty` +/// + `Clone for $ty` +/// + `Eq for $ty` +/// + `Ord for $ty` +/// + `Hash for $ty` +/// * type conversion +/// + `AsRef<str> for $ty` +/// + `AsRef<$slice> for $ty` +/// + `Borrow<str> for $ty` +/// + `Borrow<$slice> for $ty` +/// + `ToOwned<Owned = $ty> for $slice` +/// + `From<&$slice> for $ty` +/// + `From<$ty> for String` +/// + `From<$ty> for Cow<'_, $slice>` +/// + `From<$ty> for Box<$slice>` +/// + `TryFrom<&str> for $ty` +/// + `TryFrom<&[u8]> for $ty` +/// + `TryFrom<String> for $ty` +/// + `FromStr for $ty` +/// + `Deref<Target = $slice> for $ty` +/// * comparison (only `PartialEq` impls are listed, but `PartialOrd` is also implemented. 
+/// + `PartialEq<$ty> for $ty` +/// + `$slice` and `str` +/// - `PartialEq<$slice> for Cow<'_, str>` +/// - `PartialEq<Cow<'_, str>> for $slice` +/// - `PartialEq<&$slice> for Cow<'_, str>` +/// - `PartialEq<Cow<'_, str>> for &$slice` +/// + `$slice` and `Cow<$slice>` +/// - `PartialEq<$slice> for Cow<'_, $slice>` +/// - `PartialEq<Cow<'_, $slice>> for $slice` +/// - `PartialEq<&$slice> for Cow<'_, $slice>` +/// - `PartialEq<Cow<'_, $slice>> for &$slice` +/// + `str` and `$ty` +/// - `PartialEq<str> for $ty` +/// - `PartialEq<$ty> for str` +/// - `PartialEq<&str> for $ty` +/// - `PartialEq<$ty> for &str` +/// - `PartialEq<Cow<'_, str>> for $ty` +/// - `PartialEq<$ty> for Cow<'_, str>` +/// + `String` and `$ty` +/// - `PartialEq<String> for $ty` +/// - `PartialEq<$ty> for String` +/// + `$slice` and `$ty` +/// - `PartialEq<$slice> for $ty` +/// - `PartialEq<$ty> for $slice` +/// - `PartialEq<&$slice> for $ty` +/// - `PartialEq<$ty> for &$slice` +/// - `PartialEq<Cow<'_, $slice>> for $ty` +/// - `PartialEq<$ty> for Cow<'_, $slice>` +/// * other +/// + `Display for $ty` +/// * serde +/// + `serde::Serialize` +/// + `serde::Deserialize` +// Note that `From<$ty> for {Arc,Rc}<$slice>` is currently not implemented since +// this won't reuse allocated memory and hides internal memory reallocation. See +// <https://github.com/lo48576/iri-string/issues/20#issuecomment-1105207849>. +// However, this is not decided with firm belief or opinion, so there would be +// a chance that they are implemented in future. +#[cfg(feature = "alloc")] +macro_rules! define_custom_string_owned { + ( + $(#[$meta:meta])* + struct $ty:ident { + validator = $validate:ident, + slice = $slice:ident, + expecting_msg = $expecting:expr, + } + ) => { + $(#[$meta])* + // `#[derive(..)]` cannot be used here, because it adds `S: DerivedTrait` bounds automatically. 
+ #[cfg(feature = "alloc")] + #[cfg_attr(all(feature = "serde", feature = "alloc"), derive(serde::Serialize))] + #[cfg_attr(all(feature = "serde", feature = "alloc"), serde(bound = "S: crate::spec::Spec"))] + #[cfg_attr(all(feature = "serde", feature = "alloc"), serde(transparent))] + pub struct $ty<S> { + /// Spec. + #[cfg_attr(all(feature = "serde", feature = "alloc"), serde(skip))] + _spec: core::marker::PhantomData<fn() -> S>, + /// Inner data. + inner: alloc::string::String, + } + + impl<S: crate::spec::Spec> $ty<S> { + /// Creates a new string without validation. + /// + /// This does not validate the given string, so it is caller's + /// responsibility to ensure the given string is valid. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + /// If not, any use of the returned value or the call of this + /// function itself may result in undefined behavior. + #[inline] + #[must_use] + pub unsafe fn new_unchecked(s: alloc::string::String) -> Self { + // SAFETY: `new_always_unchecked` requires the same precondition + // as `new_always_unchecked`. + unsafe { Self::new_always_unchecked(s) } + } + + /// Creates a new string maybe without validation. + /// + /// This does not validate the given string at any time. + /// + /// Intended for internal use. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + #[inline] + #[must_use] + pub(crate) unsafe fn new_always_unchecked(s: alloc::string::String) -> Self { + // The construction itself can be written in safe Rust, but + // every other place including unsafe functions expects + // `self.inner` to be syntactically valid as `Self`. In order to + // make them safe, the construction should validate the value + // or at least should require users to validate the value by + // making the function `unsafe`. + Self { + _spec: core::marker::PhantomData, + inner: s, + } + } + + /// Creates a new string maybe without validation. 
+ /// + /// This does validation on debug build. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + #[must_use] + pub(crate) unsafe fn new_maybe_unchecked(s: alloc::string::String) -> Self { + debug_assert_eq!( + $validate::<S>(&s), + Ok(()), + "[precondition] the given string must be valid" + ); + // SAFETY: `new_always_unchecked` requires the same precondition + // as `new_always_unchecked`. Additionally in debug build, just + // checked the content is actually valid by `$validate::<S>(s)`. + unsafe { Self::new_always_unchecked(s) } + } + + /// Returns a mutable reference to the inner string buffer. + /// + /// This may be useful to implement inline modification algorithm, + /// but be careful as this method itself cannot validate the new + /// content. + /// + /// # Safety + /// + /// The content after modification must be syntactically valid as + /// `Self` type. + /// If not, any use of the returned value or the call of this + /// function itself may result in undefined behavior. + #[inline] + #[must_use] + // TODO: Use wrapper type to enforce validation on finish? + pub(crate) unsafe fn as_inner_mut(&mut self) -> &mut alloc::string::String { + &mut self.inner + } + + /// Shrinks the capacity of the inner buffer to match its length. + #[inline] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + /// Returns the internal buffer capacity in bytes. + #[inline] + #[must_use] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + /// Returns the borrowed IRI string slice. + /// + /// This is equivalent to `&*self`. 
+ #[inline] + #[must_use] + pub fn as_slice(&self) -> &$slice<S> { + self.as_ref() + } + } + + impl<S: crate::spec::Spec> core::fmt::Debug for $ty<S> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple(stringify!($ty)).field(&&self.inner).finish() + } + } + + impl<S: crate::spec::Spec> Clone for $ty<S> { + #[inline] + fn clone(&self) -> Self { + // This is safe because `self` must be valid. + Self { + _spec: core::marker::PhantomData, + inner: self.inner.clone(), + } + } + } + + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialEq<$ty<T>> for $ty<S> { + #[inline] + fn eq(&self, other: &$ty<T>) -> bool { + self.inner == other.inner + } + } + + impl<S: crate::spec::Spec> Eq for $ty<S> {} + + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialOrd<$ty<T>> for $ty<S> { + #[inline] + fn partial_cmp(&self, other: &$ty<T>) -> Option<core::cmp::Ordering> { + self.inner.partial_cmp(&other.inner) + } + } + + impl<S: crate::spec::Spec> Ord for $ty<S> { + #[inline] + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.inner.cmp(&other.inner) + } + } + + impl<S: crate::spec::Spec> core::hash::Hash for $ty<S> { + #[inline] + fn hash<H: core::hash::Hasher>(&self, state: &mut H) { + self.inner.hash(state); + } + } + + impl<S: crate::spec::Spec> AsRef<str> for $ty<S> { + #[inline] + fn as_ref(&self) -> &str { + &self.inner + } + } + + impl<S: crate::spec::Spec> AsRef<$slice<S>> for $ty<S> { + #[inline] + fn as_ref(&self) -> &$slice<S> { + // SAFETY: `$ty<S>` and `$slice<S>` requires same validation, so + // the content of `self: &$ty<S>` must be valid as `$slice<S>`. 
+ unsafe { $slice::new_always_unchecked(AsRef::<str>::as_ref(self)) } + } + } + + impl<S: crate::spec::Spec> core::borrow::Borrow<str> for $ty<S> { + #[inline] + fn borrow(&self) -> &str { + self.as_ref() + } + } + + impl<S: crate::spec::Spec> core::borrow::Borrow<$slice<S>> for $ty<S> { + #[inline] + fn borrow(&self) -> &$slice<S> { + self.as_ref() + } + } + + impl<S: crate::spec::Spec> alloc::borrow::ToOwned for $slice<S> { + type Owned = $ty<S>; + + #[inline] + fn to_owned(&self) -> Self::Owned { + self.into() + } + } + + impl<S: crate::spec::Spec> From<&'_ $slice<S>> for $ty<S> { + #[inline] + fn from(s: &$slice<S>) -> Self { + // This is safe because `s` must be valid. + $ty { + _spec: core::marker::PhantomData, + inner: alloc::string::String::from(s.as_str()), + } + } + } + + impl<S: crate::spec::Spec> From<$ty<S>> for alloc::string::String { + #[inline] + fn from(s: $ty<S>) -> Self { + s.inner + } + } + + impl<'a, S: crate::spec::Spec> From<$ty<S>> for alloc::borrow::Cow<'a, $slice<S>> { + #[inline] + fn from(s: $ty<S>) -> alloc::borrow::Cow<'a, $slice<S>> { + alloc::borrow::Cow::Owned(s) + } + } + + impl<S: crate::spec::Spec> From<$ty<S>> for alloc::boxed::Box<$slice<S>> { + #[inline] + fn from(s: $ty<S>) -> alloc::boxed::Box<$slice<S>> { + let inner: alloc::string::String = s.into(); + let buf = alloc::boxed::Box::<str>::from(inner); + // SAFETY: `$slice<S>` has `repr(transparent)` attribute, so + // the memory layouts of `Box<str>` and `Box<$slice<S>>` are + // compatible. Additionally, `$ty<S>` and `$slice<S>` require + // the same syntax (it is the macro user's responsibility to + // guarantee). 
+ unsafe { + let raw: *mut str = alloc::boxed::Box::into_raw(buf); + alloc::boxed::Box::<$slice<S>>::from_raw(raw as *mut $slice<S>) + } + } + } + + impl<S: crate::spec::Spec> core::convert::TryFrom<&'_ str> for $ty<S> { + type Error = crate::validate::Error; + + #[inline] + fn try_from(s: &str) -> Result<Self, Self::Error> { + <&$slice<S>>::try_from(s).map(Into::into) + } + } + + impl<S: crate::spec::Spec> core::convert::TryFrom<&'_ [u8]> for $ty<S> { + type Error = crate::validate::Error; + + #[inline] + fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> { + let s = core::str::from_utf8(bytes).map_err(|_| crate::validate::Error::new())?; + <&$slice<S>>::try_from(s).map(Into::into) + } + } + + impl<S: crate::spec::Spec> core::convert::TryFrom<alloc::string::String> for $ty<S> { + type Error = crate::types::CreationError<alloc::string::String>; + + #[inline] + fn try_from(s: alloc::string::String) -> Result<Self, Self::Error> { + match <&$slice<S>>::try_from(s.as_str()) { + Ok(_) => { + // This is safe because `<&$slice<S>>::try_from(s)?` ensures + // that the string `s` is valid. 
+ Ok(Self { + _spec: core::marker::PhantomData, + inner: s, + }) + } + Err(e) => Err(crate::types::CreationError::new(e, s)), + } + } + } + + impl<S: crate::spec::Spec> alloc::str::FromStr for $ty<S> { + type Err = crate::validate::Error; + + #[inline] + fn from_str(s: &str) -> Result<Self, Self::Err> { + core::convert::TryFrom::try_from(s) + } + } + + impl<S: crate::spec::Spec> core::ops::Deref for $ty<S> { + type Target = $slice<S>; + + #[inline] + fn deref(&self) -> &$slice<S> { + self.as_ref() + } + } + + impl_cmp!(str, $slice<S>, alloc::borrow::Cow<'_, str>); + impl_cmp!(str, &$slice<S>, alloc::borrow::Cow<'_, str>); + impl_cmp2_as_str!(&$slice<S>, alloc::borrow::Cow<'_, $slice<T>>); + + impl_cmp!(str, str, $ty<S>); + impl_cmp!(str, &str, $ty<S>); + impl_cmp!(str, alloc::borrow::Cow<'_, str>, $ty<S>); + impl_cmp!(str, alloc::string::String, $ty<S>); + impl_cmp2!(str, $slice<S>, $ty<T>); + impl_cmp2!(str, &$slice<S>, $ty<T>); + impl_cmp2_as_str!(alloc::borrow::Cow<'_, $slice<S>>, $ty<T>); + + impl<S: crate::spec::Spec> core::fmt::Display for $ty<S> { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str(self.as_str()) + } + } + + /// Serde deserializer implementation. + #[cfg(all(feature = "alloc", feature = "serde"))] + mod __serde_owned { + use super::$ty; + + use core::{convert::TryFrom, fmt, marker::PhantomData}; + + #[cfg(all(feature = "serde", feature = "alloc"))] + use alloc::string::String; + + use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, + }; + + /// Custom owned string visitor. 
+ #[derive(Debug, Clone, Copy)] + struct CustomStringVisitor<S>(PhantomData<fn() -> S>); + + impl<'de, S: crate::spec::Spec> Visitor<'de> for CustomStringVisitor<S> { + type Value = $ty<S>; + + #[inline] + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str($expecting) + } + + #[inline] + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: de::Error, + { + <$ty<S> as TryFrom<&str>>::try_from(v).map_err(E::custom) + } + + #[cfg(all(feature = "serde", feature = "alloc"))] + #[inline] + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: de::Error, + { + <$ty<S> as TryFrom<String>>::try_from(v).map_err(E::custom) + } + } + + impl<'de, S: crate::spec::Spec> Deserialize<'de> for $ty<S> { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(CustomStringVisitor::<S>(PhantomData)) + } + } + } + }; +} + +/// Implements trivial conversions and other useful traits between two IRI types. +/// +/// Implemented traits: +/// +/// * type conversion +/// + `AsRef<$to_slice> for $from_slice` +/// + `AsRef<$to_slice> for $from_owned` +/// + `From<$from_slice> for $to_slice` +/// + `From<$from_owned> for $to_owned` +/// + `TryFrom<&$to_slice> for &$from_slice` +/// + `TryFrom<$to_owned> for $from_owned` +/// * comparison (only `PartialEq` impls are listed, but `PartialOrd` is also implemented). 
+/// + `$from_slice` and `$to_slice` +/// - `PartialEq<$from_slice> for $to_slice` +/// - `PartialEq<$to_slice> for $from_slice` +/// - `PartialEq<&$from_slice> for $to_slice` +/// - `PartialEq<$to_slice> for &$from_slice` +/// - `PartialEq<$from_slice> for &$to_slice` +/// - `PartialEq<&$to_slice> for $from_slice` +/// - `PartialEq<$from_slice> for Cow<'_, $to_slice>` +/// - `PartialEq<Cow<'_, $to_slice>> for $from_slice` +/// - `PartialEq<&$from_slice> for Cow<'_, $to_slice>` +/// - `PartialEq<Cow<'_, $to_slice>> for &$from_slice` +/// - `PartialEq<Cow<'_, $from_slice>> for $to_slice` +/// - `PartialEq<$to_slice> for Cow<'_, $from_slice>` +/// - `PartialEq<Cow<'_, $from_slice>> for &$to_slice` +/// - `PartialEq<&$to_slice> for Cow<'_, $from_slice>` +/// + `$from_slice` and `$to_owned` +/// - `PartialEq<$from_slice> for $to_owned` +/// - `PartialEq<$to_owned> for $from_slice` +/// - `PartialEq<&$from_slice> for $to_owned` +/// - `PartialEq<$to_owned> for &$from_slice` +/// - `PartialEq<Cow<'_, $from_slice>> for $to_owned` +/// - `PartialEq<$to_owned> for Cow<'_, $from_slice>` +/// + `$from_owned` and `$to_slice` +/// - `PartialEq<$from_owned> for $to_slice` +/// - `PartialEq<$to_slice> for $from_owned` +/// - `PartialEq<$from_owned> for &$to_slice` +/// - `PartialEq<&$to_slice> for $from_owned` +/// - `PartialEq<$from_owned> for Cow<'_, $to_slice>` +/// - `PartialEq<Cow<'_, $to_slice>> for $from_owned` +/// + `$from_owned` and `$to_owned` +/// - `PartialEq<$from_owned> for $to_owned` +/// - `PartialEq<$to_owned> for $from_owned` +macro_rules! impl_trivial_conv_between_iri { + ( + from_slice: $from_slice:ident, + from_owned: $from_owned:ident, + to_slice: $to_slice:ident, + to_owned: $to_owned:ident, + ) => { + impl<S: crate::spec::Spec> AsRef<$to_slice<S>> for $from_slice<S> { + #[inline] + fn as_ref(&self) -> &$to_slice<S> { + // SAFETY: `$from_slice<S>` should be subset of `$to_slice<S>`. 
+ // The caller of `impl_trivial_conv_between_iri!` macro is + // responsible for guaranteeing that. + unsafe { <$to_slice<S>>::new_maybe_unchecked(self.as_str()) } + } + } + + #[cfg(feature = "alloc")] + impl<S: crate::spec::Spec> AsRef<$to_slice<S>> for $from_owned<S> { + #[inline] + fn as_ref(&self) -> &$to_slice<S> { + AsRef::<$from_slice<S>>::as_ref(self).as_ref() + } + } + + impl<'a, S: crate::spec::Spec> From<&'a $from_slice<S>> for &'a $to_slice<S> { + #[inline] + fn from(s: &'a $from_slice<S>) -> &'a $to_slice<S> { + s.as_ref() + } + } + + #[cfg(feature = "alloc")] + impl<S: crate::spec::Spec> From<$from_owned<S>> for $to_owned<S> { + #[inline] + fn from(s: $from_owned<S>) -> $to_owned<S> { + // SAFETY: `$from_slice<S>` should be subset of `$to_slice<S>`. + // The caller of `impl_trivial_conv_between_iri!` macro is + // responsible for guaranteeing that. + unsafe { <$to_owned<S>>::new_maybe_unchecked(s.into()) } + } + } + + impl<'a, S: crate::spec::Spec> core::convert::TryFrom<&'a $to_slice<S>> + for &'a $from_slice<S> + { + type Error = crate::validate::Error; + + #[inline] + fn try_from(s: &'a $to_slice<S>) -> Result<Self, Self::Error> { + Self::try_from(s.as_str()) + } + } + + #[cfg(feature = "alloc")] + impl<S: crate::spec::Spec> core::convert::TryFrom<$to_owned<S>> for $from_owned<S> { + type Error = crate::types::CreationError<$to_owned<S>>; + + fn try_from(s: $to_owned<S>) -> Result<Self, Self::Error> { + match <&$from_slice<S>>::try_from(s.as_str()) { + // SAFETY: just checked `s.as_str()` is valid as `$from_slice<S>`, and it + // requires the same syntax as `$from_owned<S>`. 
+ Ok(_) => Ok(unsafe { <$from_owned<S>>::new_always_unchecked(s.into()) }), + Err(e) => Err(crate::types::CreationError::new(e, s)), + } + } + } + + impl_cmp2_as_str!($from_slice<S>, $to_slice<T>); + impl_cmp2_as_str!(&$from_slice<S>, $to_slice<T>); + impl_cmp2_as_str!($from_slice<S>, &$to_slice<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_slice<S>, alloc::borrow::Cow<'_, $to_slice<T>>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!(&$from_slice<S>, alloc::borrow::Cow<'_, $to_slice<T>>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!(alloc::borrow::Cow<'_, $from_slice<S>>, $to_slice<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!(alloc::borrow::Cow<'_, $from_slice<S>>, &$to_slice<T>); + + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_slice<S>, $to_owned<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!(&$from_slice<S>, $to_owned<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!(alloc::borrow::Cow<'_, $from_slice<S>>, $to_owned<T>); + + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_owned<S>, $to_slice<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_owned<S>, &$to_slice<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_owned<S>, alloc::borrow::Cow<'_, $to_slice<T>>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_owned<S>, $to_owned<T>); + }; +} diff --git a/vendor/iri-string/src/types/generic/normal.rs b/vendor/iri-string/src/types/generic/normal.rs new file mode 100644 index 00000000..641a022b --- /dev/null +++ b/vendor/iri-string/src/types/generic/normal.rs @@ -0,0 +1,944 @@ +//! Usual absolute IRI (fragment part being allowed). 
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;

use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::mask_password::password_range_to_hide;
use crate::mask_password::PasswordMasked;
use crate::normalize::{Error, NormalizationInput, Normalized, NormalizednessCheckMode};
use crate::parser::trusted as trusted_parser;
#[cfg(feature = "alloc")]
use crate::raw;
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr};
#[cfg(feature = "alloc")]
use crate::types::{RiAbsoluteString, RiFragmentString, RiReferenceString};
use crate::validate::iri;

define_custom_string_slice! {
    /// A borrowed string of an absolute IRI possibly with fragment part.
    ///
    /// This corresponds to [`IRI` rule] in [RFC 3987] (and [`URI` rule] in [RFC 3986]).
    /// The rule for `IRI` is `scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`.
    /// In other words, this is [`RiAbsoluteStr`] with fragment part allowed.
    ///
    /// # Valid values
    ///
    /// This type can have an IRI (which is absolute, and may have fragment part).
    ///
    /// ```
    /// # use iri_string::types::IriStr;
    /// assert!(IriStr::new("https://user:pass@example.com:8080").is_ok());
    /// assert!(IriStr::new("https://example.com/").is_ok());
    /// assert!(IriStr::new("https://example.com/foo?bar=baz").is_ok());
    /// assert!(IriStr::new("https://example.com/foo?bar=baz#qux").is_ok());
    /// assert!(IriStr::new("foo:bar").is_ok());
    /// assert!(IriStr::new("foo:").is_ok());
    /// // `foo://.../` below are all allowed. See the crate documentation for detail.
    /// assert!(IriStr::new("foo:/").is_ok());
    /// assert!(IriStr::new("foo://").is_ok());
    /// assert!(IriStr::new("foo:///").is_ok());
    /// assert!(IriStr::new("foo:////").is_ok());
    /// assert!(IriStr::new("foo://///").is_ok());
    /// ```
    ///
    /// Relative IRI reference is not allowed.
    ///
    /// ```
    /// # use iri_string::types::IriStr;
    /// // This is relative path.
    /// assert!(IriStr::new("foo/bar").is_err());
    /// // `/foo/bar` is an absolute path, but it is authority-relative.
    /// assert!(IriStr::new("/foo/bar").is_err());
    /// // `//foo/bar` is termed "network-path reference",
    /// // or usually called "protocol-relative reference".
    /// assert!(IriStr::new("//foo/bar").is_err());
    /// // Same-document reference is relative.
    /// assert!(IriStr::new("#foo").is_err());
    /// // Empty string is not a valid absolute IRI.
    /// assert!(IriStr::new("").is_err());
    /// ```
    ///
    /// Some characters and sequences cannot be used in an IRI.
    ///
    /// ```
    /// # use iri_string::types::IriStr;
    /// // `<` and `>` cannot directly appear in an IRI.
    /// assert!(IriStr::new("<not allowed>").is_err());
    /// // Broken percent encoding cannot appear in an IRI.
    /// assert!(IriStr::new("%").is_err());
    /// assert!(IriStr::new("%GG").is_err());
    /// ```
    ///
    /// [RFC 3986]: https://tools.ietf.org/html/rfc3986
    /// [RFC 3987]: https://tools.ietf.org/html/rfc3987
    /// [`IRI` rule]: https://tools.ietf.org/html/rfc3987#section-2.2
    /// [`URI` rule]: https://tools.ietf.org/html/rfc3986#section-3
    /// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html
    struct RiStr {
        validator = iri,
        expecting_msg = "IRI string",
    }
}

#[cfg(feature = "alloc")]
define_custom_string_owned! {
    /// An owned string of an absolute IRI possibly with fragment part.
    ///
    /// This corresponds to [`IRI` rule] in [RFC 3987] (and [`URI` rule] in [RFC 3986]).
    /// The rule for `IRI` is `scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`.
    /// In other words, this is [`RiAbsoluteString`] with fragment part allowed.
    ///
    /// For details, see the document for [`RiStr`].
    ///
    /// Enabled by `alloc` or `std` feature.
    ///
    /// [RFC 3986]: https://tools.ietf.org/html/rfc3986
    /// [RFC 3987]: https://tools.ietf.org/html/rfc3987
    /// [`IRI` rule]: https://tools.ietf.org/html/rfc3987#section-2.2
    /// [`URI` rule]: https://tools.ietf.org/html/rfc3986#section-3
    /// [`RiAbsoluteString`]: struct.RiAbsoluteString.html
    struct RiString {
        validator = iri,
        slice = RiStr,
        expecting_msg = "IRI string",
    }
}

impl<S: Spec> RiStr<S> {
    /// Splits the IRI into an absolute IRI part and a fragment part.
    ///
    /// A leading `#` character is truncated if the fragment part exists.
    ///
    /// # Examples
    ///
    /// If the IRI has a fragment part, `Some(_)` is returned.
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error};
    /// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?;
    /// let (absolute, fragment) = iri.to_absolute_and_fragment();
    /// let fragment_expected = IriFragmentStr::new("corge")?;
    /// assert_eq!(absolute, "foo://bar/baz?qux=quux");
    /// assert_eq!(fragment, Some(fragment_expected));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// When the fragment part exists but is empty string, `Some(_)` is returned.
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error};
    /// let iri = IriStr::new("foo://bar/baz?qux=quux#")?;
    /// let (absolute, fragment) = iri.to_absolute_and_fragment();
    /// let fragment_expected = IriFragmentStr::new("")?;
    /// assert_eq!(absolute, "foo://bar/baz?qux=quux");
    /// assert_eq!(fragment, Some(fragment_expected));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// If the IRI has no fragment, `None` is returned.
+ /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux")?; + /// let (absolute, fragment) = iri.to_absolute_and_fragment(); + /// assert_eq!(absolute, "foo://bar/baz?qux=quux"); + /// assert_eq!(fragment, None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn to_absolute_and_fragment(&self) -> (&RiAbsoluteStr<S>, Option<&RiFragmentStr<S>>) { + let (prefix, fragment) = trusted_parser::split_fragment(self.as_str()); + // SAFETY: an IRI without fragment part is also an absolute IRI. + let prefix = unsafe { RiAbsoluteStr::new_maybe_unchecked(prefix) }; + let fragment = fragment.map(|fragment| { + // SAFETY: `trusted_parser::split_fragment()` must return a valid fragment component. + unsafe { RiFragmentStr::new_maybe_unchecked(fragment) } + }); + + (prefix, fragment) + } + + /// Strips the fragment part if exists, and returns [`&RiAbsoluteStr`][`RiAbsoluteStr`]. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?; + /// assert_eq!(iri.to_absolute(), "foo://bar/baz?qux=quux"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux")?; + /// assert_eq!(iri.to_absolute(), "foo://bar/baz?qux=quux"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html + #[must_use] + pub fn to_absolute(&self) -> &RiAbsoluteStr<S> { + let prefix_len = trusted_parser::split_fragment(self.as_str()).0.len(); + // SAFETY: IRI without the fragment part (including a leading `#` character) + // is also an absolute IRI. + unsafe { RiAbsoluteStr::new_maybe_unchecked(&self.as_str()[..prefix_len]) } + } + + /// Returns Ok`(())` if the IRI is normalizable by the RFC 3986 algorithm. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/%2e/bar/..")?; + /// assert!(iri.ensure_rfc3986_normalizable().is_ok()); + /// + /// let iri2 = IriStr::new("scheme:/..//bar")?; + /// // The normalization result would be `scheme://bar` according to RFC + /// // 3986, but it is unintended and should be treated as a failure. + /// // This crate automatically handles this case so that `.normalize()` won't fail. + /// assert!(!iri.ensure_rfc3986_normalizable().is_err()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> { + NormalizationInput::from(self).ensure_rfc3986_normalizable() + } + + /// Returns `true` if the IRI is already normalized. + /// + /// This returns the same result as `self.normalize().to_string() == self`, + /// but does this more efficiently without heap allocation. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?; + /// assert!(!iri.is_normalized()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query#fragment"); + /// assert!(normalized.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:/.///foo")?; + /// // Already normalized. 
+ /// assert!(iri.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:relative/..//not-a-host")?; + /// // Default normalization algorithm assumes the path part to be NOT opaque. + /// assert!(!iri.is_normalized()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "scheme:/.//not-a-host"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + #[inline] + pub fn is_normalized(&self) -> bool { + trusted_parser::is_normalized::<S>(self.as_str(), NormalizednessCheckMode::Default) + } + + /// Returns `true` if the IRI is already normalized in the sense of RFC 3986. + /// + /// This returns the same result as + /// `self.ensure_rfc3986_normalizable().is_ok() && (self.normalize().to_string() == self)`, + /// but does this more efficiently without heap allocation. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?; + /// assert!(!iri.is_normalized_rfc3986()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query#fragment"); + /// assert!(normalized.is_normalized_rfc3986()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:/.///foo")?; + /// // Not normalized in the sense of RFC 3986.
+ /// assert!(!iri.is_normalized_rfc3986()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:relative/..//not-a-host")?; + /// // RFC 3986 normalization algorithm assumes the path part to be NOT opaque. + /// assert!(!iri.is_normalized_rfc3986()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "scheme:/.//not-a-host"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + #[inline] + pub fn is_normalized_rfc3986(&self) -> bool { + trusted_parser::is_normalized::<S>(self.as_str(), NormalizednessCheckMode::Rfc3986) + } + + /// Returns `true` if the IRI is already normalized in the sense of + /// [`normalize_but_preserve_authorityless_relative_path`] method. + /// + /// This returns the same result as + /// `self.normalize_but_preserve_authorityless_relative_path().to_string() == self`, + /// but does this more efficiently without heap allocation. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?; + /// assert!(!iri.is_normalized_but_authorityless_relative_path_preserved()); + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query#fragment"); + /// assert!(normalized.is_normalized_but_authorityless_relative_path_preserved()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:/.///foo")?; + /// // Already normalized in the sense of + /// // `normalize_but_preserve_authorityless_relative_path()` method. + /// assert!(iri.is_normalized_but_authorityless_relative_path_preserved()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:relative/..//not-a-host")?; + /// // Relative path is treated as opaque since the authority component is absent. + /// assert!(iri.is_normalized_but_authorityless_relative_path_preserved()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [`normalize_but_preserve_authorityless_relative_path`]: + /// `Self::normalize_but_preserve_authorityless_relative_path` + #[must_use] + #[inline] + pub fn is_normalized_but_authorityless_relative_path_preserved(&self) -> bool { + trusted_parser::is_normalized::<S>( + self.as_str(), + NormalizednessCheckMode::PreserveAuthoritylessRelativePath, + ) + } + + /// Returns the normalized IRI.
+ /// + /// # Notes + /// + /// For some abnormal IRIs, the normalization can produce a semantically + /// incorrect string that looks syntactically valid. To avoid security + /// issues by this trap, the normalization algorithm by this crate + /// automatically applies the workaround. + /// + /// If you worry about this, test by [`RiStr::ensure_rfc3986_normalizable`] + /// method or [`Normalized::ensure_rfc3986_normalizable`] before using the + /// result string. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?; + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query#fragment"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn normalize(&self) -> Normalized<'_, Self> { + Normalized::from_input(NormalizationInput::from(self)).and_normalize() + } + + /// Returns the normalized IRI, but preserving dot segments in relative path + /// if the authority component is absent. + /// + /// This normalization would be similar to that of [WHATWG URL Standard] + /// while this implementation is not guaranteed to strictly follow the spec. + /// + /// Note that this normalization algorithm is not compatible with RFC 3986 + /// algorithm for some inputs. + /// + /// Note that case normalization and percent-encoding normalization will + /// still be applied to any path.
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?; + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query#fragment"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:relative/../f%6f%6f")?; + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "scheme:relative/../foo"); + /// // `.normalize()` would normalize this to `scheme:/foo`. + /// # assert_eq!(iri.normalize().to_dedicated_string(), "scheme:/foo"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [WHATWG URL Standard]: https://url.spec.whatwg.org/ + #[inline] + #[must_use] + pub fn normalize_but_preserve_authorityless_relative_path(&self) -> Normalized<'_, Self> { + Normalized::from_input(NormalizationInput::from(self)) + .and_normalize_but_preserve_authorityless_relative_path() + } + + /// Returns the proxy to the IRI with password masking feature. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query"); + /// + /// assert_eq!( + /// masked.replace_password("${password}").to_string(), + /// "http://user:${password}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn mask_password(&self) -> PasswordMasked<'_, Self> { + PasswordMasked::new(self) + } +} + +/// Components getters. +impl<S: Spec> RiStr<S> { + /// Returns the scheme. + /// + /// The following colon is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.scheme_str(), "http"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn scheme_str(&self) -> &str { + trusted_parser::extract_scheme_absolute(self.as_str()) + } + + /// Returns the authority. + /// + /// The leading `//` is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.authority_str(), Some("example.com")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_str(&self) -> Option<&str> { + trusted_parser::extract_authority_absolute(self.as_str()) + } + + /// Returns the path. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.path_str(), "/pathpath"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn path_str(&self) -> &str { + trusted_parser::extract_path_absolute(self.as_str()) + } + + /// Returns the query. + /// + /// The leading question mark (`?`) is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriQueryStr, IriStr}; + /// + /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// let query = IriQueryStr::new("queryquery")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query(&self) -> Option<&RiQueryStr<S>> { + AsRef::<RiReferenceStr<S>>::as_ref(self).query() + } + + /// Returns the query in a raw string slice. + /// + /// The leading question mark (`?`) is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.query_str(), Some("queryquery")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query_str(&self) -> Option<&str> { + trusted_parser::extract_query(self.as_str()) + } + + /// Returns the fragment part if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?; + /// let fragment = IriFragmentStr::new("corge")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#")?; + /// let fragment = IriFragmentStr::new("")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn fragment(&self) -> Option<&RiFragmentStr<S>> { + AsRef::<RiReferenceStr<S>>::as_ref(self).fragment() + } + + /// Returns the fragment part as a raw string slice if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?; + /// assert_eq!(iri.fragment_str(), Some("corge")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#")?; + /// assert_eq!(iri.fragment_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux")?; + /// assert_eq!(iri.fragment_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn fragment_str(&self) -> Option<&str> { + AsRef::<RiReferenceStr<S>>::as_ref(self).fragment_str() + } + + /// Returns the authority components. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?; + /// let authority = iri.authority_components() + /// .expect("authority is available"); + /// assert_eq!(authority.userinfo(), Some("user:pass")); + /// assert_eq!(authority.host(), "example.com"); + /// assert_eq!(authority.port(), Some("8080")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.authority_str(), None); + /// assert!(iri.authority_components().is_none()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> { + AuthorityComponents::from_iri(self.as_ref()) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> RiString<S> { + /// Splits the IRI into an absolute IRI part and a fragment part.
+ /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// ``` + /// use std::convert::TryFrom; + /// # use iri_string::{spec::IriSpec, types::{IriFragmentString, IriString}, validate::Error}; + /// let iri = "foo://bar/baz?qux=quux#corge".parse::<IriString>()?; + /// let (absolute, fragment) = iri.into_absolute_and_fragment(); + /// let fragment_expected = IriFragmentString::try_from("corge".to_owned()) + /// .map_err(|e| e.validation_error())?; + /// assert_eq!(absolute, "foo://bar/baz?qux=quux"); + /// assert_eq!(fragment, Some(fragment_expected)); + /// # Ok::<_, Error>(()) + /// + /// ``` + /// + /// ``` + /// use std::convert::TryFrom; + /// # use iri_string::{spec::IriSpec, types::{IriFragmentString, IriString}, validate::Error}; + /// let iri = "foo://bar/baz?qux=quux#".parse::<IriString>()?; + /// let (absolute, fragment) = iri.into_absolute_and_fragment(); + /// let fragment_expected = IriFragmentString::try_from("".to_owned()) + /// .map_err(|e| e.validation_error())?; + /// assert_eq!(absolute, "foo://bar/baz?qux=quux"); + /// assert_eq!(fragment, Some(fragment_expected)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// use std::convert::TryFrom; + /// # use iri_string::{spec::IriSpec, types::IriString, validate::Error}; + /// let iri = "foo://bar/baz?qux=quux".parse::<IriString>()?; + /// let (absolute, fragment) = iri.into_absolute_and_fragment(); + /// assert_eq!(absolute, "foo://bar/baz?qux=quux"); + /// assert_eq!(fragment, None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn into_absolute_and_fragment(self) -> (RiAbsoluteString<S>, Option<RiFragmentString<S>>) { + let (prefix, fragment) = raw::split_fragment_owned(self.into()); + // SAFETY: an IRI without fragment part is also an absolute IRI. 
+ let prefix = unsafe { RiAbsoluteString::new_maybe_unchecked(prefix) }; + let fragment = fragment.map(|fragment| { + // SAFETY: the string returned by `raw::split_fragment_owned()` must + // be the fragment part, and must also be a substring of the source IRI. + unsafe { RiFragmentString::new_maybe_unchecked(fragment) } + }); + + (prefix, fragment) + } + + /// Strips the fragment part if exists, and returns an [`RiAbsoluteString`]. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriString, validate::Error}; + /// let iri = "foo://bar/baz?qux=quux#corge".parse::<IriString>()?; + /// assert_eq!(iri.into_absolute(), "foo://bar/baz?qux=quux"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriString, validate::Error}; + /// let iri = "foo://bar/baz?qux=quux".parse::<IriString>()?; + /// assert_eq!(iri.into_absolute(), "foo://bar/baz?qux=quux"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [`RiAbsoluteString`]: struct.RiAbsoluteString.html + #[must_use] + pub fn into_absolute(self) -> RiAbsoluteString<S> { + let mut s: String = self.into(); + raw::remove_fragment(&mut s); + // SAFETY: an IRI without fragment part is also an absolute IRI. + unsafe { RiAbsoluteString::new_maybe_unchecked(s) } + } + + /// Sets the fragment part to the given string. + /// + /// Removes fragment part (and following `#` character) if `None` is given. + pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) { + raw::set_fragment(&mut self.inner, fragment.map(AsRef::as_ref)); + debug_assert!(iri::<S>(&self.inner).is_ok()); + } + + /// Removes the password completely (including separator colon) from `self` even if it is empty. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriString; + /// + /// let mut iri = IriString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Even if the password is empty, the password and separator will be removed. + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriString; + /// + /// let mut iri = IriString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) => v, + None => return, + }; + let separator_colon = pw_range.start - 1; + // SAFETY: the IRI must still be valid after the password component and + // the leading separator colon is removed. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(separator_colon..pw_range.end); + debug_assert!( + RiStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } + + /// Replaces the non-empty password in `self` to the empty password. + /// + /// This leaves the separator colon if the password part was available. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriString; + /// + /// let mut iri = IriString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the password is empty, it is left as is. + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriString; + /// + /// let mut iri = IriString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_nonempty_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) if !v.is_empty() => v, + _ => return, + }; + debug_assert_eq!( + self.as_str().as_bytes().get(pw_range.start - 1).copied(), + Some(b':'), + "[validity] the password component must be prefixed with a separator colon" + ); + // SAFETY: the IRI must still be valid if the password is replaced with + // empty string. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(pw_range); + debug_assert!( + RiStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } +} + +impl_trivial_conv_between_iri! { + from_slice: RiStr, + from_owned: RiString, + to_slice: RiReferenceStr, + to_owned: RiReferenceString, +} diff --git a/vendor/iri-string/src/types/generic/query.rs b/vendor/iri-string/src/types/generic/query.rs new file mode 100644 index 00000000..40057f23 --- /dev/null +++ b/vendor/iri-string/src/types/generic/query.rs @@ -0,0 +1,135 @@ +//! Query string. + +use crate::{ + spec::Spec, + validate::{query, Error}, +}; + +define_custom_string_slice! 
{ + /// A borrowed slice of an IRI query (i.e. after the first `?` and before the first `#`). + /// + /// This corresponds to [`iquery` rule] in [RFC 3987] (and [`query` rule] in [RFC 3986]). + /// The rule for `iquery` is `*( ipchar / iprivate / "/" / "?" )`. + /// + /// # Valid values + /// + /// This type can have an IRI query. + /// Note that the IRI `foo://bar/baz?qux=quux` has the query `qux=quux`, **not** `?qux=quux`. + /// + /// ``` + /// use iri_string::types::IriQueryStr; + /// assert!(IriQueryStr::new("").is_ok()); + /// assert!(IriQueryStr::new("foo").is_ok()); + /// assert!(IriQueryStr::new("foo/bar").is_ok()); + /// assert!(IriQueryStr::new("/foo/bar").is_ok()); + /// assert!(IriQueryStr::new("//foo/bar").is_ok()); + /// assert!(IriQueryStr::new("https://user:pass@example.com:8080").is_ok()); + /// assert!(IriQueryStr::new("https://example.com/").is_ok()); + /// // Question sign `?` can appear in an IRI query.
+ /// assert!(IriQueryStr::new("query?again").is_ok()); + /// ``` + /// + /// Some characters and sequences cannot be used in a query. + /// + /// ``` + /// use iri_string::types::IriQueryStr; + /// // `<` and `>` cannot directly appear in an IRI reference. + /// assert!(IriQueryStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI reference. + /// assert!(IriQueryStr::new("%").is_err()); + /// assert!(IriQueryStr::new("%GG").is_err()); + /// // Hash sign `#` cannot appear in an IRI query. + /// assert!(IriQueryStr::new("#hash").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`query` rule]: https://tools.ietf.org/html/rfc3986#section-3.4 + /// [`iquery` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + struct RiQueryStr { + validator = query, + expecting_msg = "IRI query string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of an IRI query (i.e. after the first `?` and before the first `#`). + /// + /// This corresponds to [`iquery` rule] in [RFC 3987] (and [`query` rule] in [RFC 3986]). + /// The rule for `iquery` is `*( ipchar / iprivate / "/" / "?" )`. + /// + /// For details, see the documentation for [`RiQueryStr`]. + /// + /// Enabled by `alloc` or `std` feature. + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`query` rule]: https://tools.ietf.org/html/rfc3986#section-3.4 + /// [`iquery` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`RiQueryStr`]: struct.RiQueryStr.html + struct RiQueryString { + validator = query, + slice = RiQueryStr, + expecting_msg = "IRI query string", + } +} + +impl<S: Spec> RiQueryStr<S> { + /// Creates a new `&RiQueryStr` from the query part prefixed by `?`.
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::types::IriQueryStr; + /// assert!(IriQueryStr::from_prefixed("?").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?foo").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?foo/bar").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?/foo/bar").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?//foo/bar").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?https://user:pass@example.com:8080").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?https://example.com/").is_ok()); + /// // Question sign `?` can appear in an IRI query. + /// assert!(IriQueryStr::from_prefixed("?query?again").is_ok()); + /// + /// // `<` and `>` cannot directly appear in an IRI. + /// assert!(IriQueryStr::from_prefixed("?<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI. + /// assert!(IriQueryStr::from_prefixed("?%").is_err()); + /// assert!(IriQueryStr::from_prefixed("?%GG").is_err()); + /// // `?` prefix is expected. + /// assert!(IriQueryStr::from_prefixed("").is_err()); + /// assert!(IriQueryStr::from_prefixed("foo").is_err()); + /// // Hash sign `#` cannot appear in an IRI query. + /// assert!(IriQueryStr::from_prefixed("?#hash").is_err()); + /// ``` + /// + /// # Errors + /// + /// Returns an error if `s` does not start with `?`, or if the rest of the + /// string after that leading `?` is rejected by the `TryFrom<&str>` conversion. + pub fn from_prefixed(s: &str) -> Result<&Self, Error> { + if !s.starts_with('?') { + return Err(Error::new()); + } + TryFrom::try_from(&s[1..]) + } +} diff --git a/vendor/iri-string/src/types/generic/reference.rs b/vendor/iri-string/src/types/generic/reference.rs new file mode 100644 index 00000000..9ac62281 --- /dev/null +++ b/vendor/iri-string/src/types/generic/reference.rs @@ -0,0 +1,697 @@ +//! IRI reference.
+ +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +use crate::components::AuthorityComponents; +#[cfg(feature = "alloc")] +use crate::mask_password::password_range_to_hide; +use crate::mask_password::PasswordMasked; +use crate::normalize::Normalized; +use crate::parser::trusted as trusted_parser; +#[cfg(feature = "alloc")] +use crate::raw; +use crate::resolve::FixedBaseResolver; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiRelativeStr, RiStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiRelativeString, RiString}; +#[cfg(feature = "alloc")] +use crate::validate::iri; +use crate::validate::iri_reference; + +define_custom_string_slice! { + /// A borrowed string of an IRI reference (absolute or relative). + /// + /// This corresponds to [`IRI-reference` rule] in [RFC 3987] + /// (and [`URI-reference` rule] in [RFC 3986]). + /// The rule for `IRI-reference` is `IRI / irelative-ref`. + /// In other words, this is the union of [`RiStr`] and [`RiRelativeStr`]. + /// + /// # Valid values + /// + /// This type can have an IRI reference (which can be absolute or relative). + /// + /// ``` + /// # use iri_string::types::IriReferenceStr; + /// assert!(IriReferenceStr::new("https://user:pass@example.com:8080").is_ok()); + /// assert!(IriReferenceStr::new("https://example.com/").is_ok()); + /// assert!(IriReferenceStr::new("https://example.com/foo?bar=baz").is_ok()); + /// assert!(IriReferenceStr::new("https://example.com/foo?bar=baz#qux").is_ok()); + /// assert!(IriReferenceStr::new("foo:bar").is_ok()); + /// assert!(IriReferenceStr::new("foo:").is_ok()); + /// // `foo://.../` below are all allowed. See the crate documentation for detail.
+ /// assert!(IriReferenceStr::new("foo:/").is_ok()); + /// assert!(IriReferenceStr::new("foo://").is_ok()); + /// assert!(IriReferenceStr::new("foo:///").is_ok()); + /// assert!(IriReferenceStr::new("foo:////").is_ok()); + /// assert!(IriReferenceStr::new("foo://///").is_ok()); + /// assert!(IriReferenceStr::new("foo/bar").is_ok()); + /// assert!(IriReferenceStr::new("/foo/bar").is_ok()); + /// assert!(IriReferenceStr::new("//foo/bar").is_ok()); + /// assert!(IriReferenceStr::new("#foo").is_ok()); + /// ``` + /// + /// Some characters and sequences cannot be used in an IRI reference. + /// + /// ``` + /// # use iri_string::types::IriReferenceStr; + /// // `<` and `>` cannot directly appear in an IRI reference. + /// assert!(IriReferenceStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI reference. + /// assert!(IriReferenceStr::new("%").is_err()); + /// assert!(IriReferenceStr::new("%GG").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`IRI-reference` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`URI-reference` rule]: https://tools.ietf.org/html/rfc3986#section-4.1 + /// [`RiRelativeStr`]: struct.RiRelativeStr.html + /// [`RiStr`]: struct.RiStr.html + struct RiReferenceStr { + validator = iri_reference, + expecting_msg = "IRI reference string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of an IRI reference (absolute or relative). + /// + /// This corresponds to [`IRI-reference` rule] in [RFC 3987] + /// (and [`URI-reference` rule] in [RFC 3986]). + /// The rule for `IRI-reference` is `IRI / irelative-ref`. + /// In other words, this is the union of [`RiString`] and [`RiRelativeString`]. + /// + /// For details, see the documentation for [`RiReferenceStr`]. + /// + /// Enabled by `alloc` or `std` feature.
+ /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`IRI-reference` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`URI-reference` rule]: https://tools.ietf.org/html/rfc3986#section-4.1 + /// [`RiReferenceStr`]: struct.RiReferenceStr.html + /// [`RiRelativeString`]: struct.RiRelativeString.html + /// [`RiString`]: struct.RiString.html + struct RiReferenceString { + validator = iri_reference, + slice = RiReferenceStr, + expecting_msg = "IRI reference string", + } +} + +impl<S: Spec> RiReferenceStr<S> { + /// Returns the string as [`&RiStr`][`RiStr`], if it is valid as an IRI. + /// + /// If it is not an IRI, then [`&RiRelativeStr`][`RiRelativeStr`] is returned as `Err(_)`. + /// + /// [`RiRelativeStr`]: struct.RiRelativeStr.html + /// [`RiStr`]: struct.RiStr.html + pub fn to_iri(&self) -> Result<&RiStr<S>, &RiRelativeStr<S>> { + // Check with `IRI` rule first, because the syntax rule for `IRI-reference` is + // `IRI / irelative-ref`. + // + // > Some productions are ambiguous. The "first-match-wins" (a.k.a. + // > "greedy") algorithm applies. For details, see [RFC3986]. + // > + // > --- <https://tools.ietf.org/html/rfc3987#section-2.2>. + + <&RiStr<S>>::try_from(self.as_str()).map_err(|_| { + // SAFETY: if an IRI reference is not an IRI, then it is a relative IRI. + // See the RFC 3987 syntax rule `IRI-reference = IRI / irelative-ref`. + unsafe { RiRelativeStr::new_maybe_unchecked(self.as_str()) } + }) + } + + /// Returns the string as [`&RiRelativeStr`][`RiRelativeStr`], if it is valid as a relative IRI reference. + /// + /// If it is an IRI instead, then [`&RiStr`][`RiStr`] is returned as `Err(_)`. 
+ /// + /// [`RiRelativeStr`]: struct.RiRelativeStr.html + /// [`RiStr`]: struct.RiStr.html + pub fn to_relative_iri(&self) -> Result<&RiRelativeStr<S>, &RiStr<S>> { + match self.to_iri() { + Ok(iri) => Err(iri), + Err(relative) => Ok(relative), + } + } + + /// Returns resolved IRI against the given base IRI. + /// + /// For IRI reference resolution output examples, see [RFC 3986 section 5.4]. + /// + /// If you are going to resolve multiple references against the common base, + /// consider using [`FixedBaseResolver`]. + /// + /// # Strictness + /// + /// The IRI parsers provided by this crate is strict (e.g. `http:g` is + /// always interpreted as a composition of the scheme `http` and the path + /// `g`), so backward compatible parsing and resolution are not provided. + /// About parser and resolver strictness, see [RFC 3986 section 5.4.2]: + /// + /// > Some parsers allow the scheme name to be present in a relative + /// > reference if it is the same as the base URI scheme. This is considered + /// > to be a loophole in prior specifications of partial URI + /// > [RFC1630](https://tools.ietf.org/html/rfc1630). Its use should be + /// > avoided but is allowed for backward compatibility. + /// > + /// > --- <https://tools.ietf.org/html/rfc3986#section-5.4.2> + /// + /// # Failures + /// + /// This method itself does not fail, but IRI resolution without WHATWG URL + /// Standard serialization can fail in some minor cases. + /// + /// To see examples of such unresolvable IRIs, visit the documentation + /// for [`normalize`][`crate::normalize`] module. + /// + /// [RFC 3986 section 5.4]: https://tools.ietf.org/html/rfc3986#section-5.4 + /// [RFC 3986 section 5.4.2]: https://tools.ietf.org/html/rfc3986#section-5.4.2 + pub fn resolve_against<'a>(&'a self, base: &'a RiAbsoluteStr<S>) -> Normalized<'a, RiStr<S>> { + FixedBaseResolver::new(base).resolve(self.as_ref()) + } + + /// Returns the proxy to the IRI with password masking feature. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query"); + /// + /// assert_eq!( + /// masked.replace_password("${password}").to_string(), + /// "http://user:${password}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn mask_password(&self) -> PasswordMasked<'_, Self> { + PasswordMasked::new(self) + } +} + +/// Components getters. +impl<S: Spec> RiReferenceStr<S> { + /// Returns the scheme. + /// + /// The following colon is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.scheme_str(), Some("http")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("foo/bar:baz")?; + /// assert_eq!(iri.scheme_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn scheme_str(&self) -> Option<&str> { + trusted_parser::extract_scheme(self.as_str()) + } + + /// Returns the authority. + /// + /// The leading `//` is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.authority_str(), Some("example.com")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("foo/bar:baz")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_str(&self) -> Option<&str> { + trusted_parser::extract_authority(self.as_str()) + } + + /// Returns the path. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.path_str(), "/pathpath"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("foo/bar:baz")?; + /// assert_eq!(iri.path_str(), "foo/bar:baz"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn path_str(&self) -> &str { + trusted_parser::extract_path(self.as_str()) + } + + /// Returns the 
query. + /// + /// The leading question mark (`?`) is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriQueryStr, IriReferenceStr}; + /// + /// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// let query = IriQueryStr::new("queryquery")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query(), None); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriQueryStr, IriReferenceStr}; + /// + /// let iri = IriReferenceStr::new("foo/bar:baz?")?; + /// let query = IriQueryStr::new("")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query(&self) -> Option<&RiQueryStr<S>> { + trusted_parser::extract_query(self.as_str()).map(|query| { + // SAFETY: `extract_query` returns the query part of an IRI, and the + // returned string should have only valid characters since is the + // substring of the source IRI. + unsafe { RiQueryStr::new_maybe_unchecked(query) } + }) + } + + /// Returns the query as a raw string slice. + /// + /// The leading question mark (`?`) is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.query_str(), Some("queryquery")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("foo/bar:baz?")?; + /// assert_eq!(iri.query_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query_str(&self) -> Option<&str> { + trusted_parser::extract_query(self.as_str()) + } + + /// Returns the fragment part if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// If the IRI has a fragment part, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#corge")?; + /// let fragment = IriFragmentStr::new("corge")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error}; + /// let iri = IriReferenceStr::new("#foo")?; + /// let fragment = IriFragmentStr::new("foo")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// When the fragment part exists but is empty string, `Some(_)` is returned. 
+ /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#")?; + /// let fragment = IriFragmentStr::new("")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error}; + /// let iri = IriReferenceStr::new("#")?; + /// let fragment = IriFragmentStr::new("")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the IRI has no fragment, `None` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn fragment(&self) -> Option<&RiFragmentStr<S>> { + trusted_parser::extract_fragment(self.as_str()).map(|fragment| { + // SAFETY: `extract_fragment` returns the fragment part of an IRI, + // and the returned string should have only valid characters since + // is the substring of the source IRI. + unsafe { RiFragmentStr::new_maybe_unchecked(fragment) } + }) + } + + /// Returns the fragment part as a raw string slice if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// If the IRI has a fragment part, `Some(_)` is returned. 
+ /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#corge")?; + /// assert_eq!(iri.fragment_str(), Some("corge")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("#foo")?; + /// assert_eq!(iri.fragment_str(), Some("foo")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// When the fragment part exists but is empty string, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#")?; + /// assert_eq!(iri.fragment_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("#")?; + /// assert_eq!(iri.fragment_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the IRI has no fragment, `None` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn fragment_str(&self) -> Option<&str> { + trusted_parser::extract_fragment(self.as_str()) + } + + /// Returns the authority components. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?; + /// let authority = iri.authority_components() + /// .expect("authority is available"); + /// assert_eq!(authority.userinfo(), Some("user:pass")); + /// assert_eq!(authority.host(), "example.com"); + /// assert_eq!(authority.port(), Some("8080")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("foo//bar:baz")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> { + AuthorityComponents::from_iri(self) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> RiReferenceString<S> { + /// Returns the string as [`RiString`], if it is valid as an IRI. + /// + /// If it is not an IRI, then [`RiRelativeString`] is returned as `Err(_)`. + /// + /// [`RiRelativeString`]: struct.RiRelativeString.html + /// [`RiString`]: struct.RiString.html + pub fn into_iri(self) -> Result<RiString<S>, RiRelativeString<S>> { + let s: String = self.into(); + // Check with `IRI` rule first, because of the syntax. + // + // > Some productions are ambiguous. The "first-match-wins" (a.k.a. + // > "greedy") algorithm applies. For details, see [RFC3986]. + // > + // > --- <https://tools.ietf.org/html/rfc3987#section-2.2>. + if iri::<S>(&s).is_ok() { + // SAFETY: just checked `s` is valid as an IRI. + Ok(unsafe { RiString::new_always_unchecked(s) }) + } else { + // SAFETY: if an IRI reference is not an IRI, then it is a relative IRI. + // See the RFC 3987 syntax rule `IRI-reference = IRI / irelative-ref`. 
+ Err(unsafe { RiRelativeString::new_maybe_unchecked(s) }) + } + } + + /// Returns the string as [`RiRelativeString`], if it is valid as a relative IRI reference. + /// + /// If it is an IRI instead, then [`RiString`] is returned as `Err(_)`. + /// + /// [`RiRelativeString`]: struct.RiRelativeString.html + /// [`RiString`]: struct.RiString.html + pub fn into_relative_iri(self) -> Result<RiRelativeString<S>, RiString<S>> { + match self.into_iri() { + Ok(iri) => Err(iri), + Err(relative) => Ok(relative), + } + } + + /// Sets the fragment part to the given string. + /// + /// Removes the fragment part (and the preceding `#` character) if `None` is given. + pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) { + raw::set_fragment(&mut self.inner, fragment.map(AsRef::as_ref)); + debug_assert!(iri_reference::<S>(&self.inner).is_ok()); + } + + /// Removes the password completely (including separator colon) from `self` even if it is empty. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriReferenceString; + /// + /// let mut iri = IriReferenceString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Even if the password is empty, the password and separator will be removed. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriReferenceString; + /// + /// let mut iri = IriReferenceString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice()) { + Some(v) => v, + None => return, + }; + let separator_colon = pw_range.start - 1; + // SAFETY: the IRI must be valid after the password component and + // the leading separator colon is removed. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(separator_colon..pw_range.end); + debug_assert!( + RiReferenceStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } + + /// Replaces the non-empty password in `self` to the empty password. + /// + /// This leaves the separator colon if the password part was available. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriReferenceString; + /// + /// let mut iri = IriReferenceString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the password is empty, it is left as is. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriReferenceString; + /// + /// let mut iri = IriReferenceString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_nonempty_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice()) { + Some(v) if !v.is_empty() => v, + _ => return, + }; + debug_assert_eq!( + self.as_str().as_bytes().get(pw_range.start - 1).copied(), + Some(b':'), + "[validity] the password component must be prefixed with a separator colon" + ); + // SAFETY: the IRI must be valid after the password component is + // replaced with the empty password. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(pw_range); + debug_assert!( + RiReferenceStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component \ + is replaced with the empty password" + ); + } + } +} diff --git a/vendor/iri-string/src/types/generic/relative.rs b/vendor/iri-string/src/types/generic/relative.rs new file mode 100644 index 00000000..2c1618b5 --- /dev/null +++ b/vendor/iri-string/src/types/generic/relative.rs @@ -0,0 +1,571 @@ +//! Relative IRI reference. + +use crate::components::AuthorityComponents; +#[cfg(feature = "alloc")] +use crate::mask_password::password_range_to_hide; +use crate::mask_password::PasswordMasked; +use crate::normalize::Normalized; +use crate::parser::trusted as trusted_parser; +#[cfg(feature = "alloc")] +use crate::raw; +use crate::resolve::FixedBaseResolver; +use crate::spec::Spec; +#[cfg(feature = "alloc")] +use crate::types::RiReferenceString; +use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiStr}; +use crate::validate::relative_ref; + +define_custom_string_slice! { + /// A borrowed slice of a relative IRI reference. 
+ /// + /// This corresponds to [`irelative-ref` rule] in [RFC 3987] + /// (and [`relative-ref` rule] in [RFC 3986]). + /// The rule for `irelative-ref` is `irelative-part [ "?" iquery ] [ "#" ifragment ]`. + /// + /// # Valid values + /// + /// This type can have a relative IRI reference. + /// + /// ``` + /// # use iri_string::types::IriRelativeStr; + /// assert!(IriRelativeStr::new("foo").is_ok()); + /// assert!(IriRelativeStr::new("foo/bar").is_ok()); + /// assert!(IriRelativeStr::new("/foo").is_ok()); + /// assert!(IriRelativeStr::new("//foo/bar").is_ok()); + /// assert!(IriRelativeStr::new("?foo").is_ok()); + /// assert!(IriRelativeStr::new("#foo").is_ok()); + /// assert!(IriRelativeStr::new("foo/bar?baz#qux").is_ok()); + /// // The first path component can have colon if the path is absolute. + /// assert!(IriRelativeStr::new("/foo:bar/").is_ok()); + /// // Second or following path components can have colon. + /// assert!(IriRelativeStr::new("foo/bar://baz/").is_ok()); + /// assert!(IriRelativeStr::new("./foo://bar").is_ok()); + /// ``` + /// + /// Absolute form of a reference is not allowed. + /// + /// ``` + /// # use iri_string::types::IriRelativeStr; + /// assert!(IriRelativeStr::new("https://example.com/").is_err()); + /// // The first path component cannot have colon, if the path is not absolute. + /// assert!(IriRelativeStr::new("foo:bar").is_err()); + /// assert!(IriRelativeStr::new("foo:").is_err()); + /// assert!(IriRelativeStr::new("foo:/").is_err()); + /// assert!(IriRelativeStr::new("foo://").is_err()); + /// assert!(IriRelativeStr::new("foo:///").is_err()); + /// assert!(IriRelativeStr::new("foo:////").is_err()); + /// assert!(IriRelativeStr::new("foo://///").is_err()); + /// ``` + /// + /// Some characters and sequences cannot used in an IRI reference. + /// + /// ``` + /// # use iri_string::types::IriRelativeStr; + /// // `<` and `>` cannot directly appear in a relative IRI reference. 
+ /// assert!(IriRelativeStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in a relative IRI reference. + /// assert!(IriRelativeStr::new("%").is_err()); + /// assert!(IriRelativeStr::new("%GG").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`irelative-ref` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`relative-ref` rule]: https://tools.ietf.org/html/rfc3986#section-4.2 + struct RiRelativeStr { + validator = relative_ref, + expecting_msg = "Relative IRI reference string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of a relative IRI reference. + /// + /// This corresponds to [`irelative-ref` rule] in [RFC 3987] + /// (and [`relative-ref` rule] in [RFC 3986]). + /// The rule for `irelative-ref` is `irelative-part [ "?" iquery ] [ "#" ifragment ]`. + /// + /// For details, see the document for [`RiRelativeStr`]. + /// + /// Enabled by `alloc` or `std` feature. + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`irelative-ref` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`relative-ref` rule]: https://tools.ietf.org/html/rfc3986#section-4.2 + /// [`RiRelativeStr`]: struct.RiRelativeStr.html + struct RiRelativeString { + validator = relative_ref, + slice = RiRelativeStr, + expecting_msg = "Relative IRI reference string", + } +} + +impl<S: Spec> RiRelativeStr<S> { + /// Returns resolved IRI against the given base IRI. + /// + /// For IRI reference resolution output examples, see [RFC 3986 section 5.4]. + /// + /// If you are going to resolve multiple references against the common base, + /// consider using [`FixedBaseResolver`]. + /// + /// # Strictness + /// + /// The IRI parsers provided by this crate are strict (e.g. 
`http:g` is + /// always interpreted as a composition of the scheme `http` and the path + /// `g`), so backward compatible parsing and resolution are not provided. + /// About parser and resolver strictness, see [RFC 3986 section 5.4.2]: + /// + /// > Some parsers allow the scheme name to be present in a relative + /// > reference if it is the same as the base URI scheme. This is considered + /// > to be a loophole in prior specifications of partial URI + /// > [RFC1630](https://tools.ietf.org/html/rfc1630). Its use should be + /// > avoided but is allowed for backward compatibility. + /// > + /// > --- <https://tools.ietf.org/html/rfc3986#section-5.4.2> + /// + /// # Failures + /// + /// This method itself does not fail, but IRI resolution without WHATWG URL + /// Standard serialization can fail in some minor cases. + /// + /// To see examples of such unresolvable IRIs, visit the documentation + /// for [`normalize`][`crate::normalize`] module. + /// + /// [RFC 3986 section 5.4]: https://tools.ietf.org/html/rfc3986#section-5.4 + /// [RFC 3986 section 5.4.2]: https://tools.ietf.org/html/rfc3986#section-5.4.2 + pub fn resolve_against<'a>(&'a self, base: &'a RiAbsoluteStr<S>) -> Normalized<'a, RiStr<S>> { + FixedBaseResolver::new(base).resolve(self.as_ref()) + } + + /// Returns the proxy to the IRI with password masking feature. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("//user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// assert_eq!(masked.to_dedicated_string(), "//user:@example.com/path?query"); + /// + /// assert_eq!( + /// masked.replace_password("${password}").to_string(), + /// "//user:${password}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn mask_password(&self) -> PasswordMasked<'_, Self> { + PasswordMasked::new(self) + } +} + +/// Components getters. +impl<S: Spec> RiRelativeStr<S> { + /// Returns the authority. + /// + /// The leading `//` is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.authority_str(), Some("example.com")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("foo//bar:baz")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_str(&self) -> Option<&str> { + trusted_parser::extract_authority_relative(self.as_str()) + } + + /// Returns the path. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.path_str(), "/pathpath"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("foo//bar:baz")?; + /// assert_eq!(iri.path_str(), "foo//bar:baz"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn path_str(&self) -> &str { + trusted_parser::extract_path_relative(self.as_str()) + } + + /// Returns the query. + /// + /// The leading question mark (`?`) is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriQueryStr, IriRelativeStr}; + /// + /// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?; + /// let query = IriQueryStr::new("queryquery")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriQueryStr, IriRelativeStr}; + /// + /// let iri = IriRelativeStr::new("foo//bar:baz?")?; + /// let query = IriQueryStr::new("")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query(&self) -> Option<&RiQueryStr<S>> { + trusted_parser::extract_query(self.as_str()).map(|query| { + // SAFETY: `extract_query` returns the query part of an IRI, and the + // returned string should have only valid characters since is the + // substring of the source IRI. + unsafe { RiQueryStr::new_maybe_unchecked(query) } + }) + } + + /// Returns the query in a raw string slice. + /// + /// The leading question mark (`?`) is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.query_str(), Some("queryquery")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("foo//bar:baz?")?; + /// assert_eq!(iri.query_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query_str(&self) -> Option<&str> { + trusted_parser::extract_query(self.as_str()) + } + + /// Returns the fragment part if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// If the IRI has a fragment part, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error}; + /// let iri = IriRelativeStr::new("?foo#bar")?; + /// let fragment = IriFragmentStr::new("bar")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error}; + /// let iri = IriRelativeStr::new("#foo")?; + /// let fragment = IriFragmentStr::new("foo")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// When the fragment part exists but is empty string, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error}; + /// let iri = IriRelativeStr::new("#")?; + /// let fragment = IriFragmentStr::new("")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the IRI has no fragment, `None` is returned. 
+ /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error}; + /// let iri = IriRelativeStr::new("")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn fragment(&self) -> Option<&RiFragmentStr<S>> { + AsRef::<RiReferenceStr<S>>::as_ref(self).fragment() + } + + /// Returns the fragment part as a raw string slice if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// If the IRI has a fragment part, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error}; + /// let iri = IriRelativeStr::new("?foo#bar")?; + /// assert_eq!(iri.fragment_str(), Some("bar")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error}; + /// let iri = IriRelativeStr::new("#foo")?; + /// assert_eq!(iri.fragment_str(), Some("foo")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// When the fragment part exists but is empty string, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error}; + /// let iri = IriRelativeStr::new("#")?; + /// assert_eq!(iri.fragment_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the IRI has no fragment, `None` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error}; + /// let iri = IriRelativeStr::new("")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn fragment_str(&self) -> Option<&str> { + AsRef::<RiReferenceStr<S>>::as_ref(self).fragment_str() + } + + /// Returns the authority components. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("//user:pass@example.com:8080/pathpath?queryquery")?; + /// let authority = iri.authority_components() + /// .expect("authority is available"); + /// assert_eq!(authority.userinfo(), Some("user:pass")); + /// assert_eq!(authority.host(), "example.com"); + /// assert_eq!(authority.port(), Some("8080")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("foo//bar:baz")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> { + AuthorityComponents::from_iri(self.as_ref()) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> RiRelativeString<S> { + /// Sets the fragment part to the given string. + /// + /// Removes fragment part (and the leading `#` character) if `None` is given. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::{IriFragmentStr, IriRelativeString}; + /// + /// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query#frag.old")?; + /// assert_eq!(iri.fragment_str(), Some("frag.old")); + /// + /// iri.set_fragment(None); + /// assert_eq!(iri.fragment(), None); + /// + /// let frag_new = IriFragmentStr::new("frag-new")?; + /// iri.set_fragment(Some(frag_new)); + /// assert_eq!(iri.fragment_str(), Some("frag-new")); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Fragment can be empty, and it is distinguished from the absence of a fragment. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriRelativeString; + /// + /// let mut iri = IriRelativeString::try_from("/path#")?; + /// assert_eq!(iri, "/path#"); + /// assert_eq!(iri.fragment_str(), Some(""), "Fragment is present and empty"); + /// + /// iri.set_fragment(None); + /// assert_eq!(iri, "/path", "Note that # is now removed"); + /// assert_eq!(iri.fragment_str(), None, "Fragment is absent"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) { + raw::set_fragment(&mut self.inner, fragment.map(AsRef::as_ref)); + debug_assert!(relative_ref::<S>(&self.inner).is_ok()); + } + + /// Removes the password completely (including separator colon) from `self` even if it is empty. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriRelativeString; + /// + /// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "//user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Even if the password is empty, the password and separator will be removed. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriRelativeString; + /// + /// let mut iri = IriRelativeString::try_from("//user:@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "//user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) => v, + None => return, + }; + let separator_colon = pw_range.start - 1; + // SAFETY: removing password component and the leading colon preserves + // the IRI still syntactically valid. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(separator_colon..pw_range.end); + debug_assert!( + RiRelativeStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } + + /// Replaces the non-empty password in `self` to the empty password. + /// + /// This leaves the separator colon if the password part was available. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriRelativeString; + /// + /// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "//user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the password is empty, it is left as is. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriRelativeString; + /// + /// let mut iri = IriRelativeString::try_from("//user:@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "//user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_nonempty_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) if !v.is_empty() => v, + _ => return, + }; + debug_assert_eq!( + self.as_str().as_bytes().get(pw_range.start - 1).copied(), + Some(b':'), + "[validity] the password component must be prefixed with a separator colon" + ); + // SAFETY: the IRI must be valid after the password component is + // replaced with the empty password. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(pw_range); + debug_assert!( + RiRelativeStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component \ + is replaced with the empty password" + ); + } + } +} + +impl_trivial_conv_between_iri! { + from_slice: RiRelativeStr, + from_owned: RiRelativeString, + to_slice: RiReferenceStr, + to_owned: RiReferenceString, +} diff --git a/vendor/iri-string/src/types/iri.rs b/vendor/iri-string/src/types/iri.rs new file mode 100644 index 00000000..f89be7c7 --- /dev/null +++ b/vendor/iri-string/src/types/iri.rs @@ -0,0 +1,382 @@ +//! IRI-specific implementations. 
+ +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +#[cfg(feature = "alloc")] +use crate::convert::try_percent_encode_iri_inline; +use crate::convert::MappedToUri; +use crate::spec::IriSpec; +use crate::types::{ + RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr, +}; +#[cfg(feature = "alloc")] +use crate::types::{ + RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, RiRelativeString, + RiString, +}; +use crate::types::{ + UriAbsoluteStr, UriFragmentStr, UriQueryStr, UriReferenceStr, UriRelativeStr, UriStr, +}; +#[cfg(feature = "alloc")] +use crate::types::{ + UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, UriRelativeString, + UriString, +}; + +/// A type alias for [`RiAbsoluteStr`]`<`[`IriSpec`]`>`. +pub type IriAbsoluteStr = RiAbsoluteStr<IriSpec>; + +/// A type alias for [`RiAbsoluteString`]`<`[`IriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type IriAbsoluteString = RiAbsoluteString<IriSpec>; + +/// A type alias for [`RiFragmentStr`]`<`[`IriSpec`]`>`. +pub type IriFragmentStr = RiFragmentStr<IriSpec>; + +/// A type alias for [`RiFragmentString`]`<`[`IriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type IriFragmentString = RiFragmentString<IriSpec>; + +/// A type alias for [`RiStr`]`<`[`IriSpec`]`>`. +pub type IriStr = RiStr<IriSpec>; + +/// A type alias for [`RiString`]`<`[`IriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type IriString = RiString<IriSpec>; + +/// A type alias for [`RiReferenceStr`]`<`[`IriSpec`]`>`. +pub type IriReferenceStr = RiReferenceStr<IriSpec>; + +/// A type alias for [`RiReferenceString`]`<`[`IriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type IriReferenceString = RiReferenceString<IriSpec>; + +/// A type alias for [`RiRelativeStr`]`<`[`IriSpec`]`>`. +pub type IriRelativeStr = RiRelativeStr<IriSpec>; + +/// A type alias for [`RiRelativeString`]`<`[`IriSpec`]`>`. 
+#[cfg(feature = "alloc")] +pub type IriRelativeString = RiRelativeString<IriSpec>; + +/// A type alias for [`RiQueryStr`]`<`[`IriSpec`]`>`. +pub type IriQueryStr = RiQueryStr<IriSpec>; + +/// A type alias for [`RiQueryString`]`<`[`IriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type IriQueryString = RiQueryString<IriSpec>; + +/// Implements the conversion from an IRI into a URI. +macro_rules! impl_conversion_between_uri { + ( + $ty_owned_iri:ident, + $ty_owned_uri:ident, + $ty_borrowed_iri:ident, + $ty_borrowed_uri:ident, + $example_iri:expr, + $example_uri:expr + ) => { + /// Conversion from an IRI into a URI. + impl $ty_borrowed_iri { + /// Percent-encodes the IRI into a valid URI that identifies the equivalent resource. + /// + /// If you need more precise control over memory allocation and buffer + /// handling, use [`MappedToUri`][`crate::convert::MappedToUri`] type. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + #[doc = concat!("use iri_string::format::ToDedicatedString;")] + #[doc = concat!("use iri_string::types::{", stringify!($ty_borrowed_iri), ", ", stringify!($ty_owned_uri), "};")] + /// + #[doc = concat!("let iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_iri), ")?;")] + /// // Type annotation here is not necessary. + #[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.encode_to_uri().to_dedicated_string();")] + #[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")] + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn encode_to_uri(&self) -> MappedToUri<'_, Self> { + MappedToUri::from(self) + } + + /// Converts an IRI into a URI without modification, if possible. 
+ /// + /// This is semantically equivalent to + #[doc = concat!("`", stringify!($ty_borrowed_uri), "::new(self.as_str()).ok()`.")] + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + #[doc = concat!("use iri_string::types::{", stringify!($ty_borrowed_iri), ", ", stringify!($ty_borrowed_uri), "};")] + /// + #[doc = concat!("let ascii_iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_uri), ")?;")] + /// assert_eq!( + /// ascii_iri.as_uri().map(AsRef::as_ref), + #[doc = concat!(" Some(", stringify!($example_uri), ")")] + /// ); + /// + #[doc = concat!("let nonascii_iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_iri), ")?;")] + /// assert_eq!(nonascii_iri.as_uri(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn as_uri(&self) -> Option<&$ty_borrowed_uri> { + if !self.as_str().is_ascii() { + return None; + } + debug_assert!( + <$ty_borrowed_uri>::new(self.as_str()).is_ok(), + "[consistency] the ASCII-only IRI must also be a valid URI" + ); + // SAFETY: An ASCII-only IRI is a URI. + // URI (by `UriSpec`) is a subset of IRI (by `IriSpec`), + // and the difference is that URIs can only have ASCII characters. + let uri = unsafe { <$ty_borrowed_uri>::new_maybe_unchecked(self.as_str()) }; + Some(uri) + } + } + + /// Conversion from an IRI into a URI. + #[cfg(feature = "alloc")] + impl $ty_owned_iri { + /// Percent-encodes the IRI into a valid URI that identifies the equivalent resource. + /// + /// After the encode, the IRI is also a valid URI. + /// + /// If you want a new URI string rather than modifying the IRI + /// string, or if you need more precise control over memory + /// allocation and buffer handling, use + #[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")] + /// method. + /// + /// # Panics + /// + /// Panics if the memory allocation failed. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + #[doc = concat!("use iri_string::types::", stringify!($ty_owned_iri), ";")] + /// + #[doc = concat!("let mut iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")] + /// iri.encode_to_uri_inline(); + #[doc = concat!("assert_eq!(iri, ", stringify!($example_uri), ");")] + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn encode_to_uri_inline(&mut self) { + self.try_encode_to_uri_inline() + .expect("failed to allocate memory"); + } + + /// Percent-encodes the IRI into a valid URI that identifies the equivalent resource. + /// + /// After the encode, the IRI is also a valid URI. + /// + /// If you want a new URI string rather than modifying the IRI + /// string, or if you need more precise control over memory + /// allocation and buffer handling, use + #[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")] + /// method. + /// + // TODO: This seems true as of this writing, but is this guaranteed? See + // <https://users.rust-lang.org/t/does-try-reserve-guarantees-that-the-content-is-preserved-on-allocation-failure/77446>. + // /// If the memory allocation failed, the content is preserved without modification. + // /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + #[doc = concat!("use iri_string::types::", stringify!($ty_owned_iri), ";")] + /// + #[doc = concat!("let mut iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")] + /// iri.try_encode_to_uri_inline() + /// .expect("failed to allocate memory"); + #[doc = concat!("assert_eq!(iri, ", stringify!($example_uri), ");")] + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn try_encode_to_uri_inline(&mut self) -> Result<(), TryReserveError> { + // SAFETY: IRI is valid after it is encoded to URI (by percent encoding). 
+ unsafe { + let buf = self.as_inner_mut(); + try_percent_encode_iri_inline(buf)?; + } + debug_assert!( + <$ty_borrowed_iri>::new(self.as_str()).is_ok(), + "[consistency] the content must be valid at any time" + ); + Ok(()) + } + + /// Percent-encodes the IRI into a valid URI that identifies the equivalent resource. + /// + /// If you want a new URI string rather than modifying the IRI + /// string, or if you need more precise control over memory + /// allocation and buffer handling, use + #[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")] + /// method. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + #[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")] + /// + #[doc = concat!("let iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")] + /// // Type annotation here is not necessary. + #[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.encode_into_uri();")] + #[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")] + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn encode_into_uri(self) -> $ty_owned_uri { + self.try_encode_into_uri() + .expect("failed to allocate memory") + } + + /// Percent-encodes the IRI into a valid URI that identifies the equivalent resource. + /// + /// If you want a new URI string rather than modifying the IRI + /// string, or if you need more precise control over memory + /// allocation and buffer handling, use + #[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")] + /// method. + /// + // TODO: This seems true as of this writing, but is this guaranteed? See + // <https://users.rust-lang.org/t/does-try-reserve-guarantees-that-the-content-is-preserved-on-allocation-failure/77446>. 
+ // /// If the memory allocation failed, the content is preserved without modification. + // /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + #[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")] + /// + #[doc = concat!("let iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")] + /// // Type annotation here is not necessary. + #[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.try_encode_into_uri()")] + /// .expect("failed to allocate memory"); + #[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")] + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn try_encode_into_uri(mut self) -> Result<$ty_owned_uri, TryReserveError> { + self.try_encode_to_uri_inline()?; + let s: String = self.into(); + debug_assert!( + <$ty_borrowed_uri>::new(s.as_str()).is_ok(), + "[consistency] the encoded IRI must also be a valid URI" + ); + // SAFETY: An ASCII-only IRI is a URI. + // URI (by `UriSpec`) is a subset of IRI (by `IriSpec`), + // and the difference is that URIs can only have ASCII characters. + let uri = unsafe { <$ty_owned_uri>::new_maybe_unchecked(s) }; + Ok(uri) + } + + /// Converts an IRI into a URI without modification, if possible. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + #[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")] + /// + #[doc = concat!("let ascii_iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_uri), ")?;")] + /// assert_eq!( + /// ascii_iri.try_into_uri().map(|uri| uri.to_string()), + #[doc = concat!(" Ok(", stringify!($example_uri), ".to_string())")] + /// ); + /// + #[doc = concat!("let nonascii_iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")] + /// assert_eq!( + /// nonascii_iri.try_into_uri().map_err(|iri| iri.to_string()), + #[doc = concat!(" Err(", stringify!($example_iri), ".to_string())")] + /// ); + /// # Ok::<_, Error>(()) + /// ``` + pub fn try_into_uri(self) -> Result<$ty_owned_uri, $ty_owned_iri> { + if !self.as_str().is_ascii() { + return Err(self); + } + let s: String = self.into(); + debug_assert!( + <$ty_borrowed_uri>::new(s.as_str()).is_ok(), + "[consistency] the ASCII-only IRI must also be a valid URI" + ); + // SAFETY: An ASCII-only IRI is a URI. + // URI (by `UriSpec`) is a subset of IRI (by `IriSpec`), + // and the difference is that URIs can only have ASCII characters. 
+ let uri = unsafe { <$ty_owned_uri>::new_maybe_unchecked(s) }; + Ok(uri) + } + } + }; +} + +impl_conversion_between_uri!( + IriAbsoluteString, + UriAbsoluteString, + IriAbsoluteStr, + UriAbsoluteStr, + "http://example.com/?alpha=\u{03B1}", + "http://example.com/?alpha=%CE%B1" +); +impl_conversion_between_uri!( + IriReferenceString, + UriReferenceString, + IriReferenceStr, + UriReferenceStr, + "http://example.com/?alpha=\u{03B1}", + "http://example.com/?alpha=%CE%B1" +); +impl_conversion_between_uri!( + IriRelativeString, + UriRelativeString, + IriRelativeStr, + UriRelativeStr, + "../?alpha=\u{03B1}", + "../?alpha=%CE%B1" +); +impl_conversion_between_uri!( + IriString, + UriString, + IriStr, + UriStr, + "http://example.com/?alpha=\u{03B1}", + "http://example.com/?alpha=%CE%B1" +); +impl_conversion_between_uri!( + IriQueryString, + UriQueryString, + IriQueryStr, + UriQueryStr, + "alpha-is-\u{03B1}", + "alpha-is-%CE%B1" +); +impl_conversion_between_uri!( + IriFragmentString, + UriFragmentString, + IriFragmentStr, + UriFragmentStr, + "alpha-is-\u{03B1}", + "alpha-is-%CE%B1" +); diff --git a/vendor/iri-string/src/types/uri.rs b/vendor/iri-string/src/types/uri.rs new file mode 100644 index 00000000..682a971c --- /dev/null +++ b/vendor/iri-string/src/types/uri.rs @@ -0,0 +1,115 @@ +//! URI-specific implementations. + +use crate::spec::UriSpec; +use crate::types::{ + IriAbsoluteStr, IriFragmentStr, IriQueryStr, IriReferenceStr, IriRelativeStr, IriStr, + RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr, +}; +#[cfg(feature = "alloc")] +use crate::types::{ + IriAbsoluteString, IriFragmentString, IriQueryString, IriReferenceString, IriRelativeString, + IriString, RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, + RiRelativeString, RiString, +}; + +/// A type alias for [`RiAbsoluteStr`]`<`[`UriSpec`]`>`. +pub type UriAbsoluteStr = RiAbsoluteStr<UriSpec>; + +/// A type alias for [`RiAbsoluteString`]`<`[`UriSpec`]`>`. 
+#[cfg(feature = "alloc")] +pub type UriAbsoluteString = RiAbsoluteString<UriSpec>; + +/// A type alias for [`RiFragmentStr`]`<`[`UriSpec`]`>`. +pub type UriFragmentStr = RiFragmentStr<UriSpec>; + +/// A type alias for [`RiFragmentString`]`<`[`UriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type UriFragmentString = RiFragmentString<UriSpec>; + +/// A type alias for [`RiStr`]`<`[`UriSpec`]`>`. +pub type UriStr = RiStr<UriSpec>; + +/// A type alias for [`RiString`]`<`[`UriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type UriString = RiString<UriSpec>; + +/// A type alias for [`RiReferenceStr`]`<`[`UriSpec`]`>`. +pub type UriReferenceStr = RiReferenceStr<UriSpec>; + +/// A type alias for [`RiReferenceString`]`<`[`UriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type UriReferenceString = RiReferenceString<UriSpec>; + +/// A type alias for [`RiRelativeStr`]`<`[`UriSpec`]`>`. +pub type UriRelativeStr = RiRelativeStr<UriSpec>; + +/// A type alias for [`RiRelativeString`]`<`[`UriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type UriRelativeString = RiRelativeString<UriSpec>; + +/// A type alias for [`RiQueryStr`]`<`[`UriSpec`]`>`. +pub type UriQueryStr = RiQueryStr<UriSpec>; + +/// A type alias for [`RiQueryString`]`<`[`UriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type UriQueryString = RiQueryString<UriSpec>; + +/// Implements the trivial conversions between a URI and an IRI. +macro_rules! impl_conversions_between_iri { + ( + $borrowed_uri:ident, + $owned_uri:ident, + $borrowed_iri:ident, + $owned_iri:ident, + ) => { + impl AsRef<$borrowed_iri> for $borrowed_uri { + fn as_ref(&self) -> &$borrowed_iri { + // SAFETY: A valid URI is also a valid IRI. + unsafe { <$borrowed_iri>::new_maybe_unchecked(self.as_str()) } + } + } + + #[cfg(feature = "alloc")] + impl From<$owned_uri> for $owned_iri { + #[inline] + fn from(uri: $owned_uri) -> Self { + // SAFETY: A valid URI is also a valid IRI. 
+ unsafe { Self::new_maybe_unchecked(uri.into()) } + } + } + + #[cfg(feature = "alloc")] + impl AsRef<$borrowed_iri> for $owned_uri { + fn as_ref(&self) -> &$borrowed_iri { + AsRef::<$borrowed_uri>::as_ref(self).as_ref() + } + } + }; +} + +impl_conversions_between_iri!( + UriAbsoluteStr, + UriAbsoluteString, + IriAbsoluteStr, + IriAbsoluteString, +); +impl_conversions_between_iri!( + UriReferenceStr, + UriReferenceString, + IriReferenceStr, + IriReferenceString, +); +impl_conversions_between_iri!( + UriRelativeStr, + UriRelativeString, + IriRelativeStr, + IriRelativeString, +); +impl_conversions_between_iri!(UriStr, UriString, IriStr, IriString,); +impl_conversions_between_iri!(UriQueryStr, UriQueryString, IriQueryStr, IriQueryString,); +impl_conversions_between_iri!( + UriFragmentStr, + UriFragmentString, + IriFragmentStr, + IriFragmentString, +); diff --git a/vendor/iri-string/src/validate.rs b/vendor/iri-string/src/validate.rs new file mode 100644 index 00000000..efaa7efd --- /dev/null +++ b/vendor/iri-string/src/validate.rs @@ -0,0 +1,358 @@ +//! Validators. + +use core::fmt; + +#[cfg(feature = "std")] +use std::error; + +use crate::parser::validate as parser; +use crate::spec::Spec; + +/// Resource identifier validation error. +// Note that this type should implement `Copy` trait. +// To return additional non-`Copy` data as an error, use wrapper type +// (as `std::string::FromUtf8Error` contains `std::str::Utf8Error`). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Error(()); + +impl Error { + /// Creates a new `Error`. + /// + /// For internal use. + #[inline] + #[must_use] + pub(crate) fn new() -> Self { + Error(()) + } +} + +impl fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("Invalid IRI") + } +} + +#[cfg(feature = "std")] +impl error::Error for Error {} + +/// Validates [IRI][uri]. +/// +/// This validator corresponds to [`RiStr`] and [`RiString`] types. 
+/// +/// # Examples +/// +/// This type can have an IRI (which is absolute, and may have fragment part). +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::iri}; +/// assert!(iri::<UriSpec>("https://user:pass@example.com:8080").is_ok()); +/// assert!(iri::<UriSpec>("https://example.com/").is_ok()); +/// assert!(iri::<UriSpec>("https://example.com/foo?bar=baz").is_ok()); +/// assert!(iri::<UriSpec>("https://example.com/foo?bar=baz#qux").is_ok()); +/// assert!(iri::<UriSpec>("foo:bar").is_ok()); +/// assert!(iri::<UriSpec>("foo:").is_ok()); +/// // `foo://.../` below are all allowed. See the crate documentation for detail. +/// assert!(iri::<UriSpec>("foo:/").is_ok()); +/// assert!(iri::<UriSpec>("foo://").is_ok()); +/// assert!(iri::<UriSpec>("foo:///").is_ok()); +/// assert!(iri::<UriSpec>("foo:////").is_ok()); +/// assert!(iri::<UriSpec>("foo://///").is_ok()); +/// ``` +/// +/// Relative IRI reference is not allowed. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::iri}; +/// // This is relative path. +/// assert!(iri::<UriSpec>("foo/bar").is_err()); +/// // `/foo/bar` is an absolute path, but it is authority-relative. +/// assert!(iri::<UriSpec>("/foo/bar").is_err()); +/// // `//foo/bar` is termed "network-path reference", +/// // or usually called "protocol-relative reference". +/// assert!(iri::<UriSpec>("//foo/bar").is_err()); +/// // Same-document reference is relative. +/// assert!(iri::<UriSpec>("#foo").is_err()); +/// // Empty string is not a valid absolute IRI. +/// assert!(iri::<UriSpec>("").is_err()); +/// ``` +/// +/// Some characters and sequences cannot be used in an IRI. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::iri}; +/// // `<` and `>` cannot directly appear in an IRI. +/// assert!(iri::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in an IRI. 
+/// assert!(iri::<UriSpec>("%").is_err()); +/// assert!(iri::<UriSpec>("%GG").is_err()); +/// ``` +/// +/// [uri]: https://tools.ietf.org/html/rfc3986#section-3 +/// [`RiStr`]: ../types/struct.RiStr.html +/// [`RiString`]: ../types/struct.RiString.html +pub fn iri<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_uri::<S>(s) +} + +/// Validates [IRI reference][uri-reference]. +/// +/// This validator corresponds to [`RiReferenceStr`] and [`RiReferenceString`] types. +/// +/// # Examples +/// +/// This type can have an IRI reference (which can be absolute or relative). +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::iri_reference}; +/// assert!(iri_reference::<UriSpec>("https://user:pass@example.com:8080").is_ok()); +/// assert!(iri_reference::<UriSpec>("https://example.com/").is_ok()); +/// assert!(iri_reference::<UriSpec>("https://example.com/foo?bar=baz").is_ok()); +/// assert!(iri_reference::<UriSpec>("https://example.com/foo?bar=baz#qux").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo:bar").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo:").is_ok()); +/// // `foo://.../` below are all allowed. See the crate documentation for detail. +/// assert!(iri_reference::<UriSpec>("foo:/").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo://").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo:///").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo:////").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo://///").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo/bar").is_ok()); +/// assert!(iri_reference::<UriSpec>("/foo/bar").is_ok()); +/// assert!(iri_reference::<UriSpec>("//foo/bar").is_ok()); +/// assert!(iri_reference::<UriSpec>("#foo").is_ok()); +/// ``` +/// +/// Some characters and sequences cannot be used in an IRI reference. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::iri_reference}; +/// // `<` and `>` cannot directly appear in an IRI reference. 
+/// assert!(iri_reference::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in an IRI reference. +/// assert!(iri_reference::<UriSpec>("%").is_err()); +/// assert!(iri_reference::<UriSpec>("%GG").is_err()); +/// ``` +/// +/// [uri-reference]: https://tools.ietf.org/html/rfc3986#section-4.1 +/// [`RiReferenceStr`]: ../types/struct.RiReferenceStr.html +/// [`RiReferenceString`]: ../types/struct.RiReferenceString.html +pub fn iri_reference<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_uri_reference::<S>(s) +} + +/// Validates [absolute IRI][absolute-uri]. +/// +/// This validator corresponds to [`RiAbsoluteStr`] and [`RiAbsoluteString`] types. +/// +/// # Examples +/// +/// This type can have an absolute IRI without fragment part. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::absolute_iri}; +/// assert!(absolute_iri::<UriSpec>("https://example.com/foo?bar=baz").is_ok()); +/// assert!(absolute_iri::<UriSpec>("foo:bar").is_ok()); +/// // Scheme `foo` and empty path. +/// assert!(absolute_iri::<UriSpec>("foo:").is_ok()); +/// // `foo://.../` below are all allowed. See the crate documentation for detail. +/// assert!(absolute_iri::<UriSpec>("foo:/").is_ok()); +/// assert!(absolute_iri::<UriSpec>("foo://").is_ok()); +/// assert!(absolute_iri::<UriSpec>("foo:///").is_ok()); +/// assert!(absolute_iri::<UriSpec>("foo:////").is_ok()); +/// assert!(absolute_iri::<UriSpec>("foo://///").is_ok()); +/// +/// ``` +/// +/// Relative IRI is not allowed. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::absolute_iri}; +/// // This is relative path. +/// assert!(absolute_iri::<UriSpec>("foo/bar").is_err()); +/// // `/foo/bar` is an absolute path, but it is authority-relative. +/// assert!(absolute_iri::<UriSpec>("/foo/bar").is_err()); +/// // `//foo/bar` is termed "network-path reference", +/// // or usually called "protocol-relative reference". 
+/// assert!(absolute_iri::<UriSpec>("//foo/bar").is_err()); +/// // Empty string is not a valid absolute IRI. +/// assert!(absolute_iri::<UriSpec>("").is_err()); +/// ``` +/// +/// Fragment part (such as trailing `#foo`) is not allowed. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::absolute_iri}; +/// // Fragment part is not allowed. +/// assert!(absolute_iri::<UriSpec>("https://example.com/foo?bar=baz#qux").is_err()); +/// ``` +/// +/// Some characters and sequences cannot be used in an absolute IRI. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::absolute_iri}; +/// // `<` and `>` cannot directly appear in an absolute IRI. +/// assert!(absolute_iri::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in an absolute IRI. +/// assert!(absolute_iri::<UriSpec>("%").is_err()); +/// assert!(absolute_iri::<UriSpec>("%GG").is_err()); +/// ``` +/// +/// [absolute-uri]: https://tools.ietf.org/html/rfc3986#section-4.3 +/// [`RiAbsoluteStr`]: ../types/struct.RiAbsoluteStr.html +/// [`RiAbsoluteString`]: ../types/struct.RiAbsoluteString.html +pub fn absolute_iri<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_absolute_uri::<S>(s) +} + +/// Validates [relative reference][relative-ref]. +/// +/// This validator corresponds to [`RiRelativeStr`] and [`RiRelativeString`] types. +/// +/// # Valid values +/// +/// This type can have a relative IRI reference. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::relative_ref}; +/// assert!(relative_ref::<UriSpec>("foo").is_ok()); +/// assert!(relative_ref::<UriSpec>("foo/bar").is_ok()); +/// assert!(relative_ref::<UriSpec>("/foo").is_ok()); +/// assert!(relative_ref::<UriSpec>("//foo/bar").is_ok()); +/// assert!(relative_ref::<UriSpec>("?foo").is_ok()); +/// assert!(relative_ref::<UriSpec>("#foo").is_ok()); +/// assert!(relative_ref::<UriSpec>("foo/bar?baz#qux").is_ok()); +/// // The first path component can have colon if the path is absolute. 
+/// assert!(relative_ref::<UriSpec>("/foo:bar/").is_ok()); +/// // Second or following path components can have colon. +/// assert!(relative_ref::<UriSpec>("foo/bar://baz/").is_ok()); +/// assert!(relative_ref::<UriSpec>("./foo://bar").is_ok()); +/// ``` +/// +/// Absolute form of a reference is not allowed. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::relative_ref}; +/// assert!(relative_ref::<UriSpec>("https://example.com/").is_err()); +/// // The first path component cannot have colon, if the path is not absolute. +/// assert!(relative_ref::<UriSpec>("foo:bar").is_err()); +/// assert!(relative_ref::<UriSpec>("foo:").is_err()); +/// assert!(relative_ref::<UriSpec>("foo:/").is_err()); +/// assert!(relative_ref::<UriSpec>("foo://").is_err()); +/// assert!(relative_ref::<UriSpec>("foo:///").is_err()); +/// assert!(relative_ref::<UriSpec>("foo:////").is_err()); +/// assert!(relative_ref::<UriSpec>("foo://///").is_err()); +/// ``` +/// +/// Some characters and sequences cannot be used in an IRI reference. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::relative_ref}; +/// // `<` and `>` cannot directly appear in a relative IRI reference. +/// assert!(relative_ref::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in a relative IRI reference. +/// assert!(relative_ref::<UriSpec>("%").is_err()); +/// assert!(relative_ref::<UriSpec>("%GG").is_err()); +/// ``` +/// +/// [relative-ref]: https://tools.ietf.org/html/rfc3986#section-4.2 +/// [`RiRelativeStr`]: ../types/struct.RiRelativeStr.html +/// [`RiRelativeString`]: ../types/struct.RiRelativeString.html +pub fn relative_ref<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_relative_ref::<S>(s) +} + +/// Validates [IRI path][path]. +/// +/// [path]: https://tools.ietf.org/html/rfc3986#section-3.3 +pub fn path<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_path::<S>(s) +} + +/// Validates [IRI query][query]. 
+/// +/// This validator corresponds to [`RiQueryStr`] and [`RiQueryString`] types. +/// +/// Note that the first `?` character in an IRI is not a part of a query. +/// For example, `https://example.com/?foo#bar` has a query `foo`, **not** `?foo`. +/// +/// # Examples +/// +/// This type can have an IRI query. +/// Note that the IRI `foo://bar/baz?qux#quux` has the query `qux`, **not** `?qux`. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::query}; +/// assert!(query::<UriSpec>("").is_ok()); +/// assert!(query::<UriSpec>("foo").is_ok()); +/// assert!(query::<UriSpec>("foo/bar").is_ok()); +/// assert!(query::<UriSpec>("/foo/bar").is_ok()); +/// assert!(query::<UriSpec>("//foo/bar").is_ok()); +/// assert!(query::<UriSpec>("https://user:pass@example.com:8080").is_ok()); +/// assert!(query::<UriSpec>("https://example.com/").is_ok()); +/// // Question mark `?` can appear in an IRI query. +/// assert!(query::<UriSpec>("query?again").is_ok()); +/// ``` +/// +/// Some characters and sequences cannot be used in a query. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::query}; +/// // `<` and `>` cannot directly appear in an IRI query. +/// assert!(query::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in an IRI query. +/// assert!(query::<UriSpec>("%").is_err()); +/// assert!(query::<UriSpec>("%GG").is_err()); +/// // Hash sign `#` cannot appear in an IRI query. +/// assert!(query::<UriSpec>("#hash").is_err()); +/// ``` +/// +/// [query]: https://tools.ietf.org/html/rfc3986#section-3.4 +/// [`RiQueryStr`]: ../types/struct.RiQueryStr.html +/// [`RiQueryString`]: ../types/struct.RiQueryString.html +pub fn query<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_query::<S>(s) +} + +/// Validates [IRI fragment][fragment]. +/// +/// This validator corresponds to [`RiFragmentStr`] and [`RiFragmentString`] types. +/// +/// Note that the first `#` character in an IRI is not a part of a fragment.
+/// For example, `https://example.com/#foo` has a fragment `foo`, **not** `#foo`. +/// +/// # Examples +/// +/// This type can have an IRI fragment. +/// Note that the IRI `foo://bar/baz#qux` has the fragment `qux`, **not** `#qux`. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::fragment}; +/// assert!(fragment::<UriSpec>("").is_ok()); +/// assert!(fragment::<UriSpec>("foo").is_ok()); +/// assert!(fragment::<UriSpec>("foo/bar").is_ok()); +/// assert!(fragment::<UriSpec>("/foo/bar").is_ok()); +/// assert!(fragment::<UriSpec>("//foo/bar").is_ok()); +/// assert!(fragment::<UriSpec>("https://user:pass@example.com:8080").is_ok()); +/// assert!(fragment::<UriSpec>("https://example.com/").is_ok()); +/// ``` +/// +/// Some characters and sequences cannot be used in a fragment. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::fragment}; +/// // `<` and `>` cannot directly appear in an IRI fragment. +/// assert!(fragment::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in an IRI fragment. +/// assert!(fragment::<UriSpec>("%").is_err()); +/// assert!(fragment::<UriSpec>("%GG").is_err()); +/// // Hash sign `#` cannot appear in an IRI fragment. +/// assert!(fragment::<UriSpec>("#hash").is_err()); +/// ``` +/// +/// [fragment]: https://tools.ietf.org/html/rfc3986#section-3.5 +/// [`RiFragmentStr`]: ../types/struct.RiFragmentStr.html +/// [`RiFragmentString`]: ../types/struct.RiFragmentString.html +pub fn fragment<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_fragment::<S>(s) +} |
