//! URI and IRI resolvers.
//!
//! # IRI resolution can fail without WHATWG URL Standard serialization
//!
//! ## Pure RFC 3986 algorithm
//!
//! Though this is not explicitly stated in RFC 3986, IRI resolution can fail.
//! Below are examples:
//!
//! * base=`scheme:`, ref=`.///bar`.
//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:foo`, ref=`.///bar`.
//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:`, ref=`/..//baz`.
//!     + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
//! * base=`scheme:foo/bar`, ref=`..//baz`.
//!     + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
//!
//! IRI without authority (note that this is different from "with empty authority")
//! cannot have a path starting with `//`, since it is ambiguous and can be
//! interpreted as an IRI with authority. For the above examples, `scheme://bar`
//! is not valid output, as `bar` in `scheme://bar` will be interpreted as an
//! authority, not a path.
//!
//! Thus, IRI resolution by pure RFC 3986 algorithm can fail for some abnormal
//! cases.
//!
//! Note that this kind of failure can happen only when the base IRI has no
//! authority and empty path. This would be rare in the wild, since many people
//! would use an IRI with authority part, such as `http://`.
//!
//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the
//! failure. Currently no cases are known to fail when at least one of the base
//! IRI or the relative IRI contains authorities.
//!
//! If you want this kind of abnormal IRI resolution to succeed and to be
//! idempotent, check the resolution result using
//! [`Normalized::ensure_rfc3986_normalizable`] (see the section below).
//!
//! ## WHATWG serialization
//!
//! To handle IRI resolution failure, WHATWG URL Standard defines serialization
algorithm for this kind of result, and it makes IRI resolution (and even //! normalization) infallible and idempotent. //! //! IRI resolution and normalization provided by this crate automatically //! applies this special rule if necessary, so they are infallible. If you want //! to detect resolution/normalization failure, use //! [`Normalized::ensure_rfc3986_normalizable`] method. //! //! ## Examples //! //! ``` //! # #[cfg(feature = "alloc")] { //! use iri_string::format::ToDedicatedString; //! use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; //! //! let base = IriAbsoluteStr::new("scheme:")?; //! { //! let reference = IriReferenceStr::new(".///not-a-host")?; //! let result = reference.resolve_against(base); //! assert!(result.ensure_rfc3986_normalizable().is_err()); //! assert_eq!(result.to_dedicated_string(), "scheme:/.//not-a-host"); //! } //! //! { //! let reference2 = IriReferenceStr::new("/..//not-a-host")?; //! // Resulting string will be `scheme://not-a-host`, but `not-a-host` //! // should be a path segment, not a host. So, the semantically correct //! // target IRI cannot be represented by RFC 3986 IRI resolution. //! let result2 = reference2.resolve_against(base); //! assert!(result2.ensure_rfc3986_normalizable().is_err()); //! //! // Algorithm defined in WHATWG URL Standard addresses this case. //! assert_eq!(result2.to_dedicated_string(), "scheme:/.//not-a-host"); //! } //! # } //! # Ok::<_, iri_string::validate::Error>(()) //! ``` use crate::components::RiReferenceComponents; use crate::normalize::{NormalizationInput, Normalized}; use crate::spec::Spec; use crate::types::{RiAbsoluteStr, RiQueryStr, RiReferenceStr, RiStr}; /// A resolver against the fixed base. #[derive(Debug, Clone, Copy)] pub struct FixedBaseResolver<'a, S: Spec> { /// Components of the base IRI. base_components: RiReferenceComponents<'a, S>, } impl<'a, S: Spec> FixedBaseResolver<'a, S> { /// Creates a new resolver with the given base. 
/// /// # Examples /// /// ``` /// # use iri_string::validate::Error; /// # // `ToDedicatedString` is available only when /// # // `alloc` feature is enabled. /// #[cfg(feature = "alloc")] { /// use iri_string::format::ToDedicatedString; /// use iri_string::resolve::FixedBaseResolver; /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; /// /// let base = IriAbsoluteStr::new("http://example.com/base/")?; /// let resolver = FixedBaseResolver::new(base); /// /// let reference = IriReferenceStr::new("../there")?; /// let resolved = resolver.resolve(reference); /// /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there"); /// # } /// # Ok::<_, Error>(()) /// ``` #[must_use] pub fn new(base: &'a RiAbsoluteStr) -> Self { Self { base_components: RiReferenceComponents::from(base.as_ref()), } } /// Returns the base. /// /// # Examples /// /// ``` /// use iri_string::resolve::FixedBaseResolver; /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; /// /// let base = IriAbsoluteStr::new("http://example.com/base/")?; /// let resolver = FixedBaseResolver::new(base); /// /// assert_eq!(resolver.base(), base); /// # Ok::<_, iri_string::validate::Error>(()) /// ``` #[must_use] pub fn base(&self) -> &'a RiAbsoluteStr { // SAFETY: `base_components` can only be created from `&RiAbsoluteStr`, // and the type of `base_components` does not allow modification of the // content after it is created. unsafe { RiAbsoluteStr::new_maybe_unchecked(self.base_components.iri().as_str()) } } } /// Components getters. /// /// These getters are more efficient than calling through the result of `.base()`. impl FixedBaseResolver<'_, S> { /// Returns the scheme. /// /// The following colon is truncated. 
/// /// # Examples /// /// ``` /// # use iri_string::validate::Error; /// use iri_string::resolve::FixedBaseResolver; /// use iri_string::types::IriAbsoluteStr; /// /// let base = IriAbsoluteStr::new("http://example.com/base/?query")?; /// let resolver = FixedBaseResolver::new(base); /// /// assert_eq!(resolver.scheme_str(), "http"); /// assert_eq!(base.scheme_str(), "http"); /// # Ok::<_, Error>(()) /// ``` #[inline] #[must_use] pub fn scheme_str(&self) -> &str { self.base_components .scheme_str() .expect("[validity] absolute IRI should have the scheme part") } /// Returns the authority. /// /// The leading `//` is truncated. /// /// # Examples /// /// ``` /// # use iri_string::validate::Error; /// use iri_string::resolve::FixedBaseResolver; /// use iri_string::types::IriAbsoluteStr; /// /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; /// let resolver = FixedBaseResolver::new(base); /// /// assert_eq!(resolver.authority_str(), Some("user:pass@example.com")); /// assert_eq!(base.authority_str(), Some("user:pass@example.com")); /// # Ok::<_, Error>(()) /// ``` #[inline] #[must_use] pub fn authority_str(&self) -> Option<&str> { self.base_components.authority_str() } /// Returns the path. /// /// # Examples /// /// ``` /// # use iri_string::validate::Error; /// use iri_string::resolve::FixedBaseResolver; /// use iri_string::types::IriAbsoluteStr; /// /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; /// let resolver = FixedBaseResolver::new(base); /// /// assert_eq!(resolver.path_str(), "/base/"); /// assert_eq!(base.path_str(), "/base/"); /// # Ok::<_, Error>(()) /// ``` #[inline] #[must_use] pub fn path_str(&self) -> &str { self.base_components.path_str() } /// Returns the query. /// /// The leading question mark (`?`) is truncated. 
/// /// # Examples /// /// ``` /// # use iri_string::validate::Error; /// use iri_string::resolve::FixedBaseResolver; /// use iri_string::types::{IriAbsoluteStr, IriQueryStr}; /// /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; /// let resolver = FixedBaseResolver::new(base); /// let query = IriQueryStr::new("query")?; /// /// assert_eq!(resolver.query(), Some(query)); /// assert_eq!(base.query(), Some(query)); /// # Ok::<_, Error>(()) /// ``` #[inline] #[must_use] pub fn query(&self) -> Option<&RiQueryStr> { let query_raw = self.query_str()?; let query = RiQueryStr::new(query_raw) .expect("[validity] must be valid query if present in an absolute-IRI"); Some(query) } /// Returns the query in a raw string slice. /// /// The leading question mark (`?`) is truncated. /// /// # Examples /// /// ``` /// # use iri_string::validate::Error; /// use iri_string::resolve::FixedBaseResolver; /// use iri_string::types::IriAbsoluteStr; /// /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; /// let resolver = FixedBaseResolver::new(base); /// /// assert_eq!(resolver.query_str(), Some("query")); /// assert_eq!(base.query_str(), Some("query")); /// # Ok::<_, Error>(()) /// ``` #[inline] #[must_use] pub fn query_str(&self) -> Option<&str> { self.base_components.query_str() } } impl<'a, S: Spec> FixedBaseResolver<'a, S> { /// Resolves the given reference against the fixed base. /// /// The task returned by this method does **not** normalize the resolution /// result. However, `..` and `.` are recognized even when they are /// percent-encoded. /// /// # Failures /// /// This function itself does not fail, but resolution algorithm defined by /// RFC 3986 can fail. In that case, serialization algorithm defined by /// WHATWG URL Standard would be automatically applied. /// /// See the documentation of [`Normalized`]. 
/// /// # Examples /// /// ``` /// # use iri_string::validate::Error; /// # // `ToDedicatedString` is available only when /// # // `alloc` feature is enabled. /// # #[cfg(feature = "alloc")] { /// use iri_string::format::ToDedicatedString; /// use iri_string::resolve::FixedBaseResolver; /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; /// /// let base = IriAbsoluteStr::new("http://example.com/base/")?; /// let resolver = FixedBaseResolver::new(base); /// /// let reference = IriReferenceStr::new("../there")?; /// let resolved = resolver.resolve(reference); /// /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there"); /// # } /// # Ok::<_, Error>(()) /// ``` /// /// Note that `..` and `.` path segments are recognized even when they are /// percent-encoded. /// /// ``` /// # use iri_string::validate::Error; /// # // `ToDedicatedString` is available only when /// # // `alloc` feature is enabled. /// # #[cfg(feature = "alloc")] { /// use iri_string::format::ToDedicatedString; /// use iri_string::resolve::FixedBaseResolver; /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; /// /// let base = IriAbsoluteStr::new("HTTP://example.COM/base/base2/")?; /// let resolver = FixedBaseResolver::new(base); /// /// // `%2e%2e` is recognized as `..`. /// // However, `dot%2edot` is NOT normalized into `dot.dot`. /// let reference = IriReferenceStr::new("%2e%2e/../dot%2edot")?; /// let resolved = resolver.resolve(reference); /// /// // Resolved but not normalized. /// assert_eq!(resolved.to_dedicated_string(), "HTTP://example.COM/dot%2edot"); /// # } /// # Ok::<_, Error>(()) /// ``` #[inline] #[must_use] pub fn resolve(&self, reference: &'a RiReferenceStr) -> Normalized<'a, RiStr> { let input = NormalizationInput::with_resolution_params(&self.base_components, reference); Normalized::from_input(input) } }