diff options
Diffstat (limited to 'vendor/matchit/src')
| -rw-r--r-- | vendor/matchit/src/error.rs | 127 | ||||
| -rw-r--r-- | vendor/matchit/src/escape.rs | 184 | ||||
| -rw-r--r-- | vendor/matchit/src/lib.rs | 131 | ||||
| -rw-r--r-- | vendor/matchit/src/params.rs | 262 | ||||
| -rw-r--r-- | vendor/matchit/src/router.rs | 147 | ||||
| -rw-r--r-- | vendor/matchit/src/tree.rs | 878 |
6 files changed, 1729 insertions, 0 deletions
diff --git a/vendor/matchit/src/error.rs b/vendor/matchit/src/error.rs new file mode 100644 index 00000000..ab879f30 --- /dev/null +++ b/vendor/matchit/src/error.rs @@ -0,0 +1,127 @@ +use crate::escape::{UnescapedRef, UnescapedRoute}; +use crate::tree::{denormalize_params, Node}; + +use std::fmt; + +/// Represents errors that can occur when inserting a new route. +#[non_exhaustive] +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum InsertError { + /// Attempted to insert a path that conflicts with an existing route. + Conflict { + /// The existing route that the insertion is conflicting with. + with: String, + }, + /// Only one parameter per route segment is allowed. + /// + /// Static segments are also allowed before a parameter, but not after it. For example, + /// `/foo-{bar}` is a valid route, but `/{bar}-foo` is not. + InvalidParamSegment, + /// Parameters must be registered with a valid name and matching braces. + /// + /// Note you can use `{{` or `}}` to escape literal brackets. + InvalidParam, + /// Catch-all parameters are only allowed at the end of a path. + InvalidCatchAll, +} + +impl fmt::Display for InsertError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Conflict { with } => { + write!( + f, + "Insertion failed due to conflict with previously registered route: {}", + with + ) + } + Self::InvalidParamSegment => { + write!(f, "Only one parameter is allowed per path segment") + } + Self::InvalidParam => write!(f, "Parameters must be registered with a valid name"), + Self::InvalidCatchAll => write!( + f, + "Catch-all parameters are only allowed at the end of a route" + ), + } + } +} + +impl std::error::Error for InsertError {} + +impl InsertError { + /// Returns an error for a route conflict with the given node. + /// + /// This method attempts to find the full conflicting route. + pub(crate) fn conflict<T>( + route: &UnescapedRoute, + prefix: UnescapedRef<'_>, + current: &Node<T>, + ) -> Self { + let mut route = route.clone(); + + // The route is conflicting with the current node. + if prefix.unescaped() == current.prefix.unescaped() { + denormalize_params(&mut route, ¤t.remapping); + return InsertError::Conflict { + with: String::from_utf8(route.into_unescaped()).unwrap(), + }; + } + + // Remove the non-matching suffix from the route. + route.truncate(route.len() - prefix.len()); + + // Add the conflicting prefix. + if !route.ends_with(¤t.prefix) { + route.append(¤t.prefix); + } + + // Add the prefixes of any conflicting children. + let mut child = current.children.first(); + while let Some(node) = child { + route.append(&node.prefix); + child = node.children.first(); + } + + // Denormalize any route parameters. + let mut last = current; + while let Some(node) = last.children.first() { + last = node; + } + denormalize_params(&mut route, &last.remapping); + + // Return the conflicting route. + InsertError::Conflict { + with: String::from_utf8(route.into_unescaped()).unwrap(), + } + } +} + +/// A failed match attempt. +/// +/// ``` +/// use matchit::{MatchError, Router}; +/// # fn main() -> Result<(), Box<dyn std::error::Error>> { +/// let mut router = Router::new(); +/// router.insert("/home", "Welcome!")?; +/// router.insert("/blog", "Our blog.")?; +/// +/// // no routes match +/// if let Err(err) = router.at("/blo") { +/// assert_eq!(err, MatchError::NotFound); +/// } +/// # Ok(()) +/// # } +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum MatchError { + /// No matching route was found. + NotFound, +} + +impl fmt::Display for MatchError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Matching route not found") + } +} + +impl std::error::Error for MatchError {} diff --git a/vendor/matchit/src/escape.rs b/vendor/matchit/src/escape.rs new file mode 100644 index 00000000..ef8ddbfb --- /dev/null +++ b/vendor/matchit/src/escape.rs @@ -0,0 +1,184 @@ +use std::{fmt, ops::Range}; + +/// An unescaped route that keeps track of the position of escaped characters ('{{' or '}}'). +/// +/// Note that this type dereferences to `&[u8]`. +#[derive(Clone, Default)] +pub struct UnescapedRoute { + // The raw unescaped route. + inner: Vec<u8>, + escaped: Vec<usize>, +} + +impl UnescapedRoute { + /// Unescapes escaped brackets ('{{' or '}}') in a route. + pub fn new(mut inner: Vec<u8>) -> UnescapedRoute { + let mut escaped = Vec::new(); + let mut i = 0; + + while let Some(&c) = inner.get(i) { + if (c == b'{' && inner.get(i + 1) == Some(&b'{')) + || (c == b'}' && inner.get(i + 1) == Some(&b'}')) + { + inner.remove(i); + escaped.push(i); + } + + i += 1; + } + + UnescapedRoute { inner, escaped } + } + + /// Returns true if the character at the given index was escaped. + pub fn is_escaped(&self, i: usize) -> bool { + self.escaped.contains(&i) + } + + /// Replaces the characters in the given range. + pub fn splice( + &mut self, + range: Range<usize>, + replace: Vec<u8>, + ) -> impl Iterator<Item = u8> + '_ { + // Ignore any escaped characters in the range being replaced. + self.escaped.retain(|x| !range.contains(x)); + + // Update the escaped indices. + let offset = (replace.len() as isize) - (range.len() as isize); + for i in &mut self.escaped { + if *i > range.end { + *i = i.checked_add_signed(offset).unwrap(); + } + } + + self.inner.splice(range, replace) + } + + /// Appends another route to the end of this one. + pub fn append(&mut self, other: &UnescapedRoute) { + for i in &other.escaped { + self.escaped.push(self.inner.len() + i); + } + + self.inner.extend_from_slice(&other.inner); + } + + /// Truncates the route to the given length. + pub fn truncate(&mut self, to: usize) { + self.escaped.retain(|&x| x < to); + self.inner.truncate(to); + } + + /// Returns a reference to this route. + pub fn as_ref(&self) -> UnescapedRef<'_> { + UnescapedRef { + inner: &self.inner, + escaped: &self.escaped, + offset: 0, + } + } + + /// Returns a reference to the unescaped slice. + pub fn unescaped(&self) -> &[u8] { + &self.inner + } + + /// Returns the unescaped route. + pub fn into_unescaped(self) -> Vec<u8> { + self.inner + } +} + +impl std::ops::Deref for UnescapedRoute { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl fmt::Debug for UnescapedRoute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(std::str::from_utf8(&self.inner).unwrap(), f) + } +} + +/// A reference to an `UnescapedRoute`. +#[derive(Copy, Clone)] +pub struct UnescapedRef<'a> { + pub inner: &'a [u8], + escaped: &'a [usize], + // An offset applied to each escaped index. + offset: isize, +} + +impl<'a> UnescapedRef<'a> { + /// Converts this reference into an owned route. + pub fn to_owned(self) -> UnescapedRoute { + let mut escaped = Vec::new(); + for &i in self.escaped { + let i = i.checked_add_signed(self.offset); + + match i { + Some(i) if i < self.inner.len() => escaped.push(i), + _ => {} + } + } + + UnescapedRoute { + escaped, + inner: self.inner.to_owned(), + } + } + + /// Returns `true` if the character at the given index was escaped. + pub fn is_escaped(&self, i: usize) -> bool { + if let Some(i) = i.checked_add_signed(-self.offset) { + return self.escaped.contains(&i); + } + + false + } + + /// Slices the route with `start..`. + pub fn slice_off(&self, start: usize) -> UnescapedRef<'a> { + UnescapedRef { + inner: &self.inner[start..], + escaped: self.escaped, + offset: self.offset - (start as isize), + } + } + + /// Slices the route with `..end`. + pub fn slice_until(&self, end: usize) -> UnescapedRef<'a> { + UnescapedRef { + inner: &self.inner[..end], + escaped: self.escaped, + offset: self.offset, + } + } + + /// Returns a reference to the unescaped slice. + pub fn unescaped(&self) -> &[u8] { + self.inner + } +} + +impl<'a> std::ops::Deref for UnescapedRef<'a> { + type Target = &'a [u8]; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl<'a> fmt::Debug for UnescapedRef<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("UnescapedRef") + .field("inner", &std::str::from_utf8(self.inner)) + .field("escaped", &self.escaped) + .field("offset", &self.offset) + .finish() + } +} diff --git a/vendor/matchit/src/lib.rs b/vendor/matchit/src/lib.rs new file mode 100644 index 00000000..cf128525 --- /dev/null +++ b/vendor/matchit/src/lib.rs @@ -0,0 +1,131 @@ +/*! +A high performance, zero-copy URL router. + +```rust +use matchit::Router; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + let mut router = Router::new(); + router.insert("/home", "Welcome!")?; + router.insert("/users/{id}", "A User")?; + + let matched = router.at("/users/978")?; + assert_eq!(matched.params.get("id"), Some("978")); + assert_eq!(*matched.value, "A User"); + + Ok(()) +} +``` + +# Parameters + +The router supports dynamic route segments. These can either be named or catch-all parameters. + +Named parameters like `/{id}` match anything until the next `/` or the end of the path. Note that named parameters must be followed +by a `/` or the end of the route. Dynamic suffixes are not currently supported. + +```rust +# use matchit::Router; +# fn main() -> Result<(), Box<dyn std::error::Error>> { +let mut m = Router::new(); +m.insert("/users/{id}", true)?; + +assert_eq!(m.at("/users/1")?.params.get("id"), Some("1")); +assert_eq!(m.at("/users/23")?.params.get("id"), Some("23")); +assert!(m.at("/users").is_err()); +# Ok(()) +# } +``` + +Catch-all parameters start with `*` and match anything until the end of the path. They must always be at the **end** of the route. + +```rust +# use matchit::Router; +# fn main() -> Result<(), Box<dyn std::error::Error>> { +let mut m = Router::new(); +m.insert("/{*p}", true)?; + +assert_eq!(m.at("/foo.js")?.params.get("p"), Some("foo.js")); +assert_eq!(m.at("/c/bar.css")?.params.get("p"), Some("c/bar.css")); + +// note that this will not match +assert!(m.at("/").is_err()); +# Ok(()) +# } +``` + +The literal characters `{` and `}` may be included in a static route by escaping them with the same character. For example, the `{` character is escaped with `{{` and the `}` character is escaped with `}}`. + +```rust +# use matchit::Router; +# fn main() -> Result<(), Box<dyn std::error::Error>> { +let mut m = Router::new(); +m.insert("/{{hello}}", true)?; +m.insert("/{hello}", true)?; + +// match the static route +assert!(m.at("/{hello}")?.value); + +// match the dynamic route +assert_eq!(m.at("/hello")?.params.get("hello"), Some("hello")); +# Ok(()) +# } +``` + +# Routing Priority + +Static and dynamic route segments are allowed to overlap. If they do, static segments will be given higher priority: + +```rust +# use matchit::Router; +# fn main() -> Result<(), Box<dyn std::error::Error>> { +let mut m = Router::new(); +m.insert("/", "Welcome!").unwrap(); // priority: 1 +m.insert("/about", "About Me").unwrap(); // priority: 1 +m.insert("/{*filepath}", "...").unwrap(); // priority: 2 +# Ok(()) +# } +``` + +# How does it work? + +The router takes advantage of the fact that URL routes generally follow a hierarchical structure. Routes are stored them in a radix trie that makes heavy use of common prefixes. + +```text +Priority Path Value +9 \ 1 +3 ├s None +2 |├earch\ 2 +1 |└upport\ 3 +2 ├blog\ 4 +1 | └{post} None +1 | └\ 5 +2 ├about-us\ 6 +1 | └team\ 7 +1 └contact\ 8 +``` + +This allows us to reduce the route search to a small number of branches. Child nodes on the same level of the tree are also prioritized +by the number of children with registered values, increasing the chance of choosing the correct branch of the first try. + +As it turns out, this method of routing is extremely fast. See the [benchmark results](https://github.com/ibraheemdev/matchit?tab=readme-ov-file#benchmarks) for details. +*/ + +#![deny(rust_2018_idioms, clippy::all)] + +mod error; +mod escape; +mod params; +mod router; +mod tree; + +pub use error::{InsertError, MatchError}; +pub use params::{Params, ParamsIter}; +pub use router::{Match, Router}; + +#[cfg(doctest)] +mod readme { + #[allow(dead_code)] + #[doc = include_str!("../README.md")] + struct Readme; +} diff --git a/vendor/matchit/src/params.rs b/vendor/matchit/src/params.rs new file mode 100644 index 00000000..625b544b --- /dev/null +++ b/vendor/matchit/src/params.rs @@ -0,0 +1,262 @@ +use std::{fmt, iter, mem, slice}; + +/// A single URL parameter, consisting of a key and a value. +#[derive(PartialEq, Eq, Ord, PartialOrd, Default, Copy, Clone)] +struct Param<'k, 'v> { + // Keys and values are stored as byte slices internally by the router + // to avoid UTF8 checks when slicing, but UTF8 is still respected, + // so these slices are valid strings. + key: &'k [u8], + value: &'v [u8], +} + +impl<'k, 'v> Param<'k, 'v> { + const EMPTY: Param<'static, 'static> = Param { + key: b"", + value: b"", + }; + + // Returns the parameter key as a string. + fn key_str(&self) -> &'k str { + std::str::from_utf8(self.key).unwrap() + } + + // Returns the parameter value as a string. + fn value_str(&self) -> &'v str { + std::str::from_utf8(self.value).unwrap() + } +} + +/// A list of parameters returned by a route match. +/// +/// ```rust +/// # fn main() -> Result<(), Box<dyn std::error::Error>> { +/// # let mut router = matchit::Router::new(); +/// # router.insert("/users/{id}", true).unwrap(); +/// let matched = router.at("/users/1")?; +/// +/// // Iterate through the keys and values. +/// for (key, value) in matched.params.iter() { +/// println!("key: {}, value: {}", key, value); +/// } +/// +/// // Get a specific value by name. +/// let id = matched.params.get("id"); +/// assert_eq!(id, Some("1")); +/// # Ok(()) +/// # } +/// ``` +#[derive(PartialEq, Eq, Ord, PartialOrd, Clone)] +pub struct Params<'k, 'v> { + kind: ParamsKind<'k, 'v>, +} + +// Most routes have a small number of dynamic parameters, so we can avoid +// heap allocations in the common case. +const SMALL: usize = 3; + +// A list of parameters, optimized to avoid allocations when possible. +#[derive(PartialEq, Eq, Ord, PartialOrd, Clone)] +enum ParamsKind<'k, 'v> { + Small([Param<'k, 'v>; SMALL], usize), + Large(Vec<Param<'k, 'v>>), +} + +impl<'k, 'v> Params<'k, 'v> { + pub(crate) fn new() -> Self { + Self { + kind: ParamsKind::Small([Param::EMPTY; 3], 0), + } + } + + /// Returns the number of parameters. + pub fn len(&self) -> usize { + match self.kind { + ParamsKind::Small(_, len) => len, + ParamsKind::Large(ref vec) => vec.len(), + } + } + + // Truncates the parameter list to the given length. + pub(crate) fn truncate(&mut self, n: usize) { + match &mut self.kind { + ParamsKind::Small(_, len) => *len = n, + ParamsKind::Large(vec) => vec.truncate(n), + } + } + + /// Returns the value of the first parameter registered under the given key. + pub fn get(&self, key: impl AsRef<str>) -> Option<&'v str> { + let key = key.as_ref().as_bytes(); + + match &self.kind { + ParamsKind::Small(arr, len) => arr + .iter() + .take(*len) + .find(|param| param.key == key) + .map(Param::value_str), + ParamsKind::Large(vec) => vec + .iter() + .find(|param| param.key == key) + .map(Param::value_str), + } + } + + /// Returns an iterator over the parameters in the list. + pub fn iter(&self) -> ParamsIter<'_, 'k, 'v> { + ParamsIter::new(self) + } + + /// Returns `true` if there are no parameters in the list. + pub fn is_empty(&self) -> bool { + match self.kind { + ParamsKind::Small(_, len) => len == 0, + ParamsKind::Large(ref vec) => vec.is_empty(), + } + } + + /// Inserts a key value parameter pair into the list. + pub(crate) fn push(&mut self, key: &'k [u8], value: &'v [u8]) { + #[cold] + fn drain_to_vec<T: Default>(len: usize, elem: T, arr: &mut [T; SMALL]) -> Vec<T> { + let mut vec = Vec::with_capacity(len + 1); + vec.extend(arr.iter_mut().map(mem::take)); + vec.push(elem); + vec + } + + let param = Param { key, value }; + match &mut self.kind { + ParamsKind::Small(arr, len) => { + if *len == SMALL { + self.kind = ParamsKind::Large(drain_to_vec(*len, param, arr)); + return; + } + + arr[*len] = param; + *len += 1; + } + ParamsKind::Large(vec) => vec.push(param), + } + } + + // Applies a transformation function to each key. + pub(crate) fn for_each_key_mut(&mut self, f: impl Fn((usize, &mut &'k [u8]))) { + match &mut self.kind { + ParamsKind::Small(arr, len) => arr + .iter_mut() + .take(*len) + .map(|param| &mut param.key) + .enumerate() + .for_each(f), + ParamsKind::Large(vec) => vec + .iter_mut() + .map(|param| &mut param.key) + .enumerate() + .for_each(f), + } + } +} + +impl fmt::Debug for Params<'_, '_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} + +/// An iterator over the keys and values of a route's [parameters](crate::Params). +pub struct ParamsIter<'ps, 'k, 'v> { + kind: ParamsIterKind<'ps, 'k, 'v>, +} + +impl<'ps, 'k, 'v> ParamsIter<'ps, 'k, 'v> { + fn new(params: &'ps Params<'k, 'v>) -> Self { + let kind = match ¶ms.kind { + ParamsKind::Small(arr, len) => ParamsIterKind::Small(arr.iter().take(*len)), + ParamsKind::Large(vec) => ParamsIterKind::Large(vec.iter()), + }; + Self { kind } + } +} + +enum ParamsIterKind<'ps, 'k, 'v> { + Small(iter::Take<slice::Iter<'ps, Param<'k, 'v>>>), + Large(slice::Iter<'ps, Param<'k, 'v>>), +} + +impl<'ps, 'k, 'v> Iterator for ParamsIter<'ps, 'k, 'v> { + type Item = (&'k str, &'v str); + + fn next(&mut self) -> Option<Self::Item> { + match self.kind { + ParamsIterKind::Small(ref mut iter) => { + iter.next().map(|p| (p.key_str(), p.value_str())) + } + ParamsIterKind::Large(ref mut iter) => { + iter.next().map(|p| (p.key_str(), p.value_str())) + } + } + } +} + +impl ExactSizeIterator for ParamsIter<'_, '_, '_> { + fn len(&self) -> usize { + match self.kind { + ParamsIterKind::Small(ref iter) => iter.len(), + ParamsIterKind::Large(ref iter) => iter.len(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn heap_alloc() { + let vec = vec![ + ("hello", "hello"), + ("world", "world"), + ("foo", "foo"), + ("bar", "bar"), + ("baz", "baz"), + ]; + + let mut params = Params::new(); + for (key, value) in vec.clone() { + params.push(key.as_bytes(), value.as_bytes()); + assert_eq!(params.get(key), Some(value)); + } + + match params.kind { + ParamsKind::Large(..) => {} + _ => panic!(), + } + + assert!(params.iter().eq(vec.clone())); + } + + #[test] + fn stack_alloc() { + let vec = vec![("hello", "hello"), ("world", "world"), ("baz", "baz")]; + + let mut params = Params::new(); + for (key, value) in vec.clone() { + params.push(key.as_bytes(), value.as_bytes()); + assert_eq!(params.get(key), Some(value)); + } + + match params.kind { + ParamsKind::Small(..) => {} + _ => panic!(), + } + + assert!(params.iter().eq(vec.clone())); + } + + #[test] + fn ignore_array_default() { + let params = Params::new(); + assert!(params.get("").is_none()); + } +} diff --git a/vendor/matchit/src/router.rs b/vendor/matchit/src/router.rs new file mode 100644 index 00000000..8256c4bc --- /dev/null +++ b/vendor/matchit/src/router.rs @@ -0,0 +1,147 @@ +use crate::tree::Node; +use crate::{InsertError, MatchError, Params}; + +/// A zero-copy URL router. +/// +/// See [the crate documentation](crate) for details. +#[derive(Clone, Debug)] +pub struct Router<T> { + root: Node<T>, +} + +impl<T> Default for Router<T> { + fn default() -> Self { + Self { + root: Node::default(), + } + } +} + +impl<T> Router<T> { + /// Construct a new router. + pub fn new() -> Self { + Self::default() + } + + /// Insert a route into the router. + /// + /// # Examples + /// + /// ```rust + /// # use matchit::Router; + /// # fn main() -> Result<(), Box<dyn std::error::Error>> { + /// let mut router = Router::new(); + /// router.insert("/home", "Welcome!")?; + /// router.insert("/users/{id}", "A User")?; + /// # Ok(()) + /// # } + /// ``` + pub fn insert(&mut self, route: impl Into<String>, value: T) -> Result<(), InsertError> { + self.root.insert(route.into(), value) + } + + /// Tries to find a value in the router matching the given path. + /// + /// # Examples + /// + /// ```rust + /// # use matchit::Router; + /// # fn main() -> Result<(), Box<dyn std::error::Error>> { + /// let mut router = Router::new(); + /// router.insert("/home", "Welcome!")?; + /// + /// let matched = router.at("/home").unwrap(); + /// assert_eq!(*matched.value, "Welcome!"); + /// # Ok(()) + /// # } + /// ``` + pub fn at<'path>(&self, path: &'path str) -> Result<Match<'_, 'path, &T>, MatchError> { + match self.root.at(path.as_bytes()) { + Ok((value, params)) => Ok(Match { + // Safety: We only expose `&mut T` through `&mut self` + value: unsafe { &*value.get() }, + params, + }), + Err(e) => Err(e), + } + } + + /// Tries to find a value in the router matching the given path, + /// returning a mutable reference. + /// + /// # Examples + /// + /// ```rust + /// # use matchit::Router; + /// # fn main() -> Result<(), Box<dyn std::error::Error>> { + /// let mut router = Router::new(); + /// router.insert("/", 1)?; + /// + /// *router.at_mut("/").unwrap().value += 1; + /// assert_eq!(*router.at("/").unwrap().value, 2); + /// # Ok(()) + /// # } + /// ``` + pub fn at_mut<'path>( + &mut self, + path: &'path str, + ) -> Result<Match<'_, 'path, &mut T>, MatchError> { + match self.root.at(path.as_bytes()) { + Ok((value, params)) => Ok(Match { + // Safety: We have `&mut self` + value: unsafe { &mut *value.get() }, + params, + }), + Err(e) => Err(e), + } + } + + /// Remove a given route from the router. + /// + /// Returns the value stored under the route if it was found. + /// If the route was not found or invalid, `None` is returned. + /// + /// # Examples + /// + /// ```rust + /// # use matchit::Router; + /// let mut router = Router::new(); + /// + /// router.insert("/home", "Welcome!"); + /// assert_eq!(router.remove("/home"), Some("Welcome!")); + /// assert_eq!(router.remove("/home"), None); + /// + /// router.insert("/home/{id}/", "Hello!"); + /// assert_eq!(router.remove("/home/{id}/"), Some("Hello!")); + /// assert_eq!(router.remove("/home/{id}/"), None); + /// + /// router.insert("/home/{id}/", "Hello!"); + /// // the route does not match + /// assert_eq!(router.remove("/home/{user}"), None); + /// assert_eq!(router.remove("/home/{id}/"), Some("Hello!")); + /// + /// router.insert("/home/{id}/", "Hello!"); + /// // invalid route + /// assert_eq!(router.remove("/home/{id"), None); + /// assert_eq!(router.remove("/home/{id}/"), Some("Hello!")); + /// ``` + pub fn remove(&mut self, path: impl Into<String>) -> Option<T> { + self.root.remove(path.into()) + } + + #[cfg(feature = "__test_helpers")] + pub fn check_priorities(&self) -> Result<u32, (u32, u32)> { + self.root.check_priorities() + } +} + +/// A successful match consisting of the registered value +/// and URL parameters, returned by [`Router::at`](Router::at). +#[derive(Debug)] +pub struct Match<'k, 'v, V> { + /// The value stored under the matched node. + pub value: V, + + /// The route parameters. See [parameters](crate#parameters) for more details. + pub params: Params<'k, 'v>, +} diff --git a/vendor/matchit/src/tree.rs b/vendor/matchit/src/tree.rs new file mode 100644 index 00000000..69a01495 --- /dev/null +++ b/vendor/matchit/src/tree.rs @@ -0,0 +1,878 @@ +use crate::escape::{UnescapedRef, UnescapedRoute}; +use crate::{InsertError, MatchError, Params}; + +use std::cell::UnsafeCell; +use std::cmp::min; +use std::ops::Range; +use std::{fmt, mem}; + +/// A radix tree used for URL path matching. +/// +/// See [the crate documentation](crate) for details. +pub struct Node<T> { + // This node's prefix. + pub(crate) prefix: UnescapedRoute, + // The priority of this node. + // + // Nodes with more children are higher priority and searched first. + pub(crate) priority: u32, + // Whether this node contains a wildcard child. + pub(crate) wild_child: bool, + // The first character of any static children, for fast linear search. + pub(crate) indices: Vec<u8>, + // The type of this node. + pub(crate) node_type: NodeType, + pub(crate) children: Vec<Self>, + // The value stored at this node. + // + // See `Node::at` for why an `UnsafeCell` is necessary. + value: Option<UnsafeCell<T>>, + // Parameter name remapping, stored at nodes that hold values. + pub(crate) remapping: ParamRemapping, +} + +/// The types of nodes a tree can hold. +#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone)] +pub(crate) enum NodeType { + /// The root path. + Root, + /// A route parameter, e.g. `/{id}`. + Param, + /// A catch-all parameter, e.g. `/*file`. + CatchAll, + /// A static prefix, e.g. `/foo`. + Static, +} + +/// Safety: We expose `value` per Rust's usual borrowing rules, so we can just +/// delegate these traits. +unsafe impl<T: Send> Send for Node<T> {} +unsafe impl<T: Sync> Sync for Node<T> {} + +impl<T> Node<T> { + // Insert a route into the tree. + pub fn insert(&mut self, route: String, val: T) -> Result<(), InsertError> { + let route = UnescapedRoute::new(route.into_bytes()); + let (route, remapping) = normalize_params(route)?; + let mut remaining = route.as_ref(); + + self.priority += 1; + + // If the tree is empty, insert the root node. + if self.prefix.is_empty() && self.children.is_empty() { + let last = self.insert_route(remaining, val)?; + last.remapping = remapping; + self.node_type = NodeType::Root; + return Ok(()); + } + + let mut current = self; + 'walk: loop { + // Find the common prefix between the route and the current node. + let len = min(remaining.len(), current.prefix.len()); + let common_prefix = (0..len) + .find(|&i| { + remaining[i] != current.prefix[i] + // Make sure not confuse the start of a wildcard with an escaped `{`. + || remaining.is_escaped(i) != current.prefix.is_escaped(i) + }) + .unwrap_or(len); + + // If this node has a longer prefix than we need, we have to fork and extract the + // common prefix into a shared parent. + if current.prefix.len() > common_prefix { + // Move the non-matching suffix into a child node. + let suffix = current.prefix.as_ref().slice_off(common_prefix).to_owned(); + let child = Node { + prefix: suffix, + value: current.value.take(), + indices: current.indices.clone(), + wild_child: current.wild_child, + children: mem::take(&mut current.children), + remapping: mem::take(&mut current.remapping), + priority: current.priority - 1, + node_type: NodeType::Static, + }; + + // The current node now only holds the common prefix. + current.children = vec![child]; + current.indices = vec![current.prefix[common_prefix]]; + current.prefix = current + .prefix + .as_ref() + .slice_until(common_prefix) + .to_owned(); + current.wild_child = false; + continue; + } + + if remaining.len() == common_prefix { + // This node must not already contain a value. + if current.value.is_some() { + return Err(InsertError::conflict(&route, remaining, current)); + } + + // Insert the value. + current.value = Some(UnsafeCell::new(val)); + current.remapping = remapping; + return Ok(()); + } + + // Otherwise, the route has a remaining non-matching suffix. + // + // We have to search deeper. + remaining = remaining.slice_off(common_prefix); + let next = remaining[0]; + + // After matching against a wildcard the next character is always `/`. + // + // Continue searching in the child node if it already exists. + if current.node_type == NodeType::Param && current.children.len() == 1 { + debug_assert_eq!(next, b'/'); + current = &mut current.children[0]; + current.priority += 1; + continue 'walk; + } + + // Find a child node that matches the next character in the route. + for mut i in 0..current.indices.len() { + if next == current.indices[i] { + // Make sure not confuse the start of a wildcard with an escaped `{` or `}`. + if matches!(next, b'{' | b'}') && !remaining.is_escaped(0) { + continue; + } + + // Continue searching in the child. + i = current.update_child_priority(i); + current = &mut current.children[i]; + continue 'walk; + } + } + + // We couldn't find a matching child. + // + // If we're not inserting a wildcard we have to create a child. + if (!matches!(next, b'{') || remaining.is_escaped(0)) + && current.node_type != NodeType::CatchAll + { + current.indices.push(next); + let mut child = current.add_child(Node::default()); + child = current.update_child_priority(child); + + // Insert into the newly created node. + let last = current.children[child].insert_route(remaining, val)?; + last.remapping = remapping; + return Ok(()); + } + + // We're trying to insert a wildcard. + // + // If this node already has a wildcard child, we have to make sure it matches. + if current.wild_child { + // Wildcards are always the last child. + current = current.children.last_mut().unwrap(); + current.priority += 1; + + // Make sure the route parameter matches. + if let Some(wildcard) = remaining.get(..current.prefix.len()) { + if *wildcard != *current.prefix { + return Err(InsertError::conflict(&route, remaining, current)); + } + } + + // Catch-all routes cannot have children. + if current.node_type == NodeType::CatchAll { + return Err(InsertError::conflict(&route, remaining, current)); + } + + // Continue with the wildcard node. + continue 'walk; + } + + // Otherwise, create a new node for the wildcard and insert the route. + let last = current.insert_route(remaining, val)?; + last.remapping = remapping; + return Ok(()); + } + } + + /// Removes a route from the tree, returning the value if the route already existed. + /// + /// The provided path should be the same as the one used to insert the route, including + /// wildcards. + pub fn remove(&mut self, route: String) -> Option<T> { + let route = UnescapedRoute::new(route.into_bytes()); + let (route, remapping) = normalize_params(route).ok()?; + let mut remaining = route.unescaped(); + + // Check if we are removing the root node. + if remaining == self.prefix.unescaped() { + let value = self.value.take().map(UnsafeCell::into_inner); + + // If the root node has no children, we can reset it. + if self.children.is_empty() { + *self = Node::default(); + } + + return value; + } + + let mut current = self; + 'walk: loop { + // The path is longer than this node's prefix, search deeper. + if remaining.len() > current.prefix.len() { + let (prefix, rest) = remaining.split_at(current.prefix.len()); + + // The prefix matches. + if prefix == current.prefix.unescaped() { + let first = rest[0]; + remaining = rest; + + // If there is a single child node, we can continue searching in the child. + if current.children.len() == 1 { + // The route matches, remove the node. + if current.children[0].prefix.unescaped() == remaining { + return current.remove_child(0, &remapping); + } + + // Otherwise, continue searching. + current = &mut current.children[0]; + continue 'walk; + } + + // Find a child node that matches the next character in the route. + if let Some(i) = current.indices.iter().position(|&c| c == first) { + // The route matches, remove the node. + if current.children[i].prefix.unescaped() == remaining { + return current.remove_child(i, &remapping); + } + + // Otherwise, continue searching. + current = &mut current.children[i]; + continue 'walk; + } + + // If the node has a matching wildcard child, continue searching in the child. + if current.wild_child + && remaining.first().zip(remaining.get(2)) == Some((&b'{', &b'}')) + { + // The route matches, remove the node. + if current.children.last_mut().unwrap().prefix.unescaped() == remaining { + return current.remove_child(current.children.len() - 1, &remapping); + } + + current = current.children.last_mut().unwrap(); + continue 'walk; + } + } + } + + // Could not find a match. + return None; + } + } + + /// Remove the child node at the given index, if the route parameters match. + fn remove_child(&mut self, i: usize, remapping: &ParamRemapping) -> Option<T> { + // Require an exact match to remove a route. + // + // For example, `/{a}` cannot be used to remove `/{b}`. + if self.children[i].remapping != *remapping { + return None; + } + + // If the node does not have any children, we can remove it completely. + let value = if self.children[i].children.is_empty() { + // Removing a single child with no indices. + if self.children.len() == 1 && self.indices.is_empty() { + self.wild_child = false; + self.children.remove(0).value.take() + } else { + // Remove the child node. + let child = self.children.remove(i); + + match child.node_type { + // Remove the index if we removed a static prefix. + NodeType::Static => { + self.indices.remove(i); + } + // Otherwise, we removed a wildcard. + _ => self.wild_child = false, + } + + child.value + } + } + // Otherwise, remove the value but preserve the node. + else { + self.children[i].value.take() + }; + + value.map(UnsafeCell::into_inner) + } + + // Adds a child to this node, keeping wildcards at the end. + fn add_child(&mut self, child: Node<T>) -> usize { + let len = self.children.len(); + + if self.wild_child && len > 0 { + self.children.insert(len - 1, child); + len - 1 + } else { + self.children.push(child); + len + } + } + + // Increments priority of the given child node, reordering the children if necessary. + // + // Returns the new index of the node. + fn update_child_priority(&mut self, i: usize) -> usize { + self.children[i].priority += 1; + let priority = self.children[i].priority; + + // Move the node to the front as necessary. + let mut updated = i; + while updated > 0 && self.children[updated - 1].priority < priority { + self.children.swap(updated - 1, updated); + updated -= 1; + } + + // Update the position of the indices to match. + if updated != i { + self.indices[updated..=i].rotate_right(1); + } + + updated + } + + // Insert a route at this node. + fn insert_route( + &mut self, + mut prefix: UnescapedRef<'_>, + val: T, + ) -> Result<&mut Node<T>, InsertError> { + let mut current = self; + + loop { + // Search for a wildcard segment. + let wildcard = match find_wildcard(prefix)? { + Some(wildcard) => wildcard, + // There is no wildcard, simply insert into the current node. + None => { + current.value = Some(UnsafeCell::new(val)); + current.prefix = prefix.to_owned(); + return Ok(current); + } + }; + + // Insering a catch-all route. + if prefix[wildcard.clone()][1] == b'*' { + // Ensure there is no suffix after the parameter, e.g. `/foo/{*x}/bar`. + if wildcard.end != prefix.len() { + return Err(InsertError::InvalidCatchAll); + } + + // Add the prefix before the wildcard into the current node. + if wildcard.start > 0 { + current.prefix = prefix.slice_until(wildcard.start).to_owned(); + prefix = prefix.slice_off(wildcard.start); + } + + // Add the catch-all as a child node. + let child = Self { + prefix: prefix.to_owned(), + node_type: NodeType::CatchAll, + value: Some(UnsafeCell::new(val)), + priority: 1, + ..Self::default() + }; + + let i = current.add_child(child); + current.wild_child = true; + return Ok(&mut current.children[i]); + } + + // Otherwise, we're inserting a regular route parameter. + assert_eq!(prefix[wildcard.clone()][0], b'{'); + + // Add the prefix before the wildcard into the current node. + if wildcard.start > 0 { + current.prefix = prefix.slice_until(wildcard.start).to_owned(); + prefix = prefix.slice_off(wildcard.start); + } + + // Add the parameter as a child node. + let child = Self { + node_type: NodeType::Param, + prefix: prefix.slice_until(wildcard.len()).to_owned(), + ..Self::default() + }; + + let child = current.add_child(child); + current.wild_child = true; + current = &mut current.children[child]; + current.priority += 1; + + // If the route doesn't end in the wildcard, we have to insert the suffix as a child. + if wildcard.len() < prefix.len() { + prefix = prefix.slice_off(wildcard.len()); + let child = Self { + priority: 1, + ..Self::default() + }; + + let child = current.add_child(child); + current = &mut current.children[child]; + continue; + } + + // Finally, insert the value. + current.value = Some(UnsafeCell::new(val)); + return Ok(current); + } + } +} + +/// A wildcard node that was skipped during a tree search. +/// +/// Contains the state necessary to backtrack to the given node. +struct Skipped<'n, 'p, T> { + // The node that was skipped. + node: &'n Node<T>, + /// The path at the time we skipped this node. + path: &'p [u8], + // The number of parameters that were present. + params: usize, +} + +#[rustfmt::skip] +macro_rules! backtracker { + ($skipped_nodes:ident, $path:ident, $current:ident, $params:ident, $backtracking:ident, $walk:lifetime) => { + macro_rules! try_backtrack { + () => { + // Try backtracking to any matching wildcard nodes that we skipped while + // traversing the tree. + while let Some(skipped) = $skipped_nodes.pop() { + if skipped.path.ends_with($path) { + // Restore the search state. + $path = skipped.path; + $current = &skipped.node; + $params.truncate(skipped.params); + $backtracking = true; + continue $walk; + } + } + }; + } + }; +} + +impl<T> Node<T> { + // Returns the node matching the given path. + // + // Returning an `UnsafeCell` allows us to avoid duplicating the logic between `Node::at` and + // `Node::at_mut`, as Rust doesn't have a great way of abstracting over mutability. + pub fn at<'node, 'path>( + &'node self, + full_path: &'path [u8], + ) -> Result<(&'node UnsafeCell<T>, Params<'node, 'path>), MatchError> { + let mut current = self; + let mut path = full_path; + let mut backtracking = false; + let mut params = Params::new(); + let mut skipped_nodes: Vec<Skipped<'_, '_, T>> = Vec::new(); + + 'walk: loop { + // Initialize the backtracker. + backtracker!(skipped_nodes, path, current, params, backtracking, 'walk); + + // Reached the end of the search. + if path.len() <= current.prefix.len() { + // Check for an exact match. + if *path == *current.prefix { + // Found the matching value. + if let Some(ref value) = current.value { + // Remap the keys of any route parameters we accumulated during the search. + params.for_each_key_mut(|(i, key)| *key = ¤t.remapping[i]); + return Ok((value, params)); + } + } + + // Try backtracking in case we skipped a wildcard that may match. + try_backtrack!(); + + // Otherwise, there are no matching routes in the tree. + return Err(MatchError::NotFound); + } + + // Otherwise, the path is longer than this node's prefix, search deeper. + let (prefix, rest) = path.split_at(current.prefix.len()); + + // The prefix does not match. + if *prefix != *current.prefix { + // Try backtracking in case we skipped a wildcard that may match. + try_backtrack!(); + + // Otherwise, there are no matching routes in the tree. + return Err(MatchError::NotFound); + } + + let previous = path; + path = rest; + + // If we are currently backtracking, avoid searching static children + // that we already searched. + if !backtracking { + let next = path[0]; + + // Find a child node that matches the next character in the path. + if let Some(i) = current.indices.iter().position(|&c| c == next) { + // Keep track of wildcard routes that we skip. + // + // We may end up needing to backtrack later in case we do not find a + // match. + if current.wild_child { + skipped_nodes.push(Skipped { + path: previous, + node: current, + params: params.len(), + }); + } + + // Continue searching. + current = ¤t.children[i]; + continue 'walk; + } + } + + // We didn't find a matching static child. + // + // If there are no wildcards, then there are no matching routes in the tree. + if !current.wild_child { + // Try backtracking in case we skipped a wildcard that may match. + try_backtrack!(); + return Err(MatchError::NotFound); + } + + // Continue searching in the wildcard child, which is kept at the end of the list. + current = current.children.last().unwrap(); + match current.node_type { + // Match against a route parameter. + NodeType::Param => { + // Check for more path segments. + let i = match path.iter().position(|&c| c == b'/') { + // Found another segment. + Some(i) => i, + // This is the last path segment. + None => { + let value = match current.value { + // Found the matching value. + Some(ref value) => value, + None => { + // Try backtracking in case we skipped a wildcard that may match. + try_backtrack!(); + + // Otherwise, there are no matching routes in the tree. + return Err(MatchError::NotFound); + } + }; + + // Store the parameter value. + // Parameters are normalized so the key is irrelevant for now. + params.push(b"", path); + + // Remap the keys of any route parameters we accumulated during the search. + params.for_each_key_mut(|(i, key)| *key = ¤t.remapping[i]); + + return Ok((value, params)); + } + }; + + // Found another path segment. + let (param, rest) = path.split_at(i); + + // If there is a static child, continue the search. + if let [child] = current.children.as_slice() { + // Store the parameter value. + // Parameters are normalized so the key is irrelevant for now. + params.push(b"", param); + + // Continue searching. + path = rest; + current = child; + backtracking = false; + continue 'walk; + } + + // Try backtracking in case we skipped a wildcard that may match. + try_backtrack!(); + + // Otherwise, there are no matching routes in the tree. + return Err(MatchError::NotFound); + } + NodeType::CatchAll => { + // Catch-all segments are only allowed at the end of the route, meaning + // this node must contain the value. + let value = match current.value { + // Found the matching value. + Some(ref value) => value, + // Otherwise, there are no matching routes in the tree. + None => return Err(MatchError::NotFound), + }; + + // Remap the keys of any route parameters we accumulated during the search. + params.for_each_key_mut(|(i, key)| *key = ¤t.remapping[i]); + + // Store the final catch-all parameter (`{*...}`). + let key = ¤t.prefix[2..current.prefix.len() - 1]; + params.push(key, path); + + return Ok((value, params)); + } + _ => unreachable!(), + } + } + } + + /// Test helper that ensures route priorities are consistent. + #[cfg(feature = "__test_helpers")] + pub fn check_priorities(&self) -> Result<u32, (u32, u32)> { + let mut priority: u32 = 0; + for child in &self.children { + priority += child.check_priorities()?; + } + + if self.value.is_some() { + priority += 1; + } + + if self.priority != priority { + return Err((self.priority, priority)); + } + + Ok(priority) + } +} + +/// An ordered list of route parameters keys for a specific route. +/// +/// To support conflicting routes like `/{a}/foo` and `/{b}/bar`, route parameters +/// are normalized before being inserted into the tree. Parameter remapping are +/// stored at nodes containing values, containing the "true" names of all route parameters +/// for the given route. +type ParamRemapping = Vec<Vec<u8>>; + +/// Returns `path` with normalized route parameters, and a parameter remapping +/// to store at the node for this route. +/// +/// Note that the parameter remapping may contain unescaped characters. +fn normalize_params( + mut path: UnescapedRoute, +) -> Result<(UnescapedRoute, ParamRemapping), InsertError> { + let mut start = 0; + let mut original = ParamRemapping::new(); + + // Parameter names are normalized alphabetically. + let mut next = b'a'; + + loop { + // Find a wildcard to normalize. + let mut wildcard = match find_wildcard(path.as_ref().slice_off(start))? { + Some(wildcard) => wildcard, + // No wildcard, we are done. + None => return Ok((path, original)), + }; + + wildcard.start += start; + wildcard.end += start; + + // Ensure the parameter has a valid name. + if wildcard.len() < 2 { + return Err(InsertError::InvalidParam); + } + + // We don't need to normalize catch-all parameters, as they are always + // at the end of a route. + if path[wildcard.clone()][1] == b'*' { + start = wildcard.end; + continue; + } + + // Normalize the parameter. + let removed = path.splice(wildcard.clone(), vec![b'{', next, b'}']); + + // Preserve the original name for remapping. + let mut removed = removed.skip(1).collect::<Vec<_>>(); + removed.pop(); + original.push(removed); + + next += 1; + if next > b'z' { + panic!("Too many route parameters."); + } + + // Continue the search after the parameter we just normalized. + start = wildcard.start + 3; + } +} + +/// Restores `route` to it's original, denormalized form. +pub(crate) fn denormalize_params(route: &mut UnescapedRoute, params: &ParamRemapping) { + let mut start = 0; + let mut i = 0; + + loop { + // Find a wildcard to denormalize. + let mut wildcard = match find_wildcard(route.as_ref().slice_off(start)).unwrap() { + Some(w) => w, + None => return, + }; + + wildcard.start += start; + wildcard.end += start; + + // Get the corresponding parameter remapping. + let mut next = match params.get(i) { + Some(param) => param.clone(), + None => return, + }; + + // Denormalize this parameter. + next.insert(0, b'{'); + next.push(b'}'); + let _ = route.splice(wildcard.clone(), next.clone()); + + i += 1; + start = wildcard.start + next.len(); + } +} + +// Searches for a wildcard segment and checks the path for invalid characters. +fn find_wildcard(path: UnescapedRef<'_>) -> Result<Option<Range<usize>>, InsertError> { + for (start, &c) in path.iter().enumerate() { + // Found an unescaped closing brace without a corresponding opening brace. + if c == b'}' && !path.is_escaped(start) { + return Err(InsertError::InvalidParam); + } + + // Keep going until we find an unescaped opening brace. + if c != b'{' || path.is_escaped(start) { + continue; + } + + // Ensure there is a non-empty parameter name. + if path.get(start + 1) == Some(&b'}') { + return Err(InsertError::InvalidParam); + } + + // Find the corresponding closing brace. + for (i, &c) in path.iter().enumerate().skip(start + 2) { + match c { + b'}' => { + // This closing brace was escaped, keep searching. + if path.is_escaped(i) { + continue; + } + + // Ensure catch-all parameters have a non-empty name. + if path.get(i - 1) == Some(&b'*') { + return Err(InsertError::InvalidParam); + } + + if let Some(&c) = path.get(i + 1) { + // Prefixes after route parameters are not supported. + if c != b'/' { + return Err(InsertError::InvalidParamSegment); + } + } + + return Ok(Some(start..i + 1)); + } + // `*` and `/` are invalid in parameter names. + b'*' | b'/' => return Err(InsertError::InvalidParam), + _ => {} + } + } + + // Missing closing brace. + return Err(InsertError::InvalidParam); + } + + Ok(None) +} + +impl<T> Clone for Node<T> +where + T: Clone, +{ + fn clone(&self) -> Self { + let value = self.value.as_ref().map(|value| { + // Safety: We only expose `&mut T` through `&mut self`. + let value = unsafe { &*value.get() }; + UnsafeCell::new(value.clone()) + }); + + Self { + value, + prefix: self.prefix.clone(), + wild_child: self.wild_child, + node_type: self.node_type.clone(), + indices: self.indices.clone(), + children: self.children.clone(), + remapping: self.remapping.clone(), + priority: self.priority, + } + } +} + +impl<T> Default for Node<T> { + fn default() -> Self { + Self { + remapping: ParamRemapping::new(), + prefix: UnescapedRoute::default(), + wild_child: false, + node_type: NodeType::Static, + indices: Vec::new(), + children: Vec::new(), + value: None, + priority: 0, + } + } +} + +impl<T> fmt::Debug for Node<T> +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Safety: We only expose `&mut T` through `&mut self`. + let value = unsafe { self.value.as_ref().map(|x| &*x.get()) }; + + let mut f = f.debug_struct("Node"); + f.field("value", &value) + .field("prefix", &self.prefix) + .field("node_type", &self.node_type) + .field("children", &self.children); + + // Extra information for debugging purposes. + #[cfg(test)] + { + let indices = self + .indices + .iter() + .map(|&x| char::from_u32(x as _)) + .collect::<Vec<_>>(); + + let params = self + .remapping + .iter() + .map(|x| std::str::from_utf8(x).unwrap()) + .collect::<Vec<_>>(); + + f.field("indices", &indices).field("params", ¶ms); + } + + f.finish() + } +} |
