From 45df4d0d9b577fecee798d672695fe24ff57fb1b Mon Sep 17 00:00:00 2001 From: mo khan Date: Tue, 15 Jul 2025 16:37:08 -0600 Subject: feat: migrate from Cedar to SpiceDB authorization system This is a major architectural change that replaces the Cedar policy-based authorization system with SpiceDB's relation-based authorization. Key changes: - Migrate from Rust to Go implementation - Replace Cedar policies with SpiceDB schema and relationships - Switch from envoy `ext_authz` with Cedar to SpiceDB permission checks - Update build system and dependencies for Go ecosystem - Maintain Envoy integration for external authorization This change enables more flexible permission modeling through SpiceDB's Google Zanzibar inspired relation-based system, supporting complex hierarchical permissions that were difficult to express in Cedar. Breaking change: Existing Cedar policies and Rust-based configuration will no longer work and need to be migrated to SpiceDB schema. --- vendor/logos-codegen/src/graph/fork.rs | 267 --------------- vendor/logos-codegen/src/graph/impls.rs | 220 ------------- vendor/logos-codegen/src/graph/meta.rs | 174 ---------- vendor/logos-codegen/src/graph/mod.rs | 566 -------------------------------- vendor/logos-codegen/src/graph/range.rs | 144 -------- vendor/logos-codegen/src/graph/regex.rs | 295 ----------------- vendor/logos-codegen/src/graph/rope.rs | 330 ------------------- 7 files changed, 1996 deletions(-) delete mode 100644 vendor/logos-codegen/src/graph/fork.rs delete mode 100644 vendor/logos-codegen/src/graph/impls.rs delete mode 100644 vendor/logos-codegen/src/graph/meta.rs delete mode 100644 vendor/logos-codegen/src/graph/mod.rs delete mode 100644 vendor/logos-codegen/src/graph/range.rs delete mode 100644 vendor/logos-codegen/src/graph/regex.rs delete mode 100644 vendor/logos-codegen/src/graph/rope.rs (limited to 'vendor/logos-codegen/src/graph') diff --git a/vendor/logos-codegen/src/graph/fork.rs b/vendor/logos-codegen/src/graph/fork.rs deleted file mode 100644 index 6b59836b..00000000 --- a/vendor/logos-codegen/src/graph/fork.rs +++ /dev/null @@ -1,267 +0,0 @@ -use crate::graph::{Disambiguate, Graph, NodeId, Range}; - -#[derive(Clone)] -pub struct Fork { - /// LUT matching byte -> node id - lut: Box<[Option; 256]>, - /// State to go to if no arms are matching - pub miss: Option, -} - -impl Fork { - pub fn new() -> Self { - Fork { - lut: Box::new([None; 256]), - miss: None, - } - } - - pub fn miss(mut self, miss: M) -> Self - where - M: Into>, - { - self.miss = miss.into(); - self - } - - pub fn add_branch(&mut self, range: R, then: NodeId, graph: &mut Graph) - where - R: Into, - T: Disambiguate, - { - for byte in range.into() { - match &mut self.lut[byte as usize] { - Some(other) if *other != then => { - *other = graph.merge(*other, then); - } - opt => *opt = Some(then), - } - } - } - - // TODO: Add result with a printable error - pub fn merge(&mut self, other: Fork, graph: &mut Graph) - where - T: Disambiguate, - { - self.miss = match (self.miss, other.miss) { - (None, None) => None, - (Some(id), None) | (None, Some(id)) => Some(id), - (Some(a), Some(b)) => Some(graph.merge(a, b)), - }; - - for (left, right) in self.lut.iter_mut().zip(other.lut.iter()) { - *left = match (*left, *right) { - (None, None) => continue, - (Some(id), None) | (None, Some(id)) => Some(id), - (Some(a), Some(b)) => Some(graph.merge(a, b)), - } - } - } - - pub fn branches(&self) -> ForkIter<'_> { - ForkIter { - offset: 0, - lut: &self.lut, - } - } - - /// Checks if all bytes in the `range` have a branch on this - /// fork, and those branches are resolve to the same `NodeId`. - pub fn contains(&self, range: R) -> Option - where - R: Into, - { - let mut range = range.into(); - let byte = range.next()?; - let first = self.lut[byte as usize]?; - - for byte in range { - if first != self.lut[byte as usize]? { - return None; - } - } - - Some(first) - } - - pub fn branch(mut self, range: R, then: NodeId) -> Self - where - R: Into, - { - for byte in range.into() { - match &mut self.lut[byte as usize] { - Some(other) if *other != then => { - panic!("Overlapping branches"); - } - opt => *opt = Some(then), - } - } - self - } - - pub fn shake(&self, graph: &Graph, filter: &mut [bool]) { - if let Some(id) = self.miss { - if !filter[id.get()] { - filter[id.get()] = true; - graph[id].shake(graph, filter); - } - } - - for (_, id) in self.branches() { - if !filter[id.get()] { - filter[id.get()] = true; - graph[id].shake(graph, filter); - } - } - } -} - -pub struct ForkIter<'a> { - offset: usize, - lut: &'a [Option; 256], -} - -impl Iterator for ForkIter<'_> { - type Item = (Range, NodeId); - - fn next(&mut self) -> Option { - // Consume empty slots - self.offset += self.lut[self.offset..] - .iter() - .take_while(|next| next.is_none()) - .count(); - - let then = self.lut.get(self.offset).copied().flatten()?; - let start = self.offset; - - // Consume all slots with same NodeId target - self.offset += self.lut[self.offset..] - .iter() - .take_while(|next| **next == Some(then)) - .count(); - - Some(( - Range { - start: start as u8, - end: (self.offset - 1) as u8, - }, - then, - )) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::graph::Node; - use pretty_assertions::assert_eq; - - #[test] - fn fork_iter() { - let mut buf = [None; 256]; - - for byte in b'4'..=b'7' { - buf[byte as usize] = Some(NodeId::new(1)); - } - for byte in b'a'..=b'd' { - buf[byte as usize] = Some(NodeId::new(2)); - } - - let iter = ForkIter { - offset: 0, - lut: &buf, - }; - - assert_eq!( - &[ - ( - Range { - start: b'4', - end: b'7' - }, - NodeId::new(1) - ), - ( - Range { - start: b'a', - end: b'd' - }, - NodeId::new(2) - ), - ], - &*iter.collect::>(), - ); - } - - #[test] - fn merge_no_conflict() { - let mut graph = Graph::new(); - - let leaf1 = graph.push(Node::Leaf("FOO")); - let leaf2 = graph.push(Node::Leaf("BAR")); - - let mut fork = Fork::new().branch(b'1', leaf1); - - fork.merge(Fork::new().branch(b'2', leaf2), &mut graph); - - assert_eq!(fork, Fork::new().branch(b'1', leaf1).branch(b'2', leaf2)); - } - - #[test] - fn merge_miss_right() { - let mut graph = Graph::new(); - - let leaf1 = graph.push(Node::Leaf("FOO")); - let leaf2 = graph.push(Node::Leaf("BAR")); - - let mut fork = Fork::new().branch(b'1', leaf1); - - fork.merge(Fork::new().miss(leaf2), &mut graph); - - assert_eq!(fork, Fork::new().branch(b'1', leaf1).miss(leaf2)); - } - - #[test] - fn merge_miss_left() { - let mut graph = Graph::new(); - - let leaf1 = graph.push(Node::Leaf("FOO")); - let leaf2 = graph.push(Node::Leaf("BAR")); - - let mut fork = Fork::new().miss(leaf1); - - fork.merge(Fork::new().branch(b'2', leaf2), &mut graph); - - assert_eq!(fork, Fork::new().branch(b'2', leaf2).miss(leaf1)); - } - - #[test] - fn contains_byte() { - let fork = Fork::new().branch('a'..='z', NodeId::new(42)); - - assert_eq!(fork.contains(b't'), Some(NodeId::new(42))); - } - - #[test] - fn contains_range() { - let fork = Fork::new() - .branch('a'..='m', NodeId::new(42)) - .branch('n'..='z', NodeId::new(42)); - - assert_eq!(fork.contains('i'..='r'), Some(NodeId::new(42))); - assert_eq!(fork.contains('a'..='z'), Some(NodeId::new(42))); - } - - #[test] - fn contains_different_ranges() { - let fork = Fork::new() - .branch('a'..='m', NodeId::new(42)) - .branch('n'..='z', NodeId::new(47)); - - assert_eq!(fork.contains('i'..='r'), None); - assert_eq!(fork.contains('a'..='z'), None); - assert_eq!(fork.contains('d'..='f'), Some(NodeId::new(42))); - assert_eq!(fork.contains('n'..='p'), Some(NodeId::new(47))); - } -} diff --git a/vendor/logos-codegen/src/graph/impls.rs b/vendor/logos-codegen/src/graph/impls.rs deleted file mode 100644 index dc97bdf1..00000000 --- a/vendor/logos-codegen/src/graph/impls.rs +++ /dev/null @@ -1,220 +0,0 @@ -use std::fmt::{self, Debug, Display}; -use std::hash::{Hash, Hasher}; - -use crate::graph::{Fork, Graph, Node, NodeId, Range, Rope}; - -impl From for Node { - fn from(fork: Fork) -> Self { - Node::Fork(fork) - } -} -impl From for Node { - fn from(rope: Rope) -> Self { - Node::Rope(rope) - } -} - -fn is_ascii(byte: u8) -> bool { - (0x20..0x7F).contains(&byte) -} - -impl Hash for Fork { - fn hash(&self, state: &mut H) { - for branch in self.branches() { - branch.hash(state); - } - self.miss.hash(state); - } -} - -impl Hash for Node { - fn hash(&self, state: &mut H) { - match self { - Node::Rope(rope) => { - b"ROPE".hash(state); - rope.hash(state); - } - Node::Fork(fork) => { - b"FORK".hash(state); - fork.hash(state); - } - Node::Leaf(_) => b"LEAF".hash(state), - } - } -} - -impl Debug for NodeId { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - Debug::fmt(&self.0, f) - } -} - -/// We don't need debug impls in release builds -// #[cfg(test)] -mod debug { - use super::*; - use crate::graph::rope::Miss; - use crate::graph::Disambiguate; - use std::cmp::{Ord, Ordering}; - - impl Disambiguate for &str { - fn cmp(left: &&str, right: &&str) -> Ordering { - Ord::cmp(left, right) - } - } - - impl Debug for Range { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let Range { start, end } = *self; - - if start != end || !is_ascii(start) { - f.write_str("[")?; - } - match is_ascii(start) { - true => write!(f, "{}", start as char), - false => write!(f, "{:02X}", start), - }?; - if start != end { - match is_ascii(end) { - true => write!(f, "-{}]", end as char), - false => write!(f, "-{:02X}]", end), - }?; - } else if !is_ascii(start) { - f.write_str("]")?; - } - Ok(()) - } - } - - impl Display for Range { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - ::fmt(self, f) - } - } - - impl Debug for Graph { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let entries = self - .nodes() - .iter() - .enumerate() - .filter_map(|(i, n)| n.as_ref().map(|n| (i, n))); - - f.debug_map().entries(entries).finish() - } - } - - struct Arm(T, U); - - impl Debug for Arm - where - T: Display, - U: Display, - { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} ⇒ {}", self.0, self.1) - } - } - - impl Debug for Fork { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut list = f.debug_set(); - - for (range, then) in self.branches() { - list.entry(&Arm(range, then)); - } - if let Some(id) = self.miss { - list.entry(&Arm('_', id)); - } - - list.finish() - } - } - - impl Display for Miss { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Miss::First(id) => Display::fmt(id, f), - Miss::Any(id) => write!(f, "{}*", id), - Miss::None => f.write_str("n/a"), - } - } - } - - impl Debug for Rope { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use std::fmt::Write; - - let mut rope = String::with_capacity(self.pattern.len()); - for range in self.pattern.iter() { - write!(rope, "{}", range)?; - } - - match self.miss.is_none() { - false => { - let mut list = f.debug_list(); - - list.entry(&Arm(rope, self.then)); - list.entry(&Arm('_', self.miss)); - - list.finish() - } - true => Arm(rope, self.then).fmt(f), - } - } - } - - impl PartialEq for Fork { - fn eq(&self, other: &Self) -> bool { - self.miss == other.miss && self.branches().eq(other.branches()) - } - } - - impl Debug for Node { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Node::Fork(fork) => fork.fmt(f), - Node::Rope(rope) => rope.fmt(f), - Node::Leaf(leaf) => leaf.fmt(f), - } - } - } - - use std::ops::RangeInclusive; - - impl From> for Range { - fn from(range: RangeInclusive) -> Range { - Range { - start: *range.start(), - end: *range.end(), - } - } - } - - impl From> for Range { - fn from(range: RangeInclusive) -> Range { - Range { - start: *range.start() as u8, - end: *range.end() as u8, - } - } - } - - impl PartialEq for Node { - fn eq(&self, other: &Rope) -> bool { - match self { - Node::Rope(rope) => rope == other, - _ => false, - } - } - } - - impl PartialEq for Node { - fn eq(&self, other: &Fork) -> bool { - match self { - Node::Fork(fork) => fork == other, - _ => false, - } - } - } -} diff --git a/vendor/logos-codegen/src/graph/meta.rs b/vendor/logos-codegen/src/graph/meta.rs deleted file mode 100644 index 757ced09..00000000 --- a/vendor/logos-codegen/src/graph/meta.rs +++ /dev/null @@ -1,174 +0,0 @@ -use std::cmp::min; -use std::collections::BTreeMap; -use std::ops::{Index, IndexMut}; - -use crate::graph::{Graph, Node, NodeId}; - -#[derive(Debug)] -pub struct Meta { - map: BTreeMap, -} - -#[derive(Debug, Default)] -pub struct MetaItem { - /// Number of references to this node - pub refcount: usize, - /// Minimum number of bytes that ought to be read for this - /// node to find a match - pub min_read: usize, - /// Marks whether or not this node leads to a loop entry node. - pub is_loop_init: bool, - /// Ids of other nodes that point to this node while this - /// node is on a stack (creating a loop) - pub loop_entry_from: Vec, -} - -impl Index for Meta { - type Output = MetaItem; - - fn index(&self, id: NodeId) -> &MetaItem { - &self.map[&id] - } -} - -impl IndexMut for Meta { - fn index_mut(&mut self, id: NodeId) -> &mut MetaItem { - self.map.entry(id).or_default() - } -} - -impl MetaItem { - fn loop_entry(&mut self, id: NodeId) { - if let Err(idx) = self.loop_entry_from.binary_search(&id) { - self.loop_entry_from.insert(idx, id); - } - } -} - -impl Meta { - pub fn analyze(root: NodeId, graph: &Graph) -> Self { - let mut meta = Meta { - map: Default::default(), - }; - - meta.first_pass(root, root, graph, &mut Vec::new()); - - meta - } - - pub fn first_pass( - &mut self, - this: NodeId, - parent: NodeId, - graph: &Graph, - stack: &mut Vec, - ) -> &MetaItem { - let meta = &mut self[this]; - let is_done = meta.refcount > 0; - - meta.refcount += 1; - - if stack.contains(&this) { - meta.loop_entry(parent); - self[parent].is_loop_init = true; - } - if is_done { - return &self[this]; - } - - stack.push(this); - - let mut min_read; - - match &graph[this] { - Node::Fork(fork) => { - min_read = usize::MAX; - for (_, id) in fork.branches() { - let meta = self.first_pass(id, this, graph, stack); - - if meta.is_loop_init { - min_read = 1; - } else { - min_read = min(min_read, meta.min_read + 1); - } - } - if let Some(id) = fork.miss { - let meta = self.first_pass(id, this, graph, stack); - - if meta.is_loop_init { - min_read = 0; - } else { - min_read = min(min_read, meta.min_read); - } - } - if min_read == usize::MAX { - min_read = 0; - } - } - Node::Rope(rope) => { - min_read = rope.pattern.len(); - let meta = self.first_pass(rope.then, this, graph, stack); - - if !meta.is_loop_init { - min_read += meta.min_read; - } - - if let Some(id) = rope.miss.first() { - let meta = self.first_pass(id, this, graph, stack); - - if meta.is_loop_init { - min_read = 0; - } else { - min_read = min(min_read, meta.min_read); - } - } - } - Node::Leaf(_) => min_read = 0, - } - - stack.pop(); - - let meta = &mut self[this]; - meta.min_read = min_read; - let second_pass = meta.loop_entry_from.clone(); - - for id in second_pass { - self.meta_second_pass(id, graph); - } - - &self[this] - } - - fn meta_second_pass(&mut self, id: NodeId, graph: &Graph) { - let mut min_read; - - match &graph[id] { - Node::Fork(fork) => { - min_read = usize::MAX; - for (_, id) in fork.branches() { - let meta = &self[id]; - - if meta.is_loop_init { - min_read = 1; - } else { - min_read = min(min_read, meta.min_read + 1); - } - } - if min_read == usize::MAX { - min_read = 0; - } - } - Node::Rope(rope) => { - min_read = rope.pattern.len(); - let meta = &self[rope.then]; - - if !meta.is_loop_init { - min_read += meta.min_read; - } - } - Node::Leaf(_) => unreachable!(), - } - - self[id].min_read = min_read; - } -} diff --git a/vendor/logos-codegen/src/graph/mod.rs b/vendor/logos-codegen/src/graph/mod.rs deleted file mode 100644 index 6d218e8c..00000000 --- a/vendor/logos-codegen/src/graph/mod.rs +++ /dev/null @@ -1,566 +0,0 @@ -use std::cmp::Ordering; -use std::collections::btree_map::Entry; -use std::collections::BTreeMap as Map; -use std::hash::{Hash, Hasher}; -use std::num::NonZeroU32; -use std::ops::Index; - -use fnv::FnvHasher; - -mod fork; -mod impls; -mod meta; -mod range; -mod regex; -mod rope; - -pub use self::fork::Fork; -pub use self::meta::Meta; -pub use self::range::Range; -pub use self::rope::Rope; - -/// Disambiguation error during the attempt to merge two leaf -/// nodes with the same priority -#[derive(Debug)] -pub struct DisambiguationError(pub NodeId, pub NodeId); - -pub struct Graph { - /// Internal storage of all allocated nodes. Once a node is - /// put here, it should never be mutated. - nodes: Vec>>, - /// When merging two nodes into a new node, we store the two - /// entry keys and the result, so that we don't merge the same - /// two nodes multiple times. - /// - /// Most of the time the entry we want to find will be the last - /// one that has been inserted, so we can use a vec with reverse - /// order search to get O(1) searches much faster than any *Map. - merges: Map, - /// Another map used for accounting. Before `.push`ing a new node - /// onto the graph (inserts are exempt), we hash it and find if - /// an identical(!) node has been created before. - hashes: Map, - /// Instead of handling errors over return types, opt to collect - /// them internally. - errors: Vec, - /// Deferred merges. When when attempting to merge a node with an - /// empty reserved slot, the merging operation is deferred until - /// the reserved slot is populated. This is a stack that keeps track - /// of all such deferred merges - deferred: Vec, -} - -/// Trait to be implemented on `Leaf` nodes in order to disambiguate -/// between them. -pub trait Disambiguate { - fn cmp(left: &Self, right: &Self) -> Ordering; -} - -/// Id of a Node in the graph. `NodeId` can be referencing an empty -/// slot that is going to be populated later in time. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct NodeId(NonZeroU32); - -impl Hash for NodeId { - fn hash(&self, state: &mut H) { - // Always use little-endian byte order for hashing to avoid - // different code generation on big-endian platforms due to - // iteration over a HashMap, - // see https://github.com/maciejhirsz/logos/issues/427. - state.write(&self.0.get().to_le_bytes()) - } -} - -impl NodeId { - fn get(self) -> usize { - self.0.get() as usize - } - - fn new(n: usize) -> NodeId { - NodeId(NonZeroU32::new(n as u32).expect("Invalid NodeId")) - } -} - -/// Unique reserved `NodeId` that is guaranteed to point to an -/// empty allocated slot in the graph. It's safe to create multiple -/// `NodeId` copies of `ReservedId`, however API users should never -/// be able to clone a `ReservedId`, or create a new one from `NodeId`. -/// -/// `ReservedId` is consumed once passed into `Graph::insert`. -#[derive(Debug)] -pub struct ReservedId(NodeId); - -impl ReservedId { - pub fn get(&self) -> NodeId { - self.0 - } -} - -/// Merge key used to lookup whether two nodes have been previously -/// mered, so we can avoid duplicating merges, potentially running into -/// loops that blow out the stack. -/// -/// `Merge::new(a, b)` should always equal to `Merge::new(b, a)` to ensure -/// that node merges are symmetric. -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)] -struct Merge(NodeId, NodeId); - -impl Merge { - fn new(a: NodeId, b: NodeId) -> Self { - if a < b { - Merge(a, b) - } else { - Merge(b, a) - } - } -} - -/// When attempting to merge two nodes, one of which was not yet created, -/// we can record such attempt, and execute the merge later on when the -/// `awaiting` has been `insert`ed into the graph. -#[derive(Debug)] -pub struct DeferredMerge { - awaiting: NodeId, - with: NodeId, - into: ReservedId, -} - -impl Graph { - pub fn new() -> Self { - Graph { - // Start with an empty slot so we can start - // counting NodeIds from 1 and use NonZero - // optimizations - nodes: vec![None], - merges: Map::new(), - hashes: Map::new(), - errors: Vec::new(), - deferred: Vec::new(), - } - } - - pub fn errors(&self) -> &[DisambiguationError] { - &self.errors - } - - fn next_id(&self) -> NodeId { - NodeId::new(self.nodes.len()) - } - - /// Reserve an empty slot for a node on the graph and return an - /// id for it. `ReservedId` cannot be cloned, and must be consumed - /// by calling `insert` on the graph. - pub fn reserve(&mut self) -> ReservedId { - let id = self.next_id(); - - self.nodes.push(None); - - ReservedId(id) - } - - /// Insert a node at a given, previously reserved id. Returns the - /// inserted `NodeId`. - pub fn insert(&mut self, reserved: ReservedId, node: N) -> NodeId - where - N: Into>, - Leaf: Disambiguate, - { - let id = reserved.get(); - - self.nodes[id.get()] = Some(node.into()); - - let mut awaiting = Vec::new(); - - // Partition out all `DeferredMerge`s that can be completed - // now that this `ReservedId` has a `Node` inserted into it. - for idx in (0..self.deferred.len()).rev() { - if self.deferred[idx].awaiting == id { - awaiting.push(self.deferred.remove(idx)); - } - } - - // Complete deferred merges. We've collected them from the back, - // so we must iterate through them from the back as well to restore - // proper order of merges in case there is some cascading going on. - for DeferredMerge { - awaiting, - with, - into, - } in awaiting.into_iter().rev() - { - self.merge_unchecked(awaiting, with, into); - } - - id - } - - /// Push a node onto the graph and get an id to it. If an identical - /// node has already been pushed on the graph, it will return the id - /// of that node instead. - pub fn push(&mut self, node: B) -> NodeId - where - B: Into>, - { - let node = node.into(); - - if let Node::Leaf(_) = node { - return self.push_unchecked(node); - } - - let mut hasher = FnvHasher::default(); - node.hash(&mut hasher); - - let next_id = self.next_id(); - - match self.hashes.entry(hasher.finish()) { - Entry::Occupied(occupied) => { - let id = *occupied.get(); - - if self[id].eq(&node) { - return id; - } - } - Entry::Vacant(vacant) => { - vacant.insert(next_id); - } - } - - self.push_unchecked(node) - } - - fn push_unchecked(&mut self, node: Node) -> NodeId { - let id = self.next_id(); - - self.nodes.push(Some(node)); - - id - } - - /// If nodes `a` and `b` have been already merged, return the - /// `NodeId` of the node they have been merged into. - fn find_merged(&self, a: NodeId, b: NodeId) -> Option { - let probe = Merge::new(a, b); - - self.merges.get(&probe).copied() - } - - /// Mark that nodes `a` and `b` have been merged into `product`. - /// - /// This will also mark merging `a` and `product`, as well as - /// `b` and `product` into `product`, since those are symmetric - /// operations. - /// - /// This is necessary to break out asymmetric merge loops. - fn set_merged(&mut self, a: NodeId, b: NodeId, product: NodeId) { - self.merges.insert(Merge::new(a, b), product); - self.merges.insert(Merge::new(a, product), product); - self.merges.insert(Merge::new(b, product), product); - } - - /// Merge the nodes at id `a` and `b`, returning a new id. - pub fn merge(&mut self, a: NodeId, b: NodeId) -> NodeId - where - Leaf: Disambiguate, - { - if a == b { - return a; - } - - // If the id pair is already merged (or is being merged), just return the id - if let Some(id) = self.find_merged(a, b) { - return id; - } - - match (self.get(a), self.get(b)) { - (None, None) => { - panic!( - "Merging two reserved nodes! This is a bug, please report it:\n\ - \n\ - https://github.com/maciejhirsz/logos/issues" - ); - } - (None, Some(_)) => { - let reserved = self.reserve(); - let id = reserved.get(); - self.deferred.push(DeferredMerge { - awaiting: a, - with: b, - into: reserved, - }); - self.set_merged(a, b, id); - - return id; - } - (Some(_), None) => { - let reserved = self.reserve(); - let id = reserved.get(); - self.deferred.push(DeferredMerge { - awaiting: b, - with: a, - into: reserved, - }); - self.set_merged(a, b, id); - - return id; - } - (Some(Node::Leaf(left)), Some(Node::Leaf(right))) => { - return match Disambiguate::cmp(left, right) { - Ordering::Less => b, - Ordering::Greater => a, - Ordering::Equal => { - self.errors.push(DisambiguationError(a, b)); - - a - } - }; - } - _ => (), - } - - // Reserve the id for the merge and save it. Since the graph can contain loops, - // this prevents us from trying to merge the same id pair in a loop, blowing up - // the stack. - let reserved = self.reserve(); - self.set_merged(a, b, reserved.get()); - - self.merge_unchecked(a, b, reserved) - } - - /// Unchecked merge of `a` and `b`. This fn assumes that `a` and `b` are - /// not pointing to empty slots. - fn merge_unchecked(&mut self, a: NodeId, b: NodeId, reserved: ReservedId) -> NodeId - where - Leaf: Disambiguate, - { - let merged_rope = match (self.get(a), self.get(b)) { - (Some(Node::Rope(rope)), _) => { - let rope = rope.clone(); - - self.merge_rope(rope, b) - } - (_, Some(Node::Rope(rope))) => { - let rope = rope.clone(); - - self.merge_rope(rope, a) - } - _ => None, - }; - - if let Some(rope) = merged_rope { - return self.insert(reserved, rope); - } - - let mut fork = self.fork_off(a); - fork.merge(self.fork_off(b), self); - - let mut stack = vec![reserved.get()]; - - // Flatten the fork - while let Some(miss) = fork.miss { - if stack.contains(&miss) { - break; - } - stack.push(miss); - - let other = match self.get(miss) { - Some(Node::Fork(other)) => other.clone(), - Some(Node::Rope(other)) => other.clone().into_fork(self), - _ => break, - }; - match other.miss { - Some(id) if self.get(id).is_none() => break, - _ => (), - } - fork.miss = None; - fork.merge(other, self); - } - - self.insert(reserved, fork) - } - - fn merge_rope(&mut self, rope: Rope, other: NodeId) -> Option - where - Leaf: Disambiguate, - { - match self.get(other) { - Some(Node::Fork(fork)) if rope.miss.is_none() => { - // Count how many consecutive ranges in this rope would - // branch into the fork that results in a loop. - // - // e.g.: for rope "foobar" and a looping fork [a-z]: 6 - let count = rope - .pattern - .iter() - .take_while(|range| fork.contains(**range) == Some(other)) - .count(); - - let mut rope = rope.split_at(count, self)?.miss_any(other); - - rope.then = self.merge(rope.then, other); - - Some(rope) - } - Some(Node::Rope(other)) => { - let (prefix, miss) = rope.prefix(other)?; - - let (a, b) = (rope, other.clone()); - - let a = a.remainder(prefix.len(), self); - let b = b.remainder(prefix.len(), self); - - let rope = Rope::new(prefix, self.merge(a, b)).miss(miss); - - Some(rope) - } - Some(Node::Leaf(_)) | None => { - if rope.miss.is_none() { - Some(rope.miss(other)) - } else { - None - } - } - _ => None, - } - } - - pub fn fork_off(&mut self, id: NodeId) -> Fork - where - Leaf: Disambiguate, - { - match self.get(id) { - Some(Node::Fork(fork)) => fork.clone(), - Some(Node::Rope(rope)) => rope.clone().into_fork(self), - Some(Node::Leaf(_)) | None => Fork::new().miss(id), - } - } - - pub fn nodes(&self) -> &[Option>] { - &self.nodes - } - - /// Find all nodes that have no references and remove them. - pub fn shake(&mut self, root: NodeId) { - let mut filter = vec![false; self.nodes.len()]; - - filter[root.get()] = true; - - self[root].shake(self, &mut filter); - - for (id, referenced) in filter.into_iter().enumerate() { - if !referenced { - self.nodes[id] = None; - } - } - } - - pub fn get(&self, id: NodeId) -> Option<&Node> { - self.nodes.get(id.get())?.as_ref() - } -} - -impl Index for Graph { - type Output = Node; - - fn index(&self, id: NodeId) -> &Node { - self.get(id).expect( - "Indexing into an empty node. This is a bug, please report it at:\n\ - \n\ - https://github.com/maciejhirsz/logos/issues", - ) - } -} - -impl std::fmt::Display for NodeId { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - std::fmt::Display::fmt(&self.0, f) - } -} - -#[cfg_attr(test, derive(PartialEq))] -pub enum Node { - /// Fork node, can lead to more than one state - Fork(Fork), - /// Rope node, can lead to one state on match, one state on miss - Rope(Rope), - /// Leaf node, terminal state - Leaf(Leaf), -} - -impl Node { - pub fn miss(&self) -> Option { - match self { - Node::Rope(rope) => rope.miss.first(), - Node::Fork(fork) => fork.miss, - Node::Leaf(_) => None, - } - } - - fn eq(&self, other: &Node) -> bool { - match (self, other) { - (Node::Fork(a), Node::Fork(b)) => a == b, - (Node::Rope(a), Node::Rope(b)) => a == b, - _ => false, - } - } - - fn shake(&self, graph: &Graph, filter: &mut [bool]) { - match self { - Node::Fork(fork) => fork.shake(graph, filter), - Node::Rope(rope) => rope.shake(graph, filter), - Node::Leaf(_) => (), - } - } - - pub fn unwrap_leaf(&self) -> &Leaf { - match self { - Node::Fork(_) => panic!("Internal Error: called unwrap_leaf on a fork"), - Node::Rope(_) => panic!("Internal Error: called unwrap_leaf on a rope"), - Node::Leaf(leaf) => leaf, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn leaf_stack_size() { - use std::mem::size_of; - - const WORD: usize = size_of::(); - const NODE: usize = size_of::>(); - - assert!(NODE <= 6 * WORD, "Size of Node<()> is {} bytes!", NODE); - } - - #[test] - fn create_a_loop() { - let mut graph = Graph::new(); - - let token = graph.push(Node::Leaf("IDENT")); - let id = graph.reserve(); - let fork = Fork::new().branch('a'..='z', id.get()).miss(token); - let root = graph.insert(id, fork); - - assert_eq!(graph[token], Node::Leaf("IDENT")); - assert_eq!(graph[root], Fork::new().branch('a'..='z', root).miss(token),); - } - - #[test] - fn fork_off() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = graph.push(Rope::new("rope", leaf)); - let fork = graph.push(Fork::new().branch(b'!', leaf)); - - assert_eq!(graph.fork_off(leaf), Fork::new().miss(leaf)); - assert_eq!( - graph.fork_off(rope), - Fork::new().branch(b'r', NodeId::new(graph.nodes.len() - 1)) - ); - assert_eq!(graph.fork_off(fork), Fork::new().branch(b'!', leaf)); - } -} diff --git a/vendor/logos-codegen/src/graph/range.rs b/vendor/logos-codegen/src/graph/range.rs deleted file mode 100644 index a4d23d3b..00000000 --- a/vendor/logos-codegen/src/graph/range.rs +++ /dev/null @@ -1,144 +0,0 @@ -use regex_syntax::hir::ClassBytesRange; -use regex_syntax::hir::ClassUnicodeRange; -use regex_syntax::utf8::Utf8Range; - -use std::cmp::{Ord, Ordering}; - -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub struct Range { - pub start: u8, - pub end: u8, -} - -impl Range { - pub fn as_byte(&self) -> Option { - if self.is_byte() { - Some(self.start) - } else { - None - } - } - - pub fn is_byte(&self) -> bool { - self.start == self.end - } -} - -impl From for Range { - fn from(byte: u8) -> Range { - Range { - start: byte, - end: byte, - } - } -} - -impl From<&u8> for Range { - fn from(byte: &u8) -> Range { - Range::from(*byte) - } -} - -impl Iterator for Range { - type Item = u8; - - fn next(&mut self) -> Option { - match self.start.cmp(&self.end) { - std::cmp::Ordering::Less => { - let res = self.start; - self.start += 1; - - Some(res) - } - std::cmp::Ordering::Equal => { - let res = self.start; - - // Necessary so that range 0xFF-0xFF doesn't loop forever - self.start = 0xFF; - self.end = 0x00; - - Some(res) - } - std::cmp::Ordering::Greater => None, - } - } -} - -impl PartialOrd for Range { - fn partial_cmp(&self, other: &Range) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Range { - fn cmp(&self, other: &Self) -> Ordering { - self.start.cmp(&other.start) - } -} - -impl From for Range { - fn from(r: Utf8Range) -> Range { - Range { - start: r.start, - end: r.end, - } - } -} - -impl From for Range { - fn from(r: ClassUnicodeRange) -> Range { - let start = r.start() as u32; - let end = r.end() as u32; - - if start >= 128 || end >= 128 && end != 0x0010FFFF { - panic!("Casting non-ascii ClassUnicodeRange to Range") - } - - Range { - start: start as u8, - end: end as u8, - } - } -} - -impl From for Range { - fn from(r: ClassBytesRange) -> Range { - Range { - start: r.start(), - end: r.end(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn range_iter_one() { - let byte = Range::from(b'!'); - let collected = byte.take(1000).collect::>(); - - assert_eq!(b"!", &collected[..]); - } - - #[test] - fn range_iter_few() { - let byte = Range { - start: b'a', - end: b'd', - }; - let collected = byte.take(1000).collect::>(); - - assert_eq!(b"abcd", &collected[..]); - } - - #[test] - fn range_iter_bounds() { - let byte = Range::from(0xFA..=0xFF); - - let collected = byte.take(1000).collect::>(); - - assert_eq!(b"\xFA\xFB\xFC\xFD\xFE\xFF", &collected[..]); - } -} diff --git a/vendor/logos-codegen/src/graph/regex.rs b/vendor/logos-codegen/src/graph/regex.rs deleted file mode 100644 index d55c490f..00000000 --- a/vendor/logos-codegen/src/graph/regex.rs +++ /dev/null @@ -1,295 +0,0 @@ -use std::fmt::Debug; - -use regex_syntax::utf8::Utf8Sequences; - -use crate::graph::{Disambiguate, Fork, Graph, Node, NodeId, Range, ReservedId, Rope}; -use crate::mir::{Class, ClassUnicode, Literal, Mir}; - -impl Graph { - pub fn regex(&mut self, mir: Mir, then: NodeId) -> NodeId { - self.parse_mir(&mir, then, None, None, false) - } - - fn parse_mir( - &mut self, - mir: &Mir, - then: NodeId, - miss: Option, - reserved: Option, - repeated: bool, - ) -> NodeId { - match mir { - Mir::Empty => then, - Mir::Loop(mir) => { - let reserved_first = reserved.unwrap_or_else(|| self.reserve()); - - let (new_then, new_miss); - if let Some(old_miss) = miss { - // We have to separate the first iteration from the other iterations, - // because the `old_miss` path must only be taken if we miss the first - // iteration. - let reserved_next = self.reserve(); - new_then = self.parse_mir( - mir, - reserved_next.get(), - Some(then), - Some(reserved_next), - true, - ); - new_miss = self.merge(old_miss, then); - } else { - new_then = reserved_first.get(); - new_miss = then; - } - - self.parse_mir(mir, new_then, Some(new_miss), Some(reserved_first), true) - } - Mir::Maybe(mir) => { - let miss = match miss { - Some(id) => self.merge(id, then), - None => then, - }; - - self.parse_mir(mir, then, Some(miss), reserved, true) - } - Mir::Alternation(alternation) => { - let mut fork = Fork::new().miss(miss); - - for mir in alternation { - let id = self.parse_mir(mir, then, None, None, repeated); - let alt = self.fork_off(id); - - fork.merge(alt, self); - } - - self.insert_or_push(reserved, fork) - } - Mir::Literal(literal) => { - let pattern = literal.0.to_vec(); - - self.insert_or_push(reserved, Rope::new(pattern, then).miss(miss)) - } - Mir::Concat(concat) => { - // Take an initial guess at the capacity - estimates a little worse than an average case - // scenario by assuming every concat element is singular but has a full code-point unicode literal. - // The only way to get the actual size of the Vec is if every sub-concat node is added up. - let mut ropebuf: Vec = Vec::with_capacity(concat.len() * 4); - let mut then = then; - - let mut handle_bytes = |graph: &mut Self, mir: &Mir, then: &mut NodeId| match mir { - Mir::Literal(Literal(bytes)) => { - ropebuf.extend(bytes.iter().rev().map(Into::::into)); - true - } - Mir::Class(Class::Unicode(class)) if is_one_ascii(class, repeated) => { - ropebuf.push(class.ranges()[0].into()); - true - } - Mir::Class(Class::Bytes(class)) if class.ranges().len() == 1 => { - ropebuf.push(class.ranges()[0].into()); - true - } - _ => { - if !ropebuf.is_empty() { - let rope = - Rope::new(ropebuf.iter().cloned().rev().collect::>(), *then); - - *then = graph.push(rope); - ropebuf = Vec::with_capacity(concat.len() * 4); - } - false - } - }; - - for mir in concat[1..].iter().rev() { - if !handle_bytes(self, mir, &mut then) { - then = self.parse_mir(mir, then, None, None, false); - } - } - - let first_mir = &concat[0]; - if handle_bytes(self, first_mir, &mut then) { - let rope = Rope::new(ropebuf.iter().cloned().rev().collect::>(), then) - .miss(miss); - self.insert_or_push(reserved, rope) - } else { - self.parse_mir(first_mir, then, miss, reserved, false) - } - } - Mir::Class(Class::Unicode(class)) if !is_ascii(class, repeated) => { - let mut ropes = class - .iter() - .flat_map(|range| Utf8Sequences::new(range.start(), range.end())) - .map(|sequence| Rope::new(sequence.as_slice(), then)) - .collect::>(); - - if ropes.len() == 1 { - let rope = ropes.remove(0); - - return self.insert_or_push(reserved, rope.miss(miss)); - } - - let mut root = Fork::new().miss(miss); - - for rope in ropes { - let fork = rope.into_fork(self); - root.merge(fork, self); - } - - self.insert_or_push(reserved, root) - } - Mir::Class(class) => { - let mut fork = Fork::new().miss(miss); - - let class: Vec = match class { - Class::Unicode(u) => u.iter().copied().map(Into::into).collect(), - Class::Bytes(b) => b.iter().copied().map(Into::into).collect(), - }; - - for range in class { - fork.add_branch(range, then, self); - } - - self.insert_or_push(reserved, fork) - } - } - } - - fn insert_or_push(&mut self, id: Option, node: N) -> NodeId - where - N: Into>, - { - match id { - Some(id) => self.insert(id, node), - None => self.push(node), - } - } -} - -/// Return whether current class unicode is ascii. -/// -/// Because unicode ranges are iterated in increasing order, -/// it is only necessary to check the last range. -/// -/// If the check is performed in a repetition, -/// a fast path is used by checking if end of range is 0x0010_FFFF. -fn is_ascii(class: &ClassUnicode, repeated: bool) -> bool { - class.iter().last().map_or(true, |range| { - let start = range.start() as u32; - let end = range.end() as u32; - end < 128 || (repeated && start < 128 && end == 0x0010_FFFF) - }) -} - -/// Return whether current class unicode is ascii and only contains -/// one range. -/// -/// See [`is_ascii`] function for more details. -fn is_one_ascii(class: &ClassUnicode, repeated: bool) -> bool { - if class.ranges().len() != 1 { - return false; - } - - let range = &class.ranges()[0]; - let start = range.start() as u32; - let end = range.end() as u32; - end < 128 || (repeated && start < 128 && end == 0x0010_FFFF) -} - -#[cfg(test)] -mod tests { - use std::num::NonZeroU32; - - use super::*; - use crate::graph::Node; - use pretty_assertions::assert_eq; - - #[test] - fn rope() { - let mut graph = Graph::new(); - - let mir = Mir::utf8("foobar").unwrap(); - - assert_eq!(mir.priority(), 12); - - let leaf = graph.push(Node::Leaf("LEAF")); - let id = graph.regex(mir, leaf); - - assert_eq!(graph[id], Node::Rope(Rope::new("foobar", leaf)),) - } - - #[test] - fn alternation() { - let mut graph = Graph::new(); - - let mir = Mir::utf8("a|b").unwrap(); - - assert_eq!(mir.priority(), 2); - - let leaf = graph.push(Node::Leaf("LEAF")); - let id = graph.regex(mir, leaf); - - assert_eq!( - graph[id], - Node::Fork(Fork::new().branch(b'a', leaf).branch(b'b', leaf)), - ); - } - - #[test] - fn repeat() { - let mut graph = Graph::new(); - - let mir = Mir::utf8("[a-z]*").unwrap(); - - assert_eq!(mir.priority(), 0); - - let leaf = graph.push(Node::Leaf("LEAF")); - let id = graph.regex(mir, leaf); - - assert_eq!( - graph[id], - Node::Fork( - Fork::new() - .branch('a'..='z', id) // goto self == loop - .miss(leaf) - ), - ); - } - - #[test] - fn maybe() { - let mut graph = Graph::new(); - - let mir = Mir::utf8("[a-z]?").unwrap(); - - assert_eq!(mir.priority(), 0); - - let leaf = graph.push(Node::Leaf("LEAF")); - let id = graph.regex(mir, leaf); - - assert_eq!( - graph[id], - Node::Fork(Fork::new().branch('a'..='z', leaf).miss(leaf)), - ); - } - - #[test] - fn long_concat_389() { - let mut graph = Graph::new(); - - let mir = Mir::utf8("abcdefghijklmnopqrstuvwxyz*").unwrap(); - - assert_eq!(mir.priority(), 50); - - let leaf = graph.push(Node::Leaf("LEAF")); - let id = graph.regex(mir, leaf); - let sub_id = NodeId(NonZeroU32::new(2).unwrap()); - - assert_eq!( - graph[id], - Node::Rope(Rope::new("abcdefghijklmnopqrstuvwxy", sub_id)) - ); - - assert_eq!(graph[sub_id], Node::Rope(Rope::new("z", sub_id).miss(leaf))) - } -} diff --git a/vendor/logos-codegen/src/graph/rope.rs b/vendor/logos-codegen/src/graph/rope.rs deleted file mode 100644 index 6e563fac..00000000 --- a/vendor/logos-codegen/src/graph/rope.rs +++ /dev/null @@ -1,330 +0,0 @@ -use std::ops::Deref; - -use crate::graph::{Disambiguate, Fork, Graph, NodeId, Range}; - -#[derive(PartialEq, Clone, Hash)] -pub struct Rope { - pub pattern: Pattern, - pub then: NodeId, - pub miss: Miss, -} - -#[derive(PartialEq, Clone, Hash)] -pub struct Pattern(pub Vec); - -impl Deref for Pattern { - type Target = [Range]; - - fn deref(&self) -> &[Range] { - &self.0 - } -} - -/// Because Ropes could potentially fail a match mid-pattern, -/// a regular `Option` is not sufficient here. -#[derive(PartialEq, Clone, Copy, Hash)] -pub enum Miss { - /// Same as Option::None, error on fail - None, - /// Jump to id if first byte does not match, fail on partial match - First(NodeId), - /// Jump to id on partial or empty match - Any(NodeId), -} - -impl Miss { - pub fn is_none(&self) -> bool { - matches!(self, Miss::None) - } - - pub fn first(self) -> Option { - match self { - Miss::First(id) | Miss::Any(id) => Some(id), - _ => None, - } - } - - pub fn take_first(&mut self) -> Option { - match *self { - Miss::First(id) => { - *self = Miss::None; - - Some(id) - } - Miss::Any(id) => Some(id), - Miss::None => None, - } - } -} - -impl From> for Miss { - fn from(miss: Option) -> Self { - match miss { - Some(id) => Miss::First(id), - None => Miss::None, - } - } -} - -impl From for Miss { - fn from(id: NodeId) -> Self { - Miss::First(id) - } -} - -impl Rope { - pub fn new

(pattern: P, then: NodeId) -> Self - where - P: Into, - { - Rope { - pattern: pattern.into(), - then, - miss: Miss::None, - } - } - - pub fn miss(mut self, miss: M) -> Self - where - M: Into, - { - self.miss = miss.into(); - self - } - - pub fn miss_any(mut self, miss: NodeId) -> Self { - self.miss = Miss::Any(miss); - self - } - - pub fn into_fork(mut self, graph: &mut Graph) -> Fork - where - T: Disambiguate, - { - let first = self.pattern.0.remove(0); - let miss = self.miss.take_first(); - - // The new fork will lead to a new rope, - // or the old target if no new rope was created - let then = match self.pattern.len() { - 0 => self.then, - _ => graph.push(self), - }; - - Fork::new().branch(first, then).miss(miss) - } - - pub fn prefix(&self, other: &Self) -> Option<(Pattern, Miss)> { - let count = self - .pattern - .iter() - .zip(other.pattern.iter()) - .take_while(|(a, b)| a == b) - .count(); - - let pattern = match count { - 0 => return None, - n => self.pattern[..n].into(), - }; - let miss = match (self.miss, other.miss) { - (Miss::None, miss) => miss, - (miss, Miss::None) => miss, - _ => return None, - }; - - Some((pattern, miss)) - } - - pub fn split_at(mut self, at: usize, graph: &mut Graph) -> Option - where - T: Disambiguate, - { - match at { - 0 => return None, - n if n == self.pattern.len() => return Some(self), - _ => (), - } - - let (this, next) = self.pattern.split_at(at); - - let next_miss = match self.miss { - Miss::Any(_) => self.miss, - _ => Miss::None, - }; - - let next = graph.push(Rope { - pattern: next.into(), - miss: next_miss, - then: self.then, - }); - - self.pattern = this.into(); - self.then = next; - - Some(self) - } - - pub fn remainder(mut self, at: usize, graph: &mut Graph) -> NodeId - where - T: Disambiguate, - { - self.pattern = self.pattern[at..].into(); - - match self.pattern.len() { - 0 => self.then, - _ => graph.push(self), - } - } - - pub fn shake(&self, graph: &Graph, filter: &mut [bool]) { - if let Some(id) = self.miss.first() { - if !filter[id.get()] { - filter[id.get()] = true; - graph[id].shake(graph, filter); - } - } - - if !filter[self.then.get()] { - filter[self.then.get()] = true; - graph[self.then].shake(graph, filter); - } - } -} - -impl Pattern { - pub fn to_bytes(&self) -> Option> { - let mut out = Vec::with_capacity(self.len()); - - for range in self.iter() { - out.push(range.as_byte()?); - } - - Some(out) - } -} - -impl From<&[T]> for Pattern -where - T: Into + Copy, -{ - fn from(slice: &[T]) -> Self { - Pattern(slice.iter().copied().map(Into::into).collect()) - } -} - -impl From> for Pattern -where - T: Into, -{ - fn from(vec: Vec) -> Self { - Pattern(vec.into_iter().map(Into::into).collect()) - } -} - -impl From<&str> for Pattern { - fn from(slice: &str) -> Self { - slice.as_bytes().into() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::graph::Node; - use pretty_assertions::assert_eq; - - #[test] - fn into_fork() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = Rope::new("foobar", leaf); - - let fork = rope.into_fork(&mut graph); - - assert_eq!(leaf, NodeId::new(1)); - assert_eq!(fork, Fork::new().branch(b'f', NodeId::new(2))); - assert_eq!(graph[NodeId::new(2)], Rope::new("oobar", leaf)); - } - - #[test] - fn into_fork_one_byte() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = Rope::new("!", leaf); - - let fork = rope.into_fork(&mut graph); - - assert_eq!(leaf, NodeId::new(1)); - assert_eq!(fork, Fork::new().branch(b'!', NodeId::new(1))); - } - - #[test] - fn into_fork_miss_any() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = Rope::new("42", leaf).miss_any(NodeId::new(42)); - - let fork = rope.into_fork(&mut graph); - - assert_eq!(leaf, NodeId::new(1)); - assert_eq!( - fork, - Fork::new() - .branch(b'4', NodeId::new(2)) - .miss(NodeId::new(42)) - ); - assert_eq!( - graph[NodeId::new(2)], - Rope::new("2", leaf).miss_any(NodeId::new(42)) - ); - } - - #[test] - fn into_fork_miss_first() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = Rope::new("42", leaf).miss(Miss::First(NodeId::new(42))); - - let fork = rope.into_fork(&mut graph); - - assert_eq!(leaf, NodeId::new(1)); - assert_eq!( - fork, - Fork::new() - .branch(b'4', NodeId::new(2)) - .miss(NodeId::new(42)) - ); - assert_eq!(graph[NodeId::new(2)], Rope::new("2", leaf)); - } - - #[test] - fn split_at() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = Rope::new("foobar", leaf); - - assert_eq!(rope.clone().split_at(6, &mut graph).unwrap(), rope); - - let split = rope.split_at(3, &mut graph).unwrap(); - let expected_id = NodeId::new(leaf.get() + 1); - - assert_eq!(split, Rope::new("foo", expected_id)); - assert_eq!(graph[expected_id], Rope::new("bar", leaf)); - } - - #[test] - fn pattern_to_bytes() { - let pat = Pattern::from("foobar"); - - assert_eq!(pat.to_bytes().unwrap(), b"foobar"); - - let ranges = Pattern::from(vec![0..=0, 42..=42, b'{'..=b'}']); - - assert_eq!(ranges.to_bytes(), None); - } -} -- cgit v1.2.3