diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
| commit | 8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch) | |
| tree | 22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/logos-codegen/src/graph | |
| parent | 4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff) | |
chore: add vendor directory
Diffstat (limited to 'vendor/logos-codegen/src/graph')
| -rw-r--r-- | vendor/logos-codegen/src/graph/fork.rs | 267 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/graph/impls.rs | 220 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/graph/meta.rs | 174 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/graph/mod.rs | 566 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/graph/range.rs | 144 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/graph/regex.rs | 295 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/graph/rope.rs | 330 |
7 files changed, 1996 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/graph/fork.rs b/vendor/logos-codegen/src/graph/fork.rs new file mode 100644 index 00000000..6b59836b --- /dev/null +++ b/vendor/logos-codegen/src/graph/fork.rs @@ -0,0 +1,267 @@ +use crate::graph::{Disambiguate, Graph, NodeId, Range}; + +#[derive(Clone)] +pub struct Fork { + /// LUT matching byte -> node id + lut: Box<[Option<NodeId>; 256]>, + /// State to go to if no arms are matching + pub miss: Option<NodeId>, +} + +impl Fork { + pub fn new() -> Self { + Fork { + lut: Box::new([None; 256]), + miss: None, + } + } + + pub fn miss<M>(mut self, miss: M) -> Self + where + M: Into<Option<NodeId>>, + { + self.miss = miss.into(); + self + } + + pub fn add_branch<R, T>(&mut self, range: R, then: NodeId, graph: &mut Graph<T>) + where + R: Into<Range>, + T: Disambiguate, + { + for byte in range.into() { + match &mut self.lut[byte as usize] { + Some(other) if *other != then => { + *other = graph.merge(*other, then); + } + opt => *opt = Some(then), + } + } + } + + // TODO: Add result with a printable error + pub fn merge<T>(&mut self, other: Fork, graph: &mut Graph<T>) + where + T: Disambiguate, + { + self.miss = match (self.miss, other.miss) { + (None, None) => None, + (Some(id), None) | (None, Some(id)) => Some(id), + (Some(a), Some(b)) => Some(graph.merge(a, b)), + }; + + for (left, right) in self.lut.iter_mut().zip(other.lut.iter()) { + *left = match (*left, *right) { + (None, None) => continue, + (Some(id), None) | (None, Some(id)) => Some(id), + (Some(a), Some(b)) => Some(graph.merge(a, b)), + } + } + } + + pub fn branches(&self) -> ForkIter<'_> { + ForkIter { + offset: 0, + lut: &self.lut, + } + } + + /// Checks if all bytes in the `range` have a branch on this + /// fork, and those branches are resolve to the same `NodeId`. + pub fn contains<R>(&self, range: R) -> Option<NodeId> + where + R: Into<Range>, + { + let mut range = range.into(); + let byte = range.next()?; + let first = self.lut[byte as usize]?; + + for byte in range { + if first != self.lut[byte as usize]? { + return None; + } + } + + Some(first) + } + + pub fn branch<R>(mut self, range: R, then: NodeId) -> Self + where + R: Into<Range>, + { + for byte in range.into() { + match &mut self.lut[byte as usize] { + Some(other) if *other != then => { + panic!("Overlapping branches"); + } + opt => *opt = Some(then), + } + } + self + } + + pub fn shake<T>(&self, graph: &Graph<T>, filter: &mut [bool]) { + if let Some(id) = self.miss { + if !filter[id.get()] { + filter[id.get()] = true; + graph[id].shake(graph, filter); + } + } + + for (_, id) in self.branches() { + if !filter[id.get()] { + filter[id.get()] = true; + graph[id].shake(graph, filter); + } + } + } +} + +pub struct ForkIter<'a> { + offset: usize, + lut: &'a [Option<NodeId>; 256], +} + +impl Iterator for ForkIter<'_> { + type Item = (Range, NodeId); + + fn next(&mut self) -> Option<Self::Item> { + // Consume empty slots + self.offset += self.lut[self.offset..] + .iter() + .take_while(|next| next.is_none()) + .count(); + + let then = self.lut.get(self.offset).copied().flatten()?; + let start = self.offset; + + // Consume all slots with same NodeId target + self.offset += self.lut[self.offset..] + .iter() + .take_while(|next| **next == Some(then)) + .count(); + + Some(( + Range { + start: start as u8, + end: (self.offset - 1) as u8, + }, + then, + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::graph::Node; + use pretty_assertions::assert_eq; + + #[test] + fn fork_iter() { + let mut buf = [None; 256]; + + for byte in b'4'..=b'7' { + buf[byte as usize] = Some(NodeId::new(1)); + } + for byte in b'a'..=b'd' { + buf[byte as usize] = Some(NodeId::new(2)); + } + + let iter = ForkIter { + offset: 0, + lut: &buf, + }; + + assert_eq!( + &[ + ( + Range { + start: b'4', + end: b'7' + }, + NodeId::new(1) + ), + ( + Range { + start: b'a', + end: b'd' + }, + NodeId::new(2) + ), + ], + &*iter.collect::<Vec<_>>(), + ); + } + + #[test] + fn merge_no_conflict() { + let mut graph = Graph::new(); + + let leaf1 = graph.push(Node::Leaf("FOO")); + let leaf2 = graph.push(Node::Leaf("BAR")); + + let mut fork = Fork::new().branch(b'1', leaf1); + + fork.merge(Fork::new().branch(b'2', leaf2), &mut graph); + + assert_eq!(fork, Fork::new().branch(b'1', leaf1).branch(b'2', leaf2)); + } + + #[test] + fn merge_miss_right() { + let mut graph = Graph::new(); + + let leaf1 = graph.push(Node::Leaf("FOO")); + let leaf2 = graph.push(Node::Leaf("BAR")); + + let mut fork = Fork::new().branch(b'1', leaf1); + + fork.merge(Fork::new().miss(leaf2), &mut graph); + + assert_eq!(fork, Fork::new().branch(b'1', leaf1).miss(leaf2)); + } + + #[test] + fn merge_miss_left() { + let mut graph = Graph::new(); + + let leaf1 = graph.push(Node::Leaf("FOO")); + let leaf2 = graph.push(Node::Leaf("BAR")); + + let mut fork = Fork::new().miss(leaf1); + + fork.merge(Fork::new().branch(b'2', leaf2), &mut graph); + + assert_eq!(fork, Fork::new().branch(b'2', leaf2).miss(leaf1)); + } + + #[test] + fn contains_byte() { + let fork = Fork::new().branch('a'..='z', NodeId::new(42)); + + assert_eq!(fork.contains(b't'), Some(NodeId::new(42))); + } + + #[test] + fn contains_range() { + let fork = Fork::new() + .branch('a'..='m', NodeId::new(42)) + .branch('n'..='z', NodeId::new(42)); + + assert_eq!(fork.contains('i'..='r'), Some(NodeId::new(42))); + assert_eq!(fork.contains('a'..='z'), Some(NodeId::new(42))); + } + + #[test] + fn contains_different_ranges() { + let fork = Fork::new() + .branch('a'..='m', NodeId::new(42)) + .branch('n'..='z', NodeId::new(47)); + + assert_eq!(fork.contains('i'..='r'), None); + assert_eq!(fork.contains('a'..='z'), None); + assert_eq!(fork.contains('d'..='f'), Some(NodeId::new(42))); + assert_eq!(fork.contains('n'..='p'), Some(NodeId::new(47))); + } +} diff --git a/vendor/logos-codegen/src/graph/impls.rs b/vendor/logos-codegen/src/graph/impls.rs new file mode 100644 index 00000000..dc97bdf1 --- /dev/null +++ b/vendor/logos-codegen/src/graph/impls.rs @@ -0,0 +1,220 @@ +use std::fmt::{self, Debug, Display}; +use std::hash::{Hash, Hasher}; + +use crate::graph::{Fork, Graph, Node, NodeId, Range, Rope}; + +impl<T> From<Fork> for Node<T> { + fn from(fork: Fork) -> Self { + Node::Fork(fork) + } +} +impl<T> From<Rope> for Node<T> { + fn from(rope: Rope) -> Self { + Node::Rope(rope) + } +} + +fn is_ascii(byte: u8) -> bool { + (0x20..0x7F).contains(&byte) +} + +impl Hash for Fork { + fn hash<H: Hasher>(&self, state: &mut H) { + for branch in self.branches() { + branch.hash(state); + } + self.miss.hash(state); + } +} + +impl<T> Hash for Node<T> { + fn hash<H: Hasher>(&self, state: &mut H) { + match self { + Node::Rope(rope) => { + b"ROPE".hash(state); + rope.hash(state); + } + Node::Fork(fork) => { + b"FORK".hash(state); + fork.hash(state); + } + Node::Leaf(_) => b"LEAF".hash(state), + } + } +} + +impl Debug for NodeId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&self.0, f) + } +} + +/// We don't need debug impls in release builds +// #[cfg(test)] +mod debug { + use super::*; + use crate::graph::rope::Miss; + use crate::graph::Disambiguate; + use std::cmp::{Ord, Ordering}; + + impl Disambiguate for &str { + fn cmp(left: &&str, right: &&str) -> Ordering { + Ord::cmp(left, right) + } + } + + impl Debug for Range { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let Range { start, end } = *self; + + if start != end || !is_ascii(start) { + f.write_str("[")?; + } + match is_ascii(start) { + true => write!(f, "{}", start as char), + false => write!(f, "{:02X}", start), + }?; + if start != end { + match is_ascii(end) { + true => write!(f, "-{}]", end as char), + false => write!(f, "-{:02X}]", end), + }?; + } else if !is_ascii(start) { + f.write_str("]")?; + } + Ok(()) + } + } + + impl Display for Range { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + <Range as Debug>::fmt(self, f) + } + } + + impl<T: Debug> Debug for Graph<T> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let entries = self + .nodes() + .iter() + .enumerate() + .filter_map(|(i, n)| n.as_ref().map(|n| (i, n))); + + f.debug_map().entries(entries).finish() + } + } + + struct Arm<T, U>(T, U); + + impl<T, U> Debug for Arm<T, U> + where + T: Display, + U: Display, + { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} ⇒ {}", self.0, self.1) + } + } + + impl Debug for Fork { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut list = f.debug_set(); + + for (range, then) in self.branches() { + list.entry(&Arm(range, then)); + } + if let Some(id) = self.miss { + list.entry(&Arm('_', id)); + } + + list.finish() + } + } + + impl Display for Miss { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Miss::First(id) => Display::fmt(id, f), + Miss::Any(id) => write!(f, "{}*", id), + Miss::None => f.write_str("n/a"), + } + } + } + + impl Debug for Rope { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use std::fmt::Write; + + let mut rope = String::with_capacity(self.pattern.len()); + for range in self.pattern.iter() { + write!(rope, "{}", range)?; + } + + match self.miss.is_none() { + false => { + let mut list = f.debug_list(); + + list.entry(&Arm(rope, self.then)); + list.entry(&Arm('_', self.miss)); + + list.finish() + } + true => Arm(rope, self.then).fmt(f), + } + } + } + + impl PartialEq for Fork { + fn eq(&self, other: &Self) -> bool { + self.miss == other.miss && self.branches().eq(other.branches()) + } + } + + impl<T: Debug> Debug for Node<T> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Node::Fork(fork) => fork.fmt(f), + Node::Rope(rope) => rope.fmt(f), + Node::Leaf(leaf) => leaf.fmt(f), + } + } + } + + use std::ops::RangeInclusive; + + impl From<RangeInclusive<u8>> for Range { + fn from(range: RangeInclusive<u8>) -> Range { + Range { + start: *range.start(), + end: *range.end(), + } + } + } + + impl From<RangeInclusive<char>> for Range { + fn from(range: RangeInclusive<char>) -> Range { + Range { + start: *range.start() as u8, + end: *range.end() as u8, + } + } + } + + impl<T> PartialEq<Rope> for Node<T> { + fn eq(&self, other: &Rope) -> bool { + match self { + Node::Rope(rope) => rope == other, + _ => false, + } + } + } + + impl<T> PartialEq<Fork> for Node<T> { + fn eq(&self, other: &Fork) -> bool { + match self { + Node::Fork(fork) => fork == other, + _ => false, + } + } + } +} diff --git a/vendor/logos-codegen/src/graph/meta.rs b/vendor/logos-codegen/src/graph/meta.rs new file mode 100644 index 00000000..757ced09 --- /dev/null +++ b/vendor/logos-codegen/src/graph/meta.rs @@ -0,0 +1,174 @@ +use std::cmp::min; +use std::collections::BTreeMap; +use std::ops::{Index, IndexMut}; + +use crate::graph::{Graph, Node, NodeId}; + +#[derive(Debug)] +pub struct Meta { + map: BTreeMap<NodeId, MetaItem>, +} + +#[derive(Debug, Default)] +pub struct MetaItem { + /// Number of references to this node + pub refcount: usize, + /// Minimum number of bytes that ought to be read for this + /// node to find a match + pub min_read: usize, + /// Marks whether or not this node leads to a loop entry node. + pub is_loop_init: bool, + /// Ids of other nodes that point to this node while this + /// node is on a stack (creating a loop) + pub loop_entry_from: Vec<NodeId>, +} + +impl Index<NodeId> for Meta { + type Output = MetaItem; + + fn index(&self, id: NodeId) -> &MetaItem { + &self.map[&id] + } +} + +impl IndexMut<NodeId> for Meta { + fn index_mut(&mut self, id: NodeId) -> &mut MetaItem { + self.map.entry(id).or_default() + } +} + +impl MetaItem { + fn loop_entry(&mut self, id: NodeId) { + if let Err(idx) = self.loop_entry_from.binary_search(&id) { + self.loop_entry_from.insert(idx, id); + } + } +} + +impl Meta { + pub fn analyze<T>(root: NodeId, graph: &Graph<T>) -> Self { + let mut meta = Meta { + map: Default::default(), + }; + + meta.first_pass(root, root, graph, &mut Vec::new()); + + meta + } + + pub fn first_pass<T>( + &mut self, + this: NodeId, + parent: NodeId, + graph: &Graph<T>, + stack: &mut Vec<NodeId>, + ) -> &MetaItem { + let meta = &mut self[this]; + let is_done = meta.refcount > 0; + + meta.refcount += 1; + + if stack.contains(&this) { + meta.loop_entry(parent); + self[parent].is_loop_init = true; + } + if is_done { + return &self[this]; + } + + stack.push(this); + + let mut min_read; + + match &graph[this] { + Node::Fork(fork) => { + min_read = usize::MAX; + for (_, id) in fork.branches() { + let meta = self.first_pass(id, this, graph, stack); + + if meta.is_loop_init { + min_read = 1; + } else { + min_read = min(min_read, meta.min_read + 1); + } + } + if let Some(id) = fork.miss { + let meta = self.first_pass(id, this, graph, stack); + + if meta.is_loop_init { + min_read = 0; + } else { + min_read = min(min_read, meta.min_read); + } + } + if min_read == usize::MAX { + min_read = 0; + } + } + Node::Rope(rope) => { + min_read = rope.pattern.len(); + let meta = self.first_pass(rope.then, this, graph, stack); + + if !meta.is_loop_init { + min_read += meta.min_read; + } + + if let Some(id) = rope.miss.first() { + let meta = self.first_pass(id, this, graph, stack); + + if meta.is_loop_init { + min_read = 0; + } else { + min_read = min(min_read, meta.min_read); + } + } + } + Node::Leaf(_) => min_read = 0, + } + + stack.pop(); + + let meta = &mut self[this]; + meta.min_read = min_read; + let second_pass = meta.loop_entry_from.clone(); + + for id in second_pass { + self.meta_second_pass(id, graph); + } + + &self[this] + } + + fn meta_second_pass<T>(&mut self, id: NodeId, graph: &Graph<T>) { + let mut min_read; + + match &graph[id] { + Node::Fork(fork) => { + min_read = usize::MAX; + for (_, id) in fork.branches() { + let meta = &self[id]; + + if meta.is_loop_init { + min_read = 1; + } else { + min_read = min(min_read, meta.min_read + 1); + } + } + if min_read == usize::MAX { + min_read = 0; + } + } + Node::Rope(rope) => { + min_read = rope.pattern.len(); + let meta = &self[rope.then]; + + if !meta.is_loop_init { + min_read += meta.min_read; + } + } + Node::Leaf(_) => unreachable!(), + } + + self[id].min_read = min_read; + } +} diff --git a/vendor/logos-codegen/src/graph/mod.rs b/vendor/logos-codegen/src/graph/mod.rs new file mode 100644 index 00000000..6d218e8c --- /dev/null +++ b/vendor/logos-codegen/src/graph/mod.rs @@ -0,0 +1,566 @@ +use std::cmp::Ordering; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap as Map; +use std::hash::{Hash, Hasher}; +use std::num::NonZeroU32; +use std::ops::Index; + +use fnv::FnvHasher; + +mod fork; +mod impls; +mod meta; +mod range; +mod regex; +mod rope; + +pub use self::fork::Fork; +pub use self::meta::Meta; +pub use self::range::Range; +pub use self::rope::Rope; + +/// Disambiguation error during the attempt to merge two leaf +/// nodes with the same priority +#[derive(Debug)] +pub struct DisambiguationError(pub NodeId, pub NodeId); + +pub struct Graph<Leaf> { + /// Internal storage of all allocated nodes. Once a node is + /// put here, it should never be mutated. + nodes: Vec<Option<Node<Leaf>>>, + /// When merging two nodes into a new node, we store the two + /// entry keys and the result, so that we don't merge the same + /// two nodes multiple times. + /// + /// Most of the time the entry we want to find will be the last + /// one that has been inserted, so we can use a vec with reverse + /// order search to get O(1) searches much faster than any *Map. + merges: Map<Merge, NodeId>, + /// Another map used for accounting. Before `.push`ing a new node + /// onto the graph (inserts are exempt), we hash it and find if + /// an identical(!) node has been created before. + hashes: Map<u64, NodeId>, + /// Instead of handling errors over return types, opt to collect + /// them internally. + errors: Vec<DisambiguationError>, + /// Deferred merges. When when attempting to merge a node with an + /// empty reserved slot, the merging operation is deferred until + /// the reserved slot is populated. This is a stack that keeps track + /// of all such deferred merges + deferred: Vec<DeferredMerge>, +} + +/// Trait to be implemented on `Leaf` nodes in order to disambiguate +/// between them. +pub trait Disambiguate { + fn cmp(left: &Self, right: &Self) -> Ordering; +} + +/// Id of a Node in the graph. `NodeId` can be referencing an empty +/// slot that is going to be populated later in time. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct NodeId(NonZeroU32); + +impl Hash for NodeId { + fn hash<H: Hasher>(&self, state: &mut H) { + // Always use little-endian byte order for hashing to avoid + // different code generation on big-endian platforms due to + // iteration over a HashMap, + // see https://github.com/maciejhirsz/logos/issues/427. + state.write(&self.0.get().to_le_bytes()) + } +} + +impl NodeId { + fn get(self) -> usize { + self.0.get() as usize + } + + fn new(n: usize) -> NodeId { + NodeId(NonZeroU32::new(n as u32).expect("Invalid NodeId")) + } +} + +/// Unique reserved `NodeId` that is guaranteed to point to an +/// empty allocated slot in the graph. It's safe to create multiple +/// `NodeId` copies of `ReservedId`, however API users should never +/// be able to clone a `ReservedId`, or create a new one from `NodeId`. +/// +/// `ReservedId` is consumed once passed into `Graph::insert`. +#[derive(Debug)] +pub struct ReservedId(NodeId); + +impl ReservedId { + pub fn get(&self) -> NodeId { + self.0 + } +} + +/// Merge key used to lookup whether two nodes have been previously +/// mered, so we can avoid duplicating merges, potentially running into +/// loops that blow out the stack. +/// +/// `Merge::new(a, b)` should always equal to `Merge::new(b, a)` to ensure +/// that node merges are symmetric. +#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)] +struct Merge(NodeId, NodeId); + +impl Merge { + fn new(a: NodeId, b: NodeId) -> Self { + if a < b { + Merge(a, b) + } else { + Merge(b, a) + } + } +} + +/// When attempting to merge two nodes, one of which was not yet created, +/// we can record such attempt, and execute the merge later on when the +/// `awaiting` has been `insert`ed into the graph. +#[derive(Debug)] +pub struct DeferredMerge { + awaiting: NodeId, + with: NodeId, + into: ReservedId, +} + +impl<Leaf> Graph<Leaf> { + pub fn new() -> Self { + Graph { + // Start with an empty slot so we can start + // counting NodeIds from 1 and use NonZero + // optimizations + nodes: vec![None], + merges: Map::new(), + hashes: Map::new(), + errors: Vec::new(), + deferred: Vec::new(), + } + } + + pub fn errors(&self) -> &[DisambiguationError] { + &self.errors + } + + fn next_id(&self) -> NodeId { + NodeId::new(self.nodes.len()) + } + + /// Reserve an empty slot for a node on the graph and return an + /// id for it. `ReservedId` cannot be cloned, and must be consumed + /// by calling `insert` on the graph. + pub fn reserve(&mut self) -> ReservedId { + let id = self.next_id(); + + self.nodes.push(None); + + ReservedId(id) + } + + /// Insert a node at a given, previously reserved id. Returns the + /// inserted `NodeId`. + pub fn insert<N>(&mut self, reserved: ReservedId, node: N) -> NodeId + where + N: Into<Node<Leaf>>, + Leaf: Disambiguate, + { + let id = reserved.get(); + + self.nodes[id.get()] = Some(node.into()); + + let mut awaiting = Vec::new(); + + // Partition out all `DeferredMerge`s that can be completed + // now that this `ReservedId` has a `Node` inserted into it. + for idx in (0..self.deferred.len()).rev() { + if self.deferred[idx].awaiting == id { + awaiting.push(self.deferred.remove(idx)); + } + } + + // Complete deferred merges. We've collected them from the back, + // so we must iterate through them from the back as well to restore + // proper order of merges in case there is some cascading going on. + for DeferredMerge { + awaiting, + with, + into, + } in awaiting.into_iter().rev() + { + self.merge_unchecked(awaiting, with, into); + } + + id + } + + /// Push a node onto the graph and get an id to it. If an identical + /// node has already been pushed on the graph, it will return the id + /// of that node instead. + pub fn push<B>(&mut self, node: B) -> NodeId + where + B: Into<Node<Leaf>>, + { + let node = node.into(); + + if let Node::Leaf(_) = node { + return self.push_unchecked(node); + } + + let mut hasher = FnvHasher::default(); + node.hash(&mut hasher); + + let next_id = self.next_id(); + + match self.hashes.entry(hasher.finish()) { + Entry::Occupied(occupied) => { + let id = *occupied.get(); + + if self[id].eq(&node) { + return id; + } + } + Entry::Vacant(vacant) => { + vacant.insert(next_id); + } + } + + self.push_unchecked(node) + } + + fn push_unchecked(&mut self, node: Node<Leaf>) -> NodeId { + let id = self.next_id(); + + self.nodes.push(Some(node)); + + id + } + + /// If nodes `a` and `b` have been already merged, return the + /// `NodeId` of the node they have been merged into. + fn find_merged(&self, a: NodeId, b: NodeId) -> Option<NodeId> { + let probe = Merge::new(a, b); + + self.merges.get(&probe).copied() + } + + /// Mark that nodes `a` and `b` have been merged into `product`. + /// + /// This will also mark merging `a` and `product`, as well as + /// `b` and `product` into `product`, since those are symmetric + /// operations. + /// + /// This is necessary to break out asymmetric merge loops. + fn set_merged(&mut self, a: NodeId, b: NodeId, product: NodeId) { + self.merges.insert(Merge::new(a, b), product); + self.merges.insert(Merge::new(a, product), product); + self.merges.insert(Merge::new(b, product), product); + } + + /// Merge the nodes at id `a` and `b`, returning a new id. + pub fn merge(&mut self, a: NodeId, b: NodeId) -> NodeId + where + Leaf: Disambiguate, + { + if a == b { + return a; + } + + // If the id pair is already merged (or is being merged), just return the id + if let Some(id) = self.find_merged(a, b) { + return id; + } + + match (self.get(a), self.get(b)) { + (None, None) => { + panic!( + "Merging two reserved nodes! This is a bug, please report it:\n\ + \n\ + https://github.com/maciejhirsz/logos/issues" + ); + } + (None, Some(_)) => { + let reserved = self.reserve(); + let id = reserved.get(); + self.deferred.push(DeferredMerge { + awaiting: a, + with: b, + into: reserved, + }); + self.set_merged(a, b, id); + + return id; + } + (Some(_), None) => { + let reserved = self.reserve(); + let id = reserved.get(); + self.deferred.push(DeferredMerge { + awaiting: b, + with: a, + into: reserved, + }); + self.set_merged(a, b, id); + + return id; + } + (Some(Node::Leaf(left)), Some(Node::Leaf(right))) => { + return match Disambiguate::cmp(left, right) { + Ordering::Less => b, + Ordering::Greater => a, + Ordering::Equal => { + self.errors.push(DisambiguationError(a, b)); + + a + } + }; + } + _ => (), + } + + // Reserve the id for the merge and save it. Since the graph can contain loops, + // this prevents us from trying to merge the same id pair in a loop, blowing up + // the stack. + let reserved = self.reserve(); + self.set_merged(a, b, reserved.get()); + + self.merge_unchecked(a, b, reserved) + } + + /// Unchecked merge of `a` and `b`. This fn assumes that `a` and `b` are + /// not pointing to empty slots. + fn merge_unchecked(&mut self, a: NodeId, b: NodeId, reserved: ReservedId) -> NodeId + where + Leaf: Disambiguate, + { + let merged_rope = match (self.get(a), self.get(b)) { + (Some(Node::Rope(rope)), _) => { + let rope = rope.clone(); + + self.merge_rope(rope, b) + } + (_, Some(Node::Rope(rope))) => { + let rope = rope.clone(); + + self.merge_rope(rope, a) + } + _ => None, + }; + + if let Some(rope) = merged_rope { + return self.insert(reserved, rope); + } + + let mut fork = self.fork_off(a); + fork.merge(self.fork_off(b), self); + + let mut stack = vec![reserved.get()]; + + // Flatten the fork + while let Some(miss) = fork.miss { + if stack.contains(&miss) { + break; + } + stack.push(miss); + + let other = match self.get(miss) { + Some(Node::Fork(other)) => other.clone(), + Some(Node::Rope(other)) => other.clone().into_fork(self), + _ => break, + }; + match other.miss { + Some(id) if self.get(id).is_none() => break, + _ => (), + } + fork.miss = None; + fork.merge(other, self); + } + + self.insert(reserved, fork) + } + + fn merge_rope(&mut self, rope: Rope, other: NodeId) -> Option<Rope> + where + Leaf: Disambiguate, + { + match self.get(other) { + Some(Node::Fork(fork)) if rope.miss.is_none() => { + // Count how many consecutive ranges in this rope would + // branch into the fork that results in a loop. + // + // e.g.: for rope "foobar" and a looping fork [a-z]: 6 + let count = rope + .pattern + .iter() + .take_while(|range| fork.contains(**range) == Some(other)) + .count(); + + let mut rope = rope.split_at(count, self)?.miss_any(other); + + rope.then = self.merge(rope.then, other); + + Some(rope) + } + Some(Node::Rope(other)) => { + let (prefix, miss) = rope.prefix(other)?; + + let (a, b) = (rope, other.clone()); + + let a = a.remainder(prefix.len(), self); + let b = b.remainder(prefix.len(), self); + + let rope = Rope::new(prefix, self.merge(a, b)).miss(miss); + + Some(rope) + } + Some(Node::Leaf(_)) | None => { + if rope.miss.is_none() { + Some(rope.miss(other)) + } else { + None + } + } + _ => None, + } + } + + pub fn fork_off(&mut self, id: NodeId) -> Fork + where + Leaf: Disambiguate, + { + match self.get(id) { + Some(Node::Fork(fork)) => fork.clone(), + Some(Node::Rope(rope)) => rope.clone().into_fork(self), + Some(Node::Leaf(_)) | None => Fork::new().miss(id), + } + } + + pub fn nodes(&self) -> &[Option<Node<Leaf>>] { + &self.nodes + } + + /// Find all nodes that have no references and remove them. + pub fn shake(&mut self, root: NodeId) { + let mut filter = vec![false; self.nodes.len()]; + + filter[root.get()] = true; + + self[root].shake(self, &mut filter); + + for (id, referenced) in filter.into_iter().enumerate() { + if !referenced { + self.nodes[id] = None; + } + } + } + + pub fn get(&self, id: NodeId) -> Option<&Node<Leaf>> { + self.nodes.get(id.get())?.as_ref() + } +} + +impl<Leaf> Index<NodeId> for Graph<Leaf> { + type Output = Node<Leaf>; + + fn index(&self, id: NodeId) -> &Node<Leaf> { + self.get(id).expect( + "Indexing into an empty node. This is a bug, please report it at:\n\ + \n\ + https://github.com/maciejhirsz/logos/issues", + ) + } +} + +impl std::fmt::Display for NodeId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + std::fmt::Display::fmt(&self.0, f) + } +} + +#[cfg_attr(test, derive(PartialEq))] +pub enum Node<Leaf> { + /// Fork node, can lead to more than one state + Fork(Fork), + /// Rope node, can lead to one state on match, one state on miss + Rope(Rope), + /// Leaf node, terminal state + Leaf(Leaf), +} + +impl<Leaf> Node<Leaf> { + pub fn miss(&self) -> Option<NodeId> { + match self { + Node::Rope(rope) => rope.miss.first(), + Node::Fork(fork) => fork.miss, + Node::Leaf(_) => None, + } + } + + fn eq(&self, other: &Node<Leaf>) -> bool { + match (self, other) { + (Node::Fork(a), Node::Fork(b)) => a == b, + (Node::Rope(a), Node::Rope(b)) => a == b, + _ => false, + } + } + + fn shake(&self, graph: &Graph<Leaf>, filter: &mut [bool]) { + match self { + Node::Fork(fork) => fork.shake(graph, filter), + Node::Rope(rope) => rope.shake(graph, filter), + Node::Leaf(_) => (), + } + } + + pub fn unwrap_leaf(&self) -> &Leaf { + match self { + Node::Fork(_) => panic!("Internal Error: called unwrap_leaf on a fork"), + Node::Rope(_) => panic!("Internal Error: called unwrap_leaf on a rope"), + Node::Leaf(leaf) => leaf, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn leaf_stack_size() { + use std::mem::size_of; + + const WORD: usize = size_of::<usize>(); + const NODE: usize = size_of::<Node<()>>(); + + assert!(NODE <= 6 * WORD, "Size of Node<()> is {} bytes!", NODE); + } + + #[test] + fn create_a_loop() { + let mut graph = Graph::new(); + + let token = graph.push(Node::Leaf("IDENT")); + let id = graph.reserve(); + let fork = Fork::new().branch('a'..='z', id.get()).miss(token); + let root = graph.insert(id, fork); + + assert_eq!(graph[token], Node::Leaf("IDENT")); + assert_eq!(graph[root], Fork::new().branch('a'..='z', root).miss(token),); + } + + #[test] + fn fork_off() { + let mut graph = Graph::new(); + + let leaf = graph.push(Node::Leaf("LEAF")); + let rope = graph.push(Rope::new("rope", leaf)); + let fork = graph.push(Fork::new().branch(b'!', leaf)); + + assert_eq!(graph.fork_off(leaf), Fork::new().miss(leaf)); + assert_eq!( + graph.fork_off(rope), + Fork::new().branch(b'r', NodeId::new(graph.nodes.len() - 1)) + ); + assert_eq!(graph.fork_off(fork), Fork::new().branch(b'!', leaf)); + } +} diff --git a/vendor/logos-codegen/src/graph/range.rs b/vendor/logos-codegen/src/graph/range.rs new file mode 100644 index 00000000..a4d23d3b --- /dev/null +++ b/vendor/logos-codegen/src/graph/range.rs @@ -0,0 +1,144 @@ +use regex_syntax::hir::ClassBytesRange; +use regex_syntax::hir::ClassUnicodeRange; +use regex_syntax::utf8::Utf8Range; + +use std::cmp::{Ord, Ordering}; + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct Range { + pub start: u8, + pub end: u8, +} + +impl Range { + pub fn as_byte(&self) -> Option<u8> { + if self.is_byte() { + Some(self.start) + } else { + None + } + } + + pub fn is_byte(&self) -> bool { + self.start == self.end + } +} + +impl From<u8> for Range { + fn from(byte: u8) -> Range { + Range { + start: byte, + end: byte, + } + } +} + +impl From<&u8> for Range { + fn from(byte: &u8) -> Range { + Range::from(*byte) + } +} + +impl Iterator for Range { + type Item = u8; + + fn next(&mut self) -> Option<u8> { + match self.start.cmp(&self.end) { + std::cmp::Ordering::Less => { + let res = self.start; + self.start += 1; + + Some(res) + } + std::cmp::Ordering::Equal => { + let res = self.start; + + // Necessary so that range 0xFF-0xFF doesn't loop forever + self.start = 0xFF; + self.end = 0x00; + + Some(res) + } + std::cmp::Ordering::Greater => None, + } + } +} + +impl PartialOrd for Range { + fn partial_cmp(&self, other: &Range) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for Range { + fn cmp(&self, other: &Self) -> Ordering { + self.start.cmp(&other.start) + } +} + +impl From<Utf8Range> for Range { + fn from(r: Utf8Range) -> Range { + Range { + start: r.start, + end: r.end, + } + } +} + +impl From<ClassUnicodeRange> for Range { + fn from(r: ClassUnicodeRange) -> Range { + let start = r.start() as u32; + let end = r.end() as u32; + + if start >= 128 || end >= 128 && end != 0x0010FFFF { + panic!("Casting non-ascii ClassUnicodeRange to Range") + } + + Range { + start: start as u8, + end: end as u8, + } + } +} + +impl From<ClassBytesRange> for Range { + fn from(r: ClassBytesRange) -> Range { + Range { + start: r.start(), + end: r.end(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn range_iter_one() { + let byte = Range::from(b'!'); + let collected = byte.take(1000).collect::<Vec<_>>(); + + assert_eq!(b"!", &collected[..]); + } + + #[test] + fn range_iter_few() { + let byte = Range { + start: b'a', + end: b'd', + }; + let collected = byte.take(1000).collect::<Vec<_>>(); + + assert_eq!(b"abcd", &collected[..]); + } + + #[test] + fn range_iter_bounds() { + let byte = Range::from(0xFA..=0xFF); + + let collected = byte.take(1000).collect::<Vec<_>>(); + + assert_eq!(b"\xFA\xFB\xFC\xFD\xFE\xFF", &collected[..]); + } +} diff --git a/vendor/logos-codegen/src/graph/regex.rs b/vendor/logos-codegen/src/graph/regex.rs new file mode 100644 index 00000000..d55c490f --- /dev/null +++ b/vendor/logos-codegen/src/graph/regex.rs @@ -0,0 +1,295 @@ +use std::fmt::Debug; + +use regex_syntax::utf8::Utf8Sequences; + +use crate::graph::{Disambiguate, Fork, Graph, Node, NodeId, Range, ReservedId, Rope}; +use crate::mir::{Class, ClassUnicode, Literal, Mir}; + +impl<Leaf: Disambiguate + Debug> Graph<Leaf> { + pub fn regex(&mut self, mir: Mir, then: NodeId) -> NodeId { + self.parse_mir(&mir, then, None, None, false) + } + + fn parse_mir( + &mut self, + mir: &Mir, + then: NodeId, + miss: Option<NodeId>, + reserved: Option<ReservedId>, + repeated: bool, + ) -> NodeId { + match mir { + Mir::Empty => then, + Mir::Loop(mir) => { + let reserved_first = reserved.unwrap_or_else(|| self.reserve()); + + let (new_then, new_miss); + if let Some(old_miss) = miss { + // We have to separate the first iteration from the other iterations, + // because the `old_miss` path must only be taken if we miss the first + // iteration. + let reserved_next = self.reserve(); + new_then = self.parse_mir( + mir, + reserved_next.get(), + Some(then), + Some(reserved_next), + true, + ); + new_miss = self.merge(old_miss, then); + } else { + new_then = reserved_first.get(); + new_miss = then; + } + + self.parse_mir(mir, new_then, Some(new_miss), Some(reserved_first), true) + } + Mir::Maybe(mir) => { + let miss = match miss { + Some(id) => self.merge(id, then), + None => then, + }; + + self.parse_mir(mir, then, Some(miss), reserved, true) + } + Mir::Alternation(alternation) => { + let mut fork = Fork::new().miss(miss); + + for mir in alternation { + let id = self.parse_mir(mir, then, None, None, repeated); + let alt = self.fork_off(id); + + fork.merge(alt, self); + } + + self.insert_or_push(reserved, fork) + } + Mir::Literal(literal) => { + let pattern = literal.0.to_vec(); + + self.insert_or_push(reserved, Rope::new(pattern, then).miss(miss)) + } + Mir::Concat(concat) => { + // Take an initial guess at the capacity - estimates a little worse than an average case + // scenario by assuming every concat element is singular but has a full code-point unicode literal. + // The only way to get the actual size of the Vec is if every sub-concat node is added up. + let mut ropebuf: Vec<Range> = Vec::with_capacity(concat.len() * 4); + let mut then = then; + + let mut handle_bytes = |graph: &mut Self, mir: &Mir, then: &mut NodeId| match mir { + Mir::Literal(Literal(bytes)) => { + ropebuf.extend(bytes.iter().rev().map(Into::<Range>::into)); + true + } + Mir::Class(Class::Unicode(class)) if is_one_ascii(class, repeated) => { + ropebuf.push(class.ranges()[0].into()); + true + } + Mir::Class(Class::Bytes(class)) if class.ranges().len() == 1 => { + ropebuf.push(class.ranges()[0].into()); + true + } + _ => { + if !ropebuf.is_empty() { + let rope = + Rope::new(ropebuf.iter().cloned().rev().collect::<Vec<_>>(), *then); + + *then = graph.push(rope); + ropebuf = Vec::with_capacity(concat.len() * 4); + } + false + } + }; + + for mir in concat[1..].iter().rev() { + if !handle_bytes(self, mir, &mut then) { + then = self.parse_mir(mir, then, None, None, false); + } + } + + let first_mir = &concat[0]; + if handle_bytes(self, first_mir, &mut then) { + let rope = Rope::new(ropebuf.iter().cloned().rev().collect::<Vec<_>>(), then) + .miss(miss); + self.insert_or_push(reserved, rope) + } else { + self.parse_mir(first_mir, then, miss, reserved, false) + } + } + Mir::Class(Class::Unicode(class)) if !is_ascii(class, repeated) => { + let mut ropes = class + .iter() + .flat_map(|range| Utf8Sequences::new(range.start(), range.end())) + .map(|sequence| Rope::new(sequence.as_slice(), then)) + .collect::<Vec<_>>(); + + if ropes.len() == 1 { + let rope = ropes.remove(0); + + return self.insert_or_push(reserved, rope.miss(miss)); + } + + let mut root = Fork::new().miss(miss); + + for rope in ropes { + let fork = rope.into_fork(self); + root.merge(fork, self); + } + + self.insert_or_push(reserved, root) + } + Mir::Class(class) => { + let mut fork = Fork::new().miss(miss); + + let class: Vec<Range> = match class { + Class::Unicode(u) => u.iter().copied().map(Into::into).collect(), + Class::Bytes(b) => b.iter().copied().map(Into::into).collect(), + }; + + for range in class { + fork.add_branch(range, then, self); + } + + self.insert_or_push(reserved, fork) + } + } + } + + fn insert_or_push<N>(&mut self, id: Option<ReservedId>, node: N) -> NodeId + where + N: Into<Node<Leaf>>, + { + match id { + Some(id) => self.insert(id, node), + None => self.push(node), + } + } +} + +/// Return whether current class unicode is ascii. +/// +/// Because unicode ranges are iterated in increasing order, +/// it is only necessary to check the last range. +/// +/// If the check is performed in a repetition, +/// a fast path is used by checking if end of range is 0x0010_FFFF. +fn is_ascii(class: &ClassUnicode, repeated: bool) -> bool { + class.iter().last().map_or(true, |range| { + let start = range.start() as u32; + let end = range.end() as u32; + end < 128 || (repeated && start < 128 && end == 0x0010_FFFF) + }) +} + +/// Return whether current class unicode is ascii and only contains +/// one range. +/// +/// See [`is_ascii`] function for more details. +fn is_one_ascii(class: &ClassUnicode, repeated: bool) -> bool { + if class.ranges().len() != 1 { + return false; + } + + let range = &class.ranges()[0]; + let start = range.start() as u32; + let end = range.end() as u32; + end < 128 || (repeated && start < 128 && end == 0x0010_FFFF) +} + +#[cfg(test)] +mod tests { + use std::num::NonZeroU32; + + use super::*; + use crate::graph::Node; + use pretty_assertions::assert_eq; + + #[test] + fn rope() { + let mut graph = Graph::new(); + + let mir = Mir::utf8("foobar").unwrap(); + + assert_eq!(mir.priority(), 12); + + let leaf = graph.push(Node::Leaf("LEAF")); + let id = graph.regex(mir, leaf); + + assert_eq!(graph[id], Node::Rope(Rope::new("foobar", leaf)),) + } + + #[test] + fn alternation() { + let mut graph = Graph::new(); + + let mir = Mir::utf8("a|b").unwrap(); + + assert_eq!(mir.priority(), 2); + + let leaf = graph.push(Node::Leaf("LEAF")); + let id = graph.regex(mir, leaf); + + assert_eq!( + graph[id], + Node::Fork(Fork::new().branch(b'a', leaf).branch(b'b', leaf)), + ); + } + + #[test] + fn repeat() { + let mut graph = Graph::new(); + + let mir = Mir::utf8("[a-z]*").unwrap(); + + assert_eq!(mir.priority(), 0); + + let leaf = graph.push(Node::Leaf("LEAF")); + let id = graph.regex(mir, leaf); + + assert_eq!( + graph[id], + Node::Fork( + Fork::new() + .branch('a'..='z', id) // goto self == loop + .miss(leaf) + ), + ); + } + + #[test] + fn maybe() { + let mut graph = Graph::new(); + + let mir = Mir::utf8("[a-z]?").unwrap(); + + assert_eq!(mir.priority(), 0); + + let leaf = graph.push(Node::Leaf("LEAF")); + let id = graph.regex(mir, leaf); + + assert_eq!( + graph[id], + Node::Fork(Fork::new().branch('a'..='z', leaf).miss(leaf)), + ); + } + + #[test] + fn long_concat_389() { + let mut graph = Graph::new(); + + let mir = Mir::utf8("abcdefghijklmnopqrstuvwxyz*").unwrap(); + + assert_eq!(mir.priority(), 50); + + let leaf = graph.push(Node::Leaf("LEAF")); + let id = graph.regex(mir, leaf); + let sub_id = NodeId(NonZeroU32::new(2).unwrap()); + + assert_eq!( + graph[id], + Node::Rope(Rope::new("abcdefghijklmnopqrstuvwxy", sub_id)) + ); + + assert_eq!(graph[sub_id], Node::Rope(Rope::new("z", sub_id).miss(leaf))) + } +} diff --git a/vendor/logos-codegen/src/graph/rope.rs b/vendor/logos-codegen/src/graph/rope.rs new file mode 100644 index 00000000..6e563fac --- /dev/null +++ b/vendor/logos-codegen/src/graph/rope.rs @@ -0,0 +1,330 @@ +use std::ops::Deref; + +use crate::graph::{Disambiguate, Fork, Graph, NodeId, Range}; + +#[derive(PartialEq, Clone, Hash)] +pub struct Rope { + pub pattern: Pattern, + pub then: NodeId, + pub miss: Miss, +} + +#[derive(PartialEq, Clone, Hash)] +pub struct Pattern(pub Vec<Range>); + +impl Deref for Pattern { + type Target = [Range]; + + fn deref(&self) -> &[Range] { + &self.0 + } +} + +/// Because Ropes could potentially fail a match mid-pattern, +/// a regular `Option` is not sufficient here. +#[derive(PartialEq, Clone, Copy, Hash)] +pub enum Miss { + /// Same as Option::None, error on fail + None, + /// Jump to id if first byte does not match, fail on partial match + First(NodeId), + /// Jump to id on partial or empty match + Any(NodeId), +} + +impl Miss { + pub fn is_none(&self) -> bool { + matches!(self, Miss::None) + } + + pub fn first(self) -> Option<NodeId> { + match self { + Miss::First(id) | Miss::Any(id) => Some(id), + _ => None, + } + } + + pub fn take_first(&mut self) -> Option<NodeId> { + match *self { + Miss::First(id) => { + *self = Miss::None; + + Some(id) + } + Miss::Any(id) => Some(id), + Miss::None => None, + } + } +} + +impl From<Option<NodeId>> for Miss { + fn from(miss: Option<NodeId>) -> Self { + match miss { + Some(id) => Miss::First(id), + None => Miss::None, + } + } +} + +impl From<NodeId> for Miss { + fn from(id: NodeId) -> Self { + Miss::First(id) + } +} + +impl Rope { + pub fn new<P>(pattern: P, then: NodeId) -> Self + where + P: Into<Pattern>, + { + Rope { + pattern: pattern.into(), + then, + miss: Miss::None, + } + } + + pub fn miss<M>(mut self, miss: M) -> Self + where + M: Into<Miss>, + { + self.miss = miss.into(); + self + } + + pub fn miss_any(mut self, miss: NodeId) -> Self { + self.miss = Miss::Any(miss); + self + } + + pub fn into_fork<T>(mut self, graph: &mut Graph<T>) -> Fork + where + T: Disambiguate, + { + let first = self.pattern.0.remove(0); + let miss = self.miss.take_first(); + + // The new fork will lead to a new rope, + // or the old target if no new rope was created + let then = match self.pattern.len() { + 0 => self.then, + _ => graph.push(self), + }; + + Fork::new().branch(first, then).miss(miss) + } + + pub fn prefix(&self, other: &Self) -> Option<(Pattern, Miss)> { + let count = self + .pattern + .iter() + .zip(other.pattern.iter()) + .take_while(|(a, b)| a == b) + .count(); + + let pattern = match count { + 0 => return None, + n => self.pattern[..n].into(), + }; + let miss = match (self.miss, other.miss) { + (Miss::None, miss) => miss, + (miss, Miss::None) => miss, + _ => return None, + }; + + Some((pattern, miss)) + } + + pub fn split_at<T>(mut self, at: usize, graph: &mut Graph<T>) -> Option<Rope> + where + T: Disambiguate, + { + match at { + 0 => return None, + n if n == self.pattern.len() => return Some(self), + _ => (), + } + + let (this, next) = self.pattern.split_at(at); + + let next_miss = match self.miss { + Miss::Any(_) => self.miss, + _ => Miss::None, + }; + + let next = graph.push(Rope { + pattern: next.into(), + miss: next_miss, + then: self.then, + }); + + self.pattern = this.into(); + self.then = next; + + Some(self) + } + + pub fn remainder<T>(mut self, at: usize, graph: &mut Graph<T>) -> NodeId + where + T: Disambiguate, + { + self.pattern = self.pattern[at..].into(); + + match self.pattern.len() { + 0 => self.then, + _ => graph.push(self), + } + } + + pub fn shake<T>(&self, graph: &Graph<T>, filter: &mut [bool]) { + if let Some(id) = self.miss.first() { + if !filter[id.get()] { + filter[id.get()] = true; + graph[id].shake(graph, filter); + } + } + + if !filter[self.then.get()] { + filter[self.then.get()] = true; + graph[self.then].shake(graph, filter); + } + } +} + +impl Pattern { + pub fn to_bytes(&self) -> Option<Vec<u8>> { + let mut out = Vec::with_capacity(self.len()); + + for range in self.iter() { + out.push(range.as_byte()?); + } + + Some(out) + } +} + +impl<T> From<&[T]> for Pattern +where + T: Into<Range> + Copy, +{ + fn from(slice: &[T]) -> Self { + Pattern(slice.iter().copied().map(Into::into).collect()) + } +} + +impl<T> From<Vec<T>> for Pattern +where + T: Into<Range>, +{ + fn from(vec: Vec<T>) -> Self { + Pattern(vec.into_iter().map(Into::into).collect()) + } +} + +impl From<&str> for Pattern { + fn from(slice: &str) -> Self { + slice.as_bytes().into() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::graph::Node; + use pretty_assertions::assert_eq; + + #[test] + fn into_fork() { + let mut graph = Graph::new(); + + let leaf = graph.push(Node::Leaf("LEAF")); + let rope = Rope::new("foobar", leaf); + + let fork = rope.into_fork(&mut graph); + + assert_eq!(leaf, NodeId::new(1)); + assert_eq!(fork, Fork::new().branch(b'f', NodeId::new(2))); + assert_eq!(graph[NodeId::new(2)], Rope::new("oobar", leaf)); + } + + #[test] + fn into_fork_one_byte() { + let mut graph = Graph::new(); + + let leaf = graph.push(Node::Leaf("LEAF")); + let rope = Rope::new("!", leaf); + + let fork = rope.into_fork(&mut graph); + + assert_eq!(leaf, NodeId::new(1)); + assert_eq!(fork, Fork::new().branch(b'!', NodeId::new(1))); + } + + #[test] + fn into_fork_miss_any() { + let mut graph = Graph::new(); + + let leaf = graph.push(Node::Leaf("LEAF")); + let rope = Rope::new("42", leaf).miss_any(NodeId::new(42)); + + let fork = rope.into_fork(&mut graph); + + assert_eq!(leaf, NodeId::new(1)); + assert_eq!( + fork, + Fork::new() + .branch(b'4', NodeId::new(2)) + .miss(NodeId::new(42)) + ); + assert_eq!( + graph[NodeId::new(2)], + Rope::new("2", leaf).miss_any(NodeId::new(42)) + ); + } + + #[test] + fn into_fork_miss_first() { + let mut graph = Graph::new(); + + let leaf = graph.push(Node::Leaf("LEAF")); + let rope = Rope::new("42", leaf).miss(Miss::First(NodeId::new(42))); + + let fork = rope.into_fork(&mut graph); + + assert_eq!(leaf, NodeId::new(1)); + assert_eq!( + fork, + Fork::new() + .branch(b'4', NodeId::new(2)) + .miss(NodeId::new(42)) + ); + assert_eq!(graph[NodeId::new(2)], Rope::new("2", leaf)); + } + + #[test] + fn split_at() { + let mut graph = Graph::new(); + + let leaf = graph.push(Node::Leaf("LEAF")); + let rope = Rope::new("foobar", leaf); + + assert_eq!(rope.clone().split_at(6, &mut graph).unwrap(), rope); + + let split = rope.split_at(3, &mut graph).unwrap(); + let expected_id = NodeId::new(leaf.get() + 1); + + assert_eq!(split, Rope::new("foo", expected_id)); + assert_eq!(graph[expected_id], Rope::new("bar", leaf)); + } + + #[test] + fn pattern_to_bytes() { + let pat = Pattern::from("foobar"); + + assert_eq!(pat.to_bytes().unwrap(), b"foobar"); + + let ranges = Pattern::from(vec![0..=0, 42..=42, b'{'..=b'}']); + + assert_eq!(ranges.to_bytes(), None); + } +} |
