chore: add vendor directory

author: mo khan <mo@mokhan.ca> 2025-07-02 18:36:06 -0600
committer: mo khan <mo@mokhan.ca> 2025-07-02 18:36:06 -0600
commit: 8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch)
tree: 22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/logos-codegen/src/graph
parent: 4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff)
7 files changed, 1996 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/graph/fork.rs b/vendor/logos-codegen/src/graph/fork.rs
new file mode 100644
index 00000000..6b59836b
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/fork.rs
@@ -0,0 +1,267 @@
+use crate::graph::{Disambiguate, Graph, NodeId, Range};
+
+#[derive(Clone)]
+pub struct Fork {
+    /// LUT matching byte -> node id
+    lut: Box<[Option<NodeId>; 256]>,
+    /// State to go to if no arms are matching
+    pub miss: Option<NodeId>,
+}
+
+impl Fork {
+    pub fn new() -> Self {
+        Fork {
+            lut: Box::new([None; 256]),
+            miss: None,
+        }
+    }
+
+    pub fn miss<M>(mut self, miss: M) -> Self
+    where
+        M: Into<Option<NodeId>>,
+    {
+        self.miss = miss.into();
+        self
+    }
+
+    pub fn add_branch<R, T>(&mut self, range: R, then: NodeId, graph: &mut Graph<T>)
+    where
+        R: Into<Range>,
+        T: Disambiguate,
+    {
+        for byte in range.into() {
+            match &mut self.lut[byte as usize] {
+                Some(other) if *other != then => {
+                    *other = graph.merge(*other, then);
+                }
+                opt => *opt = Some(then),
+            }
+        }
+    }
+
+    // TODO: Add result with a printable error
+    pub fn merge<T>(&mut self, other: Fork, graph: &mut Graph<T>)
+    where
+        T: Disambiguate,
+    {
+        self.miss = match (self.miss, other.miss) {
+            (None, None) => None,
+            (Some(id), None) | (None, Some(id)) => Some(id),
+            (Some(a), Some(b)) => Some(graph.merge(a, b)),
+        };
+
+        for (left, right) in self.lut.iter_mut().zip(other.lut.iter()) {
+            *left = match (*left, *right) {
+                (None, None) => continue,
+                (Some(id), None) | (None, Some(id)) => Some(id),
+                (Some(a), Some(b)) => Some(graph.merge(a, b)),
+            }
+        }
+    }
+
+    pub fn branches(&self) -> ForkIter<'_> {
+        ForkIter {
+            offset: 0,
+            lut: &self.lut,
+        }
+    }
+
+    /// Checks if all bytes in the `range` have a branch on this
+    /// fork, and those branches are resolve to the same `NodeId`.
+    pub fn contains<R>(&self, range: R) -> Option<NodeId>
+    where
+        R: Into<Range>,
+    {
+        let mut range = range.into();
+        let byte = range.next()?;
+        let first = self.lut[byte as usize]?;
+
+        for byte in range {
+            if first != self.lut[byte as usize]? {
+                return None;
+            }
+        }
+
+        Some(first)
+    }
+
+    pub fn branch<R>(mut self, range: R, then: NodeId) -> Self
+    where
+        R: Into<Range>,
+    {
+        for byte in range.into() {
+            match &mut self.lut[byte as usize] {
+                Some(other) if *other != then => {
+                    panic!("Overlapping branches");
+                }
+                opt => *opt = Some(then),
+            }
+        }
+        self
+    }
+
+    pub fn shake<T>(&self, graph: &Graph<T>, filter: &mut [bool]) {
+        if let Some(id) = self.miss {
+            if !filter[id.get()] {
+                filter[id.get()] = true;
+                graph[id].shake(graph, filter);
+            }
+        }
+
+        for (_, id) in self.branches() {
+            if !filter[id.get()] {
+                filter[id.get()] = true;
+                graph[id].shake(graph, filter);
+            }
+        }
+    }
+}
+
+pub struct ForkIter<'a> {
+    offset: usize,
+    lut: &'a [Option<NodeId>; 256],
+}
+
+impl Iterator for ForkIter<'_> {
+    type Item = (Range, NodeId);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Consume empty slots
+        self.offset += self.lut[self.offset..]
+            .iter()
+            .take_while(|next| next.is_none())
+            .count();
+
+        let then = self.lut.get(self.offset).copied().flatten()?;
+        let start = self.offset;
+
+        // Consume all slots with same NodeId target
+        self.offset += self.lut[self.offset..]
+            .iter()
+            .take_while(|next| **next == Some(then))
+            .count();
+
+        Some((
+            Range {
+                start: start as u8,
+                end: (self.offset - 1) as u8,
+            },
+            then,
+        ))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::graph::Node;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn fork_iter() {
+        let mut buf = [None; 256];
+
+        for byte in b'4'..=b'7' {
+            buf[byte as usize] = Some(NodeId::new(1));
+        }
+        for byte in b'a'..=b'd' {
+            buf[byte as usize] = Some(NodeId::new(2));
+        }
+
+        let iter = ForkIter {
+            offset: 0,
+            lut: &buf,
+        };
+
+        assert_eq!(
+            &[
+                (
+                    Range {
+                        start: b'4',
+                        end: b'7'
+                    },
+                    NodeId::new(1)
+                ),
+                (
+                    Range {
+                        start: b'a',
+                        end: b'd'
+                    },
+                    NodeId::new(2)
+                ),
+            ],
+            &*iter.collect::<Vec<_>>(),
+        );
+    }
+
+    #[test]
+    fn merge_no_conflict() {
+        let mut graph = Graph::new();
+
+        let leaf1 = graph.push(Node::Leaf("FOO"));
+        let leaf2 = graph.push(Node::Leaf("BAR"));
+
+        let mut fork = Fork::new().branch(b'1', leaf1);
+
+        fork.merge(Fork::new().branch(b'2', leaf2), &mut graph);
+
+        assert_eq!(fork, Fork::new().branch(b'1', leaf1).branch(b'2', leaf2));
+    }
+
+    #[test]
+    fn merge_miss_right() {
+        let mut graph = Graph::new();
+
+        let leaf1 = graph.push(Node::Leaf("FOO"));
+        let leaf2 = graph.push(Node::Leaf("BAR"));
+
+        let mut fork = Fork::new().branch(b'1', leaf1);
+
+        fork.merge(Fork::new().miss(leaf2), &mut graph);
+
+        assert_eq!(fork, Fork::new().branch(b'1', leaf1).miss(leaf2));
+    }
+
+    #[test]
+    fn merge_miss_left() {
+        let mut graph = Graph::new();
+
+        let leaf1 = graph.push(Node::Leaf("FOO"));
+        let leaf2 = graph.push(Node::Leaf("BAR"));
+
+        let mut fork = Fork::new().miss(leaf1);
+
+        fork.merge(Fork::new().branch(b'2', leaf2), &mut graph);
+
+        assert_eq!(fork, Fork::new().branch(b'2', leaf2).miss(leaf1));
+    }
+
+    #[test]
+    fn contains_byte() {
+        let fork = Fork::new().branch('a'..='z', NodeId::new(42));
+
+        assert_eq!(fork.contains(b't'), Some(NodeId::new(42)));
+    }
+
+    #[test]
+    fn contains_range() {
+        let fork = Fork::new()
+            .branch('a'..='m', NodeId::new(42))
+            .branch('n'..='z', NodeId::new(42));
+
+        assert_eq!(fork.contains('i'..='r'), Some(NodeId::new(42)));
+        assert_eq!(fork.contains('a'..='z'), Some(NodeId::new(42)));
+    }
+
+    #[test]
+    fn contains_different_ranges() {
+        let fork = Fork::new()
+            .branch('a'..='m', NodeId::new(42))
+            .branch('n'..='z', NodeId::new(47));
+
+        assert_eq!(fork.contains('i'..='r'), None);
+        assert_eq!(fork.contains('a'..='z'), None);
+        assert_eq!(fork.contains('d'..='f'), Some(NodeId::new(42)));
+        assert_eq!(fork.contains('n'..='p'), Some(NodeId::new(47)));
+    }
+}
diff --git a/vendor/logos-codegen/src/graph/impls.rs b/vendor/logos-codegen/src/graph/impls.rs
new file mode 100644
index 00000000..dc97bdf1
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/impls.rs
@@ -0,0 +1,220 @@
+use std::fmt::{self, Debug, Display};
+use std::hash::{Hash, Hasher};
+
+use crate::graph::{Fork, Graph, Node, NodeId, Range, Rope};
+
+impl<T> From<Fork> for Node<T> {
+    fn from(fork: Fork) -> Self {
+        Node::Fork(fork)
+    }
+}
+impl<T> From<Rope> for Node<T> {
+    fn from(rope: Rope) -> Self {
+        Node::Rope(rope)
+    }
+}
+
+fn is_ascii(byte: u8) -> bool {
+    (0x20..0x7F).contains(&byte)
+}
+
+impl Hash for Fork {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        for branch in self.branches() {
+            branch.hash(state);
+        }
+        self.miss.hash(state);
+    }
+}
+
+impl<T> Hash for Node<T> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        match self {
+            Node::Rope(rope) => {
+                b"ROPE".hash(state);
+                rope.hash(state);
+            }
+            Node::Fork(fork) => {
+                b"FORK".hash(state);
+                fork.hash(state);
+            }
+            Node::Leaf(_) => b"LEAF".hash(state),
+        }
+    }
+}
+
+impl Debug for NodeId {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        Debug::fmt(&self.0, f)
+    }
+}
+
+/// We don't need debug impls in release builds
+// #[cfg(test)]
+mod debug {
+    use super::*;
+    use crate::graph::rope::Miss;
+    use crate::graph::Disambiguate;
+    use std::cmp::{Ord, Ordering};
+
+    impl Disambiguate for &str {
+        fn cmp(left: &&str, right: &&str) -> Ordering {
+            Ord::cmp(left, right)
+        }
+    }
+
+    impl Debug for Range {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            let Range { start, end } = *self;
+
+            if start != end || !is_ascii(start) {
+                f.write_str("[")?;
+            }
+            match is_ascii(start) {
+                true => write!(f, "{}", start as char),
+                false => write!(f, "{:02X}", start),
+            }?;
+            if start != end {
+                match is_ascii(end) {
+                    true => write!(f, "-{}]", end as char),
+                    false => write!(f, "-{:02X}]", end),
+                }?;
+            } else if !is_ascii(start) {
+                f.write_str("]")?;
+            }
+            Ok(())
+        }
+    }
+
+    impl Display for Range {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            <Range as Debug>::fmt(self, f)
+        }
+    }
+
+    impl<T: Debug> Debug for Graph<T> {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            let entries = self
+                .nodes()
+                .iter()
+                .enumerate()
+                .filter_map(|(i, n)| n.as_ref().map(|n| (i, n)));
+
+            f.debug_map().entries(entries).finish()
+        }
+    }
+
+    struct Arm<T, U>(T, U);
+
+    impl<T, U> Debug for Arm<T, U>
+    where
+        T: Display,
+        U: Display,
+    {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            write!(f, "{} ⇒ {}", self.0, self.1)
+        }
+    }
+
+    impl Debug for Fork {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            let mut list = f.debug_set();
+
+            for (range, then) in self.branches() {
+                list.entry(&Arm(range, then));
+            }
+            if let Some(id) = self.miss {
+                list.entry(&Arm('_', id));
+            }
+
+            list.finish()
+        }
+    }
+
+    impl Display for Miss {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            match self {
+                Miss::First(id) => Display::fmt(id, f),
+                Miss::Any(id) => write!(f, "{}*", id),
+                Miss::None => f.write_str("n/a"),
+            }
+        }
+    }
+
+    impl Debug for Rope {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            use std::fmt::Write;
+
+            let mut rope = String::with_capacity(self.pattern.len());
+            for range in self.pattern.iter() {
+                write!(rope, "{}", range)?;
+            }
+
+            match self.miss.is_none() {
+                false => {
+                    let mut list = f.debug_list();
+
+                    list.entry(&Arm(rope, self.then));
+                    list.entry(&Arm('_', self.miss));
+
+                    list.finish()
+                }
+                true => Arm(rope, self.then).fmt(f),
+            }
+        }
+    }
+
+    impl PartialEq for Fork {
+        fn eq(&self, other: &Self) -> bool {
+            self.miss == other.miss && self.branches().eq(other.branches())
+        }
+    }
+
+    impl<T: Debug> Debug for Node<T> {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            match self {
+                Node::Fork(fork) => fork.fmt(f),
+                Node::Rope(rope) => rope.fmt(f),
+                Node::Leaf(leaf) => leaf.fmt(f),
+            }
+        }
+    }
+
+    use std::ops::RangeInclusive;
+
+    impl From<RangeInclusive<u8>> for Range {
+        fn from(range: RangeInclusive<u8>) -> Range {
+            Range {
+                start: *range.start(),
+                end: *range.end(),
+            }
+        }
+    }
+
+    impl From<RangeInclusive<char>> for Range {
+        fn from(range: RangeInclusive<char>) -> Range {
+            Range {
+                start: *range.start() as u8,
+                end: *range.end() as u8,
+            }
+        }
+    }
+
+    impl<T> PartialEq<Rope> for Node<T> {
+        fn eq(&self, other: &Rope) -> bool {
+            match self {
+                Node::Rope(rope) => rope == other,
+                _ => false,
+            }
+        }
+    }
+
+    impl<T> PartialEq<Fork> for Node<T> {
+        fn eq(&self, other: &Fork) -> bool {
+            match self {
+                Node::Fork(fork) => fork == other,
+                _ => false,
+            }
+        }
+    }
+}
diff --git a/vendor/logos-codegen/src/graph/meta.rs b/vendor/logos-codegen/src/graph/meta.rs
new file mode 100644
index 00000000..757ced09
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/meta.rs
@@ -0,0 +1,174 @@
+use std::cmp::min;
+use std::collections::BTreeMap;
+use std::ops::{Index, IndexMut};
+
+use crate::graph::{Graph, Node, NodeId};
+
+#[derive(Debug)]
+pub struct Meta {
+    map: BTreeMap<NodeId, MetaItem>,
+}
+
+#[derive(Debug, Default)]
+pub struct MetaItem {
+    /// Number of references to this node
+    pub refcount: usize,
+    /// Minimum number of bytes that ought to be read for this
+    /// node to find a match
+    pub min_read: usize,
+    /// Marks whether or not this node leads to a loop entry node.
+    pub is_loop_init: bool,
+    /// Ids of other nodes that point to this node while this
+    /// node is on a stack (creating a loop)
+    pub loop_entry_from: Vec<NodeId>,
+}
+
+impl Index<NodeId> for Meta {
+    type Output = MetaItem;
+
+    fn index(&self, id: NodeId) -> &MetaItem {
+        &self.map[&id]
+    }
+}
+
+impl IndexMut<NodeId> for Meta {
+    fn index_mut(&mut self, id: NodeId) -> &mut MetaItem {
+        self.map.entry(id).or_default()
+    }
+}
+
+impl MetaItem {
+    fn loop_entry(&mut self, id: NodeId) {
+        if let Err(idx) = self.loop_entry_from.binary_search(&id) {
+            self.loop_entry_from.insert(idx, id);
+        }
+    }
+}
+
+impl Meta {
+    pub fn analyze<T>(root: NodeId, graph: &Graph<T>) -> Self {
+        let mut meta = Meta {
+            map: Default::default(),
+        };
+
+        meta.first_pass(root, root, graph, &mut Vec::new());
+
+        meta
+    }
+
+    pub fn first_pass<T>(
+        &mut self,
+        this: NodeId,
+        parent: NodeId,
+        graph: &Graph<T>,
+        stack: &mut Vec<NodeId>,
+    ) -> &MetaItem {
+        let meta = &mut self[this];
+        let is_done = meta.refcount > 0;
+
+        meta.refcount += 1;
+
+        if stack.contains(&this) {
+            meta.loop_entry(parent);
+            self[parent].is_loop_init = true;
+        }
+        if is_done {
+            return &self[this];
+        }
+
+        stack.push(this);
+
+        let mut min_read;
+
+        match &graph[this] {
+            Node::Fork(fork) => {
+                min_read = usize::MAX;
+                for (_, id) in fork.branches() {
+                    let meta = self.first_pass(id, this, graph, stack);
+
+                    if meta.is_loop_init {
+                        min_read = 1;
+                    } else {
+                        min_read = min(min_read, meta.min_read + 1);
+                    }
+                }
+                if let Some(id) = fork.miss {
+                    let meta = self.first_pass(id, this, graph, stack);
+
+                    if meta.is_loop_init {
+                        min_read = 0;
+                    } else {
+                        min_read = min(min_read, meta.min_read);
+                    }
+                }
+                if min_read == usize::MAX {
+                    min_read = 0;
+                }
+            }
+            Node::Rope(rope) => {
+                min_read = rope.pattern.len();
+                let meta = self.first_pass(rope.then, this, graph, stack);
+
+                if !meta.is_loop_init {
+                    min_read += meta.min_read;
+                }
+
+                if let Some(id) = rope.miss.first() {
+                    let meta = self.first_pass(id, this, graph, stack);
+
+                    if meta.is_loop_init {
+                        min_read = 0;
+                    } else {
+                        min_read = min(min_read, meta.min_read);
+                    }
+                }
+            }
+            Node::Leaf(_) => min_read = 0,
+        }
+
+        stack.pop();
+
+        let meta = &mut self[this];
+        meta.min_read = min_read;
+        let second_pass = meta.loop_entry_from.clone();
+
+        for id in second_pass {
+            self.meta_second_pass(id, graph);
+        }
+
+        &self[this]
+    }
+
+    fn meta_second_pass<T>(&mut self, id: NodeId, graph: &Graph<T>) {
+        let mut min_read;
+
+        match &graph[id] {
+            Node::Fork(fork) => {
+                min_read = usize::MAX;
+                for (_, id) in fork.branches() {
+                    let meta = &self[id];
+
+                    if meta.is_loop_init {
+                        min_read = 1;
+                    } else {
+                        min_read = min(min_read, meta.min_read + 1);
+                    }
+                }
+                if min_read == usize::MAX {
+                    min_read = 0;
+                }
+            }
+            Node::Rope(rope) => {
+                min_read = rope.pattern.len();
+                let meta = &self[rope.then];
+
+                if !meta.is_loop_init {
+                    min_read += meta.min_read;
+                }
+            }
+            Node::Leaf(_) => unreachable!(),
+        }
+
+        self[id].min_read = min_read;
+    }
+}
diff --git a/vendor/logos-codegen/src/graph/mod.rs b/vendor/logos-codegen/src/graph/mod.rs
new file mode 100644
index 00000000..6d218e8c
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/mod.rs
@@ -0,0 +1,566 @@
+use std::cmp::Ordering;
+use std::collections::btree_map::Entry;
+use std::collections::BTreeMap as Map;
+use std::hash::{Hash, Hasher};
+use std::num::NonZeroU32;
+use std::ops::Index;
+
+use fnv::FnvHasher;
+
+mod fork;
+mod impls;
+mod meta;
+mod range;
+mod regex;
+mod rope;
+
+pub use self::fork::Fork;
+pub use self::meta::Meta;
+pub use self::range::Range;
+pub use self::rope::Rope;
+
+/// Disambiguation error during the attempt to merge two leaf
+/// nodes with the same priority
+#[derive(Debug)]
+pub struct DisambiguationError(pub NodeId, pub NodeId);
+
+pub struct Graph<Leaf> {
+    /// Internal storage of all allocated nodes. Once a node is
+    /// put here, it should never be mutated.
+    nodes: Vec<Option<Node<Leaf>>>,
+    /// When merging two nodes into a new node, we store the two
+    /// entry keys and the result, so that we don't merge the same
+    /// two nodes multiple times.
+    ///
+    /// Most of the time the entry we want to find will be the last
+    /// one that has been inserted, so we can use a vec with reverse
+    /// order search to get O(1) searches much faster than any *Map.
+    merges: Map<Merge, NodeId>,
+    /// Another map used for accounting. Before `.push`ing a new node
+    /// onto the graph (inserts are exempt), we hash it and find if
+    /// an identical(!) node has been created before.
+    hashes: Map<u64, NodeId>,
+    /// Instead of handling errors over return types, opt to collect
+    /// them internally.
+    errors: Vec<DisambiguationError>,
+    /// Deferred merges. When when attempting to merge a node with an
+    /// empty reserved slot, the merging operation is deferred until
+    /// the reserved slot is populated. This is a stack that keeps track
+    /// of all such deferred merges
+    deferred: Vec<DeferredMerge>,
+}
+
+/// Trait to be implemented on `Leaf` nodes in order to disambiguate
+/// between them.
+pub trait Disambiguate {
+    fn cmp(left: &Self, right: &Self) -> Ordering;
+}
+
+/// Id of a Node in the graph. `NodeId` can be referencing an empty
+/// slot that is going to be populated later in time.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct NodeId(NonZeroU32);
+
+impl Hash for NodeId {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        // Always use little-endian byte order for hashing to avoid
+        // different code generation on big-endian platforms due to
+        // iteration over a HashMap,
+        // see https://github.com/maciejhirsz/logos/issues/427.
+        state.write(&self.0.get().to_le_bytes())
+    }
+}
+
+impl NodeId {
+    fn get(self) -> usize {
+        self.0.get() as usize
+    }
+
+    fn new(n: usize) -> NodeId {
+        NodeId(NonZeroU32::new(n as u32).expect("Invalid NodeId"))
+    }
+}
+
+/// Unique reserved `NodeId` that is guaranteed to point to an
+/// empty allocated slot in the graph. It's safe to create multiple
+/// `NodeId` copies of `ReservedId`, however API users should never
+/// be able to clone a `ReservedId`, or create a new one from `NodeId`.
+///
+/// `ReservedId` is consumed once passed into `Graph::insert`.
+#[derive(Debug)]
+pub struct ReservedId(NodeId);
+
+impl ReservedId {
+    pub fn get(&self) -> NodeId {
+        self.0
+    }
+}
+
+/// Merge key used to lookup whether two nodes have been previously
+/// mered, so we can avoid duplicating merges, potentially running into
+/// loops that blow out the stack.
+///
+/// `Merge::new(a, b)` should always equal to `Merge::new(b, a)` to ensure
+/// that node merges are symmetric.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)]
+struct Merge(NodeId, NodeId);
+
+impl Merge {
+    fn new(a: NodeId, b: NodeId) -> Self {
+        if a < b {
+            Merge(a, b)
+        } else {
+            Merge(b, a)
+        }
+    }
+}
+
+/// When attempting to merge two nodes, one of which was not yet created,
+/// we can record such attempt, and execute the merge later on when the
+/// `awaiting` has been `insert`ed into the graph.
+#[derive(Debug)]
+pub struct DeferredMerge {
+    awaiting: NodeId,
+    with: NodeId,
+    into: ReservedId,
+}
+
+impl<Leaf> Graph<Leaf> {
+    pub fn new() -> Self {
+        Graph {
+            // Start with an empty slot so we can start
+            // counting NodeIds from 1 and use NonZero
+            // optimizations
+            nodes: vec![None],
+            merges: Map::new(),
+            hashes: Map::new(),
+            errors: Vec::new(),
+            deferred: Vec::new(),
+        }
+    }
+
+    pub fn errors(&self) -> &[DisambiguationError] {
+        &self.errors
+    }
+
+    fn next_id(&self) -> NodeId {
+        NodeId::new(self.nodes.len())
+    }
+
+    /// Reserve an empty slot for a node on the graph and return an
+    /// id for it. `ReservedId` cannot be cloned, and must be consumed
+    /// by calling `insert` on the graph.
+    pub fn reserve(&mut self) -> ReservedId {
+        let id = self.next_id();
+
+        self.nodes.push(None);
+
+        ReservedId(id)
+    }
+
+    /// Insert a node at a given, previously reserved id. Returns the
+    /// inserted `NodeId`.
+    pub fn insert<N>(&mut self, reserved: ReservedId, node: N) -> NodeId
+    where
+        N: Into<Node<Leaf>>,
+        Leaf: Disambiguate,
+    {
+        let id = reserved.get();
+
+        self.nodes[id.get()] = Some(node.into());
+
+        let mut awaiting = Vec::new();
+
+        // Partition out all `DeferredMerge`s that can be completed
+        // now that this `ReservedId` has a `Node` inserted into it.
+        for idx in (0..self.deferred.len()).rev() {
+            if self.deferred[idx].awaiting == id {
+                awaiting.push(self.deferred.remove(idx));
+            }
+        }
+
+        // Complete deferred merges. We've collected them from the back,
+        // so we must iterate through them from the back as well to restore
+        // proper order of merges in case there is some cascading going on.
+        for DeferredMerge {
+            awaiting,
+            with,
+            into,
+        } in awaiting.into_iter().rev()
+        {
+            self.merge_unchecked(awaiting, with, into);
+        }
+
+        id
+    }
+
+    /// Push a node onto the graph and get an id to it. If an identical
+    /// node has already been pushed on the graph, it will return the id
+    /// of that node instead.
+    pub fn push<B>(&mut self, node: B) -> NodeId
+    where
+        B: Into<Node<Leaf>>,
+    {
+        let node = node.into();
+
+        if let Node::Leaf(_) = node {
+            return self.push_unchecked(node);
+        }
+
+        let mut hasher = FnvHasher::default();
+        node.hash(&mut hasher);
+
+        let next_id = self.next_id();
+
+        match self.hashes.entry(hasher.finish()) {
+            Entry::Occupied(occupied) => {
+                let id = *occupied.get();
+
+                if self[id].eq(&node) {
+                    return id;
+                }
+            }
+            Entry::Vacant(vacant) => {
+                vacant.insert(next_id);
+            }
+        }
+
+        self.push_unchecked(node)
+    }
+
+    fn push_unchecked(&mut self, node: Node<Leaf>) -> NodeId {
+        let id = self.next_id();
+
+        self.nodes.push(Some(node));
+
+        id
+    }
+
+    /// If nodes `a` and `b` have been already merged, return the
+    /// `NodeId` of the node they have been merged into.
+    fn find_merged(&self, a: NodeId, b: NodeId) -> Option<NodeId> {
+        let probe = Merge::new(a, b);
+
+        self.merges.get(&probe).copied()
+    }
+
+    /// Mark that nodes `a` and `b` have been merged into `product`.
+    ///
+    /// This will also mark merging `a` and `product`, as well as
+    /// `b` and `product` into `product`, since those are symmetric
+    /// operations.
+    ///
+    /// This is necessary to break out asymmetric merge loops.
+    fn set_merged(&mut self, a: NodeId, b: NodeId, product: NodeId) {
+        self.merges.insert(Merge::new(a, b), product);
+        self.merges.insert(Merge::new(a, product), product);
+        self.merges.insert(Merge::new(b, product), product);
+    }
+
+    /// Merge the nodes at id `a` and `b`, returning a new id.
+    pub fn merge(&mut self, a: NodeId, b: NodeId) -> NodeId
+    where
+        Leaf: Disambiguate,
+    {
+        if a == b {
+            return a;
+        }
+
+        // If the id pair is already merged (or is being merged), just return the id
+        if let Some(id) = self.find_merged(a, b) {
+            return id;
+        }
+
+        match (self.get(a), self.get(b)) {
+            (None, None) => {
+                panic!(
+                    "Merging two reserved nodes! This is a bug, please report it:\n\
+                    \n\
+                    https://github.com/maciejhirsz/logos/issues"
+                );
+            }
+            (None, Some(_)) => {
+                let reserved = self.reserve();
+                let id = reserved.get();
+                self.deferred.push(DeferredMerge {
+                    awaiting: a,
+                    with: b,
+                    into: reserved,
+                });
+                self.set_merged(a, b, id);
+
+                return id;
+            }
+            (Some(_), None) => {
+                let reserved = self.reserve();
+                let id = reserved.get();
+                self.deferred.push(DeferredMerge {
+                    awaiting: b,
+                    with: a,
+                    into: reserved,
+                });
+                self.set_merged(a, b, id);
+
+                return id;
+            }
+            (Some(Node::Leaf(left)), Some(Node::Leaf(right))) => {
+                return match Disambiguate::cmp(left, right) {
+                    Ordering::Less => b,
+                    Ordering::Greater => a,
+                    Ordering::Equal => {
+                        self.errors.push(DisambiguationError(a, b));
+
+                        a
+                    }
+                };
+            }
+            _ => (),
+        }
+
+        // Reserve the id for the merge and save it. Since the graph can contain loops,
+        // this prevents us from trying to merge the same id pair in a loop, blowing up
+        // the stack.
+        let reserved = self.reserve();
+        self.set_merged(a, b, reserved.get());
+
+        self.merge_unchecked(a, b, reserved)
+    }
+
+    /// Unchecked merge of `a` and `b`. This fn assumes that `a` and `b` are
+    /// not pointing to empty slots.
+    fn merge_unchecked(&mut self, a: NodeId, b: NodeId, reserved: ReservedId) -> NodeId
+    where
+        Leaf: Disambiguate,
+    {
+        let merged_rope = match (self.get(a), self.get(b)) {
+            (Some(Node::Rope(rope)), _) => {
+                let rope = rope.clone();
+
+                self.merge_rope(rope, b)
+            }
+            (_, Some(Node::Rope(rope))) => {
+                let rope = rope.clone();
+
+                self.merge_rope(rope, a)
+            }
+            _ => None,
+        };
+
+        if let Some(rope) = merged_rope {
+            return self.insert(reserved, rope);
+        }
+
+        let mut fork = self.fork_off(a);
+        fork.merge(self.fork_off(b), self);
+
+        let mut stack = vec![reserved.get()];
+
+        // Flatten the fork
+        while let Some(miss) = fork.miss {
+            if stack.contains(&miss) {
+                break;
+            }
+            stack.push(miss);
+
+            let other = match self.get(miss) {
+                Some(Node::Fork(other)) => other.clone(),
+                Some(Node::Rope(other)) => other.clone().into_fork(self),
+                _ => break,
+            };
+            match other.miss {
+                Some(id) if self.get(id).is_none() => break,
+                _ => (),
+            }
+            fork.miss = None;
+            fork.merge(other, self);
+        }
+
+        self.insert(reserved, fork)
+    }
+
+    fn merge_rope(&mut self, rope: Rope, other: NodeId) -> Option<Rope>
+    where
+        Leaf: Disambiguate,
+    {
+        match self.get(other) {
+            Some(Node::Fork(fork)) if rope.miss.is_none() => {
+                // Count how many consecutive ranges in this rope would
+                // branch into the fork that results in a loop.
+                //
+                // e.g.: for rope "foobar" and a looping fork [a-z]: 6
+                let count = rope
+                    .pattern
+                    .iter()
+                    .take_while(|range| fork.contains(**range) == Some(other))
+                    .count();
+
+                let mut rope = rope.split_at(count, self)?.miss_any(other);
+
+                rope.then = self.merge(rope.then, other);
+
+                Some(rope)
+            }
+            Some(Node::Rope(other)) => {
+                let (prefix, miss) = rope.prefix(other)?;
+
+                let (a, b) = (rope, other.clone());
+
+                let a = a.remainder(prefix.len(), self);
+                let b = b.remainder(prefix.len(), self);
+
+                let rope = Rope::new(prefix, self.merge(a, b)).miss(miss);
+
+                Some(rope)
+            }
+            Some(Node::Leaf(_)) | None => {
+                if rope.miss.is_none() {
+                    Some(rope.miss(other))
+                } else {
+                    None
+                }
+            }
+            _ => None,
+        }
+    }
+
+    pub fn fork_off(&mut self, id: NodeId) -> Fork
+    where
+        Leaf: Disambiguate,
+    {
+        match self.get(id) {
+            Some(Node::Fork(fork)) => fork.clone(),
+            Some(Node::Rope(rope)) => rope.clone().into_fork(self),
+            Some(Node::Leaf(_)) | None => Fork::new().miss(id),
+        }
+    }
+
+    pub fn nodes(&self) -> &[Option<Node<Leaf>>] {
+        &self.nodes
+    }
+
+    /// Find all nodes that have no references and remove them.
+    pub fn shake(&mut self, root: NodeId) {
+        let mut filter = vec![false; self.nodes.len()];
+
+        filter[root.get()] = true;
+
+        self[root].shake(self, &mut filter);
+
+        for (id, referenced) in filter.into_iter().enumerate() {
+            if !referenced {
+                self.nodes[id] = None;
+            }
+        }
+    }
+
+    pub fn get(&self, id: NodeId) -> Option<&Node<Leaf>> {
+        self.nodes.get(id.get())?.as_ref()
+    }
+}
+
+impl<Leaf> Index<NodeId> for Graph<Leaf> {
+    type Output = Node<Leaf>;
+
+    fn index(&self, id: NodeId) -> &Node<Leaf> {
+        self.get(id).expect(
+            "Indexing into an empty node. This is a bug, please report it at:\n\
+            \n\
+            https://github.com/maciejhirsz/logos/issues",
+        )
+    }
+}
+
+impl std::fmt::Display for NodeId {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        std::fmt::Display::fmt(&self.0, f)
+    }
+}
+
+#[cfg_attr(test, derive(PartialEq))]
+pub enum Node<Leaf> {
+    /// Fork node, can lead to more than one state
+    Fork(Fork),
+    /// Rope node, can lead to one state on match, one state on miss
+    Rope(Rope),
+    /// Leaf node, terminal state
+    Leaf(Leaf),
+}
+
+impl<Leaf> Node<Leaf> {
+    pub fn miss(&self) -> Option<NodeId> {
+        match self {
+            Node::Rope(rope) => rope.miss.first(),
+            Node::Fork(fork) => fork.miss,
+            Node::Leaf(_) => None,
+        }
+    }
+
+    fn eq(&self, other: &Node<Leaf>) -> bool {
+        match (self, other) {
+            (Node::Fork(a), Node::Fork(b)) => a == b,
+            (Node::Rope(a), Node::Rope(b)) => a == b,
+            _ => false,
+        }
+    }
+
+    fn shake(&self, graph: &Graph<Leaf>, filter: &mut [bool]) {
+        match self {
+            Node::Fork(fork) => fork.shake(graph, filter),
+            Node::Rope(rope) => rope.shake(graph, filter),
+            Node::Leaf(_) => (),
+        }
+    }
+
+    pub fn unwrap_leaf(&self) -> &Leaf {
+        match self {
+            Node::Fork(_) => panic!("Internal Error: called unwrap_leaf on a fork"),
+            Node::Rope(_) => panic!("Internal Error: called unwrap_leaf on a rope"),
+            Node::Leaf(leaf) => leaf,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn leaf_stack_size() {
+        use std::mem::size_of;
+
+        const WORD: usize = size_of::<usize>();
+        const NODE: usize = size_of::<Node<()>>();
+
+        assert!(NODE <= 6 * WORD, "Size of Node<()> is {} bytes!", NODE);
+    }
+
+    #[test]
+    fn create_a_loop() {
+        let mut graph = Graph::new();
+
+        let token = graph.push(Node::Leaf("IDENT"));
+        let id = graph.reserve();
+        let fork = Fork::new().branch('a'..='z', id.get()).miss(token);
+        let root = graph.insert(id, fork);
+
+        assert_eq!(graph[token], Node::Leaf("IDENT"));
+        assert_eq!(graph[root], Fork::new().branch('a'..='z', root).miss(token),);
+    }
+
+    #[test]
+    fn fork_off() {
+        let mut graph = Graph::new();
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let rope = graph.push(Rope::new("rope", leaf));
+        let fork = graph.push(Fork::new().branch(b'!', leaf));
+
+        assert_eq!(graph.fork_off(leaf), Fork::new().miss(leaf));
+        assert_eq!(
+            graph.fork_off(rope),
+            Fork::new().branch(b'r', NodeId::new(graph.nodes.len() - 1))
+        );
+        assert_eq!(graph.fork_off(fork), Fork::new().branch(b'!', leaf));
+    }
+}
diff --git a/vendor/logos-codegen/src/graph/range.rs b/vendor/logos-codegen/src/graph/range.rs
new file mode 100644
index 00000000..a4d23d3b
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/range.rs
@@ -0,0 +1,144 @@
+use regex_syntax::hir::ClassBytesRange;
+use regex_syntax::hir::ClassUnicodeRange;
+use regex_syntax::utf8::Utf8Range;
+
+use std::cmp::{Ord, Ordering};
+
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub struct Range {
+    pub start: u8,
+    pub end: u8,
+}
+
+impl Range {
+    pub fn as_byte(&self) -> Option<u8> {
+        if self.is_byte() {
+            Some(self.start)
+        } else {
+            None
+        }
+    }
+
+    pub fn is_byte(&self) -> bool {
+        self.start == self.end
+    }
+}
+
+impl From<u8> for Range {
+    fn from(byte: u8) -> Range {
+        Range {
+            start: byte,
+            end: byte,
+        }
+    }
+}
+
+impl From<&u8> for Range {
+    fn from(byte: &u8) -> Range {
+        Range::from(*byte)
+    }
+}
+
+impl Iterator for Range {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<u8> {
+        match self.start.cmp(&self.end) {
+            std::cmp::Ordering::Less => {
+                let res = self.start;
+                self.start += 1;
+
+                Some(res)
+            }
+            std::cmp::Ordering::Equal => {
+                let res = self.start;
+
+                // Necessary so that range 0xFF-0xFF doesn't loop forever
+                self.start = 0xFF;
+                self.end = 0x00;
+
+                Some(res)
+            }
+            std::cmp::Ordering::Greater => None,
+        }
+    }
+}
+
+impl PartialOrd for Range {
+    fn partial_cmp(&self, other: &Range) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for Range {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.start.cmp(&other.start)
+    }
+}
+
+impl From<Utf8Range> for Range {
+    fn from(r: Utf8Range) -> Range {
+        Range {
+            start: r.start,
+            end: r.end,
+        }
+    }
+}
+
+impl From<ClassUnicodeRange> for Range {
+    fn from(r: ClassUnicodeRange) -> Range {
+        let start = r.start() as u32;
+        let end = r.end() as u32;
+
+        if start >= 128 || end >= 128 && end != 0x0010FFFF {
+            panic!("Casting non-ascii ClassUnicodeRange to Range")
+        }
+
+        Range {
+            start: start as u8,
+            end: end as u8,
+        }
+    }
+}
+
+impl From<ClassBytesRange> for Range {
+    fn from(r: ClassBytesRange) -> Range {
+        Range {
+            start: r.start(),
+            end: r.end(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn range_iter_one() {
+        let byte = Range::from(b'!');
+        let collected = byte.take(1000).collect::<Vec<_>>();
+
+        assert_eq!(b"!", &collected[..]);
+    }
+
+    #[test]
+    fn range_iter_few() {
+        let byte = Range {
+            start: b'a',
+            end: b'd',
+        };
+        let collected = byte.take(1000).collect::<Vec<_>>();
+
+        assert_eq!(b"abcd", &collected[..]);
+    }
+
+    #[test]
+    fn range_iter_bounds() {
+        let byte = Range::from(0xFA..=0xFF);
+
+        let collected = byte.take(1000).collect::<Vec<_>>();
+
+        assert_eq!(b"\xFA\xFB\xFC\xFD\xFE\xFF", &collected[..]);
+    }
+}
diff --git a/vendor/logos-codegen/src/graph/regex.rs b/vendor/logos-codegen/src/graph/regex.rs
new file mode 100644
index 00000000..d55c490f
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/regex.rs
@@ -0,0 +1,295 @@
+use std::fmt::Debug;
+
+use regex_syntax::utf8::Utf8Sequences;
+
+use crate::graph::{Disambiguate, Fork, Graph, Node, NodeId, Range, ReservedId, Rope};
+use crate::mir::{Class, ClassUnicode, Literal, Mir};
+
+impl<Leaf: Disambiguate + Debug> Graph<Leaf> {
+    pub fn regex(&mut self, mir: Mir, then: NodeId) -> NodeId {
+        self.parse_mir(&mir, then, None, None, false)
+    }
+
+    fn parse_mir(
+        &mut self,
+        mir: &Mir,
+        then: NodeId,
+        miss: Option<NodeId>,
+        reserved: Option<ReservedId>,
+        repeated: bool,
+    ) -> NodeId {
+        match mir {
+            Mir::Empty => then,
+            Mir::Loop(mir) => {
+                let reserved_first = reserved.unwrap_or_else(|| self.reserve());
+
+                let (new_then, new_miss);
+                if let Some(old_miss) = miss {
+                    // We have to separate the first iteration from the other iterations,
+                    // because the `old_miss` path must only be taken if we miss the first
+                    // iteration.
+                    let reserved_next = self.reserve();
+                    new_then = self.parse_mir(
+                        mir,
+                        reserved_next.get(),
+                        Some(then),
+                        Some(reserved_next),
+                        true,
+                    );
+                    new_miss = self.merge(old_miss, then);
+                } else {
+                    new_then = reserved_first.get();
+                    new_miss = then;
+                }
+
+                self.parse_mir(mir, new_then, Some(new_miss), Some(reserved_first), true)
+            }
+            Mir::Maybe(mir) => {
+                let miss = match miss {
+                    Some(id) => self.merge(id, then),
+                    None => then,
+                };
+
+                self.parse_mir(mir, then, Some(miss), reserved, true)
+            }
+            Mir::Alternation(alternation) => {
+                let mut fork = Fork::new().miss(miss);
+
+                for mir in alternation {
+                    let id = self.parse_mir(mir, then, None, None, repeated);
+                    let alt = self.fork_off(id);
+
+                    fork.merge(alt, self);
+                }
+
+                self.insert_or_push(reserved, fork)
+            }
+            Mir::Literal(literal) => {
+                let pattern = literal.0.to_vec();
+
+                self.insert_or_push(reserved, Rope::new(pattern, then).miss(miss))
+            }
+            Mir::Concat(concat) => {
+                // Take an initial guess at the capacity - estimates a little worse than an average case
+                // scenario by assuming every concat element is singular but has a full code-point unicode literal.
+                // The only way to get the actual size of the Vec is if every sub-concat node is added up.
+                let mut ropebuf: Vec<Range> = Vec::with_capacity(concat.len() * 4);
+                let mut then = then;
+
+                let mut handle_bytes = |graph: &mut Self, mir: &Mir, then: &mut NodeId| match mir {
+                    Mir::Literal(Literal(bytes)) => {
+                        ropebuf.extend(bytes.iter().rev().map(Into::<Range>::into));
+                        true
+                    }
+                    Mir::Class(Class::Unicode(class)) if is_one_ascii(class, repeated) => {
+                        ropebuf.push(class.ranges()[0].into());
+                        true
+                    }
+                    Mir::Class(Class::Bytes(class)) if class.ranges().len() == 1 => {
+                        ropebuf.push(class.ranges()[0].into());
+                        true
+                    }
+                    _ => {
+                        if !ropebuf.is_empty() {
+                            let rope =
+                                Rope::new(ropebuf.iter().cloned().rev().collect::<Vec<_>>(), *then);
+
+                            *then = graph.push(rope);
+                            ropebuf = Vec::with_capacity(concat.len() * 4);
+                        }
+                        false
+                    }
+                };
+
+                for mir in concat[1..].iter().rev() {
+                    if !handle_bytes(self, mir, &mut then) {
+                        then = self.parse_mir(mir, then, None, None, false);
+                    }
+                }
+
+                let first_mir = &concat[0];
+                if handle_bytes(self, first_mir, &mut then) {
+                    let rope = Rope::new(ropebuf.iter().cloned().rev().collect::<Vec<_>>(), then)
+                        .miss(miss);
+                    self.insert_or_push(reserved, rope)
+                } else {
+                    self.parse_mir(first_mir, then, miss, reserved, false)
+                }
+            }
+            Mir::Class(Class::Unicode(class)) if !is_ascii(class, repeated) => {
+                let mut ropes = class
+                    .iter()
+                    .flat_map(|range| Utf8Sequences::new(range.start(), range.end()))
+                    .map(|sequence| Rope::new(sequence.as_slice(), then))
+                    .collect::<Vec<_>>();
+
+                if ropes.len() == 1 {
+                    let rope = ropes.remove(0);
+
+                    return self.insert_or_push(reserved, rope.miss(miss));
+                }
+
+                let mut root = Fork::new().miss(miss);
+
+                for rope in ropes {
+                    let fork = rope.into_fork(self);
+                    root.merge(fork, self);
+                }
+
+                self.insert_or_push(reserved, root)
+            }
+            Mir::Class(class) => {
+                let mut fork = Fork::new().miss(miss);
+
+                let class: Vec<Range> = match class {
+                    Class::Unicode(u) => u.iter().copied().map(Into::into).collect(),
+                    Class::Bytes(b) => b.iter().copied().map(Into::into).collect(),
+                };
+
+                for range in class {
+                    fork.add_branch(range, then, self);
+                }
+
+                self.insert_or_push(reserved, fork)
+            }
+        }
+    }
+
+    fn insert_or_push<N>(&mut self, id: Option<ReservedId>, node: N) -> NodeId
+    where
+        N: Into<Node<Leaf>>,
+    {
+        match id {
+            Some(id) => self.insert(id, node),
+            None => self.push(node),
+        }
+    }
+}
+
+/// Return whether current class unicode is ascii.
+///
+/// Because unicode ranges are iterated in increasing order,
+/// it is only necessary to check the last range.
+///
+/// If the check is performed in a repetition,
+/// a fast path is used by checking if end of range is 0x0010_FFFF.
+fn is_ascii(class: &ClassUnicode, repeated: bool) -> bool {
+    class.iter().last().map_or(true, |range| {
+        let start = range.start() as u32;
+        let end = range.end() as u32;
+        end < 128 || (repeated && start < 128 && end == 0x0010_FFFF)
+    })
+}
+
+/// Return whether current class unicode is ascii and only contains
+/// one range.
+///
+/// See [`is_ascii`] function for more details.
+fn is_one_ascii(class: &ClassUnicode, repeated: bool) -> bool {
+    if class.ranges().len() != 1 {
+        return false;
+    }
+
+    let range = &class.ranges()[0];
+    let start = range.start() as u32;
+    let end = range.end() as u32;
+    end < 128 || (repeated && start < 128 && end == 0x0010_FFFF)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::num::NonZeroU32;
+
+    use super::*;
+    use crate::graph::Node;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn rope() {
+        let mut graph = Graph::new();
+
+        let mir = Mir::utf8("foobar").unwrap();
+
+        assert_eq!(mir.priority(), 12);
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let id = graph.regex(mir, leaf);
+
+        assert_eq!(graph[id], Node::Rope(Rope::new("foobar", leaf)),)
+    }
+
+    #[test]
+    fn alternation() {
+        let mut graph = Graph::new();
+
+        let mir = Mir::utf8("a|b").unwrap();
+
+        assert_eq!(mir.priority(), 2);
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let id = graph.regex(mir, leaf);
+
+        assert_eq!(
+            graph[id],
+            Node::Fork(Fork::new().branch(b'a', leaf).branch(b'b', leaf)),
+        );
+    }
+
+    #[test]
+    fn repeat() {
+        let mut graph = Graph::new();
+
+        let mir = Mir::utf8("[a-z]*").unwrap();
+
+        assert_eq!(mir.priority(), 0);
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let id = graph.regex(mir, leaf);
+
+        assert_eq!(
+            graph[id],
+            Node::Fork(
+                Fork::new()
+                    .branch('a'..='z', id) // goto self == loop
+                    .miss(leaf)
+            ),
+        );
+    }
+
+    #[test]
+    fn maybe() {
+        let mut graph = Graph::new();
+
+        let mir = Mir::utf8("[a-z]?").unwrap();
+
+        assert_eq!(mir.priority(), 0);
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let id = graph.regex(mir, leaf);
+
+        assert_eq!(
+            graph[id],
+            Node::Fork(Fork::new().branch('a'..='z', leaf).miss(leaf)),
+        );
+    }
+
+    #[test]
+    fn long_concat_389() {
+        let mut graph = Graph::new();
+
+        let mir = Mir::utf8("abcdefghijklmnopqrstuvwxyz*").unwrap();
+
+        assert_eq!(mir.priority(), 50);
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let id = graph.regex(mir, leaf);
+        let sub_id = NodeId(NonZeroU32::new(2).unwrap());
+
+        assert_eq!(
+            graph[id],
+            Node::Rope(Rope::new("abcdefghijklmnopqrstuvwxy", sub_id))
+        );
+
+        assert_eq!(graph[sub_id], Node::Rope(Rope::new("z", sub_id).miss(leaf)))
+    }
+}
diff --git a/vendor/logos-codegen/src/graph/rope.rs b/vendor/logos-codegen/src/graph/rope.rs
new file mode 100644
index 00000000..6e563fac
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/rope.rs
@@ -0,0 +1,330 @@
+use std::ops::Deref;
+
+use crate::graph::{Disambiguate, Fork, Graph, NodeId, Range};
+
+#[derive(PartialEq, Clone, Hash)]
+pub struct Rope {
+    pub pattern: Pattern,
+    pub then: NodeId,
+    pub miss: Miss,
+}
+
+#[derive(PartialEq, Clone, Hash)]
+pub struct Pattern(pub Vec<Range>);
+
+impl Deref for Pattern {
+    type Target = [Range];
+
+    fn deref(&self) -> &[Range] {
+        &self.0
+    }
+}
+
+/// Because Ropes could potentially fail a match mid-pattern,
+/// a regular `Option` is not sufficient here.
+#[derive(PartialEq, Clone, Copy, Hash)]
+pub enum Miss {
+    /// Same as Option::None, error on fail
+    None,
+    /// Jump to id if first byte does not match, fail on partial match
+    First(NodeId),
+    /// Jump to id on partial or empty match
+    Any(NodeId),
+}
+
+impl Miss {
+    pub fn is_none(&self) -> bool {
+        matches!(self, Miss::None)
+    }
+
+    pub fn first(self) -> Option<NodeId> {
+        match self {
+            Miss::First(id) | Miss::Any(id) => Some(id),
+            _ => None,
+        }
+    }
+
+    pub fn take_first(&mut self) -> Option<NodeId> {
+        match *self {
+            Miss::First(id) => {
+                *self = Miss::None;
+
+                Some(id)
+            }
+            Miss::Any(id) => Some(id),
+            Miss::None => None,
+        }
+    }
+}
+
+impl From<Option<NodeId>> for Miss {
+    fn from(miss: Option<NodeId>) -> Self {
+        match miss {
+            Some(id) => Miss::First(id),
+            None => Miss::None,
+        }
+    }
+}
+
+impl From<NodeId> for Miss {
+    fn from(id: NodeId) -> Self {
+        Miss::First(id)
+    }
+}
+
+impl Rope {
+    pub fn new<P>(pattern: P, then: NodeId) -> Self
+    where
+        P: Into<Pattern>,
+    {
+        Rope {
+            pattern: pattern.into(),
+            then,
+            miss: Miss::None,
+        }
+    }
+
+    pub fn miss<M>(mut self, miss: M) -> Self
+    where
+        M: Into<Miss>,
+    {
+        self.miss = miss.into();
+        self
+    }
+
+    pub fn miss_any(mut self, miss: NodeId) -> Self {
+        self.miss = Miss::Any(miss);
+        self
+    }
+
+    pub fn into_fork<T>(mut self, graph: &mut Graph<T>) -> Fork
+    where
+        T: Disambiguate,
+    {
+        let first = self.pattern.0.remove(0);
+        let miss = self.miss.take_first();
+
+        // The new fork will lead to a new rope,
+        // or the old target if no new rope was created
+        let then = match self.pattern.len() {
+            0 => self.then,
+            _ => graph.push(self),
+        };
+
+        Fork::new().branch(first, then).miss(miss)
+    }
+
+    pub fn prefix(&self, other: &Self) -> Option<(Pattern, Miss)> {
+        let count = self
+            .pattern
+            .iter()
+            .zip(other.pattern.iter())
+            .take_while(|(a, b)| a == b)
+            .count();
+
+        let pattern = match count {
+            0 => return None,
+            n => self.pattern[..n].into(),
+        };
+        let miss = match (self.miss, other.miss) {
+            (Miss::None, miss) => miss,
+            (miss, Miss::None) => miss,
+            _ => return None,
+        };
+
+        Some((pattern, miss))
+    }
+
+    pub fn split_at<T>(mut self, at: usize, graph: &mut Graph<T>) -> Option<Rope>
+    where
+        T: Disambiguate,
+    {
+        match at {
+            0 => return None,
+            n if n == self.pattern.len() => return Some(self),
+            _ => (),
+        }
+
+        let (this, next) = self.pattern.split_at(at);
+
+        let next_miss = match self.miss {
+            Miss::Any(_) => self.miss,
+            _ => Miss::None,
+        };
+
+        let next = graph.push(Rope {
+            pattern: next.into(),
+            miss: next_miss,
+            then: self.then,
+        });
+
+        self.pattern = this.into();
+        self.then = next;
+
+        Some(self)
+    }
+
+    pub fn remainder<T>(mut self, at: usize, graph: &mut Graph<T>) -> NodeId
+    where
+        T: Disambiguate,
+    {
+        self.pattern = self.pattern[at..].into();
+
+        match self.pattern.len() {
+            0 => self.then,
+            _ => graph.push(self),
+        }
+    }
+
+    pub fn shake<T>(&self, graph: &Graph<T>, filter: &mut [bool]) {
+        if let Some(id) = self.miss.first() {
+            if !filter[id.get()] {
+                filter[id.get()] = true;
+                graph[id].shake(graph, filter);
+            }
+        }
+
+        if !filter[self.then.get()] {
+            filter[self.then.get()] = true;
+            graph[self.then].shake(graph, filter);
+        }
+    }
+}
+
+impl Pattern {
+    pub fn to_bytes(&self) -> Option<Vec<u8>> {
+        let mut out = Vec::with_capacity(self.len());
+
+        for range in self.iter() {
+            out.push(range.as_byte()?);
+        }
+
+        Some(out)
+    }
+}
+
+impl<T> From<&[T]> for Pattern
+where
+    T: Into<Range> + Copy,
+{
+    fn from(slice: &[T]) -> Self {
+        Pattern(slice.iter().copied().map(Into::into).collect())
+    }
+}
+
+impl<T> From<Vec<T>> for Pattern
+where
+    T: Into<Range>,
+{
+    fn from(vec: Vec<T>) -> Self {
+        Pattern(vec.into_iter().map(Into::into).collect())
+    }
+}
+
+impl From<&str> for Pattern {
+    fn from(slice: &str) -> Self {
+        slice.as_bytes().into()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::graph::Node;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn into_fork() {
+        let mut graph = Graph::new();
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let rope = Rope::new("foobar", leaf);
+
+        let fork = rope.into_fork(&mut graph);
+
+        assert_eq!(leaf, NodeId::new(1));
+        assert_eq!(fork, Fork::new().branch(b'f', NodeId::new(2)));
+        assert_eq!(graph[NodeId::new(2)], Rope::new("oobar", leaf));
+    }
+
+    #[test]
+    fn into_fork_one_byte() {
+        let mut graph = Graph::new();
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let rope = Rope::new("!", leaf);
+
+        let fork = rope.into_fork(&mut graph);
+
+        assert_eq!(leaf, NodeId::new(1));
+        assert_eq!(fork, Fork::new().branch(b'!', NodeId::new(1)));
+    }
+
+    #[test]
+    fn into_fork_miss_any() {
+        let mut graph = Graph::new();
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let rope = Rope::new("42", leaf).miss_any(NodeId::new(42));
+
+        let fork = rope.into_fork(&mut graph);
+
+        assert_eq!(leaf, NodeId::new(1));
+        assert_eq!(
+            fork,
+            Fork::new()
+                .branch(b'4', NodeId::new(2))
+                .miss(NodeId::new(42))
+        );
+        assert_eq!(
+            graph[NodeId::new(2)],
+            Rope::new("2", leaf).miss_any(NodeId::new(42))
+        );
+    }
+
+    #[test]
+    fn into_fork_miss_first() {
+        let mut graph = Graph::new();
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let rope = Rope::new("42", leaf).miss(Miss::First(NodeId::new(42)));
+
+        let fork = rope.into_fork(&mut graph);
+
+        assert_eq!(leaf, NodeId::new(1));
+        assert_eq!(
+            fork,
+            Fork::new()
+                .branch(b'4', NodeId::new(2))
+                .miss(NodeId::new(42))
+        );
+        assert_eq!(graph[NodeId::new(2)], Rope::new("2", leaf));
+    }
+
+    #[test]
+    fn split_at() {
+        let mut graph = Graph::new();
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let rope = Rope::new("foobar", leaf);
+
+        assert_eq!(rope.clone().split_at(6, &mut graph).unwrap(), rope);
+
+        let split = rope.split_at(3, &mut graph).unwrap();
+        let expected_id = NodeId::new(leaf.get() + 1);
+
+        assert_eq!(split, Rope::new("foo", expected_id));
+        assert_eq!(graph[expected_id], Rope::new("bar", leaf));
+    }
+
+    #[test]
+    fn pattern_to_bytes() {
+        let pat = Pattern::from("foobar");
+
+        assert_eq!(pat.to_bytes().unwrap(), b"foobar");
+
+        let ranges = Pattern::from(vec![0..=0, 42..=42, b'{'..=b'}']);
+
+        assert_eq!(ranges.to_bytes(), None);
+    }
+}
author	mo khan <mo@mokhan.ca>	2025-07-02 18:36:06 -0600
committer	mo khan <mo@mokhan.ca>	2025-07-02 18:36:06 -0600
commit	8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch)
tree	22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/logos-codegen/src/graph
parent	4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff)