chore: add vendor directory

author: mo khan <mo@mokhan.ca> 2025-07-02 18:36:06 -0600
committer: mo khan <mo@mokhan.ca> 2025-07-02 18:36:06 -0600
commit: 8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch)
tree: 22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/logos-codegen/src/graph/mod.rs
parent: 4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff)
1 files changed, 566 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/graph/mod.rs b/vendor/logos-codegen/src/graph/mod.rs
new file mode 100644
index 00000000..6d218e8c
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/mod.rs
@@ -0,0 +1,566 @@
+use std::cmp::Ordering;
+use std::collections::btree_map::Entry;
+use std::collections::BTreeMap as Map;
+use std::hash::{Hash, Hasher};
+use std::num::NonZeroU32;
+use std::ops::Index;
+
+use fnv::FnvHasher;
+
+mod fork;
+mod impls;
+mod meta;
+mod range;
+mod regex;
+mod rope;
+
+pub use self::fork::Fork;
+pub use self::meta::Meta;
+pub use self::range::Range;
+pub use self::rope::Rope;
+
+/// Disambiguation error during the attempt to merge two leaf
+/// nodes with the same priority
+#[derive(Debug)]
+pub struct DisambiguationError(pub NodeId, pub NodeId);
+
+pub struct Graph<Leaf> {
+    /// Internal storage of all allocated nodes. Once a node is
+    /// put here, it should never be mutated.
+    nodes: Vec<Option<Node<Leaf>>>,
+    /// When merging two nodes into a new node, we store the two
+    /// entry keys and the result, so that we don't merge the same
+    /// two nodes multiple times.
+    ///
+    /// Most of the time the entry we want to find will be the last
+    /// one that has been inserted, so we can use a vec with reverse
+    /// order search to get O(1) searches much faster than any *Map.
+    merges: Map<Merge, NodeId>,
+    /// Another map used for accounting. Before `.push`ing a new node
+    /// onto the graph (inserts are exempt), we hash it and find if
+    /// an identical(!) node has been created before.
+    hashes: Map<u64, NodeId>,
+    /// Instead of handling errors over return types, opt to collect
+    /// them internally.
+    errors: Vec<DisambiguationError>,
+    /// Deferred merges. When when attempting to merge a node with an
+    /// empty reserved slot, the merging operation is deferred until
+    /// the reserved slot is populated. This is a stack that keeps track
+    /// of all such deferred merges
+    deferred: Vec<DeferredMerge>,
+}
+
+/// Trait to be implemented on `Leaf` nodes in order to disambiguate
+/// between them.
+pub trait Disambiguate {
+    fn cmp(left: &Self, right: &Self) -> Ordering;
+}
+
+/// Id of a Node in the graph. `NodeId` can be referencing an empty
+/// slot that is going to be populated later in time.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct NodeId(NonZeroU32);
+
+impl Hash for NodeId {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        // Always use little-endian byte order for hashing to avoid
+        // different code generation on big-endian platforms due to
+        // iteration over a HashMap,
+        // see https://github.com/maciejhirsz/logos/issues/427.
+        state.write(&self.0.get().to_le_bytes())
+    }
+}
+
+impl NodeId {
+    fn get(self) -> usize {
+        self.0.get() as usize
+    }
+
+    fn new(n: usize) -> NodeId {
+        NodeId(NonZeroU32::new(n as u32).expect("Invalid NodeId"))
+    }
+}
+
+/// Unique reserved `NodeId` that is guaranteed to point to an
+/// empty allocated slot in the graph. It's safe to create multiple
+/// `NodeId` copies of `ReservedId`, however API users should never
+/// be able to clone a `ReservedId`, or create a new one from `NodeId`.
+///
+/// `ReservedId` is consumed once passed into `Graph::insert`.
+#[derive(Debug)]
+pub struct ReservedId(NodeId);
+
+impl ReservedId {
+    pub fn get(&self) -> NodeId {
+        self.0
+    }
+}
+
+/// Merge key used to lookup whether two nodes have been previously
+/// mered, so we can avoid duplicating merges, potentially running into
+/// loops that blow out the stack.
+///
+/// `Merge::new(a, b)` should always equal to `Merge::new(b, a)` to ensure
+/// that node merges are symmetric.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)]
+struct Merge(NodeId, NodeId);
+
+impl Merge {
+    fn new(a: NodeId, b: NodeId) -> Self {
+        if a < b {
+            Merge(a, b)
+        } else {
+            Merge(b, a)
+        }
+    }
+}
+
+/// When attempting to merge two nodes, one of which was not yet created,
+/// we can record such attempt, and execute the merge later on when the
+/// `awaiting` has been `insert`ed into the graph.
+#[derive(Debug)]
+pub struct DeferredMerge {
+    awaiting: NodeId,
+    with: NodeId,
+    into: ReservedId,
+}
+
+impl<Leaf> Graph<Leaf> {
+    pub fn new() -> Self {
+        Graph {
+            // Start with an empty slot so we can start
+            // counting NodeIds from 1 and use NonZero
+            // optimizations
+            nodes: vec![None],
+            merges: Map::new(),
+            hashes: Map::new(),
+            errors: Vec::new(),
+            deferred: Vec::new(),
+        }
+    }
+
+    pub fn errors(&self) -> &[DisambiguationError] {
+        &self.errors
+    }
+
+    fn next_id(&self) -> NodeId {
+        NodeId::new(self.nodes.len())
+    }
+
+    /// Reserve an empty slot for a node on the graph and return an
+    /// id for it. `ReservedId` cannot be cloned, and must be consumed
+    /// by calling `insert` on the graph.
+    pub fn reserve(&mut self) -> ReservedId {
+        let id = self.next_id();
+
+        self.nodes.push(None);
+
+        ReservedId(id)
+    }
+
+    /// Insert a node at a given, previously reserved id. Returns the
+    /// inserted `NodeId`.
+    pub fn insert<N>(&mut self, reserved: ReservedId, node: N) -> NodeId
+    where
+        N: Into<Node<Leaf>>,
+        Leaf: Disambiguate,
+    {
+        let id = reserved.get();
+
+        self.nodes[id.get()] = Some(node.into());
+
+        let mut awaiting = Vec::new();
+
+        // Partition out all `DeferredMerge`s that can be completed
+        // now that this `ReservedId` has a `Node` inserted into it.
+        for idx in (0..self.deferred.len()).rev() {
+            if self.deferred[idx].awaiting == id {
+                awaiting.push(self.deferred.remove(idx));
+            }
+        }
+
+        // Complete deferred merges. We've collected them from the back,
+        // so we must iterate through them from the back as well to restore
+        // proper order of merges in case there is some cascading going on.
+        for DeferredMerge {
+            awaiting,
+            with,
+            into,
+        } in awaiting.into_iter().rev()
+        {
+            self.merge_unchecked(awaiting, with, into);
+        }
+
+        id
+    }
+
+    /// Push a node onto the graph and get an id to it. If an identical
+    /// node has already been pushed on the graph, it will return the id
+    /// of that node instead.
+    pub fn push<B>(&mut self, node: B) -> NodeId
+    where
+        B: Into<Node<Leaf>>,
+    {
+        let node = node.into();
+
+        if let Node::Leaf(_) = node {
+            return self.push_unchecked(node);
+        }
+
+        let mut hasher = FnvHasher::default();
+        node.hash(&mut hasher);
+
+        let next_id = self.next_id();
+
+        match self.hashes.entry(hasher.finish()) {
+            Entry::Occupied(occupied) => {
+                let id = *occupied.get();
+
+                if self[id].eq(&node) {
+                    return id;
+                }
+            }
+            Entry::Vacant(vacant) => {
+                vacant.insert(next_id);
+            }
+        }
+
+        self.push_unchecked(node)
+    }
+
+    fn push_unchecked(&mut self, node: Node<Leaf>) -> NodeId {
+        let id = self.next_id();
+
+        self.nodes.push(Some(node));
+
+        id
+    }
+
+    /// If nodes `a` and `b` have been already merged, return the
+    /// `NodeId` of the node they have been merged into.
+    fn find_merged(&self, a: NodeId, b: NodeId) -> Option<NodeId> {
+        let probe = Merge::new(a, b);
+
+        self.merges.get(&probe).copied()
+    }
+
+    /// Mark that nodes `a` and `b` have been merged into `product`.
+    ///
+    /// This will also mark merging `a` and `product`, as well as
+    /// `b` and `product` into `product`, since those are symmetric
+    /// operations.
+    ///
+    /// This is necessary to break out asymmetric merge loops.
+    fn set_merged(&mut self, a: NodeId, b: NodeId, product: NodeId) {
+        self.merges.insert(Merge::new(a, b), product);
+        self.merges.insert(Merge::new(a, product), product);
+        self.merges.insert(Merge::new(b, product), product);
+    }
+
+    /// Merge the nodes at id `a` and `b`, returning a new id.
+    pub fn merge(&mut self, a: NodeId, b: NodeId) -> NodeId
+    where
+        Leaf: Disambiguate,
+    {
+        if a == b {
+            return a;
+        }
+
+        // If the id pair is already merged (or is being merged), just return the id
+        if let Some(id) = self.find_merged(a, b) {
+            return id;
+        }
+
+        match (self.get(a), self.get(b)) {
+            (None, None) => {
+                panic!(
+                    "Merging two reserved nodes! This is a bug, please report it:\n\
+                    \n\
+                    https://github.com/maciejhirsz/logos/issues"
+                );
+            }
+            (None, Some(_)) => {
+                let reserved = self.reserve();
+                let id = reserved.get();
+                self.deferred.push(DeferredMerge {
+                    awaiting: a,
+                    with: b,
+                    into: reserved,
+                });
+                self.set_merged(a, b, id);
+
+                return id;
+            }
+            (Some(_), None) => {
+                let reserved = self.reserve();
+                let id = reserved.get();
+                self.deferred.push(DeferredMerge {
+                    awaiting: b,
+                    with: a,
+                    into: reserved,
+                });
+                self.set_merged(a, b, id);
+
+                return id;
+            }
+            (Some(Node::Leaf(left)), Some(Node::Leaf(right))) => {
+                return match Disambiguate::cmp(left, right) {
+                    Ordering::Less => b,
+                    Ordering::Greater => a,
+                    Ordering::Equal => {
+                        self.errors.push(DisambiguationError(a, b));
+
+                        a
+                    }
+                };
+            }
+            _ => (),
+        }
+
+        // Reserve the id for the merge and save it. Since the graph can contain loops,
+        // this prevents us from trying to merge the same id pair in a loop, blowing up
+        // the stack.
+        let reserved = self.reserve();
+        self.set_merged(a, b, reserved.get());
+
+        self.merge_unchecked(a, b, reserved)
+    }
+
+    /// Unchecked merge of `a` and `b`. This fn assumes that `a` and `b` are
+    /// not pointing to empty slots.
+    fn merge_unchecked(&mut self, a: NodeId, b: NodeId, reserved: ReservedId) -> NodeId
+    where
+        Leaf: Disambiguate,
+    {
+        let merged_rope = match (self.get(a), self.get(b)) {
+            (Some(Node::Rope(rope)), _) => {
+                let rope = rope.clone();
+
+                self.merge_rope(rope, b)
+            }
+            (_, Some(Node::Rope(rope))) => {
+                let rope = rope.clone();
+
+                self.merge_rope(rope, a)
+            }
+            _ => None,
+        };
+
+        if let Some(rope) = merged_rope {
+            return self.insert(reserved, rope);
+        }
+
+        let mut fork = self.fork_off(a);
+        fork.merge(self.fork_off(b), self);
+
+        let mut stack = vec![reserved.get()];
+
+        // Flatten the fork
+        while let Some(miss) = fork.miss {
+            if stack.contains(&miss) {
+                break;
+            }
+            stack.push(miss);
+
+            let other = match self.get(miss) {
+                Some(Node::Fork(other)) => other.clone(),
+                Some(Node::Rope(other)) => other.clone().into_fork(self),
+                _ => break,
+            };
+            match other.miss {
+                Some(id) if self.get(id).is_none() => break,
+                _ => (),
+            }
+            fork.miss = None;
+            fork.merge(other, self);
+        }
+
+        self.insert(reserved, fork)
+    }
+
+    fn merge_rope(&mut self, rope: Rope, other: NodeId) -> Option<Rope>
+    where
+        Leaf: Disambiguate,
+    {
+        match self.get(other) {
+            Some(Node::Fork(fork)) if rope.miss.is_none() => {
+                // Count how many consecutive ranges in this rope would
+                // branch into the fork that results in a loop.
+                //
+                // e.g.: for rope "foobar" and a looping fork [a-z]: 6
+                let count = rope
+                    .pattern
+                    .iter()
+                    .take_while(|range| fork.contains(**range) == Some(other))
+                    .count();
+
+                let mut rope = rope.split_at(count, self)?.miss_any(other);
+
+                rope.then = self.merge(rope.then, other);
+
+                Some(rope)
+            }
+            Some(Node::Rope(other)) => {
+                let (prefix, miss) = rope.prefix(other)?;
+
+                let (a, b) = (rope, other.clone());
+
+                let a = a.remainder(prefix.len(), self);
+                let b = b.remainder(prefix.len(), self);
+
+                let rope = Rope::new(prefix, self.merge(a, b)).miss(miss);
+
+                Some(rope)
+            }
+            Some(Node::Leaf(_)) | None => {
+                if rope.miss.is_none() {
+                    Some(rope.miss(other))
+                } else {
+                    None
+                }
+            }
+            _ => None,
+        }
+    }
+
+    pub fn fork_off(&mut self, id: NodeId) -> Fork
+    where
+        Leaf: Disambiguate,
+    {
+        match self.get(id) {
+            Some(Node::Fork(fork)) => fork.clone(),
+            Some(Node::Rope(rope)) => rope.clone().into_fork(self),
+            Some(Node::Leaf(_)) | None => Fork::new().miss(id),
+        }
+    }
+
+    pub fn nodes(&self) -> &[Option<Node<Leaf>>] {
+        &self.nodes
+    }
+
+    /// Find all nodes that have no references and remove them.
+    pub fn shake(&mut self, root: NodeId) {
+        let mut filter = vec![false; self.nodes.len()];
+
+        filter[root.get()] = true;
+
+        self[root].shake(self, &mut filter);
+
+        for (id, referenced) in filter.into_iter().enumerate() {
+            if !referenced {
+                self.nodes[id] = None;
+            }
+        }
+    }
+
+    pub fn get(&self, id: NodeId) -> Option<&Node<Leaf>> {
+        self.nodes.get(id.get())?.as_ref()
+    }
+}
+
+impl<Leaf> Index<NodeId> for Graph<Leaf> {
+    type Output = Node<Leaf>;
+
+    fn index(&self, id: NodeId) -> &Node<Leaf> {
+        self.get(id).expect(
+            "Indexing into an empty node. This is a bug, please report it at:\n\
+            \n\
+            https://github.com/maciejhirsz/logos/issues",
+        )
+    }
+}
+
+impl std::fmt::Display for NodeId {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        std::fmt::Display::fmt(&self.0, f)
+    }
+}
+
+#[cfg_attr(test, derive(PartialEq))]
+pub enum Node<Leaf> {
+    /// Fork node, can lead to more than one state
+    Fork(Fork),
+    /// Rope node, can lead to one state on match, one state on miss
+    Rope(Rope),
+    /// Leaf node, terminal state
+    Leaf(Leaf),
+}
+
+impl<Leaf> Node<Leaf> {
+    pub fn miss(&self) -> Option<NodeId> {
+        match self {
+            Node::Rope(rope) => rope.miss.first(),
+            Node::Fork(fork) => fork.miss,
+            Node::Leaf(_) => None,
+        }
+    }
+
+    fn eq(&self, other: &Node<Leaf>) -> bool {
+        match (self, other) {
+            (Node::Fork(a), Node::Fork(b)) => a == b,
+            (Node::Rope(a), Node::Rope(b)) => a == b,
+            _ => false,
+        }
+    }
+
+    fn shake(&self, graph: &Graph<Leaf>, filter: &mut [bool]) {
+        match self {
+            Node::Fork(fork) => fork.shake(graph, filter),
+            Node::Rope(rope) => rope.shake(graph, filter),
+            Node::Leaf(_) => (),
+        }
+    }
+
+    pub fn unwrap_leaf(&self) -> &Leaf {
+        match self {
+            Node::Fork(_) => panic!("Internal Error: called unwrap_leaf on a fork"),
+            Node::Rope(_) => panic!("Internal Error: called unwrap_leaf on a rope"),
+            Node::Leaf(leaf) => leaf,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn leaf_stack_size() {
+        use std::mem::size_of;
+
+        const WORD: usize = size_of::<usize>();
+        const NODE: usize = size_of::<Node<()>>();
+
+        assert!(NODE <= 6 * WORD, "Size of Node<()> is {} bytes!", NODE);
+    }
+
+    #[test]
+    fn create_a_loop() {
+        let mut graph = Graph::new();
+
+        let token = graph.push(Node::Leaf("IDENT"));
+        let id = graph.reserve();
+        let fork = Fork::new().branch('a'..='z', id.get()).miss(token);
+        let root = graph.insert(id, fork);
+
+        assert_eq!(graph[token], Node::Leaf("IDENT"));
+        assert_eq!(graph[root], Fork::new().branch('a'..='z', root).miss(token),);
+    }
+
+    #[test]
+    fn fork_off() {
+        let mut graph = Graph::new();
+
+        let leaf = graph.push(Node::Leaf("LEAF"));
+        let rope = graph.push(Rope::new("rope", leaf));
+        let fork = graph.push(Fork::new().branch(b'!', leaf));
+
+        assert_eq!(graph.fork_off(leaf), Fork::new().miss(leaf));
+        assert_eq!(
+            graph.fork_off(rope),
+            Fork::new().branch(b'r', NodeId::new(graph.nodes.len() - 1))
+        );
+        assert_eq!(graph.fork_off(fork), Fork::new().branch(b'!', leaf));
+    }
+}
author	mo khan <mo@mokhan.ca>	2025-07-02 18:36:06 -0600
committer	mo khan <mo@mokhan.ca>	2025-07-02 18:36:06 -0600
commit	8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch)
tree	22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/logos-codegen/src/graph/mod.rs
parent	4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff)