diff options
Diffstat (limited to 'vendor/logos-codegen/src/graph/mod.rs')
| -rw-r--r-- | vendor/logos-codegen/src/graph/mod.rs | 566 |
1 files changed, 0 insertions, 566 deletions
diff --git a/vendor/logos-codegen/src/graph/mod.rs b/vendor/logos-codegen/src/graph/mod.rs deleted file mode 100644 index 6d218e8c..00000000 --- a/vendor/logos-codegen/src/graph/mod.rs +++ /dev/null @@ -1,566 +0,0 @@ -use std::cmp::Ordering; -use std::collections::btree_map::Entry; -use std::collections::BTreeMap as Map; -use std::hash::{Hash, Hasher}; -use std::num::NonZeroU32; -use std::ops::Index; - -use fnv::FnvHasher; - -mod fork; -mod impls; -mod meta; -mod range; -mod regex; -mod rope; - -pub use self::fork::Fork; -pub use self::meta::Meta; -pub use self::range::Range; -pub use self::rope::Rope; - -/// Disambiguation error during the attempt to merge two leaf -/// nodes with the same priority -#[derive(Debug)] -pub struct DisambiguationError(pub NodeId, pub NodeId); - -pub struct Graph<Leaf> { - /// Internal storage of all allocated nodes. Once a node is - /// put here, it should never be mutated. - nodes: Vec<Option<Node<Leaf>>>, - /// When merging two nodes into a new node, we store the two - /// entry keys and the result, so that we don't merge the same - /// two nodes multiple times. - /// - /// Most of the time the entry we want to find will be the last - /// one that has been inserted, so we can use a vec with reverse - /// order search to get O(1) searches much faster than any *Map. - merges: Map<Merge, NodeId>, - /// Another map used for accounting. Before `.push`ing a new node - /// onto the graph (inserts are exempt), we hash it and find if - /// an identical(!) node has been created before. - hashes: Map<u64, NodeId>, - /// Instead of handling errors over return types, opt to collect - /// them internally. - errors: Vec<DisambiguationError>, - /// Deferred merges. When when attempting to merge a node with an - /// empty reserved slot, the merging operation is deferred until - /// the reserved slot is populated. This is a stack that keeps track - /// of all such deferred merges - deferred: Vec<DeferredMerge>, -} - -/// Trait to be implemented on `Leaf` nodes in order to disambiguate -/// between them. -pub trait Disambiguate { - fn cmp(left: &Self, right: &Self) -> Ordering; -} - -/// Id of a Node in the graph. `NodeId` can be referencing an empty -/// slot that is going to be populated later in time. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct NodeId(NonZeroU32); - -impl Hash for NodeId { - fn hash<H: Hasher>(&self, state: &mut H) { - // Always use little-endian byte order for hashing to avoid - // different code generation on big-endian platforms due to - // iteration over a HashMap, - // see https://github.com/maciejhirsz/logos/issues/427. - state.write(&self.0.get().to_le_bytes()) - } -} - -impl NodeId { - fn get(self) -> usize { - self.0.get() as usize - } - - fn new(n: usize) -> NodeId { - NodeId(NonZeroU32::new(n as u32).expect("Invalid NodeId")) - } -} - -/// Unique reserved `NodeId` that is guaranteed to point to an -/// empty allocated slot in the graph. It's safe to create multiple -/// `NodeId` copies of `ReservedId`, however API users should never -/// be able to clone a `ReservedId`, or create a new one from `NodeId`. -/// -/// `ReservedId` is consumed once passed into `Graph::insert`. -#[derive(Debug)] -pub struct ReservedId(NodeId); - -impl ReservedId { - pub fn get(&self) -> NodeId { - self.0 - } -} - -/// Merge key used to lookup whether two nodes have been previously -/// mered, so we can avoid duplicating merges, potentially running into -/// loops that blow out the stack. -/// -/// `Merge::new(a, b)` should always equal to `Merge::new(b, a)` to ensure -/// that node merges are symmetric. -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)] -struct Merge(NodeId, NodeId); - -impl Merge { - fn new(a: NodeId, b: NodeId) -> Self { - if a < b { - Merge(a, b) - } else { - Merge(b, a) - } - } -} - -/// When attempting to merge two nodes, one of which was not yet created, -/// we can record such attempt, and execute the merge later on when the -/// `awaiting` has been `insert`ed into the graph. -#[derive(Debug)] -pub struct DeferredMerge { - awaiting: NodeId, - with: NodeId, - into: ReservedId, -} - -impl<Leaf> Graph<Leaf> { - pub fn new() -> Self { - Graph { - // Start with an empty slot so we can start - // counting NodeIds from 1 and use NonZero - // optimizations - nodes: vec![None], - merges: Map::new(), - hashes: Map::new(), - errors: Vec::new(), - deferred: Vec::new(), - } - } - - pub fn errors(&self) -> &[DisambiguationError] { - &self.errors - } - - fn next_id(&self) -> NodeId { - NodeId::new(self.nodes.len()) - } - - /// Reserve an empty slot for a node on the graph and return an - /// id for it. `ReservedId` cannot be cloned, and must be consumed - /// by calling `insert` on the graph. - pub fn reserve(&mut self) -> ReservedId { - let id = self.next_id(); - - self.nodes.push(None); - - ReservedId(id) - } - - /// Insert a node at a given, previously reserved id. Returns the - /// inserted `NodeId`. - pub fn insert<N>(&mut self, reserved: ReservedId, node: N) -> NodeId - where - N: Into<Node<Leaf>>, - Leaf: Disambiguate, - { - let id = reserved.get(); - - self.nodes[id.get()] = Some(node.into()); - - let mut awaiting = Vec::new(); - - // Partition out all `DeferredMerge`s that can be completed - // now that this `ReservedId` has a `Node` inserted into it. - for idx in (0..self.deferred.len()).rev() { - if self.deferred[idx].awaiting == id { - awaiting.push(self.deferred.remove(idx)); - } - } - - // Complete deferred merges. We've collected them from the back, - // so we must iterate through them from the back as well to restore - // proper order of merges in case there is some cascading going on. - for DeferredMerge { - awaiting, - with, - into, - } in awaiting.into_iter().rev() - { - self.merge_unchecked(awaiting, with, into); - } - - id - } - - /// Push a node onto the graph and get an id to it. If an identical - /// node has already been pushed on the graph, it will return the id - /// of that node instead. - pub fn push<B>(&mut self, node: B) -> NodeId - where - B: Into<Node<Leaf>>, - { - let node = node.into(); - - if let Node::Leaf(_) = node { - return self.push_unchecked(node); - } - - let mut hasher = FnvHasher::default(); - node.hash(&mut hasher); - - let next_id = self.next_id(); - - match self.hashes.entry(hasher.finish()) { - Entry::Occupied(occupied) => { - let id = *occupied.get(); - - if self[id].eq(&node) { - return id; - } - } - Entry::Vacant(vacant) => { - vacant.insert(next_id); - } - } - - self.push_unchecked(node) - } - - fn push_unchecked(&mut self, node: Node<Leaf>) -> NodeId { - let id = self.next_id(); - - self.nodes.push(Some(node)); - - id - } - - /// If nodes `a` and `b` have been already merged, return the - /// `NodeId` of the node they have been merged into. - fn find_merged(&self, a: NodeId, b: NodeId) -> Option<NodeId> { - let probe = Merge::new(a, b); - - self.merges.get(&probe).copied() - } - - /// Mark that nodes `a` and `b` have been merged into `product`. - /// - /// This will also mark merging `a` and `product`, as well as - /// `b` and `product` into `product`, since those are symmetric - /// operations. - /// - /// This is necessary to break out asymmetric merge loops. - fn set_merged(&mut self, a: NodeId, b: NodeId, product: NodeId) { - self.merges.insert(Merge::new(a, b), product); - self.merges.insert(Merge::new(a, product), product); - self.merges.insert(Merge::new(b, product), product); - } - - /// Merge the nodes at id `a` and `b`, returning a new id. - pub fn merge(&mut self, a: NodeId, b: NodeId) -> NodeId - where - Leaf: Disambiguate, - { - if a == b { - return a; - } - - // If the id pair is already merged (or is being merged), just return the id - if let Some(id) = self.find_merged(a, b) { - return id; - } - - match (self.get(a), self.get(b)) { - (None, None) => { - panic!( - "Merging two reserved nodes! This is a bug, please report it:\n\ - \n\ - https://github.com/maciejhirsz/logos/issues" - ); - } - (None, Some(_)) => { - let reserved = self.reserve(); - let id = reserved.get(); - self.deferred.push(DeferredMerge { - awaiting: a, - with: b, - into: reserved, - }); - self.set_merged(a, b, id); - - return id; - } - (Some(_), None) => { - let reserved = self.reserve(); - let id = reserved.get(); - self.deferred.push(DeferredMerge { - awaiting: b, - with: a, - into: reserved, - }); - self.set_merged(a, b, id); - - return id; - } - (Some(Node::Leaf(left)), Some(Node::Leaf(right))) => { - return match Disambiguate::cmp(left, right) { - Ordering::Less => b, - Ordering::Greater => a, - Ordering::Equal => { - self.errors.push(DisambiguationError(a, b)); - - a - } - }; - } - _ => (), - } - - // Reserve the id for the merge and save it. Since the graph can contain loops, - // this prevents us from trying to merge the same id pair in a loop, blowing up - // the stack. - let reserved = self.reserve(); - self.set_merged(a, b, reserved.get()); - - self.merge_unchecked(a, b, reserved) - } - - /// Unchecked merge of `a` and `b`. This fn assumes that `a` and `b` are - /// not pointing to empty slots. - fn merge_unchecked(&mut self, a: NodeId, b: NodeId, reserved: ReservedId) -> NodeId - where - Leaf: Disambiguate, - { - let merged_rope = match (self.get(a), self.get(b)) { - (Some(Node::Rope(rope)), _) => { - let rope = rope.clone(); - - self.merge_rope(rope, b) - } - (_, Some(Node::Rope(rope))) => { - let rope = rope.clone(); - - self.merge_rope(rope, a) - } - _ => None, - }; - - if let Some(rope) = merged_rope { - return self.insert(reserved, rope); - } - - let mut fork = self.fork_off(a); - fork.merge(self.fork_off(b), self); - - let mut stack = vec![reserved.get()]; - - // Flatten the fork - while let Some(miss) = fork.miss { - if stack.contains(&miss) { - break; - } - stack.push(miss); - - let other = match self.get(miss) { - Some(Node::Fork(other)) => other.clone(), - Some(Node::Rope(other)) => other.clone().into_fork(self), - _ => break, - }; - match other.miss { - Some(id) if self.get(id).is_none() => break, - _ => (), - } - fork.miss = None; - fork.merge(other, self); - } - - self.insert(reserved, fork) - } - - fn merge_rope(&mut self, rope: Rope, other: NodeId) -> Option<Rope> - where - Leaf: Disambiguate, - { - match self.get(other) { - Some(Node::Fork(fork)) if rope.miss.is_none() => { - // Count how many consecutive ranges in this rope would - // branch into the fork that results in a loop. - // - // e.g.: for rope "foobar" and a looping fork [a-z]: 6 - let count = rope - .pattern - .iter() - .take_while(|range| fork.contains(**range) == Some(other)) - .count(); - - let mut rope = rope.split_at(count, self)?.miss_any(other); - - rope.then = self.merge(rope.then, other); - - Some(rope) - } - Some(Node::Rope(other)) => { - let (prefix, miss) = rope.prefix(other)?; - - let (a, b) = (rope, other.clone()); - - let a = a.remainder(prefix.len(), self); - let b = b.remainder(prefix.len(), self); - - let rope = Rope::new(prefix, self.merge(a, b)).miss(miss); - - Some(rope) - } - Some(Node::Leaf(_)) | None => { - if rope.miss.is_none() { - Some(rope.miss(other)) - } else { - None - } - } - _ => None, - } - } - - pub fn fork_off(&mut self, id: NodeId) -> Fork - where - Leaf: Disambiguate, - { - match self.get(id) { - Some(Node::Fork(fork)) => fork.clone(), - Some(Node::Rope(rope)) => rope.clone().into_fork(self), - Some(Node::Leaf(_)) | None => Fork::new().miss(id), - } - } - - pub fn nodes(&self) -> &[Option<Node<Leaf>>] { - &self.nodes - } - - /// Find all nodes that have no references and remove them. - pub fn shake(&mut self, root: NodeId) { - let mut filter = vec![false; self.nodes.len()]; - - filter[root.get()] = true; - - self[root].shake(self, &mut filter); - - for (id, referenced) in filter.into_iter().enumerate() { - if !referenced { - self.nodes[id] = None; - } - } - } - - pub fn get(&self, id: NodeId) -> Option<&Node<Leaf>> { - self.nodes.get(id.get())?.as_ref() - } -} - -impl<Leaf> Index<NodeId> for Graph<Leaf> { - type Output = Node<Leaf>; - - fn index(&self, id: NodeId) -> &Node<Leaf> { - self.get(id).expect( - "Indexing into an empty node. This is a bug, please report it at:\n\ - \n\ - https://github.com/maciejhirsz/logos/issues", - ) - } -} - -impl std::fmt::Display for NodeId { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - std::fmt::Display::fmt(&self.0, f) - } -} - -#[cfg_attr(test, derive(PartialEq))] -pub enum Node<Leaf> { - /// Fork node, can lead to more than one state - Fork(Fork), - /// Rope node, can lead to one state on match, one state on miss - Rope(Rope), - /// Leaf node, terminal state - Leaf(Leaf), -} - -impl<Leaf> Node<Leaf> { - pub fn miss(&self) -> Option<NodeId> { - match self { - Node::Rope(rope) => rope.miss.first(), - Node::Fork(fork) => fork.miss, - Node::Leaf(_) => None, - } - } - - fn eq(&self, other: &Node<Leaf>) -> bool { - match (self, other) { - (Node::Fork(a), Node::Fork(b)) => a == b, - (Node::Rope(a), Node::Rope(b)) => a == b, - _ => false, - } - } - - fn shake(&self, graph: &Graph<Leaf>, filter: &mut [bool]) { - match self { - Node::Fork(fork) => fork.shake(graph, filter), - Node::Rope(rope) => rope.shake(graph, filter), - Node::Leaf(_) => (), - } - } - - pub fn unwrap_leaf(&self) -> &Leaf { - match self { - Node::Fork(_) => panic!("Internal Error: called unwrap_leaf on a fork"), - Node::Rope(_) => panic!("Internal Error: called unwrap_leaf on a rope"), - Node::Leaf(leaf) => leaf, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn leaf_stack_size() { - use std::mem::size_of; - - const WORD: usize = size_of::<usize>(); - const NODE: usize = size_of::<Node<()>>(); - - assert!(NODE <= 6 * WORD, "Size of Node<()> is {} bytes!", NODE); - } - - #[test] - fn create_a_loop() { - let mut graph = Graph::new(); - - let token = graph.push(Node::Leaf("IDENT")); - let id = graph.reserve(); - let fork = Fork::new().branch('a'..='z', id.get()).miss(token); - let root = graph.insert(id, fork); - - assert_eq!(graph[token], Node::Leaf("IDENT")); - assert_eq!(graph[root], Fork::new().branch('a'..='z', root).miss(token),); - } - - #[test] - fn fork_off() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = graph.push(Rope::new("rope", leaf)); - let fork = graph.push(Fork::new().branch(b'!', leaf)); - - assert_eq!(graph.fork_off(leaf), Fork::new().miss(leaf)); - assert_eq!( - graph.fork_off(rope), - Fork::new().branch(b'r', NodeId::new(graph.nodes.len() - 1)) - ); - assert_eq!(graph.fork_off(fork), Fork::new().branch(b'!', leaf)); - } -} |
