diff options
Diffstat (limited to 'vendor/logos-codegen/src/generator/mod.rs')
| -rw-r--r-- | vendor/logos-codegen/src/generator/mod.rs | 268 |
1 files changed, 268 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/generator/mod.rs b/vendor/logos-codegen/src/generator/mod.rs new file mode 100644 index 00000000..1b8bf8bf --- /dev/null +++ b/vendor/logos-codegen/src/generator/mod.rs @@ -0,0 +1,268 @@ +use fnv::{FnvHashMap as Map, FnvHashSet as Set}; +use proc_macro2::TokenStream; +use quote::{quote, ToTokens, TokenStreamExt}; +use syn::Ident; + +use crate::graph::{Graph, Meta, Node, NodeId, Range}; +use crate::leaf::Leaf; +use crate::util::ToIdent; + +mod context; +mod fork; +mod leaf; +mod rope; +mod tables; + +use self::context::Context; +use self::tables::TableStack; + +pub struct Generator<'a> { + /// Name of the type we are implementing the `Logos` trait for + name: &'a Ident, + /// Name of the type with any generics it might need + this: &'a TokenStream, + /// Id to the root node + root: NodeId, + /// Reference to the graph with all of the nodes + graph: &'a Graph<Leaf<'a>>, + /// Meta data collected for the nodes + meta: Meta, + /// Buffer with functions growing during generation + rendered: TokenStream, + /// Set of functions that have already been rendered + fns: Set<(NodeId, Context)>, + /// Function name identifiers + idents: Map<(NodeId, Context), Ident>, + /// Local function calls. Note: a call might change its context, + /// so we can't use `idents` for this purpose. + gotos: Map<(NodeId, Context), TokenStream>, + /// Identifiers for helper functions matching a byte to a given + /// set of ranges + tests: Map<Vec<Range>, Ident>, + /// Related to above, table stack manages tables that need to be + tables: TableStack, +} + +impl<'a> Generator<'a> { + pub fn new( + name: &'a Ident, + this: &'a TokenStream, + root: NodeId, + graph: &'a Graph<Leaf>, + ) -> Self { + let rendered = Self::fast_loop_macro(); + let meta = Meta::analyze(root, graph); + + Generator { + name, + this, + root, + graph, + meta, + rendered, + fns: Set::default(), + idents: Map::default(), + gotos: Map::default(), + tests: Map::default(), + tables: TableStack::new(), + } + } + + pub fn generate(mut self) -> TokenStream { + let root = self.goto(self.root, Context::default()).clone(); + let rendered = &self.rendered; + let tables = &self.tables; + + quote! { + #tables + #rendered + #root + } + } + + fn generate_fn(&mut self, id: NodeId, ctx: Context) { + if self.fns.contains(&(id, ctx)) { + return; + } + self.fns.insert((id, ctx)); + + let body = match &self.graph[id] { + Node::Fork(fork) => self.generate_fork(id, fork, ctx), + Node::Rope(rope) => self.generate_rope(rope, ctx), + Node::Leaf(leaf) => self.generate_leaf(leaf, ctx), + }; + let ident = self.generate_ident(id, ctx); + let out = quote! { + #[inline] + fn #ident<'s>(lex: &mut Lexer<'s>) { + #body + } + }; + + self.rendered.append_all(out); + } + + fn goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream { + let key = (id, ctx); + + // Allow contains_key + insert because self.generate_ident borrows a mutable ref to self + // too. + #[allow(clippy::map_entry)] + if !self.gotos.contains_key(&key) { + let meta = &self.meta[id]; + let enters_loop = !meta.loop_entry_from.is_empty(); + + let bump = if enters_loop || !ctx.can_backtrack() { + ctx.switch(self.graph[id].miss()) + } else { + None + }; + + let bump = match (bump, enters_loop, meta.min_read) { + (Some(t), _, _) => Some(t), + (None, true, _) => ctx.bump(), + (None, false, 0) => ctx.bump(), + (None, false, _) => None, + }; + + if meta.min_read == 0 || ctx.remainder() < meta.min_read { + ctx.wipe(); + } + + let ident = self.generate_ident(id, ctx); + let mut call_site = quote!(#ident(lex)); + + if let Some(bump) = bump { + call_site = quote!({ + #bump + #call_site + }); + } + self.gotos.insert(key, call_site); + self.generate_fn(id, ctx); + } + &self.gotos[&key] + } + + fn generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident { + self.idents.entry((id, ctx)).or_insert_with(|| { + let mut ident = format!("goto{}", id); + + ctx.write_suffix(&mut ident); + + ident.to_ident() + }) + } + + /// Returns an identifier to a function that matches a byte to any + /// of the provided ranges. This will generate either a simple + /// match expression, or use a lookup table internally. + fn generate_test(&mut self, ranges: Vec<Range>) -> &Ident { + if !self.tests.contains_key(&ranges) { + let idx = self.tests.len(); + let ident = format!("pattern{}", idx).to_ident(); + + let lo = ranges.first().unwrap().start; + let hi = ranges.last().unwrap().end; + + let body = match ranges.len() { + 0..=2 => { + quote! { + match byte { + #(#ranges)|* => true, + _ => false, + } + } + } + _ if hi - lo < 64 => { + let mut offset = hi.saturating_sub(63); + + while offset.count_ones() > 1 && lo - offset > 0 { + offset += 1; + } + + let mut table = 0u64; + + for byte in ranges.iter().flat_map(|range| *range) { + if byte - offset >= 64 { + panic!("{:#?} {} {} {}", ranges, hi, lo, offset); + } + table |= 1 << (byte - offset); + } + + let search = match offset { + 0 => quote!(byte), + _ => quote!(byte.wrapping_sub(#offset)), + }; + + quote! { + const LUT: u64 = #table; + + match 1u64.checked_shl(#search as u32) { + Some(shift) => LUT & shift != 0, + None => false, + } + } + } + _ => { + let mut view = self.tables.view(); + + for byte in ranges.iter().flat_map(|range| *range) { + view.flag(byte); + } + + let mask = view.mask(); + let lut = view.ident(); + + quote! { + #lut[byte as usize] & #mask > 0 + } + } + }; + self.rendered.append_all(quote! { + #[inline] + fn #ident(byte: u8) -> bool { + #body + } + }); + self.tests.insert(ranges.clone(), ident); + } + &self.tests[&ranges] + } +} + +macro_rules! match_quote { + ($source:expr; $($byte:tt,)* ) => {match $source { + $( $byte => quote!($byte), )* + byte => quote!(#byte), + }} +} + +fn byte_to_tokens(byte: u8) -> TokenStream { + match_quote! { + byte; + b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', + b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', + b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', + b'u', b'v', b'w', b'x', b'y', b'z', + b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', + b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', + b'U', b'V', b'W', b'X', b'Y', b'Z', + b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')', + b'{', b'}', b'[', b']', b'<', b'>', b'-', b'=', b'_', b'+', + b':', b';', b',', b'.', b'/', b'?', b'|', b'"', b'\'', b'\\', + } +} + +impl ToTokens for Range { + fn to_tokens(&self, tokens: &mut TokenStream) { + let Range { start, end } = self; + + tokens.append_all(byte_to_tokens(*start)); + + if start != end { + tokens.append_all(quote!(..=)); + tokens.append_all(byte_to_tokens(*end)); + } + } +} |
