summaryrefslogtreecommitdiff
path: root/vendor/logos-codegen/src/generator/mod.rs
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2025-07-02 18:36:06 -0600
committermo khan <mo@mokhan.ca>2025-07-02 18:36:06 -0600
commit8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch)
tree22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/logos-codegen/src/generator/mod.rs
parent4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff)
chore: add vendor directory
Diffstat (limited to 'vendor/logos-codegen/src/generator/mod.rs')
-rw-r--r--vendor/logos-codegen/src/generator/mod.rs268
1 files changed, 268 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/generator/mod.rs b/vendor/logos-codegen/src/generator/mod.rs
new file mode 100644
index 00000000..1b8bf8bf
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/mod.rs
@@ -0,0 +1,268 @@
+use fnv::{FnvHashMap as Map, FnvHashSet as Set};
+use proc_macro2::TokenStream;
+use quote::{quote, ToTokens, TokenStreamExt};
+use syn::Ident;
+
+use crate::graph::{Graph, Meta, Node, NodeId, Range};
+use crate::leaf::Leaf;
+use crate::util::ToIdent;
+
+mod context;
+mod fork;
+mod leaf;
+mod rope;
+mod tables;
+
+use self::context::Context;
+use self::tables::TableStack;
+
+pub struct Generator<'a> {
+ /// Name of the type we are implementing the `Logos` trait for
+ name: &'a Ident,
+ /// Name of the type with any generics it might need
+ this: &'a TokenStream,
+ /// Id to the root node
+ root: NodeId,
+ /// Reference to the graph with all of the nodes
+ graph: &'a Graph<Leaf<'a>>,
+ /// Meta data collected for the nodes
+ meta: Meta,
+ /// Buffer with functions growing during generation
+ rendered: TokenStream,
+ /// Set of functions that have already been rendered
+ fns: Set<(NodeId, Context)>,
+ /// Function name identifiers
+ idents: Map<(NodeId, Context), Ident>,
+ /// Local function calls. Note: a call might change its context,
+ /// so we can't use `idents` for this purpose.
+ gotos: Map<(NodeId, Context), TokenStream>,
+ /// Identifiers for helper functions matching a byte to a given
+ /// set of ranges
+ tests: Map<Vec<Range>, Ident>,
+ /// Related to above, table stack manages tables that need to be
+ tables: TableStack,
+}
+
+impl<'a> Generator<'a> {
+ pub fn new(
+ name: &'a Ident,
+ this: &'a TokenStream,
+ root: NodeId,
+ graph: &'a Graph<Leaf>,
+ ) -> Self {
+ let rendered = Self::fast_loop_macro();
+ let meta = Meta::analyze(root, graph);
+
+ Generator {
+ name,
+ this,
+ root,
+ graph,
+ meta,
+ rendered,
+ fns: Set::default(),
+ idents: Map::default(),
+ gotos: Map::default(),
+ tests: Map::default(),
+ tables: TableStack::new(),
+ }
+ }
+
+ pub fn generate(mut self) -> TokenStream {
+ let root = self.goto(self.root, Context::default()).clone();
+ let rendered = &self.rendered;
+ let tables = &self.tables;
+
+ quote! {
+ #tables
+ #rendered
+ #root
+ }
+ }
+
+ fn generate_fn(&mut self, id: NodeId, ctx: Context) {
+ if self.fns.contains(&(id, ctx)) {
+ return;
+ }
+ self.fns.insert((id, ctx));
+
+ let body = match &self.graph[id] {
+ Node::Fork(fork) => self.generate_fork(id, fork, ctx),
+ Node::Rope(rope) => self.generate_rope(rope, ctx),
+ Node::Leaf(leaf) => self.generate_leaf(leaf, ctx),
+ };
+ let ident = self.generate_ident(id, ctx);
+ let out = quote! {
+ #[inline]
+ fn #ident<'s>(lex: &mut Lexer<'s>) {
+ #body
+ }
+ };
+
+ self.rendered.append_all(out);
+ }
+
+ fn goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream {
+ let key = (id, ctx);
+
+ // Allow contains_key + insert because self.generate_ident borrows a mutable ref to self
+ // too.
+ #[allow(clippy::map_entry)]
+ if !self.gotos.contains_key(&key) {
+ let meta = &self.meta[id];
+ let enters_loop = !meta.loop_entry_from.is_empty();
+
+ let bump = if enters_loop || !ctx.can_backtrack() {
+ ctx.switch(self.graph[id].miss())
+ } else {
+ None
+ };
+
+ let bump = match (bump, enters_loop, meta.min_read) {
+ (Some(t), _, _) => Some(t),
+ (None, true, _) => ctx.bump(),
+ (None, false, 0) => ctx.bump(),
+ (None, false, _) => None,
+ };
+
+ if meta.min_read == 0 || ctx.remainder() < meta.min_read {
+ ctx.wipe();
+ }
+
+ let ident = self.generate_ident(id, ctx);
+ let mut call_site = quote!(#ident(lex));
+
+ if let Some(bump) = bump {
+ call_site = quote!({
+ #bump
+ #call_site
+ });
+ }
+ self.gotos.insert(key, call_site);
+ self.generate_fn(id, ctx);
+ }
+ &self.gotos[&key]
+ }
+
+ fn generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident {
+ self.idents.entry((id, ctx)).or_insert_with(|| {
+ let mut ident = format!("goto{}", id);
+
+ ctx.write_suffix(&mut ident);
+
+ ident.to_ident()
+ })
+ }
+
+ /// Returns an identifier to a function that matches a byte to any
+ /// of the provided ranges. This will generate either a simple
+ /// match expression, or use a lookup table internally.
+ fn generate_test(&mut self, ranges: Vec<Range>) -> &Ident {
+ if !self.tests.contains_key(&ranges) {
+ let idx = self.tests.len();
+ let ident = format!("pattern{}", idx).to_ident();
+
+ let lo = ranges.first().unwrap().start;
+ let hi = ranges.last().unwrap().end;
+
+ let body = match ranges.len() {
+ 0..=2 => {
+ quote! {
+ match byte {
+ #(#ranges)|* => true,
+ _ => false,
+ }
+ }
+ }
+ _ if hi - lo < 64 => {
+ let mut offset = hi.saturating_sub(63);
+
+ while offset.count_ones() > 1 && lo - offset > 0 {
+ offset += 1;
+ }
+
+ let mut table = 0u64;
+
+ for byte in ranges.iter().flat_map(|range| *range) {
+ if byte - offset >= 64 {
+ panic!("{:#?} {} {} {}", ranges, hi, lo, offset);
+ }
+ table |= 1 << (byte - offset);
+ }
+
+ let search = match offset {
+ 0 => quote!(byte),
+ _ => quote!(byte.wrapping_sub(#offset)),
+ };
+
+ quote! {
+ const LUT: u64 = #table;
+
+ match 1u64.checked_shl(#search as u32) {
+ Some(shift) => LUT & shift != 0,
+ None => false,
+ }
+ }
+ }
+ _ => {
+ let mut view = self.tables.view();
+
+ for byte in ranges.iter().flat_map(|range| *range) {
+ view.flag(byte);
+ }
+
+ let mask = view.mask();
+ let lut = view.ident();
+
+ quote! {
+ #lut[byte as usize] & #mask > 0
+ }
+ }
+ };
+ self.rendered.append_all(quote! {
+ #[inline]
+ fn #ident(byte: u8) -> bool {
+ #body
+ }
+ });
+ self.tests.insert(ranges.clone(), ident);
+ }
+ &self.tests[&ranges]
+ }
+}
+
+macro_rules! match_quote {
+ ($source:expr; $($byte:tt,)* ) => {match $source {
+ $( $byte => quote!($byte), )*
+ byte => quote!(#byte),
+ }}
+}
+
+fn byte_to_tokens(byte: u8) -> TokenStream {
+ match_quote! {
+ byte;
+ b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9',
+ b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
+ b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
+ b'u', b'v', b'w', b'x', b'y', b'z',
+ b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J',
+ b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
+ b'U', b'V', b'W', b'X', b'Y', b'Z',
+ b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')',
+ b'{', b'}', b'[', b']', b'<', b'>', b'-', b'=', b'_', b'+',
+ b':', b';', b',', b'.', b'/', b'?', b'|', b'"', b'\'', b'\\',
+ }
+}
+
+impl ToTokens for Range {
+ fn to_tokens(&self, tokens: &mut TokenStream) {
+ let Range { start, end } = self;
+
+ tokens.append_all(byte_to_tokens(*start));
+
+ if start != end {
+ tokens.append_all(quote!(..=));
+ tokens.append_all(byte_to_tokens(*end));
+ }
+ }
+}