diff options
Diffstat (limited to 'vendor/logos-codegen/src/lib.rs')
| -rw-r--r-- | vendor/logos-codegen/src/lib.rs | 391 |
1 files changed, 391 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/lib.rs b/vendor/logos-codegen/src/lib.rs new file mode 100644 index 00000000..2b2d3db2 --- /dev/null +++ b/vendor/logos-codegen/src/lib.rs @@ -0,0 +1,391 @@ +//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right"> +//! +//! # Logos +//! +//! This is a `#[derive]` macro crate, [for documentation go to main crate](https://docs.rs/logos). + +// The `quote!` macro requires deep recursion. +#![recursion_limit = "196"] +#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")] + +mod error; +mod generator; +#[cfg(not(feature = "fuzzing"))] +mod graph; +#[cfg(feature = "fuzzing")] +pub mod graph; +mod leaf; +#[cfg(not(feature = "fuzzing"))] +mod mir; +#[cfg(feature = "fuzzing")] +pub mod mir; +mod parser; +mod util; + +#[macro_use] +#[allow(missing_docs)] +mod macros; + +use generator::Generator; +use graph::{DisambiguationError, Fork, Graph, Rope}; +use leaf::Leaf; +use parser::{IgnoreFlags, Mode, Parser}; +use quote::ToTokens; +use util::MaybeVoid; + +use proc_macro2::{Delimiter, TokenStream, TokenTree}; +use quote::quote; +use syn::parse_quote; +use syn::spanned::Spanned; +use syn::{Fields, ItemEnum}; + +const LOGOS_ATTR: &str = "logos"; +const ERROR_ATTR: &str = "error"; +const TOKEN_ATTR: &str = "token"; +const REGEX_ATTR: &str = "regex"; + +/// Generate a `Logos` implementation for the given struct, provided as a stream of rust tokens. +pub fn generate(input: TokenStream) -> TokenStream { + debug!("Reading input token streams"); + + let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums"); + + let name = &item.ident; + + let mut parser = Parser::default(); + + for param in item.generics.params { + parser.parse_generic(param); + } + + for attr in &mut item.attrs { + parser.try_parse_logos(attr); + } + + let mut ropes = Vec::new(); + let mut regex_ids = Vec::new(); + let mut graph = Graph::new(); + + { + let errors = &mut parser.errors; + + for literal in &parser.skips { + match literal.to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors) { + Ok(mir) => { + let then = graph.push(Leaf::new_skip(literal.span()).priority(mir.priority())); + let id = graph.regex(mir, then); + + regex_ids.push(id); + } + Err(err) => { + errors.err(err, literal.span()); + } + } + } + } + + debug!("Iterating through enum variants"); + + for variant in &mut item.variants { + let field = match &mut variant.fields { + Fields::Unit => MaybeVoid::Void, + Fields::Unnamed(fields) => { + if fields.unnamed.len() != 1 { + parser.err( + format!( + "Logos currently only supports variants with one field, found {}", + fields.unnamed.len(), + ), + fields.span(), + ); + } + + let ty = &mut fields + .unnamed + .first_mut() + .expect("Already checked len; qed") + .ty; + let ty = parser.get_type(ty); + + MaybeVoid::Some(ty) + } + Fields::Named(fields) => { + parser.err("Logos doesn't support named fields yet.", fields.span()); + + MaybeVoid::Void + } + }; + + // Lazy leaf constructor to avoid cloning + let var_ident = &variant.ident; + let leaf = move |span| Leaf::new(var_ident, span).field(field.clone()); + + for attr in &mut variant.attrs { + let attr_name = match attr.path().get_ident() { + Some(ident) => ident.to_string(), + None => continue, + }; + + match attr_name.as_str() { + ERROR_ATTR => { + // TODO: Remove in future versions + parser.err( + "\ + Since 0.13 Logos no longer requires the #[error] variant.\n\ + \n\ + For help with migration see release notes: \ + https://github.com/maciejhirsz/logos/releases\ + ", + attr.span(), + ); + } + TOKEN_ATTR => { + let definition = match parser.parse_definition(attr) { + Some(definition) => definition, + None => { + parser.err("Expected #[token(...)]", attr.span()); + continue; + } + }; + + if definition.ignore_flags.is_empty() { + let bytes = definition.literal.to_bytes(); + let then = graph.push( + leaf(definition.literal.span()) + .priority(definition.priority.unwrap_or(bytes.len() * 2)) + .callback(definition.callback), + ); + + ropes.push(Rope::new(bytes, then)); + } else { + let mir = definition + .literal + .escape_regex() + .to_mir( + &Default::default(), + definition.ignore_flags, + &mut parser.errors, + ) + .expect("The literal should be perfectly valid regex"); + + let then = graph.push( + leaf(definition.literal.span()) + .priority(definition.priority.unwrap_or_else(|| mir.priority())) + .callback(definition.callback), + ); + let id = graph.regex(mir, then); + + regex_ids.push(id); + } + } + REGEX_ATTR => { + let definition = match parser.parse_definition(attr) { + Some(definition) => definition, + None => { + parser.err("Expected #[regex(...)]", attr.span()); + continue; + } + }; + let mir = match definition.literal.to_mir( + &parser.subpatterns, + definition.ignore_flags, + &mut parser.errors, + ) { + Ok(mir) => mir, + Err(err) => { + parser.err(err, definition.literal.span()); + continue; + } + }; + + let then = graph.push( + leaf(definition.literal.span()) + .priority(definition.priority.unwrap_or_else(|| mir.priority())) + .callback(definition.callback), + ); + let id = graph.regex(mir, then); + + regex_ids.push(id); + } + _ => (), + } + } + } + + let mut root = Fork::new(); + + debug!("Parsing additional options (extras, source, ...)"); + + let error_type = parser.error_type.take(); + let extras = parser.extras.take(); + let source = parser + .source + .take() + .map(strip_wrapping_parens) + .unwrap_or(match parser.mode { + Mode::Utf8 => quote!(str), + Mode::Binary => quote!([u8]), + }); + let logos_path = parser + .logos_path + .take() + .unwrap_or_else(|| parse_quote!(::logos)); + + let generics = parser.generics(); + let this = quote!(#name #generics); + + let impl_logos = |body| { + quote! { + impl<'s> #logos_path::Logos<'s> for #this { + type Error = #error_type; + + type Extras = #extras; + + type Source = #source; + + fn lex(lex: &mut #logos_path::Lexer<'s, Self>) { + #body + } + } + } + }; + + for id in regex_ids { + let fork = graph.fork_off(id); + + root.merge(fork, &mut graph); + } + for rope in ropes { + root.merge(rope.into_fork(&mut graph), &mut graph); + } + while let Some(id) = root.miss.take() { + let fork = graph.fork_off(id); + + if fork.branches().next().is_some() { + root.merge(fork, &mut graph); + } else { + break; + } + } + + debug!("Checking if any two tokens have the same priority"); + + for &DisambiguationError(a, b) in graph.errors() { + let a = graph[a].unwrap_leaf(); + let b = graph[b].unwrap_leaf(); + let disambiguate = a.priority + 1; + + let mut err = |a: &Leaf, b: &Leaf| { + parser.err( + format!( + "\ + A definition of variant `{a}` can match the same input as another definition of variant `{b}`.\n\ + \n\ + hint: Consider giving one definition a higher priority: \ + #[regex(..., priority = {disambiguate})]\ + ", + ), + a.span + ); + }; + + err(a, b); + err(b, a); + } + + if let Some(errors) = parser.errors.render() { + return impl_logos(errors); + } + + let root = graph.push(root); + + graph.shake(root); + + debug!("Generating code from graph:\n{graph:#?}"); + + let generator = Generator::new(name, &this, root, &graph); + + let body = generator.generate(); + impl_logos(quote! { + use #logos_path::internal::{LexerInternal, CallbackResult}; + + type Lexer<'s> = #logos_path::Lexer<'s, #this>; + + fn _end<'s>(lex: &mut Lexer<'s>) { + lex.end() + } + + fn _error<'s>(lex: &mut Lexer<'s>) { + lex.bump_unchecked(1); + + lex.error(); + } + + #body + }) +} + +/// Strip all logos attributes from the given struct, allowing it to be used in code without `logos-derive` present. +pub fn strip_attributes(input: TokenStream) -> TokenStream { + let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums"); + + strip_attrs_from_vec(&mut item.attrs); + + for attr in &mut item.attrs { + if let syn::Meta::List(meta) = &mut attr.meta { + if meta.path.is_ident("derive") { + let mut tokens = + std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter(); + + while let Some(TokenTree::Ident(ident)) = tokens.next() { + let punct = tokens.next(); + + if ident == "Logos" { + continue; + } + + meta.tokens.extend([TokenTree::Ident(ident)]); + meta.tokens.extend(punct); + } + } + } + } + + for variant in &mut item.variants { + strip_attrs_from_vec(&mut variant.attrs); + for field in &mut variant.fields { + strip_attrs_from_vec(&mut field.attrs); + } + } + + item.to_token_stream() +} + +fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) { + attrs.retain(|attr| !is_logos_attr(attr)) +} + +fn is_logos_attr(attr: &syn::Attribute) -> bool { + attr.path().is_ident(LOGOS_ATTR) + || attr.path().is_ident(TOKEN_ATTR) + || attr.path().is_ident(REGEX_ATTR) +} + +fn strip_wrapping_parens(t: TokenStream) -> TokenStream { + let tts: Vec<TokenTree> = t.into_iter().collect(); + + if tts.len() != 1 { + tts.into_iter().collect() + } else { + match tts.into_iter().next().unwrap() { + TokenTree::Group(g) => { + if g.delimiter() == Delimiter::Parenthesis { + g.stream() + } else { + core::iter::once(TokenTree::Group(g)).collect() + } + } + tt => core::iter::once(tt).collect(), + } + } +} |
