diff options
Diffstat (limited to 'vendor/logos-codegen/src/parser')
| -rw-r--r-- | vendor/logos-codegen/src/parser/definition.rs | 193 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/parser/ignore_flags.rs | 499 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/parser/mod.rs | 331 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/parser/nested.rs | 146 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/parser/subpattern.rs | 97 | ||||
| -rw-r--r-- | vendor/logos-codegen/src/parser/type_params.rs | 200 |
6 files changed, 1466 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/parser/definition.rs b/vendor/logos-codegen/src/parser/definition.rs new file mode 100644 index 00000000..a876fb59 --- /dev/null +++ b/vendor/logos-codegen/src/parser/definition.rs @@ -0,0 +1,193 @@ +use proc_macro2::{Ident, Span}; +use syn::{spanned::Spanned, LitByteStr, LitStr}; + +use crate::error::{Errors, Result}; +use crate::leaf::Callback; +use crate::mir::Mir; +use crate::parser::nested::NestedValue; +use crate::parser::{IgnoreFlags, Parser, Subpatterns}; + +use super::ignore_flags::ascii_case::MakeAsciiCaseInsensitive; + +pub struct Definition { + pub literal: Literal, + pub priority: Option<usize>, + pub callback: Option<Callback>, + pub ignore_flags: IgnoreFlags, +} + +pub enum Literal { + Utf8(LitStr), + Bytes(LitByteStr), +} + +impl Definition { + pub fn new(literal: Literal) -> Self { + Definition { + literal, + priority: None, + callback: None, + ignore_flags: IgnoreFlags::Empty, + } + } + + pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) { + match (name.to_string().as_str(), value) { + ("priority", NestedValue::Assign(tokens)) => { + let prio = match tokens.to_string().parse() { + Ok(prio) => prio, + Err(_) => { + parser.err("Expected an unsigned integer", tokens.span()); + return; + } + }; + + if self.priority.replace(prio).is_some() { + parser.err("Resetting previously set priority", tokens.span()); + } + } + ("priority", _) => { + parser.err("Expected: priority = <integer>", name.span()); + } + ("callback", NestedValue::Assign(tokens)) => { + let span = tokens.span(); + let callback = match parser.parse_callback(tokens) { + Some(callback) => callback, + None => { + parser.err("Not a valid callback", span); + return; + } + }; + + if let Some(previous) = self.callback.replace(callback) { + parser + .err( + "Callback has been already set", + span.join(name.span()).unwrap(), + ) + .err("Previous callback set here", previous.span()); + } + } + ("callback", _) => { + parser.err("Expected: callback = ...", name.span()); + } + ("ignore", NestedValue::Group(tokens)) => { + self.ignore_flags.parse_group(name, tokens, parser); + } + ("ignore", _) => { + parser.err("Expected: ignore(<flag>, ...)", name.span()); + } + (unknown, _) => { + parser.err( + format!( + "\ + Unknown nested attribute: {}\n\ + \n\ + Expected one of: priority, callback\ + ", + unknown + ), + name.span(), + ); + } + } + } +} + +impl Literal { + pub fn to_bytes(&self) -> Vec<u8> { + match self { + Literal::Utf8(string) => string.value().into_bytes(), + Literal::Bytes(bytes) => bytes.value(), + } + } + + pub fn escape_regex(&self) -> Literal { + match self { + Literal::Utf8(string) => Literal::Utf8(LitStr::new( + regex_syntax::escape(&string.value()).as_str(), + self.span(), + )), + Literal::Bytes(bytes) => Literal::Bytes(LitByteStr::new( + regex_syntax::escape(&bytes_to_regex_string(bytes.value())).as_bytes(), + self.span(), + )), + } + } + + pub fn to_mir( + &self, + subpatterns: &Subpatterns, + ignore_flags: IgnoreFlags, + errors: &mut Errors, + ) -> Result<Mir> { + let value = subpatterns.fix(self, errors); + + if ignore_flags.contains(IgnoreFlags::IgnoreAsciiCase) { + match self { + Literal::Utf8(_) => { + Mir::utf8(&value).map(MakeAsciiCaseInsensitive::make_ascii_case_insensitive) + } + Literal::Bytes(_) => Mir::binary_ignore_case(&value), + } + } else if ignore_flags.contains(IgnoreFlags::IgnoreCase) { + match self { + Literal::Utf8(_) => Mir::utf8_ignore_case(&value), + Literal::Bytes(_) => Mir::binary_ignore_case(&value), + } + } else { + match self { + Literal::Utf8(_) => Mir::utf8(&value), + Literal::Bytes(_) => Mir::binary(&value), + } + } + } + + pub fn span(&self) -> Span { + match self { + Literal::Utf8(string) => string.span(), + Literal::Bytes(bytes) => bytes.span(), + } + } +} + +impl syn::parse::Parse for Literal { + fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> { + let la = input.lookahead1(); + if la.peek(LitStr) { + Ok(Literal::Utf8(input.parse()?)) + } else if la.peek(LitByteStr) { + Ok(Literal::Bytes(input.parse()?)) + } else { + Err(la.error()) + } + } +} + +pub fn bytes_to_regex_string(bytes: Vec<u8>) -> String { + if bytes.is_ascii() { + unsafe { + // Unicode values are prohibited, so we can't use + // safe version of String::from_utf8 + // + // We can, however, construct a safe ASCII string + return String::from_utf8_unchecked(bytes); + } + } + + let mut string = String::with_capacity(bytes.len() * 2); + + for byte in bytes { + if byte < 0x80 { + string.push(byte as char); + } else { + static DIGITS: [u8; 16] = *b"0123456789abcdef"; + + string.push_str(r"\x"); + string.push(DIGITS[(byte / 16) as usize] as char); + string.push(DIGITS[(byte % 16) as usize] as char); + } + } + + string +} diff --git a/vendor/logos-codegen/src/parser/ignore_flags.rs b/vendor/logos-codegen/src/parser/ignore_flags.rs new file mode 100644 index 00000000..3a79d31b --- /dev/null +++ b/vendor/logos-codegen/src/parser/ignore_flags.rs @@ -0,0 +1,499 @@ +use std::ops::{BitAnd, BitOr}; + +use proc_macro2::{Ident, TokenStream, TokenTree}; + +use crate::parser::Parser; +use crate::util::is_punct; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct IgnoreFlags { + bits: u8, +} + +#[allow(non_upper_case_globals)] +impl IgnoreFlags { + pub const Empty: Self = Self::new(0x00); + pub const IgnoreCase: Self = Self::new(0x01); + pub const IgnoreAsciiCase: Self = Self::new(0x02); + + #[inline] + pub const fn new(bits: u8) -> Self { + Self { bits } + } + + /// Enables a variant. + #[inline] + pub fn enable(&mut self, variant: Self) { + self.bits |= variant.bits; + } + + /// Checks if this `IgnoreFlags` contains *any* of the given variants. + #[inline] + pub fn contains(&self, variants: Self) -> bool { + self.bits & variants.bits != 0 + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.bits == 0 + } + + /// Parses an identifier an enables it for `self`. + /// + /// Valid inputs are (that produces `true`): + /// * `"case"` (incompatible with `"ascii_case"`) + /// * `"ascii_case"` (incompatible with `"case"`) + /// + /// An error causes this function to return `false` and emits an error to + /// the given `Parser`. + fn parse_ident(&mut self, ident: Ident, parser: &mut Parser) -> bool { + match ident.to_string().as_str() { + "case" => { + if self.contains(Self::IgnoreAsciiCase) { + parser.err( + "\ + The flag \"case\" cannot be used along with \"ascii_case\"\ + ", + ident.span(), + ); + false + } else { + self.enable(Self::IgnoreCase); + true + } + } + "ascii_case" => { + if self.contains(Self::IgnoreCase) { + parser.err( + "\ + The flag \"ascii_case\" cannot be used along with \"case\"\ + ", + ident.span(), + ); + false + } else { + self.enable(Self::IgnoreAsciiCase); + true + } + } + unknown => { + parser.err( + format!( + "\ + Unknown flag: {}\n\ + \n\ + Expected one of: case, ascii_case\ + ", + unknown + ), + ident.span(), + ); + false + } + } + } + + pub fn parse_group(&mut self, name: Ident, tokens: TokenStream, parser: &mut Parser) { + // Little finite state machine to parse "<flag>(,<flag>)*,?" + + // FSM description for future maintenance + // 0: Initial state + // <flag> -> 1 + // _ -> error + // 1: A flag was found + // , -> 2 + // None -> done + // _ -> error + // 2: A comma was found (after a <flag>) + // <flag> -> 1 + // None -> done + // _ -> error + let mut state = 0u8; + + let mut tokens = tokens.into_iter(); + + loop { + state = match state { + 0 => match tokens.next() { + Some(TokenTree::Ident(ident)) => { + if self.parse_ident(ident, parser) { + 1 + } else { + return; + } + } + _ => { + parser.err( + "\ + Invalid ignore flag\n\ + \n\ + Expected one of: case, ascii_case\ + ", + name.span(), + ); + return; + } + }, + 1 => match tokens.next() { + Some(tt) if is_punct(&tt, ',') => 2, + None => return, + Some(unexpected_tt) => { + parser.err( + format!( + "\ + Unexpected token: {:?}\ + ", + unexpected_tt.to_string(), + ), + unexpected_tt.span(), + ); + return; + } + }, + 2 => match tokens.next() { + Some(TokenTree::Ident(ident)) => { + if self.parse_ident(ident, parser) { + 1 + } else { + return; + } + } + None => return, + Some(unexpected_tt) => { + parser.err( + format!( + "\ + Unexpected token: {:?}\ + ", + unexpected_tt.to_string(), + ), + unexpected_tt.span(), + ); + return; + } + }, + _ => unreachable!("Internal Error: invalid state ({})", state), + } + } + } +} + +impl BitOr for IgnoreFlags { + type Output = Self; + + fn bitor(self, other: Self) -> Self { + Self::new(self.bits | other.bits) + } +} + +impl BitAnd for IgnoreFlags { + type Output = Self; + + fn bitand(self, other: Self) -> Self { + Self::new(self.bits & other.bits) + } +} + +pub mod ascii_case { + use regex_syntax::hir; + + use crate::mir::Mir; + use crate::parser::Literal; + + macro_rules! literal { + ($byte:expr) => { + hir::Literal(Box::new([$byte])) + }; + (@char $c:expr) => { + hir::Literal( + $c.encode_utf8(&mut [0; 4]) + .as_bytes() + .to_vec() + .into_boxed_slice(), + ) + }; + } + + pub trait MakeAsciiCaseInsensitive { + /// Creates a equivalent regular expression which ignore the letter casing + /// of ascii characters. + fn make_ascii_case_insensitive(self) -> Mir; + } + + impl MakeAsciiCaseInsensitive for u8 { + fn make_ascii_case_insensitive(self) -> Mir { + if self.is_ascii_lowercase() { + Mir::Alternation(vec![ + Mir::Literal(literal!(self - 32)), + Mir::Literal(literal!(self)), + ]) + } else if self.is_ascii_uppercase() { + Mir::Alternation(vec![ + Mir::Literal(literal!(self)), + Mir::Literal(literal!(self + 32)), + ]) + } else { + Mir::Literal(literal!(self)) + } + } + } + + impl MakeAsciiCaseInsensitive for char { + fn make_ascii_case_insensitive(self) -> Mir { + if self.is_ascii() { + (self as u8).make_ascii_case_insensitive() + } else { + Mir::Literal(literal!(@char self)) + } + } + } + + impl MakeAsciiCaseInsensitive for hir::Literal { + fn make_ascii_case_insensitive(self) -> Mir { + Mir::Concat( + self.0 + .iter() + .map(|x| x.make_ascii_case_insensitive()) + .collect(), + ) + } + } + + impl MakeAsciiCaseInsensitive for hir::ClassBytes { + fn make_ascii_case_insensitive(mut self) -> Mir { + self.case_fold_simple(); + Mir::Class(hir::Class::Bytes(self)) + } + } + + impl MakeAsciiCaseInsensitive for hir::ClassUnicode { + fn make_ascii_case_insensitive(mut self) -> Mir { + use std::cmp; + + // Manuall implementation to only perform the case folding on ascii characters. + + let mut ranges = Vec::new(); + + for range in self.ranges() { + #[inline] + fn overlaps(st1: u8, end1: u8, st2: u8, end2: u8) -> bool { + (st2 <= st1 && st1 <= end2) || (st1 <= st2 && st2 <= end1) + } + + #[inline] + fn make_ascii(c: char) -> Option<u8> { + if c.is_ascii() { + Some(c as u8) + } else { + None + } + } + + match (make_ascii(range.start()), make_ascii(range.end())) { + (Some(start), Some(end)) => { + if overlaps(b'a', b'z', start, end) { + let lower = cmp::max(start, b'a'); + let upper = cmp::min(end, b'z'); + ranges.push(hir::ClassUnicodeRange::new( + (lower - 32) as char, + (upper - 32) as char, + )) + } + + if overlaps(b'A', b'Z', start, end) { + let lower = cmp::max(start, b'A'); + let upper = cmp::min(end, b'Z'); + ranges.push(hir::ClassUnicodeRange::new( + (lower + 32) as char, + (upper + 32) as char, + )) + } + } + (Some(start), None) => { + if overlaps(b'a', b'z', start, b'z') { + let lower = cmp::max(start, b'a'); + ranges.push(hir::ClassUnicodeRange::new((lower - 32) as char, 'Z')) + } + + if overlaps(b'A', b'Z', start, b'Z') { + let lower = cmp::max(start, b'A'); + ranges.push(hir::ClassUnicodeRange::new((lower + 32) as char, 'Z')) + } + } + _ => (), + } + } + + self.union(&hir::ClassUnicode::new(ranges)); + + Mir::Class(hir::Class::Unicode(self)) + } + } + + impl MakeAsciiCaseInsensitive for hir::Class { + fn make_ascii_case_insensitive(self) -> Mir { + match self { + hir::Class::Bytes(b) => b.make_ascii_case_insensitive(), + hir::Class::Unicode(u) => u.make_ascii_case_insensitive(), + } + } + } + + impl MakeAsciiCaseInsensitive for &Literal { + fn make_ascii_case_insensitive(self) -> Mir { + match self { + Literal::Bytes(bytes) => Mir::Concat( + bytes + .value() + .into_iter() + .map(|b| b.make_ascii_case_insensitive()) + .collect(), + ), + Literal::Utf8(s) => Mir::Concat( + s.value() + .chars() + .map(|b| b.make_ascii_case_insensitive()) + .collect(), + ), + } + } + } + + impl MakeAsciiCaseInsensitive for Mir { + fn make_ascii_case_insensitive(self) -> Mir { + match self { + Mir::Empty => Mir::Empty, + Mir::Loop(l) => Mir::Loop(Box::new(l.make_ascii_case_insensitive())), + Mir::Maybe(m) => Mir::Maybe(Box::new(m.make_ascii_case_insensitive())), + Mir::Concat(c) => Mir::Concat( + c.into_iter() + .map(|m| m.make_ascii_case_insensitive()) + .collect(), + ), + Mir::Alternation(a) => Mir::Alternation( + a.into_iter() + .map(|m| m.make_ascii_case_insensitive()) + .collect(), + ), + Mir::Class(c) => c.make_ascii_case_insensitive(), + Mir::Literal(l) => l.make_ascii_case_insensitive(), + } + } + } + + #[cfg(test)] + mod tests { + use super::MakeAsciiCaseInsensitive; + use crate::mir::{Class, Mir}; + use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange}; + + fn assert_range(in_s: char, in_e: char, expected: &[(char, char)]) { + let range = ClassUnicodeRange::new(in_s, in_e); + let class = ClassUnicode::new(vec![range]); + + let expected = + ClassUnicode::new(expected.iter().map(|&(a, b)| ClassUnicodeRange::new(a, b))); + + if let Mir::Class(Class::Unicode(result)) = class.make_ascii_case_insensitive() { + assert_eq!(result, expected); + } else { + panic!("Not a unicode class"); + }; + } + + #[test] + fn no_letters_left() { + assert_range(' ', '+', &[(' ', '+')]); + } + + #[test] + fn no_letters_right() { + assert_range('{', '~', &[('{', '~')]); + } + + #[test] + fn no_letters_middle() { + assert_range('[', '`', &[('[', '`')]); + } + + #[test] + fn lowercase_left_edge() { + assert_range('a', 'd', &[('a', 'd'), ('A', 'D')]); + } + + #[test] + fn lowercase_right_edge() { + assert_range('r', 'z', &[('r', 'z'), ('R', 'Z')]); + } + + #[test] + fn lowercase_total() { + assert_range('a', 'z', &[('a', 'z'), ('A', 'Z')]); + } + + #[test] + fn uppercase_left_edge() { + assert_range('A', 'D', &[('a', 'd'), ('A', 'D')]); + } + + #[test] + fn uppercase_right_edge() { + assert_range('R', 'Z', &[('r', 'z'), ('R', 'Z')]); + } + + #[test] + fn uppercase_total() { + assert_range('A', 'Z', &[('a', 'z'), ('A', 'Z')]); + } + + #[test] + fn lowercase_cross_left() { + assert_range('[', 'h', &[('[', 'h'), ('A', 'H')]); + } + + #[test] + fn lowercase_cross_right() { + assert_range('d', '}', &[('d', '}'), ('D', 'Z')]); + } + + #[test] + fn uppercase_cross_left() { + assert_range(';', 'H', &[(';', 'H'), ('a', 'h')]); + } + + #[test] + fn uppercase_cross_right() { + assert_range('T', ']', &[('t', 'z'), ('T', ']')]); + } + + #[test] + fn cross_both() { + assert_range('X', 'c', &[('X', 'c'), ('x', 'z'), ('A', 'C')]); + } + + #[test] + fn all_letters() { + assert_range('+', '|', &[('+', '|')]); + } + + #[test] + fn oob_all_letters() { + assert_range('#', 'é', &[('#', 'é')]); + } + + #[test] + fn oob_from_uppercase() { + assert_range('Q', 'é', &[('A', 'é')]); + } + + #[test] + fn oob_from_lowercase() { + assert_range('q', 'é', &[('q', 'é'), ('Q', 'Z')]); + } + + #[test] + fn oob_no_letters() { + assert_range('|', 'é', &[('|', 'é')]); + } + } +} diff --git a/vendor/logos-codegen/src/parser/mod.rs b/vendor/logos-codegen/src/parser/mod.rs new file mode 100644 index 00000000..3ad7202e --- /dev/null +++ b/vendor/logos-codegen/src/parser/mod.rs @@ -0,0 +1,331 @@ +use beef::lean::Cow; +use proc_macro2::{Span, TokenStream, TokenTree}; +use quote::quote; +use syn::spanned::Spanned; +use syn::{Attribute, GenericParam, Lit, Meta, Type}; + +use crate::error::Errors; +use crate::leaf::{Callback, InlineCallback}; +use crate::util::{expect_punct, MaybeVoid}; +use crate::LOGOS_ATTR; + +mod definition; +mod ignore_flags; +mod nested; +mod subpattern; +mod type_params; + +pub use self::definition::{Definition, Literal}; +pub use self::ignore_flags::IgnoreFlags; +use self::nested::{AttributeParser, Nested, NestedValue}; +pub use self::subpattern::Subpatterns; +use self::type_params::{replace_lifetime, traverse_type, TypeParams}; + +#[derive(Default)] +pub struct Parser { + pub errors: Errors, + pub mode: Mode, + pub source: Option<TokenStream>, + pub skips: Vec<Literal>, + pub extras: MaybeVoid, + pub error_type: MaybeVoid, + pub subpatterns: Subpatterns, + pub logos_path: Option<TokenStream>, + types: TypeParams, +} + +#[derive(Default)] +pub enum Mode { + #[default] + Utf8, + Binary, +} + +impl Parser { + pub fn parse_generic(&mut self, param: GenericParam) { + match param { + GenericParam::Lifetime(lt) => { + self.types.explicit_lifetime(lt, &mut self.errors); + } + GenericParam::Type(ty) => { + self.types.add(ty.ident); + } + GenericParam::Const(c) => { + self.err("Logos doesn't support const generics.", c.span()); + } + } + } + + pub fn generics(&mut self) -> Option<TokenStream> { + self.types.generics(&mut self.errors) + } + + fn parse_attr(&mut self, attr: &mut Attribute) -> Option<AttributeParser> { + match &mut attr.meta { + Meta::List(list) => { + let tokens = std::mem::replace(&mut list.tokens, TokenStream::new()); + + Some(AttributeParser::new(tokens)) + } + _ => None, + } + } + + /// Try to parse the main `#[logos(...)]`, does nothing if + /// the attribute's name isn't `logos`. + pub fn try_parse_logos(&mut self, attr: &mut Attribute) { + if !attr.path().is_ident(LOGOS_ATTR) { + return; + } + + let nested = match self.parse_attr(attr) { + Some(tokens) => tokens, + None => { + self.err("Expected #[logos(...)]", attr.span()); + return; + } + }; + + for nested in nested { + let (name, value) = match nested { + Nested::Named(name, value) => (name, value), + Nested::Unexpected(tokens) | Nested::Unnamed(tokens) => { + self.err("Invalid nested attribute", tokens.span()); + continue; + } + }; + + // IMPORTANT: Keep these sorted alphabetically for binary search down the line + #[allow(clippy::type_complexity)] + static NESTED_LOOKUP: &[(&str, fn(&mut Parser, Span, NestedValue))] = &[ + ("crate", |parser, span, value| match value { + NestedValue::Assign(logos_path) => parser.logos_path = Some(logos_path), + _ => { + parser.err("Expected: #[logos(crate = path::to::logos)]", span); + } + }), + ("error", |parser, span, value| match value { + NestedValue::Assign(value) => { + let span = value.span(); + + if let MaybeVoid::Some(previous) = parser.error_type.replace(value) { + parser + .err("Error type can be defined only once", span) + .err("Previous definition here", previous.span()); + } + } + _ => { + parser.err("Expected: #[logos(error = SomeType)]", span); + } + }), + ("extras", |parser, span, value| match value { + NestedValue::Assign(value) => { + let span = value.span(); + + if let MaybeVoid::Some(previous) = parser.extras.replace(value) { + parser + .err("Extras can be defined only once", span) + .err("Previous definition here", previous.span()); + } + } + _ => { + parser.err("Expected: #[logos(extras = SomeType)]", span); + } + }), + ("skip", |parser, span, value| match value { + NestedValue::Literal(lit) => { + if let Some(literal) = parser.parse_literal(Lit::new(lit)) { + parser.skips.push(literal); + } + } + _ => { + parser.err("Expected: #[logos(skip \"regex literal\")]", span); + } + }), + ("source", |parser, span, value| match value { + NestedValue::Assign(value) => { + let span = value.span(); + if let Some(previous) = parser.source.replace(value) { + parser + .err("Source can be defined only once", span) + .err("Previous definition here", previous.span()); + } + } + _ => { + parser.err("Expected: #[logos(source = SomeType)]", span); + } + }), + ("subpattern", |parser, span, value| match value { + NestedValue::KeywordAssign(name, value) => { + parser.subpatterns.add(name, value, &mut parser.errors); + } + _ => { + parser.err(r#"Expected: #[logos(subpattern name = r"regex")]"#, span); + } + }), + ("type", |parser, span, value| match value { + NestedValue::KeywordAssign(generic, ty) => { + parser.types.set(generic, ty, &mut parser.errors); + } + _ => { + parser.err("Expected: #[logos(type T = SomeType)]", span); + } + }), + ]; + + match NESTED_LOOKUP.binary_search_by_key(&name.to_string().as_str(), |(n, _)| n) { + Ok(idx) => NESTED_LOOKUP[idx].1(self, name.span(), value), + Err(_) => { + let mut err = format!( + "Unknown nested attribute #[logos({name})], expected one of: {}", + NESTED_LOOKUP[0].0 + ); + + for (allowed, _) in &NESTED_LOOKUP[1..] { + err.push_str(", "); + err.push_str(allowed); + } + + self.err(err, name.span()); + } + } + } + } + + pub fn parse_literal(&mut self, lit: Lit) -> Option<Literal> { + match lit { + Lit::Str(string) => Some(Literal::Utf8(string)), + Lit::ByteStr(bytes) => { + self.mode = Mode::Binary; + + Some(Literal::Bytes(bytes)) + } + _ => { + self.err("Expected a &str or &[u8] slice", lit.span()); + + None + } + } + } + + /// Parse attribute definition of a token: + /// + /// + `#[token(literal[, callback])]` + /// + `#[regex(literal[, callback])]` + pub fn parse_definition(&mut self, attr: &mut Attribute) -> Option<Definition> { + let mut nested = self.parse_attr(attr)?; + + let literal = match nested.parsed::<Lit>()? { + Ok(lit) => self.parse_literal(lit)?, + Err(err) => { + self.err(err.to_string(), err.span()); + + return None; + } + }; + + let mut def = Definition::new(literal); + + for (position, next) in nested.enumerate() { + match next { + Nested::Unexpected(tokens) => { + self.err("Unexpected token in attribute", tokens.span()); + } + Nested::Unnamed(tokens) => match position { + 0 => def.callback = self.parse_callback(tokens), + _ => { + self.err( + "\ + Expected a named argument at this position\n\ + \n\ + hint: If you are trying to define a callback here use: callback = ...\ + ", + tokens.span(), + ); + } + }, + Nested::Named(name, value) => { + def.named_attr(name, value, self); + } + } + } + + Some(def) + } + + fn parse_callback(&mut self, tokens: TokenStream) -> Option<Callback> { + let span = tokens.span(); + let mut tokens = tokens.into_iter(); + + if let Some(tt) = expect_punct(tokens.next(), '|') { + let mut label = TokenStream::from(tt); + + label.extend(tokens); + + return Some(Callback::Label(label)); + } + + let first = tokens.next(); + let error = expect_punct(tokens.next(), '|'); + + let arg = match (error, first) { + (None, Some(TokenTree::Ident(arg))) => arg, + _ => { + self.err( + "Inline callbacks must use closure syntax with exactly one parameter", + span, + ); + return None; + } + }; + + let body = match tokens.next() { + Some(TokenTree::Group(group)) => group.stream(), + Some(first) => { + let mut body = TokenStream::from(first); + + body.extend(tokens); + body + } + None => { + self.err("Callback missing a body", span); + return None; + } + }; + + let inline = InlineCallback { arg, body, span }; + + Some(inline.into()) + } + + /// Checks if `ty` is a declared generic param, if so replaces it + /// with a concrete type defined using #[logos(type T = Type)] + /// + /// If no matching generic param is found, all lifetimes are fixed + /// to the source lifetime + pub fn get_type(&self, ty: &mut Type) -> TokenStream { + traverse_type(ty, &mut |ty| { + if let Type::Path(tp) = ty { + // Skip types that begin with `self::` + if tp.qself.is_none() { + // If `ty` is a generic type parameter, try to find + // its concrete type defined with #[logos(type T = Type)] + if let Some(substitute) = self.types.find(&tp.path) { + *ty = substitute; + } + } + } + // If `ty` is a concrete type, fix its lifetimes to 'source + replace_lifetime(ty); + }); + + quote!(#ty) + } + + pub fn err<M>(&mut self, message: M, span: Span) -> &mut Errors + where + M: Into<Cow<'static, str>>, + { + self.errors.err(message, span) + } +} diff --git a/vendor/logos-codegen/src/parser/nested.rs b/vendor/logos-codegen/src/parser/nested.rs new file mode 100644 index 00000000..44ecaeac --- /dev/null +++ b/vendor/logos-codegen/src/parser/nested.rs @@ -0,0 +1,146 @@ +use proc_macro2::token_stream::IntoIter as TokenIter; +use proc_macro2::{Ident, Literal, TokenStream, TokenTree}; +use quote::quote; + +use crate::util::{expect_punct, is_punct}; + +pub enum NestedValue { + /// `name = ...` + Assign(TokenStream), + /// `name "literal"` + Literal(Literal), + /// `name(...)` + Group(TokenStream), + /// `name ident = ...` + KeywordAssign(Ident, TokenStream), +} + +pub enum Nested { + /// Unnamed nested attribute, such as a string, + /// callback closure, or a lone ident/path + /// + /// Note: a lone ident will be Named with no value instead + Unnamed(TokenStream), + /// Named: name ... + Named(Ident, NestedValue), + /// Unexpected token, + Unexpected(TokenStream), +} + +pub struct AttributeParser { + inner: TokenIter, +} + +pub struct Empty; + +impl From<Empty> for TokenStream { + fn from(_: Empty) -> TokenStream { + TokenStream::new() + } +} + +impl AttributeParser { + pub fn new(stream: TokenStream) -> Self { + AttributeParser { + inner: stream.into_iter(), + } + } + + pub fn parsed<T>(&mut self) -> Option<syn::Result<T>> + where + T: syn::parse::Parse, + { + let tokens = self.collect_tail(TokenStream::new()); + + if tokens.is_empty() { + return None; + } + + Some(syn::parse2(tokens)) + } + + fn next_tt(&mut self) -> Option<TokenTree> { + expect_punct(self.inner.next(), ',') + } + + fn collect_tail<T>(&mut self, first: T) -> TokenStream + where + T: Into<TokenStream>, + { + let mut out = first.into(); + + while let Some(tt) = self.next_tt() { + out.extend(Some(tt)); + } + + out + } + + fn parse_unnamed(&mut self, first: Ident, next: TokenTree) -> Nested { + let mut out = TokenStream::from(TokenTree::Ident(first)); + + out.extend(self.collect_tail(next)); + + Nested::Unnamed(out.into_iter().collect()) + } + + fn parse_assign(&mut self, name: Ident) -> Nested { + let value = self.collect_tail(Empty); + + Nested::Named(name, NestedValue::Assign(value)) + } + + fn parse_literal(&mut self, name: Ident, lit: Literal) -> Nested { + // TODO: Error if there are any tokens following + let _ = self.collect_tail(Empty); + + Nested::Named(name, NestedValue::Literal(lit)) + } + + fn parse_group(&mut self, name: Ident, group: TokenStream) -> Nested { + Nested::Named(name, NestedValue::Group(group)) + } + + fn parse_keyword(&mut self, keyword: Ident, name: Ident) -> Nested { + let error = expect_punct(self.next_tt(), '='); + + match error { + Some(error) => { + let error = self.collect_tail(error); + + Nested::Unexpected(error) + } + None => { + let value = self.collect_tail(Empty); + + Nested::Named(keyword, NestedValue::KeywordAssign(name, value)) + } + } + } +} + +impl Iterator for AttributeParser { + type Item = Nested; + + fn next(&mut self) -> Option<Nested> { + let first = self.inner.next()?; + + let name = match first { + TokenTree::Ident(ident) => ident, + tt => { + let stream = self.collect_tail(tt); + + return Some(Nested::Unnamed(stream.into_iter().collect())); + } + }; + + match self.next_tt() { + Some(tt) if is_punct(&tt, '=') => Some(self.parse_assign(name)), + Some(TokenTree::Literal(lit)) => Some(self.parse_literal(name, lit)), + Some(TokenTree::Group(group)) => Some(self.parse_group(name, group.stream())), + Some(TokenTree::Ident(next)) => Some(self.parse_keyword(name, next)), + Some(next) => Some(self.parse_unnamed(name, next)), + None => Some(Nested::Unnamed(quote!(#name))), + } + } +} diff --git a/vendor/logos-codegen/src/parser/subpattern.rs b/vendor/logos-codegen/src/parser/subpattern.rs new file mode 100644 index 00000000..eb620028 --- /dev/null +++ b/vendor/logos-codegen/src/parser/subpattern.rs @@ -0,0 +1,97 @@ +use proc_macro2::TokenStream; +use syn::Ident; + +use crate::error::Errors; +use crate::mir::Mir; +use crate::parser::definition::{bytes_to_regex_string, Literal}; + +#[derive(Default)] +pub struct Subpatterns { + map: Vec<(Ident, String)>, +} + +impl Subpatterns { + pub fn add(&mut self, param: Ident, pattern: TokenStream, errors: &mut Errors) { + let lit = match syn::parse2::<Literal>(pattern) { + Ok(lit) => lit, + Err(e) => { + errors.err(e.to_string(), e.span()); + return; + } + }; + + if let Some((name, _)) = self.map.iter().find(|(name, _)| *name == param) { + errors + .err(format!("{} can only be assigned once", param), param.span()) + .err("Previously assigned here", name.span()); + return; + } + + let fixed = self.fix(&lit, errors); + + // Validate the literal as proper regex. If it's not, emit an error. + let mir = match &lit { + Literal::Utf8(_) => Mir::utf8(&fixed), + Literal::Bytes(_) => Mir::binary(&fixed), + }; + + if let Err(err) = mir { + errors.err(err, lit.span()); + }; + + self.map.push((param, fixed)); + } + + pub fn fix(&self, lit: &Literal, errors: &mut Errors) -> String { + let mut i = 0; + let mut pattern = match lit { + Literal::Utf8(s) => s.value(), + Literal::Bytes(b) => bytes_to_regex_string(b.value()), + }; + + while let Some(f) = pattern[i..].find("(?&") { + i += f; + pattern.replace_range(i..i + 3, "(?:"); + i += 3; + + let subref_end = if let Some(f) = pattern[i..].find(')') { + i + f + } else { + pattern.truncate(i); // truncate so latter error doesn't suppress + break; // regex-syntax will report the unclosed group + }; + + let name = &pattern[i..subref_end]; + let name = match syn::parse_str::<Ident>(name) { + Ok(name) => name, + Err(_) => { + errors.err( + format!("subpattern reference `{}` is not an identifier", name), + lit.span(), + ); + // we emitted the error; make something up and continue + pattern.replace_range(i..subref_end, "_"); + i += 2; + continue; + } + }; + + match self.map.iter().find(|(def, _)| *def == name) { + Some((_, subpattern)) => { + pattern.replace_range(i..subref_end, subpattern); + i += subpattern.len() + 1; + } + None => { + errors.err( + format!("subpattern reference `{}` has not been defined", name), + lit.span(), + ); + // leaving `(?:name)` is fine + i = subref_end + 1; + } + } + } + + pattern + } +} diff --git a/vendor/logos-codegen/src/parser/type_params.rs b/vendor/logos-codegen/src/parser/type_params.rs new file mode 100644 index 00000000..1be4948e --- /dev/null +++ b/vendor/logos-codegen/src/parser/type_params.rs @@ -0,0 +1,200 @@ +use proc_macro2::{Ident, Span, TokenStream}; +use quote::quote; +use syn::spanned::Spanned; +use syn::{Lifetime, LifetimeParam, Path, Type}; + +use crate::error::Errors; + +#[derive(Default)] +pub struct TypeParams { + lifetime: bool, + type_params: Vec<(Ident, Option<Type>)>, +} + +impl TypeParams { + pub fn explicit_lifetime(&mut self, lt: LifetimeParam, errors: &mut Errors) { + if self.lifetime { + let span = lt.span(); + + errors.err("Logos types can only have one lifetime can be set", span); + } + + self.lifetime = true; + } + + pub fn add(&mut self, param: Ident) { + self.type_params.push((param, None)); + } + + pub fn set(&mut self, param: Ident, ty: TokenStream, errors: &mut Errors) { + let ty = match syn::parse2::<Type>(ty) { + Ok(mut ty) => { + replace_lifetimes(&mut ty); + ty + } + Err(err) => { + errors.err(err.to_string(), err.span()); + return; + } + }; + + match self.type_params.iter_mut().find(|(name, _)| *name == param) { + Some((_, slot)) => { + if let Some(previous) = slot.replace(ty) { + errors + .err( + format!("{} can only have one type assigned to it", param), + param.span(), + ) + .err("Previously assigned here", previous.span()); + } + } + None => { + errors.err( + format!("{} is not a declared type parameter", param), + param.span(), + ); + } + } + } + + pub fn find(&self, path: &Path) -> Option<Type> { + for (ident, ty) in &self.type_params { + if path.is_ident(ident) { + return ty.clone(); + } + } + + None + } + + pub fn generics(&self, errors: &mut Errors) -> Option<TokenStream> { + if !self.lifetime && self.type_params.is_empty() { + return None; + } + + let mut generics = Vec::new(); + + if self.lifetime { + generics.push(quote!('s)); + } + + for (ty, replace) in self.type_params.iter() { + match replace { + Some(ty) => generics.push(quote!(#ty)), + None => { + errors.err( + format!( + "Generic type parameter without a concrete type\n\ + \n\ + Define a concrete type Logos can use: #[logos(type {} = Type)]", + ty, + ), + ty.span(), + ); + } + } + } + + if generics.is_empty() { + None + } else { + Some(quote!(<#(#generics),*>)) + } + } +} + +pub fn replace_lifetimes(ty: &mut Type) { + traverse_type(ty, &mut replace_lifetime) +} + +pub fn replace_lifetime(ty: &mut Type) { + use syn::{GenericArgument, PathArguments}; + + match ty { + Type::Path(p) => { + p.path + .segments + .iter_mut() + .filter_map(|segment| match &mut segment.arguments { + PathArguments::AngleBracketed(ab) => Some(ab), + _ => None, + }) + .flat_map(|ab| ab.args.iter_mut()) + .for_each(|arg| { + if let GenericArgument::Lifetime(lt) = arg { + *lt = Lifetime::new("'s", lt.span()); + } + }); + } + Type::Reference(r) => { + let span = match r.lifetime.take() { + Some(lt) => lt.span(), + None => Span::call_site(), + }; + + r.lifetime = Some(Lifetime::new("'s", span)); + } + _ => (), + } +} + +pub fn traverse_type(ty: &mut Type, f: &mut impl FnMut(&mut Type)) { + f(ty); + match ty { + Type::Array(array) => traverse_type(&mut array.elem, f), + Type::BareFn(bare_fn) => { + for input in &mut bare_fn.inputs { + traverse_type(&mut input.ty, f); + } + if let syn::ReturnType::Type(_, ty) = &mut bare_fn.output { + traverse_type(ty, f); + } + } + Type::Group(group) => traverse_type(&mut group.elem, f), + Type::Paren(paren) => traverse_type(&mut paren.elem, f), + Type::Path(path) => traverse_path(&mut path.path, f), + Type::Ptr(p) => traverse_type(&mut p.elem, f), + Type::Reference(r) => traverse_type(&mut r.elem, f), + Type::Slice(slice) => traverse_type(&mut slice.elem, f), + Type::TraitObject(object) => object.bounds.iter_mut().for_each(|bound| { + if let syn::TypeParamBound::Trait(trait_bound) = bound { + traverse_path(&mut trait_bound.path, f); + } + }), + Type::Tuple(tuple) => tuple + .elems + .iter_mut() + .for_each(|elem| traverse_type(elem, f)), + _ => (), + } +} + +fn traverse_path(path: &mut Path, f: &mut impl FnMut(&mut Type)) { + for segment in &mut path.segments { + match &mut segment.arguments { + syn::PathArguments::None => (), + syn::PathArguments::AngleBracketed(args) => { + for arg in &mut args.args { + match arg { + syn::GenericArgument::Type(ty) => { + traverse_type(ty, f); + } + syn::GenericArgument::AssocType(assoc) => { + traverse_type(&mut assoc.ty, f); + } + _ => (), + } + } + } + syn::PathArguments::Parenthesized(args) => { + for arg in &mut args.inputs { + traverse_type(arg, f); + } + if let syn::ReturnType::Type(_, ty) = &mut args.output { + traverse_type(ty, f); + } + } + } + } +} |
