summaryrefslogtreecommitdiff
path: root/vendor/logos-codegen/src/parser
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/logos-codegen/src/parser')
-rw-r--r--vendor/logos-codegen/src/parser/definition.rs193
-rw-r--r--vendor/logos-codegen/src/parser/ignore_flags.rs499
-rw-r--r--vendor/logos-codegen/src/parser/mod.rs331
-rw-r--r--vendor/logos-codegen/src/parser/nested.rs146
-rw-r--r--vendor/logos-codegen/src/parser/subpattern.rs97
-rw-r--r--vendor/logos-codegen/src/parser/type_params.rs200
6 files changed, 1466 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/parser/definition.rs b/vendor/logos-codegen/src/parser/definition.rs
new file mode 100644
index 00000000..a876fb59
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/definition.rs
@@ -0,0 +1,193 @@
+use proc_macro2::{Ident, Span};
+use syn::{spanned::Spanned, LitByteStr, LitStr};
+
+use crate::error::{Errors, Result};
+use crate::leaf::Callback;
+use crate::mir::Mir;
+use crate::parser::nested::NestedValue;
+use crate::parser::{IgnoreFlags, Parser, Subpatterns};
+
+use super::ignore_flags::ascii_case::MakeAsciiCaseInsensitive;
+
+/// One token/regex definition: the literal it was declared with plus the
+/// optional settings collected from the rest of the attribute.
+pub struct Definition {
+ /// The string or byte-string literal the definition was created from.
+ pub literal: Literal,
+ /// Value of `priority = <integer>`, if one was given.
+ pub priority: Option<usize>,
+ /// Callback attached to this definition, if any.
+ pub callback: Option<Callback>,
+ /// Flags collected from `ignore(...)`.
+ pub ignore_flags: IgnoreFlags,
+}
+
+/// The literal a definition or subpattern is written as.
+pub enum Literal {
+ /// A string literal (`syn::LitStr`).
+ Utf8(LitStr),
+ /// A byte-string literal (`syn::LitByteStr`).
+ Bytes(LitByteStr),
+}
+
+impl Definition {
+    /// Creates a definition for `literal` with no priority, no callback and
+    /// an empty set of ignore flags.
+    pub fn new(literal: Literal) -> Self {
+        Definition {
+            literal,
+            priority: None,
+            callback: None,
+            ignore_flags: IgnoreFlags::Empty,
+        }
+    }
+
+    /// Applies one named nested attribute to this definition.
+    ///
+    /// Recognised forms are `priority = <integer>`, `callback = ...` and
+    /// `ignore(<flag>, ...)`. Every problem (unknown name, wrong value shape,
+    /// unparsable value, value set twice) is reported through `parser.err`;
+    /// the method itself never fails.
+    pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) {
+        match (name.to_string().as_str(), value) {
+            ("priority", NestedValue::Assign(tokens)) => {
+                let prio = match tokens.to_string().parse() {
+                    Ok(prio) => prio,
+                    Err(_) => {
+                        parser.err("Expected an unsigned integer", tokens.span());
+                        return;
+                    }
+                };
+
+                // The new value wins; the error only tells the user about the clash.
+                if self.priority.replace(prio).is_some() {
+                    parser.err("Resetting previously set priority", tokens.span());
+                }
+            }
+            ("priority", _) => {
+                parser.err("Expected: priority = <integer>", name.span());
+            }
+            ("callback", NestedValue::Assign(tokens)) => {
+                let span = tokens.span();
+                let callback = match parser.parse_callback(tokens) {
+                    Some(callback) => callback,
+                    None => {
+                        parser.err("Not a valid callback", span);
+                        return;
+                    }
+                };
+
+                if let Some(previous) = self.callback.replace(callback) {
+                    // `Span::join` returns `None` when the spans cannot be
+                    // merged (per proc_macro2, always on non-nightly
+                    // compilers), so fall back to the value's own span
+                    // instead of panicking inside the derive.
+                    let duplicate = span.join(name.span()).unwrap_or(span);
+
+                    parser
+                        .err("Callback has been already set", duplicate)
+                        .err("Previous callback set here", previous.span());
+                }
+            }
+            ("callback", _) => {
+                parser.err("Expected: callback = ...", name.span());
+            }
+            ("ignore", NestedValue::Group(tokens)) => {
+                self.ignore_flags.parse_group(name, tokens, parser);
+            }
+            ("ignore", _) => {
+                parser.err("Expected: ignore(<flag>, ...)", name.span());
+            }
+            (unknown, _) => {
+                // Keep this list in sync with the names matched above.
+                parser.err(
+                    format!(
+                        "\
+                        Unknown nested attribute: {}\n\
+                        \n\
+                        Expected one of: priority, callback, ignore\
+                        ",
+                        unknown
+                    ),
+                    name.span(),
+                );
+            }
+        }
+    }
+}
+
+impl Literal {
+    /// Returns the literal's contents as bytes: the UTF-8 encoding of a string
+    /// literal, or the value of a byte-string literal.
+    pub fn to_bytes(&self) -> Vec<u8> {
+        match self {
+            Literal::Bytes(raw) => raw.value(),
+            Literal::Utf8(text) => text.value().into_bytes(),
+        }
+    }
+
+    /// Builds a literal of the same kind and span whose contents are the
+    /// `regex_syntax::escape`d form of this one.
+    ///
+    /// Byte strings are first rendered through `bytes_to_regex_string`.
+    pub fn escape_regex(&self) -> Literal {
+        let span = self.span();
+
+        match self {
+            Literal::Utf8(text) => {
+                let escaped = regex_syntax::escape(&text.value());
+
+                Literal::Utf8(LitStr::new(escaped.as_str(), span))
+            }
+            Literal::Bytes(raw) => {
+                let escaped = regex_syntax::escape(&bytes_to_regex_string(raw.value()));
+
+                Literal::Bytes(LitByteStr::new(escaped.as_bytes(), span))
+            }
+        }
+    }
+
+    /// Lowers the literal to a `Mir`.
+    ///
+    /// Subpattern references are expanded with `subpatterns.fix` first (which
+    /// may record errors in `errors`). The constructor is then chosen from the
+    /// literal kind and `ignore_flags`; `IgnoreAsciiCase` is checked before
+    /// `IgnoreCase`.
+    pub fn to_mir(
+        &self,
+        subpatterns: &Subpatterns,
+        ignore_flags: IgnoreFlags,
+        errors: &mut Errors,
+    ) -> Result<Mir> {
+        let pattern = subpatterns.fix(self, errors);
+        let ascii_case = ignore_flags.contains(IgnoreFlags::IgnoreAsciiCase);
+        let any_case = ignore_flags.contains(IgnoreFlags::IgnoreCase);
+
+        match (self, ascii_case, any_case) {
+            (Literal::Utf8(_), true, _) => {
+                Mir::utf8(&pattern).map(MakeAsciiCaseInsensitive::make_ascii_case_insensitive)
+            }
+            (Literal::Utf8(_), false, true) => Mir::utf8_ignore_case(&pattern),
+            (Literal::Utf8(_), false, false) => Mir::utf8(&pattern),
+            (Literal::Bytes(_), false, false) => Mir::binary(&pattern),
+            // Byte strings use the same constructor for both ignore flags.
+            (Literal::Bytes(_), _, _) => Mir::binary_ignore_case(&pattern),
+        }
+    }
+
+    /// Span of the underlying literal token.
+    pub fn span(&self) -> Span {
+        match self {
+            Literal::Bytes(raw) => raw.span(),
+            Literal::Utf8(text) => text.span(),
+        }
+    }
+}
+
+impl syn::parse::Parse for Literal {
+    /// Accepts either a string literal or a byte-string literal; anything else
+    /// yields the lookahead's error.
+    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
+        let lookahead = input.lookahead1();
+
+        if lookahead.peek(LitStr) {
+            return input.parse().map(Literal::Utf8);
+        }
+
+        if lookahead.peek(LitByteStr) {
+            return input.parse().map(Literal::Bytes);
+        }
+
+        Err(lookahead.error())
+    }
+}
+
+/// Renders `bytes` as a regex pattern string.
+///
+/// ASCII bytes are copied through unchanged; every byte `>= 0x80` is written
+/// as a `\xNN` escape with two lower-case hex digits, so the result is always
+/// valid UTF-8 even when the input is not.
+pub fn bytes_to_regex_string(bytes: Vec<u8>) -> String {
+    if bytes.is_ascii() {
+        // ASCII is a subset of UTF-8, so the checked conversion cannot fail
+        // and no `unsafe` is needed for this fast path.
+        return String::from_utf8(bytes).expect("ASCII bytes are always valid UTF-8");
+    }
+
+    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
+
+    let mut string = String::with_capacity(bytes.len() * 2);
+
+    for byte in bytes {
+        if byte.is_ascii() {
+            string.push(char::from(byte));
+        } else {
+            string.push_str(r"\x");
+            string.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
+            string.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
+        }
+    }
+
+    string
+}
diff --git a/vendor/logos-codegen/src/parser/ignore_flags.rs b/vendor/logos-codegen/src/parser/ignore_flags.rs
new file mode 100644
index 00000000..3a79d31b
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/ignore_flags.rs
@@ -0,0 +1,499 @@
+use std::ops::{BitAnd, BitOr};
+
+use proc_macro2::{Ident, TokenStream, TokenTree};
+
+use crate::parser::Parser;
+use crate::util::is_punct;
+
+/// Small bit set holding the flags given in `ignore(...)`.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct IgnoreFlags {
+ // Raw flag bits; the associated constants of this type name the individual bits.
+ bits: u8,
+}
+
+// The constants below are named like enum variants, hence the lint exemption.
+#[allow(non_upper_case_globals)]
+impl IgnoreFlags {
+ /// No flag set.
+ pub const Empty: Self = Self::new(0x00);
+ /// Set by the `case` flag.
+ pub const IgnoreCase: Self = Self::new(0x01);
+ /// Set by the `ascii_case` flag.
+ pub const IgnoreAsciiCase: Self = Self::new(0x02);
+
+ /// Wraps raw flag bits.
+ #[inline]
+ pub const fn new(bits: u8) -> Self {
+ Self { bits }
+ }
+
+ /// Enables a variant.
+ #[inline]
+ pub fn enable(&mut self, variant: Self) {
+ self.bits |= variant.bits;
+ }
+
+ /// Checks if this `IgnoreFlags` contains *any* of the given variants.
+ #[inline]
+ pub fn contains(&self, variants: Self) -> bool {
+ self.bits & variants.bits != 0
+ }
+
+ /// Returns `true` when no flag is set.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.bits == 0
+ }
+
+ /// Parses an identifier and enables it for `self`.
+ ///
+ /// Valid inputs (those that produce `true`) are:
+ /// * `"case"` (incompatible with `"ascii_case"`)
+ /// * `"ascii_case"` (incompatible with `"case"`)
+ ///
+ /// An error causes this function to return `false` and emits an error to
+ /// the given `Parser`.
+ fn parse_ident(&mut self, ident: Ident, parser: &mut Parser) -> bool {
+ match ident.to_string().as_str() {
+ "case" => {
+ if self.contains(Self::IgnoreAsciiCase) {
+ parser.err(
+ "\
+ The flag \"case\" cannot be used along with \"ascii_case\"\
+ ",
+ ident.span(),
+ );
+ false
+ } else {
+ self.enable(Self::IgnoreCase);
+ true
+ }
+ }
+ "ascii_case" => {
+ if self.contains(Self::IgnoreCase) {
+ parser.err(
+ "\
+ The flag \"ascii_case\" cannot be used along with \"case\"\
+ ",
+ ident.span(),
+ );
+ false
+ } else {
+ self.enable(Self::IgnoreAsciiCase);
+ true
+ }
+ }
+ unknown => {
+ parser.err(
+ format!(
+ "\
+ Unknown flag: {}\n\
+ \n\
+ Expected one of: case, ascii_case\
+ ",
+ unknown
+ ),
+ ident.span(),
+ );
+ false
+ }
+ }
+ }
+
+ /// Parses the contents of an `ignore(...)` group and enables every flag
+ /// listed in it.
+ ///
+ /// `name` is the identifier the group was attached to; its span is used
+ /// when the group does not start with an identifier (this includes an
+ /// empty group). Parsing stops at the first error, which is reported to
+ /// `parser`; flags enabled before that point stay enabled.
+ pub fn parse_group(&mut self, name: Ident, tokens: TokenStream, parser: &mut Parser) {
+ // Little finite state machine to parse "<flag>(,<flag>)*,?"
+
+ // FSM description for future maintenance
+ // 0: Initial state
+ // <flag> -> 1
+ // _ -> error
+ // 1: A flag was found
+ // , -> 2
+ // None -> done
+ // _ -> error
+ // 2: A comma was found (after a <flag>)
+ // <flag> -> 1
+ // None -> done
+ // _ -> error
+ let mut state = 0u8;
+
+ let mut tokens = tokens.into_iter();
+
+ loop {
+ state = match state {
+ 0 => match tokens.next() {
+ Some(TokenTree::Ident(ident)) => {
+ if self.parse_ident(ident, parser) {
+ 1
+ } else {
+ return;
+ }
+ }
+ _ => {
+ parser.err(
+ "\
+ Invalid ignore flag\n\
+ \n\
+ Expected one of: case, ascii_case\
+ ",
+ name.span(),
+ );
+ return;
+ }
+ },
+ 1 => match tokens.next() {
+ Some(tt) if is_punct(&tt, ',') => 2,
+ None => return,
+ Some(unexpected_tt) => {
+ parser.err(
+ format!(
+ "\
+ Unexpected token: {:?}\
+ ",
+ unexpected_tt.to_string(),
+ ),
+ unexpected_tt.span(),
+ );
+ return;
+ }
+ },
+ 2 => match tokens.next() {
+ Some(TokenTree::Ident(ident)) => {
+ if self.parse_ident(ident, parser) {
+ 1
+ } else {
+ return;
+ }
+ }
+ // A trailing comma is accepted.
+ None => return,
+ Some(unexpected_tt) => {
+ parser.err(
+ format!(
+ "\
+ Unexpected token: {:?}\
+ ",
+ unexpected_tt.to_string(),
+ ),
+ unexpected_tt.span(),
+ );
+ return;
+ }
+ },
+ _ => unreachable!("Internal Error: invalid state ({})", state),
+ }
+ }
+ }
+}
+
+impl BitOr for IgnoreFlags {
+    type Output = Self;
+
+    /// Flags set in either operand.
+    fn bitor(self, rhs: Self) -> Self {
+        let bits = self.bits | rhs.bits;
+
+        Self { bits }
+    }
+}
+
+impl BitAnd for IgnoreFlags {
+    type Output = Self;
+
+    /// Flags set in both operands.
+    fn bitand(self, rhs: Self) -> Self {
+        let bits = self.bits & rhs.bits;
+
+        Self { bits }
+    }
+}
+
+pub mod ascii_case {
+ use regex_syntax::hir;
+
+ use crate::mir::Mir;
+ use crate::parser::Literal;
+
+ // Builds an `hir::Literal`:
+ // * `literal!(byte)` wraps a single byte;
+ // * `literal!(@char c)` wraps the UTF-8 encoding of the `char` `c`.
+ macro_rules! literal {
+ ($byte:expr) => {
+ hir::Literal(Box::new([$byte]))
+ };
+ (@char $c:expr) => {
+ hir::Literal(
+ $c.encode_utf8(&mut [0; 4])
+ .as_bytes()
+ .to_vec()
+ .into_boxed_slice(),
+ )
+ };
+ }
+
+ /// Conversion into a `Mir` that matches regardless of ASCII letter case.
+ pub trait MakeAsciiCaseInsensitive {
+ /// Creates an equivalent regular expression which ignores the letter
+ /// casing of ASCII characters.
+ fn make_ascii_case_insensitive(self) -> Mir;
+ }
+
+ impl MakeAsciiCaseInsensitive for u8 {
+     /// An ASCII letter becomes an alternation of its upper-case byte followed
+     /// by its lower-case byte; any other byte stays a single literal.
+     fn make_ascii_case_insensitive(self) -> Mir {
+         if !self.is_ascii_alphabetic() {
+             return Mir::Literal(literal!(self));
+         }
+
+         Mir::Alternation(vec![
+             Mir::Literal(literal!(self.to_ascii_uppercase())),
+             Mir::Literal(literal!(self.to_ascii_lowercase())),
+         ])
+     }
+ }
+
+ impl MakeAsciiCaseInsensitive for char {
+     /// ASCII characters are handled by the `u8` implementation; every other
+     /// character becomes a literal of its UTF-8 encoding.
+     fn make_ascii_case_insensitive(self) -> Mir {
+         if !self.is_ascii() {
+             return Mir::Literal(literal!(@char self));
+         }
+
+         let byte = self as u8;
+
+         byte.make_ascii_case_insensitive()
+     }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::Literal {
+     /// Concatenation of the per-byte conversions of the literal's bytes.
+     fn make_ascii_case_insensitive(self) -> Mir {
+         let parts = self
+             .0
+             .iter()
+             .copied()
+             .map(|byte| byte.make_ascii_case_insensitive())
+             .collect();
+
+         Mir::Concat(parts)
+     }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::ClassBytes {
+     /// Applies `case_fold_simple` to the class and wraps it in `Mir::Class`.
+     fn make_ascii_case_insensitive(self) -> Mir {
+         let mut folded = self;
+         folded.case_fold_simple();
+
+         Mir::Class(hir::Class::Bytes(folded))
+     }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::ClassUnicode {
+     /// Extends the class so that every ASCII letter it contains is matched
+     /// in both cases; non-ASCII characters are left untouched.
+     ///
+     /// For each range, the overlap with `a..=z` is mirrored into upper case
+     /// and the overlap with `A..=Z` into lower case; the mirrored ranges are
+     /// then unioned into the class.
+     fn make_ascii_case_insensitive(mut self) -> Mir {
+         use std::cmp;
+
+         /// Whether the inclusive ranges `st1..=end1` and `st2..=end2` share
+         /// at least one value.
+         #[inline]
+         fn overlaps(st1: u8, end1: u8, st2: u8, end2: u8) -> bool {
+             (st2 <= st1 && st1 <= end2) || (st1 <= st2 && st2 <= end1)
+         }
+
+         /// The character as a byte, if it is ASCII.
+         #[inline]
+         fn make_ascii(c: char) -> Option<u8> {
+             if c.is_ascii() {
+                 Some(c as u8)
+             } else {
+                 None
+             }
+         }
+
+         // Manual implementation to only perform the case folding on ascii characters.
+
+         let mut ranges = Vec::new();
+
+         for range in self.ranges() {
+             match (make_ascii(range.start()), make_ascii(range.end())) {
+                 // The whole range is ASCII: clamp it to each letter block.
+                 (Some(start), Some(end)) => {
+                     if overlaps(b'a', b'z', start, end) {
+                         let lower = cmp::max(start, b'a');
+                         let upper = cmp::min(end, b'z');
+                         ranges.push(hir::ClassUnicodeRange::new(
+                             (lower - 32) as char,
+                             (upper - 32) as char,
+                         ))
+                     }
+
+                     if overlaps(b'A', b'Z', start, end) {
+                         let lower = cmp::max(start, b'A');
+                         let upper = cmp::min(end, b'Z');
+                         ranges.push(hir::ClassUnicodeRange::new(
+                             (lower + 32) as char,
+                             (upper + 32) as char,
+                         ))
+                     }
+                 }
+                 // The range starts in ASCII and ends beyond it, so it covers
+                 // every ASCII character from `start` onwards.
+                 (Some(start), None) => {
+                     if overlaps(b'a', b'z', start, b'z') {
+                         let lower = cmp::max(start, b'a');
+                         ranges.push(hir::ClassUnicodeRange::new((lower - 32) as char, 'Z'))
+                     }
+
+                     if overlaps(b'A', b'Z', start, b'Z') {
+                         let lower = cmp::max(start, b'A');
+                         // Lower-case mirror of `lower..=Z`; its upper bound
+                         // is therefore 'z', not 'Z'.
+                         ranges.push(hir::ClassUnicodeRange::new((lower + 32) as char, 'z'))
+                     }
+                 }
+                 // Ranges that start outside ASCII contain no ASCII letters.
+                 _ => (),
+             }
+         }
+
+         self.union(&hir::ClassUnicode::new(ranges));
+
+         Mir::Class(hir::Class::Unicode(self))
+     }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::Class {
+ /// Dispatches to the byte-class or unicode-class implementation.
+ fn make_ascii_case_insensitive(self) -> Mir {
+ match self {
+ hir::Class::Bytes(b) => b.make_ascii_case_insensitive(),
+ hir::Class::Unicode(u) => u.make_ascii_case_insensitive(),
+ }
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for &Literal {
+     /// Concatenation of the per-element conversions: per `char` for string
+     /// literals, per byte for byte-string literals.
+     fn make_ascii_case_insensitive(self) -> Mir {
+         let parts = match self {
+             Literal::Utf8(text) => text
+                 .value()
+                 .chars()
+                 .map(|c| c.make_ascii_case_insensitive())
+                 .collect(),
+             Literal::Bytes(raw) => raw
+                 .value()
+                 .into_iter()
+                 .map(|byte| byte.make_ascii_case_insensitive())
+                 .collect(),
+         };
+
+         Mir::Concat(parts)
+     }
+ }
+
+ impl MakeAsciiCaseInsensitive for Mir {
+     /// Rebuilds the tree with the conversion applied to every node: container
+     /// variants recurse into their children, classes and literals are
+     /// converted by their own implementations, and `Empty` stays `Empty`.
+     fn make_ascii_case_insensitive(self) -> Mir {
+         match self {
+             Mir::Literal(literal) => literal.make_ascii_case_insensitive(),
+             Mir::Class(class) => class.make_ascii_case_insensitive(),
+             Mir::Alternation(branches) => {
+                 let branches = branches
+                     .into_iter()
+                     .map(|branch| branch.make_ascii_case_insensitive())
+                     .collect();
+
+                 Mir::Alternation(branches)
+             }
+             Mir::Concat(parts) => {
+                 let parts = parts
+                     .into_iter()
+                     .map(|part| part.make_ascii_case_insensitive())
+                     .collect();
+
+                 Mir::Concat(parts)
+             }
+             Mir::Maybe(inner) => {
+                 let inner = (*inner).make_ascii_case_insensitive();
+
+                 Mir::Maybe(Box::new(inner))
+             }
+             Mir::Loop(inner) => {
+                 let inner = (*inner).make_ascii_case_insensitive();
+
+                 Mir::Loop(Box::new(inner))
+             }
+             Mir::Empty => Mir::Empty,
+         }
+     }
+ }
+
+ // Tests for the `ClassUnicode` implementation: each case converts a single
+ // range and compares the result with the expected set of ranges.
+ #[cfg(test)]
+ mod tests {
+ use super::MakeAsciiCaseInsensitive;
+ use crate::mir::{Class, Mir};
+ use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange};
+
+ // Converts the class `[in_s-in_e]` and asserts that the result equals the
+ // class built from `expected`. Panics if the result is not a unicode class.
+ fn assert_range(in_s: char, in_e: char, expected: &[(char, char)]) {
+ let range = ClassUnicodeRange::new(in_s, in_e);
+ let class = ClassUnicode::new(vec![range]);
+
+ let expected =
+ ClassUnicode::new(expected.iter().map(|&(a, b)| ClassUnicodeRange::new(a, b)));
+
+ if let Mir::Class(Class::Unicode(result)) = class.make_ascii_case_insensitive() {
+ assert_eq!(result, expected);
+ } else {
+ panic!("Not a unicode class");
+ };
+ }
+
+ #[test]
+ fn no_letters_left() {
+ assert_range(' ', '+', &[(' ', '+')]);
+ }
+
+ #[test]
+ fn no_letters_right() {
+ assert_range('{', '~', &[('{', '~')]);
+ }
+
+ #[test]
+ fn no_letters_middle() {
+ assert_range('[', '`', &[('[', '`')]);
+ }
+
+ #[test]
+ fn lowercase_left_edge() {
+ assert_range('a', 'd', &[('a', 'd'), ('A', 'D')]);
+ }
+
+ #[test]
+ fn lowercase_right_edge() {
+ assert_range('r', 'z', &[('r', 'z'), ('R', 'Z')]);
+ }
+
+ #[test]
+ fn lowercase_total() {
+ assert_range('a', 'z', &[('a', 'z'), ('A', 'Z')]);
+ }
+
+ #[test]
+ fn uppercase_left_edge() {
+ assert_range('A', 'D', &[('a', 'd'), ('A', 'D')]);
+ }
+
+ #[test]
+ fn uppercase_right_edge() {
+ assert_range('R', 'Z', &[('r', 'z'), ('R', 'Z')]);
+ }
+
+ #[test]
+ fn uppercase_total() {
+ assert_range('A', 'Z', &[('a', 'z'), ('A', 'Z')]);
+ }
+
+ #[test]
+ fn lowercase_cross_left() {
+ assert_range('[', 'h', &[('[', 'h'), ('A', 'H')]);
+ }
+
+ #[test]
+ fn lowercase_cross_right() {
+ assert_range('d', '}', &[('d', '}'), ('D', 'Z')]);
+ }
+
+ #[test]
+ fn uppercase_cross_left() {
+ assert_range(';', 'H', &[(';', 'H'), ('a', 'h')]);
+ }
+
+ #[test]
+ fn uppercase_cross_right() {
+ assert_range('T', ']', &[('t', 'z'), ('T', ']')]);
+ }
+
+ #[test]
+ fn cross_both() {
+ assert_range('X', 'c', &[('X', 'c'), ('x', 'z'), ('A', 'C')]);
+ }
+
+ #[test]
+ fn all_letters() {
+ assert_range('+', '|', &[('+', '|')]);
+ }
+
+ // The "oob" cases use a range whose end ('é') lies outside ASCII.
+ #[test]
+ fn oob_all_letters() {
+ assert_range('#', 'é', &[('#', 'é')]);
+ }
+
+ // 'Q'..'é' already contains all of a-z, so all of A-Z has to be added.
+ #[test]
+ fn oob_from_uppercase() {
+ assert_range('Q', 'é', &[('A', 'é')]);
+ }
+
+ #[test]
+ fn oob_from_lowercase() {
+ assert_range('q', 'é', &[('q', 'é'), ('Q', 'Z')]);
+ }
+
+ #[test]
+ fn oob_no_letters() {
+ assert_range('|', 'é', &[('|', 'é')]);
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/parser/mod.rs b/vendor/logos-codegen/src/parser/mod.rs
new file mode 100644
index 00000000..3ad7202e
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/mod.rs
@@ -0,0 +1,331 @@
+use beef::lean::Cow;
+use proc_macro2::{Span, TokenStream, TokenTree};
+use quote::quote;
+use syn::spanned::Spanned;
+use syn::{Attribute, GenericParam, Lit, Meta, Type};
+
+use crate::error::Errors;
+use crate::leaf::{Callback, InlineCallback};
+use crate::util::{expect_punct, MaybeVoid};
+use crate::LOGOS_ATTR;
+
+mod definition;
+mod ignore_flags;
+mod nested;
+mod subpattern;
+mod type_params;
+
+pub use self::definition::{Definition, Literal};
+pub use self::ignore_flags::IgnoreFlags;
+use self::nested::{AttributeParser, Nested, NestedValue};
+pub use self::subpattern::Subpatterns;
+use self::type_params::{replace_lifetime, traverse_type, TypeParams};
+
+/// State accumulated while parsing the attributes of a Logos definition.
+#[derive(Default)]
+pub struct Parser {
+ /// Errors reported so far (see `Parser::err`).
+ pub errors: Errors,
+ /// Switched to `Mode::Binary` once a byte-string literal has been parsed.
+ pub mode: Mode,
+ /// Tokens given with `#[logos(source = ...)]`.
+ pub source: Option<TokenStream>,
+ /// Literals given with `#[logos(skip "...")]`.
+ pub skips: Vec<Literal>,
+ /// Tokens given with `#[logos(extras = ...)]`.
+ pub extras: MaybeVoid,
+ /// Tokens given with `#[logos(error = ...)]`.
+ pub error_type: MaybeVoid,
+ /// Subpatterns declared with `#[logos(subpattern name = ...)]`.
+ pub subpatterns: Subpatterns,
+ /// Tokens given with `#[logos(crate = ...)]`.
+ pub logos_path: Option<TokenStream>,
+ // Declared generic parameters and their `#[logos(type T = ...)]` substitutions.
+ types: TypeParams,
+}
+
+/// Kind of literals seen by the parser.
+#[derive(Default)]
+pub enum Mode {
+ /// The default; kept as long as no byte-string literal has been parsed.
+ #[default]
+ Utf8,
+ /// Selected by `Parser::parse_literal` when it sees a byte-string literal.
+ Binary,
+}
+
+impl Parser {
+ /// Registers one generic parameter of the annotated type.
+ ///
+ /// Lifetimes and type parameters are recorded in `self.types`; const
+ /// generics are rejected with an error.
+ pub fn parse_generic(&mut self, param: GenericParam) {
+ match param {
+ GenericParam::Lifetime(lt) => {
+ self.types.explicit_lifetime(lt, &mut self.errors);
+ }
+ GenericParam::Type(ty) => {
+ self.types.add(ty.ident);
+ }
+ GenericParam::Const(c) => {
+ self.err("Logos doesn't support const generics.", c.span());
+ }
+ }
+ }
+
+ /// Returns the generic argument list produced by `TypeParams::generics`,
+ /// recording any errors it reports in `self.errors`.
+ pub fn generics(&mut self) -> Option<TokenStream> {
+ self.types.generics(&mut self.errors)
+ }
+
+ /// Takes the tokens out of a list-style attribute (`#[name(...)]`) and
+ /// wraps them in an `AttributeParser`.
+ ///
+ /// The attribute is left with an empty token list. Any other attribute
+ /// form yields `None`.
+ fn parse_attr(&mut self, attr: &mut Attribute) -> Option<AttributeParser> {
+     if let Meta::List(list) = &mut attr.meta {
+         let tokens = std::mem::take(&mut list.tokens);
+
+         return Some(AttributeParser::new(tokens));
+     }
+
+     None
+ }
+
+ /// Try to parse the main `#[logos(...)]`, does nothing if
+ /// the attribute's name isn't `logos`.
+ ///
+ /// Every nested item must be a named one; its name is looked up in a
+ /// static table of handlers. Unknown names and malformed items are
+ /// reported as errors and parsing continues with the next item.
+ pub fn try_parse_logos(&mut self, attr: &mut Attribute) {
+ if !attr.path().is_ident(LOGOS_ATTR) {
+ return;
+ }
+
+ let nested = match self.parse_attr(attr) {
+ Some(tokens) => tokens,
+ None => {
+ self.err("Expected #[logos(...)]", attr.span());
+ return;
+ }
+ };
+
+ for nested in nested {
+ let (name, value) = match nested {
+ Nested::Named(name, value) => (name, value),
+ Nested::Unexpected(tokens) | Nested::Unnamed(tokens) => {
+ self.err("Invalid nested attribute", tokens.span());
+ continue;
+ }
+ };
+
+ // Each handler receives the parser, the span of the attribute name
+ // and the parsed value.
+ // IMPORTANT: Keep these sorted alphabetically for binary search down the line
+ #[allow(clippy::type_complexity)]
+ static NESTED_LOOKUP: &[(&str, fn(&mut Parser, Span, NestedValue))] = &[
+ ("crate", |parser, span, value| match value {
+ NestedValue::Assign(logos_path) => parser.logos_path = Some(logos_path),
+ _ => {
+ parser.err("Expected: #[logos(crate = path::to::logos)]", span);
+ }
+ }),
+ ("error", |parser, span, value| match value {
+ NestedValue::Assign(value) => {
+ let span = value.span();
+
+ if let MaybeVoid::Some(previous) = parser.error_type.replace(value) {
+ parser
+ .err("Error type can be defined only once", span)
+ .err("Previous definition here", previous.span());
+ }
+ }
+ _ => {
+ parser.err("Expected: #[logos(error = SomeType)]", span);
+ }
+ }),
+ ("extras", |parser, span, value| match value {
+ NestedValue::Assign(value) => {
+ let span = value.span();
+
+ if let MaybeVoid::Some(previous) = parser.extras.replace(value) {
+ parser
+ .err("Extras can be defined only once", span)
+ .err("Previous definition here", previous.span());
+ }
+ }
+ _ => {
+ parser.err("Expected: #[logos(extras = SomeType)]", span);
+ }
+ }),
+ ("skip", |parser, span, value| match value {
+ NestedValue::Literal(lit) => {
+ if let Some(literal) = parser.parse_literal(Lit::new(lit)) {
+ parser.skips.push(literal);
+ }
+ }
+ _ => {
+ parser.err("Expected: #[logos(skip \"regex literal\")]", span);
+ }
+ }),
+ ("source", |parser, span, value| match value {
+ NestedValue::Assign(value) => {
+ let span = value.span();
+ if let Some(previous) = parser.source.replace(value) {
+ parser
+ .err("Source can be defined only once", span)
+ .err("Previous definition here", previous.span());
+ }
+ }
+ _ => {
+ parser.err("Expected: #[logos(source = SomeType)]", span);
+ }
+ }),
+ ("subpattern", |parser, span, value| match value {
+ NestedValue::KeywordAssign(name, value) => {
+ parser.subpatterns.add(name, value, &mut parser.errors);
+ }
+ _ => {
+ parser.err(r#"Expected: #[logos(subpattern name = r"regex")]"#, span);
+ }
+ }),
+ ("type", |parser, span, value| match value {
+ NestedValue::KeywordAssign(generic, ty) => {
+ parser.types.set(generic, ty, &mut parser.errors);
+ }
+ _ => {
+ parser.err("Expected: #[logos(type T = SomeType)]", span);
+ }
+ }),
+ ];
+
+ match NESTED_LOOKUP.binary_search_by_key(&name.to_string().as_str(), |(n, _)| n) {
+ Ok(idx) => NESTED_LOOKUP[idx].1(self, name.span(), value),
+ Err(_) => {
+ // Build the list of accepted names from the table itself.
+ let mut err = format!(
+ "Unknown nested attribute #[logos({name})], expected one of: {}",
+ NESTED_LOOKUP[0].0
+ );
+
+ for (allowed, _) in &NESTED_LOOKUP[1..] {
+ err.push_str(", ");
+ err.push_str(allowed);
+ }
+
+ self.err(err, name.span());
+ }
+ }
+ }
+ }
+
+ /// Converts a `syn::Lit` into a `Literal`.
+ ///
+ /// String literals map to `Literal::Utf8`. Byte-string literals map to
+ /// `Literal::Bytes` and, as a side effect, switch the parser to
+ /// `Mode::Binary`. Any other literal is reported as an error and yields
+ /// `None`.
+ pub fn parse_literal(&mut self, lit: Lit) -> Option<Literal> {
+ match lit {
+ Lit::Str(string) => Some(Literal::Utf8(string)),
+ Lit::ByteStr(bytes) => {
+ self.mode = Mode::Binary;
+
+ Some(Literal::Bytes(bytes))
+ }
+ _ => {
+ self.err("Expected a &str or &[u8] slice", lit.span());
+
+ None
+ }
+ }
+ }
+
+ /// Parse attribute definition of a token:
+ ///
+ /// + `#[token(literal[, callback])]`
+ /// + `#[regex(literal[, callback])]`
+ ///
+ /// Returns `None` when the attribute is not a list, has no leading
+ /// literal, or the literal is rejected. Problems with the remaining
+ /// arguments are reported as errors but still yield a definition.
+ pub fn parse_definition(&mut self, attr: &mut Attribute) -> Option<Definition> {
+ let mut nested = self.parse_attr(attr)?;
+
+ let literal = match nested.parsed::<Lit>()? {
+ Ok(lit) => self.parse_literal(lit)?,
+ Err(err) => {
+ self.err(err.to_string(), err.span());
+
+ return None;
+ }
+ };
+
+ let mut def = Definition::new(literal);
+
+ // `position` counts the arguments that follow the literal.
+ for (position, next) in nested.enumerate() {
+ match next {
+ Nested::Unexpected(tokens) => {
+ self.err("Unexpected token in attribute", tokens.span());
+ }
+ Nested::Unnamed(tokens) => match position {
+ // Only the first argument after the literal may be an
+ // unnamed callback.
+ 0 => def.callback = self.parse_callback(tokens),
+ _ => {
+ self.err(
+ "\
+ Expected a named argument at this position\n\
+ \n\
+ hint: If you are trying to define a callback here use: callback = ...\
+ ",
+ tokens.span(),
+ );
+ }
+ },
+ Nested::Named(name, value) => {
+ def.named_attr(name, value, self);
+ }
+ }
+ }
+
+ Some(def)
+ }
+
+ /// Parses a callback given either as a label (any tokens not starting
+ /// with `|`) or as an inline closure of the form `|arg| body`.
+ ///
+ /// Returns `None`, after reporting an error, when the closure form does
+ /// not have exactly one identifier parameter or has no body.
+ ///
+ /// NOTE(review): `expect_punct` is defined in `crate::util`, not here. From
+ /// how it is used below it presumably returns `None` when the token is the
+ /// expected punctuation and hands the token back otherwise; confirm there.
+ fn parse_callback(&mut self, tokens: TokenStream) -> Option<Callback> {
+ let span = tokens.span();
+ let mut tokens = tokens.into_iter();
+
+ // Anything that does not open with `|` is kept verbatim as a label.
+ if let Some(tt) = expect_punct(tokens.next(), '|') {
+ let mut label = TokenStream::from(tt);
+
+ label.extend(tokens);
+
+ return Some(Callback::Label(label));
+ }
+
+ // Closure form: the parameter, then the closing `|`.
+ let first = tokens.next();
+ let error = expect_punct(tokens.next(), '|');
+
+ let arg = match (error, first) {
+ (None, Some(TokenTree::Ident(arg))) => arg,
+ _ => {
+ self.err(
+ "Inline callbacks must use closure syntax with exactly one parameter",
+ span,
+ );
+ return None;
+ }
+ };
+
+ // A group body contributes its inner stream; otherwise every remaining
+ // token is the body.
+ let body = match tokens.next() {
+ Some(TokenTree::Group(group)) => group.stream(),
+ Some(first) => {
+ let mut body = TokenStream::from(first);
+
+ body.extend(tokens);
+ body
+ }
+ None => {
+ self.err("Callback missing a body", span);
+ return None;
+ }
+ };
+
+ let inline = InlineCallback { arg, body, span };
+
+ Some(inline.into())
+ }
+
+ /// Checks if `ty` is a declared generic param, if so replaces it
+ /// with a concrete type defined using #[logos(type T = Type)]
+ ///
+ /// If no matching generic param is found, all lifetimes are fixed
+ /// to the source lifetime
+ ///
+ /// `ty` is rewritten in place and also returned as tokens.
+ pub fn get_type(&self, ty: &mut Type) -> TokenStream {
+ traverse_type(ty, &mut |ty| {
+ if let Type::Path(tp) = ty {
+ // Skip qualified-self paths such as `<T as Trait>::Assoc`
+ if tp.qself.is_none() {
+ // If `ty` is a generic type parameter, try to find
+ // its concrete type defined with #[logos(type T = Type)]
+ if let Some(substitute) = self.types.find(&tp.path) {
+ *ty = substitute;
+ }
+ }
+ }
+ // If `ty` is a concrete type, fix its lifetimes to 'source
+ replace_lifetime(ty);
+ });
+
+ quote!(#ty)
+ }
+
+ /// Records an error with the given message and span in `self.errors` and
+ /// returns the error collection so further errors can be chained.
+ pub fn err<M>(&mut self, message: M, span: Span) -> &mut Errors
+ where
+ M: Into<Cow<'static, str>>,
+ {
+ self.errors.err(message, span)
+ }
+}
diff --git a/vendor/logos-codegen/src/parser/nested.rs b/vendor/logos-codegen/src/parser/nested.rs
new file mode 100644
index 00000000..44ecaeac
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/nested.rs
@@ -0,0 +1,146 @@
+use proc_macro2::token_stream::IntoIter as TokenIter;
+use proc_macro2::{Ident, Literal, TokenStream, TokenTree};
+use quote::quote;
+
+use crate::util::{expect_punct, is_punct};
+
+/// The value part of a named nested attribute.
+pub enum NestedValue {
+ /// `name = ...`
+ Assign(TokenStream),
+ /// `name "literal"`
+ Literal(Literal),
+ /// `name(...)`
+ Group(TokenStream),
+ /// `name ident = ...`
+ KeywordAssign(Ident, TokenStream),
+}
+
+/// One item produced by `AttributeParser`.
+pub enum Nested {
+ /// Unnamed nested attribute, such as a string,
+ /// callback closure, or a lone ident/path
+ ///
+ /// Note: a lone ident with nothing after it is also reported as Unnamed
+ Unnamed(TokenStream),
+ /// Named: name ...
+ Named(Ident, NestedValue),
+ /// Unexpected token,
+ Unexpected(TokenStream),
+}
+
+/// Iterator that splits the tokens of an attribute into `Nested` items.
+pub struct AttributeParser {
+ // Remaining, not yet consumed tokens.
+ inner: TokenIter,
+}
+
+/// Marker that converts into an empty `TokenStream`; passed to
+/// `collect_tail` when there is no leading token to start from.
+pub struct Empty;
+
+impl From<Empty> for TokenStream {
+ fn from(_: Empty) -> TokenStream {
+ TokenStream::new()
+ }
+}
+
+impl AttributeParser {
+    /// Starts parsing at the beginning of `stream`.
+    pub fn new(stream: TokenStream) -> Self {
+        let inner = stream.into_iter();
+
+        Self { inner }
+    }
+
+    /// Collects the tokens of the next item and parses them as `T`.
+    ///
+    /// Returns `None` when no tokens were collected.
+    pub fn parsed<T>(&mut self) -> Option<syn::Result<T>>
+    where
+        T: syn::parse::Parse,
+    {
+        let tokens = self.collect_tail(TokenStream::new());
+
+        if tokens.is_empty() {
+            None
+        } else {
+            Some(syn::parse2(tokens))
+        }
+    }
+
+    /// Pulls the next token and passes it through `expect_punct(.., ',')`.
+    ///
+    /// NOTE(review): `expect_punct` lives in `crate::util`; judging by its use
+    /// here it presumably yields `None` for a `,` (and at end of input) and
+    /// the token itself otherwise - confirm there.
+    fn next_tt(&mut self) -> Option<TokenTree> {
+        let upcoming = self.inner.next();
+
+        expect_punct(upcoming, ',')
+    }
+
+    /// Appends tokens from `next_tt` to `first` until it returns `None`.
+    fn collect_tail<T>(&mut self, first: T) -> TokenStream
+    where
+        T: Into<TokenStream>,
+    {
+        let mut collected: TokenStream = first.into();
+
+        collected.extend(std::iter::from_fn(|| self.next_tt()));
+
+        collected
+    }
+
+    /// Item that starts with an identifier but is not a named form: the
+    /// identifier, `next` and the rest of the item become `Nested::Unnamed`.
+    fn parse_unnamed(&mut self, first: Ident, next: TokenTree) -> Nested {
+        let mut tokens = TokenStream::from(TokenTree::Ident(first));
+        let rest = self.collect_tail(next);
+
+        tokens.extend(rest);
+
+        Nested::Unnamed(tokens.into_iter().collect())
+    }
+
+    /// `name = ...`: the rest of the item is the assigned value.
+    fn parse_assign(&mut self, name: Ident) -> Nested {
+        let assigned = self.collect_tail(Empty);
+
+        Nested::Named(name, NestedValue::Assign(assigned))
+    }
+
+    /// `name "literal"`: the rest of the item is consumed and discarded.
+    fn parse_literal(&mut self, name: Ident, lit: Literal) -> Nested {
+        // TODO: Error if there are any tokens following
+        let _ignored = self.collect_tail(Empty);
+
+        Nested::Named(name, NestedValue::Literal(lit))
+    }
+
+    /// `name(...)`: `group` is the content of the group.
+    fn parse_group(&mut self, name: Ident, group: TokenStream) -> Nested {
+        let value = NestedValue::Group(group);
+
+        Nested::Named(name, value)
+    }
+
+    /// `keyword name = ...`.
+    ///
+    /// If the token after `name` is handed back by `expect_punct(.., '=')`,
+    /// it and the rest of the item are returned as `Nested::Unexpected`.
+    fn parse_keyword(&mut self, keyword: Ident, name: Ident) -> Nested {
+        if let Some(unexpected) = expect_punct(self.next_tt(), '=') {
+            let tokens = self.collect_tail(unexpected);
+
+            return Nested::Unexpected(tokens);
+        }
+
+        let value = self.collect_tail(Empty);
+
+        Nested::Named(keyword, NestedValue::KeywordAssign(name, value))
+    }
+}
+
+impl Iterator for AttributeParser {
+ type Item = Nested;
+
+ /// Produces the next nested item, or `None` once the tokens are exhausted.
+ ///
+ /// An item that does not start with an identifier is `Unnamed`. Otherwise
+ /// the token after the identifier selects the form: `=` (assignment), a
+ /// literal, a group, a second identifier (keyword form) or anything else
+ /// (`Unnamed`).
+ fn next(&mut self) -> Option<Nested> {
+ let first = self.inner.next()?;
+
+ let name = match first {
+ TokenTree::Ident(ident) => ident,
+ tt => {
+ let stream = self.collect_tail(tt);
+
+ return Some(Nested::Unnamed(stream.into_iter().collect()));
+ }
+ };
+
+ match self.next_tt() {
+ Some(tt) if is_punct(&tt, '=') => Some(self.parse_assign(name)),
+ Some(TokenTree::Literal(lit)) => Some(self.parse_literal(name, lit)),
+ Some(TokenTree::Group(group)) => Some(self.parse_group(name, group.stream())),
+ Some(TokenTree::Ident(next)) => Some(self.parse_keyword(name, next)),
+ Some(next) => Some(self.parse_unnamed(name, next)),
+ // A lone identifier is reported as an unnamed item.
+ None => Some(Nested::Unnamed(quote!(#name))),
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/parser/subpattern.rs b/vendor/logos-codegen/src/parser/subpattern.rs
new file mode 100644
index 00000000..eb620028
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/subpattern.rs
@@ -0,0 +1,97 @@
+use proc_macro2::TokenStream;
+use syn::Ident;
+
+use crate::error::Errors;
+use crate::mir::Mir;
+use crate::parser::definition::{bytes_to_regex_string, Literal};
+
+/// Named subpatterns that can be referenced from patterns as `(?&name)`.
+#[derive(Default)]
+pub struct Subpatterns {
+ // (name, pattern) pairs in declaration order; each pattern is stored with
+ // its own subpattern references already expanded.
+ map: Vec<(Ident, String)>,
+}
+
+impl Subpatterns {
+ /// Declares the subpattern `param` with the literal given in `pattern`.
+ ///
+ /// Errors are reported when `pattern` is not a string or byte-string
+ /// literal, when `param` was already declared (the first declaration is
+ /// kept), or when the expanded pattern is not a valid regex. In the last
+ /// case the subpattern is still stored.
+ pub fn add(&mut self, param: Ident, pattern: TokenStream, errors: &mut Errors) {
+ let lit = match syn::parse2::<Literal>(pattern) {
+ Ok(lit) => lit,
+ Err(e) => {
+ errors.err(e.to_string(), e.span());
+ return;
+ }
+ };
+
+ if let Some((name, _)) = self.map.iter().find(|(name, _)| *name == param) {
+ errors
+ .err(format!("{} can only be assigned once", param), param.span())
+ .err("Previously assigned here", name.span());
+ return;
+ }
+
+ // Expand references to the subpatterns declared so far.
+ let fixed = self.fix(&lit, errors);
+
+ // Validate the literal as proper regex. If it's not, emit an error.
+ let mir = match &lit {
+ Literal::Utf8(_) => Mir::utf8(&fixed),
+ Literal::Bytes(_) => Mir::binary(&fixed),
+ };
+
+ if let Err(err) = mir {
+ errors.err(err, lit.span());
+ };
+
+ self.map.push((param, fixed));
+ }
+
+ /// Returns the pattern of `lit` with every `(?&name)` reference replaced
+ /// by `(?:<pattern of name>)`.
+ ///
+ /// References that are not identifiers or that name an unknown subpattern
+ /// are reported to `errors`; the former are replaced by `(?:_)`, the
+ /// latter are left as `(?:name)`.
+ pub fn fix(&self, lit: &Literal, errors: &mut Errors) -> String {
+ // `i` is the byte offset from which the next `(?&` is searched.
+ let mut i = 0;
+ let mut pattern = match lit {
+ Literal::Utf8(s) => s.value(),
+ Literal::Bytes(b) => bytes_to_regex_string(b.value()),
+ };
+
+ while let Some(f) = pattern[i..].find("(?&") {
+ i += f;
+ pattern.replace_range(i..i + 3, "(?:");
+ // `i` now points at the first character of the referenced name.
+ i += 3;
+
+ let subref_end = if let Some(f) = pattern[i..].find(')') {
+ i + f
+ } else {
+ pattern.truncate(i); // truncate so latter error doesn't suppress
+ break; // regex-syntax will report the unclosed group
+ };
+
+ let name = &pattern[i..subref_end];
+ let name = match syn::parse_str::<Ident>(name) {
+ Ok(name) => name,
+ Err(_) => {
+ errors.err(
+ format!("subpattern reference `{}` is not an identifier", name),
+ lit.span(),
+ );
+ // we emitted the error; make something up and continue
+ pattern.replace_range(i..subref_end, "_");
+ // Skip the inserted `_` and the closing `)`.
+ i += 2;
+ continue;
+ }
+ };
+
+ match self.map.iter().find(|(def, _)| *def == name) {
+ Some((_, subpattern)) => {
+ pattern.replace_range(i..subref_end, subpattern);
+ // Skip the inserted text and the closing `)`; the inserted
+ // text is not scanned again.
+ i += subpattern.len() + 1;
+ }
+ None => {
+ errors.err(
+ format!("subpattern reference `{}` has not been defined", name),
+ lit.span(),
+ );
+ // leaving `(?:name)` is fine
+ i = subref_end + 1;
+ }
+ }
+ }
+
+ pattern
+ }
+}
diff --git a/vendor/logos-codegen/src/parser/type_params.rs b/vendor/logos-codegen/src/parser/type_params.rs
new file mode 100644
index 00000000..1be4948e
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/type_params.rs
@@ -0,0 +1,200 @@
+use proc_macro2::{Ident, Span, TokenStream};
+use quote::quote;
+use syn::spanned::Spanned;
+use syn::{Lifetime, LifetimeParam, Path, Type};
+
+use crate::error::Errors;
+
+/// Generic parameters declared on the annotated type.
+#[derive(Default)]
+pub struct TypeParams {
+ // Set once a lifetime parameter has been seen.
+ lifetime: bool,
+ // Declared type parameters with the concrete type assigned to each, if any.
+ type_params: Vec<(Ident, Option<Type>)>,
+}
+
+impl TypeParams {
+    /// Records that the type declares a lifetime parameter.
+    ///
+    /// A second lifetime is reported as an error.
+    pub fn explicit_lifetime(&mut self, lt: LifetimeParam, errors: &mut Errors) {
+        if self.lifetime {
+            let span = lt.span();
+
+            errors.err("Logos types can only have one lifetime", span);
+        }
+
+        self.lifetime = true;
+    }
+
+    /// Declares the type parameter `param`, without a concrete type yet.
+    pub fn add(&mut self, param: Ident) {
+        self.type_params.push((param, None));
+    }
+
+    /// Assigns the concrete type `ty` to the declared type parameter `param`.
+    ///
+    /// Lifetimes inside `ty` are rewritten with `replace_lifetimes`. Errors are
+    /// reported when `ty` does not parse as a type, when `param` was never
+    /// declared, or when it already had a type (the new type replaces the
+    /// previous one in that case).
+    pub fn set(&mut self, param: Ident, ty: TokenStream, errors: &mut Errors) {
+        let ty = match syn::parse2::<Type>(ty) {
+            Ok(mut ty) => {
+                replace_lifetimes(&mut ty);
+                ty
+            }
+            Err(err) => {
+                errors.err(err.to_string(), err.span());
+                return;
+            }
+        };
+
+        match self.type_params.iter_mut().find(|(name, _)| *name == param) {
+            Some((_, slot)) => {
+                if let Some(previous) = slot.replace(ty) {
+                    errors
+                        .err(
+                            format!("{} can only have one type assigned to it", param),
+                            param.span(),
+                        )
+                        .err("Previously assigned here", previous.span());
+                }
+            }
+            None => {
+                errors.err(
+                    format!("{} is not a declared type parameter", param),
+                    param.span(),
+                );
+            }
+        }
+    }
+
+    /// Returns a clone of the concrete type assigned to the type parameter
+    /// that `path` names, or `None` when `path` is not a declared parameter
+    /// or has no type assigned.
+    pub fn find(&self, path: &Path) -> Option<Type> {
+        for (ident, ty) in &self.type_params {
+            if path.is_ident(ident) {
+                return ty.clone();
+            }
+        }
+
+        None
+    }
+
+    /// Builds the generic argument list `<'s, Type, ...>`.
+    ///
+    /// `'s` is emitted when a lifetime was declared, followed by the concrete
+    /// type of every type parameter. Parameters without a concrete type are
+    /// reported as errors and left out. Returns `None` when nothing would be
+    /// emitted.
+    pub fn generics(&self, errors: &mut Errors) -> Option<TokenStream> {
+        if !self.lifetime && self.type_params.is_empty() {
+            return None;
+        }
+
+        let mut generics = Vec::new();
+
+        if self.lifetime {
+            generics.push(quote!('s));
+        }
+
+        for (ty, replace) in self.type_params.iter() {
+            match replace {
+                Some(ty) => generics.push(quote!(#ty)),
+                None => {
+                    errors.err(
+                        format!(
+                            "Generic type parameter without a concrete type\n\
+                            \n\
+                            Define a concrete type Logos can use: #[logos(type {} = Type)]",
+                            ty,
+                        ),
+                        ty.span(),
+                    );
+                }
+            }
+        }
+
+        if generics.is_empty() {
+            None
+        } else {
+            Some(quote!(<#(#generics),*>))
+        }
+    }
+}
+
+/// Applies `replace_lifetime` to `ty` and to every type nested inside it
+/// that `traverse_type` visits.
+pub fn replace_lifetimes(ty: &mut Type) {
+ traverse_type(ty, &mut replace_lifetime)
+}
+
+/// Rewrites the lifetimes written directly on `ty` to `'s`.
+///
+/// * Path types: every lifetime argument inside angle brackets is replaced,
+///   keeping its span.
+/// * Reference types: the lifetime is replaced, or added with a call-site
+///   span when the reference had none.
+///
+/// Other kinds of types are left untouched, and nested types are not visited.
+pub fn replace_lifetime(ty: &mut Type) {
+    use syn::{GenericArgument, PathArguments};
+
+    match ty {
+        Type::Reference(reference) => {
+            let span = reference
+                .lifetime
+                .take()
+                .map_or_else(Span::call_site, |lt| lt.span());
+
+            reference.lifetime = Some(Lifetime::new("'s", span));
+        }
+        Type::Path(type_path) => {
+            for segment in type_path.path.segments.iter_mut() {
+                if let PathArguments::AngleBracketed(bracketed) = &mut segment.arguments {
+                    for arg in bracketed.args.iter_mut() {
+                        if let GenericArgument::Lifetime(lt) = arg {
+                            *lt = Lifetime::new("'s", lt.span());
+                        }
+                    }
+                }
+            }
+        }
+        _ => (),
+    }
+}
+
+/// Calls `f` on `ty` and then, recursively, on the types nested inside it.
+///
+/// `f` runs on a type before its children are visited, so a replacement made
+/// by `f` is what gets descended into.
+pub fn traverse_type(ty: &mut Type, f: &mut impl FnMut(&mut Type)) {
+    f(ty);
+
+    match ty {
+        // Wrappers around a single element type.
+        Type::Array(inner) => traverse_type(&mut inner.elem, f),
+        Type::Group(inner) => traverse_type(&mut inner.elem, f),
+        Type::Paren(inner) => traverse_type(&mut inner.elem, f),
+        Type::Ptr(inner) => traverse_type(&mut inner.elem, f),
+        Type::Reference(inner) => traverse_type(&mut inner.elem, f),
+        Type::Slice(inner) => traverse_type(&mut inner.elem, f),
+        Type::Path(type_path) => traverse_path(&mut type_path.path, f),
+        Type::BareFn(bare_fn) => {
+            for input in bare_fn.inputs.iter_mut() {
+                traverse_type(&mut input.ty, f);
+            }
+
+            if let syn::ReturnType::Type(_, output) = &mut bare_fn.output {
+                traverse_type(output, f);
+            }
+        }
+        Type::TraitObject(object) => {
+            for bound in object.bounds.iter_mut() {
+                if let syn::TypeParamBound::Trait(trait_bound) = bound {
+                    traverse_path(&mut trait_bound.path, f);
+                }
+            }
+        }
+        Type::Tuple(tuple) => {
+            for elem in tuple.elems.iter_mut() {
+                traverse_type(elem, f);
+            }
+        }
+        _ => (),
+    }
+}
+
+/// Runs `traverse_type` on every type that appears in the arguments of
+/// `path`'s segments.
+///
+/// Angle-bracketed segments contribute their type arguments and associated
+/// type bindings; parenthesized segments contribute their inputs and their
+/// return type.
+fn traverse_path(path: &mut Path, f: &mut impl FnMut(&mut Type)) {
+    use syn::{GenericArgument, PathArguments, ReturnType};
+
+    for segment in path.segments.iter_mut() {
+        match &mut segment.arguments {
+            PathArguments::AngleBracketed(bracketed) => {
+                for arg in bracketed.args.iter_mut() {
+                    let nested = match arg {
+                        GenericArgument::Type(ty) => ty,
+                        GenericArgument::AssocType(assoc) => &mut assoc.ty,
+                        _ => continue,
+                    };
+
+                    traverse_type(nested, f);
+                }
+            }
+            PathArguments::Parenthesized(call) => {
+                for input in call.inputs.iter_mut() {
+                    traverse_type(input, f);
+                }
+
+                if let ReturnType::Type(_, output) = &mut call.output {
+                    traverse_type(output, f);
+                }
+            }
+            PathArguments::None => (),
+        }
+    }
+}