summaryrefslogtreecommitdiff
path: root/vendor/logos-codegen/src
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2025-07-02 18:36:06 -0600
committermo khan <mo@mokhan.ca>2025-07-02 18:36:06 -0600
commit8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch)
tree22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/logos-codegen/src
parent4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff)
chore: add vendor directory
Diffstat (limited to 'vendor/logos-codegen/src')
-rw-r--r--vendor/logos-codegen/src/error.rs110
-rw-r--r--vendor/logos-codegen/src/generator/context.rs128
-rw-r--r--vendor/logos-codegen/src/generator/fork.rs216
-rw-r--r--vendor/logos-codegen/src/generator/leaf.rs67
-rw-r--r--vendor/logos-codegen/src/generator/mod.rs268
-rw-r--r--vendor/logos-codegen/src/generator/rope.rs39
-rw-r--r--vendor/logos-codegen/src/generator/tables.rs77
-rw-r--r--vendor/logos-codegen/src/graph/fork.rs267
-rw-r--r--vendor/logos-codegen/src/graph/impls.rs220
-rw-r--r--vendor/logos-codegen/src/graph/meta.rs174
-rw-r--r--vendor/logos-codegen/src/graph/mod.rs566
-rw-r--r--vendor/logos-codegen/src/graph/range.rs144
-rw-r--r--vendor/logos-codegen/src/graph/regex.rs295
-rw-r--r--vendor/logos-codegen/src/graph/rope.rs330
-rw-r--r--vendor/logos-codegen/src/leaf.rs118
-rw-r--r--vendor/logos-codegen/src/lib.rs391
-rw-r--r--vendor/logos-codegen/src/macros.rs12
-rw-r--r--vendor/logos-codegen/src/mir.rs235
-rw-r--r--vendor/logos-codegen/src/parser/definition.rs193
-rw-r--r--vendor/logos-codegen/src/parser/ignore_flags.rs499
-rw-r--r--vendor/logos-codegen/src/parser/mod.rs331
-rw-r--r--vendor/logos-codegen/src/parser/nested.rs146
-rw-r--r--vendor/logos-codegen/src/parser/subpattern.rs97
-rw-r--r--vendor/logos-codegen/src/parser/type_params.rs200
-rw-r--r--vendor/logos-codegen/src/util.rs64
25 files changed, 5187 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/error.rs b/vendor/logos-codegen/src/error.rs
new file mode 100644
index 00000000..64f93c01
--- /dev/null
+++ b/vendor/logos-codegen/src/error.rs
@@ -0,0 +1,110 @@
use std::fmt;

use beef::lean::Cow;
use proc_macro2::{Span, TokenStream};
use quote::quote;
use quote::{quote_spanned, ToTokens, TokenStreamExt};

/// Convenience alias: fallible codegen operations produce an
/// (unspanned) [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

/// Accumulator for errors collected while deriving `Logos`, so that
/// every problem can be reported in a single compiler pass.
#[derive(Default)]
pub struct Errors {
    collected: Vec<SpannedError>,
}

impl Errors {
    /// Record an error `message` attached to `span`.
    /// Returns `&mut Self` so calls can be chained.
    pub fn err<M>(&mut self, message: M, span: Span) -> &mut Self
    where
        M: Into<Cow<'static, str>>,
    {
        self.collected.push(SpannedError {
            message: message.into(),
            span,
        });

        self
    }

    /// Render all collected errors as a dummy function containing one
    /// `compile_error!` invocation per error, or `None` if there were none.
    pub fn render(self) -> Option<TokenStream> {
        let errors = self.collected;

        match errors.len() {
            0 => None,
            _ => Some(quote! {
                fn _logos_derive_compile_errors() {
                    #(#errors)*
                }
            }),
        }
    }
}

/// An error message that has not yet been attached to a source span.
pub struct Error(Cow<'static, str>);

/// An error message together with the [`Span`] it should be reported at.
#[derive(Debug)]
pub struct SpannedError {
    message: Cow<'static, str>,
    span: Span,
}

impl Error {
    pub fn new<M>(message: M) -> Self
    where
        M: Into<Cow<'static, str>>,
    {
        Error(message.into())
    }

    /// Attach a span to this error, upgrading it to a [`SpannedError`].
    pub fn span(self, span: Span) -> SpannedError {
        SpannedError {
            message: self.0,
            span,
        }
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.fmt(f)
    }
}

impl fmt::Debug for Error {
    // Debug output deliberately mirrors Display: the message is
    // already human-readable.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

impl From<regex_syntax::Error> for Error {
    fn from(err: regex_syntax::Error) -> Error {
        Error(err.to_string().into())
    }
}

impl From<&'static str> for Error {
    fn from(err: &'static str) -> Error {
        Error(err.into())
    }
}

impl From<String> for Error {
    fn from(err: String) -> Error {
        Error(err.into())
    }
}

impl From<Error> for Cow<'static, str> {
    fn from(err: Error) -> Self {
        err.0
    }
}

impl ToTokens for SpannedError {
    /// Emits `{ compile_error!("…") }` spanned to the error's location —
    /// this is how the derive surfaces diagnostics to the user.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let message = &*self.message;

        tokens.append_all(quote_spanned!(self.span => {
            compile_error!(#message)
        }))
    }
}
diff --git a/vendor/logos-codegen/src/generator/context.rs b/vendor/logos-codegen/src/generator/context.rs
new file mode 100644
index 00000000..dc52f594
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/context.rs
@@ -0,0 +1,128 @@
use proc_macro2::TokenStream;
use quote::quote;

use crate::generator::Generator;
use crate::graph::NodeId;

/// This struct keeps track of bytes available to be read without
/// bounds checking across the tree.
///
/// For example, a branch that matches 4 bytes followed by a fork
/// with smallest branch containing of 2 bytes can do a bounds check
/// for 6 bytes ahead, and leave the remaining 2 byte array (fixed size)
/// to be handled by the fork, avoiding bound checks there.
#[derive(Default, Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct Context {
    /// Amount of bytes that haven't been bumped yet but should
    /// before a new read is performed
    at: usize,
    /// Number of bytes available without bound checks
    available: usize,
    /// Whether or not the Lexer has been bumped at least by 1 byte
    bumped: bool,
    /// Node to backtrack to in case an explicit match has failed.
    /// If `None` will instead produce an error token.
    backtrack: Option<NodeId>,
}

impl Context {
    pub fn can_backtrack(&self) -> bool {
        self.backtrack.is_some()
    }

    /// Set the backtrack target to `miss` (if any) and emit a bump for
    /// all pending bytes. Returns `None` without touching any state
    /// when `miss` is `None`, courtesy of the `?` operator.
    pub fn switch(&mut self, miss: Option<NodeId>) -> Option<TokenStream> {
        self.backtrack = Some(miss?);
        self.bump()
    }

    /// Return a copy of this context with `n` more pending bytes.
    /// Consumes a *copy* of `self` (the type is `Copy`); the receiver
    /// is left unchanged.
    pub const fn advance(self, n: usize) -> Self {
        Context {
            at: self.at + n,
            ..self
        }
    }

    /// Emit `lex.bump_unchecked(n)` for all pending bytes, if any,
    /// resetting the read-ahead bookkeeping.
    pub fn bump(&mut self) -> Option<TokenStream> {
        match self.at {
            0 => None,
            n => {
                let tokens = quote!(lex.bump_unchecked(#n););
                self.at = 0;
                self.available = 0;
                self.bumped = true;
                Some(tokens)
            }
        }
    }

    /// Number of bytes still readable without a fresh bounds check.
    pub fn remainder(&self) -> usize {
        self.available.saturating_sub(self.at)
    }

    /// Emit an unchecked (or checked, with `forbid_unsafe`) read of the
    /// byte at the current offset.
    pub fn read_byte(&mut self) -> TokenStream {
        let at = self.at;

        // NOTE(review): `advance` takes `self` by value and returns a new
        // Context, so this statement discards its result and leaves the
        // receiver unchanged — callers advance the context themselves
        // (see `generate_fork`). Presumably intentional; confirm against
        // upstream logos-codegen.
        self.advance(1);

        #[cfg(not(feature = "forbid_unsafe"))]
        {
            quote!(unsafe { lex.read_byte_unchecked(#at) })
        }

        #[cfg(feature = "forbid_unsafe")]
        {
            quote!(lex.read_byte(#at))
        }
    }

    /// Emit a read of `len` bytes ahead (a single `u8` when `len` is 0),
    /// recording `len` as the new bound-check-free window.
    pub fn read(&mut self, len: usize) -> TokenStream {
        self.available = len;

        match (self.at, len) {
            (0, 0) => quote!(lex.read::<u8>()),
            (a, 0) => quote!(lex.read_at::<u8>(#a)),
            (0, l) => quote!(lex.read::<&[u8; #l]>()),
            (a, l) => quote!(lex.read_at::<&[u8; #l]>(#a)),
        }
    }

    /// Forget the bound-check-free window.
    pub fn wipe(&mut self) {
        self.available = 0;
    }

    /// Context used when jumping to the backtrack target: pending bytes
    /// and read-ahead are dropped, and the target itself is cleared so
    /// a second failure errors out instead of looping.
    const fn backtrack(self) -> Self {
        Context {
            at: 0,
            available: 0,
            bumped: self.bumped,
            backtrack: None,
        }
    }

    /// Generate the code to run on a failed match: go to `miss` if
    /// given, else to the backtrack target, else produce an error
    /// (`lex.error()` once the lexer has consumed input, `_error`
    /// otherwise).
    pub fn miss(mut self, miss: Option<NodeId>, gen: &mut Generator) -> TokenStream {
        self.wipe();
        match (miss, self.backtrack) {
            (Some(id), _) => gen.goto(id, self).clone(),
            (_, Some(id)) => gen.goto(id, self.backtrack()).clone(),
            _ if self.bumped => quote!(lex.error()),
            _ => quote!(_error(lex)),
        }
    }

    /// Append a suffix uniquely encoding this context's state; used to
    /// name the generated `goto*` functions.
    pub fn write_suffix(&self, buf: &mut String) {
        use std::fmt::Write;

        if self.at > 0 {
            let _ = write!(buf, "_at{}", self.at);
        }
        if self.available > 0 {
            let _ = write!(buf, "_with{}", self.available);
        }
        if let Some(id) = self.backtrack {
            let _ = write!(buf, "_ctx{}", id);
        }
        if self.bumped {
            buf.push_str("_x");
        }
    }
}
diff --git a/vendor/logos-codegen/src/generator/fork.rs b/vendor/logos-codegen/src/generator/fork.rs
new file mode 100644
index 00000000..11d4eab2
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/fork.rs
@@ -0,0 +1,216 @@
use std::cmp::max;

use fnv::FnvHashMap as Map;
use proc_macro2::TokenStream;
use quote::quote;

use crate::generator::{Context, Generator};
use crate::graph::{Fork, NodeId, Range};
use crate::util::ToIdent;

/// Map from a target node to the byte ranges that lead to it.
type Targets = Map<NodeId, Vec<Range>>;

impl Generator<'_> {
    /// Generate code for a fork node: a dispatch on the next input byte.
    ///
    /// Three strategies are used depending on the fork's shape:
    /// a tight 16-bytes-at-a-time loop when the fork's sole target is
    /// itself, a plain `match` for up to two targets, and a 256-entry
    /// jump table otherwise.
    pub fn generate_fork(&mut self, this: NodeId, fork: &Fork, mut ctx: Context) -> TokenStream {
        let mut targets: Targets = Map::default();

        for (range, then) in fork.branches() {
            targets.entry(then).or_default().push(range);
        }
        let loops_to_self = self.meta[this].loop_entry_from.contains(&this);

        match targets.len() {
            1 if loops_to_self => return self.generate_fast_loop(fork, ctx),
            0..=2 => (),
            _ => return self.generate_fork_jump_table(this, fork, targets, ctx),
        }
        let miss = ctx.miss(fork.miss, self);
        let end = self.fork_end(this, &miss);
        let (byte, read) = self.fork_read(this, end, &mut ctx);
        let branches = targets.into_iter().map(|(id, ranges)| {
            let next = self.goto(id, ctx.advance(1));

            match *ranges {
                // Single contiguous range: match it directly.
                [range] => {
                    quote!(#range => #next,)
                }
                // Two individual bytes: an or-pattern.
                [a, b] if a.is_byte() && b.is_byte() => {
                    quote!(#a | #b => #next,)
                }
                // Anything more complex delegates to a generated
                // predicate function (see `generate_test`).
                _ => {
                    let test = self.generate_test(ranges).clone();
                    let next = self.goto(id, ctx.advance(1));

                    quote!(byte if #test(byte) => #next,)
                }
            }
        });

        quote! {
            #read

            match #byte {
                #(#branches)*
                _ => #miss,
            }
        }
    }

    /// Generate a fork as a static 256-entry lookup table mapping each
    /// byte to a `Jump` enum variant, plus a `match` over the variants.
    fn generate_fork_jump_table(
        &mut self,
        this: NodeId,
        fork: &Fork,
        targets: Targets,
        mut ctx: Context,
    ) -> TokenStream {
        let miss = ctx.miss(fork.miss, self);
        let end = self.fork_end(this, &miss);
        let (byte, read) = self.fork_read(this, end, &mut ctx);

        // Index 0 is reserved for the `__` (miss) variant.
        let mut table: [u8; 256] = [0; 256];
        let mut jumps = vec!["__".to_ident()];

        let branches = targets
            .into_iter()
            .enumerate()
            .map(|(idx, (id, ranges))| {
                let idx = (idx as u8) + 1;
                let next = self.goto(id, ctx.advance(1));
                jumps.push(format!("J{}", id).to_ident());

                for byte in ranges.into_iter().flatten() {
                    table[byte as usize] = idx;
                }
                let jump = jumps.last().unwrap();

                quote!(Jump::#jump => #next,)
            })
            .collect::<TokenStream>();

        // If every byte maps to a real target, the `__` variant and its
        // match arm are omitted entirely.
        let may_error = table.iter().any(|&idx| idx == 0);

        let jumps = jumps.as_slice();
        let table = table.iter().copied().map(|idx| &jumps[idx as usize]);

        let jumps = if may_error { jumps } else { &jumps[1..] };
        let error_branch = if may_error {
            Some(quote!(Jump::__ => #miss))
        } else {
            None
        };

        quote! {
            enum Jump {
                #(#jumps,)*
            }

            const LUT: [Jump; 256] = {
                use Jump::*;

                [#(#table),*]
            };

            #read

            match LUT[#byte as usize] {
                #branches
                #error_branch
            }
        }
    }

    /// End-of-input handler: the root node finishes lexing via `_end`,
    /// any other node falls back to its miss code.
    fn fork_end(&self, this: NodeId, miss: &TokenStream) -> TokenStream {
        if this == self.root {
            quote!(_end(lex))
        } else {
            miss.clone()
        }
    }

    /// Produce `(byte_expr, read_stmt)` for a fork: reuses the current
    /// bound-check-free window when possible, otherwise emits a checked
    /// read of one byte or of `min_read` bytes at once.
    fn fork_read(
        &self,
        this: NodeId,
        end: TokenStream,
        ctx: &mut Context,
    ) -> (TokenStream, TokenStream) {
        let min_read = self.meta[this].min_read;

        if ctx.remainder() >= max(min_read, 1) {
            let read = ctx.read_byte();

            return (quote!(byte), quote!(let byte = #read;));
        }

        match min_read {
            0 | 1 => {
                let read = ctx.read(0);

                (
                    quote!(byte),
                    quote! {
                        let byte = match #read {
                            Some(byte) => byte,
                            None => return #end,
                        };
                    },
                )
            }
            len => {
                let read = ctx.read(len);

                (
                    quote!(arr[0]),
                    quote! {
                        let arr = match #read {
                            Some(arr) => arr,
                            None => return #end,
                        };
                    },
                )
            }
        }
    }

    /// Self-looping fork with a single target: emit the `_fast_loop!`
    /// invocation that consumes matching bytes 16 at a time.
    fn generate_fast_loop(&mut self, fork: &Fork, ctx: Context) -> TokenStream {
        let miss = ctx.miss(fork.miss, self);
        let ranges = fork.branches().map(|(range, _)| range).collect::<Vec<_>>();
        let test = self.generate_test(ranges);

        quote! {
            _fast_loop!(lex, #test, #miss);
        }
    }

    /// Definition of the `_fast_loop!` macro emitted once per derive.
    /// The unrolled if-cascade bumps exactly as many bytes as matched
    /// before running the miss code.
    pub fn fast_loop_macro() -> TokenStream {
        quote! {
            macro_rules! _fast_loop {
                ($lex:ident, $test:ident, $miss:expr) => {
                    // Do one bounds check for multiple bytes till EOF
                    while let Some(arr) = $lex.read::<&[u8; 16]>() {
                        if $test(arr[0]) { if $test(arr[1]) { if $test(arr[2]) { if $test(arr[3]) {
                        if $test(arr[4]) { if $test(arr[5]) { if $test(arr[6]) { if $test(arr[7]) {
                        if $test(arr[8]) { if $test(arr[9]) { if $test(arr[10]) { if $test(arr[11]) {
                        if $test(arr[12]) { if $test(arr[13]) { if $test(arr[14]) { if $test(arr[15]) {

                        $lex.bump_unchecked(16); continue; } $lex.bump_unchecked(15); return $miss; }
                        $lex.bump_unchecked(14); return $miss; } $lex.bump_unchecked(13); return $miss; }
                        $lex.bump_unchecked(12); return $miss; } $lex.bump_unchecked(11); return $miss; }
                        $lex.bump_unchecked(10); return $miss; } $lex.bump_unchecked(9); return $miss; }
                        $lex.bump_unchecked(8); return $miss; } $lex.bump_unchecked(7); return $miss; }
                        $lex.bump_unchecked(6); return $miss; } $lex.bump_unchecked(5); return $miss; }
                        $lex.bump_unchecked(4); return $miss; } $lex.bump_unchecked(3); return $miss; }
                        $lex.bump_unchecked(2); return $miss; } $lex.bump_unchecked(1); return $miss; }

                        return $miss;
                    }

                    // Tail: fewer than 16 bytes left, test one at a time.
                    while $lex.test($test) {
                        $lex.bump_unchecked(1);
                    }

                    $miss
                };
            }
        }
    }
}
diff --git a/vendor/logos-codegen/src/generator/leaf.rs b/vendor/logos-codegen/src/generator/leaf.rs
new file mode 100644
index 00000000..0841a4ff
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/leaf.rs
@@ -0,0 +1,67 @@
use proc_macro2::TokenStream;
use quote::quote;

use crate::generator::{Context, Generator};
use crate::leaf::{Callback, Leaf};
use crate::util::MaybeVoid;

impl Generator<'_> {
    /// Generate code for a leaf node: a complete match that either runs
    /// the token's callback or constructs the enum variant directly.
    pub fn generate_leaf(&mut self, leaf: &Leaf, mut ctx: Context) -> TokenStream {
        // Flush any bytes that were matched but not yet consumed.
        let bump = ctx.bump();

        let ident = &leaf.ident;
        let name = self.name;
        let this = self.this;
        let ty = &leaf.field;

        // With a payload field the variant constructor is the callback
        // target itself; unit variants need a closure discarding `()`.
        let constructor = match leaf.field {
            MaybeVoid::Some(_) => quote!(#name::#ident),
            MaybeVoid::Void => quote!(|()| #name::#ident),
        };

        match &leaf.callback {
            // Callback referenced by path: call it and construct.
            Some(Callback::Label(callback)) => quote! {
                #bump
                #callback(lex).construct(#constructor, lex);
            },
            // Inline closure callback: emitted as a local fn.
            Some(Callback::Inline(inline)) => {
                let arg = &inline.arg;
                let body = &inline.body;

                // NOTE(review): the `rust_1_82` cfg adds a `+ use<'s>`
                // bound on the return-position impl-trait — presumably
                // for the newer lifetime-capture rules; confirm against
                // upstream build.rs which sets this cfg.
                #[cfg(not(rust_1_82))]
                let ret = quote!(impl CallbackResult<'s, #ty, #this>);

                #[cfg(rust_1_82)]
                let ret = quote!(impl CallbackResult<'s, #ty, #this> + use<'s>);

                quote! {
                    #bump

                    #[inline]
                    fn callback<'s>(#arg: &mut Lexer<'s>) -> #ret {
                        #body
                    }

                    callback(lex).construct(#constructor, lex);
                }
            }
            // Skip: discard the match as trivia and restart lexing.
            Some(Callback::Skip(_)) => {
                quote! {
                    #bump

                    lex.trivia();
                    #name::lex(lex);
                }
            }
            // No callback, unit variant: set the token directly.
            None if matches!(leaf.field, MaybeVoid::Void) => quote! {
                #bump
                lex.set(Ok(#name::#ident));
            },
            // No callback, payload variant: payload is the matched slice.
            None => quote! {
                #bump
                let token = #name::#ident(lex.slice());
                lex.set(Ok(token));
            },
        }
    }
}
diff --git a/vendor/logos-codegen/src/generator/mod.rs b/vendor/logos-codegen/src/generator/mod.rs
new file mode 100644
index 00000000..1b8bf8bf
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/mod.rs
@@ -0,0 +1,268 @@
use fnv::{FnvHashMap as Map, FnvHashSet as Set};
use proc_macro2::TokenStream;
use quote::{quote, ToTokens, TokenStreamExt};
use syn::Ident;

use crate::graph::{Graph, Meta, Node, NodeId, Range};
use crate::leaf::Leaf;
use crate::util::ToIdent;

mod context;
mod fork;
mod leaf;
mod rope;
mod tables;

use self::context::Context;
use self::tables::TableStack;

/// Walks the state-machine graph and renders it as a set of mutually
/// calling `goto*` functions plus shared lookup tables.
pub struct Generator<'a> {
    /// Name of the type we are implementing the `Logos` trait for
    name: &'a Ident,
    /// Name of the type with any generics it might need
    this: &'a TokenStream,
    /// Id to the root node
    root: NodeId,
    /// Reference to the graph with all of the nodes
    graph: &'a Graph<Leaf<'a>>,
    /// Meta data collected for the nodes
    meta: Meta,
    /// Buffer with functions growing during generation
    rendered: TokenStream,
    /// Set of functions that have already been rendered
    fns: Set<(NodeId, Context)>,
    /// Function name identifiers
    idents: Map<(NodeId, Context), Ident>,
    /// Local function calls. Note: a call might change its context,
    /// so we can't use `idents` for this purpose.
    gotos: Map<(NodeId, Context), TokenStream>,
    /// Identifiers for helper functions matching a byte to a given
    /// set of ranges
    tests: Map<Vec<Range>, Ident>,
    /// Related to above, table stack manages tables that need to be
    /// rendered as statics alongside the generated functions
    tables: TableStack,
}

impl<'a> Generator<'a> {
    pub fn new(
        name: &'a Ident,
        this: &'a TokenStream,
        root: NodeId,
        graph: &'a Graph<Leaf>,
    ) -> Self {
        // `_fast_loop!` is emitted up front so every generated fn can use it.
        let rendered = Self::fast_loop_macro();
        let meta = Meta::analyze(root, graph);

        Generator {
            name,
            this,
            root,
            graph,
            meta,
            rendered,
            fns: Set::default(),
            idents: Map::default(),
            gotos: Map::default(),
            tests: Map::default(),
            tables: TableStack::new(),
        }
    }

    /// Render the whole graph: tables first, then all generated
    /// functions, then the call into the root state.
    pub fn generate(mut self) -> TokenStream {
        let root = self.goto(self.root, Context::default()).clone();
        let rendered = &self.rendered;
        let tables = &self.tables;

        quote! {
            #tables
            #rendered
            #root
        }
    }

    /// Render the function for `(id, ctx)` once, appending it to
    /// `self.rendered`.
    fn generate_fn(&mut self, id: NodeId, ctx: Context) {
        if self.fns.contains(&(id, ctx)) {
            return;
        }
        self.fns.insert((id, ctx));

        let body = match &self.graph[id] {
            Node::Fork(fork) => self.generate_fork(id, fork, ctx),
            Node::Rope(rope) => self.generate_rope(rope, ctx),
            Node::Leaf(leaf) => self.generate_leaf(leaf, ctx),
        };
        let ident = self.generate_ident(id, ctx);
        let out = quote! {
            #[inline]
            fn #ident<'s>(lex: &mut Lexer<'s>) {
                #body
            }
        };

        self.rendered.append_all(out);
    }

    /// Produce (and cache) the call site for jumping to node `id` under
    /// `ctx`, emitting any bump that must happen before the call and
    /// rendering the target function on first use.
    fn goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream {
        let key = (id, ctx);

        // Allow contains_key + insert because self.generate_ident borrows a mutable ref to self
        // too.
        #[allow(clippy::map_entry)]
        if !self.gotos.contains_key(&key) {
            let meta = &self.meta[id];
            let enters_loop = !meta.loop_entry_from.is_empty();

            // Entering a loop (or having no backtrack point) updates the
            // backtrack target to this node's miss and flushes pending bytes.
            let bump = if enters_loop || !ctx.can_backtrack() {
                ctx.switch(self.graph[id].miss())
            } else {
                None
            };

            let bump = match (bump, enters_loop, meta.min_read) {
                (Some(t), _, _) => Some(t),
                (None, true, _) => ctx.bump(),
                (None, false, 0) => ctx.bump(),
                (None, false, _) => None,
            };

            if meta.min_read == 0 || ctx.remainder() < meta.min_read {
                ctx.wipe();
            }

            let ident = self.generate_ident(id, ctx);
            let mut call_site = quote!(#ident(lex));

            if let Some(bump) = bump {
                call_site = quote!({
                    #bump
                    #call_site
                });
            }
            self.gotos.insert(key, call_site);
            self.generate_fn(id, ctx);
        }
        &self.gotos[&key]
    }

    /// Name of the generated function for `(id, ctx)`, e.g.
    /// `goto7_at1_ctx3_x` (suffix encodes the context).
    fn generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident {
        self.idents.entry((id, ctx)).or_insert_with(|| {
            let mut ident = format!("goto{}", id);

            ctx.write_suffix(&mut ident);

            ident.to_ident()
        })
    }

    /// Returns an identifier to a function that matches a byte to any
    /// of the provided ranges. This will generate either a simple
    /// match expression, or use a lookup table internally.
    fn generate_test(&mut self, ranges: Vec<Range>) -> &Ident {
        if !self.tests.contains_key(&ranges) {
            let idx = self.tests.len();
            let ident = format!("pattern{}", idx).to_ident();

            // NOTE(review): relies on `ranges` being sorted ascending so
            // first/last bound the span — holds for ranges produced by
            // `Fork::branches`; confirm for any other caller.
            let lo = ranges.first().unwrap().start;
            let hi = ranges.last().unwrap().end;

            let body = match ranges.len() {
                // One or two ranges: a plain boolean match.
                0..=2 => {
                    quote! {
                        match byte {
                            #(#ranges)|* => true,
                            _ => false,
                        }
                    }
                }
                // Whole span fits in 64 bits: inline u64 bitmap.
                _ if hi - lo < 64 => {
                    let mut offset = hi.saturating_sub(63);

                    // Nudge the offset up towards a power of two (or 0)
                    // while it stays below `lo`.
                    while offset.count_ones() > 1 && lo - offset > 0 {
                        offset += 1;
                    }

                    let mut table = 0u64;

                    for byte in ranges.iter().flat_map(|range| *range) {
                        if byte - offset >= 64 {
                            panic!("{:#?} {} {} {}", ranges, hi, lo, offset);
                        }
                        table |= 1 << (byte - offset);
                    }

                    let search = match offset {
                        0 => quote!(byte),
                        _ => quote!(byte.wrapping_sub(#offset)),
                    };

                    quote! {
                        const LUT: u64 = #table;

                        match 1u64.checked_shl(#search as u32) {
                            Some(shift) => LUT & shift != 0,
                            None => false,
                        }
                    }
                }
                // Wide spread: flag the bytes in a shared 256-byte table.
                _ => {
                    let mut view = self.tables.view();

                    for byte in ranges.iter().flat_map(|range| *range) {
                        view.flag(byte);
                    }

                    let mask = view.mask();
                    let lut = view.ident();

                    quote! {
                        #lut[byte as usize] & #mask > 0
                    }
                }
            };
            self.rendered.append_all(quote! {
                #[inline]
                fn #ident(byte: u8) -> bool {
                    #body
                }
            });
            self.tests.insert(ranges.clone(), ident);
        }
        &self.tests[&ranges]
    }
}

/// Match a byte against the listed literals so common bytes render as
/// readable `b'…'` tokens; anything else falls back to interpolation.
macro_rules! match_quote {
    ($source:expr; $($byte:tt,)* ) => {match $source {
        $( $byte => quote!($byte), )*
        byte => quote!(#byte),
    }}
}

fn byte_to_tokens(byte: u8) -> TokenStream {
    match_quote! {
        byte;
        b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9',
        b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
        b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
        b'u', b'v', b'w', b'x', b'y', b'z',
        b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J',
        b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
        b'U', b'V', b'W', b'X', b'Y', b'Z',
        b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')',
        b'{', b'}', b'[', b']', b'<', b'>', b'-', b'=', b'_', b'+',
        b':', b';', b',', b'.', b'/', b'?', b'|', b'"', b'\'', b'\\',
    }
}

impl ToTokens for Range {
    /// Renders as a single byte literal, or `lo..=hi` for a real range.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let Range { start, end } = self;

        tokens.append_all(byte_to_tokens(*start));

        if start != end {
            tokens.append_all(quote!(..=));
            tokens.append_all(byte_to_tokens(*end));
        }
    }
}
diff --git a/vendor/logos-codegen/src/generator/rope.rs b/vendor/logos-codegen/src/generator/rope.rs
new file mode 100644
index 00000000..ae3b07ad
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/rope.rs
@@ -0,0 +1,39 @@
use proc_macro2::TokenStream;
use quote::quote;

use crate::generator::{Context, Generator};
use crate::graph::Rope;

impl Generator<'_> {
    /// Generate code for a rope node: match a fixed-length multi-byte
    /// pattern in a single read, then continue to `rope.then`.
    pub fn generate_rope(&mut self, rope: &Rope, mut ctx: Context) -> TokenStream {
        let miss = ctx.miss(rope.miss.first(), self);
        let read = ctx.read(rope.pattern.len());
        let then = self.goto(rope.then, ctx.advance(rope.pattern.len()));

        // Exact byte sequences render as one literal; patterns containing
        // ranges render as a slice pattern of range sub-patterns.
        let pat = match rope.pattern.to_bytes() {
            Some(bytes) => byte_slice_literal(&bytes),
            None => {
                let ranges = rope.pattern.iter();

                quote!([#(#ranges),*])
            }
        };

        quote! {
            match #read {
                Some(#pat) => #then,
                _ => #miss,
            }
        }
    }
}

/// Render `bytes` as a readable `b"…"` string literal when every byte is
/// printable ASCII, falling back to an explicit `&[…]` array otherwise.
fn byte_slice_literal(bytes: &[u8]) -> TokenStream {
    if bytes.iter().any(|&b| !(0x20..0x7F).contains(&b)) {
        return quote!(&[#(#bytes),*]);
    }

    let slice = std::str::from_utf8(bytes).unwrap();

    syn::parse_str(&format!("b{:?}", slice)).unwrap()
}
diff --git a/vendor/logos-codegen/src/generator/tables.rs b/vendor/logos-codegen/src/generator/tables.rs
new file mode 100644
index 00000000..f1e53273
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/tables.rs
@@ -0,0 +1,77 @@
use crate::util::ToIdent;
use proc_macro2::{Literal, TokenStream};
use quote::{quote, ToTokens};
use syn::Ident;

/// A growable collection of 256-entry bit tables. Each table packs up
/// to 8 byte-class predicates — one per bit — so several generated
/// `pattern*` test functions can share a single static table.
pub struct TableStack {
    tables: Vec<(Ident, [u8; 256])>,
    // Number of bits already handed out in the top table (0 means no
    // view has ever been taken; see `ToTokens` below).
    shift: u8,
}

/// A single-bit view into the top table of a [`TableStack`].
pub struct TableView<'a> {
    ident: &'a Ident,
    table: &'a mut [u8; 256],
    mask: u8,
}

impl TableStack {
    pub fn new() -> Self {
        TableStack {
            tables: vec![("COMPACT_TABLE_0".to_ident(), [0; 256])],
            shift: 0,
        }
    }

    /// Reserve the next free bit, starting a fresh table once all
    /// 8 bits of the current one have been taken.
    pub fn view(&mut self) -> TableView {
        let mask = if self.shift < 8 {
            // Reusing existing table with a shifted mask
            let mask = 1u8 << self.shift;

            self.shift += 1;

            mask
        } else {
            // Need to create a new table
            let ident = format!("COMPACT_TABLE_{}", self.tables.len()).to_ident();

            self.tables.push((ident, [0; 256]));
            self.shift = 1;

            1
        };

        let (ref ident, ref mut table) = self.tables.last_mut().unwrap();

        TableView { ident, table, mask }
    }
}

impl<'a> TableView<'a> {
    /// Identifier of the static table this view writes into.
    pub fn ident(&self) -> &'a Ident {
        self.ident
    }

    /// Set this view's bit for `byte`.
    pub fn flag(&mut self, byte: u8) {
        self.table[byte as usize] |= self.mask;
    }

    /// This view's bit mask as an unsuffixed literal.
    pub fn mask(&self) -> Literal {
        Literal::u8_unsuffixed(self.mask)
    }
}

impl ToTokens for TableStack {
    /// Emit one `static … : [u8; 256]` per table; emits nothing when no
    /// view was ever taken (`shift == 0`).
    fn to_tokens(&self, out: &mut TokenStream) {
        if self.shift == 0 {
            return;
        }

        for (ident, table) in self.tables.iter() {
            let bytes = table.iter().copied().map(Literal::u8_unsuffixed);

            out.extend(quote! {
                static #ident: [u8; 256] = [#(#bytes),*];
            });
        }
    }
}
diff --git a/vendor/logos-codegen/src/graph/fork.rs b/vendor/logos-codegen/src/graph/fork.rs
new file mode 100644
index 00000000..6b59836b
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/fork.rs
@@ -0,0 +1,267 @@
use crate::graph::{Disambiguate, Graph, NodeId, Range};

/// A fork node: dispatches on a single input byte.
#[derive(Clone)]
pub struct Fork {
    /// LUT matching byte -> node id
    lut: Box<[Option<NodeId>; 256]>,
    /// State to go to if no arms are matching
    pub miss: Option<NodeId>,
}

impl Fork {
    pub fn new() -> Self {
        Fork {
            lut: Box::new([None; 256]),
            miss: None,
        }
    }

    /// Builder-style setter for the miss target.
    pub fn miss<M>(mut self, miss: M) -> Self
    where
        M: Into<Option<NodeId>>,
    {
        self.miss = miss.into();
        self
    }

    /// Add a branch for every byte in `range`; overlaps with existing
    /// branches are resolved by merging the two targets in `graph`.
    pub fn add_branch<R, T>(&mut self, range: R, then: NodeId, graph: &mut Graph<T>)
    where
        R: Into<Range>,
        T: Disambiguate,
    {
        for byte in range.into() {
            match &mut self.lut[byte as usize] {
                Some(other) if *other != then => {
                    *other = graph.merge(*other, then);
                }
                opt => *opt = Some(then),
            }
        }
    }

    // TODO: Add result with a printable error
    /// Merge `other` into this fork: conflicting miss targets and
    /// conflicting byte slots are merged node-wise in `graph`.
    pub fn merge<T>(&mut self, other: Fork, graph: &mut Graph<T>)
    where
        T: Disambiguate,
    {
        self.miss = match (self.miss, other.miss) {
            (None, None) => None,
            (Some(id), None) | (None, Some(id)) => Some(id),
            (Some(a), Some(b)) => Some(graph.merge(a, b)),
        };

        for (left, right) in self.lut.iter_mut().zip(other.lut.iter()) {
            *left = match (*left, *right) {
                (None, None) => continue,
                (Some(id), None) | (None, Some(id)) => Some(id),
                (Some(a), Some(b)) => Some(graph.merge(a, b)),
            }
        }
    }

    /// Iterate the LUT as contiguous `(Range, NodeId)` pairs, in
    /// ascending byte order.
    pub fn branches(&self) -> ForkIter<'_> {
        ForkIter {
            offset: 0,
            lut: &self.lut,
        }
    }

    /// Checks if all bytes in the `range` have a branch on this
    /// fork, and those branches resolve to the same `NodeId`.
    pub fn contains<R>(&self, range: R) -> Option<NodeId>
    where
        R: Into<Range>,
    {
        let mut range = range.into();
        let byte = range.next()?;
        let first = self.lut[byte as usize]?;

        for byte in range {
            if first != self.lut[byte as usize]? {
                return None;
            }
        }

        Some(first)
    }

    /// Builder-style variant of `add_branch` that panics on overlap
    /// instead of merging (there is no graph here to merge into).
    pub fn branch<R>(mut self, range: R, then: NodeId) -> Self
    where
        R: Into<Range>,
    {
        for byte in range.into() {
            match &mut self.lut[byte as usize] {
                Some(other) if *other != then => {
                    panic!("Overlapping branches");
                }
                opt => *opt = Some(then),
            }
        }
        self
    }

    /// Mark every node reachable from this fork in `filter`
    /// (used for dead-node elimination).
    pub fn shake<T>(&self, graph: &Graph<T>, filter: &mut [bool]) {
        if let Some(id) = self.miss {
            if !filter[id.get()] {
                filter[id.get()] = true;
                graph[id].shake(graph, filter);
            }
        }

        for (_, id) in self.branches() {
            if !filter[id.get()] {
                filter[id.get()] = true;
                graph[id].shake(graph, filter);
            }
        }
    }
}

/// Iterator over a fork's LUT collapsing runs of identical targets
/// into `(Range, NodeId)` items.
pub struct ForkIter<'a> {
    offset: usize,
    lut: &'a [Option<NodeId>; 256],
}

impl Iterator for ForkIter<'_> {
    type Item = (Range, NodeId);

    fn next(&mut self) -> Option<Self::Item> {
        // Consume empty slots
        self.offset += self.lut[self.offset..]
            .iter()
            .take_while(|next| next.is_none())
            .count();

        let then = self.lut.get(self.offset).copied().flatten()?;
        let start = self.offset;

        // Consume all slots with same NodeId target
        self.offset += self.lut[self.offset..]
            .iter()
            .take_while(|next| **next == Some(then))
            .count();

        Some((
            Range {
                start: start as u8,
                end: (self.offset - 1) as u8,
            },
            then,
        ))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::Node;
    use pretty_assertions::assert_eq;

    #[test]
    fn fork_iter() {
        let mut buf = [None; 256];

        for byte in b'4'..=b'7' {
            buf[byte as usize] = Some(NodeId::new(1));
        }
        for byte in b'a'..=b'd' {
            buf[byte as usize] = Some(NodeId::new(2));
        }

        let iter = ForkIter {
            offset: 0,
            lut: &buf,
        };

        assert_eq!(
            &[
                (
                    Range {
                        start: b'4',
                        end: b'7'
                    },
                    NodeId::new(1)
                ),
                (
                    Range {
                        start: b'a',
                        end: b'd'
                    },
                    NodeId::new(2)
                ),
            ],
            &*iter.collect::<Vec<_>>(),
        );
    }

    #[test]
    fn merge_no_conflict() {
        let mut graph = Graph::new();

        let leaf1 = graph.push(Node::Leaf("FOO"));
        let leaf2 = graph.push(Node::Leaf("BAR"));

        let mut fork = Fork::new().branch(b'1', leaf1);

        fork.merge(Fork::new().branch(b'2', leaf2), &mut graph);

        assert_eq!(fork, Fork::new().branch(b'1', leaf1).branch(b'2', leaf2));
    }

    #[test]
    fn merge_miss_right() {
        let mut graph = Graph::new();

        let leaf1 = graph.push(Node::Leaf("FOO"));
        let leaf2 = graph.push(Node::Leaf("BAR"));

        let mut fork = Fork::new().branch(b'1', leaf1);

        fork.merge(Fork::new().miss(leaf2), &mut graph);

        assert_eq!(fork, Fork::new().branch(b'1', leaf1).miss(leaf2));
    }

    #[test]
    fn merge_miss_left() {
        let mut graph = Graph::new();

        let leaf1 = graph.push(Node::Leaf("FOO"));
        let leaf2 = graph.push(Node::Leaf("BAR"));

        let mut fork = Fork::new().miss(leaf1);

        fork.merge(Fork::new().branch(b'2', leaf2), &mut graph);

        assert_eq!(fork, Fork::new().branch(b'2', leaf2).miss(leaf1));
    }

    #[test]
    fn contains_byte() {
        let fork = Fork::new().branch('a'..='z', NodeId::new(42));

        assert_eq!(fork.contains(b't'), Some(NodeId::new(42)));
    }

    #[test]
    fn contains_range() {
        let fork = Fork::new()
            .branch('a'..='m', NodeId::new(42))
            .branch('n'..='z', NodeId::new(42));

        assert_eq!(fork.contains('i'..='r'), Some(NodeId::new(42)));
        assert_eq!(fork.contains('a'..='z'), Some(NodeId::new(42)));
    }

    #[test]
    fn contains_different_ranges() {
        let fork = Fork::new()
            .branch('a'..='m', NodeId::new(42))
            .branch('n'..='z', NodeId::new(47));

        assert_eq!(fork.contains('i'..='r'), None);
        assert_eq!(fork.contains('a'..='z'), None);
        assert_eq!(fork.contains('d'..='f'), Some(NodeId::new(42)));
        assert_eq!(fork.contains('n'..='p'), Some(NodeId::new(47)));
    }
}
diff --git a/vendor/logos-codegen/src/graph/impls.rs b/vendor/logos-codegen/src/graph/impls.rs
new file mode 100644
index 00000000..dc97bdf1
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/impls.rs
@@ -0,0 +1,220 @@
use std::fmt::{self, Debug, Display};
use std::hash::{Hash, Hasher};

use crate::graph::{Fork, Graph, Node, NodeId, Range, Rope};

impl<T> From<Fork> for Node<T> {
    fn from(fork: Fork) -> Self {
        Node::Fork(fork)
    }
}
impl<T> From<Rope> for Node<T> {
    fn from(rope: Rope) -> Self {
        Node::Rope(rope)
    }
}

/// `true` for printable ASCII (0x20..0x7F); decides how a byte is
/// rendered in the debug formatters below.
fn is_ascii(byte: u8) -> bool {
    (0x20..0x7F).contains(&byte)
}

impl Hash for Fork {
    fn hash<H: Hasher>(&self, state: &mut H) {
        for branch in self.branches() {
            branch.hash(state);
        }
        self.miss.hash(state);
    }
}

impl<T> Hash for Node<T> {
    // Note: leaves hash only by a type tag — two distinct leaves hash
    // identically and must be distinguished by other means.
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self {
            Node::Rope(rope) => {
                b"ROPE".hash(state);
                rope.hash(state);
            }
            Node::Fork(fork) => {
                b"FORK".hash(state);
                fork.hash(state);
            }
            Node::Leaf(_) => b"LEAF".hash(state),
        }
    }
}

impl Debug for NodeId {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        Debug::fmt(&self.0, f)
    }
}

/// We don't need debug impls in release builds
// #[cfg(test)]
// NOTE(review): the cfg attribute above is commented out, so everything
// in this module is compiled unconditionally despite the comment.
mod debug {
    use super::*;
    use crate::graph::rope::Miss;
    use crate::graph::Disambiguate;
    use std::cmp::{Ord, Ordering};

    impl Disambiguate for &str {
        fn cmp(left: &&str, right: &&str) -> Ordering {
            Ord::cmp(left, right)
        }
    }

    impl Debug for Range {
        /// Renders as `c`, `[XX]`, `[a-z]`, or `[XX-YY]` depending on
        /// whether the range is a single byte and whether the bytes
        /// are printable ASCII.
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            let Range { start, end } = *self;

            if start != end || !is_ascii(start) {
                f.write_str("[")?;
            }
            match is_ascii(start) {
                true => write!(f, "{}", start as char),
                false => write!(f, "{:02X}", start),
            }?;
            if start != end {
                match is_ascii(end) {
                    true => write!(f, "-{}]", end as char),
                    false => write!(f, "-{:02X}]", end),
                }?;
            } else if !is_ascii(start) {
                f.write_str("]")?;
            }
            Ok(())
        }
    }

    impl Display for Range {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            <Range as Debug>::fmt(self, f)
        }
    }

    impl<T: Debug> Debug for Graph<T> {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            // Skip vacated slots, keeping original indices as keys.
            let entries = self
                .nodes()
                .iter()
                .enumerate()
                .filter_map(|(i, n)| n.as_ref().map(|n| (i, n)));

            f.debug_map().entries(entries).finish()
        }
    }

    /// Helper rendering `lhs ⇒ rhs` arms inside debug output.
    struct Arm<T, U>(T, U);

    impl<T, U> Debug for Arm<T, U>
    where
        T: Display,
        U: Display,
    {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            write!(f, "{} ⇒ {}", self.0, self.1)
        }
    }

    impl Debug for Fork {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            let mut list = f.debug_set();

            for (range, then) in self.branches() {
                list.entry(&Arm(range, then));
            }
            if let Some(id) = self.miss {
                list.entry(&Arm('_', id));
            }

            list.finish()
        }
    }

    impl Display for Miss {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            match self {
                Miss::First(id) => Display::fmt(id, f),
                Miss::Any(id) => write!(f, "{}*", id),
                Miss::None => f.write_str("n/a"),
            }
        }
    }

    impl Debug for Rope {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            use std::fmt::Write;

            let mut rope = String::with_capacity(self.pattern.len());
            for range in self.pattern.iter() {
                write!(rope, "{}", range)?;
            }

            match self.miss.is_none() {
                false => {
                    let mut list = f.debug_list();

                    list.entry(&Arm(rope, self.then));
                    list.entry(&Arm('_', self.miss));

                    list.finish()
                }
                true => Arm(rope, self.then).fmt(f),
            }
        }
    }

    impl PartialEq for Fork {
        fn eq(&self, other: &Self) -> bool {
            self.miss == other.miss && self.branches().eq(other.branches())
        }
    }

    impl<T: Debug> Debug for Node<T> {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            match self {
                Node::Fork(fork) => fork.fmt(f),
                Node::Rope(rope) => rope.fmt(f),
                Node::Leaf(leaf) => leaf.fmt(f),
            }
        }
    }

    use std::ops::RangeInclusive;

    impl From<RangeInclusive<u8>> for Range {
        fn from(range: RangeInclusive<u8>) -> Range {
            Range {
                start: *range.start(),
                end: *range.end(),
            }
        }
    }

    impl From<RangeInclusive<char>> for Range {
        // NOTE(review): truncating `as u8` cast — only valid for ASCII
        // char ranges, which is how the test helpers use it.
        fn from(range: RangeInclusive<char>) -> Range {
            Range {
                start: *range.start() as u8,
                end: *range.end() as u8,
            }
        }
    }

    impl<T> PartialEq<Rope> for Node<T> {
        fn eq(&self, other: &Rope) -> bool {
            match self {
                Node::Rope(rope) => rope == other,
                _ => false,
            }
        }
    }

    impl<T> PartialEq<Fork> for Node<T> {
        fn eq(&self, other: &Fork) -> bool {
            match self {
                Node::Fork(fork) => fork == other,
                _ => false,
            }
        }
    }
}
diff --git a/vendor/logos-codegen/src/graph/meta.rs b/vendor/logos-codegen/src/graph/meta.rs
new file mode 100644
index 00000000..757ced09
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/meta.rs
@@ -0,0 +1,174 @@
+use std::cmp::min;
+use std::collections::BTreeMap;
+use std::ops::{Index, IndexMut};
+
+use crate::graph::{Graph, Node, NodeId};
+
+#[derive(Debug)]
+pub struct Meta {
+ map: BTreeMap<NodeId, MetaItem>,
+}
+
+#[derive(Debug, Default)]
+pub struct MetaItem {
+ /// Number of references to this node
+ pub refcount: usize,
+ /// Minimum number of bytes that ought to be read for this
+ /// node to find a match
+ pub min_read: usize,
+ /// Marks whether or not this node leads to a loop entry node.
+ pub is_loop_init: bool,
+ /// Ids of other nodes that point to this node while this
+ /// node is on a stack (creating a loop)
+ pub loop_entry_from: Vec<NodeId>,
+}
+
+impl Index<NodeId> for Meta {
+ type Output = MetaItem;
+
+ fn index(&self, id: NodeId) -> &MetaItem {
+ &self.map[&id]
+ }
+}
+
+impl IndexMut<NodeId> for Meta {
+ fn index_mut(&mut self, id: NodeId) -> &mut MetaItem {
+ self.map.entry(id).or_default()
+ }
+}
+
+impl MetaItem {
+ fn loop_entry(&mut self, id: NodeId) {
+ if let Err(idx) = self.loop_entry_from.binary_search(&id) {
+ self.loop_entry_from.insert(idx, id);
+ }
+ }
+}
+
+impl Meta {
+ pub fn analyze<T>(root: NodeId, graph: &Graph<T>) -> Self {
+ let mut meta = Meta {
+ map: Default::default(),
+ };
+
+ meta.first_pass(root, root, graph, &mut Vec::new());
+
+ meta
+ }
+
+ pub fn first_pass<T>(
+ &mut self,
+ this: NodeId,
+ parent: NodeId,
+ graph: &Graph<T>,
+ stack: &mut Vec<NodeId>,
+ ) -> &MetaItem {
+ let meta = &mut self[this];
+ let is_done = meta.refcount > 0;
+
+ meta.refcount += 1;
+
+ if stack.contains(&this) {
+ meta.loop_entry(parent);
+ self[parent].is_loop_init = true;
+ }
+ if is_done {
+ return &self[this];
+ }
+
+ stack.push(this);
+
+ let mut min_read;
+
+ match &graph[this] {
+ Node::Fork(fork) => {
+ min_read = usize::MAX;
+ for (_, id) in fork.branches() {
+ let meta = self.first_pass(id, this, graph, stack);
+
+ if meta.is_loop_init {
+ min_read = 1;
+ } else {
+ min_read = min(min_read, meta.min_read + 1);
+ }
+ }
+ if let Some(id) = fork.miss {
+ let meta = self.first_pass(id, this, graph, stack);
+
+ if meta.is_loop_init {
+ min_read = 0;
+ } else {
+ min_read = min(min_read, meta.min_read);
+ }
+ }
+ if min_read == usize::MAX {
+ min_read = 0;
+ }
+ }
+ Node::Rope(rope) => {
+ min_read = rope.pattern.len();
+ let meta = self.first_pass(rope.then, this, graph, stack);
+
+ if !meta.is_loop_init {
+ min_read += meta.min_read;
+ }
+
+ if let Some(id) = rope.miss.first() {
+ let meta = self.first_pass(id, this, graph, stack);
+
+ if meta.is_loop_init {
+ min_read = 0;
+ } else {
+ min_read = min(min_read, meta.min_read);
+ }
+ }
+ }
+ Node::Leaf(_) => min_read = 0,
+ }
+
+ stack.pop();
+
+ let meta = &mut self[this];
+ meta.min_read = min_read;
+ let second_pass = meta.loop_entry_from.clone();
+
+ for id in second_pass {
+ self.meta_second_pass(id, graph);
+ }
+
+ &self[this]
+ }
+
+ fn meta_second_pass<T>(&mut self, id: NodeId, graph: &Graph<T>) {
+ let mut min_read;
+
+ match &graph[id] {
+ Node::Fork(fork) => {
+ min_read = usize::MAX;
+ for (_, id) in fork.branches() {
+ let meta = &self[id];
+
+ if meta.is_loop_init {
+ min_read = 1;
+ } else {
+ min_read = min(min_read, meta.min_read + 1);
+ }
+ }
+ if min_read == usize::MAX {
+ min_read = 0;
+ }
+ }
+ Node::Rope(rope) => {
+ min_read = rope.pattern.len();
+ let meta = &self[rope.then];
+
+ if !meta.is_loop_init {
+ min_read += meta.min_read;
+ }
+ }
+ Node::Leaf(_) => unreachable!(),
+ }
+
+ self[id].min_read = min_read;
+ }
+}
diff --git a/vendor/logos-codegen/src/graph/mod.rs b/vendor/logos-codegen/src/graph/mod.rs
new file mode 100644
index 00000000..6d218e8c
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/mod.rs
@@ -0,0 +1,566 @@
+use std::cmp::Ordering;
+use std::collections::btree_map::Entry;
+use std::collections::BTreeMap as Map;
+use std::hash::{Hash, Hasher};
+use std::num::NonZeroU32;
+use std::ops::Index;
+
+use fnv::FnvHasher;
+
+mod fork;
+mod impls;
+mod meta;
+mod range;
+mod regex;
+mod rope;
+
+pub use self::fork::Fork;
+pub use self::meta::Meta;
+pub use self::range::Range;
+pub use self::rope::Rope;
+
+/// Disambiguation error during the attempt to merge two leaf
+/// nodes with the same priority
+#[derive(Debug)]
+pub struct DisambiguationError(pub NodeId, pub NodeId);
+
+pub struct Graph<Leaf> {
+ /// Internal storage of all allocated nodes. Once a node is
+ /// put here, it should never be mutated.
+ nodes: Vec<Option<Node<Leaf>>>,
+ /// When merging two nodes into a new node, we store the two
+ /// entry keys and the result, so that we don't merge the same
+ /// two nodes multiple times.
+ ///
+ /// Most of the time the entry we want to find will be the last
+ /// one that has been inserted, so we can use a vec with reverse
+ /// order search to get O(1) searches much faster than any *Map.
+ merges: Map<Merge, NodeId>,
+ /// Another map used for accounting. Before `.push`ing a new node
+ /// onto the graph (inserts are exempt), we hash it and find if
+ /// an identical(!) node has been created before.
+ hashes: Map<u64, NodeId>,
+ /// Instead of handling errors over return types, opt to collect
+ /// them internally.
+ errors: Vec<DisambiguationError>,
+    /// Deferred merges. When attempting to merge a node with an
+ /// empty reserved slot, the merging operation is deferred until
+ /// the reserved slot is populated. This is a stack that keeps track
+ /// of all such deferred merges
+ deferred: Vec<DeferredMerge>,
+}
+
+/// Trait to be implemented on `Leaf` nodes in order to disambiguate
+/// between them.
+pub trait Disambiguate {
+ fn cmp(left: &Self, right: &Self) -> Ordering;
+}
+
+/// Id of a Node in the graph. `NodeId` can be referencing an empty
+/// slot that is going to be populated later in time.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct NodeId(NonZeroU32);
+
+impl Hash for NodeId {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ // Always use little-endian byte order for hashing to avoid
+ // different code generation on big-endian platforms due to
+        // iteration over a HashMap;
+ // see https://github.com/maciejhirsz/logos/issues/427.
+ state.write(&self.0.get().to_le_bytes())
+ }
+}
+
+impl NodeId {
+ fn get(self) -> usize {
+ self.0.get() as usize
+ }
+
+ fn new(n: usize) -> NodeId {
+ NodeId(NonZeroU32::new(n as u32).expect("Invalid NodeId"))
+ }
+}
+
+/// Unique reserved `NodeId` that is guaranteed to point to an
+/// empty allocated slot in the graph. It's safe to create multiple
+/// `NodeId` copies of `ReservedId`, however API users should never
+/// be able to clone a `ReservedId`, or create a new one from `NodeId`.
+///
+/// `ReservedId` is consumed once passed into `Graph::insert`.
+#[derive(Debug)]
+pub struct ReservedId(NodeId);
+
+impl ReservedId {
+ pub fn get(&self) -> NodeId {
+ self.0
+ }
+}
+
+/// Merge key used to lookup whether two nodes have been previously
+/// mered, so we can avoid duplicating merges, potentially running into
+/// loops that blow out the stack.
+///
+/// `Merge::new(a, b)` should always equal to `Merge::new(b, a)` to ensure
+/// that node merges are symmetric.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)]
+struct Merge(NodeId, NodeId);
+
+impl Merge {
+ fn new(a: NodeId, b: NodeId) -> Self {
+ if a < b {
+ Merge(a, b)
+ } else {
+ Merge(b, a)
+ }
+ }
+}
+
+/// When attempting to merge two nodes, one of which was not yet created,
+/// we can record such attempt, and execute the merge later on when the
+/// `awaiting` has been `insert`ed into the graph.
+#[derive(Debug)]
+pub struct DeferredMerge {
+ awaiting: NodeId,
+ with: NodeId,
+ into: ReservedId,
+}
+
+impl<Leaf> Graph<Leaf> {
+ pub fn new() -> Self {
+ Graph {
+ // Start with an empty slot so we can start
+ // counting NodeIds from 1 and use NonZero
+ // optimizations
+ nodes: vec![None],
+ merges: Map::new(),
+ hashes: Map::new(),
+ errors: Vec::new(),
+ deferred: Vec::new(),
+ }
+ }
+
+ pub fn errors(&self) -> &[DisambiguationError] {
+ &self.errors
+ }
+
+ fn next_id(&self) -> NodeId {
+ NodeId::new(self.nodes.len())
+ }
+
+ /// Reserve an empty slot for a node on the graph and return an
+ /// id for it. `ReservedId` cannot be cloned, and must be consumed
+ /// by calling `insert` on the graph.
+ pub fn reserve(&mut self) -> ReservedId {
+ let id = self.next_id();
+
+ self.nodes.push(None);
+
+ ReservedId(id)
+ }
+
+ /// Insert a node at a given, previously reserved id. Returns the
+ /// inserted `NodeId`.
+ pub fn insert<N>(&mut self, reserved: ReservedId, node: N) -> NodeId
+ where
+ N: Into<Node<Leaf>>,
+ Leaf: Disambiguate,
+ {
+ let id = reserved.get();
+
+ self.nodes[id.get()] = Some(node.into());
+
+ let mut awaiting = Vec::new();
+
+ // Partition out all `DeferredMerge`s that can be completed
+ // now that this `ReservedId` has a `Node` inserted into it.
+ for idx in (0..self.deferred.len()).rev() {
+ if self.deferred[idx].awaiting == id {
+ awaiting.push(self.deferred.remove(idx));
+ }
+ }
+
+ // Complete deferred merges. We've collected them from the back,
+ // so we must iterate through them from the back as well to restore
+ // proper order of merges in case there is some cascading going on.
+ for DeferredMerge {
+ awaiting,
+ with,
+ into,
+ } in awaiting.into_iter().rev()
+ {
+ self.merge_unchecked(awaiting, with, into);
+ }
+
+ id
+ }
+
+ /// Push a node onto the graph and get an id to it. If an identical
+ /// node has already been pushed on the graph, it will return the id
+ /// of that node instead.
+ pub fn push<B>(&mut self, node: B) -> NodeId
+ where
+ B: Into<Node<Leaf>>,
+ {
+ let node = node.into();
+
+ if let Node::Leaf(_) = node {
+ return self.push_unchecked(node);
+ }
+
+ let mut hasher = FnvHasher::default();
+ node.hash(&mut hasher);
+
+ let next_id = self.next_id();
+
+ match self.hashes.entry(hasher.finish()) {
+ Entry::Occupied(occupied) => {
+ let id = *occupied.get();
+
+ if self[id].eq(&node) {
+ return id;
+ }
+ }
+ Entry::Vacant(vacant) => {
+ vacant.insert(next_id);
+ }
+ }
+
+ self.push_unchecked(node)
+ }
+
+ fn push_unchecked(&mut self, node: Node<Leaf>) -> NodeId {
+ let id = self.next_id();
+
+ self.nodes.push(Some(node));
+
+ id
+ }
+
+ /// If nodes `a` and `b` have been already merged, return the
+ /// `NodeId` of the node they have been merged into.
+ fn find_merged(&self, a: NodeId, b: NodeId) -> Option<NodeId> {
+ let probe = Merge::new(a, b);
+
+ self.merges.get(&probe).copied()
+ }
+
+ /// Mark that nodes `a` and `b` have been merged into `product`.
+ ///
+ /// This will also mark merging `a` and `product`, as well as
+ /// `b` and `product` into `product`, since those are symmetric
+ /// operations.
+ ///
+ /// This is necessary to break out asymmetric merge loops.
+ fn set_merged(&mut self, a: NodeId, b: NodeId, product: NodeId) {
+ self.merges.insert(Merge::new(a, b), product);
+ self.merges.insert(Merge::new(a, product), product);
+ self.merges.insert(Merge::new(b, product), product);
+ }
+
+ /// Merge the nodes at id `a` and `b`, returning a new id.
+ pub fn merge(&mut self, a: NodeId, b: NodeId) -> NodeId
+ where
+ Leaf: Disambiguate,
+ {
+ if a == b {
+ return a;
+ }
+
+ // If the id pair is already merged (or is being merged), just return the id
+ if let Some(id) = self.find_merged(a, b) {
+ return id;
+ }
+
+ match (self.get(a), self.get(b)) {
+ (None, None) => {
+ panic!(
+ "Merging two reserved nodes! This is a bug, please report it:\n\
+ \n\
+ https://github.com/maciejhirsz/logos/issues"
+ );
+ }
+ (None, Some(_)) => {
+ let reserved = self.reserve();
+ let id = reserved.get();
+ self.deferred.push(DeferredMerge {
+ awaiting: a,
+ with: b,
+ into: reserved,
+ });
+ self.set_merged(a, b, id);
+
+ return id;
+ }
+ (Some(_), None) => {
+ let reserved = self.reserve();
+ let id = reserved.get();
+ self.deferred.push(DeferredMerge {
+ awaiting: b,
+ with: a,
+ into: reserved,
+ });
+ self.set_merged(a, b, id);
+
+ return id;
+ }
+ (Some(Node::Leaf(left)), Some(Node::Leaf(right))) => {
+ return match Disambiguate::cmp(left, right) {
+ Ordering::Less => b,
+ Ordering::Greater => a,
+ Ordering::Equal => {
+ self.errors.push(DisambiguationError(a, b));
+
+ a
+ }
+ };
+ }
+ _ => (),
+ }
+
+ // Reserve the id for the merge and save it. Since the graph can contain loops,
+ // this prevents us from trying to merge the same id pair in a loop, blowing up
+ // the stack.
+ let reserved = self.reserve();
+ self.set_merged(a, b, reserved.get());
+
+ self.merge_unchecked(a, b, reserved)
+ }
+
+ /// Unchecked merge of `a` and `b`. This fn assumes that `a` and `b` are
+ /// not pointing to empty slots.
+ fn merge_unchecked(&mut self, a: NodeId, b: NodeId, reserved: ReservedId) -> NodeId
+ where
+ Leaf: Disambiguate,
+ {
+ let merged_rope = match (self.get(a), self.get(b)) {
+ (Some(Node::Rope(rope)), _) => {
+ let rope = rope.clone();
+
+ self.merge_rope(rope, b)
+ }
+ (_, Some(Node::Rope(rope))) => {
+ let rope = rope.clone();
+
+ self.merge_rope(rope, a)
+ }
+ _ => None,
+ };
+
+ if let Some(rope) = merged_rope {
+ return self.insert(reserved, rope);
+ }
+
+ let mut fork = self.fork_off(a);
+ fork.merge(self.fork_off(b), self);
+
+ let mut stack = vec![reserved.get()];
+
+ // Flatten the fork
+ while let Some(miss) = fork.miss {
+ if stack.contains(&miss) {
+ break;
+ }
+ stack.push(miss);
+
+ let other = match self.get(miss) {
+ Some(Node::Fork(other)) => other.clone(),
+ Some(Node::Rope(other)) => other.clone().into_fork(self),
+ _ => break,
+ };
+ match other.miss {
+ Some(id) if self.get(id).is_none() => break,
+ _ => (),
+ }
+ fork.miss = None;
+ fork.merge(other, self);
+ }
+
+ self.insert(reserved, fork)
+ }
+
+ fn merge_rope(&mut self, rope: Rope, other: NodeId) -> Option<Rope>
+ where
+ Leaf: Disambiguate,
+ {
+ match self.get(other) {
+ Some(Node::Fork(fork)) if rope.miss.is_none() => {
+ // Count how many consecutive ranges in this rope would
+ // branch into the fork that results in a loop.
+ //
+ // e.g.: for rope "foobar" and a looping fork [a-z]: 6
+ let count = rope
+ .pattern
+ .iter()
+ .take_while(|range| fork.contains(**range) == Some(other))
+ .count();
+
+ let mut rope = rope.split_at(count, self)?.miss_any(other);
+
+ rope.then = self.merge(rope.then, other);
+
+ Some(rope)
+ }
+ Some(Node::Rope(other)) => {
+ let (prefix, miss) = rope.prefix(other)?;
+
+ let (a, b) = (rope, other.clone());
+
+ let a = a.remainder(prefix.len(), self);
+ let b = b.remainder(prefix.len(), self);
+
+ let rope = Rope::new(prefix, self.merge(a, b)).miss(miss);
+
+ Some(rope)
+ }
+ Some(Node::Leaf(_)) | None => {
+ if rope.miss.is_none() {
+ Some(rope.miss(other))
+ } else {
+ None
+ }
+ }
+ _ => None,
+ }
+ }
+
+ pub fn fork_off(&mut self, id: NodeId) -> Fork
+ where
+ Leaf: Disambiguate,
+ {
+ match self.get(id) {
+ Some(Node::Fork(fork)) => fork.clone(),
+ Some(Node::Rope(rope)) => rope.clone().into_fork(self),
+ Some(Node::Leaf(_)) | None => Fork::new().miss(id),
+ }
+ }
+
+ pub fn nodes(&self) -> &[Option<Node<Leaf>>] {
+ &self.nodes
+ }
+
+ /// Find all nodes that have no references and remove them.
+ pub fn shake(&mut self, root: NodeId) {
+ let mut filter = vec![false; self.nodes.len()];
+
+ filter[root.get()] = true;
+
+ self[root].shake(self, &mut filter);
+
+ for (id, referenced) in filter.into_iter().enumerate() {
+ if !referenced {
+ self.nodes[id] = None;
+ }
+ }
+ }
+
+ pub fn get(&self, id: NodeId) -> Option<&Node<Leaf>> {
+ self.nodes.get(id.get())?.as_ref()
+ }
+}
+
+impl<Leaf> Index<NodeId> for Graph<Leaf> {
+ type Output = Node<Leaf>;
+
+ fn index(&self, id: NodeId) -> &Node<Leaf> {
+ self.get(id).expect(
+ "Indexing into an empty node. This is a bug, please report it at:\n\
+ \n\
+ https://github.com/maciejhirsz/logos/issues",
+ )
+ }
+}
+
+impl std::fmt::Display for NodeId {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ std::fmt::Display::fmt(&self.0, f)
+ }
+}
+
+#[cfg_attr(test, derive(PartialEq))]
+pub enum Node<Leaf> {
+ /// Fork node, can lead to more than one state
+ Fork(Fork),
+ /// Rope node, can lead to one state on match, one state on miss
+ Rope(Rope),
+ /// Leaf node, terminal state
+ Leaf(Leaf),
+}
+
+impl<Leaf> Node<Leaf> {
+ pub fn miss(&self) -> Option<NodeId> {
+ match self {
+ Node::Rope(rope) => rope.miss.first(),
+ Node::Fork(fork) => fork.miss,
+ Node::Leaf(_) => None,
+ }
+ }
+
+ fn eq(&self, other: &Node<Leaf>) -> bool {
+ match (self, other) {
+ (Node::Fork(a), Node::Fork(b)) => a == b,
+ (Node::Rope(a), Node::Rope(b)) => a == b,
+ _ => false,
+ }
+ }
+
+ fn shake(&self, graph: &Graph<Leaf>, filter: &mut [bool]) {
+ match self {
+ Node::Fork(fork) => fork.shake(graph, filter),
+ Node::Rope(rope) => rope.shake(graph, filter),
+ Node::Leaf(_) => (),
+ }
+ }
+
+ pub fn unwrap_leaf(&self) -> &Leaf {
+ match self {
+ Node::Fork(_) => panic!("Internal Error: called unwrap_leaf on a fork"),
+ Node::Rope(_) => panic!("Internal Error: called unwrap_leaf on a rope"),
+ Node::Leaf(leaf) => leaf,
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn leaf_stack_size() {
+ use std::mem::size_of;
+
+ const WORD: usize = size_of::<usize>();
+ const NODE: usize = size_of::<Node<()>>();
+
+ assert!(NODE <= 6 * WORD, "Size of Node<()> is {} bytes!", NODE);
+ }
+
+ #[test]
+ fn create_a_loop() {
+ let mut graph = Graph::new();
+
+ let token = graph.push(Node::Leaf("IDENT"));
+ let id = graph.reserve();
+ let fork = Fork::new().branch('a'..='z', id.get()).miss(token);
+ let root = graph.insert(id, fork);
+
+ assert_eq!(graph[token], Node::Leaf("IDENT"));
+ assert_eq!(graph[root], Fork::new().branch('a'..='z', root).miss(token),);
+ }
+
+ #[test]
+ fn fork_off() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = graph.push(Rope::new("rope", leaf));
+ let fork = graph.push(Fork::new().branch(b'!', leaf));
+
+ assert_eq!(graph.fork_off(leaf), Fork::new().miss(leaf));
+ assert_eq!(
+ graph.fork_off(rope),
+ Fork::new().branch(b'r', NodeId::new(graph.nodes.len() - 1))
+ );
+ assert_eq!(graph.fork_off(fork), Fork::new().branch(b'!', leaf));
+ }
+}
diff --git a/vendor/logos-codegen/src/graph/range.rs b/vendor/logos-codegen/src/graph/range.rs
new file mode 100644
index 00000000..a4d23d3b
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/range.rs
@@ -0,0 +1,144 @@
+use regex_syntax::hir::ClassBytesRange;
+use regex_syntax::hir::ClassUnicodeRange;
+use regex_syntax::utf8::Utf8Range;
+
+use std::cmp::{Ord, Ordering};
+
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub struct Range {
+ pub start: u8,
+ pub end: u8,
+}
+
+impl Range {
+ pub fn as_byte(&self) -> Option<u8> {
+ if self.is_byte() {
+ Some(self.start)
+ } else {
+ None
+ }
+ }
+
+ pub fn is_byte(&self) -> bool {
+ self.start == self.end
+ }
+}
+
+impl From<u8> for Range {
+ fn from(byte: u8) -> Range {
+ Range {
+ start: byte,
+ end: byte,
+ }
+ }
+}
+
+impl From<&u8> for Range {
+ fn from(byte: &u8) -> Range {
+ Range::from(*byte)
+ }
+}
+
+impl Iterator for Range {
+ type Item = u8;
+
+ fn next(&mut self) -> Option<u8> {
+ match self.start.cmp(&self.end) {
+ std::cmp::Ordering::Less => {
+ let res = self.start;
+ self.start += 1;
+
+ Some(res)
+ }
+ std::cmp::Ordering::Equal => {
+ let res = self.start;
+
+ // Necessary so that range 0xFF-0xFF doesn't loop forever
+ self.start = 0xFF;
+ self.end = 0x00;
+
+ Some(res)
+ }
+ std::cmp::Ordering::Greater => None,
+ }
+ }
+}
+
+impl PartialOrd for Range {
+ fn partial_cmp(&self, other: &Range) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for Range {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.start.cmp(&other.start)
+ }
+}
+
+impl From<Utf8Range> for Range {
+ fn from(r: Utf8Range) -> Range {
+ Range {
+ start: r.start,
+ end: r.end,
+ }
+ }
+}
+
+impl From<ClassUnicodeRange> for Range {
+ fn from(r: ClassUnicodeRange) -> Range {
+ let start = r.start() as u32;
+ let end = r.end() as u32;
+
+ if start >= 128 || end >= 128 && end != 0x0010FFFF {
+ panic!("Casting non-ascii ClassUnicodeRange to Range")
+ }
+
+ Range {
+ start: start as u8,
+ end: end as u8,
+ }
+ }
+}
+
+impl From<ClassBytesRange> for Range {
+ fn from(r: ClassBytesRange) -> Range {
+ Range {
+ start: r.start(),
+ end: r.end(),
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn range_iter_one() {
+ let byte = Range::from(b'!');
+ let collected = byte.take(1000).collect::<Vec<_>>();
+
+ assert_eq!(b"!", &collected[..]);
+ }
+
+ #[test]
+ fn range_iter_few() {
+ let byte = Range {
+ start: b'a',
+ end: b'd',
+ };
+ let collected = byte.take(1000).collect::<Vec<_>>();
+
+ assert_eq!(b"abcd", &collected[..]);
+ }
+
+ #[test]
+ fn range_iter_bounds() {
+ let byte = Range::from(0xFA..=0xFF);
+
+ let collected = byte.take(1000).collect::<Vec<_>>();
+
+ assert_eq!(b"\xFA\xFB\xFC\xFD\xFE\xFF", &collected[..]);
+ }
+}
diff --git a/vendor/logos-codegen/src/graph/regex.rs b/vendor/logos-codegen/src/graph/regex.rs
new file mode 100644
index 00000000..d55c490f
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/regex.rs
@@ -0,0 +1,295 @@
+use std::fmt::Debug;
+
+use regex_syntax::utf8::Utf8Sequences;
+
+use crate::graph::{Disambiguate, Fork, Graph, Node, NodeId, Range, ReservedId, Rope};
+use crate::mir::{Class, ClassUnicode, Literal, Mir};
+
+impl<Leaf: Disambiguate + Debug> Graph<Leaf> {
+ pub fn regex(&mut self, mir: Mir, then: NodeId) -> NodeId {
+ self.parse_mir(&mir, then, None, None, false)
+ }
+
+ fn parse_mir(
+ &mut self,
+ mir: &Mir,
+ then: NodeId,
+ miss: Option<NodeId>,
+ reserved: Option<ReservedId>,
+ repeated: bool,
+ ) -> NodeId {
+ match mir {
+ Mir::Empty => then,
+ Mir::Loop(mir) => {
+ let reserved_first = reserved.unwrap_or_else(|| self.reserve());
+
+ let (new_then, new_miss);
+ if let Some(old_miss) = miss {
+ // We have to separate the first iteration from the other iterations,
+ // because the `old_miss` path must only be taken if we miss the first
+ // iteration.
+ let reserved_next = self.reserve();
+ new_then = self.parse_mir(
+ mir,
+ reserved_next.get(),
+ Some(then),
+ Some(reserved_next),
+ true,
+ );
+ new_miss = self.merge(old_miss, then);
+ } else {
+ new_then = reserved_first.get();
+ new_miss = then;
+ }
+
+ self.parse_mir(mir, new_then, Some(new_miss), Some(reserved_first), true)
+ }
+ Mir::Maybe(mir) => {
+ let miss = match miss {
+ Some(id) => self.merge(id, then),
+ None => then,
+ };
+
+ self.parse_mir(mir, then, Some(miss), reserved, true)
+ }
+ Mir::Alternation(alternation) => {
+ let mut fork = Fork::new().miss(miss);
+
+ for mir in alternation {
+ let id = self.parse_mir(mir, then, None, None, repeated);
+ let alt = self.fork_off(id);
+
+ fork.merge(alt, self);
+ }
+
+ self.insert_or_push(reserved, fork)
+ }
+ Mir::Literal(literal) => {
+ let pattern = literal.0.to_vec();
+
+ self.insert_or_push(reserved, Rope::new(pattern, then).miss(miss))
+ }
+ Mir::Concat(concat) => {
+ // Take an initial guess at the capacity - estimates a little worse than an average case
+ // scenario by assuming every concat element is singular but has a full code-point unicode literal.
+ // The only way to get the actual size of the Vec is if every sub-concat node is added up.
+ let mut ropebuf: Vec<Range> = Vec::with_capacity(concat.len() * 4);
+ let mut then = then;
+
+ let mut handle_bytes = |graph: &mut Self, mir: &Mir, then: &mut NodeId| match mir {
+ Mir::Literal(Literal(bytes)) => {
+ ropebuf.extend(bytes.iter().rev().map(Into::<Range>::into));
+ true
+ }
+ Mir::Class(Class::Unicode(class)) if is_one_ascii(class, repeated) => {
+ ropebuf.push(class.ranges()[0].into());
+ true
+ }
+ Mir::Class(Class::Bytes(class)) if class.ranges().len() == 1 => {
+ ropebuf.push(class.ranges()[0].into());
+ true
+ }
+ _ => {
+ if !ropebuf.is_empty() {
+ let rope =
+ Rope::new(ropebuf.iter().cloned().rev().collect::<Vec<_>>(), *then);
+
+ *then = graph.push(rope);
+ ropebuf = Vec::with_capacity(concat.len() * 4);
+ }
+ false
+ }
+ };
+
+ for mir in concat[1..].iter().rev() {
+ if !handle_bytes(self, mir, &mut then) {
+ then = self.parse_mir(mir, then, None, None, false);
+ }
+ }
+
+ let first_mir = &concat[0];
+ if handle_bytes(self, first_mir, &mut then) {
+ let rope = Rope::new(ropebuf.iter().cloned().rev().collect::<Vec<_>>(), then)
+ .miss(miss);
+ self.insert_or_push(reserved, rope)
+ } else {
+ self.parse_mir(first_mir, then, miss, reserved, false)
+ }
+ }
+ Mir::Class(Class::Unicode(class)) if !is_ascii(class, repeated) => {
+ let mut ropes = class
+ .iter()
+ .flat_map(|range| Utf8Sequences::new(range.start(), range.end()))
+ .map(|sequence| Rope::new(sequence.as_slice(), then))
+ .collect::<Vec<_>>();
+
+ if ropes.len() == 1 {
+ let rope = ropes.remove(0);
+
+ return self.insert_or_push(reserved, rope.miss(miss));
+ }
+
+ let mut root = Fork::new().miss(miss);
+
+ for rope in ropes {
+ let fork = rope.into_fork(self);
+ root.merge(fork, self);
+ }
+
+ self.insert_or_push(reserved, root)
+ }
+ Mir::Class(class) => {
+ let mut fork = Fork::new().miss(miss);
+
+ let class: Vec<Range> = match class {
+ Class::Unicode(u) => u.iter().copied().map(Into::into).collect(),
+ Class::Bytes(b) => b.iter().copied().map(Into::into).collect(),
+ };
+
+ for range in class {
+ fork.add_branch(range, then, self);
+ }
+
+ self.insert_or_push(reserved, fork)
+ }
+ }
+ }
+
+ fn insert_or_push<N>(&mut self, id: Option<ReservedId>, node: N) -> NodeId
+ where
+ N: Into<Node<Leaf>>,
+ {
+ match id {
+ Some(id) => self.insert(id, node),
+ None => self.push(node),
+ }
+ }
+}
+
+/// Return whether current class unicode is ascii.
+///
+/// Because unicode ranges are iterated in increasing order,
+/// it is only necessary to check the last range.
+///
+/// If the check is performed in a repetition,
+/// a fast path is used by checking if end of range is 0x0010_FFFF.
+fn is_ascii(class: &ClassUnicode, repeated: bool) -> bool {
+ class.iter().last().map_or(true, |range| {
+ let start = range.start() as u32;
+ let end = range.end() as u32;
+ end < 128 || (repeated && start < 128 && end == 0x0010_FFFF)
+ })
+}
+
+/// Return whether current class unicode is ascii and only contains
+/// one range.
+///
+/// See [`is_ascii`] function for more details.
+fn is_one_ascii(class: &ClassUnicode, repeated: bool) -> bool {
+ if class.ranges().len() != 1 {
+ return false;
+ }
+
+ let range = &class.ranges()[0];
+ let start = range.start() as u32;
+ let end = range.end() as u32;
+ end < 128 || (repeated && start < 128 && end == 0x0010_FFFF)
+}
+
+#[cfg(test)]
+mod tests {
+ use std::num::NonZeroU32;
+
+ use super::*;
+ use crate::graph::Node;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn rope() {
+ let mut graph = Graph::new();
+
+ let mir = Mir::utf8("foobar").unwrap();
+
+ assert_eq!(mir.priority(), 12);
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let id = graph.regex(mir, leaf);
+
+ assert_eq!(graph[id], Node::Rope(Rope::new("foobar", leaf)),)
+ }
+
+ #[test]
+ fn alternation() {
+ let mut graph = Graph::new();
+
+ let mir = Mir::utf8("a|b").unwrap();
+
+ assert_eq!(mir.priority(), 2);
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let id = graph.regex(mir, leaf);
+
+ assert_eq!(
+ graph[id],
+ Node::Fork(Fork::new().branch(b'a', leaf).branch(b'b', leaf)),
+ );
+ }
+
+ #[test]
+ fn repeat() {
+ let mut graph = Graph::new();
+
+ let mir = Mir::utf8("[a-z]*").unwrap();
+
+ assert_eq!(mir.priority(), 0);
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let id = graph.regex(mir, leaf);
+
+ assert_eq!(
+ graph[id],
+ Node::Fork(
+ Fork::new()
+ .branch('a'..='z', id) // goto self == loop
+ .miss(leaf)
+ ),
+ );
+ }
+
+ #[test]
+ fn maybe() {
+ let mut graph = Graph::new();
+
+ let mir = Mir::utf8("[a-z]?").unwrap();
+
+ assert_eq!(mir.priority(), 0);
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let id = graph.regex(mir, leaf);
+
+ assert_eq!(
+ graph[id],
+ Node::Fork(Fork::new().branch('a'..='z', leaf).miss(leaf)),
+ );
+ }
+
+ #[test]
+ fn long_concat_389() {
+ let mut graph = Graph::new();
+
+ let mir = Mir::utf8("abcdefghijklmnopqrstuvwxyz*").unwrap();
+
+ assert_eq!(mir.priority(), 50);
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let id = graph.regex(mir, leaf);
+ let sub_id = NodeId(NonZeroU32::new(2).unwrap());
+
+ assert_eq!(
+ graph[id],
+ Node::Rope(Rope::new("abcdefghijklmnopqrstuvwxy", sub_id))
+ );
+
+ assert_eq!(graph[sub_id], Node::Rope(Rope::new("z", sub_id).miss(leaf)))
+ }
+}
diff --git a/vendor/logos-codegen/src/graph/rope.rs b/vendor/logos-codegen/src/graph/rope.rs
new file mode 100644
index 00000000..6e563fac
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/rope.rs
@@ -0,0 +1,330 @@
+use std::ops::Deref;
+
+use crate::graph::{Disambiguate, Fork, Graph, NodeId, Range};
+
+#[derive(PartialEq, Clone, Hash)]
+pub struct Rope {
+ pub pattern: Pattern,
+ pub then: NodeId,
+ pub miss: Miss,
+}
+
+#[derive(PartialEq, Clone, Hash)]
+pub struct Pattern(pub Vec<Range>);
+
+impl Deref for Pattern {
+ type Target = [Range];
+
+ fn deref(&self) -> &[Range] {
+ &self.0
+ }
+}
+
+/// Because Ropes could potentially fail a match mid-pattern,
+/// a regular `Option` is not sufficient here.
+#[derive(PartialEq, Clone, Copy, Hash)]
+pub enum Miss {
+ /// Same as Option::None, error on fail
+ None,
+ /// Jump to id if first byte does not match, fail on partial match
+ First(NodeId),
+ /// Jump to id on partial or empty match
+ Any(NodeId),
+}
+
+impl Miss {
+ pub fn is_none(&self) -> bool {
+ matches!(self, Miss::None)
+ }
+
+ pub fn first(self) -> Option<NodeId> {
+ match self {
+ Miss::First(id) | Miss::Any(id) => Some(id),
+ _ => None,
+ }
+ }
+
+ pub fn take_first(&mut self) -> Option<NodeId> {
+ match *self {
+ Miss::First(id) => {
+ *self = Miss::None;
+
+ Some(id)
+ }
+ Miss::Any(id) => Some(id),
+ Miss::None => None,
+ }
+ }
+}
+
+impl From<Option<NodeId>> for Miss {
+ fn from(miss: Option<NodeId>) -> Self {
+ match miss {
+ Some(id) => Miss::First(id),
+ None => Miss::None,
+ }
+ }
+}
+
+impl From<NodeId> for Miss {
+ fn from(id: NodeId) -> Self {
+ Miss::First(id)
+ }
+}
+
+impl Rope {
+ pub fn new<P>(pattern: P, then: NodeId) -> Self
+ where
+ P: Into<Pattern>,
+ {
+ Rope {
+ pattern: pattern.into(),
+ then,
+ miss: Miss::None,
+ }
+ }
+
+ pub fn miss<M>(mut self, miss: M) -> Self
+ where
+ M: Into<Miss>,
+ {
+ self.miss = miss.into();
+ self
+ }
+
+ pub fn miss_any(mut self, miss: NodeId) -> Self {
+ self.miss = Miss::Any(miss);
+ self
+ }
+
+ pub fn into_fork<T>(mut self, graph: &mut Graph<T>) -> Fork
+ where
+ T: Disambiguate,
+ {
+ let first = self.pattern.0.remove(0);
+ let miss = self.miss.take_first();
+
+ // The new fork will lead to a new rope,
+ // or the old target if no new rope was created
+ let then = match self.pattern.len() {
+ 0 => self.then,
+ _ => graph.push(self),
+ };
+
+ Fork::new().branch(first, then).miss(miss)
+ }
+
+ pub fn prefix(&self, other: &Self) -> Option<(Pattern, Miss)> {
+ let count = self
+ .pattern
+ .iter()
+ .zip(other.pattern.iter())
+ .take_while(|(a, b)| a == b)
+ .count();
+
+ let pattern = match count {
+ 0 => return None,
+ n => self.pattern[..n].into(),
+ };
+ let miss = match (self.miss, other.miss) {
+ (Miss::None, miss) => miss,
+ (miss, Miss::None) => miss,
+ _ => return None,
+ };
+
+ Some((pattern, miss))
+ }
+
+ pub fn split_at<T>(mut self, at: usize, graph: &mut Graph<T>) -> Option<Rope>
+ where
+ T: Disambiguate,
+ {
+ match at {
+ 0 => return None,
+ n if n == self.pattern.len() => return Some(self),
+ _ => (),
+ }
+
+ let (this, next) = self.pattern.split_at(at);
+
+ let next_miss = match self.miss {
+ Miss::Any(_) => self.miss,
+ _ => Miss::None,
+ };
+
+ let next = graph.push(Rope {
+ pattern: next.into(),
+ miss: next_miss,
+ then: self.then,
+ });
+
+ self.pattern = this.into();
+ self.then = next;
+
+ Some(self)
+ }
+
+ pub fn remainder<T>(mut self, at: usize, graph: &mut Graph<T>) -> NodeId
+ where
+ T: Disambiguate,
+ {
+ self.pattern = self.pattern[at..].into();
+
+ match self.pattern.len() {
+ 0 => self.then,
+ _ => graph.push(self),
+ }
+ }
+
+ pub fn shake<T>(&self, graph: &Graph<T>, filter: &mut [bool]) {
+ if let Some(id) = self.miss.first() {
+ if !filter[id.get()] {
+ filter[id.get()] = true;
+ graph[id].shake(graph, filter);
+ }
+ }
+
+ if !filter[self.then.get()] {
+ filter[self.then.get()] = true;
+ graph[self.then].shake(graph, filter);
+ }
+ }
+}
+
+impl Pattern {
+ pub fn to_bytes(&self) -> Option<Vec<u8>> {
+ let mut out = Vec::with_capacity(self.len());
+
+ for range in self.iter() {
+ out.push(range.as_byte()?);
+ }
+
+ Some(out)
+ }
+}
+
+impl<T> From<&[T]> for Pattern
+where
+ T: Into<Range> + Copy,
+{
+ fn from(slice: &[T]) -> Self {
+ Pattern(slice.iter().copied().map(Into::into).collect())
+ }
+}
+
+impl<T> From<Vec<T>> for Pattern
+where
+ T: Into<Range>,
+{
+ fn from(vec: Vec<T>) -> Self {
+ Pattern(vec.into_iter().map(Into::into).collect())
+ }
+}
+
+impl From<&str> for Pattern {
+ fn from(slice: &str) -> Self {
+ slice.as_bytes().into()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::graph::Node;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn into_fork() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = Rope::new("foobar", leaf);
+
+ let fork = rope.into_fork(&mut graph);
+
+ assert_eq!(leaf, NodeId::new(1));
+ assert_eq!(fork, Fork::new().branch(b'f', NodeId::new(2)));
+ assert_eq!(graph[NodeId::new(2)], Rope::new("oobar", leaf));
+ }
+
+ #[test]
+ fn into_fork_one_byte() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = Rope::new("!", leaf);
+
+ let fork = rope.into_fork(&mut graph);
+
+ assert_eq!(leaf, NodeId::new(1));
+ assert_eq!(fork, Fork::new().branch(b'!', NodeId::new(1)));
+ }
+
+ #[test]
+ fn into_fork_miss_any() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = Rope::new("42", leaf).miss_any(NodeId::new(42));
+
+ let fork = rope.into_fork(&mut graph);
+
+ assert_eq!(leaf, NodeId::new(1));
+ assert_eq!(
+ fork,
+ Fork::new()
+ .branch(b'4', NodeId::new(2))
+ .miss(NodeId::new(42))
+ );
+ assert_eq!(
+ graph[NodeId::new(2)],
+ Rope::new("2", leaf).miss_any(NodeId::new(42))
+ );
+ }
+
+ #[test]
+ fn into_fork_miss_first() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = Rope::new("42", leaf).miss(Miss::First(NodeId::new(42)));
+
+ let fork = rope.into_fork(&mut graph);
+
+ assert_eq!(leaf, NodeId::new(1));
+ assert_eq!(
+ fork,
+ Fork::new()
+ .branch(b'4', NodeId::new(2))
+ .miss(NodeId::new(42))
+ );
+ assert_eq!(graph[NodeId::new(2)], Rope::new("2", leaf));
+ }
+
+ #[test]
+ fn split_at() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = Rope::new("foobar", leaf);
+
+ assert_eq!(rope.clone().split_at(6, &mut graph).unwrap(), rope);
+
+ let split = rope.split_at(3, &mut graph).unwrap();
+ let expected_id = NodeId::new(leaf.get() + 1);
+
+ assert_eq!(split, Rope::new("foo", expected_id));
+ assert_eq!(graph[expected_id], Rope::new("bar", leaf));
+ }
+
+ #[test]
+ fn pattern_to_bytes() {
+ let pat = Pattern::from("foobar");
+
+ assert_eq!(pat.to_bytes().unwrap(), b"foobar");
+
+ let ranges = Pattern::from(vec![0..=0, 42..=42, b'{'..=b'}']);
+
+ assert_eq!(ranges.to_bytes(), None);
+ }
+}
diff --git a/vendor/logos-codegen/src/leaf.rs b/vendor/logos-codegen/src/leaf.rs
new file mode 100644
index 00000000..5e0b810e
--- /dev/null
+++ b/vendor/logos-codegen/src/leaf.rs
@@ -0,0 +1,118 @@
+use std::cmp::{Ord, Ordering};
+use std::fmt::{self, Debug, Display};
+
+use proc_macro2::{Span, TokenStream};
+use syn::{spanned::Spanned, Ident};
+
+use crate::graph::{Disambiguate, Node};
+use crate::util::MaybeVoid;
+
+/// Terminal node of the state-machine graph: reaching it produces the enum
+/// variant `ident`, or skips the matched input when `ident` is `None`.
+#[derive(Clone)]
+pub struct Leaf<'t> {
+ // `None` marks a skip leaf; see `Leaf::new_skip`.
+ pub ident: Option<&'t Ident>,
+ pub span: Span,
+ // Ordering key for overlapping definitions; see the `Disambiguate` impl.
+ pub priority: usize,
+ pub field: MaybeVoid,
+ pub callback: Option<Callback>,
+}
+
+/// Callback attached to a token definition.
+#[derive(Clone)]
+pub enum Callback {
+ // A path/expression naming an external callback function.
+ Label(TokenStream),
+ // An inline closure written directly in the attribute; boxed to keep
+ // the enum small.
+ Inline(Box<InlineCallback>),
+ // Marker for skipped input, carrying the span of the skip definition.
+ Skip(Span),
+}
+
+/// Body of an inline callback closure: `|arg| body`.
+#[derive(Clone)]
+pub struct InlineCallback {
+ pub arg: Ident,
+ pub body: TokenStream,
+ pub span: Span,
+}
+
+impl From<InlineCallback> for Callback {
+ fn from(inline: InlineCallback) -> Callback {
+ Callback::Inline(Box::new(inline))
+ }
+}
+
+impl Callback {
+ /// Span of the callback definition, for error reporting.
+ pub fn span(&self) -> Span {
+ match self {
+ Callback::Label(tokens) => tokens.span(),
+ Callback::Inline(inline) => inline.span,
+ Callback::Skip(span) => *span,
+ }
+ }
+}
+
+impl<'t> Leaf<'t> {
+ /// Leaf producing the variant `ident`; priority 0 until set by the caller.
+ pub fn new(ident: &'t Ident, span: Span) -> Self {
+ Leaf {
+ ident: Some(ident),
+ span,
+ priority: 0,
+ field: MaybeVoid::Void,
+ callback: None,
+ }
+ }
+
+ /// Leaf that skips matched input instead of producing a variant.
+ pub fn new_skip(span: Span) -> Self {
+ Leaf {
+ ident: None,
+ span,
+ priority: 0,
+ field: MaybeVoid::Void,
+ callback: Some(Callback::Skip(span)),
+ }
+ }
+
+ // Builder-style setters consuming and returning `self`.
+ pub fn callback(mut self, callback: Option<Callback>) -> Self {
+ self.callback = callback;
+ self
+ }
+
+ pub fn field(mut self, field: MaybeVoid) -> Self {
+ self.field = field;
+ self
+ }
+
+ pub fn priority(mut self, priority: usize) -> Self {
+ self.priority = priority;
+ self
+ }
+}
+
+// Two leaves matching the same input are ordered purely by priority;
+// equal priorities surface as a DisambiguationError in the graph.
+impl Disambiguate for Leaf<'_> {
+ fn cmp(left: &Leaf, right: &Leaf) -> Ordering {
+ Ord::cmp(&left.priority, &right.priority)
+ }
+}
+
+impl<'t> From<Leaf<'t>> for Node<Leaf<'t>> {
+ fn from(leaf: Leaf<'t>) -> Self {
+ Node::Leaf(leaf)
+ }
+}
+
+// Debug: `::Variant` plus a short tag describing the attached callback.
+impl Debug for Leaf<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "::{}", self)?;
+
+ match self.callback {
+ Some(Callback::Label(ref label)) => write!(f, " ({})", label),
+ Some(Callback::Inline(_)) => f.write_str(" (<inline>)"),
+ Some(Callback::Skip(_)) => f.write_str(" (<skip>)"),
+ None => Ok(()),
+ }
+ }
+}
+
+// Display: the variant name, or `<skip>` for skip leaves.
+impl Display for Leaf<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self.ident {
+ Some(ident) => Display::fmt(ident, f),
+ None => f.write_str("<skip>"),
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/lib.rs b/vendor/logos-codegen/src/lib.rs
new file mode 100644
index 00000000..2b2d3db2
--- /dev/null
+++ b/vendor/logos-codegen/src/lib.rs
@@ -0,0 +1,391 @@
+//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
+//!
+//! # Logos
+//!
+//! This is a `#[derive]` macro crate, [for documentation go to main crate](https://docs.rs/logos).
+
+// The `quote!` macro requires deep recursion.
+#![recursion_limit = "196"]
+#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
+
+mod error;
+mod generator;
+#[cfg(not(feature = "fuzzing"))]
+mod graph;
+#[cfg(feature = "fuzzing")]
+pub mod graph;
+mod leaf;
+#[cfg(not(feature = "fuzzing"))]
+mod mir;
+#[cfg(feature = "fuzzing")]
+pub mod mir;
+mod parser;
+mod util;
+
+#[macro_use]
+#[allow(missing_docs)]
+mod macros;
+
+use generator::Generator;
+use graph::{DisambiguationError, Fork, Graph, Rope};
+use leaf::Leaf;
+use parser::{IgnoreFlags, Mode, Parser};
+use quote::ToTokens;
+use util::MaybeVoid;
+
+use proc_macro2::{Delimiter, TokenStream, TokenTree};
+use quote::quote;
+use syn::parse_quote;
+use syn::spanned::Spanned;
+use syn::{Fields, ItemEnum};
+
+// Attribute names recognized on the enum (`logos`) and on variants
+// (`token`, `regex`; `error` is rejected with a migration hint).
+const LOGOS_ATTR: &str = "logos";
+const ERROR_ATTR: &str = "error";
+const TOKEN_ATTR: &str = "token";
+const REGEX_ATTR: &str = "regex";
+
+/// Generate a `Logos` implementation for the given struct, provided as a stream of rust tokens.
+///
+/// Pipeline: parse the enum and its `#[logos(...)]` options, build a graph
+/// from all `#[token]`/`#[regex]`/skip definitions, merge everything into a
+/// single root fork, then emit the `impl Logos` with the generated `lex` body.
+/// Errors are collected in the parser and, if any, rendered in place of the body.
+pub fn generate(input: TokenStream) -> TokenStream {
+ debug!("Reading input token streams");
+
+ let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
+
+ let name = &item.ident;
+
+ let mut parser = Parser::default();
+
+ for param in item.generics.params {
+ parser.parse_generic(param);
+ }
+
+ for attr in &mut item.attrs {
+ parser.try_parse_logos(attr);
+ }
+
+ // `ropes` collects plain byte-literal tokens; `regex_ids` collects graph
+ // entry points for regex-based definitions (including skips).
+ let mut ropes = Vec::new();
+ let mut regex_ids = Vec::new();
+ let mut graph = Graph::new();
+
+ {
+ let errors = &mut parser.errors;
+
+ // Compile each `#[logos(skip ...)]` pattern into a skip leaf.
+ for literal in &parser.skips {
+ match literal.to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors) {
+ Ok(mir) => {
+ let then = graph.push(Leaf::new_skip(literal.span()).priority(mir.priority()));
+ let id = graph.regex(mir, then);
+
+ regex_ids.push(id);
+ }
+ Err(err) => {
+ errors.err(err, literal.span());
+ }
+ }
+ }
+ }
+
+ debug!("Iterating through enum variants");
+
+ for variant in &mut item.variants {
+ // Determine the payload type carried by this variant (at most one field).
+ let field = match &mut variant.fields {
+ Fields::Unit => MaybeVoid::Void,
+ Fields::Unnamed(fields) => {
+ if fields.unnamed.len() != 1 {
+ parser.err(
+ format!(
+ "Logos currently only supports variants with one field, found {}",
+ fields.unnamed.len(),
+ ),
+ fields.span(),
+ );
+ }
+
+ let ty = &mut fields
+ .unnamed
+ .first_mut()
+ .expect("Already checked len; qed")
+ .ty;
+ let ty = parser.get_type(ty);
+
+ MaybeVoid::Some(ty)
+ }
+ Fields::Named(fields) => {
+ parser.err("Logos doesn't support named fields yet.", fields.span());
+
+ MaybeVoid::Void
+ }
+ };
+
+ // Lazy leaf constructor to avoid cloning
+ let var_ident = &variant.ident;
+ let leaf = move |span| Leaf::new(var_ident, span).field(field.clone());
+
+ for attr in &mut variant.attrs {
+ let attr_name = match attr.path().get_ident() {
+ Some(ident) => ident.to_string(),
+ None => continue,
+ };
+
+ match attr_name.as_str() {
+ ERROR_ATTR => {
+ // TODO: Remove in future versions
+ parser.err(
+ "\
+ Since 0.13 Logos no longer requires the #[error] variant.\n\
+ \n\
+ For help with migration see release notes: \
+ https://github.com/maciejhirsz/logos/releases\
+ ",
+ attr.span(),
+ );
+ }
+ TOKEN_ATTR => {
+ let definition = match parser.parse_definition(attr) {
+ Some(definition) => definition,
+ None => {
+ parser.err("Expected #[token(...)]", attr.span());
+ continue;
+ }
+ };
+
+ if definition.ignore_flags.is_empty() {
+ // Plain literal: default priority is 2 per byte,
+ // mirroring Mir::priority's 2-per-char weighting.
+ let bytes = definition.literal.to_bytes();
+ let then = graph.push(
+ leaf(definition.literal.span())
+ .priority(definition.priority.unwrap_or(bytes.len() * 2))
+ .callback(definition.callback),
+ );
+
+ ropes.push(Rope::new(bytes, then));
+ } else {
+ // Case-insensitive token: escape it and route through
+ // the regex pipeline instead of a byte rope.
+ let mir = definition
+ .literal
+ .escape_regex()
+ .to_mir(
+ &Default::default(),
+ definition.ignore_flags,
+ &mut parser.errors,
+ )
+ .expect("The literal should be perfectly valid regex");
+
+ let then = graph.push(
+ leaf(definition.literal.span())
+ .priority(definition.priority.unwrap_or_else(|| mir.priority()))
+ .callback(definition.callback),
+ );
+ let id = graph.regex(mir, then);
+
+ regex_ids.push(id);
+ }
+ }
+ REGEX_ATTR => {
+ let definition = match parser.parse_definition(attr) {
+ Some(definition) => definition,
+ None => {
+ parser.err("Expected #[regex(...)]", attr.span());
+ continue;
+ }
+ };
+ let mir = match definition.literal.to_mir(
+ &parser.subpatterns,
+ definition.ignore_flags,
+ &mut parser.errors,
+ ) {
+ Ok(mir) => mir,
+ Err(err) => {
+ parser.err(err, definition.literal.span());
+ continue;
+ }
+ };
+
+ let then = graph.push(
+ leaf(definition.literal.span())
+ .priority(definition.priority.unwrap_or_else(|| mir.priority()))
+ .callback(definition.callback),
+ );
+ let id = graph.regex(mir, then);
+
+ regex_ids.push(id);
+ }
+ _ => (),
+ }
+ }
+ }
+
+ let mut root = Fork::new();
+
+ debug!("Parsing additional options (extras, source, ...)");
+
+ let error_type = parser.error_type.take();
+ let extras = parser.extras.take();
+ // Default source type depends on mode: `str` for UTF-8, `[u8]` for binary.
+ let source = parser
+ .source
+ .take()
+ .map(strip_wrapping_parens)
+ .unwrap_or(match parser.mode {
+ Mode::Utf8 => quote!(str),
+ Mode::Binary => quote!([u8]),
+ });
+ let logos_path = parser
+ .logos_path
+ .take()
+ .unwrap_or_else(|| parse_quote!(::logos));
+
+ let generics = parser.generics();
+ let this = quote!(#name #generics);
+
+ // Shared template: wraps `body` in the `impl Logos` skeleton; also used
+ // to emit collected errors as the body when parsing failed.
+ let impl_logos = |body| {
+ quote! {
+ impl<'s> #logos_path::Logos<'s> for #this {
+ type Error = #error_type;
+
+ type Extras = #extras;
+
+ type Source = #source;
+
+ fn lex(lex: &mut #logos_path::Lexer<'s, Self>) {
+ #body
+ }
+ }
+ }
+ };
+
+ // Merge every definition into one root fork.
+ for id in regex_ids {
+ let fork = graph.fork_off(id);
+
+ root.merge(fork, &mut graph);
+ }
+ for rope in ropes {
+ root.merge(rope.into_fork(&mut graph), &mut graph);
+ }
+ // Fold the root's miss chain back into the root while it keeps
+ // producing forks with branches.
+ while let Some(id) = root.miss.take() {
+ let fork = graph.fork_off(id);
+
+ if fork.branches().next().is_some() {
+ root.merge(fork, &mut graph);
+ } else {
+ break;
+ }
+ }
+
+ debug!("Checking if any two tokens have the same priority");
+
+ // Report each ambiguous pair twice, once at each definition's span.
+ for &DisambiguationError(a, b) in graph.errors() {
+ let a = graph[a].unwrap_leaf();
+ let b = graph[b].unwrap_leaf();
+ let disambiguate = a.priority + 1;
+
+ let mut err = |a: &Leaf, b: &Leaf| {
+ parser.err(
+ format!(
+ "\
+ A definition of variant `{a}` can match the same input as another definition of variant `{b}`.\n\
+ \n\
+ hint: Consider giving one definition a higher priority: \
+ #[regex(..., priority = {disambiguate})]\
+ ",
+ ),
+ a.span
+ );
+ };
+
+ err(a, b);
+ err(b, a);
+ }
+
+ // Any accumulated errors replace the lexer body with compile errors.
+ if let Some(errors) = parser.errors.render() {
+ return impl_logos(errors);
+ }
+
+ let root = graph.push(root);
+
+ // Drop graph nodes unreachable from the root.
+ graph.shake(root);
+
+ debug!("Generating code from graph:\n{graph:#?}");
+
+ let generator = Generator::new(name, &this, root, &graph);
+
+ let body = generator.generate();
+ impl_logos(quote! {
+ use #logos_path::internal::{LexerInternal, CallbackResult};
+
+ type Lexer<'s> = #logos_path::Lexer<'s, #this>;
+
+ fn _end<'s>(lex: &mut Lexer<'s>) {
+ lex.end()
+ }
+
+ fn _error<'s>(lex: &mut Lexer<'s>) {
+ lex.bump_unchecked(1);
+
+ lex.error();
+ }
+
+ #body
+ })
+}
+
+/// Strip all logos attributes from the given struct, allowing it to be used in code without `logos-derive` present.
+///
+/// Removes `#[logos]`/`#[token]`/`#[regex]` attributes from the enum, its
+/// variants, and their fields, and removes `Logos` from any `#[derive(...)]`.
+pub fn strip_attributes(input: TokenStream) -> TokenStream {
+ let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
+
+ strip_attrs_from_vec(&mut item.attrs);
+
+ for attr in &mut item.attrs {
+ if let syn::Meta::List(meta) = &mut attr.meta {
+ if meta.path.is_ident("derive") {
+ // Rebuild the derive list token-by-token, dropping `Logos`
+ // together with its trailing comma.
+ let mut tokens =
+ std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
+
+ // NOTE(review): the `while let` stops at the first non-ident
+ // token, so a path-style derive entry (e.g. `logos::Logos`)
+ // would presumably truncate the rest of the list — confirm
+ // against upstream before relying on this with paths.
+ while let Some(TokenTree::Ident(ident)) = tokens.next() {
+ let punct = tokens.next();
+
+ if ident == "Logos" {
+ continue;
+ }
+
+ meta.tokens.extend([TokenTree::Ident(ident)]);
+ meta.tokens.extend(punct);
+ }
+ }
+ }
+ }
+
+ for variant in &mut item.variants {
+ strip_attrs_from_vec(&mut variant.attrs);
+ for field in &mut variant.fields {
+ strip_attrs_from_vec(&mut field.attrs);
+ }
+ }
+
+ item.to_token_stream()
+}
+
+// Remove every logos-related attribute from an attribute list in place.
+fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
+ attrs.retain(|attr| !is_logos_attr(attr))
+}
+
+// True for `#[logos]`, `#[token]`, or `#[regex]` attributes.
+fn is_logos_attr(attr: &syn::Attribute) -> bool {
+ attr.path().is_ident(LOGOS_ATTR)
+ || attr.path().is_ident(TOKEN_ATTR)
+ || attr.path().is_ident(REGEX_ATTR)
+}
+
+// If the stream is exactly one parenthesized group, unwrap it; otherwise
+// return the stream unchanged. Used to accept `source = (Type)` syntax.
+fn strip_wrapping_parens(t: TokenStream) -> TokenStream {
+ let tts: Vec<TokenTree> = t.into_iter().collect();
+
+ if tts.len() != 1 {
+ tts.into_iter().collect()
+ } else {
+ match tts.into_iter().next().unwrap() {
+ TokenTree::Group(g) => {
+ if g.delimiter() == Delimiter::Parenthesis {
+ g.stream()
+ } else {
+ // Braced/bracketed group: keep it intact.
+ core::iter::once(TokenTree::Group(g)).collect()
+ }
+ }
+ tt => core::iter::once(tt).collect(),
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/macros.rs b/vendor/logos-codegen/src/macros.rs
new file mode 100644
index 00000000..148c6f6a
--- /dev/null
+++ b/vendor/logos-codegen/src/macros.rs
@@ -0,0 +1,12 @@
+// Internal logging macro: with the "debug" feature it prints the call site
+// and a formatted message to stderr; otherwise it expands to nothing, so
+// format arguments are not evaluated in release builds.
+#[cfg(feature = "debug")]
+macro_rules! debug {
+ ($($arg:tt)*) => {
+ eprint!("[{}:{}:{}] ", file!(), line!(), column!());
+ eprintln!($($arg)*)
+ }
+}
+
+#[cfg(not(feature = "debug"))]
+macro_rules! debug {
+ ($($arg:tt)*) => {};
+}
diff --git a/vendor/logos-codegen/src/mir.rs b/vendor/logos-codegen/src/mir.rs
new file mode 100644
index 00000000..e07c5b78
--- /dev/null
+++ b/vendor/logos-codegen/src/mir.rs
@@ -0,0 +1,235 @@
+use std::convert::TryFrom;
+
+use lazy_static::lazy_static;
+use regex_syntax::hir::{Dot, Hir, HirKind};
+use regex_syntax::ParserBuilder;
+
+pub use regex_syntax::hir::{Class, ClassUnicode, Literal};
+
+use crate::error::{Error, Result};
+
+// Cached `.` HIRs used to detect bare `.+`/`.*` repetitions (rejected below).
+lazy_static! {
+ static ref DOT_UTF8: Hir = Hir::dot(Dot::AnyChar);
+ static ref DOT_BYTES: Hir = Hir::dot(Dot::AnyByte);
+}
+
+/// Middle Intermediate Representation of the regex, built from
+/// `regex_syntax`'s `Hir`. The goal here is to strip and canonicalize
+/// the tree, so that we don't have to do transformations later on the
+/// graph, with the potential of running into looping references.
+#[derive(Clone, Debug)]
+pub enum Mir {
+ Empty,
+ // Zero-or-more repetition (`*`).
+ Loop(Box<Mir>),
+ // Zero-or-one (`?`).
+ Maybe(Box<Mir>),
+ Concat(Vec<Mir>),
+ Alternation(Vec<Mir>),
+ Class(Class),
+ Literal(Literal),
+}
+
+impl Mir {
+ /// Parse a UTF-8 regex (default `regex_syntax` settings).
+ pub fn utf8(source: &str) -> Result<Mir> {
+ Mir::try_from(ParserBuilder::new().build().parse(source)?)
+ }
+
+ /// Parse a UTF-8 regex with Unicode case-insensitivity.
+ pub fn utf8_ignore_case(source: &str) -> Result<Mir> {
+ Mir::try_from(
+ ParserBuilder::new()
+ .case_insensitive(true)
+ .build()
+ .parse(source)?,
+ )
+ }
+
+ /// Parse a byte-oriented regex (no Unicode, arbitrary bytes allowed).
+ pub fn binary(source: &str) -> Result<Mir> {
+ Mir::try_from(
+ ParserBuilder::new()
+ .utf8(false)
+ .unicode(false)
+ .build()
+ .parse(source)?,
+ )
+ }
+
+ /// Parse a byte-oriented regex with (ASCII) case-insensitivity.
+ pub fn binary_ignore_case(source: &str) -> Result<Mir> {
+ Mir::try_from(
+ ParserBuilder::new()
+ .utf8(false)
+ .unicode(false)
+ .case_insensitive(true)
+ .build()
+ .parse(source)?,
+ )
+ }
+
+ /// Default token priority: 2 per guaranteed character/class match;
+ /// optional parts (loops, maybes) contribute nothing, and an
+ /// alternation counts as its cheapest branch.
+ pub fn priority(&self) -> usize {
+ match self {
+ Mir::Empty | Mir::Loop(_) | Mir::Maybe(_) => 0,
+ Mir::Concat(concat) => concat.iter().map(Mir::priority).sum(),
+ Mir::Alternation(alt) => alt.iter().map(Mir::priority).min().unwrap_or(0),
+ Mir::Class(_) => 2,
+ // Count characters when the literal is valid UTF-8 so multi-byte
+ // chars weigh the same as ASCII; fall back to byte length.
+ Mir::Literal(lit) => match std::str::from_utf8(&lit.0) {
+ Ok(s) => 2 * s.chars().count(),
+ Err(_) => 2 * lit.0.len(),
+ },
+ }
+ }
+}
+
+// Lower `regex_syntax::hir::Hir` into `Mir`, rejecting constructs Logos
+// cannot compile (non-greedy repetition, look-around, bare `.*`/`.+`).
+impl TryFrom<Hir> for Mir {
+ type Error = Error;
+
+ fn try_from(hir: Hir) -> Result<Mir> {
+ match hir.into_kind() {
+ HirKind::Empty => Ok(Mir::Empty),
+ HirKind::Concat(concat) => {
+ let mut out = Vec::with_capacity(concat.len());
+
+ // Flatten nested concats into a single flat sequence.
+ fn extend(mir: Mir, out: &mut Vec<Mir>) {
+ match mir {
+ Mir::Concat(nested) => {
+ for child in nested {
+ extend(child, out);
+ }
+ }
+ mir => out.push(mir),
+ }
+ }
+
+ for hir in concat {
+ extend(Mir::try_from(hir)?, &mut out);
+ }
+
+ Ok(Mir::Concat(out))
+ }
+ HirKind::Alternation(alternation) => {
+ let alternation = alternation
+ .into_iter()
+ .map(Mir::try_from)
+ .collect::<Result<_>>()?;
+
+ Ok(Mir::Alternation(alternation))
+ }
+ HirKind::Literal(literal) => Ok(Mir::Literal(literal)),
+ HirKind::Class(class) => Ok(Mir::Class(class)),
+ HirKind::Repetition(repetition) => {
+ if !repetition.greedy {
+ return Err("#[regex]: non-greedy parsing is currently unsupported.".into());
+ }
+
+ // Detect a bare `.` under the repetition so `.*`/`.+` can be
+ // rejected with a helpful message below.
+ let is_dot = if repetition.sub.properties().is_utf8() {
+ *repetition.sub == *DOT_UTF8
+ } else {
+ *repetition.sub == *DOT_BYTES
+ };
+ let mir = Mir::try_from(*repetition.sub)?;
+
+ // Desugar every {min, max} form into Concat/Maybe/Loop.
+ match (repetition.min, repetition.max) {
+ (0..=1, None) if is_dot => {
+ Err(
+ "#[regex]: \".+\" and \".*\" patterns will greedily consume \
+ the entire source till the end as Logos does not allow \
+ backtracking. If you are looking to match everything until \
+ a specific character, you should use a negative character \
+ class. E.g., use regex r\"'[^']*'\" to match anything in \
+ between two quotes. Read more about that here: \
+ https://github.com/maciejhirsz/logos/issues/302#issuecomment-1521342541."
+ .into()
+ )
+ }
+ // 0 or 1
+ (0, Some(1)) => Ok(Mir::Maybe(Box::new(mir))),
+ // 0 or more
+ (0, None) => Ok(Mir::Loop(Box::new(mir))),
+ // 1 or more
+ (1, None) => {
+ Ok(Mir::Concat(vec![mir.clone(), Mir::Loop(Box::new(mir))]))
+ }
+ // Exact {n}
+ (n, Some(m)) if m == n => {
+ let mut out = Vec::with_capacity(n as usize);
+ for _ in 0..n {
+ out.push(mir.clone());
+ }
+ Ok(Mir::Concat(out))
+ }
+ // At least {n,}
+ (n, None) => {
+ let mut out = Vec::with_capacity(n as usize);
+ for _ in 0..n {
+ out.push(mir.clone());
+ }
+ out.push(Mir::Loop(Box::new(mir)));
+ Ok(Mir::Concat(out))
+ }
+ // Bounded {n, m}
+ (n, Some(m)) => {
+ let mut out = Vec::with_capacity(m as usize);
+ for _ in 0..n {
+ out.push(mir.clone());
+ }
+ for _ in n..m {
+ out.push(Mir::Maybe(Box::new(mir.clone())));
+ }
+ Ok(Mir::Concat(out))
+ }
+ }
+ }
+ // Capture groups have no runtime meaning for a lexer; unwrap them.
+ HirKind::Capture(capture) => Mir::try_from(*capture.sub),
+ HirKind::Look(_) => {
+ Err("#[regex]: look-around assertions are currently unsupported.".into())
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::Mir;
+
+ // Pins the priority weighting: 2 per guaranteed char/class, with
+ // multi-byte UTF-8 chars counting once.
+ #[test]
+ fn priorities() {
+ let regexes = [
+ ("a", 2),
+ ("à", 2),
+ ("京", 2),
+ ("Eté", 6),
+ ("Été", 6),
+ ("[a-z]+", 2),
+ ("a|b", 2),
+ ("a|[b-z]", 2),
+ ("(foo)+", 6),
+ ("foobar", 12),
+ ("(fooz|bar)+qux", 12),
+ ];
+
+ for (regex, expected) in regexes.iter() {
+ let mir = Mir::utf8(regex).unwrap();
+ assert_eq!(mir.priority(), *expected, "Failed for regex \"{}\"", regex);
+ }
+ }
+
+ // Semantically equivalent spellings must not diverge in priority,
+ // or users would see different disambiguation behavior.
+ #[test]
+ fn equivalent_patterns() {
+ let regexes = [
+ ("a|b", "[a-b]"),
+ ("1|2|3", "[1-3]"),
+ ("1+", "[1]+"),
+ ("c*", "[c]*"),
+ ("aaa", "a{3}"),
+ ("a[a]{2}", "a{3}"),
+ ];
+
+ for (regex_left, regex_right) in regexes.iter() {
+ let mir_left = Mir::utf8(regex_left).unwrap();
+ let mir_right = Mir::utf8(regex_right).unwrap();
+ assert_eq!(
+ mir_left.priority(),
+ mir_right.priority(),
+ "Regexes \"{regex_left}\" and \"{regex_right}\" \
+ are equivalent but have different priorities"
+ );
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/parser/definition.rs b/vendor/logos-codegen/src/parser/definition.rs
new file mode 100644
index 00000000..a876fb59
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/definition.rs
@@ -0,0 +1,193 @@
+use proc_macro2::{Ident, Span};
+use syn::{spanned::Spanned, LitByteStr, LitStr};
+
+use crate::error::{Errors, Result};
+use crate::leaf::Callback;
+use crate::mir::Mir;
+use crate::parser::nested::NestedValue;
+use crate::parser::{IgnoreFlags, Parser, Subpatterns};
+
+use super::ignore_flags::ascii_case::MakeAsciiCaseInsensitive;
+
+/// Parsed contents of a single `#[token(...)]` or `#[regex(...)]` attribute.
+pub struct Definition {
+ pub literal: Literal,
+ // `None` means "use the default derived from the pattern".
+ pub priority: Option<usize>,
+ pub callback: Option<Callback>,
+ pub ignore_flags: IgnoreFlags,
+}
+
+/// The pattern literal: either a `"str"` or a `b"bytes"` literal.
+pub enum Literal {
+ Utf8(LitStr),
+ Bytes(LitByteStr),
+}
+
+impl Definition {
+ /// New definition with only the pattern set; options are filled in by
+ /// `named_attr` as the attribute's nested arguments are parsed.
+ pub fn new(literal: Literal) -> Self {
+ Definition {
+ literal,
+ priority: None,
+ callback: None,
+ ignore_flags: IgnoreFlags::Empty,
+ }
+ }
+
+ /// Handle one `name = value` / `name(...)` argument inside the attribute.
+ /// Unknown names and malformed values are reported via `parser.err` —
+ /// parsing continues so multiple errors can be collected.
+ pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) {
+ match (name.to_string().as_str(), value) {
+ ("priority", NestedValue::Assign(tokens)) => {
+ let prio = match tokens.to_string().parse() {
+ Ok(prio) => prio,
+ Err(_) => {
+ parser.err("Expected an unsigned integer", tokens.span());
+ return;
+ }
+ };
+
+ // Setting priority twice is an error, but the later value wins.
+ if self.priority.replace(prio).is_some() {
+ parser.err("Resetting previously set priority", tokens.span());
+ }
+ }
+ ("priority", _) => {
+ parser.err("Expected: priority = <integer>", name.span());
+ }
+ ("callback", NestedValue::Assign(tokens)) => {
+ let span = tokens.span();
+ let callback = match parser.parse_callback(tokens) {
+ Some(callback) => callback,
+ None => {
+ parser.err("Not a valid callback", span);
+ return;
+ }
+ };
+
+ // Report both the duplicate and the original location.
+ if let Some(previous) = self.callback.replace(callback) {
+ parser
+ .err(
+ "Callback has been already set",
+ span.join(name.span()).unwrap(),
+ )
+ .err("Previous callback set here", previous.span());
+ }
+ }
+ ("callback", _) => {
+ parser.err("Expected: callback = ...", name.span());
+ }
+ ("ignore", NestedValue::Group(tokens)) => {
+ self.ignore_flags.parse_group(name, tokens, parser);
+ }
+ ("ignore", _) => {
+ parser.err("Expected: ignore(<flag>, ...)", name.span());
+ }
+ (unknown, _) => {
+ parser.err(
+ format!(
+ "\
+ Unknown nested attribute: {}\n\
+ \n\
+ Expected one of: priority, callback\
+ ",
+ unknown
+ ),
+ name.span(),
+ );
+ }
+ }
+ }
+}
+
+impl Literal {
+ /// The raw pattern bytes, regardless of literal kind.
+ pub fn to_bytes(&self) -> Vec<u8> {
+ match self {
+ Literal::Utf8(string) => string.value().into_bytes(),
+ Literal::Bytes(bytes) => bytes.value(),
+ }
+ }
+
+ /// The same literal with all regex metacharacters escaped, so a plain
+ /// `#[token]` string can be fed through the regex pipeline.
+ pub fn escape_regex(&self) -> Literal {
+ match self {
+ Literal::Utf8(string) => Literal::Utf8(LitStr::new(
+ regex_syntax::escape(&string.value()).as_str(),
+ self.span(),
+ )),
+ // Byte literals are first rendered as `\xNN` escapes (see
+ // `bytes_to_regex_string`) before escaping.
+ Literal::Bytes(bytes) => Literal::Bytes(LitByteStr::new(
+ regex_syntax::escape(&bytes_to_regex_string(bytes.value())).as_bytes(),
+ self.span(),
+ )),
+ }
+ }
+
+ /// Compile the pattern to `Mir`, after expanding subpatterns and
+ /// selecting the parser flavor from the literal kind and ignore flags.
+ pub fn to_mir(
+ &self,
+ subpatterns: &Subpatterns,
+ ignore_flags: IgnoreFlags,
+ errors: &mut Errors,
+ ) -> Result<Mir> {
+ let value = subpatterns.fix(self, errors);
+
+ if ignore_flags.contains(IgnoreFlags::IgnoreAsciiCase) {
+ match self {
+ // ASCII-only case folding for UTF-8 patterns is done as a
+ // Mir transform; for bytes the binary parser's folding is
+ // already ASCII-only.
+ Literal::Utf8(_) => {
+ Mir::utf8(&value).map(MakeAsciiCaseInsensitive::make_ascii_case_insensitive)
+ }
+ Literal::Bytes(_) => Mir::binary_ignore_case(&value),
+ }
+ } else if ignore_flags.contains(IgnoreFlags::IgnoreCase) {
+ match self {
+ Literal::Utf8(_) => Mir::utf8_ignore_case(&value),
+ Literal::Bytes(_) => Mir::binary_ignore_case(&value),
+ }
+ } else {
+ match self {
+ Literal::Utf8(_) => Mir::utf8(&value),
+ Literal::Bytes(_) => Mir::binary(&value),
+ }
+ }
+ }
+
+ /// Span of the literal, for error reporting.
+ pub fn span(&self) -> Span {
+ match self {
+ Literal::Utf8(string) => string.span(),
+ Literal::Bytes(bytes) => bytes.span(),
+ }
+ }
+}
+
+// Accept either a string or a byte-string literal; anything else produces
+// the lookahead's "expected one of" error.
+impl syn::parse::Parse for Literal {
+ fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
+ let la = input.lookahead1();
+ if la.peek(LitStr) {
+ Ok(Literal::Utf8(input.parse()?))
+ } else if la.peek(LitByteStr) {
+ Ok(Literal::Bytes(input.parse()?))
+ } else {
+ Err(la.error())
+ }
+ }
+}
+
+/// Render arbitrary bytes as a regex source string: ASCII bytes pass
+/// through verbatim, non-ASCII bytes become `\xNN` escapes.
+pub fn bytes_to_regex_string(bytes: Vec<u8>) -> String {
+ if bytes.is_ascii() {
+ // SAFETY: all bytes are ASCII (checked above), and ASCII is valid UTF-8.
+ unsafe {
+ // Unicode values are prohibited, so we can't use
+ // safe version of String::from_utf8
+ //
+ // We can, however, construct a safe ASCII string
+ return String::from_utf8_unchecked(bytes);
+ }
+ }
+
+ let mut string = String::with_capacity(bytes.len() * 2);
+
+ for byte in bytes {
+ if byte < 0x80 {
+ string.push(byte as char);
+ } else {
+ static DIGITS: [u8; 16] = *b"0123456789abcdef";
+
+ // Emit `\xNN` (lowercase hex) for the non-ASCII byte.
+ string.push_str(r"\x");
+ string.push(DIGITS[(byte / 16) as usize] as char);
+ string.push(DIGITS[(byte % 16) as usize] as char);
+ }
+ }
+
+ string
+}
diff --git a/vendor/logos-codegen/src/parser/ignore_flags.rs b/vendor/logos-codegen/src/parser/ignore_flags.rs
new file mode 100644
index 00000000..3a79d31b
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/ignore_flags.rs
@@ -0,0 +1,499 @@
+use std::ops::{BitAnd, BitOr};
+
+use proc_macro2::{Ident, TokenStream, TokenTree};
+
+use crate::parser::Parser;
+use crate::util::is_punct;
+
+/// Bitset of `ignore(...)` flags on a token/regex definition.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct IgnoreFlags {
+ bits: u8,
+}
+
+#[allow(non_upper_case_globals)]
+impl IgnoreFlags {
+ // The two flags are mutually exclusive; `parse_ident` enforces that.
+ pub const Empty: Self = Self::new(0x00);
+ pub const IgnoreCase: Self = Self::new(0x01);
+ pub const IgnoreAsciiCase: Self = Self::new(0x02);
+
+ #[inline]
+ pub const fn new(bits: u8) -> Self {
+ Self { bits }
+ }
+
+ /// Enables a variant.
+ #[inline]
+ pub fn enable(&mut self, variant: Self) {
+ self.bits |= variant.bits;
+ }
+
+ /// Checks if this `IgnoreFlags` contains *any* of the given variants.
+ #[inline]
+ pub fn contains(&self, variants: Self) -> bool {
+ self.bits & variants.bits != 0
+ }
+
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.bits == 0
+ }
+
+ /// Parses an identifier an enables it for `self`.
+ ///
+ /// Valid inputs are (that produces `true`):
+ /// * `"case"` (incompatible with `"ascii_case"`)
+ /// * `"ascii_case"` (incompatible with `"case"`)
+ ///
+ /// An error causes this function to return `false` and emits an error to
+ /// the given `Parser`.
+ fn parse_ident(&mut self, ident: Ident, parser: &mut Parser) -> bool {
+ match ident.to_string().as_str() {
+ "case" => {
+ if self.contains(Self::IgnoreAsciiCase) {
+ parser.err(
+ "\
+ The flag \"case\" cannot be used along with \"ascii_case\"\
+ ",
+ ident.span(),
+ );
+ false
+ } else {
+ self.enable(Self::IgnoreCase);
+ true
+ }
+ }
+ "ascii_case" => {
+ if self.contains(Self::IgnoreCase) {
+ parser.err(
+ "\
+ The flag \"ascii_case\" cannot be used along with \"case\"\
+ ",
+ ident.span(),
+ );
+ false
+ } else {
+ self.enable(Self::IgnoreAsciiCase);
+ true
+ }
+ }
+ unknown => {
+ parser.err(
+ format!(
+ "\
+ Unknown flag: {}\n\
+ \n\
+ Expected one of: case, ascii_case\
+ ",
+ unknown
+ ),
+ ident.span(),
+ );
+ false
+ }
+ }
+ }
+
+ /// Parse the `ignore(<flag>, ...)` group body; errors go to `parser`.
+ pub fn parse_group(&mut self, name: Ident, tokens: TokenStream, parser: &mut Parser) {
+ // Little finite state machine to parse "<flag>(,<flag>)*,?"
+
+ // FSM description for future maintenance
+ // 0: Initial state
+ // <flag> -> 1
+ // _ -> error
+ // 1: A flag was found
+ // , -> 2
+ // None -> done
+ // _ -> error
+ // 2: A comma was found (after a <flag>)
+ // <flag> -> 1
+ // None -> done
+ // _ -> error
+ let mut state = 0u8;
+
+ let mut tokens = tokens.into_iter();
+
+ loop {
+ state = match state {
+ 0 => match tokens.next() {
+ Some(TokenTree::Ident(ident)) => {
+ if self.parse_ident(ident, parser) {
+ 1
+ } else {
+ return;
+ }
+ }
+ _ => {
+ parser.err(
+ "\
+ Invalid ignore flag\n\
+ \n\
+ Expected one of: case, ascii_case\
+ ",
+ name.span(),
+ );
+ return;
+ }
+ },
+ 1 => match tokens.next() {
+ Some(tt) if is_punct(&tt, ',') => 2,
+ None => return,
+ Some(unexpected_tt) => {
+ parser.err(
+ format!(
+ "\
+ Unexpected token: {:?}\
+ ",
+ unexpected_tt.to_string(),
+ ),
+ unexpected_tt.span(),
+ );
+ return;
+ }
+ },
+ 2 => match tokens.next() {
+ Some(TokenTree::Ident(ident)) => {
+ if self.parse_ident(ident, parser) {
+ 1
+ } else {
+ return;
+ }
+ }
+ // Trailing comma is accepted here.
+ None => return,
+ Some(unexpected_tt) => {
+ parser.err(
+ format!(
+ "\
+ Unexpected token: {:?}\
+ ",
+ unexpected_tt.to_string(),
+ ),
+ unexpected_tt.span(),
+ );
+ return;
+ }
+ },
+ _ => unreachable!("Internal Error: invalid state ({})", state),
+ }
+ }
+ }
+}
+
+// Standard bitset combinators over the flag bits.
+impl BitOr for IgnoreFlags {
+ type Output = Self;
+
+ fn bitor(self, other: Self) -> Self {
+ Self::new(self.bits | other.bits)
+ }
+}
+
+impl BitAnd for IgnoreFlags {
+ type Output = Self;
+
+ fn bitand(self, other: Self) -> Self {
+ Self::new(self.bits & other.bits)
+ }
+}
+
+// Implements `ignore(ascii_case)`: rewrite a Mir so that only ASCII letters
+// become case-insensitive, leaving all non-ASCII codepoints untouched.
+pub mod ascii_case {
+ use regex_syntax::hir;
+
+ use crate::mir::Mir;
+ use crate::parser::Literal;
+
+ // Build a single-byte (or single-char, via `@char`) hir::Literal.
+ macro_rules! literal {
+ ($byte:expr) => {
+ hir::Literal(Box::new([$byte]))
+ };
+ (@char $c:expr) => {
+ hir::Literal(
+ $c.encode_utf8(&mut [0; 4])
+ .as_bytes()
+ .to_vec()
+ .into_boxed_slice(),
+ )
+ };
+ }
+
+ pub trait MakeAsciiCaseInsensitive {
+ /// Creates a equivalent regular expression which ignore the letter casing
+ /// of ascii characters.
+ fn make_ascii_case_insensitive(self) -> Mir;
+ }
+
+ impl MakeAsciiCaseInsensitive for u8 {
+ // ASCII letters differ from their other-case counterpart by 32.
+ fn make_ascii_case_insensitive(self) -> Mir {
+ if self.is_ascii_lowercase() {
+ Mir::Alternation(vec![
+ Mir::Literal(literal!(self - 32)),
+ Mir::Literal(literal!(self)),
+ ])
+ } else if self.is_ascii_uppercase() {
+ Mir::Alternation(vec![
+ Mir::Literal(literal!(self)),
+ Mir::Literal(literal!(self + 32)),
+ ])
+ } else {
+ Mir::Literal(literal!(self))
+ }
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for char {
+ fn make_ascii_case_insensitive(self) -> Mir {
+ if self.is_ascii() {
+ (self as u8).make_ascii_case_insensitive()
+ } else {
+ // Non-ASCII chars are left exactly as written.
+ Mir::Literal(literal!(@char self))
+ }
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::Literal {
+ // Byte-wise expansion: each byte becomes its own (possibly
+ // two-way) alternation inside a concat.
+ fn make_ascii_case_insensitive(self) -> Mir {
+ Mir::Concat(
+ self.0
+ .iter()
+ .map(|x| x.make_ascii_case_insensitive())
+ .collect(),
+ )
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::ClassBytes {
+ // Byte classes can use regex-syntax's own (ASCII) simple fold.
+ fn make_ascii_case_insensitive(mut self) -> Mir {
+ self.case_fold_simple();
+ Mir::Class(hir::Class::Bytes(self))
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::ClassUnicode {
+ fn make_ascii_case_insensitive(mut self) -> Mir {
+ use std::cmp;
+
+ // Manual implementation to only perform the case folding on ascii characters.
+
+ let mut ranges = Vec::new();
+
+ for range in self.ranges() {
+ #[inline]
+ fn overlaps(st1: u8, end1: u8, st2: u8, end2: u8) -> bool {
+ (st2 <= st1 && st1 <= end2) || (st1 <= st2 && st2 <= end1)
+ }
+
+ #[inline]
+ fn make_ascii(c: char) -> Option<u8> {
+ if c.is_ascii() {
+ Some(c as u8)
+ } else {
+ None
+ }
+ }
+
+ // For each range, add the opposite-case image of the part of
+ // the range that intersects a-z / A-Z (shift by ±32).
+ match (make_ascii(range.start()), make_ascii(range.end())) {
+ (Some(start), Some(end)) => {
+ if overlaps(b'a', b'z', start, end) {
+ let lower = cmp::max(start, b'a');
+ let upper = cmp::min(end, b'z');
+ ranges.push(hir::ClassUnicodeRange::new(
+ (lower - 32) as char,
+ (upper - 32) as char,
+ ))
+ }
+
+ if overlaps(b'A', b'Z', start, end) {
+ let lower = cmp::max(start, b'A');
+ let upper = cmp::min(end, b'Z');
+ ranges.push(hir::ClassUnicodeRange::new(
+ (lower + 32) as char,
+ (upper + 32) as char,
+ ))
+ }
+ }
+ // Range starts in ASCII but ends beyond it: clamp the
+ // letter intersection at 'z'/'Z'.
+ (Some(start), None) => {
+ if overlaps(b'a', b'z', start, b'z') {
+ let lower = cmp::max(start, b'a');
+ ranges.push(hir::ClassUnicodeRange::new((lower - 32) as char, 'Z'))
+ }
+
+ if overlaps(b'A', b'Z', start, b'Z') {
+ let lower = cmp::max(start, b'A');
+ ranges.push(hir::ClassUnicodeRange::new((lower + 32) as char, 'Z'))
+ }
+ }
+ // Entirely non-ASCII: nothing to fold.
+ _ => (),
+ }
+ }
+
+ self.union(&hir::ClassUnicode::new(ranges));
+
+ Mir::Class(hir::Class::Unicode(self))
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::Class {
+ fn make_ascii_case_insensitive(self) -> Mir {
+ match self {
+ hir::Class::Bytes(b) => b.make_ascii_case_insensitive(),
+ hir::Class::Unicode(u) => u.make_ascii_case_insensitive(),
+ }
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for &Literal {
+ fn make_ascii_case_insensitive(self) -> Mir {
+ match self {
+ Literal::Bytes(bytes) => Mir::Concat(
+ bytes
+ .value()
+ .into_iter()
+ .map(|b| b.make_ascii_case_insensitive())
+ .collect(),
+ ),
+ Literal::Utf8(s) => Mir::Concat(
+ s.value()
+ .chars()
+ .map(|b| b.make_ascii_case_insensitive())
+ .collect(),
+ ),
+ }
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for Mir {
+ // Structural recursion: only Class and Literal leaves change.
+ fn make_ascii_case_insensitive(self) -> Mir {
+ match self {
+ Mir::Empty => Mir::Empty,
+ Mir::Loop(l) => Mir::Loop(Box::new(l.make_ascii_case_insensitive())),
+ Mir::Maybe(m) => Mir::Maybe(Box::new(m.make_ascii_case_insensitive())),
+ Mir::Concat(c) => Mir::Concat(
+ c.into_iter()
+ .map(|m| m.make_ascii_case_insensitive())
+ .collect(),
+ ),
+ Mir::Alternation(a) => Mir::Alternation(
+ a.into_iter()
+ .map(|m| m.make_ascii_case_insensitive())
+ .collect(),
+ ),
+ Mir::Class(c) => c.make_ascii_case_insensitive(),
+ Mir::Literal(l) => l.make_ascii_case_insensitive(),
+ }
+ }
+ }
+
+ #[cfg(test)]
+ mod tests {
+ use super::MakeAsciiCaseInsensitive;
+ use crate::mir::{Class, Mir};
+ use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange};
+
+ // Folds a single input range and compares against the expected
+ // (normalized) union of ranges.
+ fn assert_range(in_s: char, in_e: char, expected: &[(char, char)]) {
+ let range = ClassUnicodeRange::new(in_s, in_e);
+ let class = ClassUnicode::new(vec![range]);
+
+ let expected =
+ ClassUnicode::new(expected.iter().map(|&(a, b)| ClassUnicodeRange::new(a, b)));
+
+ if let Mir::Class(Class::Unicode(result)) = class.make_ascii_case_insensitive() {
+ assert_eq!(result, expected);
+ } else {
+ panic!("Not a unicode class");
+ };
+ }
+
+ #[test]
+ fn no_letters_left() {
+ assert_range(' ', '+', &[(' ', '+')]);
+ }
+
+ #[test]
+ fn no_letters_right() {
+ assert_range('{', '~', &[('{', '~')]);
+ }
+
+ #[test]
+ fn no_letters_middle() {
+ assert_range('[', '`', &[('[', '`')]);
+ }
+
+ #[test]
+ fn lowercase_left_edge() {
+ assert_range('a', 'd', &[('a', 'd'), ('A', 'D')]);
+ }
+
+ #[test]
+ fn lowercase_right_edge() {
+ assert_range('r', 'z', &[('r', 'z'), ('R', 'Z')]);
+ }
+
+ #[test]
+ fn lowercase_total() {
+ assert_range('a', 'z', &[('a', 'z'), ('A', 'Z')]);
+ }
+
+ #[test]
+ fn uppercase_left_edge() {
+ assert_range('A', 'D', &[('a', 'd'), ('A', 'D')]);
+ }
+
+ #[test]
+ fn uppercase_right_edge() {
+ assert_range('R', 'Z', &[('r', 'z'), ('R', 'Z')]);
+ }
+
+ #[test]
+ fn uppercase_total() {
+ assert_range('A', 'Z', &[('a', 'z'), ('A', 'Z')]);
+ }
+
+ #[test]
+ fn lowercase_cross_left() {
+ assert_range('[', 'h', &[('[', 'h'), ('A', 'H')]);
+ }
+
+ #[test]
+ fn lowercase_cross_right() {
+ assert_range('d', '}', &[('d', '}'), ('D', 'Z')]);
+ }
+
+ #[test]
+ fn uppercase_cross_left() {
+ assert_range(';', 'H', &[(';', 'H'), ('a', 'h')]);
+ }
+
+ #[test]
+ fn uppercase_cross_right() {
+ assert_range('T', ']', &[('t', 'z'), ('T', ']')]);
+ }
+
+ #[test]
+ fn cross_both() {
+ assert_range('X', 'c', &[('X', 'c'), ('x', 'z'), ('A', 'C')]);
+ }
+
+ #[test]
+ fn all_letters() {
+ assert_range('+', '|', &[('+', '|')]);
+ }
+
+ #[test]
+ fn oob_all_letters() {
+ assert_range('#', 'é', &[('#', 'é')]);
+ }
+
+ #[test]
+ fn oob_from_uppercase() {
+ assert_range('Q', 'é', &[('A', 'é')]);
+ }
+
+ #[test]
+ fn oob_from_lowercase() {
+ assert_range('q', 'é', &[('q', 'é'), ('Q', 'Z')]);
+ }
+
+ #[test]
+ fn oob_no_letters() {
+ assert_range('|', 'é', &[('|', 'é')]);
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/parser/mod.rs b/vendor/logos-codegen/src/parser/mod.rs
new file mode 100644
index 00000000..3ad7202e
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/mod.rs
@@ -0,0 +1,331 @@
+use beef::lean::Cow;
+use proc_macro2::{Span, TokenStream, TokenTree};
+use quote::quote;
+use syn::spanned::Spanned;
+use syn::{Attribute, GenericParam, Lit, Meta, Type};
+
+use crate::error::Errors;
+use crate::leaf::{Callback, InlineCallback};
+use crate::util::{expect_punct, MaybeVoid};
+use crate::LOGOS_ATTR;
+
+mod definition;
+mod ignore_flags;
+mod nested;
+mod subpattern;
+mod type_params;
+
+pub use self::definition::{Definition, Literal};
+pub use self::ignore_flags::IgnoreFlags;
+use self::nested::{AttributeParser, Nested, NestedValue};
+pub use self::subpattern::Subpatterns;
+use self::type_params::{replace_lifetime, traverse_type, TypeParams};
+
/// Accumulated state of parsing the item a `Logos` derive is attached to:
/// every `#[logos(...)]` setting, collected errors, and generic parameters.
#[derive(Default)]
pub struct Parser {
    /// Errors collected while parsing; reported together at the end.
    pub errors: Errors,
    /// Whether patterns were given as `&str` or `&[u8]` literals; switched
    /// to `Binary` by `parse_literal` when a byte string is seen.
    pub mode: Mode,
    /// Custom source type from `#[logos(source = ...)]`, if any.
    pub source: Option<TokenStream>,
    /// Patterns from `#[logos(skip "...")]` to be consumed without a token.
    pub skips: Vec<Literal>,
    /// Extras type from `#[logos(extras = ...)]`; `Void` renders as `()`.
    pub extras: MaybeVoid,
    /// Error type from `#[logos(error = ...)]`; `Void` renders as `()`.
    pub error_type: MaybeVoid,
    /// Named subpatterns declared with `#[logos(subpattern name = ...)]`.
    pub subpatterns: Subpatterns,
    /// Override path to the `logos` crate from `#[logos(crate = ...)]`.
    pub logos_path: Option<TokenStream>,
    /// Lifetime/type parameters of the item and their concrete substitutes.
    types: TypeParams,
}
+
/// Input mode the lexer is generated for. Defaults to `Utf8`; parsing a
/// byte-string literal (`b"..."`) switches the parser to `Binary`.
#[derive(Default)]
pub enum Mode {
    /// All patterns are `&str` literals.
    #[default]
    Utf8,
    /// At least one pattern is a byte-slice literal.
    Binary,
}
+
impl Parser {
    /// Records one generic parameter from the derived item's generics.
    ///
    /// Lifetimes and type parameters are forwarded to `TypeParams`;
    /// const generics are rejected with an error.
    pub fn parse_generic(&mut self, param: GenericParam) {
        match param {
            GenericParam::Lifetime(lt) => {
                self.types.explicit_lifetime(lt, &mut self.errors);
            }
            GenericParam::Type(ty) => {
                self.types.add(ty.ident);
            }
            GenericParam::Const(c) => {
                self.err("Logos doesn't support const generics.", c.span());
            }
        }
    }

    /// Renders the generics for the generated `impl`, or `None` if the
    /// item declared no lifetime and no type parameters.
    pub fn generics(&mut self) -> Option<TokenStream> {
        self.types.generics(&mut self.errors)
    }

    /// Extracts the inner tokens of a list-style attribute (the `...` in
    /// `#[logos(...)]`), leaving an empty stream behind. Returns `None`
    /// for non-list attributes such as `#[attr]` or `#[attr = ...]`.
    fn parse_attr(&mut self, attr: &mut Attribute) -> Option<AttributeParser> {
        match &mut attr.meta {
            Meta::List(list) => {
                // Take the tokens out in place so they need not be cloned.
                let tokens = std::mem::replace(&mut list.tokens, TokenStream::new());

                Some(AttributeParser::new(tokens))
            }
            _ => None,
        }
    }

    /// Try to parse the main `#[logos(...)]`, does nothing if
    /// the attribute's name isn't `logos`.
    pub fn try_parse_logos(&mut self, attr: &mut Attribute) {
        if !attr.path().is_ident(LOGOS_ATTR) {
            return;
        }

        let nested = match self.parse_attr(attr) {
            Some(tokens) => tokens,
            None => {
                self.err("Expected #[logos(...)]", attr.span());
                return;
            }
        };

        // Each comma-separated element must be `name ...`; anything else
        // is reported and skipped.
        for nested in nested {
            let (name, value) = match nested {
                Nested::Named(name, value) => (name, value),
                Nested::Unexpected(tokens) | Nested::Unnamed(tokens) => {
                    self.err("Invalid nested attribute", tokens.span());
                    continue;
                }
            };

            // Dispatch table from attribute name to its handler.
            // IMPORTANT: Keep these sorted alphabetically for binary search down the line
            #[allow(clippy::type_complexity)]
            static NESTED_LOOKUP: &[(&str, fn(&mut Parser, Span, NestedValue))] = &[
                ("crate", |parser, span, value| match value {
                    NestedValue::Assign(logos_path) => parser.logos_path = Some(logos_path),
                    _ => {
                        parser.err("Expected: #[logos(crate = path::to::logos)]", span);
                    }
                }),
                ("error", |parser, span, value| match value {
                    NestedValue::Assign(value) => {
                        let span = value.span();

                        // `replace` hands back any earlier setting, which
                        // turns a duplicate into a two-note error.
                        if let MaybeVoid::Some(previous) = parser.error_type.replace(value) {
                            parser
                                .err("Error type can be defined only once", span)
                                .err("Previous definition here", previous.span());
                        }
                    }
                    _ => {
                        parser.err("Expected: #[logos(error = SomeType)]", span);
                    }
                }),
                ("extras", |parser, span, value| match value {
                    NestedValue::Assign(value) => {
                        let span = value.span();

                        if let MaybeVoid::Some(previous) = parser.extras.replace(value) {
                            parser
                                .err("Extras can be defined only once", span)
                                .err("Previous definition here", previous.span());
                        }
                    }
                    _ => {
                        parser.err("Expected: #[logos(extras = SomeType)]", span);
                    }
                }),
                ("skip", |parser, span, value| match value {
                    NestedValue::Literal(lit) => {
                        if let Some(literal) = parser.parse_literal(Lit::new(lit)) {
                            parser.skips.push(literal);
                        }
                    }
                    _ => {
                        parser.err("Expected: #[logos(skip \"regex literal\")]", span);
                    }
                }),
                ("source", |parser, span, value| match value {
                    NestedValue::Assign(value) => {
                        let span = value.span();
                        if let Some(previous) = parser.source.replace(value) {
                            parser
                                .err("Source can be defined only once", span)
                                .err("Previous definition here", previous.span());
                        }
                    }
                    _ => {
                        parser.err("Expected: #[logos(source = SomeType)]", span);
                    }
                }),
                ("subpattern", |parser, span, value| match value {
                    NestedValue::KeywordAssign(name, value) => {
                        parser.subpatterns.add(name, value, &mut parser.errors);
                    }
                    _ => {
                        parser.err(r#"Expected: #[logos(subpattern name = r"regex")]"#, span);
                    }
                }),
                ("type", |parser, span, value| match value {
                    NestedValue::KeywordAssign(generic, ty) => {
                        parser.types.set(generic, ty, &mut parser.errors);
                    }
                    _ => {
                        parser.err("Expected: #[logos(type T = SomeType)]", span);
                    }
                }),
            ];

            // Binary search is valid because NESTED_LOOKUP is kept sorted.
            match NESTED_LOOKUP.binary_search_by_key(&name.to_string().as_str(), |(n, _)| n) {
                Ok(idx) => NESTED_LOOKUP[idx].1(self, name.span(), value),
                Err(_) => {
                    // Unknown name: list every accepted attribute.
                    let mut err = format!(
                        "Unknown nested attribute #[logos({name})], expected one of: {}",
                        NESTED_LOOKUP[0].0
                    );

                    for (allowed, _) in &NESTED_LOOKUP[1..] {
                        err.push_str(", ");
                        err.push_str(allowed);
                    }

                    self.err(err, name.span());
                }
            }
        }
    }

    /// Classifies a literal pattern as UTF-8 or bytes. A byte-string
    /// literal flips the whole lexer into `Mode::Binary`. Any other
    /// literal kind is an error and yields `None`.
    pub fn parse_literal(&mut self, lit: Lit) -> Option<Literal> {
        match lit {
            Lit::Str(string) => Some(Literal::Utf8(string)),
            Lit::ByteStr(bytes) => {
                self.mode = Mode::Binary;

                Some(Literal::Bytes(bytes))
            }
            _ => {
                self.err("Expected a &str or &[u8] slice", lit.span());

                None
            }
        }
    }

    /// Parse attribute definition of a token:
    ///
    /// + `#[token(literal[, callback])]`
    /// + `#[regex(literal[, callback])]`
    pub fn parse_definition(&mut self, attr: &mut Attribute) -> Option<Definition> {
        let mut nested = self.parse_attr(attr)?;

        // The first element must be the pattern literal.
        let literal = match nested.parsed::<Lit>()? {
            Ok(lit) => self.parse_literal(lit)?,
            Err(err) => {
                self.err(err.to_string(), err.span());

                return None;
            }
        };

        let mut def = Definition::new(literal);

        // Remaining elements: an optional leading unnamed callback,
        // then named arguments handled by `Definition::named_attr`.
        for (position, next) in nested.enumerate() {
            match next {
                Nested::Unexpected(tokens) => {
                    self.err("Unexpected token in attribute", tokens.span());
                }
                Nested::Unnamed(tokens) => match position {
                    0 => def.callback = self.parse_callback(tokens),
                    _ => {
                        self.err(
                            "\
                            Expected a named argument at this position\n\
                            \n\
                            hint: If you are trying to define a callback here use: callback = ...\
                            ",
                            tokens.span(),
                        );
                    }
                },
                Nested::Named(name, value) => {
                    def.named_attr(name, value, self);
                }
            }
        }

        Some(def)
    }

    /// Parses a callback argument: either a path/label (anything not
    /// starting with `|`) or an inline one-parameter closure
    /// `|arg| body` / `|arg| { body }`.
    fn parse_callback(&mut self, tokens: TokenStream) -> Option<Callback> {
        let span = tokens.span();
        let mut tokens = tokens.into_iter();

        // `expect_punct` yields the token back when it is NOT `|`, which
        // means this is a plain path/label callback.
        if let Some(tt) = expect_punct(tokens.next(), '|') {
            let mut label = TokenStream::from(tt);

            label.extend(tokens);

            return Some(Callback::Label(label));
        }

        // Closure form: expect exactly `| ident |` before the body.
        let first = tokens.next();
        let error = expect_punct(tokens.next(), '|');

        let arg = match (error, first) {
            (None, Some(TokenTree::Ident(arg))) => arg,
            _ => {
                self.err(
                    "Inline callbacks must use closure syntax with exactly one parameter",
                    span,
                );
                return None;
            }
        };

        // Body is either a single `{ ... }` group or the rest of the tokens.
        let body = match tokens.next() {
            Some(TokenTree::Group(group)) => group.stream(),
            Some(first) => {
                let mut body = TokenStream::from(first);

                body.extend(tokens);
                body
            }
            None => {
                self.err("Callback missing a body", span);
                return None;
            }
        };

        let inline = InlineCallback { arg, body, span };

        Some(inline.into())
    }

    /// Checks if `ty` is a declared generic param, if so replaces it
    /// with a concrete type defined using #[logos(type T = Type)]
    ///
    /// If no matching generic param is found, all lifetimes are fixed
    /// to the source lifetime
    pub fn get_type(&self, ty: &mut Type) -> TokenStream {
        traverse_type(ty, &mut |ty| {
            if let Type::Path(tp) = ty {
                // Only plain paths (no qualified `<T as Trait>::` prefix)
                // can name a generic parameter.
                if tp.qself.is_none() {
                    // If `ty` is a generic type parameter, try to find
                    // its concrete type defined with #[logos(type T = Type)]
                    if let Some(substitute) = self.types.find(&tp.path) {
                        *ty = substitute;
                    }
                }
            }
            // If `ty` is a concrete type, fix its lifetimes to 'source
            replace_lifetime(ty);
        });

        quote!(#ty)
    }

    /// Records an error at `span`; returns `&mut Errors` so follow-up
    /// notes can be chained onto the same report.
    pub fn err<M>(&mut self, message: M, span: Span) -> &mut Errors
    where
        M: Into<Cow<'static, str>>,
    {
        self.errors.err(message, span)
    }
}
diff --git a/vendor/logos-codegen/src/parser/nested.rs b/vendor/logos-codegen/src/parser/nested.rs
new file mode 100644
index 00000000..44ecaeac
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/nested.rs
@@ -0,0 +1,146 @@
+use proc_macro2::token_stream::IntoIter as TokenIter;
+use proc_macro2::{Ident, Literal, TokenStream, TokenTree};
+use quote::quote;
+
+use crate::util::{expect_punct, is_punct};
+
/// The value part of a named nested attribute — whatever follows the
/// leading identifier inside `#[logos(name ...)]`.
pub enum NestedValue {
    /// `name = ...`
    Assign(TokenStream),
    /// `name "literal"`
    Literal(Literal),
    /// `name(...)`
    Group(TokenStream),
    /// `name ident = ...`
    KeywordAssign(Ident, TokenStream),
}
+
/// One comma-separated element parsed out of an attribute's token stream.
pub enum Nested {
    /// Unnamed nested attribute, such as a string,
    /// callback closure, or a lone ident/path
    ///
    /// Note: a lone ident is also emitted as `Unnamed` (see the
    /// `Iterator` impl — there is no `Named`-without-value form)
    Unnamed(TokenStream),
    /// Named: name ...
    Named(Ident, NestedValue),
    /// Tokens that could not be parsed as a well-formed nested attribute
    Unexpected(TokenStream),
}
+
/// Streaming parser over the tokens inside a `#[logos(...)]`-style
/// attribute; yields one [`Nested`] per comma-separated element.
pub struct AttributeParser {
    // Remaining tokens of the attribute body.
    inner: TokenIter,
}
+
/// Zero-token placeholder that converts into an empty `TokenStream`;
/// used as the `first` argument of `collect_tail` when an element has
/// no leading token to seed the stream with.
pub struct Empty;

impl From<Empty> for TokenStream {
    fn from(_: Empty) -> TokenStream {
        TokenStream::new()
    }
}
+
impl AttributeParser {
    /// Wraps the token stream found inside an attribute's parentheses.
    pub fn new(stream: TokenStream) -> Self {
        AttributeParser {
            inner: stream.into_iter(),
        }
    }

    /// Parses everything up to the next `,` (or end of stream) as a `T`.
    /// Returns `None` when no tokens remain.
    pub fn parsed<T>(&mut self) -> Option<syn::Result<T>>
    where
        T: syn::parse::Parse,
    {
        let tokens = self.collect_tail(TokenStream::new());

        if tokens.is_empty() {
            return None;
        }

        Some(syn::parse2(tokens))
    }

    /// Next token, or `None` on a standalone `,` separator or at the end
    /// of the stream (see `expect_punct` in `util`). The comma itself is
    /// consumed.
    fn next_tt(&mut self) -> Option<TokenTree> {
        expect_punct(self.inner.next(), ',')
    }

    /// Appends tokens after `first` until the element's terminating comma
    /// (consumed) or the end of the stream.
    fn collect_tail<T>(&mut self, first: T) -> TokenStream
    where
        T: Into<TokenStream>,
    {
        let mut out = first.into();

        while let Some(tt) = self.next_tt() {
            out.extend(Some(tt));
        }

        out
    }

    /// Element that starts with an ident but fits no named form (e.g. a
    /// path or expression); re-collected verbatim as `Unnamed`.
    fn parse_unnamed(&mut self, first: Ident, next: TokenTree) -> Nested {
        let mut out = TokenStream::from(TokenTree::Ident(first));

        out.extend(self.collect_tail(next));

        Nested::Unnamed(out.into_iter().collect())
    }

    /// `name = ...`
    fn parse_assign(&mut self, name: Ident) -> Nested {
        let value = self.collect_tail(Empty);

        Nested::Named(name, NestedValue::Assign(value))
    }

    /// `name "literal"` — tokens trailing the literal are discarded.
    fn parse_literal(&mut self, name: Ident, lit: Literal) -> Nested {
        // TODO: Error if there are any tokens following
        let _ = self.collect_tail(Empty);

        Nested::Named(name, NestedValue::Literal(lit))
    }

    /// `name(...)`
    fn parse_group(&mut self, name: Ident, group: TokenStream) -> Nested {
        Nested::Named(name, NestedValue::Group(group))
    }

    /// `keyword name = ...` — anything other than `=` after the second
    /// ident is surfaced as `Nested::Unexpected`.
    fn parse_keyword(&mut self, keyword: Ident, name: Ident) -> Nested {
        // `expect_punct` hands the token back only when it is NOT `=`.
        let error = expect_punct(self.next_tt(), '=');

        match error {
            Some(error) => {
                let error = self.collect_tail(error);

                Nested::Unexpected(error)
            }
            None => {
                let value = self.collect_tail(Empty);

                Nested::Named(keyword, NestedValue::KeywordAssign(name, value))
            }
        }
    }
}
+
impl Iterator for AttributeParser {
    type Item = Nested;

    /// Classifies the next comma-separated element by looking at its
    /// first one or two tokens, then delegates to the matching parser.
    fn next(&mut self) -> Option<Nested> {
        let first = self.inner.next()?;

        // Elements not starting with an ident are collected verbatim.
        let name = match first {
            TokenTree::Ident(ident) => ident,
            tt => {
                let stream = self.collect_tail(tt);

                return Some(Nested::Unnamed(stream.into_iter().collect()));
            }
        };

        match self.next_tt() {
            // `name = ...`
            Some(tt) if is_punct(&tt, '=') => Some(self.parse_assign(name)),
            // `name "literal"`
            Some(TokenTree::Literal(lit)) => Some(self.parse_literal(name, lit)),
            // `name(...)`
            Some(TokenTree::Group(group)) => Some(self.parse_group(name, group.stream())),
            // `name ident ...` — keyword-assign form
            Some(TokenTree::Ident(next)) => Some(self.parse_keyword(name, next)),
            // Any other second token: treat the whole element as unnamed.
            Some(next) => Some(self.parse_unnamed(name, next)),
            // A lone ident: the element ended at a comma or end of stream.
            None => Some(Nested::Unnamed(quote!(#name))),
        }
    }
}
diff --git a/vendor/logos-codegen/src/parser/subpattern.rs b/vendor/logos-codegen/src/parser/subpattern.rs
new file mode 100644
index 00000000..eb620028
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/subpattern.rs
@@ -0,0 +1,97 @@
+use proc_macro2::TokenStream;
+use syn::Ident;
+
+use crate::error::Errors;
+use crate::mir::Mir;
+use crate::parser::definition::{bytes_to_regex_string, Literal};
+
/// Named subpatterns declared with `#[logos(subpattern name = r"...")]`,
/// stored as (name, already-expanded regex source) pairs.
#[derive(Default)]
pub struct Subpatterns {
    // Insertion-ordered; looked up linearly by `Ident` equality.
    map: Vec<(Ident, String)>,
}
+
impl Subpatterns {
    /// Registers subpattern `param` with regex source `pattern`.
    ///
    /// The pattern is expanded against previously defined subpatterns
    /// via `fix`, validated as regex (an invalid pattern is reported but
    /// still stored so later references resolve), and appended to the map.
    /// Redefining an existing name is an error.
    pub fn add(&mut self, param: Ident, pattern: TokenStream, errors: &mut Errors) {
        let lit = match syn::parse2::<Literal>(pattern) {
            Ok(lit) => lit,
            Err(e) => {
                errors.err(e.to_string(), e.span());
                return;
            }
        };

        if let Some((name, _)) = self.map.iter().find(|(name, _)| *name == param) {
            errors
                .err(format!("{} can only be assigned once", param), param.span())
                .err("Previously assigned here", name.span());
            return;
        }

        let fixed = self.fix(&lit, errors);

        // Validate the literal as proper regex. If it's not, emit an error.
        let mir = match &lit {
            Literal::Utf8(_) => Mir::utf8(&fixed),
            Literal::Bytes(_) => Mir::binary(&fixed),
        };

        if let Err(err) = mir {
            errors.err(err, lit.span());
        };

        self.map.push((param, fixed));
    }

    /// Expands every `(?&name)` subpattern reference in `lit` into a
    /// non-capturing group `(?:subpattern)` using the definitions
    /// registered so far. Unknown or malformed references are reported
    /// through `errors`; the returned pattern is still produced so
    /// regex-syntax can report any remaining problems.
    pub fn fix(&self, lit: &Literal, errors: &mut Errors) -> String {
        // `i` is a byte offset into `pattern`, always positioned at or
        // before the next candidate `(?&` sequence.
        let mut i = 0;
        let mut pattern = match lit {
            Literal::Utf8(s) => s.value(),
            Literal::Bytes(b) => bytes_to_regex_string(b.value()),
        };

        while let Some(f) = pattern[i..].find("(?&") {
            i += f;
            // Rewrite the reference opener into a non-capturing group.
            pattern.replace_range(i..i + 3, "(?:");
            i += 3;

            let subref_end = if let Some(f) = pattern[i..].find(')') {
                i + f
            } else {
                pattern.truncate(i); // truncate so latter error doesn't suppress
                break; // regex-syntax will report the unclosed group
            };

            let name = &pattern[i..subref_end];
            let name = match syn::parse_str::<Ident>(name) {
                Ok(name) => name,
                Err(_) => {
                    errors.err(
                        format!("subpattern reference `{}` is not an identifier", name),
                        lit.span(),
                    );
                    // we emitted the error; make something up and continue
                    pattern.replace_range(i..subref_end, "_");
                    i += 2;
                    continue;
                }
            };

            match self.map.iter().find(|(def, _)| *def == name) {
                Some((_, subpattern)) => {
                    // Splice in the stored expansion and skip past the `)`.
                    pattern.replace_range(i..subref_end, subpattern);
                    i += subpattern.len() + 1;
                }
                None => {
                    errors.err(
                        format!("subpattern reference `{}` has not been defined", name),
                        lit.span(),
                    );
                    // leaving `(?:name)` is fine
                    i = subref_end + 1;
                }
            }
        }

        pattern
    }
}
diff --git a/vendor/logos-codegen/src/parser/type_params.rs b/vendor/logos-codegen/src/parser/type_params.rs
new file mode 100644
index 00000000..1be4948e
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/type_params.rs
@@ -0,0 +1,200 @@
+use proc_macro2::{Ident, Span, TokenStream};
+use quote::quote;
+use syn::spanned::Spanned;
+use syn::{Lifetime, LifetimeParam, Path, Type};
+
+use crate::error::Errors;
+
/// Generic parameters declared on the token type: whether a lifetime was
/// seen, plus each type parameter with its optional concrete substitute
/// set by `#[logos(type T = Type)]`.
#[derive(Default)]
pub struct TypeParams {
    /// True once a lifetime parameter has been declared on the item.
    lifetime: bool,
    /// Declared type parameters paired with their concrete substitutions.
    type_params: Vec<(Ident, Option<Type>)>,
}
+
+impl TypeParams {
+ pub fn explicit_lifetime(&mut self, lt: LifetimeParam, errors: &mut Errors) {
+ if self.lifetime {
+ let span = lt.span();
+
+ errors.err("Logos types can only have one lifetime can be set", span);
+ }
+
+ self.lifetime = true;
+ }
+
+ pub fn add(&mut self, param: Ident) {
+ self.type_params.push((param, None));
+ }
+
+ pub fn set(&mut self, param: Ident, ty: TokenStream, errors: &mut Errors) {
+ let ty = match syn::parse2::<Type>(ty) {
+ Ok(mut ty) => {
+ replace_lifetimes(&mut ty);
+ ty
+ }
+ Err(err) => {
+ errors.err(err.to_string(), err.span());
+ return;
+ }
+ };
+
+ match self.type_params.iter_mut().find(|(name, _)| *name == param) {
+ Some((_, slot)) => {
+ if let Some(previous) = slot.replace(ty) {
+ errors
+ .err(
+ format!("{} can only have one type assigned to it", param),
+ param.span(),
+ )
+ .err("Previously assigned here", previous.span());
+ }
+ }
+ None => {
+ errors.err(
+ format!("{} is not a declared type parameter", param),
+ param.span(),
+ );
+ }
+ }
+ }
+
+ pub fn find(&self, path: &Path) -> Option<Type> {
+ for (ident, ty) in &self.type_params {
+ if path.is_ident(ident) {
+ return ty.clone();
+ }
+ }
+
+ None
+ }
+
+ pub fn generics(&self, errors: &mut Errors) -> Option<TokenStream> {
+ if !self.lifetime && self.type_params.is_empty() {
+ return None;
+ }
+
+ let mut generics = Vec::new();
+
+ if self.lifetime {
+ generics.push(quote!('s));
+ }
+
+ for (ty, replace) in self.type_params.iter() {
+ match replace {
+ Some(ty) => generics.push(quote!(#ty)),
+ None => {
+ errors.err(
+ format!(
+ "Generic type parameter without a concrete type\n\
+ \n\
+ Define a concrete type Logos can use: #[logos(type {} = Type)]",
+ ty,
+ ),
+ ty.span(),
+ );
+ }
+ }
+ }
+
+ if generics.is_empty() {
+ None
+ } else {
+ Some(quote!(<#(#generics),*>))
+ }
+ }
+}
+
+pub fn replace_lifetimes(ty: &mut Type) {
+ traverse_type(ty, &mut replace_lifetime)
+}
+
+pub fn replace_lifetime(ty: &mut Type) {
+ use syn::{GenericArgument, PathArguments};
+
+ match ty {
+ Type::Path(p) => {
+ p.path
+ .segments
+ .iter_mut()
+ .filter_map(|segment| match &mut segment.arguments {
+ PathArguments::AngleBracketed(ab) => Some(ab),
+ _ => None,
+ })
+ .flat_map(|ab| ab.args.iter_mut())
+ .for_each(|arg| {
+ if let GenericArgument::Lifetime(lt) = arg {
+ *lt = Lifetime::new("'s", lt.span());
+ }
+ });
+ }
+ Type::Reference(r) => {
+ let span = match r.lifetime.take() {
+ Some(lt) => lt.span(),
+ None => Span::call_site(),
+ };
+
+ r.lifetime = Some(Lifetime::new("'s", span));
+ }
+ _ => (),
+ }
+}
+
+pub fn traverse_type(ty: &mut Type, f: &mut impl FnMut(&mut Type)) {
+ f(ty);
+ match ty {
+ Type::Array(array) => traverse_type(&mut array.elem, f),
+ Type::BareFn(bare_fn) => {
+ for input in &mut bare_fn.inputs {
+ traverse_type(&mut input.ty, f);
+ }
+ if let syn::ReturnType::Type(_, ty) = &mut bare_fn.output {
+ traverse_type(ty, f);
+ }
+ }
+ Type::Group(group) => traverse_type(&mut group.elem, f),
+ Type::Paren(paren) => traverse_type(&mut paren.elem, f),
+ Type::Path(path) => traverse_path(&mut path.path, f),
+ Type::Ptr(p) => traverse_type(&mut p.elem, f),
+ Type::Reference(r) => traverse_type(&mut r.elem, f),
+ Type::Slice(slice) => traverse_type(&mut slice.elem, f),
+ Type::TraitObject(object) => object.bounds.iter_mut().for_each(|bound| {
+ if let syn::TypeParamBound::Trait(trait_bound) = bound {
+ traverse_path(&mut trait_bound.path, f);
+ }
+ }),
+ Type::Tuple(tuple) => tuple
+ .elems
+ .iter_mut()
+ .for_each(|elem| traverse_type(elem, f)),
+ _ => (),
+ }
+}
+
+fn traverse_path(path: &mut Path, f: &mut impl FnMut(&mut Type)) {
+ for segment in &mut path.segments {
+ match &mut segment.arguments {
+ syn::PathArguments::None => (),
+ syn::PathArguments::AngleBracketed(args) => {
+ for arg in &mut args.args {
+ match arg {
+ syn::GenericArgument::Type(ty) => {
+ traverse_type(ty, f);
+ }
+ syn::GenericArgument::AssocType(assoc) => {
+ traverse_type(&mut assoc.ty, f);
+ }
+ _ => (),
+ }
+ }
+ }
+ syn::PathArguments::Parenthesized(args) => {
+ for arg in &mut args.inputs {
+ traverse_type(arg, f);
+ }
+ if let syn::ReturnType::Type(_, ty) = &mut args.output {
+ traverse_type(ty, f);
+ }
+ }
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/util.rs b/vendor/logos-codegen/src/util.rs
new file mode 100644
index 00000000..156de035
--- /dev/null
+++ b/vendor/logos-codegen/src/util.rs
@@ -0,0 +1,64 @@
+use proc_macro2::{Spacing, Span, TokenStream, TokenTree};
+use quote::{quote, ToTokens};
+use syn::Ident;
+
/// Analog to Option<TokenStream>, except when put into the quote!
/// macro, `MaybeVoid::Void` will produce `()`
#[derive(Clone, Default)]
pub enum MaybeVoid {
    /// A concrete token stream to splice in.
    Some(TokenStream),
    /// Nothing set; renders as the unit `()` when quoted.
    #[default]
    Void,
}
+
+impl MaybeVoid {
+ pub fn replace(&mut self, stream: TokenStream) -> MaybeVoid {
+ std::mem::replace(self, MaybeVoid::Some(stream))
+ }
+
+ pub fn take(&mut self) -> MaybeVoid {
+ std::mem::replace(self, MaybeVoid::Void)
+ }
+}
+
+impl ToTokens for MaybeVoid {
+ fn to_tokens(&self, out: &mut TokenStream) {
+ match self {
+ MaybeVoid::Some(stream) => out.extend(stream.clone()),
+ MaybeVoid::Void => out.extend(quote!(())),
+ }
+ }
+
+ fn to_token_stream(&self) -> TokenStream {
+ match self {
+ MaybeVoid::Some(stream) => stream.clone(),
+ MaybeVoid::Void => quote!(()),
+ }
+ }
+
+ fn into_token_stream(self) -> TokenStream {
+ match self {
+ MaybeVoid::Some(stream) => stream,
+ MaybeVoid::Void => quote!(()),
+ }
+ }
+}
+
+pub fn is_punct(tt: &TokenTree, expect: char) -> bool {
+ matches!(tt, TokenTree::Punct(punct) if punct.as_char() == expect && punct.spacing() == Spacing::Alone)
+}
+
+/// If supplied `tt` is a punct matching a char, returns `None`, else returns `tt`
+pub fn expect_punct(tt: Option<TokenTree>, expect: char) -> Option<TokenTree> {
+ tt.filter(|tt| !is_punct(tt, expect))
+}
+
/// Conversion of borrowed strings into [`Ident`]s.
pub trait ToIdent {
    /// Builds an `Ident` carrying this value's text.
    fn to_ident(&self) -> Ident;
}
+
impl ToIdent for str {
    /// Creates an `Ident` with this string's content at `Span::call_site()`.
    /// NOTE(review): `Ident::new` panics on invalid identifier text —
    /// callers are presumably expected to pass valid names; confirm.
    fn to_ident(&self) -> Ident {
        Ident::new(self, Span::call_site())
    }
}