diff options
Diffstat (limited to 'vendor/logos-codegen/src/generator/context.rs')
| -rw-r--r-- | vendor/logos-codegen/src/generator/context.rs | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/generator/context.rs b/vendor/logos-codegen/src/generator/context.rs new file mode 100644 index 00000000..dc52f594 --- /dev/null +++ b/vendor/logos-codegen/src/generator/context.rs @@ -0,0 +1,128 @@ +use proc_macro2::TokenStream; +use quote::quote; + +use crate::generator::Generator; +use crate::graph::NodeId; + +/// This struct keeps track of bytes available to be read without +/// bounds checking across the tree. +/// +/// For example, a branch that matches 4 bytes followed by a fork +/// with smallest branch containing of 2 bytes can do a bounds check +/// for 6 bytes ahead, and leave the remaining 2 byte array (fixed size) +/// to be handled by the fork, avoiding bound checks there. +#[derive(Default, Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct Context { + /// Amount of bytes that haven't been bumped yet but should + /// before a new read is performed + at: usize, + /// Number of bytes available without bound checks + available: usize, + /// Whether or not the Lexer has been bumped at least by 1 byte + bumped: bool, + /// Node to backtrack to to in case an explicit match has failed. + /// If `None` will instead produce an error token. + backtrack: Option<NodeId>, +} + +impl Context { + pub fn can_backtrack(&self) -> bool { + self.backtrack.is_some() + } + + pub fn switch(&mut self, miss: Option<NodeId>) -> Option<TokenStream> { + self.backtrack = Some(miss?); + self.bump() + } + + pub const fn advance(self, n: usize) -> Self { + Context { + at: self.at + n, + ..self + } + } + + pub fn bump(&mut self) -> Option<TokenStream> { + match self.at { + 0 => None, + n => { + let tokens = quote!(lex.bump_unchecked(#n);); + self.at = 0; + self.available = 0; + self.bumped = true; + Some(tokens) + } + } + } + + pub fn remainder(&self) -> usize { + self.available.saturating_sub(self.at) + } + + pub fn read_byte(&mut self) -> TokenStream { + let at = self.at; + + self.advance(1); + + #[cfg(not(feature = "forbid_unsafe"))] + { + quote!(unsafe { lex.read_byte_unchecked(#at) }) + } + + #[cfg(feature = "forbid_unsafe")] + { + quote!(lex.read_byte(#at)) + } + } + + pub fn read(&mut self, len: usize) -> TokenStream { + self.available = len; + + match (self.at, len) { + (0, 0) => quote!(lex.read::<u8>()), + (a, 0) => quote!(lex.read_at::<u8>(#a)), + (0, l) => quote!(lex.read::<&[u8; #l]>()), + (a, l) => quote!(lex.read_at::<&[u8; #l]>(#a)), + } + } + + pub fn wipe(&mut self) { + self.available = 0; + } + + const fn backtrack(self) -> Self { + Context { + at: 0, + available: 0, + bumped: self.bumped, + backtrack: None, + } + } + + pub fn miss(mut self, miss: Option<NodeId>, gen: &mut Generator) -> TokenStream { + self.wipe(); + match (miss, self.backtrack) { + (Some(id), _) => gen.goto(id, self).clone(), + (_, Some(id)) => gen.goto(id, self.backtrack()).clone(), + _ if self.bumped => quote!(lex.error()), + _ => quote!(_error(lex)), + } + } + + pub fn write_suffix(&self, buf: &mut String) { + use std::fmt::Write; + + if self.at > 0 { + let _ = write!(buf, "_at{}", self.at); + } + if self.available > 0 { + let _ = write!(buf, "_with{}", self.available); + } + if let Some(id) = self.backtrack { + let _ = write!(buf, "_ctx{}", id); + } + if self.bumped { + buf.push_str("_x"); + } + } +} |
