Diffstat (limited to 'vendor/logos-codegen/src/generator/fork.rs')
-rw-r--r--  vendor/logos-codegen/src/generator/fork.rs  216
1 file changed, 216 insertions(+), 0 deletions(-)
diff --git a/vendor/logos-codegen/src/generator/fork.rs b/vendor/logos-codegen/src/generator/fork.rs
new file mode 100644
index 00000000..11d4eab2
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/fork.rs
@@ -0,0 +1,216 @@
+use std::cmp::max;
+
+use fnv::FnvHashMap as Map;
+use proc_macro2::TokenStream;
+use quote::quote;
+
+use crate::generator::{Context, Generator};
+use crate::graph::{Fork, NodeId, Range};
+use crate::util::ToIdent;
+
+type Targets = Map<NodeId, Vec<Range>>;
+
+impl Generator<'_> {
+    pub fn generate_fork(&mut self, this: NodeId, fork: &Fork, mut ctx: Context) -> TokenStream {
+        let mut targets: Targets = Map::default();
+
+        for (range, then) in fork.branches() {
+            targets.entry(then).or_default().push(range);
+        }
+        let loops_to_self = self.meta[this].loop_entry_from.contains(&this);
+
+        match targets.len() {
+            1 if loops_to_self => return self.generate_fast_loop(fork, ctx),
+            0..=2 => (),
+            _ => return self.generate_fork_jump_table(this, fork, targets, ctx),
+        }
+        let miss = ctx.miss(fork.miss, self);
+        let end = self.fork_end(this, &miss);
+        let (byte, read) = self.fork_read(this, end, &mut ctx);
+        let branches = targets.into_iter().map(|(id, ranges)| {
+            let next = self.goto(id, ctx.advance(1));
+
+            match *ranges {
+                [range] => {
+                    quote!(#range => #next,)
+                }
+                [a, b] if a.is_byte() && b.is_byte() => {
+                    quote!(#a | #b => #next,)
+                }
+                _ => {
+                    let test = self.generate_test(ranges).clone();
+                    let next = self.goto(id, ctx.advance(1));
+
+                    quote!(byte if #test(byte) => #next,)
+                }
+            }
+        });
+
+        quote! {
+            #read
+
+            match #byte {
+                #(#branches)*
+                _ => #miss,
+            }
+        }
+    }
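+    // A rough, illustrative sketch of what `generate_fork` emits for a small
+    // fork (two grouped targets). The handler names `goto_2` and `goto_7`, the
+    // predicate `ident_test`, the read expression, and the miss expression are
+    // hypothetical placeholders, not identifiers produced by this file:
+    //
+    //     let byte = match lex.read() {
+    //         Some(byte) => byte,
+    //         None => return _end(lex),
+    //     };
+    //
+    //     match byte {
+    //         b'-' => goto_2(lex),
+    //         byte if ident_test(byte) => goto_7(lex),
+    //         _ => _error(lex),
+    //     }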
+
+    fn generate_fork_jump_table(
+        &mut self,
+        this: NodeId,
+        fork: &Fork,
+        targets: Targets,
+        mut ctx: Context,
+    ) -> TokenStream {
+        let miss = ctx.miss(fork.miss, self);
+        let end = self.fork_end(this, &miss);
+        let (byte, read) = self.fork_read(this, end, &mut ctx);
+
+        let mut table: [u8; 256] = [0; 256];
+        let mut jumps = vec!["__".to_ident()];
+
+        let branches = targets
+            .into_iter()
+            .enumerate()
+            .map(|(idx, (id, ranges))| {
+                let idx = (idx as u8) + 1;
+                let next = self.goto(id, ctx.advance(1));
+                jumps.push(format!("J{}", id).to_ident());
+
+                for byte in ranges.into_iter().flatten() {
+                    table[byte as usize] = idx;
+                }
+                let jump = jumps.last().unwrap();
+
+                quote!(Jump::#jump => #next,)
+            })
+            .collect::<TokenStream>();
+
+        let may_error = table.iter().any(|&idx| idx == 0);
+
+        let jumps = jumps.as_slice();
+        let table = table.iter().copied().map(|idx| &jumps[idx as usize]);
+
+        let jumps = if may_error { jumps } else { &jumps[1..] };
+        let error_branch = if may_error {
+            Some(quote!(Jump::__ => #miss))
+        } else {
+            None
+        };
+
+        quote! {
+            enum Jump {
+                #(#jumps,)*
+            }
+
+            const LUT: [Jump; 256] = {
+                use Jump::*;
+
+                [#(#table),*]
+            };
+
+            #read
+
+            match LUT[#byte as usize] {
+                #branches
+                #error_branch
+            }
+        }
+    }
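+    // A rough, illustrative sketch of the dispatch this method emits for a
+    // fork with three or more targets; the variant names, handler names, and
+    // miss expression below are hypothetical placeholders:
+    //
+    //     enum Jump { __, J3, J5, J8 }
+    //
+    //     const LUT: [Jump; 256] = {
+    //         use Jump::*;
+    //         [__, __, J3, /* ...one variant per byte value... */]
+    //     };
+    //
+    //     let byte = /* read expression from `fork_read` */;
+    //     match LUT[byte as usize] {
+    //         Jump::J3 => goto_3(lex),
+    //         Jump::J5 => goto_5(lex),
+    //         Jump::J8 => goto_8(lex),
+    //         Jump::__ => _error(lex),
+    //     }
+    //
+    // When every byte value maps to some target (`may_error` is false), the
+    // `__` variant and its miss arm are omitted from the generated code.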
+
+    fn fork_end(&self, this: NodeId, miss: &TokenStream) -> TokenStream {
+        if this == self.root {
+            quote!(_end(lex))
+        } else {
+            miss.clone()
+        }
+    }
+
+    fn fork_read(
+        &self,
+        this: NodeId,
+        end: TokenStream,
+        ctx: &mut Context,
+    ) -> (TokenStream, TokenStream) {
+        let min_read = self.meta[this].min_read;
+
+        if ctx.remainder() >= max(min_read, 1) {
+            let read = ctx.read_byte();
+
+            return (quote!(byte), quote!(let byte = #read;));
+        }
+
+        match min_read {
+            0 | 1 => {
+                let read = ctx.read(0);
+
+                (
+                    quote!(byte),
+                    quote! {
+                        let byte = match #read {
+                            Some(byte) => byte,
+                            None => return #end,
+                        };
+                    },
+                )
+            }
+            len => {
+                let read = ctx.read(len);
+
+                (
+                    quote!(arr[0]),
+                    quote! {
+                        let arr = match #read {
+                            Some(arr) => arr,
+                            None => return #end,
+                        };
+                    },
+                )
+            }
+        }
+    }
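+    // Illustrative sketch of the three read shapes `fork_read` can produce;
+    // the concrete expressions come from `Context`, so the reads below are
+    // placeholders rather than the exact generated tokens:
+    //
+    //     // enough bytes already guaranteed: unchecked single-byte read
+    //     let byte = /* ctx.read_byte() expression */;
+    //
+    //     // min_read <= 1: checked single-byte read, bail out to `end` on EOF
+    //     let byte = match /* ctx.read(0) expression */ {
+    //         Some(byte) => byte,
+    //         None => return _end(lex),
+    //     };
+    //
+    //     // min_read > 1: checked array read, the fork then matches on arr[0]
+    //     let arr = match /* ctx.read(len) expression */ {
+    //         Some(arr) => arr,
+    //         None => return _end(lex),
+    //     };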
+
+    fn generate_fast_loop(&mut self, fork: &Fork, ctx: Context) -> TokenStream {
+        let miss = ctx.miss(fork.miss, self);
+        let ranges = fork.branches().map(|(range, _)| range).collect::<Vec<_>>();
+        let test = self.generate_test(ranges);
+
+        quote! {
+            _fast_loop!(lex, #test, #miss);
+        }
+    }
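+    // Illustrative sketch only: a fork whose single branch loops back to its
+    // own node is emitted as a call to the `_fast_loop!` macro defined below,
+    // for example
+    //
+    //     _fast_loop!(lex, whitespace_test, _error(lex));
+    //
+    // where `whitespace_test` and `_error(lex)` are hypothetical stand-ins for
+    // the predicate built by `generate_test` and the miss expression from
+    // `ctx.miss`.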
+
+    pub fn fast_loop_macro() -> TokenStream {
+        quote! {
+            macro_rules! _fast_loop {
+                ($lex:ident, $test:ident, $miss:expr) => {
+                    // Do one bounds check for multiple bytes till EOF
+                    while let Some(arr) = $lex.read::<&[u8; 16]>() {
+                        if $test(arr[0]) { if $test(arr[1]) { if $test(arr[2]) { if $test(arr[3]) {
+                        if $test(arr[4]) { if $test(arr[5]) { if $test(arr[6]) { if $test(arr[7]) {
+                        if $test(arr[8]) { if $test(arr[9]) { if $test(arr[10]) { if $test(arr[11]) {
+                        if $test(arr[12]) { if $test(arr[13]) { if $test(arr[14]) { if $test(arr[15]) {
+
+                        $lex.bump_unchecked(16); continue; } $lex.bump_unchecked(15); return $miss; }
+                        $lex.bump_unchecked(14); return $miss; } $lex.bump_unchecked(13); return $miss; }
+                        $lex.bump_unchecked(12); return $miss; } $lex.bump_unchecked(11); return $miss; }
+                        $lex.bump_unchecked(10); return $miss; } $lex.bump_unchecked(9); return $miss; }
+                        $lex.bump_unchecked(8); return $miss; } $lex.bump_unchecked(7); return $miss; }
+                        $lex.bump_unchecked(6); return $miss; } $lex.bump_unchecked(5); return $miss; }
+                        $lex.bump_unchecked(4); return $miss; } $lex.bump_unchecked(3); return $miss; }
+                        $lex.bump_unchecked(2); return $miss; } $lex.bump_unchecked(1); return $miss; }
+
+                        return $miss;
+                    }
+
+                    while $lex.test($test) {
+                        $lex.bump_unchecked(1);
+                    }
+
+                    $miss
+                };
+            }
+        }
+    }
+}