1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
use proc_macro2::TokenStream;
use quote::quote;
use crate::generator::Generator;
use crate::graph::NodeId;
/// This struct keeps track of bytes available to be read without
/// bounds checking across the tree.
///
/// For example, a branch that matches 4 bytes followed by a fork
/// whose smallest branch consists of 2 bytes can do a single bounds check
/// for 6 bytes ahead, and leave the remaining 2 byte array (fixed size)
/// to be handled by the fork, avoiding bounds checks there.
#[derive(Default, Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct Context {
/// Number of bytes that haven't been bumped yet but should be
/// before a new read is performed.
at: usize,
/// Number of bytes available without bounds checks.
available: usize,
/// Whether or not the Lexer has been bumped by at least 1 byte.
bumped: bool,
/// Node to backtrack to in case an explicit match has failed.
/// If `None`, an error token is produced instead.
backtrack: Option<NodeId>,
}
impl Context {
    /// Returns `true` when a backtrack node has been recorded in this context.
    pub fn can_backtrack(&self) -> bool {
        self.backtrack.is_some()
    }

    /// Records `miss` as the node to backtrack to and flushes pending bytes.
    ///
    /// When `miss` is `None` the context is left untouched and `None` is
    /// returned (the `?` exits before the assignment happens). Otherwise the
    /// backtrack node is replaced and the result of [`Context::bump`] is
    /// returned.
    pub fn switch(&mut self, miss: Option<NodeId>) -> Option<TokenStream> {
        self.backtrack = Some(miss?);
        self.bump()
    }

    /// Returns a copy of this context with the read offset moved `n` bytes
    /// forward.
    ///
    /// This does **not** modify `self`; the caller must use the returned
    /// value.
    pub const fn advance(self, n: usize) -> Self {
        Context {
            at: self.at + n,
            ..self
        }
    }

    /// Emits code that bumps the lexer by all bytes consumed so far.
    ///
    /// Returns `None` when there is nothing to bump (`at == 0`). Otherwise
    /// returns the `lex.bump_unchecked(n);` statement, resets both the offset
    /// and the count of bytes available without bounds checks to zero, and
    /// marks the context as bumped.
    pub fn bump(&mut self) -> Option<TokenStream> {
        match self.at {
            0 => None,
            n => {
                let tokens = quote!(lex.bump_unchecked(#n););

                self.at = 0;
                self.available = 0;
                self.bumped = true;

                Some(tokens)
            }
        }
    }

    /// Number of bytes still readable without a bounds check, i.e. the
    /// available bytes minus the current offset, saturating at zero.
    pub fn remainder(&self) -> usize {
        self.available.saturating_sub(self.at)
    }

    /// Emits an expression that reads a single byte at the current offset.
    ///
    /// Without the `forbid_unsafe` feature the generated code uses the
    /// unchecked read wrapped in `unsafe`; with it, the checked
    /// `lex.read_byte` is emitted instead.
    ///
    /// The context itself is not changed by this call. The original body
    /// contained `self.advance(1);`, but `advance` takes `self` by value and
    /// returns a new `Context`, so the discarded call had no effect. It has
    /// been removed so the code no longer suggests the offset moves here.
    /// NOTE(review): callers presumably advance the context themselves after
    /// reading — confirm against the call sites before making this method
    /// advance on its own.
    pub fn read_byte(&mut self) -> TokenStream {
        let at = self.at;

        #[cfg(not(feature = "forbid_unsafe"))]
        {
            quote!(unsafe { lex.read_byte_unchecked(#at) })
        }

        #[cfg(feature = "forbid_unsafe")]
        {
            quote!(lex.read_byte(#at))
        }
    }

    /// Emits a read expression for `len` bytes at the current offset and
    /// records `len` as the number of bytes available without bounds checks.
    ///
    /// A `len` of `0` emits a read of a single `u8`; any other value emits a
    /// read of a `&[u8; len]` array. `lex.read` is used when the offset is
    /// zero and `lex.read_at` otherwise.
    pub fn read(&mut self, len: usize) -> TokenStream {
        self.available = len;

        match (self.at, len) {
            (0, 0) => quote!(lex.read::<u8>()),
            (a, 0) => quote!(lex.read_at::<u8>(#a)),
            (0, l) => quote!(lex.read::<&[u8; #l]>()),
            (a, l) => quote!(lex.read_at::<&[u8; #l]>(#a)),
        }
    }

    /// Forgets how many bytes are available without bounds checks.
    pub fn wipe(&mut self) {
        self.available = 0;
    }

    /// Returns the context to use after backtracking: offset and available
    /// bytes are reset, the backtrack node is cleared, and only the `bumped`
    /// flag is carried over.
    const fn backtrack(self) -> Self {
        Context {
            at: 0,
            available: 0,
            bumped: self.bumped,
            backtrack: None,
        }
    }

    /// Emits the code to run when the current node fails to match.
    ///
    /// The available-bytes count is wiped first. Then, in priority order:
    ///
    /// 1. an explicit `miss` node: go to it with the current context;
    /// 2. a recorded backtrack node: go to it with a reset context;
    /// 3. no fallback and the lexer was bumped: emit `lex.error()`;
    /// 4. no fallback and nothing was bumped: emit `_error(lex)`.
    pub fn miss(mut self, miss: Option<NodeId>, gen: &mut Generator) -> TokenStream {
        self.wipe();

        match (miss, self.backtrack) {
            (Some(id), _) => gen.goto(id, self).clone(),
            (_, Some(id)) => gen.goto(id, self.backtrack()).clone(),
            _ if self.bumped => quote!(lex.error()),
            _ => quote!(_error(lex)),
        }
    }

    /// Appends a suffix describing this context to `buf`.
    ///
    /// Parts are written in a fixed order and only when non-default:
    /// `_at{at}`, `_with{available}`, `_ctx{backtrack}`, and `_x` when the
    /// lexer has been bumped.
    pub fn write_suffix(&self, buf: &mut String) {
        use std::fmt::Write;

        // The results of `write!` are deliberately ignored here.
        if self.at > 0 {
            let _ = write!(buf, "_at{}", self.at);
        }
        if self.available > 0 {
            let _ = write!(buf, "_with{}", self.available);
        }
        if let Some(id) = self.backtrack {
            let _ = write!(buf, "_ctx{}", id);
        }
        if self.bumped {
            buf.push_str("_x");
        }
    }
}
|