From 8cdfa445d6629ffef4cb84967ff7017654045bc2 Mon Sep 17 00:00:00 2001 From: mo khan Date: Wed, 2 Jul 2025 18:36:06 -0600 Subject: chore: add vendor directory --- vendor/logos-codegen/src/parser/definition.rs | 193 ++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 vendor/logos-codegen/src/parser/definition.rs (limited to 'vendor/logos-codegen/src/parser/definition.rs') diff --git a/vendor/logos-codegen/src/parser/definition.rs b/vendor/logos-codegen/src/parser/definition.rs new file mode 100644 index 00000000..a876fb59 --- /dev/null +++ b/vendor/logos-codegen/src/parser/definition.rs @@ -0,0 +1,193 @@ +use proc_macro2::{Ident, Span}; +use syn::{spanned::Spanned, LitByteStr, LitStr}; + +use crate::error::{Errors, Result}; +use crate::leaf::Callback; +use crate::mir::Mir; +use crate::parser::nested::NestedValue; +use crate::parser::{IgnoreFlags, Parser, Subpatterns}; + +use super::ignore_flags::ascii_case::MakeAsciiCaseInsensitive; + +pub struct Definition { + pub literal: Literal, + pub priority: Option, + pub callback: Option, + pub ignore_flags: IgnoreFlags, +} + +pub enum Literal { + Utf8(LitStr), + Bytes(LitByteStr), +} + +impl Definition { + pub fn new(literal: Literal) -> Self { + Definition { + literal, + priority: None, + callback: None, + ignore_flags: IgnoreFlags::Empty, + } + } + + pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) { + match (name.to_string().as_str(), value) { + ("priority", NestedValue::Assign(tokens)) => { + let prio = match tokens.to_string().parse() { + Ok(prio) => prio, + Err(_) => { + parser.err("Expected an unsigned integer", tokens.span()); + return; + } + }; + + if self.priority.replace(prio).is_some() { + parser.err("Resetting previously set priority", tokens.span()); + } + } + ("priority", _) => { + parser.err("Expected: priority = ", name.span()); + } + ("callback", NestedValue::Assign(tokens)) => { + let span = tokens.span(); + let callback = match parser.parse_callback(tokens) { + Some(callback) => callback, + None => { + parser.err("Not a valid callback", span); + return; + } + }; + + if let Some(previous) = self.callback.replace(callback) { + parser + .err( + "Callback has been already set", + span.join(name.span()).unwrap(), + ) + .err("Previous callback set here", previous.span()); + } + } + ("callback", _) => { + parser.err("Expected: callback = ...", name.span()); + } + ("ignore", NestedValue::Group(tokens)) => { + self.ignore_flags.parse_group(name, tokens, parser); + } + ("ignore", _) => { + parser.err("Expected: ignore(, ...)", name.span()); + } + (unknown, _) => { + parser.err( + format!( + "\ + Unknown nested attribute: {}\n\ + \n\ + Expected one of: priority, callback\ + ", + unknown + ), + name.span(), + ); + } + } + } +} + +impl Literal { + pub fn to_bytes(&self) -> Vec { + match self { + Literal::Utf8(string) => string.value().into_bytes(), + Literal::Bytes(bytes) => bytes.value(), + } + } + + pub fn escape_regex(&self) -> Literal { + match self { + Literal::Utf8(string) => Literal::Utf8(LitStr::new( + regex_syntax::escape(&string.value()).as_str(), + self.span(), + )), + Literal::Bytes(bytes) => Literal::Bytes(LitByteStr::new( + regex_syntax::escape(&bytes_to_regex_string(bytes.value())).as_bytes(), + self.span(), + )), + } + } + + pub fn to_mir( + &self, + subpatterns: &Subpatterns, + ignore_flags: IgnoreFlags, + errors: &mut Errors, + ) -> Result { + let value = subpatterns.fix(self, errors); + + if ignore_flags.contains(IgnoreFlags::IgnoreAsciiCase) { + match self { + Literal::Utf8(_) => { + Mir::utf8(&value).map(MakeAsciiCaseInsensitive::make_ascii_case_insensitive) + } + Literal::Bytes(_) => Mir::binary_ignore_case(&value), + } + } else if ignore_flags.contains(IgnoreFlags::IgnoreCase) { + match self { + Literal::Utf8(_) => Mir::utf8_ignore_case(&value), + Literal::Bytes(_) => Mir::binary_ignore_case(&value), + } + } else { + match self { + Literal::Utf8(_) => Mir::utf8(&value), + Literal::Bytes(_) => Mir::binary(&value), + } + } + } + + pub fn span(&self) -> Span { + match self { + Literal::Utf8(string) => string.span(), + Literal::Bytes(bytes) => bytes.span(), + } + } +} + +impl syn::parse::Parse for Literal { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let la = input.lookahead1(); + if la.peek(LitStr) { + Ok(Literal::Utf8(input.parse()?)) + } else if la.peek(LitByteStr) { + Ok(Literal::Bytes(input.parse()?)) + } else { + Err(la.error()) + } + } +} + +pub fn bytes_to_regex_string(bytes: Vec) -> String { + if bytes.is_ascii() { + unsafe { + // Unicode values are prohibited, so we can't use + // safe version of String::from_utf8 + // + // We can, however, construct a safe ASCII string + return String::from_utf8_unchecked(bytes); + } + } + + let mut string = String::with_capacity(bytes.len() * 2); + + for byte in bytes { + if byte < 0x80 { + string.push(byte as char); + } else { + static DIGITS: [u8; 16] = *b"0123456789abcdef"; + + string.push_str(r"\x"); + string.push(DIGITS[(byte / 16) as usize] as char); + string.push(DIGITS[(byte % 16) as usize] as char); + } + } + + string +} -- cgit v1.2.3