diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
| commit | 8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch) | |
| tree | 22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/logos-codegen/src/parser/subpattern.rs | |
| parent | 4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff) | |
chore: add vendor directory
Diffstat (limited to 'vendor/logos-codegen/src/parser/subpattern.rs')
| -rw-r--r-- | vendor/logos-codegen/src/parser/subpattern.rs | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/vendor/logos-codegen/src/parser/subpattern.rs b/vendor/logos-codegen/src/parser/subpattern.rs new file mode 100644 index 00000000..eb620028 --- /dev/null +++ b/vendor/logos-codegen/src/parser/subpattern.rs @@ -0,0 +1,97 @@ +use proc_macro2::TokenStream; +use syn::Ident; + +use crate::error::Errors; +use crate::mir::Mir; +use crate::parser::definition::{bytes_to_regex_string, Literal}; + +#[derive(Default)] +pub struct Subpatterns { + map: Vec<(Ident, String)>, +} + +impl Subpatterns { + pub fn add(&mut self, param: Ident, pattern: TokenStream, errors: &mut Errors) { + let lit = match syn::parse2::<Literal>(pattern) { + Ok(lit) => lit, + Err(e) => { + errors.err(e.to_string(), e.span()); + return; + } + }; + + if let Some((name, _)) = self.map.iter().find(|(name, _)| *name == param) { + errors + .err(format!("{} can only be assigned once", param), param.span()) + .err("Previously assigned here", name.span()); + return; + } + + let fixed = self.fix(&lit, errors); + + // Validate the literal as proper regex. If it's not, emit an error. + let mir = match &lit { + Literal::Utf8(_) => Mir::utf8(&fixed), + Literal::Bytes(_) => Mir::binary(&fixed), + }; + + if let Err(err) = mir { + errors.err(err, lit.span()); + }; + + self.map.push((param, fixed)); + } + + pub fn fix(&self, lit: &Literal, errors: &mut Errors) -> String { + let mut i = 0; + let mut pattern = match lit { + Literal::Utf8(s) => s.value(), + Literal::Bytes(b) => bytes_to_regex_string(b.value()), + }; + + while let Some(f) = pattern[i..].find("(?&") { + i += f; + pattern.replace_range(i..i + 3, "(?:"); + i += 3; + + let subref_end = if let Some(f) = pattern[i..].find(')') { + i + f + } else { + pattern.truncate(i); // truncate so latter error doesn't suppress + break; // regex-syntax will report the unclosed group + }; + + let name = &pattern[i..subref_end]; + let name = match syn::parse_str::<Ident>(name) { + Ok(name) => name, + Err(_) => { + errors.err( + format!("subpattern reference `{}` is not an identifier", name), + lit.span(), + ); + // we emitted the error; make something up and continue + pattern.replace_range(i..subref_end, "_"); + i += 2; + continue; + } + }; + + match self.map.iter().find(|(def, _)| *def == name) { + Some((_, subpattern)) => { + pattern.replace_range(i..subref_end, subpattern); + i += subpattern.len() + 1; + } + None => { + errors.err( + format!("subpattern reference `{}` has not been defined", name), + lit.span(), + ); + // leaving `(?:name)` is fine + i = subref_end + 1; + } + } + } + + pattern + } +} |
