diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-15 16:37:08 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-17 16:30:22 -0600 |
| commit | 45df4d0d9b577fecee798d672695fe24ff57fb1b (patch) | |
| tree | 1b99bf645035b58e0d6db08c7a83521f41f7a75b /vendor/logos-codegen/src/mir.rs | |
| parent | f94f79608393d4ab127db63cc41668445ef6b243 (diff) | |
feat: migrate from Cedar to SpiceDB authorization system
This is a major architectural change that replaces the Cedar policy-based
authorization system with SpiceDB's relation-based authorization.
Key changes:
- Migrate from Rust to Go implementation
- Replace Cedar policies with SpiceDB schema and relationships
- Switch from envoy `ext_authz` with Cedar to SpiceDB permission checks
- Update build system and dependencies for Go ecosystem
- Maintain Envoy integration for external authorization
This change enables more flexible permission modeling through SpiceDB's
Google Zanzibar inspired relation-based system, supporting complex
hierarchical permissions that were difficult to express in Cedar.
Breaking change: Existing Cedar policies and Rust-based configuration
will no longer work and need to be migrated to SpiceDB schema.
Diffstat (limited to 'vendor/logos-codegen/src/mir.rs')
| -rw-r--r-- | vendor/logos-codegen/src/mir.rs | 235 |
1 files changed, 0 insertions, 235 deletions
diff --git a/vendor/logos-codegen/src/mir.rs b/vendor/logos-codegen/src/mir.rs deleted file mode 100644 index e07c5b78..00000000 --- a/vendor/logos-codegen/src/mir.rs +++ /dev/null @@ -1,235 +0,0 @@ -use std::convert::TryFrom; - -use lazy_static::lazy_static; -use regex_syntax::hir::{Dot, Hir, HirKind}; -use regex_syntax::ParserBuilder; - -pub use regex_syntax::hir::{Class, ClassUnicode, Literal}; - -use crate::error::{Error, Result}; - -lazy_static! { - static ref DOT_UTF8: Hir = Hir::dot(Dot::AnyChar); - static ref DOT_BYTES: Hir = Hir::dot(Dot::AnyByte); -} - -/// Middle Intermediate Representation of the regex, built from -/// `regex_syntax`'s `Hir`. The goal here is to strip and canonicalize -/// the tree, so that we don't have to do transformations later on the -/// graph, with the potential of running into looping references. -#[derive(Clone, Debug)] -pub enum Mir { - Empty, - Loop(Box<Mir>), - Maybe(Box<Mir>), - Concat(Vec<Mir>), - Alternation(Vec<Mir>), - Class(Class), - Literal(Literal), -} - -impl Mir { - pub fn utf8(source: &str) -> Result<Mir> { - Mir::try_from(ParserBuilder::new().build().parse(source)?) - } - - pub fn utf8_ignore_case(source: &str) -> Result<Mir> { - Mir::try_from( - ParserBuilder::new() - .case_insensitive(true) - .build() - .parse(source)?, - ) - } - - pub fn binary(source: &str) -> Result<Mir> { - Mir::try_from( - ParserBuilder::new() - .utf8(false) - .unicode(false) - .build() - .parse(source)?, - ) - } - - pub fn binary_ignore_case(source: &str) -> Result<Mir> { - Mir::try_from( - ParserBuilder::new() - .utf8(false) - .unicode(false) - .case_insensitive(true) - .build() - .parse(source)?, - ) - } - - pub fn priority(&self) -> usize { - match self { - Mir::Empty | Mir::Loop(_) | Mir::Maybe(_) => 0, - Mir::Concat(concat) => concat.iter().map(Mir::priority).sum(), - Mir::Alternation(alt) => alt.iter().map(Mir::priority).min().unwrap_or(0), - Mir::Class(_) => 2, - Mir::Literal(lit) => match std::str::from_utf8(&lit.0) { - Ok(s) => 2 * s.chars().count(), - Err(_) => 2 * lit.0.len(), - }, - } - } -} - -impl TryFrom<Hir> for Mir { - type Error = Error; - - fn try_from(hir: Hir) -> Result<Mir> { - match hir.into_kind() { - HirKind::Empty => Ok(Mir::Empty), - HirKind::Concat(concat) => { - let mut out = Vec::with_capacity(concat.len()); - - fn extend(mir: Mir, out: &mut Vec<Mir>) { - match mir { - Mir::Concat(nested) => { - for child in nested { - extend(child, out); - } - } - mir => out.push(mir), - } - } - - for hir in concat { - extend(Mir::try_from(hir)?, &mut out); - } - - Ok(Mir::Concat(out)) - } - HirKind::Alternation(alternation) => { - let alternation = alternation - .into_iter() - .map(Mir::try_from) - .collect::<Result<_>>()?; - - Ok(Mir::Alternation(alternation)) - } - HirKind::Literal(literal) => Ok(Mir::Literal(literal)), - HirKind::Class(class) => Ok(Mir::Class(class)), - HirKind::Repetition(repetition) => { - if !repetition.greedy { - return Err("#[regex]: non-greedy parsing is currently unsupported.".into()); - } - - let is_dot = if repetition.sub.properties().is_utf8() { - *repetition.sub == *DOT_UTF8 - } else { - *repetition.sub == *DOT_BYTES - }; - let mir = Mir::try_from(*repetition.sub)?; - - match (repetition.min, repetition.max) { - (0..=1, None) if is_dot => { - Err( - "#[regex]: \".+\" and \".*\" patterns will greedily consume \ - the entire source till the end as Logos does not allow \ - backtracking. If you are looking to match everything until \ - a specific character, you should use a negative character \ - class. E.g., use regex r\"'[^']*'\" to match anything in \ - between two quotes. Read more about that here: \ - https://github.com/maciejhirsz/logos/issues/302#issuecomment-1521342541." - .into() - ) - } - // 0 or 1 - (0, Some(1)) => Ok(Mir::Maybe(Box::new(mir))), - // 0 or more - (0, None) => Ok(Mir::Loop(Box::new(mir))), - // 1 or more - (1, None) => { - Ok(Mir::Concat(vec![mir.clone(), Mir::Loop(Box::new(mir))])) - } - // Exact {n} - (n, Some(m)) if m == n => { - let mut out = Vec::with_capacity(n as usize); - for _ in 0..n { - out.push(mir.clone()); - } - Ok(Mir::Concat(out)) - } - // At least {n,} - (n, None) => { - let mut out = Vec::with_capacity(n as usize); - for _ in 0..n { - out.push(mir.clone()); - } - out.push(Mir::Loop(Box::new(mir))); - Ok(Mir::Concat(out)) - } - // Bounded {n, m} - (n, Some(m)) => { - let mut out = Vec::with_capacity(m as usize); - for _ in 0..n { - out.push(mir.clone()); - } - for _ in n..m { - out.push(Mir::Maybe(Box::new(mir.clone()))); - } - Ok(Mir::Concat(out)) - } - } - } - HirKind::Capture(capture) => Mir::try_from(*capture.sub), - HirKind::Look(_) => { - Err("#[regex]: look-around assertions are currently unsupported.".into()) - } - } - } -} - -#[cfg(test)] -mod tests { - use super::Mir; - - #[test] - fn priorities() { - let regexes = [ - ("a", 2), - ("à", 2), - ("京", 2), - ("Eté", 6), - ("Été", 6), - ("[a-z]+", 2), - ("a|b", 2), - ("a|[b-z]", 2), - ("(foo)+", 6), - ("foobar", 12), - ("(fooz|bar)+qux", 12), - ]; - - for (regex, expected) in regexes.iter() { - let mir = Mir::utf8(regex).unwrap(); - assert_eq!(mir.priority(), *expected, "Failed for regex \"{}\"", regex); - } - } - - #[test] - fn equivalent_patterns() { - let regexes = [ - ("a|b", "[a-b]"), - ("1|2|3", "[1-3]"), - ("1+", "[1]+"), - ("c*", "[c]*"), - ("aaa", "a{3}"), - ("a[a]{2}", "a{3}"), - ]; - - for (regex_left, regex_right) in regexes.iter() { - let mir_left = Mir::utf8(regex_left).unwrap(); - let mir_right = Mir::utf8(regex_right).unwrap(); - assert_eq!( - mir_left.priority(), - mir_right.priority(), - "Regexes \"{regex_left}\" and \"{regex_right}\" \ - are equivalent but have different priorities" - ); - } - } -} |
