summaryrefslogtreecommitdiff
path: root/vendor/logos-codegen/src/mir.rs
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2025-07-15 16:37:08 -0600
committermo khan <mo@mokhan.ca>2025-07-17 16:30:22 -0600
commit45df4d0d9b577fecee798d672695fe24ff57fb1b (patch)
tree1b99bf645035b58e0d6db08c7a83521f41f7a75b /vendor/logos-codegen/src/mir.rs
parentf94f79608393d4ab127db63cc41668445ef6b243 (diff)
feat: migrate from Cedar to SpiceDB authorization system
This is a major architectural change that replaces the Cedar policy-based authorization system with SpiceDB's relation-based authorization. Key changes: - Migrate from Rust to Go implementation - Replace Cedar policies with SpiceDB schema and relationships - Switch from envoy `ext_authz` with Cedar to SpiceDB permission checks - Update build system and dependencies for Go ecosystem - Maintain Envoy integration for external authorization This change enables more flexible permission modeling through SpiceDB's Google Zanzibar inspired relation-based system, supporting complex hierarchical permissions that were difficult to express in Cedar. Breaking change: Existing Cedar policies and Rust-based configuration will no longer work and need to be migrated to SpiceDB schema.
Diffstat (limited to 'vendor/logos-codegen/src/mir.rs')
-rw-r--r--vendor/logos-codegen/src/mir.rs235
1 files changed, 0 insertions, 235 deletions
diff --git a/vendor/logos-codegen/src/mir.rs b/vendor/logos-codegen/src/mir.rs
deleted file mode 100644
index e07c5b78..00000000
--- a/vendor/logos-codegen/src/mir.rs
+++ /dev/null
@@ -1,235 +0,0 @@
-use std::convert::TryFrom;
-
-use lazy_static::lazy_static;
-use regex_syntax::hir::{Dot, Hir, HirKind};
-use regex_syntax::ParserBuilder;
-
-pub use regex_syntax::hir::{Class, ClassUnicode, Literal};
-
-use crate::error::{Error, Result};
-
-lazy_static! {
- static ref DOT_UTF8: Hir = Hir::dot(Dot::AnyChar);
- static ref DOT_BYTES: Hir = Hir::dot(Dot::AnyByte);
-}
-
-/// Middle Intermediate Representation of the regex, built from
-/// `regex_syntax`'s `Hir`. The goal here is to strip and canonicalize
-/// the tree, so that we don't have to do transformations later on the
-/// graph, with the potential of running into looping references.
-#[derive(Clone, Debug)]
-pub enum Mir {
- Empty,
- Loop(Box<Mir>),
- Maybe(Box<Mir>),
- Concat(Vec<Mir>),
- Alternation(Vec<Mir>),
- Class(Class),
- Literal(Literal),
-}
-
-impl Mir {
- pub fn utf8(source: &str) -> Result<Mir> {
- Mir::try_from(ParserBuilder::new().build().parse(source)?)
- }
-
- pub fn utf8_ignore_case(source: &str) -> Result<Mir> {
- Mir::try_from(
- ParserBuilder::new()
- .case_insensitive(true)
- .build()
- .parse(source)?,
- )
- }
-
- pub fn binary(source: &str) -> Result<Mir> {
- Mir::try_from(
- ParserBuilder::new()
- .utf8(false)
- .unicode(false)
- .build()
- .parse(source)?,
- )
- }
-
- pub fn binary_ignore_case(source: &str) -> Result<Mir> {
- Mir::try_from(
- ParserBuilder::new()
- .utf8(false)
- .unicode(false)
- .case_insensitive(true)
- .build()
- .parse(source)?,
- )
- }
-
- pub fn priority(&self) -> usize {
- match self {
- Mir::Empty | Mir::Loop(_) | Mir::Maybe(_) => 0,
- Mir::Concat(concat) => concat.iter().map(Mir::priority).sum(),
- Mir::Alternation(alt) => alt.iter().map(Mir::priority).min().unwrap_or(0),
- Mir::Class(_) => 2,
- Mir::Literal(lit) => match std::str::from_utf8(&lit.0) {
- Ok(s) => 2 * s.chars().count(),
- Err(_) => 2 * lit.0.len(),
- },
- }
- }
-}
-
-impl TryFrom<Hir> for Mir {
- type Error = Error;
-
- fn try_from(hir: Hir) -> Result<Mir> {
- match hir.into_kind() {
- HirKind::Empty => Ok(Mir::Empty),
- HirKind::Concat(concat) => {
- let mut out = Vec::with_capacity(concat.len());
-
- fn extend(mir: Mir, out: &mut Vec<Mir>) {
- match mir {
- Mir::Concat(nested) => {
- for child in nested {
- extend(child, out);
- }
- }
- mir => out.push(mir),
- }
- }
-
- for hir in concat {
- extend(Mir::try_from(hir)?, &mut out);
- }
-
- Ok(Mir::Concat(out))
- }
- HirKind::Alternation(alternation) => {
- let alternation = alternation
- .into_iter()
- .map(Mir::try_from)
- .collect::<Result<_>>()?;
-
- Ok(Mir::Alternation(alternation))
- }
- HirKind::Literal(literal) => Ok(Mir::Literal(literal)),
- HirKind::Class(class) => Ok(Mir::Class(class)),
- HirKind::Repetition(repetition) => {
- if !repetition.greedy {
- return Err("#[regex]: non-greedy parsing is currently unsupported.".into());
- }
-
- let is_dot = if repetition.sub.properties().is_utf8() {
- *repetition.sub == *DOT_UTF8
- } else {
- *repetition.sub == *DOT_BYTES
- };
- let mir = Mir::try_from(*repetition.sub)?;
-
- match (repetition.min, repetition.max) {
- (0..=1, None) if is_dot => {
- Err(
- "#[regex]: \".+\" and \".*\" patterns will greedily consume \
- the entire source till the end as Logos does not allow \
- backtracking. If you are looking to match everything until \
- a specific character, you should use a negative character \
- class. E.g., use regex r\"'[^']*'\" to match anything in \
- between two quotes. Read more about that here: \
- https://github.com/maciejhirsz/logos/issues/302#issuecomment-1521342541."
- .into()
- )
- }
- // 0 or 1
- (0, Some(1)) => Ok(Mir::Maybe(Box::new(mir))),
- // 0 or more
- (0, None) => Ok(Mir::Loop(Box::new(mir))),
- // 1 or more
- (1, None) => {
- Ok(Mir::Concat(vec![mir.clone(), Mir::Loop(Box::new(mir))]))
- }
- // Exact {n}
- (n, Some(m)) if m == n => {
- let mut out = Vec::with_capacity(n as usize);
- for _ in 0..n {
- out.push(mir.clone());
- }
- Ok(Mir::Concat(out))
- }
- // At least {n,}
- (n, None) => {
- let mut out = Vec::with_capacity(n as usize);
- for _ in 0..n {
- out.push(mir.clone());
- }
- out.push(Mir::Loop(Box::new(mir)));
- Ok(Mir::Concat(out))
- }
- // Bounded {n, m}
- (n, Some(m)) => {
- let mut out = Vec::with_capacity(m as usize);
- for _ in 0..n {
- out.push(mir.clone());
- }
- for _ in n..m {
- out.push(Mir::Maybe(Box::new(mir.clone())));
- }
- Ok(Mir::Concat(out))
- }
- }
- }
- HirKind::Capture(capture) => Mir::try_from(*capture.sub),
- HirKind::Look(_) => {
- Err("#[regex]: look-around assertions are currently unsupported.".into())
- }
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::Mir;
-
- #[test]
- fn priorities() {
- let regexes = [
- ("a", 2),
- ("à", 2),
- ("京", 2),
- ("Eté", 6),
- ("Été", 6),
- ("[a-z]+", 2),
- ("a|b", 2),
- ("a|[b-z]", 2),
- ("(foo)+", 6),
- ("foobar", 12),
- ("(fooz|bar)+qux", 12),
- ];
-
- for (regex, expected) in regexes.iter() {
- let mir = Mir::utf8(regex).unwrap();
- assert_eq!(mir.priority(), *expected, "Failed for regex \"{}\"", regex);
- }
- }
-
- #[test]
- fn equivalent_patterns() {
- let regexes = [
- ("a|b", "[a-b]"),
- ("1|2|3", "[1-3]"),
- ("1+", "[1]+"),
- ("c*", "[c]*"),
- ("aaa", "a{3}"),
- ("a[a]{2}", "a{3}"),
- ];
-
- for (regex_left, regex_right) in regexes.iter() {
- let mir_left = Mir::utf8(regex_left).unwrap();
- let mir_right = Mir::utf8(regex_right).unwrap();
- assert_eq!(
- mir_left.priority(),
- mir_right.priority(),
- "Regexes \"{regex_left}\" and \"{regex_right}\" \
- are equivalent but have different priorities"
- );
- }
- }
-}