feat: migrate from Cedar to SpiceDB authorization system

This is a major architectural change that replaces the Cedar policy-based authorization system with SpiceDB's relation-based authorization. Key changes: - Migrate from Rust to Go implementation - Replace Cedar policies with SpiceDB schema and relationships - Switch from envoy `ext_authz` with Cedar to SpiceDB permission checks - Update build system and dependencies for Go ecosystem - Maintain Envoy integration for external authorization This change enables more flexible permission modeling through SpiceDB's Google Zanzibar inspired relation-based system, supporting complex hierarchical permissions that were difficult to express in Cedar. Breaking change: Existing Cedar policies and Rust-based configuration will no longer work and need to be migrated to SpiceDB schema.
author: mo khan <mo@mokhan.ca> 2025-07-15 16:37:08 -0600
committer: mo khan <mo@mokhan.ca> 2025-07-17 16:30:22 -0600
commit: 45df4d0d9b577fecee798d672695fe24ff57fb1b (patch)
tree: 1b99bf645035b58e0d6db08c7a83521f41f7a75b /vendor/regex-syntax/src/utf8.rs
parent: f94f79608393d4ab127db63cc41668445ef6b243 (diff)
1 files changed, 0 insertions, 592 deletions
diff --git a/vendor/regex-syntax/src/utf8.rs b/vendor/regex-syntax/src/utf8.rs
deleted file mode 100644
index 69d74945..00000000
--- a/vendor/regex-syntax/src/utf8.rs
+++ /dev/null
@@ -1,592 +0,0 @@
-/*!
-Converts ranges of Unicode scalar values to equivalent ranges of UTF-8 bytes.
-
-This is sub-module is useful for constructing byte based automatons that need
-to embed UTF-8 decoding. The most common use of this module is in conjunction
-with the [`hir::ClassUnicodeRange`](crate::hir::ClassUnicodeRange) type.
-
-See the documentation on the `Utf8Sequences` iterator for more details and
-an example.
-
-# Wait, what is this?
-
-This is simplest to explain with an example. Let's say you wanted to test
-whether a particular byte sequence was a Cyrillic character. One possible
-scalar value range is `[0400-04FF]`. The set of allowed bytes for this
-range can be expressed as a sequence of byte ranges:
-
-```text
-[D0-D3][80-BF]
-```
-
-This is simple enough: simply encode the boundaries, `0400` encodes to
-`D0 80` and `04FF` encodes to `D3 BF`, and create ranges from each
-corresponding pair of bytes: `D0` to `D3` and `80` to `BF`.
-
-However, what if you wanted to add the Cyrillic Supplementary characters to
-your range? Your range might then become `[0400-052F]`. The same procedure
-as above doesn't quite work because `052F` encodes to `D4 AF`. The byte ranges
-you'd get from the previous transformation would be `[D0-D4][80-AF]`. However,
-this isn't quite correct because this range doesn't capture many characters,
-for example, `04FF` (because its last byte, `BF` isn't in the range `80-AF`).
-
-Instead, you need multiple sequences of byte ranges:
-
-```text
-[D0-D3][80-BF]  # matches codepoints 0400-04FF
-[D4][80-AF]     # matches codepoints 0500-052F
-```
-
-This gets even more complicated if you want bigger ranges, particularly if
-they naively contain surrogate codepoints. For example, the sequence of byte
-ranges for the basic multilingual plane (`[0000-FFFF]`) look like this:
-
-```text
-[0-7F]
-[C2-DF][80-BF]
-[E0][A0-BF][80-BF]
-[E1-EC][80-BF][80-BF]
-[ED][80-9F][80-BF]
-[EE-EF][80-BF][80-BF]
-```
-
-Note that the byte ranges above will *not* match any erroneous encoding of
-UTF-8, including encodings of surrogate codepoints.
-
-And, of course, for all of Unicode (`[000000-10FFFF]`):
-
-```text
-[0-7F]
-[C2-DF][80-BF]
-[E0][A0-BF][80-BF]
-[E1-EC][80-BF][80-BF]
-[ED][80-9F][80-BF]
-[EE-EF][80-BF][80-BF]
-[F0][90-BF][80-BF][80-BF]
-[F1-F3][80-BF][80-BF][80-BF]
-[F4][80-8F][80-BF][80-BF]
-```
-
-This module automates the process of creating these byte ranges from ranges of
-Unicode scalar values.
-
-# Lineage
-
-I got the idea and general implementation strategy from Russ Cox in his
-[article on regexps](https://web.archive.org/web/20160404141123/https://swtch.com/~rsc/regexp/regexp3.html) and RE2.
-Russ Cox got it from Ken Thompson's `grep` (no source, folk lore?).
-I also got the idea from
-[Lucene](https://github.com/apache/lucene-solr/blob/ae93f4e7ac6a3908046391de35d4f50a0d3c59ca/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java),
-which uses it for executing automata on their term index.
-*/
-
-use core::{char, fmt, iter::FusedIterator, slice};
-
-use alloc::{vec, vec::Vec};
-
-const MAX_UTF8_BYTES: usize = 4;
-
-/// Utf8Sequence represents a sequence of byte ranges.
-///
-/// To match a Utf8Sequence, a candidate byte sequence must match each
-/// successive range.
-///
-/// For example, if there are two ranges, `[C2-DF][80-BF]`, then the byte
-/// sequence `\xDD\x61` would not match because `0x61 < 0x80`.
-#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
-pub enum Utf8Sequence {
-    /// One byte range.
-    One(Utf8Range),
-    /// Two successive byte ranges.
-    Two([Utf8Range; 2]),
-    /// Three successive byte ranges.
-    Three([Utf8Range; 3]),
-    /// Four successive byte ranges.
-    Four([Utf8Range; 4]),
-}
-
-impl Utf8Sequence {
-    /// Creates a new UTF-8 sequence from the encoded bytes of a scalar value
-    /// range.
-    ///
-    /// This assumes that `start` and `end` have the same length.
-    fn from_encoded_range(start: &[u8], end: &[u8]) -> Self {
-        assert_eq!(start.len(), end.len());
-        match start.len() {
-            2 => Utf8Sequence::Two([
-                Utf8Range::new(start[0], end[0]),
-                Utf8Range::new(start[1], end[1]),
-            ]),
-            3 => Utf8Sequence::Three([
-                Utf8Range::new(start[0], end[0]),
-                Utf8Range::new(start[1], end[1]),
-                Utf8Range::new(start[2], end[2]),
-            ]),
-            4 => Utf8Sequence::Four([
-                Utf8Range::new(start[0], end[0]),
-                Utf8Range::new(start[1], end[1]),
-                Utf8Range::new(start[2], end[2]),
-                Utf8Range::new(start[3], end[3]),
-            ]),
-            n => unreachable!("invalid encoded length: {}", n),
-        }
-    }
-
-    /// Returns the underlying sequence of byte ranges as a slice.
-    pub fn as_slice(&self) -> &[Utf8Range] {
-        use self::Utf8Sequence::*;
-        match *self {
-            One(ref r) => slice::from_ref(r),
-            Two(ref r) => &r[..],
-            Three(ref r) => &r[..],
-            Four(ref r) => &r[..],
-        }
-    }
-
-    /// Returns the number of byte ranges in this sequence.
-    ///
-    /// The length is guaranteed to be in the closed interval `[1, 4]`.
-    pub fn len(&self) -> usize {
-        self.as_slice().len()
-    }
-
-    /// Reverses the ranges in this sequence.
-    ///
-    /// For example, if this corresponds to the following sequence:
-    ///
-    /// ```text
-    /// [D0-D3][80-BF]
-    /// ```
-    ///
-    /// Then after reversal, it will be
-    ///
-    /// ```text
-    /// [80-BF][D0-D3]
-    /// ```
-    ///
-    /// This is useful when one is constructing a UTF-8 automaton to match
-    /// character classes in reverse.
-    pub fn reverse(&mut self) {
-        match *self {
-            Utf8Sequence::One(_) => {}
-            Utf8Sequence::Two(ref mut x) => x.reverse(),
-            Utf8Sequence::Three(ref mut x) => x.reverse(),
-            Utf8Sequence::Four(ref mut x) => x.reverse(),
-        }
-    }
-
-    /// Returns true if and only if a prefix of `bytes` matches this sequence
-    /// of byte ranges.
-    pub fn matches(&self, bytes: &[u8]) -> bool {
-        if bytes.len() < self.len() {
-            return false;
-        }
-        for (&b, r) in bytes.iter().zip(self) {
-            if !r.matches(b) {
-                return false;
-            }
-        }
-        true
-    }
-}
-
-impl<'a> IntoIterator for &'a Utf8Sequence {
-    type IntoIter = slice::Iter<'a, Utf8Range>;
-    type Item = &'a Utf8Range;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.as_slice().iter()
-    }
-}
-
-impl fmt::Debug for Utf8Sequence {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        use self::Utf8Sequence::*;
-        match *self {
-            One(ref r) => write!(f, "{:?}", r),
-            Two(ref r) => write!(f, "{:?}{:?}", r[0], r[1]),
-            Three(ref r) => write!(f, "{:?}{:?}{:?}", r[0], r[1], r[2]),
-            Four(ref r) => {
-                write!(f, "{:?}{:?}{:?}{:?}", r[0], r[1], r[2], r[3])
-            }
-        }
-    }
-}
-
-/// A single inclusive range of UTF-8 bytes.
-#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)]
-pub struct Utf8Range {
-    /// Start of byte range (inclusive).
-    pub start: u8,
-    /// End of byte range (inclusive).
-    pub end: u8,
-}
-
-impl Utf8Range {
-    fn new(start: u8, end: u8) -> Self {
-        Utf8Range { start, end }
-    }
-
-    /// Returns true if and only if the given byte is in this range.
-    pub fn matches(&self, b: u8) -> bool {
-        self.start <= b && b <= self.end
-    }
-}
-
-impl fmt::Debug for Utf8Range {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        if self.start == self.end {
-            write!(f, "[{:X}]", self.start)
-        } else {
-            write!(f, "[{:X}-{:X}]", self.start, self.end)
-        }
-    }
-}
-
-/// An iterator over ranges of matching UTF-8 byte sequences.
-///
-/// The iteration represents an alternation of comprehensive byte sequences
-/// that match precisely the set of UTF-8 encoded scalar values.
-///
-/// A byte sequence corresponds to one of the scalar values in the range given
-/// if and only if it completely matches exactly one of the sequences of byte
-/// ranges produced by this iterator.
-///
-/// Each sequence of byte ranges matches a unique set of bytes. That is, no two
-/// sequences will match the same bytes.
-///
-/// # Example
-///
-/// This shows how to match an arbitrary byte sequence against a range of
-/// scalar values.
-///
-/// ```rust
-/// use regex_syntax::utf8::{Utf8Sequences, Utf8Sequence};
-///
-/// fn matches(seqs: &[Utf8Sequence], bytes: &[u8]) -> bool {
-///     for range in seqs {
-///         if range.matches(bytes) {
-///             return true;
-///         }
-///     }
-///     false
-/// }
-///
-/// // Test the basic multilingual plane.
-/// let seqs: Vec<_> = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect();
-///
-/// // UTF-8 encoding of 'a'.
-/// assert!(matches(&seqs, &[0x61]));
-/// // UTF-8 encoding of '☃' (`\u{2603}`).
-/// assert!(matches(&seqs, &[0xE2, 0x98, 0x83]));
-/// // UTF-8 encoding of `\u{10348}` (outside the BMP).
-/// assert!(!matches(&seqs, &[0xF0, 0x90, 0x8D, 0x88]));
-/// // Tries to match against a UTF-8 encoding of a surrogate codepoint,
-/// // which is invalid UTF-8, and therefore fails, despite the fact that
-/// // the corresponding codepoint (0xD800) falls in the range given.
-/// assert!(!matches(&seqs, &[0xED, 0xA0, 0x80]));
-/// // And fails against plain old invalid UTF-8.
-/// assert!(!matches(&seqs, &[0xFF, 0xFF]));
-/// ```
-///
-/// If this example seems circuitous, that's because it is! It's meant to be
-/// illustrative. In practice, you could just try to decode your byte sequence
-/// and compare it with the scalar value range directly. However, this is not
-/// always possible (for example, in a byte based automaton).
-#[derive(Debug)]
-pub struct Utf8Sequences {
-    range_stack: Vec<ScalarRange>,
-}
-
-impl Utf8Sequences {
-    /// Create a new iterator over UTF-8 byte ranges for the scalar value range
-    /// given.
-    pub fn new(start: char, end: char) -> Self {
-        let range =
-            ScalarRange { start: u32::from(start), end: u32::from(end) };
-        Utf8Sequences { range_stack: vec![range] }
-    }
-
-    /// reset resets the scalar value range.
-    /// Any existing state is cleared, but resources may be reused.
-    ///
-    /// N.B. Benchmarks say that this method is dubious.
-    #[doc(hidden)]
-    pub fn reset(&mut self, start: char, end: char) {
-        self.range_stack.clear();
-        self.push(u32::from(start), u32::from(end));
-    }
-
-    fn push(&mut self, start: u32, end: u32) {
-        self.range_stack.push(ScalarRange { start, end });
-    }
-}
-
-struct ScalarRange {
-    start: u32,
-    end: u32,
-}
-
-impl fmt::Debug for ScalarRange {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "ScalarRange({:X}, {:X})", self.start, self.end)
-    }
-}
-
-impl Iterator for Utf8Sequences {
-    type Item = Utf8Sequence;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        'TOP: while let Some(mut r) = self.range_stack.pop() {
-            'INNER: loop {
-                if let Some((r1, r2)) = r.split() {
-                    self.push(r2.start, r2.end);
-                    r.start = r1.start;
-                    r.end = r1.end;
-                    continue 'INNER;
-                }
-                if !r.is_valid() {
-                    continue 'TOP;
-                }
-                for i in 1..MAX_UTF8_BYTES {
-                    let max = max_scalar_value(i);
-                    if r.start <= max && max < r.end {
-                        self.push(max + 1, r.end);
-                        r.end = max;
-                        continue 'INNER;
-                    }
-                }
-                if let Some(ascii_range) = r.as_ascii() {
-                    return Some(Utf8Sequence::One(ascii_range));
-                }
-                for i in 1..MAX_UTF8_BYTES {
-                    let m = (1 << (6 * i)) - 1;
-                    if (r.start & !m) != (r.end & !m) {
-                        if (r.start & m) != 0 {
-                            self.push((r.start | m) + 1, r.end);
-                            r.end = r.start | m;
-                            continue 'INNER;
-                        }
-                        if (r.end & m) != m {
-                            self.push(r.end & !m, r.end);
-                            r.end = (r.end & !m) - 1;
-                            continue 'INNER;
-                        }
-                    }
-                }
-                let mut start = [0; MAX_UTF8_BYTES];
-                let mut end = [0; MAX_UTF8_BYTES];
-                let n = r.encode(&mut start, &mut end);
-                return Some(Utf8Sequence::from_encoded_range(
-                    &start[0..n],
-                    &end[0..n],
-                ));
-            }
-        }
-        None
-    }
-}
-
-impl FusedIterator for Utf8Sequences {}
-
-impl ScalarRange {
-    /// split splits this range if it overlaps with a surrogate codepoint.
-    ///
-    /// Either or both ranges may be invalid.
-    fn split(&self) -> Option<(ScalarRange, ScalarRange)> {
-        if self.start < 0xE000 && self.end > 0xD7FF {
-            Some((
-                ScalarRange { start: self.start, end: 0xD7FF },
-                ScalarRange { start: 0xE000, end: self.end },
-            ))
-        } else {
-            None
-        }
-    }
-
-    /// is_valid returns true if and only if start <= end.
-    fn is_valid(&self) -> bool {
-        self.start <= self.end
-    }
-
-    /// as_ascii returns this range as a Utf8Range if and only if all scalar
-    /// values in this range can be encoded as a single byte.
-    fn as_ascii(&self) -> Option<Utf8Range> {
-        if self.is_ascii() {
-            let start = u8::try_from(self.start).unwrap();
-            let end = u8::try_from(self.end).unwrap();
-            Some(Utf8Range::new(start, end))
-        } else {
-            None
-        }
-    }
-
-    /// is_ascii returns true if the range is ASCII only (i.e., takes a single
-    /// byte to encode any scalar value).
-    fn is_ascii(&self) -> bool {
-        self.is_valid() && self.end <= 0x7f
-    }
-
-    /// encode writes the UTF-8 encoding of the start and end of this range
-    /// to the corresponding destination slices, and returns the number of
-    /// bytes written.
-    ///
-    /// The slices should have room for at least `MAX_UTF8_BYTES`.
-    fn encode(&self, start: &mut [u8], end: &mut [u8]) -> usize {
-        let cs = char::from_u32(self.start).unwrap();
-        let ce = char::from_u32(self.end).unwrap();
-        let ss = cs.encode_utf8(start);
-        let se = ce.encode_utf8(end);
-        assert_eq!(ss.len(), se.len());
-        ss.len()
-    }
-}
-
-fn max_scalar_value(nbytes: usize) -> u32 {
-    match nbytes {
-        1 => 0x007F,
-        2 => 0x07FF,
-        3 => 0xFFFF,
-        4 => 0x0010_FFFF,
-        _ => unreachable!("invalid UTF-8 byte sequence size"),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use core::char;
-
-    use alloc::{vec, vec::Vec};
-
-    use crate::utf8::{Utf8Range, Utf8Sequences};
-
-    fn rutf8(s: u8, e: u8) -> Utf8Range {
-        Utf8Range::new(s, e)
-    }
-
-    fn never_accepts_surrogate_codepoints(start: char, end: char) {
-        for cp in 0xD800..0xE000 {
-            let buf = encode_surrogate(cp);
-            for r in Utf8Sequences::new(start, end) {
-                if r.matches(&buf) {
-                    panic!(
-                        "Sequence ({:X}, {:X}) contains range {:?}, \
-                         which matches surrogate code point {:X} \
-                         with encoded bytes {:?}",
-                        u32::from(start),
-                        u32::from(end),
-                        r,
-                        cp,
-                        buf,
-                    );
-                }
-            }
-        }
-    }
-
-    #[test]
-    fn codepoints_no_surrogates() {
-        never_accepts_surrogate_codepoints('\u{0}', '\u{FFFF}');
-        never_accepts_surrogate_codepoints('\u{0}', '\u{10FFFF}');
-        never_accepts_surrogate_codepoints('\u{0}', '\u{10FFFE}');
-        never_accepts_surrogate_codepoints('\u{80}', '\u{10FFFF}');
-        never_accepts_surrogate_codepoints('\u{D7FF}', '\u{E000}');
-    }
-
-    #[test]
-    fn single_codepoint_one_sequence() {
-        // Tests that every range of scalar values that contains a single
-        // scalar value is recognized by one sequence of byte ranges.
-        for i in 0x0..=0x0010_FFFF {
-            let c = match char::from_u32(i) {
-                None => continue,
-                Some(c) => c,
-            };
-            let seqs: Vec<_> = Utf8Sequences::new(c, c).collect();
-            assert_eq!(seqs.len(), 1);
-        }
-    }
-
-    #[test]
-    fn bmp() {
-        use crate::utf8::Utf8Sequence::*;
-
-        let seqs = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect::<Vec<_>>();
-        assert_eq!(
-            seqs,
-            vec![
-                One(rutf8(0x0, 0x7F)),
-                Two([rutf8(0xC2, 0xDF), rutf8(0x80, 0xBF)]),
-                Three([
-                    rutf8(0xE0, 0xE0),
-                    rutf8(0xA0, 0xBF),
-                    rutf8(0x80, 0xBF)
-                ]),
-                Three([
-                    rutf8(0xE1, 0xEC),
-                    rutf8(0x80, 0xBF),
-                    rutf8(0x80, 0xBF)
-                ]),
-                Three([
-                    rutf8(0xED, 0xED),
-                    rutf8(0x80, 0x9F),
-                    rutf8(0x80, 0xBF)
-                ]),
-                Three([
-                    rutf8(0xEE, 0xEF),
-                    rutf8(0x80, 0xBF),
-                    rutf8(0x80, 0xBF)
-                ]),
-            ]
-        );
-    }
-
-    #[test]
-    fn reverse() {
-        use crate::utf8::Utf8Sequence::*;
-
-        let mut s = One(rutf8(0xA, 0xB));
-        s.reverse();
-        assert_eq!(s.as_slice(), &[rutf8(0xA, 0xB)]);
-
-        let mut s = Two([rutf8(0xA, 0xB), rutf8(0xB, 0xC)]);
-        s.reverse();
-        assert_eq!(s.as_slice(), &[rutf8(0xB, 0xC), rutf8(0xA, 0xB)]);
-
-        let mut s = Three([rutf8(0xA, 0xB), rutf8(0xB, 0xC), rutf8(0xC, 0xD)]);
-        s.reverse();
-        assert_eq!(
-            s.as_slice(),
-            &[rutf8(0xC, 0xD), rutf8(0xB, 0xC), rutf8(0xA, 0xB)]
-        );
-
-        let mut s = Four([
-            rutf8(0xA, 0xB),
-            rutf8(0xB, 0xC),
-            rutf8(0xC, 0xD),
-            rutf8(0xD, 0xE),
-        ]);
-        s.reverse();
-        assert_eq!(
-            s.as_slice(),
-            &[
-                rutf8(0xD, 0xE),
-                rutf8(0xC, 0xD),
-                rutf8(0xB, 0xC),
-                rutf8(0xA, 0xB)
-            ]
-        );
-    }
-
-    fn encode_surrogate(cp: u32) -> [u8; 3] {
-        const TAG_CONT: u8 = 0b1000_0000;
-        const TAG_THREE_B: u8 = 0b1110_0000;
-
-        assert!(0xD800 <= cp && cp < 0xE000);
-        let mut dst = [0; 3];
-        dst[0] = u8::try_from(cp >> 12 & 0x0F).unwrap() | TAG_THREE_B;
-        dst[1] = u8::try_from(cp >> 6 & 0x3F).unwrap() | TAG_CONT;
-        dst[2] = u8::try_from(cp & 0x3F).unwrap() | TAG_CONT;
-        dst
-    }
-}
author	mo khan <mo@mokhan.ca>	2025-07-15 16:37:08 -0600
committer	mo khan <mo@mokhan.ca>	2025-07-17 16:30:22 -0600
commit	45df4d0d9b577fecee798d672695fe24ff57fb1b (patch)
tree	1b99bf645035b58e0d6db08c7a83521f41f7a75b /vendor/regex-syntax/src/utf8.rs
parent	f94f79608393d4ab127db63cc41668445ef6b243 (diff)