feat: migrate from Cedar to SpiceDB authorization system

This is a major architectural change that replaces the Cedar policy-based authorization system with SpiceDB's relation-based authorization.

Key changes:
- Migrate from a Rust to a Go implementation
- Replace Cedar policies with a SpiceDB schema and relationships
- Switch from Envoy `ext_authz` with Cedar to SpiceDB permission checks
- Update the build system and dependencies for the Go ecosystem
- Maintain Envoy integration for external authorization

This change enables more flexible permission modeling through SpiceDB's Google Zanzibar-inspired relation-based system, supporting complex hierarchical permissions that were difficult to express in Cedar.

Breaking change: Existing Cedar policies and Rust-based configuration will no longer work and need to be migrated to a SpiceDB schema.
author: mo khan <mo@mokhan.ca> 2025-07-15 16:37:08 -0600
committer: mo khan <mo@mokhan.ca> 2025-07-17 16:30:22 -0600
commit: 45df4d0d9b577fecee798d672695fe24ff57fb1b (patch)
tree: 1b99bf645035b58e0d6db08c7a83521f41f7a75b /vendor/regex-syntax/src/unicode.rs
parent: f94f79608393d4ab127db63cc41668445ef6b243 (diff)
1 files changed, 0 insertions, 1041 deletions
diff --git a/vendor/regex-syntax/src/unicode.rs b/vendor/regex-syntax/src/unicode.rs
deleted file mode 100644
index 07f78194..00000000
--- a/vendor/regex-syntax/src/unicode.rs
+++ /dev/null
@@ -1,1041 +0,0 @@
-use alloc::{
-    string::{String, ToString},
-    vec::Vec,
-};
-
-use crate::hir;
-
-/// An inclusive range of codepoints from a generated file (hence the static
-/// lifetime).
-type Range = &'static [(char, char)];
-
-/// An error that occurs when dealing with Unicode.
-///
-/// We don't impl the Error trait here because these always get converted
-/// into other public errors. (This error type isn't exported.)
-#[derive(Debug)]
-pub enum Error {
-    PropertyNotFound,
-    PropertyValueNotFound,
-    // Not used when unicode-perl is enabled.
-    #[allow(dead_code)]
-    PerlClassNotFound,
-}
-
-/// An error that occurs when Unicode-aware simple case folding fails.
-///
-/// This error can occur when the case mapping tables necessary for Unicode
-/// aware case folding are unavailable. This only occurs when the
-/// `unicode-case` feature is disabled. (The feature is enabled by default.)
-#[derive(Debug)]
-pub struct CaseFoldError(());
-
-#[cfg(feature = "std")]
-impl std::error::Error for CaseFoldError {}
-
-impl core::fmt::Display for CaseFoldError {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        write!(
-            f,
-            "Unicode-aware case folding is not available \
-             (probably because the unicode-case feature is not enabled)"
-        )
-    }
-}
-
-/// An error that occurs when the Unicode-aware `\w` class is unavailable.
-///
-/// This error can occur when the data tables necessary for the Unicode aware
-/// Perl character class `\w` are unavailable. This only occurs when the
-/// `unicode-perl` feature is disabled. (The feature is enabled by default.)
-#[derive(Debug)]
-pub struct UnicodeWordError(());
-
-#[cfg(feature = "std")]
-impl std::error::Error for UnicodeWordError {}
-
-impl core::fmt::Display for UnicodeWordError {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        write!(
-            f,
-            "Unicode-aware \\w class is not available \
-             (probably because the unicode-perl feature is not enabled)"
-        )
-    }
-}
-
-/// A state oriented traverser of the simple case folding table.
-///
-/// A case folder can be constructed via `SimpleCaseFolder::new()`, which will
-/// return an error if the underlying case folding table is unavailable.
-///
-/// After construction, it is expected that callers will use
-/// `SimpleCaseFolder::mapping` by calling it with codepoints in strictly
-/// increasing order. For example, calling it on `b` and then on `a` is illegal
-/// and will result in a panic.
-///
-/// The main idea of this type is that it tries hard to make mapping lookups
-/// fast by exploiting the structure of the underlying table, and the ordering
-/// assumption enables this.
-#[derive(Debug)]
-pub struct SimpleCaseFolder {
-    /// The simple case fold table. It's a sorted association list, where the
-    /// keys are Unicode scalar values and the values are the corresponding
-    /// equivalence class (not including the key) of the "simple" case folded
-    /// Unicode scalar values.
-    table: &'static [(char, &'static [char])],
-    /// The last codepoint that was used for a lookup.
-    last: Option<char>,
-    /// The index to the entry in `table` corresponding to the smallest key `k`
-    /// such that `k > k0`, where `k0` is the most recent key lookup. Note that
-    /// in particular, `k0` may not be in the table!
-    next: usize,
-}
-
-impl SimpleCaseFolder {
-    /// Create a new simple case folder, returning an error if the underlying
-    /// case folding table is unavailable.
-    pub fn new() -> Result<SimpleCaseFolder, CaseFoldError> {
-        #[cfg(not(feature = "unicode-case"))]
-        {
-            Err(CaseFoldError(()))
-        }
-        #[cfg(feature = "unicode-case")]
-        {
-            Ok(SimpleCaseFolder {
-                table: crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE,
-                last: None,
-                next: 0,
-            })
-        }
-    }
-
-    /// Return the equivalence class of case folded codepoints for the given
-    /// codepoint. The equivalence class returned never includes the codepoint
-    /// given. If the given codepoint has no case folded codepoints (i.e.,
-    /// no entry in the underlying case folding table), then this returns an
-    /// empty slice.
-    ///
-    /// # Panics
-    ///
-    /// This panics when called with a `c` that is less than or equal to the
-    /// previous call. In other words, callers need to use this method with
-    /// strictly increasing values of `c`.
-    pub fn mapping(&mut self, c: char) -> &'static [char] {
-        if let Some(last) = self.last {
-            assert!(
-                last < c,
-                "got codepoint U+{:X} which occurs before \
-                 last codepoint U+{:X}",
-                u32::from(c),
-                u32::from(last),
-            );
-        }
-        self.last = Some(c);
-        if self.next >= self.table.len() {
-            return &[];
-        }
-        let (k, v) = self.table[self.next];
-        if k == c {
-            self.next += 1;
-            return v;
-        }
-        match self.get(c) {
-            Err(i) => {
-                self.next = i;
-                &[]
-            }
-            Ok(i) => {
-                // Since we require lookups to proceed
-                // in order, anything we find should be
-                // after whatever we thought might be
-                // next. Otherwise, the caller is either
-                // going out of order or we would have
-                // found our next key at 'self.next'.
-                assert!(i > self.next);
-                self.next = i + 1;
-                self.table[i].1
-            }
-        }
-    }
-
-    /// Returns true if and only if the given range overlaps with any region
-    /// of the underlying case folding table. That is, when true, there exists
-    /// at least one codepoint in the inclusive range `[start, end]` that has
-    /// a non-trivial equivalence class of case folded codepoints. Conversely,
-    /// when this returns false, all codepoints in the range `[start, end]`
-    /// correspond to the trivial equivalence class of case folded codepoints,
-    /// i.e., itself.
-    ///
-    /// This is useful to call before iterating over the codepoints in the
-    /// range and looking up the mapping for each. If you know none of the
-    /// mappings will return anything, then you might be able to skip doing it
-    /// altogether.
-    ///
-    /// # Panics
-    ///
-    /// This panics when `end < start`.
-    pub fn overlaps(&self, start: char, end: char) -> bool {
-        use core::cmp::Ordering;
-
-        assert!(start <= end);
-        self.table
-            .binary_search_by(|&(c, _)| {
-                if start <= c && c <= end {
-                    Ordering::Equal
-                } else if c > end {
-                    Ordering::Greater
-                } else {
-                    Ordering::Less
-                }
-            })
-            .is_ok()
-    }
-
-    /// Returns the index at which `c` occurs in the simple case fold table. If
-    /// `c` does not occur, then this returns an `i` such that `table[i-1].0 <
-    /// c` and `table[i].0 > c`.
-    fn get(&self, c: char) -> Result<usize, usize> {
-        self.table.binary_search_by_key(&c, |&(c1, _)| c1)
-    }
-}
-
-/// A query for finding a character class defined by Unicode. This supports
-/// either use of a property name directly, or lookup by property value. The
-/// former generally refers to Binary properties (see UTS#44, Table 8), but
-/// as a special exception (see UTS#18, Section 1.2) both general categories
-/// (an enumeration) and scripts (a catalog) are supported as if each of their
-/// possible values were a binary property.
-///
-/// In all circumstances, property names and values are normalized and
-/// canonicalized. That is, `GC == gc == GeneralCategory == general_category`.
-///
-/// The lifetime `'a` refers to the shorter of the lifetimes of property name
-/// and property value.
-#[derive(Debug)]
-pub enum ClassQuery<'a> {
-    /// Return a class corresponding to a Unicode binary property, named by
-    /// a single letter.
-    OneLetter(char),
-    /// Return a class corresponding to a Unicode binary property.
-    ///
-    /// Note that, by special exception (see UTS#18, Section 1.2), both
-    /// general category values and script values are permitted here as if
-    /// they were a binary property.
-    Binary(&'a str),
-    /// Return a class corresponding to all codepoints whose property
-    /// (identified by `property_name`) corresponds to the given value
-    /// (identified by `property_value`).
-    ByValue {
-        /// A property name.
-        property_name: &'a str,
-        /// A property value.
-        property_value: &'a str,
-    },
-}
-
-impl<'a> ClassQuery<'a> {
-    fn canonicalize(&self) -> Result<CanonicalClassQuery, Error> {
-        match *self {
-            ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()),
-            ClassQuery::Binary(name) => self.canonical_binary(name),
-            ClassQuery::ByValue { property_name, property_value } => {
-                let property_name = symbolic_name_normalize(property_name);
-                let property_value = symbolic_name_normalize(property_value);
-
-                let canon_name = match canonical_prop(&property_name)? {
-                    None => return Err(Error::PropertyNotFound),
-                    Some(canon_name) => canon_name,
-                };
-                Ok(match canon_name {
-                    "General_Category" => {
-                        let canon = match canonical_gencat(&property_value)? {
-                            None => return Err(Error::PropertyValueNotFound),
-                            Some(canon) => canon,
-                        };
-                        CanonicalClassQuery::GeneralCategory(canon)
-                    }
-                    "Script" => {
-                        let canon = match canonical_script(&property_value)? {
-                            None => return Err(Error::PropertyValueNotFound),
-                            Some(canon) => canon,
-                        };
-                        CanonicalClassQuery::Script(canon)
-                    }
-                    _ => {
-                        let vals = match property_values(canon_name)? {
-                            None => return Err(Error::PropertyValueNotFound),
-                            Some(vals) => vals,
-                        };
-                        let canon_val =
-                            match canonical_value(vals, &property_value) {
-                                None => {
-                                    return Err(Error::PropertyValueNotFound)
-                                }
-                                Some(canon_val) => canon_val,
-                            };
-                        CanonicalClassQuery::ByValue {
-                            property_name: canon_name,
-                            property_value: canon_val,
-                        }
-                    }
-                })
-            }
-        }
-    }
-
-    fn canonical_binary(
-        &self,
-        name: &str,
-    ) -> Result<CanonicalClassQuery, Error> {
-        let norm = symbolic_name_normalize(name);
-
-        // This is a special case where 'cf' refers to the 'Format' general
-        // category, but where the 'cf' abbreviation is also an abbreviation
-        // for the 'Case_Folding' property. But we want to treat it as
-        // a general category. (Currently, we don't even support the
-        // 'Case_Folding' property. But if we do in the future, users will be
-        // required to spell it out.)
-        //
-        // Also 'sc' refers to the 'Currency_Symbol' general category, but is
-        // also the abbreviation for the 'Script' property. So we avoid calling
-        // 'canonical_prop' for it too, which would erroneously normalize it
-        // to 'Script'.
-        //
-        // Another case: 'lc' is an abbreviation for the 'Cased_Letter'
-        // general category, but is also an abbreviation for the 'Lowercase_Mapping'
-        // property. We don't currently support the latter, so as with 'cf'
-        // above, we treat 'lc' as 'Cased_Letter'.
-        if norm != "cf" && norm != "sc" && norm != "lc" {
-            if let Some(canon) = canonical_prop(&norm)? {
-                return Ok(CanonicalClassQuery::Binary(canon));
-            }
-        }
-        if let Some(canon) = canonical_gencat(&norm)? {
-            return Ok(CanonicalClassQuery::GeneralCategory(canon));
-        }
-        if let Some(canon) = canonical_script(&norm)? {
-            return Ok(CanonicalClassQuery::Script(canon));
-        }
-        Err(Error::PropertyNotFound)
-    }
-}
-
-/// Like ClassQuery, but its parameters have been canonicalized. This also
-/// differentiates binary properties from flattened general categories and
-/// scripts.
-#[derive(Debug, Eq, PartialEq)]
-enum CanonicalClassQuery {
-    /// The canonical binary property name.
-    Binary(&'static str),
-    /// The canonical general category name.
-    GeneralCategory(&'static str),
-    /// The canonical script name.
-    Script(&'static str),
-    /// An arbitrary association between property and value, both of which
-    /// have been canonicalized.
-    ///
-    /// Note that by construction, the property name of ByValue will never
-    /// be General_Category or Script. Those two cases are subsumed by the
-    /// eponymous variants.
-    ByValue {
-        /// The canonical property name.
-        property_name: &'static str,
-        /// The canonical property value.
-        property_value: &'static str,
-    },
-}
-
-/// Looks up a Unicode class given a query. If one doesn't exist, then
-/// `None` is returned.
-pub fn class(query: ClassQuery<'_>) -> Result<hir::ClassUnicode, Error> {
-    use self::CanonicalClassQuery::*;
-
-    match query.canonicalize()? {
-        Binary(name) => bool_property(name),
-        GeneralCategory(name) => gencat(name),
-        Script(name) => script(name),
-        ByValue { property_name: "Age", property_value } => {
-            let mut class = hir::ClassUnicode::empty();
-            for set in ages(property_value)? {
-                class.union(&hir_class(set));
-            }
-            Ok(class)
-        }
-        ByValue { property_name: "Script_Extensions", property_value } => {
-            script_extension(property_value)
-        }
-        ByValue {
-            property_name: "Grapheme_Cluster_Break",
-            property_value,
-        } => gcb(property_value),
-        ByValue { property_name: "Sentence_Break", property_value } => {
-            sb(property_value)
-        }
-        ByValue { property_name: "Word_Break", property_value } => {
-            wb(property_value)
-        }
-        _ => {
-            // What else should we support?
-            Err(Error::PropertyNotFound)
-        }
-    }
-}
-
-/// Returns a Unicode aware class for \w.
-///
-/// This returns an error if the data is not available for \w.
-pub fn perl_word() -> Result<hir::ClassUnicode, Error> {
-    #[cfg(not(feature = "unicode-perl"))]
-    fn imp() -> Result<hir::ClassUnicode, Error> {
-        Err(Error::PerlClassNotFound)
-    }
-
-    #[cfg(feature = "unicode-perl")]
-    fn imp() -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::perl_word::PERL_WORD;
-        Ok(hir_class(PERL_WORD))
-    }
-
-    imp()
-}
-
-/// Returns a Unicode aware class for \s.
-///
-/// This returns an error if the data is not available for \s.
-pub fn perl_space() -> Result<hir::ClassUnicode, Error> {
-    #[cfg(not(any(feature = "unicode-perl", feature = "unicode-bool")))]
-    fn imp() -> Result<hir::ClassUnicode, Error> {
-        Err(Error::PerlClassNotFound)
-    }
-
-    #[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))]
-    fn imp() -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::perl_space::WHITE_SPACE;
-        Ok(hir_class(WHITE_SPACE))
-    }
-
-    #[cfg(feature = "unicode-bool")]
-    fn imp() -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::property_bool::WHITE_SPACE;
-        Ok(hir_class(WHITE_SPACE))
-    }
-
-    imp()
-}
-
-/// Returns a Unicode aware class for \d.
-///
-/// This returns an error if the data is not available for \d.
-pub fn perl_digit() -> Result<hir::ClassUnicode, Error> {
-    #[cfg(not(any(feature = "unicode-perl", feature = "unicode-gencat")))]
-    fn imp() -> Result<hir::ClassUnicode, Error> {
-        Err(Error::PerlClassNotFound)
-    }
-
-    #[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))]
-    fn imp() -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::perl_decimal::DECIMAL_NUMBER;
-        Ok(hir_class(DECIMAL_NUMBER))
-    }
-
-    #[cfg(feature = "unicode-gencat")]
-    fn imp() -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::general_category::DECIMAL_NUMBER;
-        Ok(hir_class(DECIMAL_NUMBER))
-    }
-
-    imp()
-}
-
-/// Build a Unicode HIR class from a sequence of Unicode scalar value ranges.
-pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode {
-    let hir_ranges: Vec<hir::ClassUnicodeRange> = ranges
-        .iter()
-        .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
-        .collect();
-    hir::ClassUnicode::new(hir_ranges)
-}
-
-/// Returns true only if the given codepoint is in the `\w` character class.
-///
-/// If the `unicode-perl` feature is not enabled, then this returns an error.
-pub fn is_word_character(c: char) -> Result<bool, UnicodeWordError> {
-    #[cfg(not(feature = "unicode-perl"))]
-    fn imp(_: char) -> Result<bool, UnicodeWordError> {
-        Err(UnicodeWordError(()))
-    }
-
-    #[cfg(feature = "unicode-perl")]
-    fn imp(c: char) -> Result<bool, UnicodeWordError> {
-        use crate::{is_word_byte, unicode_tables::perl_word::PERL_WORD};
-
-        if u8::try_from(c).map_or(false, is_word_byte) {
-            return Ok(true);
-        }
-        Ok(PERL_WORD
-            .binary_search_by(|&(start, end)| {
-                use core::cmp::Ordering;
-
-                if start <= c && c <= end {
-                    Ordering::Equal
-                } else if start > c {
-                    Ordering::Greater
-                } else {
-                    Ordering::Less
-                }
-            })
-            .is_ok())
-    }
-
-    imp(c)
-}
-
-/// A mapping of property values for a specific property.
-///
-/// The first element of each tuple is a normalized property value while the
-/// second element of each tuple is the corresponding canonical property
-/// value.
-type PropertyValues = &'static [(&'static str, &'static str)];
-
-fn canonical_gencat(
-    normalized_value: &str,
-) -> Result<Option<&'static str>, Error> {
-    Ok(match normalized_value {
-        "any" => Some("Any"),
-        "assigned" => Some("Assigned"),
-        "ascii" => Some("ASCII"),
-        _ => {
-            let gencats = property_values("General_Category")?.unwrap();
-            canonical_value(gencats, normalized_value)
-        }
-    })
-}
-
-fn canonical_script(
-    normalized_value: &str,
-) -> Result<Option<&'static str>, Error> {
-    let scripts = property_values("Script")?.unwrap();
-    Ok(canonical_value(scripts, normalized_value))
-}
-
-/// Find the canonical property name for the given normalized property name.
-///
-/// If no such property exists, then `None` is returned.
-///
-/// The normalized property name must have been normalized according to
-/// UAX44 LM3, which can be done using `symbolic_name_normalize`.
-///
-/// If the property names data is not available, then an error is returned.
-fn canonical_prop(
-    normalized_name: &str,
-) -> Result<Option<&'static str>, Error> {
-    #[cfg(not(any(
-        feature = "unicode-age",
-        feature = "unicode-bool",
-        feature = "unicode-gencat",
-        feature = "unicode-perl",
-        feature = "unicode-script",
-        feature = "unicode-segment",
-    )))]
-    fn imp(_: &str) -> Result<Option<&'static str>, Error> {
-        Err(Error::PropertyNotFound)
-    }
-
-    #[cfg(any(
-        feature = "unicode-age",
-        feature = "unicode-bool",
-        feature = "unicode-gencat",
-        feature = "unicode-perl",
-        feature = "unicode-script",
-        feature = "unicode-segment",
-    ))]
-    fn imp(name: &str) -> Result<Option<&'static str>, Error> {
-        use crate::unicode_tables::property_names::PROPERTY_NAMES;
-
-        Ok(PROPERTY_NAMES
-            .binary_search_by_key(&name, |&(n, _)| n)
-            .ok()
-            .map(|i| PROPERTY_NAMES[i].1))
-    }
-
-    imp(normalized_name)
-}
-
-/// Find the canonical property value for the given normalized property
-/// value.
-///
-/// The given property values should correspond to the values for the property
-/// under question, which can be found using `property_values`.
-///
-/// If no such property value exists, then `None` is returned.
-///
-/// The normalized property value must have been normalized according to
-/// UAX44 LM3, which can be done using `symbolic_name_normalize`.
-fn canonical_value(
-    vals: PropertyValues,
-    normalized_value: &str,
-) -> Option<&'static str> {
-    vals.binary_search_by_key(&normalized_value, |&(n, _)| n)
-        .ok()
-        .map(|i| vals[i].1)
-}
-
-/// Return the table of property values for the given property name.
-///
-/// If the property values data is not available, then an error is returned.
-fn property_values(
-    canonical_property_name: &'static str,
-) -> Result<Option<PropertyValues>, Error> {
-    #[cfg(not(any(
-        feature = "unicode-age",
-        feature = "unicode-bool",
-        feature = "unicode-gencat",
-        feature = "unicode-perl",
-        feature = "unicode-script",
-        feature = "unicode-segment",
-    )))]
-    fn imp(_: &'static str) -> Result<Option<PropertyValues>, Error> {
-        Err(Error::PropertyValueNotFound)
-    }
-
-    #[cfg(any(
-        feature = "unicode-age",
-        feature = "unicode-bool",
-        feature = "unicode-gencat",
-        feature = "unicode-perl",
-        feature = "unicode-script",
-        feature = "unicode-segment",
-    ))]
-    fn imp(name: &'static str) -> Result<Option<PropertyValues>, Error> {
-        use crate::unicode_tables::property_values::PROPERTY_VALUES;
-
-        Ok(PROPERTY_VALUES
-            .binary_search_by_key(&name, |&(n, _)| n)
-            .ok()
-            .map(|i| PROPERTY_VALUES[i].1))
-    }
-
-    imp(canonical_property_name)
-}
-
-// This is only used in some cases, but small enough to just let it be dead
-// instead of figuring out (and maintaining) the right set of features.
-#[allow(dead_code)]
-fn property_set(
-    name_map: &'static [(&'static str, Range)],
-    canonical: &'static str,
-) -> Option<Range> {
-    name_map
-        .binary_search_by_key(&canonical, |x| x.0)
-        .ok()
-        .map(|i| name_map[i].1)
-}
-
-/// Returns an iterator over Unicode Age sets. Each item corresponds to a set
-/// of codepoints that were added in a particular revision of Unicode. The
-/// iterator yields items in chronological order.
-///
-/// If the given age value isn't valid or if the data isn't available, then an
-/// error is returned instead.
-fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>, Error> {
-    #[cfg(not(feature = "unicode-age"))]
-    fn imp(_: &str) -> Result<impl Iterator<Item = Range>, Error> {
-        use core::option::IntoIter;
-        Err::<IntoIter<Range>, _>(Error::PropertyNotFound)
-    }
-
-    #[cfg(feature = "unicode-age")]
-    fn imp(canonical_age: &str) -> Result<impl Iterator<Item = Range>, Error> {
-        use crate::unicode_tables::age;
-
-        const AGES: &[(&str, Range)] = &[
-            ("V1_1", age::V1_1),
-            ("V2_0", age::V2_0),
-            ("V2_1", age::V2_1),
-            ("V3_0", age::V3_0),
-            ("V3_1", age::V3_1),
-            ("V3_2", age::V3_2),
-            ("V4_0", age::V4_0),
-            ("V4_1", age::V4_1),
-            ("V5_0", age::V5_0),
-            ("V5_1", age::V5_1),
-            ("V5_2", age::V5_2),
-            ("V6_0", age::V6_0),
-            ("V6_1", age::V6_1),
-            ("V6_2", age::V6_2),
-            ("V6_3", age::V6_3),
-            ("V7_0", age::V7_0),
-            ("V8_0", age::V8_0),
-            ("V9_0", age::V9_0),
-            ("V10_0", age::V10_0),
-            ("V11_0", age::V11_0),
-            ("V12_0", age::V12_0),
-            ("V12_1", age::V12_1),
-            ("V13_0", age::V13_0),
-            ("V14_0", age::V14_0),
-            ("V15_0", age::V15_0),
-            ("V15_1", age::V15_1),
-            ("V16_0", age::V16_0),
-        ];
-        assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
-
-        let pos = AGES.iter().position(|&(age, _)| canonical_age == age);
-        match pos {
-            None => Err(Error::PropertyValueNotFound),
-            Some(i) => Ok(AGES[..=i].iter().map(|&(_, classes)| classes)),
-        }
-    }
-
-    imp(canonical_age)
-}
-
-/// Returns the Unicode HIR class corresponding to the given general category.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given general category could not be found, or if the general
-/// category data is not available, then an error is returned.
-fn gencat(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> {
-    #[cfg(not(feature = "unicode-gencat"))]
-    fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
-        Err(Error::PropertyNotFound)
-    }
-
-    #[cfg(feature = "unicode-gencat")]
-    fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::general_category::BY_NAME;
-        match name {
-            "ASCII" => Ok(hir_class(&[('\0', '\x7F')])),
-            "Any" => Ok(hir_class(&[('\0', '\u{10FFFF}')])),
-            "Assigned" => {
-                let mut cls = gencat("Unassigned")?;
-                cls.negate();
-                Ok(cls)
-            }
-            name => property_set(BY_NAME, name)
-                .map(hir_class)
-                .ok_or(Error::PropertyValueNotFound),
-        }
-    }
-
-    match canonical_name {
-        "Decimal_Number" => perl_digit(),
-        name => imp(name),
-    }
-}
-
-/// Returns the Unicode HIR class corresponding to the given script.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given script could not be found, or if the script data is not
-/// available, then an error is returned.
-fn script(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> {
-    #[cfg(not(feature = "unicode-script"))]
-    fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
-        Err(Error::PropertyNotFound)
-    }
-
-    #[cfg(feature = "unicode-script")]
-    fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::script::BY_NAME;
-        property_set(BY_NAME, name)
-            .map(hir_class)
-            .ok_or(Error::PropertyValueNotFound)
-    }
-
-    imp(canonical_name)
-}
-
-/// Returns the Unicode HIR class corresponding to the given script extension.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given script extension could not be found, or if the script data is
-/// not available, then an error is returned.
-fn script_extension(
-    canonical_name: &'static str,
-) -> Result<hir::ClassUnicode, Error> {
-    #[cfg(not(feature = "unicode-script"))]
-    fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
-        Err(Error::PropertyNotFound)
-    }
-
-    #[cfg(feature = "unicode-script")]
-    fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::script_extension::BY_NAME;
-        property_set(BY_NAME, name)
-            .map(hir_class)
-            .ok_or(Error::PropertyValueNotFound)
-    }
-
-    imp(canonical_name)
-}
-
-/// Returns the Unicode HIR class corresponding to the given Unicode boolean
-/// property.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given boolean property could not be found, or if the boolean
-/// property data is not available, then an error is returned.
-fn bool_property(
-    canonical_name: &'static str,
-) -> Result<hir::ClassUnicode, Error> {
-    #[cfg(not(feature = "unicode-bool"))]
-    fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
-        Err(Error::PropertyNotFound)
-    }
-
-    #[cfg(feature = "unicode-bool")]
-    fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::property_bool::BY_NAME;
-        property_set(BY_NAME, name)
-            .map(hir_class)
-            .ok_or(Error::PropertyNotFound)
-    }
-
-    match canonical_name {
-        "Decimal_Number" => perl_digit(),
-        "White_Space" => perl_space(),
-        name => imp(name),
-    }
-}
-
-/// Returns the Unicode HIR class corresponding to the given grapheme cluster
-/// break property.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given property could not be found, or if the corresponding data is
-/// not available, then an error is returned.
-fn gcb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> {
-    #[cfg(not(feature = "unicode-segment"))]
-    fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
-        Err(Error::PropertyNotFound)
-    }
-
-    #[cfg(feature = "unicode-segment")]
-    fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::grapheme_cluster_break::BY_NAME;
-        property_set(BY_NAME, name)
-            .map(hir_class)
-            .ok_or(Error::PropertyValueNotFound)
-    }
-
-    imp(canonical_name)
-}
-
-/// Returns the Unicode HIR class corresponding to the given word break
-/// property.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given property could not be found, or if the corresponding data is
-/// not available, then an error is returned.
-fn wb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> {
-    #[cfg(not(feature = "unicode-segment"))]
-    fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
-        Err(Error::PropertyNotFound)
-    }
-
-    #[cfg(feature = "unicode-segment")]
-    fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::word_break::BY_NAME;
-        property_set(BY_NAME, name)
-            .map(hir_class)
-            .ok_or(Error::PropertyValueNotFound)
-    }
-
-    imp(canonical_name)
-}
-
-/// Returns the Unicode HIR class corresponding to the given sentence
-/// break property.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given property could not be found, or if the corresponding data is
-/// not available, then an error is returned.
-fn sb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> {
-    #[cfg(not(feature = "unicode-segment"))]
-    fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
-        Err(Error::PropertyNotFound)
-    }
-
-    #[cfg(feature = "unicode-segment")]
-    fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
-        use crate::unicode_tables::sentence_break::BY_NAME;
-        property_set(BY_NAME, name)
-            .map(hir_class)
-            .ok_or(Error::PropertyValueNotFound)
-    }
-
-    imp(canonical_name)
-}
-
-/// Like symbolic_name_normalize_bytes, but operates on a string.
-fn symbolic_name_normalize(x: &str) -> String {
-    let mut tmp = x.as_bytes().to_vec();
-    let len = symbolic_name_normalize_bytes(&mut tmp).len();
-    tmp.truncate(len);
-    // This should always succeed because `symbolic_name_normalize_bytes`
-    // guarantees that `&tmp[..len]` is always valid UTF-8.
-    //
-    // N.B. We could avoid the additional UTF-8 check here, but it's unlikely
-    // to be worth skipping the additional safety check. A benchmark must
-    // justify it first.
-    String::from_utf8(tmp).unwrap()
-}
-
-/// Normalize the given symbolic name in place according to UAX44-LM3.
-///
-/// A "symbolic name" typically corresponds to property names and property
-/// value aliases. Note, though, that it should not be applied to property
-/// string values.
-///
-/// The slice returned is guaranteed to be valid UTF-8 for all possible values
-/// of `slice`.
-///
-/// See: https://unicode.org/reports/tr44/#UAX44-LM3
-fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] {
-    // I couldn't find a place in the standard that specified that property
-    // names/aliases had a particular structure (unlike character names), but
-    // we assume that it's ASCII only and drop anything that isn't ASCII.
-    let mut start = 0;
-    let mut starts_with_is = false;
-    if slice.len() >= 2 {
-        // Ignore any "is" prefix.
-        starts_with_is = slice[0..2] == b"is"[..]
-            || slice[0..2] == b"IS"[..]
-            || slice[0..2] == b"iS"[..]
-            || slice[0..2] == b"Is"[..];
-        if starts_with_is {
-            start = 2;
-        }
-    }
-    let mut next_write = 0;
-    for i in start..slice.len() {
-        // VALIDITY ARGUMENT: To guarantee that the resulting slice is valid
-        // UTF-8, we ensure that the slice contains only ASCII bytes. In
-        // particular, we drop every non-ASCII byte from the normalized string.
-        let b = slice[i];
-        if b == b' ' || b == b'_' || b == b'-' {
-            continue;
-        } else if b'A' <= b && b <= b'Z' {
-            slice[next_write] = b + (b'a' - b'A');
-            next_write += 1;
-        } else if b <= 0x7F {
-            slice[next_write] = b;
-            next_write += 1;
-        }
-    }
-    // Special case: ISO_Comment has a 'isc' abbreviation. Since we generally
-    // ignore 'is' prefixes, the 'isc' abbreviation gets caught in the cross
-    // fire and ends up creating an alias for 'c' to 'ISO_Comment', but it
-    // is actually an alias for the 'Other' general category.
-    if starts_with_is && next_write == 1 && slice[0] == b'c' {
-        slice[0] = b'i';
-        slice[1] = b's';
-        slice[2] = b'c';
-        next_write = 3;
-    }
-    &mut slice[..next_write]
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[cfg(feature = "unicode-case")]
-    fn simple_fold_ok(c: char) -> impl Iterator<Item = char> {
-        SimpleCaseFolder::new().unwrap().mapping(c).iter().copied()
-    }
-
-    #[cfg(feature = "unicode-case")]
-    fn contains_case_map(start: char, end: char) -> bool {
-        SimpleCaseFolder::new().unwrap().overlaps(start, end)
-    }
-
-    #[test]
-    #[cfg(feature = "unicode-case")]
-    fn simple_fold_k() {
-        let xs: Vec<char> = simple_fold_ok('k').collect();
-        assert_eq!(xs, alloc::vec!['K', 'K']);
-
-        let xs: Vec<char> = simple_fold_ok('K').collect();
-        assert_eq!(xs, alloc::vec!['k', 'K']);
-
-        let xs: Vec<char> = simple_fold_ok('K').collect();
-        assert_eq!(xs, alloc::vec!['K', 'k']);
-    }
-
-    #[test]
-    #[cfg(feature = "unicode-case")]
-    fn simple_fold_a() {
-        let xs: Vec<char> = simple_fold_ok('a').collect();
-        assert_eq!(xs, alloc::vec!['A']);
-
-        let xs: Vec<char> = simple_fold_ok('A').collect();
-        assert_eq!(xs, alloc::vec!['a']);
-    }
-
-    #[test]
-    #[cfg(not(feature = "unicode-case"))]
-    fn simple_fold_disabled() {
-        assert!(SimpleCaseFolder::new().is_err());
-    }
-
-    #[test]
-    #[cfg(feature = "unicode-case")]
-    fn range_contains() {
-        assert!(contains_case_map('A', 'A'));
-        assert!(contains_case_map('Z', 'Z'));
-        assert!(contains_case_map('A', 'Z'));
-        assert!(contains_case_map('@', 'A'));
-        assert!(contains_case_map('Z', '['));
-        assert!(contains_case_map('☃', 'Ⰰ'));
-
-        assert!(!contains_case_map('[', '['));
-        assert!(!contains_case_map('[', '`'));
-
-        assert!(!contains_case_map('☃', '☃'));
-    }
-
-    #[test]
-    #[cfg(feature = "unicode-gencat")]
-    fn regression_466() {
-        use super::{CanonicalClassQuery, ClassQuery};
-
-        let q = ClassQuery::OneLetter('C');
-        assert_eq!(
-            q.canonicalize().unwrap(),
-            CanonicalClassQuery::GeneralCategory("Other")
-        );
-    }
-
-    #[test]
-    fn sym_normalize() {
-        let sym_norm = symbolic_name_normalize;
-
-        assert_eq!(sym_norm("Line_Break"), "linebreak");
-        assert_eq!(sym_norm("Line-break"), "linebreak");
-        assert_eq!(sym_norm("linebreak"), "linebreak");
-        assert_eq!(sym_norm("BA"), "ba");
-        assert_eq!(sym_norm("ba"), "ba");
-        assert_eq!(sym_norm("Greek"), "greek");
-        assert_eq!(sym_norm("isGreek"), "greek");
-        assert_eq!(sym_norm("IS_Greek"), "greek");
-        assert_eq!(sym_norm("isc"), "isc");
-        assert_eq!(sym_norm("is c"), "isc");
-        assert_eq!(sym_norm("is_c"), "isc");
-    }
-
-    #[test]
-    fn valid_utf8_symbolic() {
-        let mut x = b"abc\xFFxyz".to_vec();
-        let y = symbolic_name_normalize_bytes(&mut x);
-        assert_eq!(y, b"abcxyz");
-    }
-}
author	mo khan <mo@mokhan.ca>	2025-07-15 16:37:08 -0600
committer	mo khan <mo@mokhan.ca>	2025-07-17 16:30:22 -0600
commit	45df4d0d9b577fecee798d672695fe24ff57fb1b (patch)
tree	1b99bf645035b58e0d6db08c7a83521f41f7a75b /vendor/regex-syntax/src/unicode.rs
parent	f94f79608393d4ab127db63cc41668445ef6b243 (diff)