diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-15 16:37:08 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-17 16:30:22 -0600 |
| commit | 45df4d0d9b577fecee798d672695fe24ff57fb1b (patch) | |
| tree | 1b99bf645035b58e0d6db08c7a83521f41f7a75b /vendor/unicode-script/src/lib.rs | |
| parent | f94f79608393d4ab127db63cc41668445ef6b243 (diff) | |
feat: migrate from Cedar to SpiceDB authorization system
This is a major architectural change that replaces the Cedar policy-based
authorization system with SpiceDB's relation-based authorization.
Key changes:
- Migrate from Rust to Go implementation
- Replace Cedar policies with SpiceDB schema and relationships
- Switch from Envoy `ext_authz` with Cedar to SpiceDB permission checks
- Update build system and dependencies for Go ecosystem
- Maintain Envoy integration for external authorization
This change enables more flexible permission modeling through SpiceDB's
Google Zanzibar-inspired relation-based system, supporting complex
hierarchical permissions that were difficult to express in Cedar.
Breaking change: Existing Cedar policies and Rust-based configuration
will no longer work and need to be migrated to SpiceDB schema.
Diffstat (limited to 'vendor/unicode-script/src/lib.rs')
| -rw-r--r-- | vendor/unicode-script/src/lib.rs | 560 |
1 files changed, 0 insertions, 560 deletions
diff --git a/vendor/unicode-script/src/lib.rs b/vendor/unicode-script/src/lib.rs deleted file mode 100644 index a8e3026b..00000000 --- a/vendor/unicode-script/src/lib.rs +++ /dev/null @@ -1,560 +0,0 @@ -//! This crate exposes the Unicode `Script` and `Script_Extension` -//! properties from [UAX #24](http://www.unicode.org/reports/tr24/) - -#![cfg_attr(not(test), no_std)] -#![cfg_attr(feature = "bench", feature(test))] - -mod tables; - -use core::convert::TryFrom; -use core::fmt; -use core::u64; -pub use tables::script_extensions; -use tables::{get_script, get_script_extension, NEXT_SCRIPT}; -pub use tables::{Script, UNICODE_VERSION}; - -impl Script { - /// Get the full name of a script. - pub fn full_name(self) -> &'static str { - self.inner_full_name() - } - - /// Attempts to parse script name from the provided string. - /// Returns `None` if the provided string does not represent a valid - /// script full name. - pub fn from_full_name(input: &str) -> Option<Self> { - Self::inner_from_full_name(input) - } - - /// Get the four-character short name of a script. - pub fn short_name(self) -> &'static str { - self.inner_short_name() - } - - /// Attempts to parse script name from the provided string. - /// Returns `None` if the provided string does not represent a valid - /// script four-character short name. - pub fn from_short_name(input: &str) -> Option<Self> { - Self::inner_from_short_name(input) - } - - /// Is this script "Recommended" according to - /// [UAX #31](www.unicode.org/reports/tr31/#Table_Recommended_Scripts)? 
- pub fn is_recommended(self) -> bool { - use Script::*; - match self { - Common | Inherited | Arabic | Armenian | Bengali | Bopomofo | Cyrillic | Devanagari - | Ethiopic | Georgian | Greek | Gujarati | Gurmukhi | Han | Hangul | Hebrew - | Hiragana | Kannada | Katakana | Khmer | Lao | Latin | Malayalam | Myanmar | Oriya - | Sinhala | Tamil | Telugu | Thaana | Thai | Tibetan => true, - _ => false, - } - } -} - -impl From<Script> for ScriptExtension { - fn from(script: Script) -> Self { - if script == Script::Common { - ScriptExtension::new_common() - } else if script == Script::Inherited { - ScriptExtension::new_inherited() - } else if script == Script::Unknown { - ScriptExtension::new_unknown() - } else { - let mut first = 0; - let mut second = 0; - let mut third = 0; - let bit = script as u8; - // Find out which field it's in, and set the appropriate bit there - if bit < 64 { - first = 1 << bit as u64; - } else if bit < 128 { - // offset by 64 since `bit` is an absolute number, - // not relative to the chunk - second = 1 << (bit - 64) as u64; - } else { - third = 1 << (bit - 128) as u32; - } - ScriptExtension::new(first, second, third) - } - } -} - -impl TryFrom<ScriptExtension> for Script { - type Error = (); - fn try_from(ext: ScriptExtension) -> Result<Self, ()> { - if ext.is_common_or_inherited() { - if ext.common { - Ok(Script::Common) - } else { - Ok(Script::Inherited) - } - } else if ext.is_empty() { - Ok(Script::Unknown) - } else { - // filled elements will have set ones - let fo = ext.first.count_ones(); - let so = ext.second.count_ones(); - let to = ext.third.count_ones(); - // only one bit set, in the first chunk - if fo == 1 && so == 0 && to == 0 { - // use trailing_zeroes() to figure out which bit it is - Ok(Script::for_integer(ext.first.trailing_zeros() as u8)) - // only one bit set, in the second chunk - } else if fo == 0 && so == 1 && to == 0 { - Ok(Script::for_integer(64 + ext.second.trailing_zeros() as u8)) - // only one bit set, in the third 
chunk - } else if fo == 0 && so == 0 && to == 1 { - Ok(Script::for_integer(128 + ext.third.trailing_zeros() as u8)) - } else { - Err(()) - } - } - } -} - -impl Default for Script { - fn default() -> Self { - Script::Common - } -} - -impl From<char> for Script { - fn from(o: char) -> Self { - o.script() - } -} - -impl fmt::Display for Script { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.full_name()) - } -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -#[non_exhaustive] -/// A value for the `Script_Extension` property -/// -/// [`ScriptExtension`] is one or more [`Script`] -/// -/// This is essentially an optimized version of `Vec<Script>` that uses bitfields -pub struct ScriptExtension { - // A bitset for the first 64 scripts - first: u64, - // A bitset for the scripts 65-128 - second: u64, - // A bitset for scripts after 128 - third: u64, - // Both Common and Inherited are represented by all used bits being set, - // this flag lets us distinguish the two. - common: bool, -} - -impl ScriptExtension { - // We don't use the complete u64 of `third`, so the "all" value is not just u32::MAX - // Instead, we take the number of the next (unused) script bit, subtract 128 to bring - // it in the range of `third`, create a u64 with just that bit set, and subtract 1 - // to create one with all the lower bits set. 
- const THIRD_MAX: u64 = ((1 << (NEXT_SCRIPT - 128)) - 1); - - pub(crate) const fn new(first: u64, second: u64, third: u64) -> Self { - ScriptExtension { - first, - second, - third, - common: false, - } - } - - pub(crate) const fn new_common() -> Self { - ScriptExtension { - first: u64::MAX, - second: u64::MAX, - third: Self::THIRD_MAX, - common: true, - } - } - - pub(crate) const fn new_inherited() -> Self { - ScriptExtension { - first: u64::MAX, - second: u64::MAX, - third: Self::THIRD_MAX, - common: false, - } - } - - pub(crate) const fn new_unknown() -> Self { - ScriptExtension { - first: 0, - second: 0, - third: 0, - common: false, - } - } - - const fn is_common_or_inherited(self) -> bool { - (self.first == u64::MAX) & (self.second == u64::MAX) & (self.third == Self::THIRD_MAX) - } - - /// Checks if the script extension is Common - pub const fn is_common(self) -> bool { - self.is_common_or_inherited() & self.common - } - - /// Checks if the script extension is Inherited - pub const fn is_inherited(self) -> bool { - self.is_common_or_inherited() & !self.common - } - - /// Checks if the script extension is empty (unknown) - pub const fn is_empty(self) -> bool { - (self.first == 0) & (self.second == 0) & (self.third == 0) - } - - /// Returns the number of scripts in the script extension - pub fn len(self) -> usize { - if self.is_common_or_inherited() { - 1 - } else { - (self.first.count_ones() + self.second.count_ones() + self.third.count_ones()) as usize - } - } - - /// Intersect this `ScriptExtension` with another `ScriptExtension`. Produces `Unknown` if things - /// do not intersect. 
This is equivalent to [`ScriptExtension::intersection`] but it stores the result - /// in `self` - /// - /// "Common" (`Zyyy`) and "Inherited" (`Zinh`) are considered as intersecting - /// everything, the intersection of `Common` and `Inherited` is `Inherited` - pub fn intersect_with(&mut self, other: Self) { - *self = self.intersection(other) - } - - /// Find the intersection between two ScriptExtensions. Returns Unknown if things - /// do not intersect. - /// - /// "Common" (`Zyyy`) and "Inherited" (`Zinh`) are considered as intersecting - /// everything, the intersection of `Common` and `Inherited` is `Inherited` - pub const fn intersection(self, other: Self) -> Self { - let first = self.first & other.first; - let second = self.second & other.second; - let third = self.third & other.third; - let common = self.common & other.common; - ScriptExtension { - first, - second, - third, - common, - } - } - - /// Find the union between two ScriptExtensions. - /// - /// "Common" (`Zyyy`) and "Inherited" (`Zinh`) are considered as intersecting - /// everything, the union of `Common` and `Inherited` is `Common` - pub const fn union(self, other: Self) -> Self { - let first = self.first | other.first; - let second = self.second | other.second; - let third = self.third | other.third; - let common = self.common | other.common; - ScriptExtension { - first, - second, - third, - common, - } - } - - /// Check if this ScriptExtension contains the given script - /// - /// Should be used with specific scripts only, this will - /// return `true` if `self` is not `Unknown` and `script` is - /// `Common` or `Inherited` - pub fn contains_script(self, script: Script) -> bool { - !self.intersection(script.into()).is_empty() - } - - /// Get the intersection of script extensions of all characters - /// in a string. 
- pub fn for_str(x: &str) -> Self { - let mut ext = ScriptExtension::default(); - for ch in x.chars() { - ext.intersect_with(ch.into()); - } - ext - } - - /// Iterate over the scripts in this script extension - /// - /// Will never yield Script::Unknown - pub fn iter(self) -> ScriptIterator { - ScriptIterator { ext: self } - } -} - -impl Default for ScriptExtension { - fn default() -> Self { - ScriptExtension::new_common() - } -} - -impl From<char> for ScriptExtension { - fn from(o: char) -> Self { - o.script_extension() - } -} - -impl From<&'_ str> for ScriptExtension { - fn from(o: &'_ str) -> Self { - Self::for_str(o) - } -} - -impl fmt::Debug for ScriptExtension { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "ScriptExtension(")?; - fmt::Display::fmt(self, f)?; - write!(f, ")") - } -} - -impl fmt::Display for ScriptExtension { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.is_common() { - write!(f, "Common")?; - } else if self.is_inherited() { - write!(f, "Inherited")?; - } else if self.is_empty() { - write!(f, "Unknown")?; - } else { - let mut first = true; - for script in self.iter() { - if !first { - write!(f, " + ")?; - first = false; - } - script.full_name().fmt(f)?; - } - } - Ok(()) - } -} - -/// Extension trait on `char` for calculating script properties -pub trait UnicodeScript { - /// Get the script for a given character - fn script(&self) -> Script; - /// Get the Script_Extension for a given character - fn script_extension(&self) -> ScriptExtension; -} - -impl UnicodeScript for char { - fn script(&self) -> Script { - get_script(*self).unwrap_or(Script::Unknown) - } - - fn script_extension(&self) -> ScriptExtension { - get_script_extension(*self).unwrap_or_else(|| self.script().into()) - } -} - -/// Iterator over scripts in a [ScriptExtension]. 
-/// -/// Can be obtained ia [ScriptExtension::iter()] -pub struct ScriptIterator { - ext: ScriptExtension, -} - -impl Iterator for ScriptIterator { - type Item = Script; - - fn next(&mut self) -> Option<Script> { - if self.ext.is_common_or_inherited() { - let common = self.ext.common; - self.ext = ScriptExtension::new_unknown(); - if common { - Some(Script::Common) - } else { - Some(Script::Inherited) - } - // Are there bits left in the first chunk? - } else if self.ext.first != 0 { - // Find the next bit - let bit = self.ext.first.trailing_zeros(); - // unset just that bit - self.ext.first &= !(1 << bit); - Some(Script::for_integer(bit as u8)) - // Are there bits left in the second chunk? - } else if self.ext.second != 0 { - let bit = self.ext.second.trailing_zeros(); - self.ext.second &= !(1 << bit); - Some(Script::for_integer(64 + bit as u8)) - // Are there bits left in the third chunk? - } else if self.ext.third != 0 { - let bit = self.ext.third.trailing_zeros(); - self.ext.third &= !(1 << bit); - Some(Script::for_integer(128 + bit as u8)) - } else { - // Script::Unknown - None - } - } -} - -#[cfg(test)] -mod tests { - use crate::*; - use std::collections::HashSet; - use std::convert::TryInto; - - #[cfg(feature = "bench")] - use test::bench::Bencher; - #[cfg(feature = "bench")] - extern crate test; - - #[test] - fn test_conversion() { - let mut seen_scripts = HashSet::new(); - let mut seen_exts = HashSet::new(); - for bit in 0..NEXT_SCRIPT { - let script = Script::for_integer(bit); - let ext = script.into(); - if seen_scripts.contains(&script) { - panic!("Found script {:?} twice!", script) - } - if seen_exts.contains(&ext) { - panic!("Found extension {:?} twice!", ext) - } - seen_scripts.insert(script); - seen_exts.insert(ext); - assert_eq!(script as u8, bit); - assert!(!ScriptExtension::new_common().intersection(ext).is_empty()); - assert!(!ScriptExtension::new_inherited() - .intersection(ext) - .is_empty()); - 
assert!(ScriptExtension::new_unknown().intersection(ext).is_empty()); - assert_eq!(ext.iter().collect::<Vec<_>>(), vec![script]); - assert_eq!(Ok(script), ext.try_into()); - } - } - - #[test] - fn test_specific() { - let s = "सवव मानवी व्यद्क् जन्मतःच स्वतींत्र आहेत व त्ाींना समान प्रवतष्ठा व समान अविकार आहेत. त्ाींना ववचारशद्क् व सवविे कबुद्द्धलाभलेली आहे. व त्ाींनी एकमेकाींशी बींिुत्वाचाभावनेने आचरण करावे."; - let ext = ScriptExtension::for_str(s); - assert_eq!(ext, script_extensions::DEVA); - println!( - "{:?}", - script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH - ); - println!( - "{:?}", - ext.intersection( - script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH - ) - ); - assert!(!ext - .intersection(script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH) - .is_empty()); - - let u = ext.union(Script::Dogra.into()); - assert_eq!( - u.intersection( - script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH - ), - u - ); - } - - #[test] - fn test_specific_ext() { - let ext = script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH; - - let all: HashSet<_> = ext.iter().collect(); - - for bit in 0..NEXT_SCRIPT { - let script = Script::for_integer(bit); - - if all.contains(&script) { - assert!(ext.contains_script(script)) - } else { - assert!(!ext.contains_script(script)) - } - } - - assert!(ext.contains_script(Script::Devanagari)); - assert!(ext.contains_script(Script::Dogra)); - assert!(ext.contains_script(Script::Gujarati)); - assert!(ext.contains_script(Script::Gurmukhi)); - assert!(ext.contains_script(Script::Khojki)); - assert!(ext.contains_script(Script::Kaithi)); - assert!(ext.contains_script(Script::Mahajani)); - assert!(ext.contains_script(Script::Modi)); - assert!(ext.contains_script(Script::Khudawadi)); - assert!(ext.contains_script(Script::Takri)); - assert!(ext.contains_script(Script::Tirhuta)); - - let scr: Result<Script, _> = ext.try_into(); - 
assert!(scr.is_err()); - } - - #[cfg(feature = "bench")] - #[bench] - fn bench_script_intersection(b: &mut Bencher) { - b.iter(|| { - let script = test::black_box(Script::Devanagari); - let ext = test::black_box(script_extensions::BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ONAO_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH); - test::black_box(ext.intersection(script.into())); - }) - } - - #[cfg(feature = "bench")] - #[bench] - fn bench_ext_to_script(b: &mut Bencher) { - let ext: ScriptExtension = Script::Devanagari.into(); - b.iter(|| { - let ext = test::black_box(ext); - let script: Result<Script, _> = ext.try_into(); - let _ = test::black_box(script); - }) - } - - #[cfg(feature = "bench")] - #[bench] - fn bench_script_to_ext(b: &mut Bencher) { - b.iter(|| { - let script = test::black_box(Script::Devanagari); - let ext: ScriptExtension = script.into(); - test::black_box(ext); - }) - } - - #[cfg(feature = "bench")] - #[bench] - fn bench_ext_intersection(b: &mut Bencher) { - b.iter(|| { - let e1 = test::black_box(script_extensions::ARAB_GARA_NKOO_ROHG_SYRC_THAA_YEZI); - let e2 = test::black_box(script_extensions::BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ONAO_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH); - test::black_box(e2.intersection(e1)); - }) - } - - #[cfg(feature = "bench")] - #[bench] - fn bench_to_vec(b: &mut Bencher) { - b.iter(|| { - let ext = test::black_box(script_extensions::BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ONAO_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH); - test::black_box(ext.iter().collect::<Vec<_>>()); - }) - } - - #[cfg(feature = "bench")] - #[bench] - fn bench_string_ext(b: &mut Bencher) { - b.iter(|| { - let s = test::black_box("सवव मानवी व्यद्क् जन्मतःच स्वतींत्र आहेत व त्ाींना समान प्रवतष्ठा व समान अविकार आहेत. त्ाींना ववचारशद्क् व सवविे कबुद्द्धलाभलेली आहे. व त्ाींनी एकमेकाींशी बींिुत्वाचाभावनेने आचरण करावे."); - test::black_box(ScriptExtension::for_str(s)); - }) - } -} |
