From 45df4d0d9b577fecee798d672695fe24ff57fb1b Mon Sep 17 00:00:00 2001 From: mo khan Date: Tue, 15 Jul 2025 16:37:08 -0600 Subject: feat: migrate from Cedar to SpiceDB authorization system This is a major architectural change that replaces the Cedar policy-based authorization system with SpiceDB's relation-based authorization. Key changes: - Migrate from Rust to Go implementation - Replace Cedar policies with SpiceDB schema and relationships - Switch from envoy `ext_authz` with Cedar to SpiceDB permission checks - Update build system and dependencies for Go ecosystem - Maintain Envoy integration for external authorization This change enables more flexible permission modeling through SpiceDB's Google Zanzibar inspired relation-based system, supporting complex hierarchical permissions that were difficult to express in Cedar. Breaking change: Existing Cedar policies and Rust-based configuration will no longer work and need to be migrated to SpiceDB schema. --- vendor/rustc_lexer/.cargo-checksum.json | 1 - vendor/rustc_lexer/Cargo.toml | 26 -- vendor/rustc_lexer/src/cursor.rs | 57 ---- vendor/rustc_lexer/src/lib.rs | 562 ------------------------------- vendor/rustc_lexer/src/unescape.rs | 305 ----------------- vendor/rustc_lexer/src/unescape/tests.rs | 271 --------------- 6 files changed, 1222 deletions(-) delete mode 100644 vendor/rustc_lexer/.cargo-checksum.json delete mode 100644 vendor/rustc_lexer/Cargo.toml delete mode 100644 vendor/rustc_lexer/src/cursor.rs delete mode 100644 vendor/rustc_lexer/src/lib.rs delete mode 100644 vendor/rustc_lexer/src/unescape.rs delete mode 100644 vendor/rustc_lexer/src/unescape/tests.rs (limited to 'vendor/rustc_lexer') diff --git a/vendor/rustc_lexer/.cargo-checksum.json b/vendor/rustc_lexer/.cargo-checksum.json deleted file mode 100644 index 04e82318..00000000 --- a/vendor/rustc_lexer/.cargo-checksum.json +++ /dev/null @@ -1 +0,0 @@ -{"files":{"Cargo.toml":"9b7e100fce6f31d499bdb9c6eb7acccb19d8d561634e4ae7112cff3d536afa7c","src/cursor.rs":"8c442ec1f16870c013f170f8a4967a011c86ac5a75cefc23127726b36a848f51","src/lib.rs":"e75190c347bd8574d145e120cf3f56abe83f86639e85c80d7b4f84dab540d0b8","src/unescape.rs":"a782961eacfa5daf4f8cbd18a82fc33cd9fae2caca6363e5bedbc78acd8f1ae1","src/unescape/tests.rs":"4def6c86f7a63a50a8740dc11617671ac3934d72d0df16b98a4d328105eac711"},"package":"c86aae0c77166108c01305ee1a36a1e77289d7dc6ca0a3cd91ff4992de2d16a5"} \ No newline at end of file diff --git a/vendor/rustc_lexer/Cargo.toml b/vendor/rustc_lexer/Cargo.toml deleted file mode 100644 index aa487d44..00000000 --- a/vendor/rustc_lexer/Cargo.toml +++ /dev/null @@ -1,26 +0,0 @@ -# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies -# -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) - -[package] -edition = "2018" -name = "rustc_lexer" -version = "0.1.0" -authors = ["The Rust Project Developers"] -description = "Rust lexer used by rustc. No stability guarantees are provided.\n" -license = "MIT OR Apache-2.0" -repository = "https://github.com/rust-lang/rust/" - -[lib] -name = "rustc_lexer" -doctest = false -[dependencies.unicode-xid] -version = "0.2.0" diff --git a/vendor/rustc_lexer/src/cursor.rs b/vendor/rustc_lexer/src/cursor.rs deleted file mode 100644 index 5831159c..00000000 --- a/vendor/rustc_lexer/src/cursor.rs +++ /dev/null @@ -1,57 +0,0 @@ -use std::str::Chars; - -pub(crate) struct Cursor<'a> { - initial_len: usize, - chars: Chars<'a>, - #[cfg(debug_assertions)] - prev: char, -} - -pub(crate) const EOF_CHAR: char = '\0'; - -impl<'a> Cursor<'a> { - pub(crate) fn new(input: &'a str) -> Cursor<'a> { - Cursor { - initial_len: input.len(), - chars: input.chars(), - #[cfg(debug_assertions)] - prev: EOF_CHAR, - } - } - /// For debug assertions only - pub(crate) fn prev(&self) -> char { - #[cfg(debug_assertions)] - { - self.prev - } - - #[cfg(not(debug_assertions))] - { - '\0' - } - } - pub(crate) fn nth_char(&self, n: usize) -> char { - self.chars().nth(n).unwrap_or(EOF_CHAR) - } - pub(crate) fn is_eof(&self) -> bool { - self.chars.as_str().is_empty() - } - pub(crate) fn len_consumed(&self) -> usize { - self.initial_len - self.chars.as_str().len() - } - /// Returns an iterator over the remaining characters. - fn chars(&self) -> Chars<'a> { - self.chars.clone() - } - /// Moves to the next character. - pub(crate) fn bump(&mut self) -> Option { - let c = self.chars.next()?; - - #[cfg(debug_assertions)] - { - self.prev = c; - } - - Some(c) - } -} diff --git a/vendor/rustc_lexer/src/lib.rs b/vendor/rustc_lexer/src/lib.rs deleted file mode 100644 index 30a5175d..00000000 --- a/vendor/rustc_lexer/src/lib.rs +++ /dev/null @@ -1,562 +0,0 @@ -// We want to be able to build this crate with a stable compiler, so no -// `#![feature]` attributes should be added. - -mod cursor; -pub mod unescape; - -use crate::cursor::{Cursor, EOF_CHAR}; - -pub struct Token { - pub kind: TokenKind, - pub len: usize, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum TokenKind { - LineComment, - BlockComment { terminated: bool }, - Whitespace, - Ident, - RawIdent, - Literal { kind: LiteralKind, suffix_start: usize }, - Lifetime { starts_with_number: bool }, - Semi, - Comma, - Dot, - OpenParen, - CloseParen, - OpenBrace, - CloseBrace, - OpenBracket, - CloseBracket, - At, - Pound, - Tilde, - Question, - Colon, - Dollar, - Eq, - Not, - Lt, - Gt, - Minus, - And, - Or, - Plus, - Star, - Slash, - Caret, - Percent, - Unknown, -} -use self::TokenKind::*; - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum LiteralKind { - Int { base: Base, empty_int: bool }, - Float { base: Base, empty_exponent: bool }, - Char { terminated: bool }, - Byte { terminated: bool }, - Str { terminated: bool }, - ByteStr { terminated: bool }, - RawStr { n_hashes: usize, started: bool, terminated: bool }, - RawByteStr { n_hashes: usize, started: bool, terminated: bool }, -} -use self::LiteralKind::*; - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum Base { - Binary, - Octal, - Hexadecimal, - Decimal, -} - -impl Token { - fn new(kind: TokenKind, len: usize) -> Token { - Token { kind, len } - } -} - -pub fn strip_shebang(input: &str) -> Option { - debug_assert!(!input.is_empty()); - if !input.starts_with("#!") || input.starts_with("#![") { - return None; - } - Some(input.find('\n').unwrap_or(input.len())) -} - -pub fn first_token(input: &str) -> Token { - debug_assert!(!input.is_empty()); - Cursor::new(input).advance_token() -} - -pub fn tokenize(mut input: &str) -> impl Iterator + '_ { - std::iter::from_fn(move || { - if input.is_empty() { - return None; - } - let token = first_token(input); - input = &input[token.len..]; - Some(token) - }) -} - -// See [UAX #31](http://unicode.org/reports/tr31) for definitions of these -// classes. - -/// True if `c` is considered a whitespace according to Rust language definition. -pub fn is_whitespace(c: char) -> bool { - // This is Pattern_White_Space. - // - // Note that this set is stable (ie, it doesn't change with different - // Unicode versions), so it's ok to just hard-code the values. - - match c { - // Usual ASCII suspects - | '\u{0009}' // \t - | '\u{000A}' // \n - | '\u{000B}' // vertical tab - | '\u{000C}' // form feed - | '\u{000D}' // \r - | '\u{0020}' // space - - // NEXT LINE from latin1 - | '\u{0085}' - - // Bidi markers - | '\u{200E}' // LEFT-TO-RIGHT MARK - | '\u{200F}' // RIGHT-TO-LEFT MARK - - // Dedicated whitespace characters from Unicode - | '\u{2028}' // LINE SEPARATOR - | '\u{2029}' // PARAGRAPH SEPARATOR - => true, - _ => false, - } -} - -/// True if `c` is valid as a first character of an identifier. -pub fn is_id_start(c: char) -> bool { - // This is XID_Start OR '_' (which formally is not a XID_Start). - // We also add fast-path for ascii idents - ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || c == '_' - || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c)) -} - -/// True if `c` is valid as a non-first character of an identifier. -pub fn is_id_continue(c: char) -> bool { - // This is exactly XID_Continue. - // We also add fast-path for ascii idents - ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || ('0' <= c && c <= '9') - || c == '_' - || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c)) -} - - -impl Cursor<'_> { - fn advance_token(&mut self) -> Token { - let first_char = self.bump().unwrap(); - let token_kind = match first_char { - '/' => match self.nth_char(0) { - '/' => self.line_comment(), - '*' => self.block_comment(), - _ => Slash, - }, - c if is_whitespace(c) => self.whitespace(), - 'r' => match (self.nth_char(0), self.nth_char(1)) { - ('#', c1) if is_id_start(c1) => self.raw_ident(), - ('#', _) | ('"', _) => { - let (n_hashes, started, terminated) = self.raw_double_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = RawStr { n_hashes, started, terminated }; - Literal { kind, suffix_start } - } - _ => self.ident(), - }, - 'b' => match (self.nth_char(0), self.nth_char(1)) { - ('\'', _) => { - self.bump(); - let terminated = self.single_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = Byte { terminated }; - Literal { kind, suffix_start } - } - ('"', _) => { - self.bump(); - let terminated = self.double_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = ByteStr { terminated }; - Literal { kind, suffix_start } - } - ('r', '"') | ('r', '#') => { - self.bump(); - let (n_hashes, started, terminated) = self.raw_double_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = RawByteStr { n_hashes, started, terminated }; - Literal { kind, suffix_start } - } - _ => self.ident(), - }, - c if is_id_start(c) => self.ident(), - c @ '0'..='9' => { - let literal_kind = self.number(c); - let suffix_start = self.len_consumed(); - self.eat_literal_suffix(); - TokenKind::Literal { kind: literal_kind, suffix_start } - } - ';' => Semi, - ',' => Comma, - '.' => Dot, - '(' => OpenParen, - ')' => CloseParen, - '{' => OpenBrace, - '}' => CloseBrace, - '[' => OpenBracket, - ']' => CloseBracket, - '@' => At, - '#' => Pound, - '~' => Tilde, - '?' => Question, - ':' => Colon, - '$' => Dollar, - '=' => Eq, - '!' => Not, - '<' => Lt, - '>' => Gt, - '-' => Minus, - '&' => And, - '|' => Or, - '+' => Plus, - '*' => Star, - '^' => Caret, - '%' => Percent, - '\'' => self.lifetime_or_char(), - '"' => { - let terminated = self.double_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = Str { terminated }; - Literal { kind, suffix_start } - } - _ => Unknown, - }; - Token::new(token_kind, self.len_consumed()) - } - - fn line_comment(&mut self) -> TokenKind { - debug_assert!(self.prev() == '/' && self.nth_char(0) == '/'); - self.bump(); - loop { - match self.nth_char(0) { - '\n' => break, - EOF_CHAR if self.is_eof() => break, - _ => { - self.bump(); - } - } - } - LineComment - } - - fn block_comment(&mut self) -> TokenKind { - debug_assert!(self.prev() == '/' && self.nth_char(0) == '*'); - self.bump(); - let mut depth = 1usize; - while let Some(c) = self.bump() { - match c { - '/' if self.nth_char(0) == '*' => { - self.bump(); - depth += 1; - } - '*' if self.nth_char(0) == '/' => { - self.bump(); - depth -= 1; - if depth == 0 { - break; - } - } - _ => (), - } - } - - BlockComment { terminated: depth == 0 } - } - - fn whitespace(&mut self) -> TokenKind { - debug_assert!(is_whitespace(self.prev())); - while is_whitespace(self.nth_char(0)) { - self.bump(); - } - Whitespace - } - - fn raw_ident(&mut self) -> TokenKind { - debug_assert!( - self.prev() == 'r' - && self.nth_char(0) == '#' - && is_id_start(self.nth_char(1)) - ); - self.bump(); - self.bump(); - while is_id_continue(self.nth_char(0)) { - self.bump(); - } - RawIdent - } - - fn ident(&mut self) -> TokenKind { - debug_assert!(is_id_start(self.prev())); - while is_id_continue(self.nth_char(0)) { - self.bump(); - } - Ident - } - - fn number(&mut self, first_digit: char) -> LiteralKind { - debug_assert!('0' <= self.prev() && self.prev() <= '9'); - let mut base = Base::Decimal; - if first_digit == '0' { - let has_digits = match self.nth_char(0) { - 'b' => { - base = Base::Binary; - self.bump(); - self.eat_decimal_digits() - } - 'o' => { - base = Base::Octal; - self.bump(); - self.eat_decimal_digits() - } - 'x' => { - base = Base::Hexadecimal; - self.bump(); - self.eat_hexadecimal_digits() - } - '0'..='9' | '_' | '.' | 'e' | 'E' => { - self.eat_decimal_digits(); - true - } - // just a 0 - _ => return Int { base, empty_int: false }, - }; - if !has_digits { - return Int { base, empty_int: true }; - } - } else { - self.eat_decimal_digits(); - }; - - match self.nth_char(0) { - // Don't be greedy if this is actually an - // integer literal followed by field/method access or a range pattern - // (`0..2` and `12.foo()`) - '.' if self.nth_char(1) != '.' - && !is_id_start(self.nth_char(1)) => - { - // might have stuff after the ., and if it does, it needs to start - // with a number - self.bump(); - let mut empty_exponent = false; - if self.nth_char(0).is_digit(10) { - self.eat_decimal_digits(); - match self.nth_char(0) { - 'e' | 'E' => { - self.bump(); - empty_exponent = self.float_exponent().is_err() - } - _ => (), - } - } - Float { base, empty_exponent } - } - 'e' | 'E' => { - self.bump(); - let empty_exponent = self.float_exponent().is_err(); - Float { base, empty_exponent } - } - _ => Int { base, empty_int: false }, - } - } - - fn lifetime_or_char(&mut self) -> TokenKind { - debug_assert!(self.prev() == '\''); - let mut starts_with_number = false; - if (is_id_start(self.nth_char(0)) - || self.nth_char(0).is_digit(10) && { - starts_with_number = true; - true - }) - && self.nth_char(1) != '\'' - { - self.bump(); - while is_id_continue(self.nth_char(0)) { - self.bump(); - } - - return if self.nth_char(0) == '\'' { - self.bump(); - let kind = Char { terminated: true }; - Literal { kind, suffix_start: self.len_consumed() } - } else { - Lifetime { starts_with_number } - }; - } - let terminated = self.single_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = Char { terminated }; - return Literal { kind, suffix_start }; - } - - fn single_quoted_string(&mut self) -> bool { - debug_assert!(self.prev() == '\''); - // parse `'''` as a single char literal - if self.nth_char(0) == '\'' && self.nth_char(1) == '\'' { - self.bump(); - } - let mut first = true; - loop { - match self.nth_char(0) { - '/' if !first => break, - '\n' if self.nth_char(1) != '\'' => break, - EOF_CHAR if self.is_eof() => break, - '\'' => { - self.bump(); - return true; - } - '\\' => { - self.bump(); - self.bump(); - } - _ => { - self.bump(); - } - } - first = false; - } - false - } - - fn double_quoted_string(&mut self) -> bool { - debug_assert!(self.prev() == '"'); - loop { - match self.nth_char(0) { - '"' => { - self.bump(); - return true; - } - EOF_CHAR if self.is_eof() => return false, - '\\' if self.nth_char(1) == '\\' || self.nth_char(1) == '"' => { - self.bump(); - } - _ => (), - } - self.bump(); - } - } - - fn raw_double_quoted_string(&mut self) -> (usize, bool, bool) { - debug_assert!(self.prev() == 'r'); - let n_hashes = { - let mut acc: usize = 0; - loop { - match self.bump() { - Some('#') => acc += 1, - Some('"') => break acc, - None | Some(_) => return (acc, false, false), - } - } - }; - - loop { - match self.bump() { - Some('"') => { - let mut acc = n_hashes; - while self.nth_char(0) == '#' && acc > 0 { - self.bump(); - acc -= 1; - } - if acc == 0 { - return (n_hashes, true, true); - } - } - Some(_) => (), - None => return (n_hashes, true, false), - } - } - } - - fn eat_decimal_digits(&mut self) -> bool { - let mut has_digits = false; - loop { - match self.nth_char(0) { - '_' => { - self.bump(); - } - '0'..='9' => { - has_digits = true; - self.bump(); - } - _ => break, - } - } - has_digits - } - - fn eat_hexadecimal_digits(&mut self) -> bool { - let mut has_digits = false; - loop { - match self.nth_char(0) { - '_' => { - self.bump(); - } - '0'..='9' | 'a'..='f' | 'A'..='F' => { - has_digits = true; - self.bump(); - } - _ => break, - } - } - has_digits - } - - fn float_exponent(&mut self) -> Result<(), ()> { - debug_assert!(self.prev() == 'e' || self.prev() == 'E'); - if self.nth_char(0) == '-' || self.nth_char(0) == '+' { - self.bump(); - } - if self.eat_decimal_digits() { Ok(()) } else { Err(()) } - } - - fn eat_literal_suffix(&mut self) { - if !is_id_start(self.nth_char(0)) { - return; - } - self.bump(); - - while is_id_continue(self.nth_char(0)) { - self.bump(); - } - } -} diff --git a/vendor/rustc_lexer/src/unescape.rs b/vendor/rustc_lexer/src/unescape.rs deleted file mode 100644 index c709b752..00000000 --- a/vendor/rustc_lexer/src/unescape.rs +++ /dev/null @@ -1,305 +0,0 @@ -//! Utilities for validating string and char literals and turning them into -//! values they represent. - -use std::str::Chars; -use std::ops::Range; - -#[cfg(test)] -mod tests; - -#[derive(Debug, PartialEq, Eq)] -pub enum EscapeError { - ZeroChars, - MoreThanOneChar, - - LoneSlash, - InvalidEscape, - BareCarriageReturn, - BareCarriageReturnInRawString, - EscapeOnlyChar, - - TooShortHexEscape, - InvalidCharInHexEscape, - OutOfRangeHexEscape, - - NoBraceInUnicodeEscape, - InvalidCharInUnicodeEscape, - EmptyUnicodeEscape, - UnclosedUnicodeEscape, - LeadingUnderscoreUnicodeEscape, - OverlongUnicodeEscape, - LoneSurrogateUnicodeEscape, - OutOfRangeUnicodeEscape, - - UnicodeEscapeInByte, - NonAsciiCharInByte, - NonAsciiCharInByteString, -} - -/// Takes a contents of a char literal (without quotes), and returns an -/// unescaped char or an error -pub fn unescape_char(literal_text: &str) -> Result { - let mut chars = literal_text.chars(); - unescape_char_or_byte(&mut chars, Mode::Char) - .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of escaped characters or errors. -pub fn unescape_str(literal_text: &str, callback: &mut F) -where - F: FnMut(Range, Result), -{ - unescape_str_or_byte_str(literal_text, Mode::Str, callback) -} - -pub fn unescape_byte(literal_text: &str) -> Result { - let mut chars = literal_text.chars(); - unescape_char_or_byte(&mut chars, Mode::Byte) - .map(byte_from_char) - .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of escaped characters or errors. -pub fn unescape_byte_str(literal_text: &str, callback: &mut F) -where - F: FnMut(Range, Result), -{ - unescape_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| { - callback(range, char.map(byte_from_char)) - }) -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of characters or errors. -/// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only translate CRLF to LF and produce errors on bare CR. -pub fn unescape_raw_str(literal_text: &str, callback: &mut F) -where - F: FnMut(Range, Result), -{ - unescape_raw_str_or_byte_str(literal_text, Mode::Str, callback) -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of characters or errors. -/// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only translate CRLF to LF and produce errors on bare CR. -pub fn unescape_raw_byte_str(literal_text: &str, callback: &mut F) -where - F: FnMut(Range, Result), -{ - unescape_raw_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| { - callback(range, char.map(byte_from_char)) - }) -} - -#[derive(Debug, Clone, Copy)] -pub enum Mode { - Char, - Str, - Byte, - ByteStr, -} - -impl Mode { - pub fn in_single_quotes(self) -> bool { - match self { - Mode::Char | Mode::Byte => true, - Mode::Str | Mode::ByteStr => false, - } - } - - pub fn in_double_quotes(self) -> bool { - !self.in_single_quotes() - } - - pub fn is_bytes(self) -> bool { - match self { - Mode::Byte | Mode::ByteStr => true, - Mode::Char | Mode::Str => false, - } - } -} - - -fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result { - if first_char != '\\' { - return match first_char { - '\t' | '\n' => Err(EscapeError::EscapeOnlyChar), - '\r' => Err(EscapeError::BareCarriageReturn), - '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar), - '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar), - _ => { - if mode.is_bytes() && !first_char.is_ascii() { - return Err(EscapeError::NonAsciiCharInByte); - } - Ok(first_char) - } - }; - } - - let second_char = chars.next().ok_or(EscapeError::LoneSlash)?; - - let res = match second_char { - '"' => '"', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '\'' => '\'', - '0' => '\0', - - 'x' => { - let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?; - let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; - - let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?; - let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; - - let value = hi * 16 + lo; - - if !mode.is_bytes() && !is_ascii(value) { - return Err(EscapeError::OutOfRangeHexEscape); - } - let value = value as u8; - - value as char - } - - 'u' => { - if chars.next() != Some('{') { - return Err(EscapeError::NoBraceInUnicodeEscape); - } - - let mut n_digits = 1; - let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { - '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), - '}' => return Err(EscapeError::EmptyUnicodeEscape), - c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, - }; - - loop { - match chars.next() { - None => return Err(EscapeError::UnclosedUnicodeEscape), - Some('_') => continue, - Some('}') => { - if n_digits > 6 { - return Err(EscapeError::OverlongUnicodeEscape); - } - if mode.is_bytes() { - return Err(EscapeError::UnicodeEscapeInByte); - } - - break std::char::from_u32(value).ok_or_else(|| { - if value > 0x10FFFF { - EscapeError::OutOfRangeUnicodeEscape - } else { - EscapeError::LoneSurrogateUnicodeEscape - } - })?; - } - Some(c) => { - let digit = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; - n_digits += 1; - if n_digits > 6 { - continue; - } - let digit = digit as u32; - value = value * 16 + digit; - } - }; - } - } - _ => return Err(EscapeError::InvalidEscape), - }; - Ok(res) -} - -fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { - let first_char = chars.next().ok_or(EscapeError::ZeroChars)?; - let res = scan_escape(first_char, chars, mode)?; - if chars.next().is_some() { - return Err(EscapeError::MoreThanOneChar); - } - Ok(res) -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of escaped characters or errors. -fn unescape_str_or_byte_str(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - assert!(mode.in_double_quotes()); - let initial_len = src.len(); - let mut chars = src.chars(); - while let Some(first_char) = chars.next() { - let start = initial_len - chars.as_str().len() - first_char.len_utf8(); - - let unescaped_char = match first_char { - '\\' => { - let second_char = chars.clone().next(); - match second_char { - Some('\n') => { - skip_ascii_whitespace(&mut chars); - continue; - } - _ => scan_escape(first_char, &mut chars, mode), - } - } - '\n' => Ok('\n'), - '\t' => Ok('\t'), - _ => scan_escape(first_char, &mut chars, mode), - }; - let end = initial_len - chars.as_str().len(); - callback(start..end, unescaped_char); - } - - fn skip_ascii_whitespace(chars: &mut Chars<'_>) { - let str = chars.as_str(); - let first_non_space = str - .bytes() - .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(str.len()); - *chars = str[first_non_space..].chars() - } -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of characters or errors. -/// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only translate CRLF to LF and produce errors on bare CR. -fn unescape_raw_str_or_byte_str(literal_text: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - assert!(mode.in_double_quotes()); - let initial_len = literal_text.len(); - - let mut chars = literal_text.chars(); - while let Some(curr) = chars.next() { - let start = initial_len - chars.as_str().len() - curr.len_utf8(); - - let result = match curr { - '\r' => Err(EscapeError::BareCarriageReturnInRawString), - c if mode.is_bytes() && !c.is_ascii() => - Err(EscapeError::NonAsciiCharInByteString), - c => Ok(c), - }; - let end = initial_len - chars.as_str().len(); - - callback(start..end, result); - } -} - -fn byte_from_char(c: char) -> u8 { - let res = c as u32; - assert!(res <= u8::max_value() as u32, "guaranteed because of Mode::Byte(Str)"); - res as u8 -} - -fn is_ascii(x: u32) -> bool { - x <= 0x7F -} diff --git a/vendor/rustc_lexer/src/unescape/tests.rs b/vendor/rustc_lexer/src/unescape/tests.rs deleted file mode 100644 index e7b1ff64..00000000 --- a/vendor/rustc_lexer/src/unescape/tests.rs +++ /dev/null @@ -1,271 +0,0 @@ -use super::*; - -#[test] -fn test_unescape_char_bad() { - fn check(literal_text: &str, expected_error: EscapeError) { - let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err); - assert_eq!(actual_result, Err(expected_error)); - } - - check("", EscapeError::ZeroChars); - check(r"\", EscapeError::LoneSlash); - - check("\n", EscapeError::EscapeOnlyChar); - check("\t", EscapeError::EscapeOnlyChar); - check("'", EscapeError::EscapeOnlyChar); - check("\r", EscapeError::BareCarriageReturn); - - check("spam", EscapeError::MoreThanOneChar); - check(r"\x0ff", EscapeError::MoreThanOneChar); - check(r#"\"a"#, EscapeError::MoreThanOneChar); - check(r"\na", EscapeError::MoreThanOneChar); - check(r"\ra", EscapeError::MoreThanOneChar); - check(r"\ta", EscapeError::MoreThanOneChar); - check(r"\\a", EscapeError::MoreThanOneChar); - check(r"\'a", EscapeError::MoreThanOneChar); - check(r"\0a", EscapeError::MoreThanOneChar); - check(r"\u{0}x", EscapeError::MoreThanOneChar); - check(r"\u{1F63b}}", EscapeError::MoreThanOneChar); - - check(r"\v", EscapeError::InvalidEscape); - check(r"\💩", EscapeError::InvalidEscape); - check(r"\●", EscapeError::InvalidEscape); - check("\\\r", EscapeError::InvalidEscape); - - check(r"\x", EscapeError::TooShortHexEscape); - check(r"\x0", EscapeError::TooShortHexEscape); - check(r"\xf", EscapeError::TooShortHexEscape); - check(r"\xa", EscapeError::TooShortHexEscape); - check(r"\xx", EscapeError::InvalidCharInHexEscape); - check(r"\xы", EscapeError::InvalidCharInHexEscape); - check(r"\x🦀", EscapeError::InvalidCharInHexEscape); - check(r"\xtt", EscapeError::InvalidCharInHexEscape); - check(r"\xff", EscapeError::OutOfRangeHexEscape); - check(r"\xFF", EscapeError::OutOfRangeHexEscape); - check(r"\x80", EscapeError::OutOfRangeHexEscape); - - check(r"\u", EscapeError::NoBraceInUnicodeEscape); - check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape); - check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape); - check(r"\u{", EscapeError::UnclosedUnicodeEscape); - check(r"\u{0000", EscapeError::UnclosedUnicodeEscape); - check(r"\u{}", EscapeError::EmptyUnicodeEscape); - check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape); - check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape); - check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape); - check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); - check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); - - check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape); - - check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape); -} - -#[test] -fn test_unescape_char_good() { - fn check(literal_text: &str, expected_char: char) { - let actual_result = unescape_char(literal_text); - assert_eq!(actual_result, Ok(expected_char)); - } - - check("a", 'a'); - check("ы", 'ы'); - check("🦀", '🦀'); - - check(r#"\""#, '"'); - check(r"\n", '\n'); - check(r"\r", '\r'); - check(r"\t", '\t'); - check(r"\\", '\\'); - check(r"\'", '\''); - check(r"\0", '\0'); - - check(r"\x00", '\0'); - check(r"\x5a", 'Z'); - check(r"\x5A", 'Z'); - check(r"\x7f", 127 as char); - - check(r"\u{0}", '\0'); - check(r"\u{000000}", '\0'); - check(r"\u{41}", 'A'); - check(r"\u{0041}", 'A'); - check(r"\u{00_41}", 'A'); - check(r"\u{4__1__}", 'A'); - check(r"\u{1F63b}", '😻'); -} - -#[test] -fn test_unescape_str_good() { - fn check(literal_text: &str, expected: &str) { - let mut buf = Ok(String::with_capacity(literal_text.len())); - unescape_str(literal_text, &mut |range, c| { - if let Ok(b) = &mut buf { - match c { - Ok(c) => b.push(c), - Err(e) => buf = Err((range, e)), - } - } - }); - let buf = buf.as_ref().map(|it| it.as_ref()); - assert_eq!(buf, Ok(expected)) - } - - check("foo", "foo"); - check("", ""); - check(" \t\n", " \t\n"); - - check("hello \\\n world", "hello world"); - check("thread's", "thread's") -} - -#[test] -fn test_unescape_byte_bad() { - fn check(literal_text: &str, expected_error: EscapeError) { - let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err); - assert_eq!(actual_result, Err(expected_error)); - } - - check("", EscapeError::ZeroChars); - check(r"\", EscapeError::LoneSlash); - - check("\n", EscapeError::EscapeOnlyChar); - check("\t", EscapeError::EscapeOnlyChar); - check("'", EscapeError::EscapeOnlyChar); - check("\r", EscapeError::BareCarriageReturn); - - check("spam", EscapeError::MoreThanOneChar); - check(r"\x0ff", EscapeError::MoreThanOneChar); - check(r#"\"a"#, EscapeError::MoreThanOneChar); - check(r"\na", EscapeError::MoreThanOneChar); - check(r"\ra", EscapeError::MoreThanOneChar); - check(r"\ta", EscapeError::MoreThanOneChar); - check(r"\\a", EscapeError::MoreThanOneChar); - check(r"\'a", EscapeError::MoreThanOneChar); - check(r"\0a", EscapeError::MoreThanOneChar); - - check(r"\v", EscapeError::InvalidEscape); - check(r"\💩", EscapeError::InvalidEscape); - check(r"\●", EscapeError::InvalidEscape); - - check(r"\x", EscapeError::TooShortHexEscape); - check(r"\x0", EscapeError::TooShortHexEscape); - check(r"\xa", EscapeError::TooShortHexEscape); - check(r"\xf", EscapeError::TooShortHexEscape); - check(r"\xx", EscapeError::InvalidCharInHexEscape); - check(r"\xы", EscapeError::InvalidCharInHexEscape); - check(r"\x🦀", EscapeError::InvalidCharInHexEscape); - check(r"\xtt", EscapeError::InvalidCharInHexEscape); - - check(r"\u", EscapeError::NoBraceInUnicodeEscape); - check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape); - check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape); - check(r"\u{", EscapeError::UnclosedUnicodeEscape); - check(r"\u{0000", EscapeError::UnclosedUnicodeEscape); - check(r"\u{}", EscapeError::EmptyUnicodeEscape); - check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape); - check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape); - - check("ы", EscapeError::NonAsciiCharInByte); - check("🦀", EscapeError::NonAsciiCharInByte); - - check(r"\u{0}", EscapeError::UnicodeEscapeInByte); - check(r"\u{000000}", EscapeError::UnicodeEscapeInByte); - check(r"\u{41}", EscapeError::UnicodeEscapeInByte); - check(r"\u{0041}", EscapeError::UnicodeEscapeInByte); - check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte); - check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte); - check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte); - check(r"\u{0}x", EscapeError::UnicodeEscapeInByte); - check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte); - check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte); - check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); - check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte); - check(r"\u{D800}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte); -} - -#[test] -fn test_unescape_byte_good() { - fn check(literal_text: &str, expected_byte: u8) { - let actual_result = unescape_byte(literal_text); - assert_eq!(actual_result, Ok(expected_byte)); - } - - check("a", b'a'); - - check(r#"\""#, b'"'); - check(r"\n", b'\n'); - check(r"\r", b'\r'); - check(r"\t", b'\t'); - check(r"\\", b'\\'); - check(r"\'", b'\''); - check(r"\0", b'\0'); - - check(r"\x00", b'\0'); - check(r"\x5a", b'Z'); - check(r"\x5A", b'Z'); - check(r"\x7f", 127); - check(r"\x80", 128); - check(r"\xff", 255); - check(r"\xFF", 255); -} - -#[test] -fn test_unescape_byte_str_good() { - fn check(literal_text: &str, expected: &[u8]) { - let mut buf = Ok(Vec::with_capacity(literal_text.len())); - unescape_byte_str(literal_text, &mut |range, c| { - if let Ok(b) = &mut buf { - match c { - Ok(c) => b.push(c), - Err(e) => buf = Err((range, e)), - } - } - }); - let buf = buf.as_ref().map(|it| it.as_ref()); - assert_eq!(buf, Ok(expected)) - } - - check("foo", b"foo"); - check("", b""); - check(" \t\n", b" \t\n"); - - check("hello \\\n world", b"hello world"); - check("thread's", b"thread's") -} - -#[test] -fn test_unescape_raw_str() { - fn check(literal: &str, expected: &[(Range, Result)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_raw_str(literal, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("\rx", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString)), (1..2, Ok('x'))]); -} - -#[test] -fn test_unescape_raw_byte_str() { - fn check(literal: &str, expected: &[(Range, Result)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_raw_byte_str(literal, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]); - check( - "🦀a", - &[(0..4, Err(EscapeError::NonAsciiCharInByteString)), (4..5, Ok(byte_from_char('a')))], - ); -} -- cgit v1.2.3