diff options
Diffstat (limited to 'vendor/rustc_lexer/src/lib.rs')
| -rw-r--r-- | vendor/rustc_lexer/src/lib.rs | 562 |
1 files changed, 0 insertions, 562 deletions
diff --git a/vendor/rustc_lexer/src/lib.rs b/vendor/rustc_lexer/src/lib.rs deleted file mode 100644 index 30a5175d..00000000 --- a/vendor/rustc_lexer/src/lib.rs +++ /dev/null @@ -1,562 +0,0 @@ -// We want to be able to build this crate with a stable compiler, so no -// `#![feature]` attributes should be added. - -mod cursor; -pub mod unescape; - -use crate::cursor::{Cursor, EOF_CHAR}; - -pub struct Token { - pub kind: TokenKind, - pub len: usize, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum TokenKind { - LineComment, - BlockComment { terminated: bool }, - Whitespace, - Ident, - RawIdent, - Literal { kind: LiteralKind, suffix_start: usize }, - Lifetime { starts_with_number: bool }, - Semi, - Comma, - Dot, - OpenParen, - CloseParen, - OpenBrace, - CloseBrace, - OpenBracket, - CloseBracket, - At, - Pound, - Tilde, - Question, - Colon, - Dollar, - Eq, - Not, - Lt, - Gt, - Minus, - And, - Or, - Plus, - Star, - Slash, - Caret, - Percent, - Unknown, -} -use self::TokenKind::*; - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum LiteralKind { - Int { base: Base, empty_int: bool }, - Float { base: Base, empty_exponent: bool }, - Char { terminated: bool }, - Byte { terminated: bool }, - Str { terminated: bool }, - ByteStr { terminated: bool }, - RawStr { n_hashes: usize, started: bool, terminated: bool }, - RawByteStr { n_hashes: usize, started: bool, terminated: bool }, -} -use self::LiteralKind::*; - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum Base { - Binary, - Octal, - Hexadecimal, - Decimal, -} - -impl Token { - fn new(kind: TokenKind, len: usize) -> Token { - Token { kind, len } - } -} - -pub fn strip_shebang(input: &str) -> Option<usize> { - debug_assert!(!input.is_empty()); - if !input.starts_with("#!") || input.starts_with("#![") { - return None; - } - Some(input.find('\n').unwrap_or(input.len())) -} - -pub fn first_token(input: &str) -> Token { - debug_assert!(!input.is_empty()); - Cursor::new(input).advance_token() -} - -pub fn tokenize(mut input: &str) -> impl Iterator<Item = Token> + '_ { - std::iter::from_fn(move || { - if input.is_empty() { - return None; - } - let token = first_token(input); - input = &input[token.len..]; - Some(token) - }) -} - -// See [UAX #31](http://unicode.org/reports/tr31) for definitions of these -// classes. - -/// True if `c` is considered a whitespace according to Rust language definition. -pub fn is_whitespace(c: char) -> bool { - // This is Pattern_White_Space. - // - // Note that this set is stable (ie, it doesn't change with different - // Unicode versions), so it's ok to just hard-code the values. - - match c { - // Usual ASCII suspects - | '\u{0009}' // \t - | '\u{000A}' // \n - | '\u{000B}' // vertical tab - | '\u{000C}' // form feed - | '\u{000D}' // \r - | '\u{0020}' // space - - // NEXT LINE from latin1 - | '\u{0085}' - - // Bidi markers - | '\u{200E}' // LEFT-TO-RIGHT MARK - | '\u{200F}' // RIGHT-TO-LEFT MARK - - // Dedicated whitespace characters from Unicode - | '\u{2028}' // LINE SEPARATOR - | '\u{2029}' // PARAGRAPH SEPARATOR - => true, - _ => false, - } -} - -/// True if `c` is valid as a first character of an identifier. -pub fn is_id_start(c: char) -> bool { - // This is XID_Start OR '_' (which formally is not a XID_Start). - // We also add fast-path for ascii idents - ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || c == '_' - || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c)) -} - -/// True if `c` is valid as a non-first character of an identifier. -pub fn is_id_continue(c: char) -> bool { - // This is exactly XID_Continue. - // We also add fast-path for ascii idents - ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || ('0' <= c && c <= '9') - || c == '_' - || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c)) -} - - -impl Cursor<'_> { - fn advance_token(&mut self) -> Token { - let first_char = self.bump().unwrap(); - let token_kind = match first_char { - '/' => match self.nth_char(0) { - '/' => self.line_comment(), - '*' => self.block_comment(), - _ => Slash, - }, - c if is_whitespace(c) => self.whitespace(), - 'r' => match (self.nth_char(0), self.nth_char(1)) { - ('#', c1) if is_id_start(c1) => self.raw_ident(), - ('#', _) | ('"', _) => { - let (n_hashes, started, terminated) = self.raw_double_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = RawStr { n_hashes, started, terminated }; - Literal { kind, suffix_start } - } - _ => self.ident(), - }, - 'b' => match (self.nth_char(0), self.nth_char(1)) { - ('\'', _) => { - self.bump(); - let terminated = self.single_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = Byte { terminated }; - Literal { kind, suffix_start } - } - ('"', _) => { - self.bump(); - let terminated = self.double_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = ByteStr { terminated }; - Literal { kind, suffix_start } - } - ('r', '"') | ('r', '#') => { - self.bump(); - let (n_hashes, started, terminated) = self.raw_double_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = RawByteStr { n_hashes, started, terminated }; - Literal { kind, suffix_start } - } - _ => self.ident(), - }, - c if is_id_start(c) => self.ident(), - c @ '0'..='9' => { - let literal_kind = self.number(c); - let suffix_start = self.len_consumed(); - self.eat_literal_suffix(); - TokenKind::Literal { kind: literal_kind, suffix_start } - } - ';' => Semi, - ',' => Comma, - '.' => Dot, - '(' => OpenParen, - ')' => CloseParen, - '{' => OpenBrace, - '}' => CloseBrace, - '[' => OpenBracket, - ']' => CloseBracket, - '@' => At, - '#' => Pound, - '~' => Tilde, - '?' => Question, - ':' => Colon, - '$' => Dollar, - '=' => Eq, - '!' => Not, - '<' => Lt, - '>' => Gt, - '-' => Minus, - '&' => And, - '|' => Or, - '+' => Plus, - '*' => Star, - '^' => Caret, - '%' => Percent, - '\'' => self.lifetime_or_char(), - '"' => { - let terminated = self.double_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = Str { terminated }; - Literal { kind, suffix_start } - } - _ => Unknown, - }; - Token::new(token_kind, self.len_consumed()) - } - - fn line_comment(&mut self) -> TokenKind { - debug_assert!(self.prev() == '/' && self.nth_char(0) == '/'); - self.bump(); - loop { - match self.nth_char(0) { - '\n' => break, - EOF_CHAR if self.is_eof() => break, - _ => { - self.bump(); - } - } - } - LineComment - } - - fn block_comment(&mut self) -> TokenKind { - debug_assert!(self.prev() == '/' && self.nth_char(0) == '*'); - self.bump(); - let mut depth = 1usize; - while let Some(c) = self.bump() { - match c { - '/' if self.nth_char(0) == '*' => { - self.bump(); - depth += 1; - } - '*' if self.nth_char(0) == '/' => { - self.bump(); - depth -= 1; - if depth == 0 { - break; - } - } - _ => (), - } - } - - BlockComment { terminated: depth == 0 } - } - - fn whitespace(&mut self) -> TokenKind { - debug_assert!(is_whitespace(self.prev())); - while is_whitespace(self.nth_char(0)) { - self.bump(); - } - Whitespace - } - - fn raw_ident(&mut self) -> TokenKind { - debug_assert!( - self.prev() == 'r' - && self.nth_char(0) == '#' - && is_id_start(self.nth_char(1)) - ); - self.bump(); - self.bump(); - while is_id_continue(self.nth_char(0)) { - self.bump(); - } - RawIdent - } - - fn ident(&mut self) -> TokenKind { - debug_assert!(is_id_start(self.prev())); - while is_id_continue(self.nth_char(0)) { - self.bump(); - } - Ident - } - - fn number(&mut self, first_digit: char) -> LiteralKind { - debug_assert!('0' <= self.prev() && self.prev() <= '9'); - let mut base = Base::Decimal; - if first_digit == '0' { - let has_digits = match self.nth_char(0) { - 'b' => { - base = Base::Binary; - self.bump(); - self.eat_decimal_digits() - } - 'o' => { - base = Base::Octal; - self.bump(); - self.eat_decimal_digits() - } - 'x' => { - base = Base::Hexadecimal; - self.bump(); - self.eat_hexadecimal_digits() - } - '0'..='9' | '_' | '.' | 'e' | 'E' => { - self.eat_decimal_digits(); - true - } - // just a 0 - _ => return Int { base, empty_int: false }, - }; - if !has_digits { - return Int { base, empty_int: true }; - } - } else { - self.eat_decimal_digits(); - }; - - match self.nth_char(0) { - // Don't be greedy if this is actually an - // integer literal followed by field/method access or a range pattern - // (`0..2` and `12.foo()`) - '.' if self.nth_char(1) != '.' - && !is_id_start(self.nth_char(1)) => - { - // might have stuff after the ., and if it does, it needs to start - // with a number - self.bump(); - let mut empty_exponent = false; - if self.nth_char(0).is_digit(10) { - self.eat_decimal_digits(); - match self.nth_char(0) { - 'e' | 'E' => { - self.bump(); - empty_exponent = self.float_exponent().is_err() - } - _ => (), - } - } - Float { base, empty_exponent } - } - 'e' | 'E' => { - self.bump(); - let empty_exponent = self.float_exponent().is_err(); - Float { base, empty_exponent } - } - _ => Int { base, empty_int: false }, - } - } - - fn lifetime_or_char(&mut self) -> TokenKind { - debug_assert!(self.prev() == '\''); - let mut starts_with_number = false; - if (is_id_start(self.nth_char(0)) - || self.nth_char(0).is_digit(10) && { - starts_with_number = true; - true - }) - && self.nth_char(1) != '\'' - { - self.bump(); - while is_id_continue(self.nth_char(0)) { - self.bump(); - } - - return if self.nth_char(0) == '\'' { - self.bump(); - let kind = Char { terminated: true }; - Literal { kind, suffix_start: self.len_consumed() } - } else { - Lifetime { starts_with_number } - }; - } - let terminated = self.single_quoted_string(); - let suffix_start = self.len_consumed(); - if terminated { - self.eat_literal_suffix(); - } - let kind = Char { terminated }; - return Literal { kind, suffix_start }; - } - - fn single_quoted_string(&mut self) -> bool { - debug_assert!(self.prev() == '\''); - // parse `'''` as a single char literal - if self.nth_char(0) == '\'' && self.nth_char(1) == '\'' { - self.bump(); - } - let mut first = true; - loop { - match self.nth_char(0) { - '/' if !first => break, - '\n' if self.nth_char(1) != '\'' => break, - EOF_CHAR if self.is_eof() => break, - '\'' => { - self.bump(); - return true; - } - '\\' => { - self.bump(); - self.bump(); - } - _ => { - self.bump(); - } - } - first = false; - } - false - } - - fn double_quoted_string(&mut self) -> bool { - debug_assert!(self.prev() == '"'); - loop { - match self.nth_char(0) { - '"' => { - self.bump(); - return true; - } - EOF_CHAR if self.is_eof() => return false, - '\\' if self.nth_char(1) == '\\' || self.nth_char(1) == '"' => { - self.bump(); - } - _ => (), - } - self.bump(); - } - } - - fn raw_double_quoted_string(&mut self) -> (usize, bool, bool) { - debug_assert!(self.prev() == 'r'); - let n_hashes = { - let mut acc: usize = 0; - loop { - match self.bump() { - Some('#') => acc += 1, - Some('"') => break acc, - None | Some(_) => return (acc, false, false), - } - } - }; - - loop { - match self.bump() { - Some('"') => { - let mut acc = n_hashes; - while self.nth_char(0) == '#' && acc > 0 { - self.bump(); - acc -= 1; - } - if acc == 0 { - return (n_hashes, true, true); - } - } - Some(_) => (), - None => return (n_hashes, true, false), - } - } - } - - fn eat_decimal_digits(&mut self) -> bool { - let mut has_digits = false; - loop { - match self.nth_char(0) { - '_' => { - self.bump(); - } - '0'..='9' => { - has_digits = true; - self.bump(); - } - _ => break, - } - } - has_digits - } - - fn eat_hexadecimal_digits(&mut self) -> bool { - let mut has_digits = false; - loop { - match self.nth_char(0) { - '_' => { - self.bump(); - } - '0'..='9' | 'a'..='f' | 'A'..='F' => { - has_digits = true; - self.bump(); - } - _ => break, - } - } - has_digits - } - - fn float_exponent(&mut self) -> Result<(), ()> { - debug_assert!(self.prev() == 'e' || self.prev() == 'E'); - if self.nth_char(0) == '-' || self.nth_char(0) == '+' { - self.bump(); - } - if self.eat_decimal_digits() { Ok(()) } else { Err(()) } - } - - fn eat_literal_suffix(&mut self) { - if !is_id_start(self.nth_char(0)) { - return; - } - self.bump(); - - while is_id_continue(self.nth_char(0)) { - self.bump(); - } - } -} |
