summaryrefslogtreecommitdiff
path: root/vendor/time/src/format_description/parse/lexer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/time/src/format_description/parse/lexer.rs')
-rw-r--r-- vendor/time/src/format_description/parse/lexer.rs 284
1 files changed, 284 insertions, 0 deletions
diff --git a/vendor/time/src/format_description/parse/lexer.rs b/vendor/time/src/format_description/parse/lexer.rs
new file mode 100644
index 00000000..a63722e1
--- /dev/null
+++ b/vendor/time/src/format_description/parse/lexer.rs
@@ -0,0 +1,284 @@
+//! Lexer for parsing format descriptions.
+
+use core::iter;
+
+use super::{attach_location, unused, Error, Location, Spanned, SpannedValue};
+
+/// The token stream returned by [`lex`], offering one item of lookahead.
+pub(super) struct Lexed<I>
+where
+    I: Iterator,
+{
+    /// The wrapped token source. Wrapping it in `Peekable` is what makes lookahead possible.
+    iter: iter::Peekable<I>,
+}
+
+impl<I> Iterator for Lexed<I>
+where
+    I: Iterator,
+{
+    type Item = I::Item;
+
+    /// Yield the next item of the wrapped iterator, unchanged.
+    fn next(&mut self) -> Option<Self::Item> {
+        Iterator::next(&mut self.iter)
+    }
+}
+
+impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> {
+    /// Look at the upcoming item without advancing the lexer.
+    pub(super) fn peek(&mut self) -> Option<&I::Item> {
+        self.iter.peek()
+    }
+
+    /// If the upcoming token is a whitespace component part, consume it and return its value.
+    ///
+    /// Any other upcoming item (a different token, an error, or the end of the stream) is left
+    /// in place and `None` is returned.
+    pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
+        match self.peek() {
+            Some(&Ok(Token::ComponentPart {
+                kind: ComponentKind::Whitespace,
+                value,
+            })) => {
+                // The lookahead matched, so step past the token that was just inspected.
+                self.next();
+                Some(value)
+            }
+            _ => None,
+        }
+    }
+
+    /// If the upcoming token is a non-whitespace component part, consume it and return its value.
+    ///
+    /// Any other upcoming item is left in place and `None` is returned.
+    pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
+        match self.peek() {
+            Some(&Ok(Token::ComponentPart {
+                kind: ComponentKind::NotWhitespace,
+                value,
+            })) => {
+                // The lookahead matched, so step past the token that was just inspected.
+                self.next();
+                Some(value)
+            }
+            _ => None,
+        }
+    }
+
+    /// If the upcoming token is an opening bracket, consume it and return its location.
+    ///
+    /// Any other upcoming item is left in place and `None` is returned.
+    pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> {
+        match self.peek() {
+            Some(&Ok(Token::Bracket {
+                kind: BracketKind::Opening,
+                location,
+            })) => {
+                // The lookahead matched, so step past the token that was just inspected.
+                self.next();
+                Some(location)
+            }
+            _ => None,
+        }
+    }
+
+    /// If the upcoming token is a closing bracket, return a reference to its location.
+    ///
+    /// Unlike the `next_if_*` methods, this never advances the lexer.
+    pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> {
+        match self.peek() {
+            Some(Ok(Token::Bracket {
+                kind: BracketKind::Closing,
+                location,
+            })) => Some(location),
+            _ => None,
+        }
+    }
+
+    /// If the upcoming token is a closing bracket, consume it and return its location.
+    ///
+    /// Any other upcoming item is left in place and `None` is returned.
+    pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> {
+        match self.peek() {
+            Some(&Ok(Token::Bracket {
+                kind: BracketKind::Closing,
+                location,
+            })) => {
+                // The lookahead matched, so step past the token that was just inspected.
+                self.next();
+                Some(location)
+            }
+            _ => None,
+        }
+    }
+}
+
+/// A single unit of lexer output. Tokens are purely syntactic; no meaning is assigned to them
+/// at this stage.
+pub(super) enum Token<'src> {
+    /// Text found outside of any bracket pair, carried through exactly as written.
+    Literal(Spanned<&'src [u8]>),
+    /// A `[` or `]`. Not every bracket delimits a component: an escaped `[[` is also emitted as
+    /// bracket tokens.
+    Bracket {
+        /// Distinguishes an opening bracket from a closing one.
+        kind: BracketKind,
+        /// The position of the bracket within the format string.
+        location: Location,
+    },
+    /// A fragment found inside brackets: a component name, a modifier, or a run of whitespace.
+    ComponentPart {
+        /// Tells whether this fragment is whitespace.
+        kind: ComponentKind,
+        /// The bytes of the fragment, together with their span.
+        value: Spanned<&'src [u8]>,
+    },
+}
+
+/// The direction of a bracket token.
+pub(super) enum BracketKind {
+    /// The bracket is `[`.
+    Opening,
+    /// The bracket is `]`.
+    Closing,
+}
+
+/// Classifies a component part by whether it is whitespace.
+pub(super) enum ComponentKind {
+    /// The part is a run of ASCII whitespace.
+    Whitespace,
+    /// The part contains no ASCII whitespace.
+    NotWhitespace,
+}
+
+/// Parse the string into a series of [`Token`]s.
+///
+/// `VERSION` controls the version of the format description that is being parsed. Currently, this
+/// must be 1 or 2.
+///
+/// - When `VERSION` is 1, `[[` is the only escape sequence, resulting in a literal `[`.
+/// - When `VERSION` is 2, all escape sequences begin with `\`. The only characters that may
+///   currently follow are `\`, `[`, and `]`, all of which result in the literal character. All
+///   other characters result in a lex error.
+///
+/// Tokens are produced lazily as the returned [`Lexed`] is advanced. Outside of brackets, bytes
+/// are grouped into [`Token::Literal`]s; inside brackets they are split into alternating
+/// whitespace and non-whitespace [`Token::ComponentPart`]s. A `]` that does not close an open
+/// bracket is treated as ordinary literal text.
+///
+/// # Errors
+///
+/// When `VERSION` is 2, an `Err` item is yielded if a `\` is followed by a byte other than `\`,
+/// `[`, or `]`, or if a `\` is the final byte of the input.
+pub(super) fn lex<const VERSION: usize>(
+    mut input: &[u8],
+) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> {
+    validate_version!(VERSION);
+
+    // Number of brackets that are currently open. Each increment consumes at least one byte of
+    // input, so the count is bounded by `input.len()` and cannot overflow a `usize`. A narrower
+    // counter would overflow once more than 255 brackets are open at the same time, panicking in
+    // debug builds and silently wrapping to zero in release builds.
+    let mut depth: usize = 0;
+    let mut iter = attach_location(input.iter()).peekable();
+    let mut second_bracket_location = None;
+
+    // `input` is advanced in lockstep with `iter` so that it always starts at the byte that
+    // `iter` will yield next; token values are sliced from its front.
+    let iter = iter::from_fn(move || {
+        // The flag is only ever set when the version is one, as that is the only version in which
+        // `[[` is an escape sequence.
+        if version!(..=1) {
+            // There is a flag set to emit the second half of an escaped bracket pair.
+            if let Some(location) = second_bracket_location.take() {
+                return Some(Ok(Token::Bracket {
+                    kind: BracketKind::Opening,
+                    location,
+                }));
+            }
+        }
+
+        Some(Ok(match iter.next()? {
+            // possible escape sequence
+            (b'\\', backslash_loc) if version!(2..) => {
+                match iter.next() {
+                    Some((b'\\' | b'[' | b']', char_loc)) => {
+                        // The escaped character is emitted as-is.
+                        let char = &input[1..2];
+                        input = &input[2..];
+                        if depth == 0 {
+                            Token::Literal(char.spanned(backslash_loc.to(char_loc)))
+                        } else {
+                            Token::ComponentPart {
+                                kind: ComponentKind::NotWhitespace,
+                                value: char.spanned(backslash_loc.to(char_loc)),
+                            }
+                        }
+                    }
+                    Some((_, loc)) => {
+                        return Some(Err(Error {
+                            _inner: unused(loc.error("invalid escape sequence")),
+                            public: crate::error::InvalidFormatDescription::Expected {
+                                what: "valid escape sequence",
+                                index: loc.byte as usize,
+                            },
+                        }));
+                    }
+                    None => {
+                        return Some(Err(Error {
+                            _inner: unused(backslash_loc.error("unexpected end of input")),
+                            public: crate::error::InvalidFormatDescription::Expected {
+                                what: "valid escape sequence",
+                                index: backslash_loc.byte as usize,
+                            },
+                        }));
+                    }
+                }
+            }
+            // potentially escaped opening bracket
+            (b'[', location) if version!(..=1) => {
+                if let Some((_, second_location)) = iter.next_if(|&(&byte, _)| byte == b'[') {
+                    // Escaped bracket. Store the location of the second so we can emit it later.
+                    second_bracket_location = Some(second_location);
+                    input = &input[2..];
+                } else {
+                    // opening bracket
+                    depth += 1;
+                    input = &input[1..];
+                }
+
+                Token::Bracket {
+                    kind: BracketKind::Opening,
+                    location,
+                }
+            }
+            // opening bracket
+            (b'[', location) => {
+                depth += 1;
+                input = &input[1..];
+
+                Token::Bracket {
+                    kind: BracketKind::Opening,
+                    location,
+                }
+            }
+            // closing bracket
+            (b']', location) if depth > 0 => {
+                depth -= 1;
+                input = &input[1..];
+
+                Token::Bracket {
+                    kind: BracketKind::Closing,
+                    location,
+                }
+            }
+            // literal
+            (_, start_location) if depth == 0 => {
+                let mut bytes = 1;
+                let mut end_location = start_location;
+
+                // Extend the literal until a byte that could begin an escape sequence or a
+                // bracket is reached.
+                while let Some((_, location)) =
+                    iter.next_if(|&(&byte, _)| !((version!(2..) && byte == b'\\') || byte == b'['))
+                {
+                    end_location = location;
+                    bytes += 1;
+                }
+
+                let value = &input[..bytes];
+                input = &input[bytes..];
+
+                Token::Literal(value.spanned(start_location.to(end_location)))
+            }
+            // component part
+            (byte, start_location) => {
+                let mut bytes = 1;
+                let mut end_location = start_location;
+                let is_whitespace = byte.is_ascii_whitespace();
+
+                // Extend the part while the bytes share the whitespace class of the first byte,
+                // stopping before any backslash or bracket.
+                while let Some((_, location)) = iter.next_if(|&(byte, _)| {
+                    !matches!(byte, b'\\' | b'[' | b']')
+                        && is_whitespace == byte.is_ascii_whitespace()
+                }) {
+                    end_location = location;
+                    bytes += 1;
+                }
+
+                let value = &input[..bytes];
+                input = &input[bytes..];
+
+                Token::ComponentPart {
+                    kind: if is_whitespace {
+                        ComponentKind::Whitespace
+                    } else {
+                        ComponentKind::NotWhitespace
+                    },
+                    value: value.spanned(start_location.to(end_location)),
+                }
+            }
+        }))
+    });
+
+    Lexed {
+        iter: iter.peekable(),
+    }
+}