diff options
Diffstat (limited to 'vendor/base64/src/alphabet.rs')
| -rw-r--r-- | vendor/base64/src/alphabet.rs | 285 |
1 files changed, 285 insertions, 0 deletions
diff --git a/vendor/base64/src/alphabet.rs b/vendor/base64/src/alphabet.rs new file mode 100644 index 00000000..b07bfdfe --- /dev/null +++ b/vendor/base64/src/alphabet.rs @@ -0,0 +1,285 @@ +//! Provides [Alphabet] and constants for alphabets commonly used in the wild. + +use crate::PAD_BYTE; +use core::{convert, fmt}; +#[cfg(any(feature = "std", test))] +use std::error; + +const ALPHABET_SIZE: usize = 64; + +/// An alphabet defines the 64 ASCII characters (symbols) used for base64. +/// +/// Common alphabets are provided as constants, and custom alphabets +/// can be made via `from_str` or the `TryFrom<str>` implementation. +/// +/// # Examples +/// +/// Building and using a custom Alphabet: +/// +/// ``` +/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap(); +/// +/// let engine = base64::engine::GeneralPurpose::new( +/// &custom, +/// base64::engine::general_purpose::PAD); +/// ``` +/// +/// Building a const: +/// +/// ``` +/// use base64::alphabet::Alphabet; +/// +/// static CUSTOM: Alphabet = { +/// // Result::unwrap() isn't const yet, but panic!() is OK +/// match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") { +/// Ok(x) => x, +/// Err(_) => panic!("creation of alphabet failed"), +/// } +/// }; +/// ``` +/// +/// Building lazily: +/// +/// ``` +/// use base64::{ +/// alphabet::Alphabet, +/// engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig}, +/// }; +/// use once_cell::sync::Lazy; +/// +/// static CUSTOM: Lazy<Alphabet> = Lazy::new(|| +/// Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap() +/// ); +/// ``` +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Alphabet { + pub(crate) symbols: [u8; ALPHABET_SIZE], +} + +impl Alphabet { + /// Performs no checks so that it can be const. + /// Used only for known-valid strings. + const fn from_str_unchecked(alphabet: &str) -> Self { + let mut symbols = [0_u8; ALPHABET_SIZE]; + let source_bytes = alphabet.as_bytes(); + + // a way to copy that's allowed in const fn + let mut index = 0; + while index < ALPHABET_SIZE { + symbols[index] = source_bytes[index]; + index += 1; + } + + Self { symbols } + } + + /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes. + /// + /// The `=` byte is not allowed as it is used for padding. + pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> { + let bytes = alphabet.as_bytes(); + if bytes.len() != ALPHABET_SIZE { + return Err(ParseAlphabetError::InvalidLength); + } + + { + let mut index = 0; + while index < ALPHABET_SIZE { + let byte = bytes[index]; + + // must be ascii printable. 127 (DEL) is commonly considered printable + // for some reason but clearly unsuitable for base64. + if !(byte >= 32_u8 && byte <= 126_u8) { + return Err(ParseAlphabetError::UnprintableByte(byte)); + } + // = is assumed to be padding, so cannot be used as a symbol + if byte == PAD_BYTE { + return Err(ParseAlphabetError::ReservedByte(byte)); + } + + // Check for duplicates while staying within what const allows. + // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit + // microsecond range. + + let mut probe_index = 0; + while probe_index < ALPHABET_SIZE { + if probe_index == index { + probe_index += 1; + continue; + } + + let probe_byte = bytes[probe_index]; + + if byte == probe_byte { + return Err(ParseAlphabetError::DuplicatedByte(byte)); + } + + probe_index += 1; + } + + index += 1; + } + } + + Ok(Self::from_str_unchecked(alphabet)) + } + + /// Create a `&str` from the symbols in the `Alphabet` + pub fn as_str(&self) -> &str { + core::str::from_utf8(&self.symbols).unwrap() + } +} + +impl convert::TryFrom<&str> for Alphabet { + type Error = ParseAlphabetError; + + fn try_from(value: &str) -> Result<Self, Self::Error> { + Self::new(value) + } +} + +/// Possible errors when constructing an [Alphabet] from a `str`. +#[derive(Debug, Eq, PartialEq)] +pub enum ParseAlphabetError { + /// Alphabets must be 64 ASCII bytes + InvalidLength, + /// All bytes must be unique + DuplicatedByte(u8), + /// All bytes must be printable (in the range `[32, 126]`). + UnprintableByte(u8), + /// `=` cannot be used + ReservedByte(u8), +} + +impl fmt::Display for ParseAlphabetError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"), + Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b), + Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b), + Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b), + } + } +} + +#[cfg(any(feature = "std", test))] +impl error::Error for ParseAlphabetError {} + +/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][]. +/// +/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4 +pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", +); + +/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][]. +/// +/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5 +pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", +); + +/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters). +/// +/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. +pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( + "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", +); + +/// The bcrypt alphabet. +pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( + "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", +); + +/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`). +/// +/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) +pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,", +); + +/// The alphabet used in BinHex 4.0 files. +/// +/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt) +pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked( + "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr", +); + +#[cfg(test)] +mod tests { + use crate::alphabet::*; + use core::convert::TryFrom as _; + + #[test] + fn detects_duplicate_start() { + assert_eq!( + ParseAlphabetError::DuplicatedByte(b'A'), + Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") + .unwrap_err() + ); + } + + #[test] + fn detects_duplicate_end() { + assert_eq!( + ParseAlphabetError::DuplicatedByte(b'/'), + Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//") + .unwrap_err() + ); + } + + #[test] + fn detects_duplicate_middle() { + assert_eq!( + ParseAlphabetError::DuplicatedByte(b'Z'), + Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/") + .unwrap_err() + ); + } + + #[test] + fn detects_length() { + assert_eq!( + ParseAlphabetError::InvalidLength, + Alphabet::new( + "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/", + ) + .unwrap_err() + ); + } + + #[test] + fn detects_padding() { + assert_eq!( + ParseAlphabetError::ReservedByte(b'='), + Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=") + .unwrap_err() + ); + } + + #[test] + fn detects_unprintable() { + // form feed + assert_eq!( + ParseAlphabetError::UnprintableByte(0xc), + Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") + .unwrap_err() + ); + } + + #[test] + fn same_as_unchecked() { + assert_eq!( + STANDARD, + Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") + .unwrap() + ); + } + + #[test] + fn str_same_as_input() { + let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + let a = Alphabet::try_from(alphabet).unwrap(); + assert_eq!(alphabet, a.as_str()) + } +} |
