| author | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
| commit | 8cdfa445d6629ffef4cb84967ff7017654045bc2 | |
| tree | 22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/base64/src/engine/general_purpose | |
| parent | 4351c74c7c5f97156bc94d3a8549b9940ac80e3f | |
chore: add vendor directory
Diffstat (limited to 'vendor/base64/src/engine/general_purpose')
| -rw-r--r-- | vendor/base64/src/engine/general_purpose/decode.rs | 357 |
| -rw-r--r-- | vendor/base64/src/engine/general_purpose/decode_suffix.rs | 162 |
| -rw-r--r-- | vendor/base64/src/engine/general_purpose/mod.rs | 352 |
3 files changed, 871 insertions, 0 deletions
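Before the diff itself, a quick orientation: the files below implement the crate's general-purpose base64 engine. The following is a minimal usage sketch, assuming the `base64` crate's public `Engine` trait and the `STANDARD` re-export (the standard API in base64 0.21+, not part of this diff):

```rust
use base64::{engine::general_purpose::STANDARD, Engine as _};

fn main() {
    // Encode with the standard alphabet and '=' padding.
    let encoded = STANDARD.encode(b"foobar");
    assert_eq!(encoded, "Zm9vYmFy");

    // Decode; malformed input surfaces as a DecodeError (e.g. InvalidByte).
    let decoded = STANDARD.decode(&encoded).unwrap();
    assert_eq!(decoded, b"foobar".to_vec());
}
```

The `Zm9vYmFy`/`foobar` pair is the same test vector the vendored tests in `decode.rs` below use.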
diff --git a/vendor/base64/src/engine/general_purpose/decode.rs b/vendor/base64/src/engine/general_purpose/decode.rs
new file mode 100644
index 00000000..b55d3fc5
--- /dev/null
+++ b/vendor/base64/src/engine/general_purpose/decode.rs
@@ -0,0 +1,357 @@
+use crate::{
+    engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodeMetadata, DecodePaddingMode},
+    DecodeError, DecodeSliceError, PAD_BYTE,
+};
+
+#[doc(hidden)]
+pub struct GeneralPurposeEstimate {
+    /// input len % 4
+    rem: usize,
+    conservative_decoded_len: usize,
+}
+
+impl GeneralPurposeEstimate {
+    pub(crate) fn new(encoded_len: usize) -> Self {
+        let rem = encoded_len % 4;
+        Self {
+            rem,
+            conservative_decoded_len: (encoded_len / 4 + (rem > 0) as usize) * 3,
+        }
+    }
+}
+
+impl DecodeEstimate for GeneralPurposeEstimate {
+    fn decoded_len_estimate(&self) -> usize {
+        self.conservative_decoded_len
+    }
+}
+
+/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs.
+/// Returns the decode metadata, or an error.
+// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is
+// inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment,
+// but this is fragile and the best setting changes with only minor code modifications.
+#[inline]
+pub(crate) fn decode_helper(
+    input: &[u8],
+    estimate: GeneralPurposeEstimate,
+    output: &mut [u8],
+    decode_table: &[u8; 256],
+    decode_allow_trailing_bits: bool,
+    padding_mode: DecodePaddingMode,
+) -> Result<DecodeMetadata, DecodeSliceError> {
+    let input_complete_nonterminal_quads_len =
+        complete_quads_len(input, estimate.rem, output.len(), decode_table)?;
+
+    const UNROLLED_INPUT_CHUNK_SIZE: usize = 32;
+    const UNROLLED_OUTPUT_CHUNK_SIZE: usize = UNROLLED_INPUT_CHUNK_SIZE / 4 * 3;
+
+    let input_complete_quads_after_unrolled_chunks_len =
+        input_complete_nonterminal_quads_len % UNROLLED_INPUT_CHUNK_SIZE;
+
+    let input_unrolled_loop_len =
+        input_complete_nonterminal_quads_len - input_complete_quads_after_unrolled_chunks_len;
+
+    // chunks of 32 bytes
+    for (chunk_index, chunk) in input[..input_unrolled_loop_len]
+        .chunks_exact(UNROLLED_INPUT_CHUNK_SIZE)
+        .enumerate()
+    {
+        let input_index = chunk_index * UNROLLED_INPUT_CHUNK_SIZE;
+        let chunk_output = &mut output[chunk_index * UNROLLED_OUTPUT_CHUNK_SIZE
+            ..(chunk_index + 1) * UNROLLED_OUTPUT_CHUNK_SIZE];
+
+        decode_chunk_8(
+            &chunk[0..8],
+            input_index,
+            decode_table,
+            &mut chunk_output[0..6],
+        )?;
+        decode_chunk_8(
+            &chunk[8..16],
+            input_index + 8,
+            decode_table,
+            &mut chunk_output[6..12],
+        )?;
+        decode_chunk_8(
+            &chunk[16..24],
+            input_index + 16,
+            decode_table,
+            &mut chunk_output[12..18],
+        )?;
+        decode_chunk_8(
+            &chunk[24..32],
+            input_index + 24,
+            decode_table,
+            &mut chunk_output[18..24],
+        )?;
+    }
+
+    // remaining quads, except for the last possibly partial one, as it may have padding
+    let output_unrolled_loop_len = input_unrolled_loop_len / 4 * 3;
+    let output_complete_quad_len = input_complete_nonterminal_quads_len / 4 * 3;
+    {
+        let output_after_unroll = &mut output[output_unrolled_loop_len..output_complete_quad_len];
+
+        for (chunk_index, chunk) in input
+            [input_unrolled_loop_len..input_complete_nonterminal_quads_len]
+            .chunks_exact(4)
+            .enumerate()
+        {
+            let chunk_output = &mut output_after_unroll[chunk_index * 3..chunk_index * 3 + 3];
+
+            decode_chunk_4(
+                chunk,
+                input_unrolled_loop_len + chunk_index * 4,
+                decode_table,
+                chunk_output,
+            )?;
+        }
+    }
+
+    super::decode_suffix::decode_suffix(
+        input,
+        input_complete_nonterminal_quads_len,
+        output,
+        output_complete_quad_len,
+        decode_table,
+        decode_allow_trailing_bits,
+        padding_mode,
+    )
+}
+
+/// Returns the length of complete quads, except for the last one, even if it is complete.
+///
+/// Returns an error if the output len is not big enough for decoding those complete quads, or if
+/// the input % 4 == 1, and that last byte is an invalid value other than a pad byte.
+///
+/// - `input` is the base64 input
+/// - `input_len_rem` is input len % 4
+/// - `output_len` is the length of the output slice
+pub(crate) fn complete_quads_len(
+    input: &[u8],
+    input_len_rem: usize,
+    output_len: usize,
+    decode_table: &[u8; 256],
+) -> Result<usize, DecodeSliceError> {
+    debug_assert!(input.len() % 4 == input_len_rem);
+
+    // detect a trailing invalid byte, like a newline, as a user convenience
+    if input_len_rem == 1 {
+        let last_byte = input[input.len() - 1];
+        // exclude pad bytes; might be part of padding that extends from earlier in the input
+        if last_byte != PAD_BYTE && decode_table[usize::from(last_byte)] == INVALID_VALUE {
+            return Err(DecodeError::InvalidByte(input.len() - 1, last_byte).into());
+        }
+    };
+
+    // skip last quad, even if it's complete, as it may have padding
+    let input_complete_nonterminal_quads_len = input
+        .len()
+        .saturating_sub(input_len_rem)
+        // if rem was 0, subtract 4 to avoid padding
+        .saturating_sub((input_len_rem == 0) as usize * 4);
+    debug_assert!(
+        input.is_empty() || (1..=4).contains(&(input.len() - input_complete_nonterminal_quads_len))
+    );
+
+    // check that everything except the last quad handled by decode_suffix will fit
+    if output_len < input_complete_nonterminal_quads_len / 4 * 3 {
+        return Err(DecodeSliceError::OutputSliceTooSmall);
+    };
+    Ok(input_complete_nonterminal_quads_len)
+}
+
+/// Decode 8 bytes of input into 6 bytes of output.
+///
+/// `input` is the 8 bytes to decode.
+/// `index_at_start_of_input` is the offset in the overall input (used for reporting errors
+/// accurately)
+/// `decode_table` is the lookup table for the particular base64 alphabet.
+/// `output` will have its first 6 bytes overwritten
+// yes, really inline (worth 30-50% speedup)
+#[inline(always)]
+fn decode_chunk_8(
+    input: &[u8],
+    index_at_start_of_input: usize,
+    decode_table: &[u8; 256],
+    output: &mut [u8],
+) -> Result<(), DecodeError> {
+    let morsel = decode_table[usize::from(input[0])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(index_at_start_of_input, input[0]));
+    }
+    let mut accum = u64::from(morsel) << 58;
+
+    let morsel = decode_table[usize::from(input[1])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(
+            index_at_start_of_input + 1,
+            input[1],
+        ));
+    }
+    accum |= u64::from(morsel) << 52;
+
+    let morsel = decode_table[usize::from(input[2])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(
+            index_at_start_of_input + 2,
+            input[2],
+        ));
+    }
+    accum |= u64::from(morsel) << 46;
+
+    let morsel = decode_table[usize::from(input[3])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(
+            index_at_start_of_input + 3,
+            input[3],
+        ));
+    }
+    accum |= u64::from(morsel) << 40;
+
+    let morsel = decode_table[usize::from(input[4])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(
+            index_at_start_of_input + 4,
+            input[4],
+        ));
+    }
+    accum |= u64::from(morsel) << 34;
+
+    let morsel = decode_table[usize::from(input[5])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(
+            index_at_start_of_input + 5,
+            input[5],
+        ));
+    }
+    accum |= u64::from(morsel) << 28;
+
+    let morsel = decode_table[usize::from(input[6])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(
+            index_at_start_of_input + 6,
+            input[6],
+        ));
+    }
+    accum |= u64::from(morsel) << 22;
+
+    let morsel = decode_table[usize::from(input[7])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(
+            index_at_start_of_input + 7,
+            input[7],
+        ));
+    }
+    accum |= u64::from(morsel) << 16;
+
+    output[..6].copy_from_slice(&accum.to_be_bytes()[..6]);
+
+    Ok(())
+}
+
+/// Like [decode_chunk_8] but for 4 bytes of input and 3 bytes of output.
+#[inline(always)]
+fn decode_chunk_4(
+    input: &[u8],
+    index_at_start_of_input: usize,
+    decode_table: &[u8; 256],
+    output: &mut [u8],
+) -> Result<(), DecodeError> {
+    let morsel = decode_table[usize::from(input[0])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(index_at_start_of_input, input[0]));
+    }
+    let mut accum = u32::from(morsel) << 26;
+
+    let morsel = decode_table[usize::from(input[1])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(
+            index_at_start_of_input + 1,
+            input[1],
+        ));
+    }
+    accum |= u32::from(morsel) << 20;
+
+    let morsel = decode_table[usize::from(input[2])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(
+            index_at_start_of_input + 2,
+            input[2],
+        ));
+    }
+    accum |= u32::from(morsel) << 14;
+
+    let morsel = decode_table[usize::from(input[3])];
+    if morsel == INVALID_VALUE {
+        return Err(DecodeError::InvalidByte(
+            index_at_start_of_input + 3,
+            input[3],
+        ));
+    }
+    accum |= u32::from(morsel) << 8;
+
+    output[..3].copy_from_slice(&accum.to_be_bytes()[..3]);
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use crate::engine::general_purpose::STANDARD;
+
+    #[test]
+    fn decode_chunk_8_writes_only_6_bytes() {
+        let input = b"Zm9vYmFy"; // "foobar"
+        let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7];
+
+        decode_chunk_8(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
+        assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 6, 7], &output);
+    }
+
+    #[test]
+    fn decode_chunk_4_writes_only_3_bytes() {
+        let input = b"Zm9v"; // "foo"
+        let mut output = [0_u8, 1, 2, 3];
+
+        decode_chunk_4(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
+        assert_eq!(&vec![b'f', b'o', b'o', 3], &output);
+    }
+
+    #[test]
+    fn estimate_short_lengths() {
+        for (range, decoded_len_estimate) in [
+            (0..=0, 0),
+            (1..=4, 3),
+            (5..=8, 6),
+            (9..=12, 9),
+            (13..=16, 12),
+            (17..=20, 15),
+        ] {
+            for encoded_len in range {
+                let estimate = GeneralPurposeEstimate::new(encoded_len);
+                assert_eq!(decoded_len_estimate, estimate.decoded_len_estimate());
+            }
+        }
+    }
+
+    #[test]
+    fn estimate_via_u128_inflation() {
+        // cover both ends of usize
+        (0..1000)
+            .chain(usize::MAX - 1000..=usize::MAX)
+            .for_each(|encoded_len| {
+                // inflate to 128 bit type to be able to safely use the easy formulas
+                let len_128 = encoded_len as u128;
+
+                let estimate = GeneralPurposeEstimate::new(encoded_len);
+                assert_eq!(
+                    (len_128 + 3) / 4 * 3,
+                    estimate.conservative_decoded_len as u128
+                );
+            })
+    }
+}
diff --git a/vendor/base64/src/engine/general_purpose/decode_suffix.rs b/vendor/base64/src/engine/general_purpose/decode_suffix.rs
new file mode 100644
index 00000000..02aaf514
--- /dev/null
+++ b/vendor/base64/src/engine/general_purpose/decode_suffix.rs
@@ -0,0 +1,162 @@
+use crate::{
+    engine::{general_purpose::INVALID_VALUE, DecodeMetadata, DecodePaddingMode},
+    DecodeError, DecodeSliceError, PAD_BYTE,
+};
+
+/// Decode the last 0-4 bytes, checking for trailing set bits and padding per the provided
+/// parameters.
+///
+/// Returns the decode metadata representing the total number of bytes decoded, including the ones
+/// indicated as already written by `output_index`.
+pub(crate) fn decode_suffix(
+    input: &[u8],
+    input_index: usize,
+    output: &mut [u8],
+    mut output_index: usize,
+    decode_table: &[u8; 256],
+    decode_allow_trailing_bits: bool,
+    padding_mode: DecodePaddingMode,
+) -> Result<DecodeMetadata, DecodeSliceError> {
+    debug_assert!((input.len() - input_index) <= 4);
+
+    // Decode any leftovers that might not be a complete input chunk of 4 bytes.
+    // Use a stack-resident 4-byte array as the buffer for the decoded morsels.
+    let mut morsels_in_leftover = 0;
+    let mut padding_bytes_count = 0;
+    // offset from input_index
+    let mut first_padding_offset: usize = 0;
+    let mut last_symbol = 0_u8;
+    let mut morsels = [0_u8; 4];
+
+    for (leftover_index, &b) in input[input_index..].iter().enumerate() {
+        // '=' padding
+        if b == PAD_BYTE {
+            // There can be bad padding bytes in a few ways:
+            // 1 - Padding with non-padding characters after it
+            // 2 - Padding after zero or one characters in the current quad (should only
+            //     be after 2 or 3 chars)
+            // 3 - More than two characters of padding. If 3 or 4 padding chars
+            //     are in the same quad, that implies it will be caught by #2.
+            //     If it spreads from one quad to another, it will be an invalid byte
+            //     in the first quad.
+            // 4 - Non-canonical padding -- 1 byte when it should be 2, etc.
+            //     Per config, non-canonical but still functional non- or partially-padded base64
+            //     may be treated as an error condition.
+
+            if leftover_index < 2 {
+                // Check for error #2.
+                // Either the previous byte was padding, in which case we would have already hit
+                // this case, or it wasn't, in which case this is the first such error.
+                debug_assert!(
+                    leftover_index == 0 || (leftover_index == 1 && padding_bytes_count == 0)
+                );
+                let bad_padding_index = input_index + leftover_index;
+                return Err(DecodeError::InvalidByte(bad_padding_index, b).into());
+            }
+
+            if padding_bytes_count == 0 {
+                first_padding_offset = leftover_index;
+            }
+
+            padding_bytes_count += 1;
+            continue;
+        }
+
+        // Check for case #1.
+        // To make '=' handling consistent with the main loop, don't allow
+        // non-suffix '=' in trailing chunk either. Report error as first
+        // erroneous padding.
+        if padding_bytes_count > 0 {
+            return Err(
+                DecodeError::InvalidByte(input_index + first_padding_offset, PAD_BYTE).into(),
+            );
+        }
+
+        last_symbol = b;
+
+        // can use up to 4 * 6 = 24 bits of the u32 built below, if last chunk has no padding.
+        // Pack the leftovers from left to right.
+        let morsel = decode_table[b as usize];
+        if morsel == INVALID_VALUE {
+            return Err(DecodeError::InvalidByte(input_index + leftover_index, b).into());
+        }
+
+        morsels[morsels_in_leftover] = morsel;
+        morsels_in_leftover += 1;
+    }
+
+    // If there was 1 trailing byte, and it was valid, and we got to this point without hitting
+    // an invalid byte, now we can report invalid length
+    if !input.is_empty() && morsels_in_leftover < 2 {
+        return Err(DecodeError::InvalidLength(input_index + morsels_in_leftover).into());
+    }
+
+    match padding_mode {
+        DecodePaddingMode::Indifferent => { /* everything we care about was already checked */ }
+        DecodePaddingMode::RequireCanonical => {
+            // allow empty input
+            if (padding_bytes_count + morsels_in_leftover) % 4 != 0 {
+                return Err(DecodeError::InvalidPadding.into());
+            }
+        }
+        DecodePaddingMode::RequireNone => {
+            if padding_bytes_count > 0 {
+                // check at the end to make sure we let the cases of padding that should be
+                // InvalidByte get hit
+                return Err(DecodeError::InvalidPadding.into());
+            }
+        }
+    }
+
+    // When encoding 1 trailing byte (e.g. 0xFF), 2 base64 bytes ("/w") are needed.
+    // / is the symbol for 63 (0x3F, bottom 6 bits all set) and w is 48 (0x30, top 2 bits
+    // of bottom 6 bits set).
+    // When decoding two symbols back to one trailing byte, any final symbol higher than
+    // w would still decode to the original byte because we only care about the top two
+    // bits in the bottom 6, but would be a non-canonical encoding. So, we calculate a
+    // mask based on how many bits are used for just the canonical encoding, and optionally
+    // error if any other bits are set. In the example of one encoded byte -> 2 symbols,
+    // 2 symbols can technically encode 12 bits, but the last 4 are non-canonical, and
+    // useless since there are no more symbols to provide the necessary 4 additional bits
+    // to finish the second original byte.
+
+    let leftover_bytes_to_append = morsels_in_leftover * 6 / 8;
+    // Put the up to 6 complete bytes as the high bytes.
+    // Gain a couple percent speedup from nudging these ORs to use more ILP with a two-way split.
+    let mut leftover_num = (u32::from(morsels[0]) << 26)
+        | (u32::from(morsels[1]) << 20)
+        | (u32::from(morsels[2]) << 14)
+        | (u32::from(morsels[3]) << 8);
+
+    // if there are bits set outside the bits we care about, last symbol encodes trailing bits that
+    // will not be included in the output
+    let mask = !0_u32 >> (leftover_bytes_to_append * 8);
+    if !decode_allow_trailing_bits && (leftover_num & mask) != 0 {
+        // last morsel is at `morsels_in_leftover` - 1
+        return Err(DecodeError::InvalidLastSymbol(
+            input_index + morsels_in_leftover - 1,
+            last_symbol,
+        )
+        .into());
+    }
+
+    // Strangely, this approach benchmarks better than writing bytes one at a time,
+    // or copy_from_slice into output.
+    for _ in 0..leftover_bytes_to_append {
+        let hi_byte = (leftover_num >> 24) as u8;
+        leftover_num <<= 8;
+        *output
+            .get_mut(output_index)
+            .ok_or(DecodeSliceError::OutputSliceTooSmall)? = hi_byte;
+        output_index += 1;
+    }
+
+    Ok(DecodeMetadata::new(
+        output_index,
+        if padding_bytes_count > 0 {
+            Some(input_index + first_padding_offset)
+        } else {
+            None
+        },
+    ))
+}
diff --git a/vendor/base64/src/engine/general_purpose/mod.rs b/vendor/base64/src/engine/general_purpose/mod.rs
new file mode 100644
index 00000000..6fe95809
--- /dev/null
+++ b/vendor/base64/src/engine/general_purpose/mod.rs
@@ -0,0 +1,352 @@
+//! Provides the [GeneralPurpose] engine and associated config types.
+use crate::{
+    alphabet,
+    alphabet::Alphabet,
+    engine::{Config, DecodeMetadata, DecodePaddingMode},
+    DecodeSliceError,
+};
+use core::convert::TryInto;
+
+pub(crate) mod decode;
+pub(crate) mod decode_suffix;
+
+pub use decode::GeneralPurposeEstimate;
+
+pub(crate) const INVALID_VALUE: u8 = 255;
+
+/// A general-purpose base64 engine.
+///
+/// - It uses no vector CPU instructions, so it will work on any system.
+/// - It is reasonably fast (~2-3GiB/s).
+/// - It is not constant-time, though, so it is vulnerable to timing side-channel attacks. For loading cryptographic keys, etc, it is suggested to use the forthcoming constant-time implementation.
+
+#[derive(Debug, Clone)]
+pub struct GeneralPurpose {
+    encode_table: [u8; 64],
+    decode_table: [u8; 256],
+    config: GeneralPurposeConfig,
+}
+
+impl GeneralPurpose {
+    /// Create a `GeneralPurpose` engine from an [Alphabet].
+    ///
+    /// While not very expensive to initialize, ideally these should be cached
+    /// if the engine will be used repeatedly.
+    pub const fn new(alphabet: &Alphabet, config: GeneralPurposeConfig) -> Self {
+        Self {
+            encode_table: encode_table(alphabet),
+            decode_table: decode_table(alphabet),
+            config,
+        }
+    }
+}
+
+impl super::Engine for GeneralPurpose {
+    type Config = GeneralPurposeConfig;
+    type DecodeEstimate = GeneralPurposeEstimate;
+
+    fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize {
+        let mut input_index: usize = 0;
+
+        const BLOCKS_PER_FAST_LOOP: usize = 4;
+        const LOW_SIX_BITS: u64 = 0x3F;
+
+        // we read 8 bytes at a time (u64) but only actually consume 6 of those bytes. Thus, we need
+        // 2 trailing bytes to be available to read..
+        let last_fast_index = input.len().saturating_sub(BLOCKS_PER_FAST_LOOP * 6 + 2);
+        let mut output_index = 0;
+
+        if last_fast_index > 0 {
+            while input_index <= last_fast_index {
+                // Major performance wins from letting the optimizer do the bounds check once, mostly
+                // on the output side
+                let input_chunk =
+                    &input[input_index..(input_index + (BLOCKS_PER_FAST_LOOP * 6 + 2))];
+                let output_chunk =
+                    &mut output[output_index..(output_index + BLOCKS_PER_FAST_LOOP * 8)];
+
+                // Hand-unrolling for 32 vs 16 or 8 bytes yields performance about equivalent
+                // to unsafe pointer code on a Xeon E5-1650v3. 64 byte unrolling was slightly better for
+                // large inputs but significantly worse for 50-byte input, unsurprisingly. I suspect
+                // that it's a not uncommon use case to encode smallish chunks of data (e.g. a 64-byte
+                // SHA-512 digest), so it would be nice if that fit in the unrolled loop at least once.
+                // Plus, single-digit percentage performance differences might well be quite different
+                // on different hardware.
+
+                let input_u64 = read_u64(&input_chunk[0..]);
+
+                output_chunk[0] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
+                output_chunk[1] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
+                output_chunk[2] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
+                output_chunk[3] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
+                output_chunk[4] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
+                output_chunk[5] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
+                output_chunk[6] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
+                output_chunk[7] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
+
+                let input_u64 = read_u64(&input_chunk[6..]);
+
+                output_chunk[8] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
+                output_chunk[9] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
+                output_chunk[10] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
+                output_chunk[11] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
+                output_chunk[12] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
+                output_chunk[13] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
+                output_chunk[14] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
+                output_chunk[15] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
+
+                let input_u64 = read_u64(&input_chunk[12..]);
+
+                output_chunk[16] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
+                output_chunk[17] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
+                output_chunk[18] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
+                output_chunk[19] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
+                output_chunk[20] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
+                output_chunk[21] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
+                output_chunk[22] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
+                output_chunk[23] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
+
+                let input_u64 = read_u64(&input_chunk[18..]);
+
+                output_chunk[24] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
+                output_chunk[25] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
+                output_chunk[26] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
+                output_chunk[27] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
+                output_chunk[28] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
+                output_chunk[29] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
+                output_chunk[30] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
+                output_chunk[31] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];
+
+                output_index += BLOCKS_PER_FAST_LOOP * 8;
+                input_index += BLOCKS_PER_FAST_LOOP * 6;
+            }
+        }
+
+        // Encode what's left after the fast loop.
+
+        const LOW_SIX_BITS_U8: u8 = 0x3F;
+
+        let rem = input.len() % 3;
+        let start_of_rem = input.len() - rem;
+
+        // start at the first index not handled by fast loop, which may be 0.
+
+        while input_index < start_of_rem {
+            let input_chunk = &input[input_index..(input_index + 3)];
+            let output_chunk = &mut output[output_index..(output_index + 4)];
+
+            output_chunk[0] = self.encode_table[(input_chunk[0] >> 2) as usize];
+            output_chunk[1] = self.encode_table
+                [((input_chunk[0] << 4 | input_chunk[1] >> 4) & LOW_SIX_BITS_U8) as usize];
+            output_chunk[2] = self.encode_table
+                [((input_chunk[1] << 2 | input_chunk[2] >> 6) & LOW_SIX_BITS_U8) as usize];
+            output_chunk[3] = self.encode_table[(input_chunk[2] & LOW_SIX_BITS_U8) as usize];
+
+            input_index += 3;
+            output_index += 4;
+        }
+
+        if rem == 2 {
+            output[output_index] = self.encode_table[(input[start_of_rem] >> 2) as usize];
+            output[output_index + 1] =
+                self.encode_table[((input[start_of_rem] << 4 | input[start_of_rem + 1] >> 4)
+                    & LOW_SIX_BITS_U8) as usize];
+            output[output_index + 2] =
+                self.encode_table[((input[start_of_rem + 1] << 2) & LOW_SIX_BITS_U8) as usize];
+            output_index += 3;
+        } else if rem == 1 {
+            output[output_index] = self.encode_table[(input[start_of_rem] >> 2) as usize];
+            output[output_index + 1] =
+                self.encode_table[((input[start_of_rem] << 4) & LOW_SIX_BITS_U8) as usize];
+            output_index += 2;
+        }
+
+        output_index
+    }
+
+    fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
+        GeneralPurposeEstimate::new(input_len)
+    }
+
+    fn internal_decode(
+        &self,
+        input: &[u8],
+        output: &mut [u8],
+        estimate: Self::DecodeEstimate,
+    ) -> Result<DecodeMetadata, DecodeSliceError> {
+        decode::decode_helper(
+            input,
+            estimate,
+            output,
+            &self.decode_table,
+            self.config.decode_allow_trailing_bits,
+            self.config.decode_padding_mode,
+        )
+    }
+
+    fn config(&self) -> &Self::Config {
+        &self.config
+    }
+}
+
+/// Returns a table mapping a 6-bit index to the ASCII byte encoding of the index
+pub(crate) const fn encode_table(alphabet: &Alphabet) -> [u8; 64] {
+    // the encode table is just the alphabet:
+    // 6-bit index lookup -> printable byte
+    let mut encode_table = [0_u8; 64];
+    {
+        let mut index = 0;
+        while index < 64 {
+            encode_table[index] = alphabet.symbols[index];
+            index += 1;
+        }
+    }
+
+    encode_table
+}
+
+/// Returns a table mapping base64 bytes as the lookup index to either:
+/// - [INVALID_VALUE] for bytes that aren't members of the alphabet
+/// - a byte whose lower 6 bits are the value that was encoded into the index byte
+pub(crate) const fn decode_table(alphabet: &Alphabet) -> [u8; 256] {
+    let mut decode_table = [INVALID_VALUE; 256];
+
+    // Since the table is full of `INVALID_VALUE` already, we only need to overwrite
+    // the parts that are valid.
+    let mut index = 0;
+    while index < 64 {
+        // The index in the alphabet is the 6-bit value we care about.
+        // Since the index is in 0-63, it is safe to cast to u8.
+        decode_table[alphabet.symbols[index] as usize] = index as u8;
+        index += 1;
+    }
+
+    decode_table
+}
+
+#[inline]
+fn read_u64(s: &[u8]) -> u64 {
+    u64::from_be_bytes(s[..8].try_into().unwrap())
+}
+
+/// Contains configuration parameters for base64 encoding and decoding.
+///
+/// ```
+/// # use base64::engine::GeneralPurposeConfig;
+/// let config = GeneralPurposeConfig::new()
+///     .with_encode_padding(false);
+/// // further customize using `.with_*` methods as needed
+/// ```
+///
+/// The constants [PAD] and [NO_PAD] cover most use cases.
+///
+/// To specify the characters used, see [Alphabet].
+#[derive(Clone, Copy, Debug)]
+pub struct GeneralPurposeConfig {
+    encode_padding: bool,
+    decode_allow_trailing_bits: bool,
+    decode_padding_mode: DecodePaddingMode,
+}
+
+impl GeneralPurposeConfig {
+    /// Create a new config with `padding` = `true`, `decode_allow_trailing_bits` = `false`, and
+    /// `decode_padding_mode = DecodePaddingMode::RequireCanonical`.
+    ///
+    /// This probably matches most people's expectations, but consider disabling padding to save
+    /// a few bytes unless you specifically need it for compatibility with some legacy system.
+    pub const fn new() -> Self {
+        Self {
+            // RFC states that padding must be applied by default
+            encode_padding: true,
+            decode_allow_trailing_bits: false,
+            decode_padding_mode: DecodePaddingMode::RequireCanonical,
+        }
+    }
+
+    /// Create a new config based on `self` with an updated `padding` setting.
+    ///
+    /// If `padding` is `true`, encoding will append either 1 or 2 `=` padding characters as needed
+    /// to produce an output whose length is a multiple of 4.
+    ///
+    /// Padding is not needed for correct decoding and only serves to waste bytes, but it's in the
+    /// [spec](https://datatracker.ietf.org/doc/html/rfc4648#section-3.2).
+    ///
+    /// For new applications, consider not using padding if the decoders you're using don't require
+    /// padding to be present.
+    pub const fn with_encode_padding(self, padding: bool) -> Self {
+        Self {
+            encode_padding: padding,
+            ..self
+        }
+    }
+
+    /// Create a new config based on `self` with an updated `decode_allow_trailing_bits` setting.
+    ///
+    /// Most users will not need to configure this. It's useful if you need to decode base64
+    /// produced by a buggy encoder that has bits set in the unused space on the last base64
+    /// character as per [forgiving-base64 decode](https://infra.spec.whatwg.org/#forgiving-base64-decode).
+    /// If invalid trailing bits are present and this is `true`, those bits will
+    /// be silently ignored, else `DecodeError::InvalidLastSymbol` will be emitted.
+    pub const fn with_decode_allow_trailing_bits(self, allow: bool) -> Self {
+        Self {
+            decode_allow_trailing_bits: allow,
+            ..self
+        }
+    }
+
+    /// Create a new config based on `self` with an updated `decode_padding_mode` setting.
+    ///
+    /// Padding is not useful in terms of representing encoded data -- it makes no difference to
+    /// the decoder if padding is present or not, so if you have some un-padded input to decode, it
+    /// is perfectly fine to use `DecodePaddingMode::Indifferent` to prevent errors from being
+    /// emitted.
+    ///
+    /// However, since in practice
+    /// [people who learned nothing from BER vs DER seem to expect base64 to have one canonical encoding](https://eprint.iacr.org/2022/361),
+    /// the default setting is the stricter `DecodePaddingMode::RequireCanonical`.
+    ///
+    /// Or, if "canonical" in your circumstance means _no_ padding rather than padding to the
+    /// next multiple of four, there's `DecodePaddingMode::RequireNone`.
+    pub const fn with_decode_padding_mode(self, mode: DecodePaddingMode) -> Self {
+        Self {
+            decode_padding_mode: mode,
+            ..self
+        }
+    }
+}
+
+impl Default for GeneralPurposeConfig {
+    /// Delegates to [GeneralPurposeConfig::new].
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Config for GeneralPurposeConfig {
+    fn encode_padding(&self) -> bool {
+        self.encode_padding
+    }
+}
+
+/// A [GeneralPurpose] engine using the [alphabet::STANDARD] base64 alphabet and [PAD] config.
+pub const STANDARD: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, PAD);
+
+/// A [GeneralPurpose] engine using the [alphabet::STANDARD] base64 alphabet and [NO_PAD] config.
+pub const STANDARD_NO_PAD: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD);
+
+/// A [GeneralPurpose] engine using the [alphabet::URL_SAFE] base64 alphabet and [PAD] config.
+pub const URL_SAFE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, PAD);
+
+/// A [GeneralPurpose] engine using the [alphabet::URL_SAFE] base64 alphabet and [NO_PAD] config.
+pub const URL_SAFE_NO_PAD: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD);
+
+/// Include padding bytes when encoding, and require that they be present when decoding.
+///
+/// This is the standard per the base64 RFC, but consider using [NO_PAD] instead as padding serves
+/// little purpose in practice.
+pub const PAD: GeneralPurposeConfig = GeneralPurposeConfig::new();
+
+/// Don't add padding when encoding, and require no padding when decoding.
+pub const NO_PAD: GeneralPurposeConfig = GeneralPurposeConfig::new()
+    .with_encode_padding(false)
+    .with_decode_padding_mode(DecodePaddingMode::RequireNone);
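To illustrate the padding-mode config defined in `mod.rs` above, here is a hedged sketch assuming the crate's public re-exports (`base64::engine::{GeneralPurpose, GeneralPurposeConfig, DecodePaddingMode}` and the `Engine` trait), which are not themselves part of this diff:

```rust
use base64::{
    alphabet,
    engine::{general_purpose::STANDARD, DecodePaddingMode, GeneralPurpose, GeneralPurposeConfig},
    Engine as _,
};

fn main() {
    // DecodePaddingMode::Indifferent accepts a suffix with or without '=' padding.
    let lenient = GeneralPurpose::new(
        &alphabet::STANDARD,
        GeneralPurposeConfig::new()
            .with_encode_padding(false)
            .with_decode_padding_mode(DecodePaddingMode::Indifferent),
    );
    assert_eq!(lenient.decode("Zm9vYg").unwrap(), b"foob".to_vec());
    assert_eq!(lenient.decode("Zm9vYg==").unwrap(), b"foob".to_vec());

    // The default RequireCanonical mode (the PAD config behind STANDARD) rejects
    // the unpadded form with DecodeError::InvalidPadding.
    assert!(STANDARD.decode("Zm9vYg").is_err());
}
```

This mirrors the `decode_suffix` logic above: `Indifferent` skips the `% 4` check, `RequireCanonical` enforces it, and `RequireNone` errors on any `=` at all.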
