diff options
Diffstat (limited to 'vendor/base64/src/read')
| -rw-r--r-- | vendor/base64/src/read/decoder.rs | 335 | ||||
| -rw-r--r-- | vendor/base64/src/read/decoder_tests.rs | 487 | ||||
| -rw-r--r-- | vendor/base64/src/read/mod.rs | 6 |
3 files changed, 828 insertions, 0 deletions
diff --git a/vendor/base64/src/read/decoder.rs b/vendor/base64/src/read/decoder.rs new file mode 100644 index 00000000..781f6f88 --- /dev/null +++ b/vendor/base64/src/read/decoder.rs @@ -0,0 +1,335 @@ +use crate::{engine::Engine, DecodeError, DecodeSliceError, PAD_BYTE}; +use std::{cmp, fmt, io}; + +// This should be large, but it has to fit on the stack. +pub(crate) const BUF_SIZE: usize = 1024; + +// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding). +const BASE64_CHUNK_SIZE: usize = 4; +const DECODED_CHUNK_SIZE: usize = 3; + +/// A `Read` implementation that decodes base64 data read from an underlying reader. +/// +/// # Examples +/// +/// ``` +/// use std::io::Read; +/// use std::io::Cursor; +/// use base64::engine::general_purpose; +/// +/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc. +/// let mut wrapped_reader = Cursor::new(b"YXNkZg=="); +/// let mut decoder = base64::read::DecoderReader::new( +/// &mut wrapped_reader, +/// &general_purpose::STANDARD); +/// +/// // handle errors as you normally would +/// let mut result = Vec::new(); +/// decoder.read_to_end(&mut result).unwrap(); +/// +/// assert_eq!(b"asdf", &result[..]); +/// +/// ``` +pub struct DecoderReader<'e, E: Engine, R: io::Read> { + engine: &'e E, + /// Where b64 data is read from + inner: R, + + /// Holds b64 data read from the delegate reader. + b64_buffer: [u8; BUF_SIZE], + /// The start of the pending buffered data in `b64_buffer`. + b64_offset: usize, + /// The amount of buffered b64 data after `b64_offset` in `b64_len`. + b64_len: usize, + /// Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a + /// decoded chunk in to, we have to be able to hang on to a few decoded bytes. + /// Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to + /// decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest + /// into here, which seems like a lot of complexity for 1 extra byte of storage. + decoded_chunk_buffer: [u8; DECODED_CHUNK_SIZE], + /// Index of start of decoded data in `decoded_chunk_buffer` + decoded_offset: usize, + /// Length of decoded data after `decoded_offset` in `decoded_chunk_buffer` + decoded_len: usize, + /// Input length consumed so far. + /// Used to provide accurate offsets in errors + input_consumed_len: usize, + /// offset of previously seen padding, if any + padding_offset: Option<usize>, +} + +// exclude b64_buffer as it's uselessly large +impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("DecoderReader") + .field("b64_offset", &self.b64_offset) + .field("b64_len", &self.b64_len) + .field("decoded_chunk_buffer", &self.decoded_chunk_buffer) + .field("decoded_offset", &self.decoded_offset) + .field("decoded_len", &self.decoded_len) + .field("input_consumed_len", &self.input_consumed_len) + .field("padding_offset", &self.padding_offset) + .finish() + } +} + +impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> { + /// Create a new decoder that will read from the provided reader `r`. + pub fn new(reader: R, engine: &'e E) -> Self { + DecoderReader { + engine, + inner: reader, + b64_buffer: [0; BUF_SIZE], + b64_offset: 0, + b64_len: 0, + decoded_chunk_buffer: [0; DECODED_CHUNK_SIZE], + decoded_offset: 0, + decoded_len: 0, + input_consumed_len: 0, + padding_offset: None, + } + } + + /// Write as much as possible of the decoded buffer into the target buffer. + /// Must only be called when there is something to write and space to write into. + /// Returns a Result with the number of (decoded) bytes copied. + fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> { + debug_assert!(self.decoded_len > 0); + debug_assert!(!buf.is_empty()); + + let copy_len = cmp::min(self.decoded_len, buf.len()); + debug_assert!(copy_len > 0); + debug_assert!(copy_len <= self.decoded_len); + + buf[..copy_len].copy_from_slice( + &self.decoded_chunk_buffer[self.decoded_offset..self.decoded_offset + copy_len], + ); + + self.decoded_offset += copy_len; + self.decoded_len -= copy_len; + + debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); + + Ok(copy_len) + } + + /// Read into the remaining space in the buffer after the current contents. + /// Must only be called when there is space to read into in the buffer. + /// Returns the number of bytes read. + fn read_from_delegate(&mut self) -> io::Result<usize> { + debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE); + + let read = self + .inner + .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?; + self.b64_len += read; + + debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); + + Ok(read) + } + + /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the + /// caller's responsibility to choose the number of b64 bytes to decode correctly. + /// + /// Returns a Result with the number of decoded bytes written to `buf`. + /// + /// # Panics + /// + /// panics if `buf` is too small + fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> { + debug_assert!(self.b64_len >= b64_len_to_decode); + debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); + debug_assert!(!buf.is_empty()); + + let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode]; + let decode_metadata = self + .engine + .internal_decode( + b64_to_decode, + buf, + self.engine.internal_decoded_len_estimate(b64_len_to_decode), + ) + .map_err(|dse| match dse { + DecodeSliceError::DecodeError(de) => { + match de { + DecodeError::InvalidByte(offset, byte) => { + match (byte, self.padding_offset) { + // if there was padding in a previous block of decoding that happened to + // be correct, and we now find more padding that happens to be incorrect, + // to be consistent with non-reader decodes, record the error at the first + // padding + (PAD_BYTE, Some(first_pad_offset)) => { + DecodeError::InvalidByte(first_pad_offset, PAD_BYTE) + } + _ => { + DecodeError::InvalidByte(self.input_consumed_len + offset, byte) + } + } + } + DecodeError::InvalidLength(len) => { + DecodeError::InvalidLength(self.input_consumed_len + len) + } + DecodeError::InvalidLastSymbol(offset, byte) => { + DecodeError::InvalidLastSymbol(self.input_consumed_len + offset, byte) + } + DecodeError::InvalidPadding => DecodeError::InvalidPadding, + } + } + DecodeSliceError::OutputSliceTooSmall => { + unreachable!("buf is sized correctly in calling code") + } + }) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + + if let Some(offset) = self.padding_offset { + // we've already seen padding + if decode_metadata.decoded_len > 0 { + // we read more after already finding padding; report error at first padding byte + return Err(io::Error::new( + io::ErrorKind::InvalidData, + DecodeError::InvalidByte(offset, PAD_BYTE), + )); + } + } + + self.padding_offset = self.padding_offset.or(decode_metadata + .padding_offset + .map(|offset| self.input_consumed_len + offset)); + self.input_consumed_len += b64_len_to_decode; + self.b64_offset += b64_len_to_decode; + self.b64_len -= b64_len_to_decode; + + debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); + + Ok(decode_metadata.decoded_len) + } + + /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded + /// input from. + /// + /// Because `DecoderReader` performs internal buffering, the state of the inner reader is + /// unspecified. This function is mainly provided because the inner reader type may provide + /// additional functionality beyond the `Read` implementation which may still be useful. + pub fn into_inner(self) -> R { + self.inner + } +} + +impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> { + /// Decode input from the wrapped reader. + /// + /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes + /// written in `buf`. + /// + /// Where possible, this function buffers base64 to minimize the number of read() calls to the + /// delegate reader. + /// + /// # Errors + /// + /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid + /// base64 are also possible, and will have `io::ErrorKind::InvalidData`. + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + if buf.is_empty() { + return Ok(0); + } + + // offset == BUF_SIZE when we copied it all last time + debug_assert!(self.b64_offset <= BUF_SIZE); + debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); + debug_assert!(if self.b64_offset == BUF_SIZE { + self.b64_len == 0 + } else { + self.b64_len <= BUF_SIZE + }); + + debug_assert!(if self.decoded_len == 0 { + // can be = when we were able to copy the complete chunk + self.decoded_offset <= DECODED_CHUNK_SIZE + } else { + self.decoded_offset < DECODED_CHUNK_SIZE + }); + + // We shouldn't ever decode into decoded_buffer when we can't immediately write at least one + // byte into the provided buf, so the effective length should only be 3 momentarily between + // when we decode and when we copy into the target buffer. + debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); + debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE); + + if self.decoded_len > 0 { + // we have a few leftover decoded bytes; flush that rather than pull in more b64 + self.flush_decoded_buf(buf) + } else { + let mut at_eof = false; + while self.b64_len < BASE64_CHUNK_SIZE { + // Copy any bytes we have to the start of the buffer. + self.b64_buffer + .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0); + self.b64_offset = 0; + + // then fill in more data + let read = self.read_from_delegate()?; + if read == 0 { + // we never read into an empty buf, so 0 => we've hit EOF + at_eof = true; + break; + } + } + + if self.b64_len == 0 { + debug_assert!(at_eof); + // we must be at EOF, and we have no data left to decode + return Ok(0); + }; + + debug_assert!(if at_eof { + // if we are at eof, we may not have a complete chunk + self.b64_len > 0 + } else { + // otherwise, we must have at least one chunk + self.b64_len >= BASE64_CHUNK_SIZE + }); + + debug_assert_eq!(0, self.decoded_len); + + if buf.len() < DECODED_CHUNK_SIZE { + // caller requested an annoyingly short read + // have to write to a tmp buf first to avoid double mutable borrow + let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE]; + // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have + // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64 + // tokens, not 1, since 1 token can't decode to 1 byte). + let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE); + + let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?; + self.decoded_chunk_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]); + + self.decoded_offset = 0; + self.decoded_len = decoded; + + // can be less than 3 on last block due to padding + debug_assert!(decoded <= 3); + + self.flush_decoded_buf(buf) + } else { + let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE) + .checked_mul(BASE64_CHUNK_SIZE) + .expect("too many chunks"); + debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE); + + let b64_bytes_available_to_decode = if at_eof { + self.b64_len + } else { + // only use complete chunks + self.b64_len - self.b64_len % 4 + }; + + let actual_decode_len = cmp::min( + b64_bytes_that_can_decode_into_buf, + b64_bytes_available_to_decode, + ); + self.decode_to_buf(actual_decode_len, buf) + } + } + } +} diff --git a/vendor/base64/src/read/decoder_tests.rs b/vendor/base64/src/read/decoder_tests.rs new file mode 100644 index 00000000..f3431457 --- /dev/null +++ b/vendor/base64/src/read/decoder_tests.rs @@ -0,0 +1,487 @@ +use std::{ + cmp, + io::{self, Read as _}, + iter, +}; + +use rand::{Rng as _, RngCore as _}; + +use super::decoder::{DecoderReader, BUF_SIZE}; +use crate::{ + alphabet, + engine::{general_purpose::STANDARD, Engine, GeneralPurpose}, + tests::{random_alphabet, random_config, random_engine}, + DecodeError, PAD_BYTE, +}; + +#[test] +fn simple() { + let tests: &[(&[u8], &[u8])] = &[ + (&b"0"[..], &b"MA=="[..]), + (b"01", b"MDE="), + (b"012", b"MDEy"), + (b"0123", b"MDEyMw=="), + (b"01234", b"MDEyMzQ="), + (b"012345", b"MDEyMzQ1"), + (b"0123456", b"MDEyMzQ1Ng=="), + (b"01234567", b"MDEyMzQ1Njc="), + (b"012345678", b"MDEyMzQ1Njc4"), + (b"0123456789", b"MDEyMzQ1Njc4OQ=="), + ][..]; + + for (text_expected, base64data) in tests.iter() { + // Read n bytes at a time. + for n in 1..base64data.len() + 1 { + let mut wrapped_reader = io::Cursor::new(base64data); + let mut decoder = DecoderReader::new(&mut wrapped_reader, &STANDARD); + + // handle errors as you normally would + let mut text_got = Vec::new(); + let mut buffer = vec![0u8; n]; + while let Ok(read) = decoder.read(&mut buffer[..]) { + if read == 0 { + break; + } + text_got.extend_from_slice(&buffer[..read]); + } + + assert_eq!( + text_got, + *text_expected, + "\nGot: {}\nExpected: {}", + String::from_utf8_lossy(&text_got[..]), + String::from_utf8_lossy(text_expected) + ); + } + } +} + +// Make sure we error out on trailing junk. +#[test] +fn trailing_junk() { + let tests: &[&[u8]] = &[&b"MDEyMzQ1Njc4*!@#$%^&"[..], b"MDEyMzQ1Njc4OQ== "][..]; + + for base64data in tests.iter() { + // Read n bytes at a time. + for n in 1..base64data.len() + 1 { + let mut wrapped_reader = io::Cursor::new(base64data); + let mut decoder = DecoderReader::new(&mut wrapped_reader, &STANDARD); + + // handle errors as you normally would + let mut buffer = vec![0u8; n]; + let mut saw_error = false; + loop { + match decoder.read(&mut buffer[..]) { + Err(_) => { + saw_error = true; + break; + } + Ok(0) => break, + Ok(_len) => (), + } + } + + assert!(saw_error); + } + } +} + +#[test] +fn handles_short_read_from_delegate() { + let mut rng = rand::thread_rng(); + let mut bytes = Vec::new(); + let mut b64 = String::new(); + let mut decoded = Vec::new(); + + for _ in 0..10_000 { + bytes.clear(); + b64.clear(); + decoded.clear(); + + let size = rng.gen_range(0..(10 * BUF_SIZE)); + bytes.extend(iter::repeat(0).take(size)); + bytes.truncate(size); + rng.fill_bytes(&mut bytes[..size]); + assert_eq!(size, bytes.len()); + + let engine = random_engine(&mut rng); + engine.encode_string(&bytes[..], &mut b64); + + let mut wrapped_reader = io::Cursor::new(b64.as_bytes()); + let mut short_reader = RandomShortRead { + delegate: &mut wrapped_reader, + rng: &mut rng, + }; + + let mut decoder = DecoderReader::new(&mut short_reader, &engine); + + let decoded_len = decoder.read_to_end(&mut decoded).unwrap(); + assert_eq!(size, decoded_len); + assert_eq!(&bytes[..], &decoded[..]); + } +} + +#[test] +fn read_in_short_increments() { + let mut rng = rand::thread_rng(); + let mut bytes = Vec::new(); + let mut b64 = String::new(); + let mut decoded = Vec::new(); + + for _ in 0..10_000 { + bytes.clear(); + b64.clear(); + decoded.clear(); + + let size = rng.gen_range(0..(10 * BUF_SIZE)); + bytes.extend(iter::repeat(0).take(size)); + // leave room to play around with larger buffers + decoded.extend(iter::repeat(0).take(size * 3)); + + rng.fill_bytes(&mut bytes[..]); + assert_eq!(size, bytes.len()); + + let engine = random_engine(&mut rng); + + engine.encode_string(&bytes[..], &mut b64); + + let mut wrapped_reader = io::Cursor::new(&b64[..]); + let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); + + consume_with_short_reads_and_validate(&mut rng, &bytes[..], &mut decoded, &mut decoder); + } +} + +#[test] +fn read_in_short_increments_with_short_delegate_reads() { + let mut rng = rand::thread_rng(); + let mut bytes = Vec::new(); + let mut b64 = String::new(); + let mut decoded = Vec::new(); + + for _ in 0..10_000 { + bytes.clear(); + b64.clear(); + decoded.clear(); + + let size = rng.gen_range(0..(10 * BUF_SIZE)); + bytes.extend(iter::repeat(0).take(size)); + // leave room to play around with larger buffers + decoded.extend(iter::repeat(0).take(size * 3)); + + rng.fill_bytes(&mut bytes[..]); + assert_eq!(size, bytes.len()); + + let engine = random_engine(&mut rng); + + engine.encode_string(&bytes[..], &mut b64); + + let mut base_reader = io::Cursor::new(&b64[..]); + let mut decoder = DecoderReader::new(&mut base_reader, &engine); + let mut short_reader = RandomShortRead { + delegate: &mut decoder, + rng: &mut rand::thread_rng(), + }; + + consume_with_short_reads_and_validate( + &mut rng, + &bytes[..], + &mut decoded, + &mut short_reader, + ); + } +} + +#[test] +fn reports_invalid_last_symbol_correctly() { + let mut rng = rand::thread_rng(); + let mut bytes = Vec::new(); + let mut b64 = String::new(); + let mut b64_bytes = Vec::new(); + let mut decoded = Vec::new(); + let mut bulk_decoded = Vec::new(); + + for _ in 0..1_000 { + bytes.clear(); + b64.clear(); + b64_bytes.clear(); + + let size = rng.gen_range(1..(10 * BUF_SIZE)); + bytes.extend(iter::repeat(0).take(size)); + decoded.extend(iter::repeat(0).take(size)); + rng.fill_bytes(&mut bytes[..]); + assert_eq!(size, bytes.len()); + + let config = random_config(&mut rng); + let alphabet = random_alphabet(&mut rng); + // changing padding will cause invalid padding errors when we twiddle the last byte + let engine = GeneralPurpose::new(alphabet, config.with_encode_padding(false)); + engine.encode_string(&bytes[..], &mut b64); + b64_bytes.extend(b64.bytes()); + assert_eq!(b64_bytes.len(), b64.len()); + + // change the last character to every possible symbol. Should behave the same as bulk + // decoding whether invalid or valid. + for &s1 in alphabet.symbols.iter() { + decoded.clear(); + bulk_decoded.clear(); + + // replace the last + *b64_bytes.last_mut().unwrap() = s1; + let bulk_res = engine.decode_vec(&b64_bytes[..], &mut bulk_decoded); + + let mut wrapped_reader = io::Cursor::new(&b64_bytes[..]); + let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); + + let stream_res = decoder.read_to_end(&mut decoded).map(|_| ()).map_err(|e| { + e.into_inner() + .and_then(|e| e.downcast::<DecodeError>().ok()) + }); + + assert_eq!(bulk_res.map_err(|e| Some(Box::new(e))), stream_res); + } + } +} + +#[test] +fn reports_invalid_byte_correctly() { + let mut rng = rand::thread_rng(); + let mut bytes = Vec::new(); + let mut b64 = String::new(); + let mut stream_decoded = Vec::new(); + let mut bulk_decoded = Vec::new(); + + for _ in 0..10_000 { + bytes.clear(); + b64.clear(); + stream_decoded.clear(); + bulk_decoded.clear(); + + let size = rng.gen_range(1..(10 * BUF_SIZE)); + bytes.extend(iter::repeat(0).take(size)); + rng.fill_bytes(&mut bytes[..size]); + assert_eq!(size, bytes.len()); + + let engine = GeneralPurpose::new(&alphabet::STANDARD, random_config(&mut rng)); + + engine.encode_string(&bytes[..], &mut b64); + // replace one byte, somewhere, with '*', which is invalid + let bad_byte_pos = rng.gen_range(0..b64.len()); + let mut b64_bytes = b64.bytes().collect::<Vec<u8>>(); + b64_bytes[bad_byte_pos] = b'*'; + + let mut wrapped_reader = io::Cursor::new(b64_bytes.clone()); + let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); + + let read_decode_err = decoder + .read_to_end(&mut stream_decoded) + .map_err(|e| { + let kind = e.kind(); + let inner = e + .into_inner() + .and_then(|e| e.downcast::<DecodeError>().ok()); + inner.map(|i| (*i, kind)) + }) + .err() + .and_then(|o| o); + + let bulk_decode_err = engine.decode_vec(&b64_bytes[..], &mut bulk_decoded).err(); + + // it's tricky to predict where the invalid data's offset will be since if it's in the last + // chunk it will be reported at the first padding location because it's treated as invalid + // padding. So, we just check that it's the same as it is for decoding all at once. + assert_eq!( + bulk_decode_err.map(|e| (e, io::ErrorKind::InvalidData)), + read_decode_err + ); + } +} + +#[test] +fn internal_padding_error_with_short_read_concatenated_texts_invalid_byte_error() { + let mut rng = rand::thread_rng(); + let mut bytes = Vec::new(); + let mut b64 = String::new(); + let mut reader_decoded = Vec::new(); + let mut bulk_decoded = Vec::new(); + + // encodes with padding, requires that padding be present so we don't get InvalidPadding + // just because padding is there at all + let engine = STANDARD; + + for _ in 0..10_000 { + bytes.clear(); + b64.clear(); + reader_decoded.clear(); + bulk_decoded.clear(); + + // at least 2 bytes so there can be a split point between bytes + let size = rng.gen_range(2..(10 * BUF_SIZE)); + bytes.resize(size, 0); + rng.fill_bytes(&mut bytes[..size]); + + // Concatenate two valid b64s, yielding padding in the middle. + // This avoids scenarios that are challenging to assert on, like random padding location + // that might be InvalidLastSymbol when decoded at certain buffer sizes but InvalidByte + // when done all at once. + let split = loop { + // find a split point that will produce padding on the first part + let s = rng.gen_range(1..size); + if s % 3 != 0 { + // short enough to need padding + break s; + }; + }; + + engine.encode_string(&bytes[..split], &mut b64); + assert!(b64.contains('='), "split: {}, b64: {}", split, b64); + let bad_byte_pos = b64.find('=').unwrap(); + engine.encode_string(&bytes[split..], &mut b64); + let b64_bytes = b64.as_bytes(); + + // short read to make it plausible for padding to happen on a read boundary + let read_len = rng.gen_range(1..10); + let mut wrapped_reader = ShortRead { + max_read_len: read_len, + delegate: io::Cursor::new(&b64_bytes), + }; + + let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); + + let read_decode_err = decoder + .read_to_end(&mut reader_decoded) + .map_err(|e| { + *e.into_inner() + .and_then(|e| e.downcast::<DecodeError>().ok()) + .unwrap() + }) + .unwrap_err(); + + let bulk_decode_err = engine.decode_vec(b64_bytes, &mut bulk_decoded).unwrap_err(); + + assert_eq!( + bulk_decode_err, + read_decode_err, + "read len: {}, bad byte pos: {}, b64: {}", + read_len, + bad_byte_pos, + std::str::from_utf8(b64_bytes).unwrap() + ); + assert_eq!( + DecodeError::InvalidByte( + split / 3 * 4 + + match split % 3 { + 1 => 2, + 2 => 3, + _ => unreachable!(), + }, + PAD_BYTE + ), + read_decode_err + ); + } +} + +#[test] +fn internal_padding_anywhere_error() { + let mut rng = rand::thread_rng(); + let mut bytes = Vec::new(); + let mut b64 = String::new(); + let mut reader_decoded = Vec::new(); + + // encodes with padding, requires that padding be present so we don't get InvalidPadding + // just because padding is there at all + let engine = STANDARD; + + for _ in 0..10_000 { + bytes.clear(); + b64.clear(); + reader_decoded.clear(); + + bytes.resize(10 * BUF_SIZE, 0); + rng.fill_bytes(&mut bytes[..]); + + // Just shove a padding byte in there somewhere. + // The specific error to expect is challenging to predict precisely because it + // will vary based on the position of the padding in the quad and the read buffer + // length, but SOMETHING should go wrong. + + engine.encode_string(&bytes[..], &mut b64); + let mut b64_bytes = b64.as_bytes().to_vec(); + // put padding somewhere other than the last quad + b64_bytes[rng.gen_range(0..bytes.len() - 4)] = PAD_BYTE; + + // short read to make it plausible for padding to happen on a read boundary + let read_len = rng.gen_range(1..10); + let mut wrapped_reader = ShortRead { + max_read_len: read_len, + delegate: io::Cursor::new(&b64_bytes), + }; + + let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); + + let result = decoder.read_to_end(&mut reader_decoded); + assert!(result.is_err()); + } +} + +fn consume_with_short_reads_and_validate<R: io::Read>( + rng: &mut rand::rngs::ThreadRng, + expected_bytes: &[u8], + decoded: &mut [u8], + short_reader: &mut R, +) { + let mut total_read = 0_usize; + loop { + assert!( + total_read <= expected_bytes.len(), + "tr {} size {}", + total_read, + expected_bytes.len() + ); + if total_read == expected_bytes.len() { + assert_eq!(expected_bytes, &decoded[..total_read]); + // should be done + assert_eq!(0, short_reader.read(&mut *decoded).unwrap()); + // didn't write anything + assert_eq!(expected_bytes, &decoded[..total_read]); + + break; + } + let decode_len = rng.gen_range(1..cmp::max(2, expected_bytes.len() * 2)); + + let read = short_reader + .read(&mut decoded[total_read..total_read + decode_len]) + .unwrap(); + total_read += read; + } +} + +/// Limits how many bytes a reader will provide in each read call. +/// Useful for shaking out code that may work fine only with typical input sources that always fill +/// the buffer. +struct RandomShortRead<'a, 'b, R: io::Read, N: rand::Rng> { + delegate: &'b mut R, + rng: &'a mut N, +} + +impl<'a, 'b, R: io::Read, N: rand::Rng> io::Read for RandomShortRead<'a, 'b, R, N> { + fn read(&mut self, buf: &mut [u8]) -> Result<usize, io::Error> { + // avoid 0 since it means EOF for non-empty buffers + let effective_len = cmp::min(self.rng.gen_range(1..20), buf.len()); + + self.delegate.read(&mut buf[..effective_len]) + } +} + +struct ShortRead<R: io::Read> { + delegate: R, + max_read_len: usize, +} + +impl<R: io::Read> io::Read for ShortRead<R> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + let len = self.max_read_len.max(buf.len()); + self.delegate.read(&mut buf[..len]) + } +} diff --git a/vendor/base64/src/read/mod.rs b/vendor/base64/src/read/mod.rs new file mode 100644 index 00000000..85606448 --- /dev/null +++ b/vendor/base64/src/read/mod.rs @@ -0,0 +1,6 @@ +//! Implementations of `io::Read` to transparently decode base64. +mod decoder; +pub use self::decoder::DecoderReader; + +#[cfg(test)] +mod decoder_tests; |
