summaryrefslogtreecommitdiff
path: root/vendor/object/src/read/read_cache.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/object/src/read/read_cache.rs')
-rw-r--r--vendor/object/src/read/read_cache.rs261
1 files changed, 261 insertions, 0 deletions
diff --git a/vendor/object/src/read/read_cache.rs b/vendor/object/src/read/read_cache.rs
new file mode 100644
index 00000000..bfb9bf73
--- /dev/null
+++ b/vendor/object/src/read/read_cache.rs
@@ -0,0 +1,261 @@
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::cell::RefCell;
+use core::convert::TryInto;
+use core::mem;
+use core::ops::Range;
+#[cfg(feature = "std")]
+use std::io::{Read, Seek, SeekFrom};
+
+#[cfg(not(feature = "std"))]
+use alloc::collections::btree_map::{BTreeMap as Map, Entry};
+#[cfg(feature = "std")]
+use std::collections::hash_map::{Entry, HashMap as Map};
+
+use crate::read::ReadRef;
+
+/// An implementation of [`ReadRef`] for data in a stream that implements
+/// `Read + Seek`.
+///
+/// Contains a cache of read-only blocks of data, allowing references to
+/// them to be returned. Entries in the cache are never removed.
+/// Entries are keyed on the offset and size of the read.
+/// Currently overlapping reads are considered separate reads.
+///
+/// This is primarily intended for environments where memory mapped files
+/// are not available or not suitable, such as WebAssembly.
+///
+/// Note that malformed files can cause the cache to grow much larger than
+/// the file size.
+#[derive(Debug)]
+pub struct ReadCache<R: ReadCacheOps> {
+ cache: RefCell<ReadCacheInternal<R>>,
+}
+
+#[derive(Debug)]
+struct ReadCacheInternal<R: ReadCacheOps> {
+ read: R,
+ bufs: Map<(u64, u64), Box<[u8]>>,
+ strings: Map<(u64, u8), Box<[u8]>>,
+ len: Option<u64>,
+}
+
+impl<R: ReadCacheOps> ReadCacheInternal<R> {
+ /// Ensures this range is contained in the len of the file
+ fn range_in_bounds(&mut self, range: &Range<u64>) -> Result<(), ()> {
+ if range.start <= range.end && range.end <= self.len()? {
+ Ok(())
+ } else {
+ Err(())
+ }
+ }
+
+ /// The length of the underlying read, memoized
+ fn len(&mut self) -> Result<u64, ()> {
+ match self.len {
+ Some(len) => Ok(len),
+ None => {
+ let len = self.read.len()?;
+ self.len = Some(len);
+ Ok(len)
+ }
+ }
+ }
+}
+
+impl<R: ReadCacheOps> ReadCache<R> {
+ /// Create an empty `ReadCache` for the given stream.
+ pub fn new(read: R) -> Self {
+ ReadCache {
+ cache: RefCell::new(ReadCacheInternal {
+ read,
+ bufs: Map::new(),
+ strings: Map::new(),
+ len: None,
+ }),
+ }
+ }
+
+ /// Return an implementation of `ReadRef` that restricts reads
+ /// to the given range of the stream.
+ pub fn range(&self, offset: u64, size: u64) -> ReadCacheRange<'_, R> {
+ ReadCacheRange {
+ r: self,
+ offset,
+ size,
+ }
+ }
+
+ /// Free buffers used by the cache.
+ pub fn clear(&mut self) {
+ self.cache.borrow_mut().bufs.clear();
+ }
+
+ /// Unwrap this `ReadCache<R>`, returning the underlying reader.
+ pub fn into_inner(self) -> R {
+ self.cache.into_inner().read
+ }
+}
+
+impl<'a, R: ReadCacheOps> ReadRef<'a> for &'a ReadCache<R> {
+ fn len(self) -> Result<u64, ()> {
+ self.cache.borrow_mut().len()
+ }
+
+ fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8], ()> {
+ if size == 0 {
+ return Ok(&[]);
+ }
+ let cache = &mut *self.cache.borrow_mut();
+ cache.range_in_bounds(&(offset..(offset.saturating_add(size))))?;
+ let buf = match cache.bufs.entry((offset, size)) {
+ Entry::Occupied(entry) => entry.into_mut(),
+ Entry::Vacant(entry) => {
+ let size = size.try_into().map_err(|_| ())?;
+ cache.read.seek(offset)?;
+ let mut bytes = Vec::new();
+ bytes.try_reserve_exact(size).map_err(|_| ())?;
+ bytes.resize(size, 0);
+ let mut bytes = bytes.into_boxed_slice();
+ cache.read.read_exact(&mut bytes)?;
+ entry.insert(bytes)
+ }
+ };
+ // Extend the lifetime to that of self.
+ // This is OK because we never mutate or remove entries.
+ Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) })
+ }
+
+ fn read_bytes_at_until(self, range: Range<u64>, delimiter: u8) -> Result<&'a [u8], ()> {
+ let cache = &mut *self.cache.borrow_mut();
+ cache.range_in_bounds(&range)?;
+ let buf = match cache.strings.entry((range.start, delimiter)) {
+ Entry::Occupied(entry) => entry.into_mut(),
+ Entry::Vacant(entry) => {
+ cache.read.seek(range.start)?;
+
+ let max_check: usize = (range.end - range.start).try_into().map_err(|_| ())?;
+ // Strings should be relatively small.
+ // TODO: make this configurable?
+ let max_check = max_check.min(4096);
+
+ let mut bytes = Vec::new();
+ let mut checked = 0;
+ loop {
+ bytes.resize((checked + 256).min(max_check), 0);
+ let read = cache.read.read(&mut bytes[checked..])?;
+ if read == 0 {
+ return Err(());
+ }
+ if let Some(len) = memchr::memchr(delimiter, &bytes[checked..][..read]) {
+ bytes.truncate(checked + len);
+ break entry.insert(bytes.into_boxed_slice());
+ }
+ checked += read;
+ if checked >= max_check {
+ return Err(());
+ }
+ }
+ }
+ };
+ // Extend the lifetime to that of self.
+ // This is OK because we never mutate or remove entries.
+ Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) })
+ }
+}
+
+/// An implementation of [`ReadRef`] for a range of data in a stream that
+/// implements `Read + Seek`.
+///
+/// Shares an underlying [`ReadCache`] with a lifetime of `'a`.
+#[derive(Debug)]
+pub struct ReadCacheRange<'a, R: ReadCacheOps> {
+ r: &'a ReadCache<R>,
+ offset: u64,
+ size: u64,
+}
+
+impl<'a, R: ReadCacheOps> Clone for ReadCacheRange<'a, R> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<'a, R: ReadCacheOps> Copy for ReadCacheRange<'a, R> {}
+
+impl<'a, R: ReadCacheOps> ReadRef<'a> for ReadCacheRange<'a, R> {
+ fn len(self) -> Result<u64, ()> {
+ Ok(self.size)
+ }
+
+ fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8], ()> {
+ if size == 0 {
+ return Ok(&[]);
+ }
+ let end = offset.checked_add(size).ok_or(())?;
+ if end > self.size {
+ return Err(());
+ }
+ let r_offset = self.offset.checked_add(offset).ok_or(())?;
+ self.r.read_bytes_at(r_offset, size)
+ }
+
+ fn read_bytes_at_until(self, range: Range<u64>, delimiter: u8) -> Result<&'a [u8], ()> {
+ let r_start = self.offset.checked_add(range.start).ok_or(())?;
+ let r_end = self.offset.checked_add(range.end).ok_or(())?;
+ let bytes = self.r.read_bytes_at_until(r_start..r_end, delimiter)?;
+ let size = bytes.len().try_into().map_err(|_| ())?;
+ let end = range.start.checked_add(size).ok_or(())?;
+ if end > self.size {
+ return Err(());
+ }
+ Ok(bytes)
+ }
+}
+
+/// Operations required to implement [`ReadCache`].
+///
+/// This is a subset of the `Read` and `Seek` traits.
+/// A blanket implementation is provided for all types that implement
+/// `Read + Seek`.
+#[allow(clippy::len_without_is_empty)]
+pub trait ReadCacheOps {
+ /// Return the length of the stream.
+ ///
+ /// Equivalent to `std::io::Seek::seek(SeekFrom::End(0))`.
+ fn len(&mut self) -> Result<u64, ()>;
+
+ /// Seek to the given position in the stream.
+ ///
+ /// Equivalent to `std::io::Seek::seek` with `SeekFrom::Start(pos)`.
+ fn seek(&mut self, pos: u64) -> Result<u64, ()>;
+
+ /// Read up to `buf.len()` bytes into `buf`.
+ ///
+ /// Equivalent to `std::io::Read::read`.
+ fn read(&mut self, buf: &mut [u8]) -> Result<usize, ()>;
+
+ /// Read exactly `buf.len()` bytes into `buf`.
+ ///
+ /// Equivalent to `std::io::Read::read_exact`.
+ fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), ()>;
+}
+
+#[cfg(feature = "std")]
+impl<T: Read + Seek> ReadCacheOps for T {
+ fn len(&mut self) -> Result<u64, ()> {
+ self.seek(SeekFrom::End(0)).map_err(|_| ())
+ }
+
+ fn seek(&mut self, pos: u64) -> Result<u64, ()> {
+ self.seek(SeekFrom::Start(pos)).map_err(|_| ())
+ }
+
+ fn read(&mut self, buf: &mut [u8]) -> Result<usize, ()> {
+ Read::read(self, buf).map_err(|_| ())
+ }
+
+ fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), ()> {
+ Read::read_exact(self, buf).map_err(|_| ())
+ }
+}