summaryrefslogtreecommitdiff
path: root/vendor/string_cache/src/atom.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/string_cache/src/atom.rs')
-rw-r--r--vendor/string_cache/src/atom.rs415
1 files changed, 0 insertions, 415 deletions
diff --git a/vendor/string_cache/src/atom.rs b/vendor/string_cache/src/atom.rs
deleted file mode 100644
index 5a8aa7f0..00000000
--- a/vendor/string_cache/src/atom.rs
+++ /dev/null
@@ -1,415 +0,0 @@
-// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-use crate::dynamic_set::{dynamic_set, Entry};
-use crate::static_sets::StaticAtomSet;
-use debug_unreachable::debug_unreachable;
-
-use std::borrow::Cow;
-use std::cmp::Ordering::{self, Equal};
-use std::fmt;
-use std::hash::{Hash, Hasher};
-use std::marker::PhantomData;
-use std::mem;
-use std::num::NonZeroU64;
-use std::ops;
-use std::slice;
-use std::str;
-use std::sync::atomic::Ordering::SeqCst;
-
-const DYNAMIC_TAG: u8 = 0b_00;
-const INLINE_TAG: u8 = 0b_01; // len in upper nybble
-const STATIC_TAG: u8 = 0b_10;
-const TAG_MASK: u64 = 0b_11;
-const LEN_OFFSET: u64 = 4;
-const LEN_MASK: u64 = 0xF0;
-
-const MAX_INLINE_LEN: usize = 7;
-const STATIC_SHIFT_BITS: usize = 32;
-
-/// Represents a string that has been interned.
-///
-/// While the type definition for `Atom` indicates that it generic on a particular
-/// implementation of an atom set, you don't need to worry about this. Atoms can be static
-/// and come from a `StaticAtomSet` generated by the `string_cache_codegen` crate, or they
-/// can be dynamic and created by you on an `EmptyStaticAtomSet`.
-///
-/// `Atom` implements `Clone` but not `Copy`, since internally atoms are reference-counted;
-/// this means that you may need to `.clone()` an atom to keep copies to it in different
-/// places, or when passing it to a function that takes an `Atom` rather than an `&Atom`.
-///
-/// ## Creating an atom at runtime
-///
-/// If you use `string_cache_codegen` to generate a precomputed list of atoms, your code
-/// may then do something like read data from somewhere and extract tokens that need to be
-/// compared to the atoms. In this case, you can use `Atom::from(&str)` or
-/// `Atom::from(String)`. These create a reference-counted atom which will be
-/// automatically freed when all references to it are dropped.
-///
-/// This means that your application can safely have a loop which tokenizes data, creates
-/// atoms from the tokens, and compares the atoms to a predefined set of keywords, without
-/// running the risk of arbitrary memory consumption from creating large numbers of atoms —
-/// as long as your application does not store clones of the atoms it creates along the
-/// way.
-///
-/// For example, the following is safe and will not consume arbitrary amounts of memory:
-///
-/// ```ignore
-/// let untrusted_data = "large amounts of text ...";
-///
-/// for token in untrusted_data.split_whitespace() {
-/// let atom = Atom::from(token); // interns the string
-///
-/// if atom == Atom::from("keyword") {
-/// // handle that keyword
-/// } else if atom == Atom::from("another_keyword") {
-/// // handle that keyword
-/// } else {
-/// println!("unknown keyword");
-/// }
-/// } // atom is dropped here, so it is not kept around in memory
-/// ```
-#[derive(PartialEq, Eq)]
-// NOTE: Deriving PartialEq requires that a given string must always be interned the same way.
-pub struct Atom<Static> {
- unsafe_data: NonZeroU64,
- phantom: PhantomData<Static>,
-}
-
-// This isn't really correct as the Atoms can technically take up space. But I guess it's ok
-// as it is possible to measure the size of the atom set separately/
-#[cfg(feature = "malloc_size_of")]
-impl<Static: StaticAtomSet> malloc_size_of::MallocSizeOf for Atom<Static> {
- fn size_of(&self, _ops: &mut malloc_size_of::MallocSizeOfOps) -> usize {
- 0
- }
-}
-
-// FIXME: bound removed from the struct definition before of this error for pack_static:
-// "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable"
-// https://github.com/rust-lang/rust/issues/57563
-impl<Static> Atom<Static> {
- /// For the atom!() macros
- #[inline(always)]
- #[doc(hidden)]
- pub const fn pack_static(n: u32) -> Self {
- Self {
- unsafe_data: unsafe {
- // STATIC_TAG ensures this is non-zero
- NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS))
- },
- phantom: PhantomData,
- }
- }
-
- /// For the atom!() macros
- #[inline(always)]
- #[doc(hidden)]
- pub const fn pack_inline(mut n: u64, len: u8) -> Self {
- if cfg!(target_endian = "big") {
- // Reverse order of top 7 bytes.
- // Bottom 8 bits of `n` are zero, and we need that to remain so.
- // String data is stored in top 7 bytes, tag and length in bottom byte.
- n = n.to_le() << 8;
- }
-
- let data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET) | n;
- Self {
- // INLINE_TAG ensures this is never zero
- unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
- phantom: PhantomData,
- }
- }
-
- fn tag(&self) -> u8 {
- (self.unsafe_data.get() & TAG_MASK) as u8
- }
-}
-
-impl<Static: StaticAtomSet> Atom<Static> {
- /// Return the internal representation. For testing.
- #[doc(hidden)]
- pub fn unsafe_data(&self) -> u64 {
- self.unsafe_data.get()
- }
-
- /// Return true if this is a static Atom. For testing.
- #[doc(hidden)]
- pub fn is_static(&self) -> bool {
- self.tag() == STATIC_TAG
- }
-
- /// Return true if this is a dynamic Atom. For testing.
- #[doc(hidden)]
- pub fn is_dynamic(&self) -> bool {
- self.tag() == DYNAMIC_TAG
- }
-
- /// Return true if this is an inline Atom. For testing.
- #[doc(hidden)]
- pub fn is_inline(&self) -> bool {
- self.tag() == INLINE_TAG
- }
-
- fn static_index(&self) -> u64 {
- self.unsafe_data.get() >> STATIC_SHIFT_BITS
- }
-
- /// Get the hash of the string as it is stored in the set.
- pub fn get_hash(&self) -> u32 {
- match self.tag() {
- DYNAMIC_TAG => {
- let entry = self.unsafe_data.get() as *const Entry;
- unsafe { (*entry).hash }
- }
- STATIC_TAG => Static::get().hashes[self.static_index() as usize],
- INLINE_TAG => {
- let data = self.unsafe_data.get();
- // This may or may not be great...
- ((data >> 32) ^ data) as u32
- }
- _ => unsafe { debug_unreachable!() },
- }
- }
-
- pub fn try_static(string_to_add: &str) -> Option<Self> {
- Self::try_static_internal(string_to_add).ok()
- }
-
- fn try_static_internal(string_to_add: &str) -> Result<Self, phf_shared::Hashes> {
- let static_set = Static::get();
- let hash = phf_shared::hash(&*string_to_add, &static_set.key);
- let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len());
-
- if static_set.atoms[index as usize] == string_to_add {
- Ok(Self::pack_static(index))
- } else {
- Err(hash)
- }
- }
-}
-
-impl<Static: StaticAtomSet> Default for Atom<Static> {
- #[inline]
- fn default() -> Self {
- Atom::pack_static(Static::empty_string_index())
- }
-}
-
-impl<Static: StaticAtomSet> Hash for Atom<Static> {
- #[inline]
- fn hash<H>(&self, state: &mut H)
- where
- H: Hasher,
- {
- state.write_u32(self.get_hash())
- }
-}
-
-impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
- fn from(string_to_add: Cow<'a, str>) -> Self {
- let len = string_to_add.len();
- if len == 0 {
- Self::pack_static(Static::empty_string_index())
- } else if len <= MAX_INLINE_LEN {
- let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
- {
- let dest = inline_atom_slice_mut(&mut data);
- dest[..len].copy_from_slice(string_to_add.as_bytes());
- }
- Atom {
- // INLINE_TAG ensures this is never zero
- unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
- phantom: PhantomData,
- }
- } else {
- Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
- let ptr: std::ptr::NonNull<Entry> = dynamic_set().insert(string_to_add, hash.g);
- let data = ptr.as_ptr() as u64;
- debug_assert!(0 == data & TAG_MASK);
- Atom {
- // The address of a ptr::NonNull is non-zero
- unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
- phantom: PhantomData,
- }
- })
- }
- }
-}
-
-impl<Static: StaticAtomSet> Clone for Atom<Static> {
- #[inline(always)]
- fn clone(&self) -> Self {
- if self.tag() == DYNAMIC_TAG {
- let entry = self.unsafe_data.get() as *const Entry;
- unsafe { &*entry }.ref_count.fetch_add(1, SeqCst);
- }
- Atom { ..*self }
- }
-}
-
-impl<Static> Drop for Atom<Static> {
- #[inline]
- fn drop(&mut self) {
- if self.tag() == DYNAMIC_TAG {
- let entry = self.unsafe_data.get() as *const Entry;
- if unsafe { &*entry }.ref_count.fetch_sub(1, SeqCst) == 1 {
- drop_slow(self)
- }
- }
-
- // Out of line to guide inlining.
- fn drop_slow<Static>(this: &mut Atom<Static>) {
- dynamic_set().remove(this.unsafe_data.get() as *mut Entry);
- }
- }
-}
-
-impl<Static: StaticAtomSet> ops::Deref for Atom<Static> {
- type Target = str;
-
- #[inline]
- fn deref(&self) -> &str {
- unsafe {
- match self.tag() {
- DYNAMIC_TAG => {
- let entry = self.unsafe_data.get() as *const Entry;
- &(*entry).string
- }
- INLINE_TAG => {
- let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET;
- debug_assert!(len as usize <= MAX_INLINE_LEN);
- let src = inline_atom_slice(&self.unsafe_data);
- str::from_utf8_unchecked(src.get_unchecked(..(len as usize)))
- }
- STATIC_TAG => Static::get().atoms[self.static_index() as usize],
- _ => debug_unreachable!(),
- }
- }
- }
-}
-
-impl<Static: StaticAtomSet> fmt::Debug for Atom<Static> {
- #[inline]
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- let ty_str = unsafe {
- match self.tag() {
- DYNAMIC_TAG => "dynamic",
- INLINE_TAG => "inline",
- STATIC_TAG => "static",
- _ => debug_unreachable!(),
- }
- };
-
- write!(f, "Atom('{}' type={})", &*self, ty_str)
- }
-}
-
-impl<Static: StaticAtomSet> PartialOrd for Atom<Static> {
- #[inline]
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- if self.unsafe_data == other.unsafe_data {
- return Some(Equal);
- }
- self.as_ref().partial_cmp(other.as_ref())
- }
-}
-
-impl<Static: StaticAtomSet> Ord for Atom<Static> {
- #[inline]
- fn cmp(&self, other: &Self) -> Ordering {
- if self.unsafe_data == other.unsafe_data {
- return Equal;
- }
- self.as_ref().cmp(other.as_ref())
- }
-}
-
-// AsciiExt requires mutating methods, so we just implement the non-mutating ones.
-// We don't need to implement is_ascii because there's no performance improvement
-// over the one from &str.
-impl<Static: StaticAtomSet> Atom<Static> {
- fn from_mutated_str<F: FnOnce(&mut str)>(s: &str, f: F) -> Self {
- let mut buffer = mem::MaybeUninit::<[u8; 64]>::uninit();
- let buffer = unsafe { &mut *buffer.as_mut_ptr() };
-
- if let Some(buffer_prefix) = buffer.get_mut(..s.len()) {
- buffer_prefix.copy_from_slice(s.as_bytes());
- let as_str = unsafe { ::std::str::from_utf8_unchecked_mut(buffer_prefix) };
- f(as_str);
- Atom::from(&*as_str)
- } else {
- let mut string = s.to_owned();
- f(&mut string);
- Atom::from(string)
- }
- }
-
- /// Like [`to_ascii_uppercase`].
- ///
- /// [`to_ascii_uppercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase
- pub fn to_ascii_uppercase(&self) -> Self {
- for (i, b) in self.bytes().enumerate() {
- if let b'a'..=b'z' = b {
- return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase());
- }
- }
- self.clone()
- }
-
- /// Like [`to_ascii_lowercase`].
- ///
- /// [`to_ascii_lowercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_lowercase
- pub fn to_ascii_lowercase(&self) -> Self {
- for (i, b) in self.bytes().enumerate() {
- if let b'A'..=b'Z' = b {
- return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase());
- }
- }
- self.clone()
- }
-
- /// Like [`eq_ignore_ascii_case`].
- ///
- /// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case
- pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
- (self == other) || self.eq_str_ignore_ascii_case(&**other)
- }
-
- /// Like [`eq_ignore_ascii_case`], but takes an unhashed string as `other`.
- ///
- /// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case
- pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool {
- (&**self).eq_ignore_ascii_case(other)
- }
-}
-
-#[inline(always)]
-fn inline_atom_slice(x: &NonZeroU64) -> &[u8] {
- let x: *const NonZeroU64 = x;
- let mut data = x as *const u8;
- // All except the lowest byte, which is first in little-endian, last in big-endian.
- if cfg!(target_endian = "little") {
- data = unsafe { data.offset(1) };
- }
- let len = 7;
- unsafe { slice::from_raw_parts(data, len) }
-}
-
-#[inline(always)]
-fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] {
- let x: *mut u64 = x;
- let mut data = x as *mut u8;
- // All except the lowest byte, which is first in little-endian, last in big-endian.
- if cfg!(target_endian = "little") {
- data = unsafe { data.offset(1) };
- }
- let len = 7;
- unsafe { slice::from_raw_parts_mut(data, len) }
-}