diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-02 18:36:06 -0600 |
| commit | 8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch) | |
| tree | 22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/string_cache/src | |
| parent | 4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff) | |
chore: add vendor directory
Diffstat (limited to 'vendor/string_cache/src')
| -rw-r--r-- | vendor/string_cache/src/atom.rs | 415 | ||||
| -rw-r--r-- | vendor/string_cache/src/dynamic_set.rs | 112 | ||||
| -rw-r--r-- | vendor/string_cache/src/lib.rs | 139 | ||||
| -rw-r--r-- | vendor/string_cache/src/static_sets.rs | 64 | ||||
| -rw-r--r-- | vendor/string_cache/src/trivial_impls.rs | 119 |
5 files changed, 849 insertions, 0 deletions
diff --git a/vendor/string_cache/src/atom.rs b/vendor/string_cache/src/atom.rs new file mode 100644 index 00000000..5a8aa7f0 --- /dev/null +++ b/vendor/string_cache/src/atom.rs @@ -0,0 +1,415 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::dynamic_set::{dynamic_set, Entry}; +use crate::static_sets::StaticAtomSet; +use debug_unreachable::debug_unreachable; + +use std::borrow::Cow; +use std::cmp::Ordering::{self, Equal}; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::marker::PhantomData; +use std::mem; +use std::num::NonZeroU64; +use std::ops; +use std::slice; +use std::str; +use std::sync::atomic::Ordering::SeqCst; + +const DYNAMIC_TAG: u8 = 0b_00; +const INLINE_TAG: u8 = 0b_01; // len in upper nybble +const STATIC_TAG: u8 = 0b_10; +const TAG_MASK: u64 = 0b_11; +const LEN_OFFSET: u64 = 4; +const LEN_MASK: u64 = 0xF0; + +const MAX_INLINE_LEN: usize = 7; +const STATIC_SHIFT_BITS: usize = 32; + +/// Represents a string that has been interned. +/// +/// While the type definition for `Atom` indicates that it is generic over a particular +/// implementation of an atom set, you don't need to worry about this. Atoms can be static +/// and come from a `StaticAtomSet` generated by the `string_cache_codegen` crate, or they +/// can be dynamic and created by you on an `EmptyStaticAtomSet`. +/// +/// `Atom` implements `Clone` but not `Copy`, since internally atoms are reference-counted; +/// this means that you may need to `.clone()` an atom to keep copies of it in different +/// places, or when passing it to a function that takes an `Atom` rather than an `&Atom`. 
+/// +/// ## Creating an atom at runtime +/// +/// If you use `string_cache_codegen` to generate a precomputed list of atoms, your code +/// may then do something like read data from somewhere and extract tokens that need to be +/// compared to the atoms. In this case, you can use `Atom::from(&str)` or +/// `Atom::from(String)`. These create a reference-counted atom which will be +/// automatically freed when all references to it are dropped. +/// +/// This means that your application can safely have a loop which tokenizes data, creates +/// atoms from the tokens, and compares the atoms to a predefined set of keywords, without +/// running the risk of arbitrary memory consumption from creating large numbers of atoms — +/// as long as your application does not store clones of the atoms it creates along the +/// way. +/// +/// For example, the following is safe and will not consume arbitrary amounts of memory: +/// +/// ```ignore +/// let untrusted_data = "large amounts of text ..."; +/// +/// for token in untrusted_data.split_whitespace() { +/// let atom = Atom::from(token); // interns the string +/// +/// if atom == Atom::from("keyword") { +/// // handle that keyword +/// } else if atom == Atom::from("another_keyword") { +/// // handle that keyword +/// } else { +/// println!("unknown keyword"); +/// } +/// } // atom is dropped here, so it is not kept around in memory +/// ``` +#[derive(PartialEq, Eq)] +// NOTE: Deriving PartialEq requires that a given string must always be interned the same way. +pub struct Atom<Static> { + unsafe_data: NonZeroU64, + phantom: PhantomData<Static>, +} + +// This isn't really correct as the Atoms can technically take up space. 
But I guess it's ok +// as it is possible to measure the size of the atom set separately. +#[cfg(feature = "malloc_size_of")] +impl<Static: StaticAtomSet> malloc_size_of::MallocSizeOf for Atom<Static> { + fn size_of(&self, _ops: &mut malloc_size_of::MallocSizeOfOps) -> usize { + 0 + } +} + +// FIXME: bound removed from the struct definition because of this error for pack_static: +// "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable" +// https://github.com/rust-lang/rust/issues/57563 +impl<Static> Atom<Static> { + /// For the atom!() macros + #[inline(always)] + #[doc(hidden)] + pub const fn pack_static(n: u32) -> Self { + Self { + unsafe_data: unsafe { + // STATIC_TAG ensures this is non-zero + NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS)) + }, + phantom: PhantomData, + } + } + + /// For the atom!() macros + #[inline(always)] + #[doc(hidden)] + pub const fn pack_inline(mut n: u64, len: u8) -> Self { + if cfg!(target_endian = "big") { + // Reverse order of top 7 bytes. + // Bottom 8 bits of `n` are zero, and we need that to remain so. + // String data is stored in top 7 bytes, tag and length in bottom byte. + n = n.to_le() << 8; + } + + let data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET) | n; + Self { + // INLINE_TAG ensures this is never zero + unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } + } + + fn tag(&self) -> u8 { + (self.unsafe_data.get() & TAG_MASK) as u8 + } +} + +impl<Static: StaticAtomSet> Atom<Static> { + /// Return the internal representation. For testing. + #[doc(hidden)] + pub fn unsafe_data(&self) -> u64 { + self.unsafe_data.get() + } + + /// Return true if this is a static Atom. For testing. + #[doc(hidden)] + pub fn is_static(&self) -> bool { + self.tag() == STATIC_TAG + } + + /// Return true if this is a dynamic Atom. For testing. 
+ #[doc(hidden)] + pub fn is_dynamic(&self) -> bool { + self.tag() == DYNAMIC_TAG + } + + /// Return true if this is an inline Atom. For testing. + #[doc(hidden)] + pub fn is_inline(&self) -> bool { + self.tag() == INLINE_TAG + } + + fn static_index(&self) -> u64 { + self.unsafe_data.get() >> STATIC_SHIFT_BITS + } + + /// Get the hash of the string as it is stored in the set. + pub fn get_hash(&self) -> u32 { + match self.tag() { + DYNAMIC_TAG => { + let entry = self.unsafe_data.get() as *const Entry; + unsafe { (*entry).hash } + } + STATIC_TAG => Static::get().hashes[self.static_index() as usize], + INLINE_TAG => { + let data = self.unsafe_data.get(); + // This may or may not be great... + ((data >> 32) ^ data) as u32 + } + _ => unsafe { debug_unreachable!() }, + } + } + + pub fn try_static(string_to_add: &str) -> Option<Self> { + Self::try_static_internal(string_to_add).ok() + } + + fn try_static_internal(string_to_add: &str) -> Result<Self, phf_shared::Hashes> { + let static_set = Static::get(); + let hash = phf_shared::hash(&*string_to_add, &static_set.key); + let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len()); + + if static_set.atoms[index as usize] == string_to_add { + Ok(Self::pack_static(index)) + } else { + Err(hash) + } + } +} + +impl<Static: StaticAtomSet> Default for Atom<Static> { + #[inline] + fn default() -> Self { + Atom::pack_static(Static::empty_string_index()) + } +} + +impl<Static: StaticAtomSet> Hash for Atom<Static> { + #[inline] + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + state.write_u32(self.get_hash()) + } +} + +impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> { + fn from(string_to_add: Cow<'a, str>) -> Self { + let len = string_to_add.len(); + if len == 0 { + Self::pack_static(Static::empty_string_index()) + } else if len <= MAX_INLINE_LEN { + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET); + { + let dest = inline_atom_slice_mut(&mut data); + 
dest[..len].copy_from_slice(string_to_add.as_bytes()); + } + Atom { + // INLINE_TAG ensures this is never zero + unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } + } else { + Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| { + let ptr: std::ptr::NonNull<Entry> = dynamic_set().insert(string_to_add, hash.g); + let data = ptr.as_ptr() as u64; + debug_assert!(0 == data & TAG_MASK); + Atom { + // The address of a ptr::NonNull is non-zero + unsafe_data: unsafe { NonZeroU64::new_unchecked(data) }, + phantom: PhantomData, + } + }) + } + } +} + +impl<Static: StaticAtomSet> Clone for Atom<Static> { + #[inline(always)] + fn clone(&self) -> Self { + if self.tag() == DYNAMIC_TAG { + let entry = self.unsafe_data.get() as *const Entry; + unsafe { &*entry }.ref_count.fetch_add(1, SeqCst); + } + Atom { ..*self } + } +} + +impl<Static> Drop for Atom<Static> { + #[inline] + fn drop(&mut self) { + if self.tag() == DYNAMIC_TAG { + let entry = self.unsafe_data.get() as *const Entry; + if unsafe { &*entry }.ref_count.fetch_sub(1, SeqCst) == 1 { + drop_slow(self) + } + } + + // Out of line to guide inlining. 
+ fn drop_slow<Static>(this: &mut Atom<Static>) { + dynamic_set().remove(this.unsafe_data.get() as *mut Entry); + } + } +} + +impl<Static: StaticAtomSet> ops::Deref for Atom<Static> { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + unsafe { + match self.tag() { + DYNAMIC_TAG => { + let entry = self.unsafe_data.get() as *const Entry; + &(*entry).string + } + INLINE_TAG => { + let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET; + debug_assert!(len as usize <= MAX_INLINE_LEN); + let src = inline_atom_slice(&self.unsafe_data); + str::from_utf8_unchecked(src.get_unchecked(..(len as usize))) + } + STATIC_TAG => Static::get().atoms[self.static_index() as usize], + _ => debug_unreachable!(), + } + } + } +} + +impl<Static: StaticAtomSet> fmt::Debug for Atom<Static> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let ty_str = unsafe { + match self.tag() { + DYNAMIC_TAG => "dynamic", + INLINE_TAG => "inline", + STATIC_TAG => "static", + _ => debug_unreachable!(), + } + }; + + write!(f, "Atom('{}' type={})", &*self, ty_str) + } +} + +impl<Static: StaticAtomSet> PartialOrd for Atom<Static> { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + if self.unsafe_data == other.unsafe_data { + return Some(Equal); + } + self.as_ref().partial_cmp(other.as_ref()) + } +} + +impl<Static: StaticAtomSet> Ord for Atom<Static> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + if self.unsafe_data == other.unsafe_data { + return Equal; + } + self.as_ref().cmp(other.as_ref()) + } +} + +// AsciiExt requires mutating methods, so we just implement the non-mutating ones. +// We don't need to implement is_ascii because there's no performance improvement +// over the one from &str. 
+impl<Static: StaticAtomSet> Atom<Static> { + fn from_mutated_str<F: FnOnce(&mut str)>(s: &str, f: F) -> Self { + let mut buffer = mem::MaybeUninit::<[u8; 64]>::uninit(); + let buffer = unsafe { &mut *buffer.as_mut_ptr() }; + + if let Some(buffer_prefix) = buffer.get_mut(..s.len()) { + buffer_prefix.copy_from_slice(s.as_bytes()); + let as_str = unsafe { ::std::str::from_utf8_unchecked_mut(buffer_prefix) }; + f(as_str); + Atom::from(&*as_str) + } else { + let mut string = s.to_owned(); + f(&mut string); + Atom::from(string) + } + } + + /// Like [`to_ascii_uppercase`]. + /// + /// [`to_ascii_uppercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase + pub fn to_ascii_uppercase(&self) -> Self { + for (i, b) in self.bytes().enumerate() { + if let b'a'..=b'z' = b { + return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase()); + } + } + self.clone() + } + + /// Like [`to_ascii_lowercase`]. + /// + /// [`to_ascii_lowercase`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_lowercase + pub fn to_ascii_lowercase(&self) -> Self { + for (i, b) in self.bytes().enumerate() { + if let b'A'..=b'Z' = b { + return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase()); + } + } + self.clone() + } + + /// Like [`eq_ignore_ascii_case`]. + /// + /// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case + pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { + (self == other) || self.eq_str_ignore_ascii_case(&**other) + } + + /// Like [`eq_ignore_ascii_case`], but takes an unhashed string as `other`. 
+ /// + /// [`eq_ignore_ascii_case`]: https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.eq_ignore_ascii_case + pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool { + (&**self).eq_ignore_ascii_case(other) + } +} + +#[inline(always)] +fn inline_atom_slice(x: &NonZeroU64) -> &[u8] { + let x: *const NonZeroU64 = x; + let mut data = x as *const u8; + // All except the lowest byte, which is first in little-endian, last in big-endian. + if cfg!(target_endian = "little") { + data = unsafe { data.offset(1) }; + } + let len = 7; + unsafe { slice::from_raw_parts(data, len) } +} + +#[inline(always)] +fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] { + let x: *mut u64 = x; + let mut data = x as *mut u8; + // All except the lowest byte, which is first in little-endian, last in big-endian. + if cfg!(target_endian = "little") { + data = unsafe { data.offset(1) }; + } + let len = 7; + unsafe { slice::from_raw_parts_mut(data, len) } +} diff --git a/vendor/string_cache/src/dynamic_set.rs b/vendor/string_cache/src/dynamic_set.rs new file mode 100644 index 00000000..4442b4da --- /dev/null +++ b/vendor/string_cache/src/dynamic_set.rs @@ -0,0 +1,112 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use parking_lot::Mutex; +use std::borrow::Cow; +use std::mem; +use std::ptr::NonNull; +use std::sync::atomic::AtomicIsize; +use std::sync::atomic::Ordering::SeqCst; +use std::sync::OnceLock; + +const NB_BUCKETS: usize = 1 << 12; // 4096 +const BUCKET_MASK: u32 = (1 << 12) - 1; + +pub(crate) struct Set { + buckets: Box<[Mutex<Option<Box<Entry>>>]>, +} + +pub(crate) struct Entry { + pub(crate) string: Box<str>, + pub(crate) hash: u32, + pub(crate) ref_count: AtomicIsize, + next_in_bucket: Option<Box<Entry>>, +} + +// Addresses are multiples of this, +// and therefore have TAG_MASK bits unset, available for tagging. +pub(crate) const ENTRY_ALIGNMENT: usize = 4; + +#[test] +fn entry_alignment_is_sufficient() { + assert!(mem::align_of::<Entry>() >= ENTRY_ALIGNMENT); +} + +pub(crate) fn dynamic_set() -> &'static Set { + // NOTE: Using const initialization for buckets breaks the small-stack test. + // ``` + // // buckets: [Mutex<Option<Box<Entry>>>; NB_BUCKETS], + // const MUTEX: Mutex<Option<Box<Entry>>> = Mutex::new(None); + // let buckets = Box::new([MUTEX; NB_BUCKETS]); + // ``` + static DYNAMIC_SET: OnceLock<Set> = OnceLock::new(); + + DYNAMIC_SET.get_or_init(|| { + let buckets = (0..NB_BUCKETS).map(|_| Mutex::new(None)).collect(); + Set { buckets } + }) +} + +impl Set { + pub(crate) fn insert(&self, string: Cow<str>, hash: u32) -> NonNull<Entry> { + let bucket_index = (hash & BUCKET_MASK) as usize; + let mut linked_list = self.buckets[bucket_index].lock(); + + { + let mut ptr: Option<&mut Box<Entry>> = linked_list.as_mut(); + + while let Some(entry) = ptr.take() { + if entry.hash == hash && *entry.string == *string { + if entry.ref_count.fetch_add(1, SeqCst) > 0 { + return NonNull::from(&mut **entry); + } + // Uh-oh. The pointer's reference count was zero, which means someone may try + // to free it. 
(Naive attempts to defend against this, for example having the + // destructor check to see whether the reference count is indeed zero, don't + // work due to ABA.) Thus we need to temporarily add a duplicate string to the + // list. + entry.ref_count.fetch_sub(1, SeqCst); + break; + } + ptr = entry.next_in_bucket.as_mut(); + } + } + debug_assert!(mem::align_of::<Entry>() >= ENTRY_ALIGNMENT); + let string = string.into_owned(); + let mut entry = Box::new(Entry { + next_in_bucket: linked_list.take(), + hash, + ref_count: AtomicIsize::new(1), + string: string.into_boxed_str(), + }); + let ptr = NonNull::from(&mut *entry); + *linked_list = Some(entry); + ptr + } + + pub(crate) fn remove(&self, ptr: *mut Entry) { + let value: &Entry = unsafe { &*ptr }; + let bucket_index = (value.hash & BUCKET_MASK) as usize; + + let mut linked_list = self.buckets[bucket_index].lock(); + debug_assert!(value.ref_count.load(SeqCst) == 0); + let mut current: &mut Option<Box<Entry>> = &mut linked_list; + + while let Some(entry_ptr) = current.as_mut() { + let entry_ptr: *mut Entry = &mut **entry_ptr; + if entry_ptr == ptr { + mem::drop(mem::replace(current, unsafe { + (*entry_ptr).next_in_bucket.take() + })); + break; + } + current = unsafe { &mut (*entry_ptr).next_in_bucket }; + } + } +} diff --git a/vendor/string_cache/src/lib.rs b/vendor/string_cache/src/lib.rs new file mode 100644 index 00000000..441cb4ef --- /dev/null +++ b/vendor/string_cache/src/lib.rs @@ -0,0 +1,139 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! +//! A library for interning things that are `AsRef<str>`. +//! +//! 
Some strings may be interned at compile time using the `string-cache-codegen` crate, or the +//! `EmptyStaticAtomSet` may be used that has no compile-time interned strings. An `Atom` is an +//! interned string for a given set (either `EmptyStaticAtomSet` or a generated `StaticAtomSet`). +//! +//! Generated `Atom`s will have associated macros to intern static strings at compile-time. +//! +//! # Examples +//! +//! Here are two examples, one with compile-time `Atom`s, and one without. +//! +//! ## With compile-time atoms +//! +//! In `Cargo.toml`: +//! ```toml +//! [dependencies] +//! string_cache = "0.8" +//! +//! [dev-dependencies] +//! string_cache_codegen = "0.5" +//! ``` +//! +//! In `build.rs`: +//! +//! ```ignore +//! extern crate string_cache_codegen; +//! +//! use std::env; +//! use std::path::Path; +//! +//! fn main() { +//! string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!") +//! .atoms(&["foo", "bar"]) +//! .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs")) +//! .unwrap() +//! } +//! ``` +//! +//! In `lib.rs`: +//! +//! ```ignore +//! extern crate string_cache; +//! +//! mod foo { +//! include!(concat!(env!("OUT_DIR"), "/foo_atom.rs")); +//! } +//! +//! fn use_the_atom(t: &str) { +//! match *t { +//! foo_atom!("foo") => println!("Found foo!"), +//! foo_atom!("bar") => println!("Found bar!"), +//! // foo_atom!("baz") => println!("Found baz!"), - would be a compile time error +//! _ => { +//! println!("String not interned"); +//! // We can intern strings at runtime as well +//! foo::FooAtom::from(t) +//! } +//! } +//! } +//! ``` +//! +//! ## No compile-time atoms +//! +//! ``` +//! # extern crate string_cache; +//! use string_cache::DefaultAtom; +//! +//! # fn main() { +//! let mut interned_stuff = Vec::new(); +//! let text = "here is a sentence of text that will be tokenised and +//! interned and some repeated tokens is of text and"; +//! for word in text.split_whitespace() { +//! 
let seen_before = interned_stuff.iter() +//! // We can use impl PartialEq<T> where T is anything string-like +//! // to compare interned strings to either other interned strings, +//! // or actual strings. Comparing two interned strings is very fast +//! // (normally a single cpu operation). +//! .filter(|interned_word| interned_word == &word) +//! .count(); +//! if seen_before > 0 { +//! println!(r#"Seen the word "{}" {} times"#, word, seen_before); +//! } else { +//! println!(r#"Not seen the word "{}" before"#, word); +//! } +//! // We use the impl From<(Cow<'a, str>, or &'a str, or String)> for +//! // Atom<Static> to intern a new string. +//! interned_stuff.push(DefaultAtom::from(word)); +//! } +//! # } +//! ``` +//! + +#![cfg_attr(test, deny(warnings))] + +// Types, such as Atom, that impl Hash must follow the hash invariant: if two objects match +// with PartialEq, they must also have the same Hash. Clippy warns on types that derive one while +// manually impl-ing the other, because it seems easy for the two to drift apart, causing the +// invariant to be violated. +// +// But Atom is a newtype over NonZeroU64, and probably always will be, since cheap comparisons and +// copying are this library's purpose. So we know what the PartialEq comparison is going to do. +// +// The `get_hash` function, seen in `atom.rs`, consults that number, plus the global string interner +// tables. The only way for the resulting hash for two Atoms with the same inner 64-bit number to +// differ would be if the table entry changed between invocations, and that would be really bad. +#![allow(clippy::derive_hash_xor_eq)] + +mod atom; +mod dynamic_set; +mod static_sets; +mod trivial_impls; + +pub use atom::Atom; +pub use static_sets::{EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; + +/// Use this if you don’t care about static atoms. +pub type DefaultAtom = Atom<EmptyStaticAtomSet>; + +// Some minor tests of internal layout here. +// See ../integration-tests for much more. 
+ +/// Guard against accidental changes to the sizes of things. +#[test] +fn assert_sizes() { + use std::mem::size_of; + assert_eq!(size_of::<DefaultAtom>(), 8); + assert_eq!(size_of::<Option<DefaultAtom>>(), size_of::<DefaultAtom>(),); +} diff --git a/vendor/string_cache/src/static_sets.rs b/vendor/string_cache/src/static_sets.rs new file mode 100644 index 00000000..f7f1799f --- /dev/null +++ b/vendor/string_cache/src/static_sets.rs @@ -0,0 +1,64 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/// A static `PhfStrSet` +/// +/// This trait is implemented by static sets of interned strings generated using +/// `string_cache_codegen`, and `EmptyStaticAtomSet` for when strings will be added dynamically. +/// +/// It is used by the methods of [`Atom`] to check if a string is present in the static set. +/// +/// [`Atom`]: struct.Atom.html +pub trait StaticAtomSet: Ord { + /// Get the location of the static string set in the binary. + fn get() -> &'static PhfStrSet; + /// Get the index of the empty string, which is in every set and is used for `Atom::default`. + fn empty_string_index() -> u32; +} + +/// A string set created using a [perfect hash function], specifically +/// [Hash, Displace and Compress]. +/// +/// See the CHD document for the meaning of the struct fields. 
+/// +/// [perfect hash function]: https://en.wikipedia.org/wiki/Perfect_hash_function +/// [Hash, Displace and Compress]: http://cmph.sourceforge.net/papers/esa09.pdf +pub struct PhfStrSet { + #[doc(hidden)] + pub key: u64, + #[doc(hidden)] + pub disps: &'static [(u32, u32)], + #[doc(hidden)] + pub atoms: &'static [&'static str], + #[doc(hidden)] + pub hashes: &'static [u32], +} + +/// An empty static atom set for when only dynamic strings will be added +#[derive(PartialEq, Eq, PartialOrd, Ord)] +pub struct EmptyStaticAtomSet; + +impl StaticAtomSet for EmptyStaticAtomSet { + fn get() -> &'static PhfStrSet { + // The name is a lie: this set is not empty (it contains the empty string) + // but that’s only to avoid divisions by zero in rust-phf. + static SET: PhfStrSet = PhfStrSet { + key: 0, + disps: &[(0, 0)], + atoms: &[""], + // "" SipHash'd, and xored with u64_hash_to_u32. + hashes: &[0x3ddddef3], + }; + &SET + } + + fn empty_string_index() -> u32 { + 0 + } +} diff --git a/vendor/string_cache/src/trivial_impls.rs b/vendor/string_cache/src/trivial_impls.rs new file mode 100644 index 00000000..960dde0d --- /dev/null +++ b/vendor/string_cache/src/trivial_impls.rs @@ -0,0 +1,119 @@ +// Copyright 2014 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use crate::{Atom, StaticAtomSet}; +#[cfg(feature = "serde_support")] +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::borrow::Cow; +use std::fmt; + +impl<Static: StaticAtomSet> ::precomputed_hash::PrecomputedHash for Atom<Static> { + fn precomputed_hash(&self) -> u32 { + self.get_hash() + } +} + +impl<'a, Static: StaticAtomSet> From<&'a Atom<Static>> for Atom<Static> { + fn from(atom: &'a Self) -> Self { + atom.clone() + } +} + +impl<Static: StaticAtomSet> PartialEq<str> for Atom<Static> { + fn eq(&self, other: &str) -> bool { + &self[..] == other + } +} + +impl<Static: StaticAtomSet> PartialEq<Atom<Static>> for str { + fn eq(&self, other: &Atom<Static>) -> bool { + self == &other[..] + } +} + +impl<Static: StaticAtomSet> PartialEq<String> for Atom<Static> { + fn eq(&self, other: &String) -> bool { + self[..] == other[..] + } +} + +impl<'a, Static: StaticAtomSet> From<&'a str> for Atom<Static> { + #[inline] + fn from(string_to_add: &str) -> Self { + Atom::from(Cow::Borrowed(string_to_add)) + } +} + +impl<Static: StaticAtomSet> From<String> for Atom<Static> { + #[inline] + fn from(string_to_add: String) -> Self { + Atom::from(Cow::Owned(string_to_add)) + } +} + +impl<Static: StaticAtomSet> fmt::Display for Atom<Static> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + <str as fmt::Display>::fmt(self, f) + } +} + +impl<Static: StaticAtomSet> AsRef<str> for Atom<Static> { + fn as_ref(&self) -> &str { + self + } +} + +#[cfg(feature = "serde_support")] +impl<Static: StaticAtomSet> Serialize for Atom<Static> { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let string: &str = self.as_ref(); + string.serialize(serializer) + } +} + +#[cfg(feature = "serde_support")] +impl<'a, Static: StaticAtomSet> Deserialize<'a> for Atom<Static> { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'a>, + { + use serde::de; + use 
std::marker::PhantomData; + + struct AtomVisitor<Static: StaticAtomSet>(PhantomData<Static>); + + impl<'de, Static: StaticAtomSet> de::Visitor<'de> for AtomVisitor<Static> { + type Value = Atom<Static>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "an Atom") + } + + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: de::Error, + { + Ok(Atom::from(v)) + } + + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: de::Error, + { + Ok(Atom::from(v)) + } + } + + deserializer.deserialize_str(AtomVisitor(PhantomData)) + } +} |
