chore: add vendor directory

author: mo khan <mo@mokhan.ca> 2025-07-02 18:36:06 -0600
committer: mo khan <mo@mokhan.ca> 2025-07-02 18:36:06 -0600
commit: 8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch)
tree: 22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/string_cache/src
parent: 4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff)
5 files changed, 849 insertions, 0 deletions
diff --git a/vendor/string_cache/src/atom.rs b/vendor/string_cache/src/atom.rs
new file mode 100644
index 00000000..5a8aa7f0
--- /dev/null
+++ b/vendor/string_cache/src/atom.rs
@@ -0,0 +1,415 @@
+// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use crate::dynamic_set::{dynamic_set, Entry};
+use crate::static_sets::StaticAtomSet;
+use debug_unreachable::debug_unreachable;
+
+use std::borrow::Cow;
+use std::cmp::Ordering::{self, Equal};
+use std::fmt;
+use std::hash::{Hash, Hasher};
+use std::marker::PhantomData;
+use std::mem;
+use std::num::NonZeroU64;
+use std::ops;
+use std::slice;
+use std::str;
+use std::sync::atomic::Ordering::SeqCst;
+
+const DYNAMIC_TAG: u8 = 0b_00; // word is the address of a dynamic-set `Entry`
+const INLINE_TAG: u8 = 0b_01; // len in upper nybble of the low byte, string bytes in the other seven
+const STATIC_TAG: u8 = 0b_10; // static-set index is stored above STATIC_SHIFT_BITS
+const TAG_MASK: u64 = 0b_11; // the two low bits that hold one of the tags above
+const LEN_OFFSET: u64 = 4; // right shift that recovers the inline length after masking
+const LEN_MASK: u64 = 0xF0; // bits of the low byte that hold the inline length
+
+const MAX_INLINE_LEN: usize = 7; // longest string, in bytes, that is stored inline
+const STATIC_SHIFT_BITS: usize = 32; // a static index occupies the upper 32 bits of the word
+
+/// Represents a string that has been interned.
+///
+/// While the type definition for `Atom` indicates that it is generic on a particular
+/// implementation of an atom set, you don't need to worry about this.  Atoms can be static
+/// and come from a `StaticAtomSet` generated by the `string_cache_codegen` crate, or they
+/// can be dynamic and created by you on an `EmptyStaticAtomSet`.
+///
+/// `Atom` implements `Clone` but not `Copy`, since internally atoms are reference-counted;
+/// this means that you may need to `.clone()` an atom to keep copies to it in different
+/// places, or when passing it to a function that takes an `Atom` rather than an `&Atom`.
+///
+/// ## Creating an atom at runtime
+///
+/// If you use `string_cache_codegen` to generate a precomputed list of atoms, your code
+/// may then do something like read data from somewhere and extract tokens that need to be
+/// compared to the atoms.  In this case, you can use `Atom::from(&str)` or
+/// `Atom::from(String)`.  These create a reference-counted atom which will be
+/// automatically freed when all references to it are dropped.
+///
+/// This means that your application can safely have a loop which tokenizes data, creates
+/// atoms from the tokens, and compares the atoms to a predefined set of keywords, without
+/// running the risk of arbitrary memory consumption from creating large numbers of atoms —
+/// as long as your application does not store clones of the atoms it creates along the
+/// way.
+///
+/// For example, the following is safe and will not consume arbitrary amounts of memory:
+///
+/// ```ignore
+/// let untrusted_data = "large amounts of text ...";
+///
+/// for token in untrusted_data.split_whitespace() {
+///     let atom = Atom::from(token); // interns the string
+///
+///     if atom == Atom::from("keyword") {
+///         // handle that keyword
+///     } else if atom == Atom::from("another_keyword") {
+///         // handle that keyword
+///     } else {
+///         println!("unknown keyword");
+///     }
+/// } // atom is dropped here, so it is not kept around in memory
+/// ```
+#[derive(PartialEq, Eq)]
+// NOTE: The derived PartialEq compares the packed words, so a given string must always be interned the same way.
+pub struct Atom<Static> {
+    unsafe_data: NonZeroU64, // packed word; its two low bits (TAG_MASK) select dynamic, inline or static
+    phantom: PhantomData<Static>, // names the static set type without storing a value of it
+}
+
+// This isn't really correct as the Atoms can technically take up space. But I guess it's ok
+// as it is possible to measure the size of the atom set separately.
+#[cfg(feature = "malloc_size_of")]
+impl<Static: StaticAtomSet> malloc_size_of::MallocSizeOf for Atom<Static> {
+    fn size_of(&self, _ops: &mut malloc_size_of::MallocSizeOfOps) -> usize {
+        0 // every atom reports zero heap bytes, whatever its representation
+    }
+}
+
+// FIXME: bound removed from the struct definition before of this error for pack_static:
+// "error[E0723]: trait bounds other than `Sized` on const fn parameters are unstable"
+// https://github.com/rust-lang/rust/issues/57563
+impl<Static> Atom<Static> {
+    /// For the atom!() macros
+    #[inline(always)]
+    #[doc(hidden)]
+    pub const fn pack_static(n: u32) -> Self {
+        Self {
+            unsafe_data: unsafe {
+                // STATIC_TAG ensures this is non-zero
+                NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS))
+            },
+            phantom: PhantomData,
+        }
+    }
+
+    /// For the atom!() macros
+    #[inline(always)]
+    #[doc(hidden)]
+    pub const fn pack_inline(mut n: u64, len: u8) -> Self {
+        if cfg!(target_endian = "big") {
+            // Reverse order of top 7 bytes.
+            // Bottom 8 bits of `n` are zero, and we need that to remain so.
+            // String data is stored in top 7 bytes, tag and length in bottom byte.
+            n = n.to_le() << 8;
+        }
+
+        let data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET) | n;
+        Self {
+            // INLINE_TAG ensures this is never zero
+            unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
+            phantom: PhantomData,
+        }
+    }
+
+    fn tag(&self) -> u8 {
+        (self.unsafe_data.get() & TAG_MASK) as u8
+    }
+}
+
+impl<Static: StaticAtomSet> Atom<Static> {
+    /// Return the internal representation. For testing.
+    #[doc(hidden)]
+    pub fn unsafe_data(&self) -> u64 {
+        u64::from(self.unsafe_data)
+    }
+
+    /// Return true if this is a static Atom. For testing.
+    #[doc(hidden)]
+    pub fn is_static(&self) -> bool {
+        matches!(self.tag(), STATIC_TAG)
+    }
+
+    /// Return true if this is a dynamic Atom. For testing.
+    #[doc(hidden)]
+    pub fn is_dynamic(&self) -> bool {
+        matches!(self.tag(), DYNAMIC_TAG)
+    }
+
+    /// Return true if this is an inline Atom. For testing.
+    #[doc(hidden)]
+    pub fn is_inline(&self) -> bool {
+        matches!(self.tag(), INLINE_TAG)
+    }
+
+    /// Index into the static set, read from the bits above STATIC_SHIFT_BITS.
+    fn static_index(&self) -> u64 {
+        self.unsafe_data.get() >> STATIC_SHIFT_BITS
+    }
+
+    /// Get the hash of the string as it is stored in the set.
+    pub fn get_hash(&self) -> u32 {
+        let word = self.unsafe_data.get();
+        match self.tag() {
+            // SAFETY: a dynamic atom's word is the address of the `Entry` it references.
+            DYNAMIC_TAG => unsafe { (*(word as *const Entry)).hash },
+            STATIC_TAG => Static::get().hashes[self.static_index() as usize],
+            // Fold the two halves of the word together. This may or may not be great...
+            INLINE_TAG => ((word >> 32) ^ word) as u32,
+            // SAFETY: assumes no atom is ever built with the remaining tag value 0b11.
+            _ => unsafe { debug_unreachable!() },
+        }
+    }
+
+    /// Returns the atom for `string_to_add` only if the static set contains it.
+    pub fn try_static(string_to_add: &str) -> Option<Self> {
+        Self::try_static_internal(string_to_add).ok()
+    }
+
+    /// Looks `string_to_add` up in the static set's perfect hash table.
+    ///
+    /// On a miss the computed hashes are handed back so the caller can reuse them.
+    fn try_static_internal(string_to_add: &str) -> Result<Self, phf_shared::Hashes> {
+        let set = Static::get();
+        let hashes = phf_shared::hash(string_to_add, &set.key);
+        let slot = phf_shared::get_index(&hashes, set.disps, set.atoms.len());
+        if set.atoms[slot as usize] != string_to_add {
+            return Err(hashes);
+        }
+        Ok(Self::pack_static(slot))
+    }
+}
+
+impl<Static: StaticAtomSet> Default for Atom<Static> {
+    #[inline]
+    fn default() -> Self {
+        Self::pack_static(Static::empty_string_index()) // the static set's empty-string atom
+    }
+}
+
+impl<Static: StaticAtomSet> Hash for Atom<Static> {
+    /// Feeds the 32-bit value of `get_hash` to `state` rather than the string
+    /// bytes themselves.
+    #[inline]
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        let stored: u32 = self.get_hash();
+        state.write_u32(stored)
+    }
+}
+
+impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
+    /// Interns `string_to_add` as a static, inline or dynamic atom (see the branches below).
+    fn from(string_to_add: Cow<'a, str>) -> Self {
+        let len = string_to_add.len();
+        if len == 0 {
+            return Self::pack_static(Static::empty_string_index());
+        }
+        if len <= MAX_INLINE_LEN {
+            let mut word: u64 = ((len as u64) << LEN_OFFSET) | (INLINE_TAG as u64);
+            inline_atom_slice_mut(&mut word)[..len].copy_from_slice(string_to_add.as_bytes());
+            return Atom {
+                // INLINE_TAG ensures this is never zero
+                unsafe_data: unsafe { NonZeroU64::new_unchecked(word) },
+                phantom: PhantomData,
+            };
+        }
+        match Self::try_static_internal(&string_to_add) {
+            Ok(static_atom) => static_atom,
+            Err(hashes) => {
+                let addr = dynamic_set().insert(string_to_add, hashes.g).as_ptr() as u64;
+                debug_assert!(0 == addr & TAG_MASK);
+                Atom {
+                    // The address of a ptr::NonNull is non-zero
+                    unsafe_data: unsafe { NonZeroU64::new_unchecked(addr) },
+                    phantom: PhantomData,
+                }
+            }
+        }
+    }
+}
+
+impl<Static: StaticAtomSet> Clone for Atom<Static> {
+    #[inline(always)]
+    fn clone(&self) -> Self {
+        if self.is_dynamic() {
+            // SAFETY: a dynamic atom's word is the address of the `Entry` it references.
+            unsafe { &*(self.unsafe_data.get() as *const Entry) }.ref_count.fetch_add(1, SeqCst);
+        }
+        Atom { unsafe_data: self.unsafe_data, phantom: PhantomData }
+    }
+}
+
+impl<Static> Drop for Atom<Static> {
+    #[inline]
+    fn drop(&mut self) {
+        // Out of line to guide inlining.
+        fn drop_slow<Static>(this: &mut Atom<Static>) {
+            dynamic_set().remove(this.unsafe_data.get() as *mut Entry);
+        }
+        if self.tag() == DYNAMIC_TAG {
+            let entry = self.unsafe_data.get() as *const Entry;
+            let previous = unsafe { &*entry }.ref_count.fetch_sub(1, SeqCst);
+            if previous == 1 { // this atom held the last reference
+                drop_slow(self)
+            }
+        }
+    }
+}
+
+impl<Static: StaticAtomSet> ops::Deref for Atom<Static> {
+    type Target = str;
+
+    /// Borrows the interned string from wherever this atom's representation keeps it.
+    #[inline]
+    fn deref(&self) -> &str {
+        match self.tag() {
+            // SAFETY: a dynamic atom's word is the address of the `Entry` it references.
+            DYNAMIC_TAG => unsafe { &(*(self.unsafe_data.get() as *const Entry)).string },
+            INLINE_TAG => {
+                let len = ((self.unsafe_data() & LEN_MASK) >> LEN_OFFSET) as usize;
+                debug_assert!(len <= MAX_INLINE_LEN);
+                let bytes = inline_atom_slice(&self.unsafe_data);
+                // SAFETY: assumes the atom was packed from `len <= 7` bytes of valid UTF-8,
+                // so the range is in bounds of the 7-byte slice and the bytes form a `str`.
+                unsafe { str::from_utf8_unchecked(bytes.get_unchecked(..len)) }
+            }
+            STATIC_TAG => Static::get().atoms[self.static_index() as usize],
+            // SAFETY: assumes no atom is ever built with the remaining tag value 0b11.
+            _ => unsafe { debug_unreachable!() },
+        }
+    }
+}
+
+impl<Static: StaticAtomSet> fmt::Debug for Atom<Static> {
+    /// Writes `Atom('<string>' type=<dynamic|inline|static>)`.
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let kind = match self.tag() {
+            DYNAMIC_TAG => "dynamic",
+            INLINE_TAG => "inline",
+            STATIC_TAG => "static",
+            // SAFETY: assumes no atom is ever built with the remaining tag value 0b11.
+            _ => unsafe { debug_unreachable!() },
+        };
+        let text: &str = self;
+        write!(f, "Atom('{}' type={})", text, kind)
+    }
+}
+
+impl<Static: StaticAtomSet> PartialOrd for Atom<Static> {
+    /// Atoms are totally ordered, so this always returns `Some` of [`Ord::cmp`].
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        // Delegating keeps the two orderings from drifting apart (clippy's
+        // `non_canonical_partial_ord_impl`); `cmp` has the same-word fast path.
+        Some(self.cmp(other))
+    }
+}
+
+impl<Static: StaticAtomSet> Ord for Atom<Static> {
+    #[inline]
+    fn cmp(&self, other: &Self) -> Ordering {
+        if self.unsafe_data != other.unsafe_data {
+            return (**self).cmp(&**other);
+        }
+        Equal // identical packed words, no need to look at the strings
+    }
+}
+
+// AsciiExt requires mutating methods, so we just implement the non-mutating ones.
+// We don't need to implement is_ascii because there's no performance improvement
+// over the one from &str.
+impl<Static: StaticAtomSet> Atom<Static> {
+    /// Copies `s`, lets `f` mutate the copy in place, and interns the result.
+    ///
+    /// Strings of up to 64 bytes are staged on the stack; longer ones in a `String`.
+    fn from_mutated_str<F: FnOnce(&mut str)>(s: &str, f: F) -> Self {
+        const STACK_BYTES: usize = 64;
+        let mut buffer = mem::MaybeUninit::<[u8; STACK_BYTES]>::uninit();
+
+        if s.len() <= STACK_BYTES {
+            let dst = buffer.as_mut_ptr() as *mut u8;
+            // SAFETY: `dst` points to STACK_BYTES writable bytes that cannot overlap `s`,
+            // and `s.len()` fits. Only the prefix initialized by the copy is exposed as a
+            // slice, and it holds the UTF-8 bytes of `s`; no reference to the
+            // uninitialized rest of the buffer is ever created.
+            let as_str = unsafe {
+                std::ptr::copy_nonoverlapping(s.as_ptr(), dst, s.len());
+                str::from_utf8_unchecked_mut(slice::from_raw_parts_mut(dst, s.len()))
+            };
+            f(as_str);
+            Atom::from(&*as_str)
+        } else {
+            let mut string = s.to_owned();
+            f(&mut string);
+            Atom::from(string)
+        }
+    }
+
+    /// Like `str::to_ascii_uppercase`, but the result is interned as an atom.
+    /// Returns a clone of `self` when there is no ASCII lowercase byte to change.
+    pub fn to_ascii_uppercase(&self) -> Self {
+        match self.bytes().position(|b| b.is_ascii_lowercase()) {
+            Some(i) => Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase()),
+            None => self.clone(),
+        }
+    }
+
+    /// Like `str::to_ascii_lowercase`, but the result is interned as an atom.
+    /// Returns a clone of `self` when there is no ASCII uppercase byte to change.
+    pub fn to_ascii_lowercase(&self) -> Self {
+        match self.bytes().position(|b| b.is_ascii_uppercase()) {
+            Some(i) => Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase()),
+            None => self.clone(),
+        }
+    }
+
+    /// Like `str::eq_ignore_ascii_case`, with a fast path for atoms that are equal outright.
+    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
+        (self == other) || self.eq_str_ignore_ascii_case(&**other)
+    }
+
+    /// Like `str::eq_ignore_ascii_case`, but takes an unhashed string as `other`.
+    pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool {
+        (&**self).eq_ignore_ascii_case(other)
+    }
+}
+
+/// Borrows the seven bytes of a packed word that hold an inline atom's string data,
+/// i.e. every byte except the lowest one.
+#[inline(always)]
+fn inline_atom_slice(x: &NonZeroU64) -> &[u8] {
+    // All except the lowest byte, which is first in little-endian, last in big-endian.
+    let skip: usize = if cfg!(target_endian = "little") { 1 } else { 0 };
+    let first = x as *const NonZeroU64 as *const u8;
+    // SAFETY: `skip + 7 <= 8`, so the slice stays within the eight bytes that `x`
+    // borrows, and it inherits the lifetime of `x`.
+    unsafe { slice::from_raw_parts(first.add(skip), 7) }
+}
+
+/// Mutable counterpart of `inline_atom_slice`: the seven string-data bytes of the
+/// word `x`, leaving the lowest (tag and length) byte out of reach.
+#[inline(always)]
+fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] {
+    // All except the lowest byte, which is first in little-endian, last in big-endian.
+    let skip: usize = if cfg!(target_endian = "little") { 1 } else { 0 };
+    let first = x as *mut u64 as *mut u8;
+    // SAFETY: `skip + 7 <= 8`, so the slice stays within the eight bytes that `x`
+    // exclusively borrows, and it inherits the lifetime of `x`.
+    unsafe { slice::from_raw_parts_mut(first.add(skip), 7) }
+}
diff --git a/vendor/string_cache/src/dynamic_set.rs b/vendor/string_cache/src/dynamic_set.rs
new file mode 100644
index 00000000..4442b4da
--- /dev/null
+++ b/vendor/string_cache/src/dynamic_set.rs
@@ -0,0 +1,112 @@
+// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use parking_lot::Mutex;
+use std::borrow::Cow;
+use std::mem;
+use std::ptr::NonNull;
+use std::sync::atomic::AtomicIsize;
+use std::sync::atomic::Ordering::SeqCst;
+use std::sync::OnceLock;
+
+const NB_BUCKETS: usize = 1 << 12; // 4096
+const BUCKET_MASK: u32 = NB_BUCKETS as u32 - 1; // low 12 bits of a hash select the bucket
+
+pub(crate) struct Set {
+    buckets: Box<[Mutex<Option<Box<Entry>>>]>, // per bucket: a mutex around the head of a singly linked list
+}
+
+pub(crate) struct Entry {
+    pub(crate) string: Box<str>, // the interned text
+    pub(crate) hash: u32, // hash passed to `Set::insert`; its low bits pick the bucket
+    pub(crate) ref_count: AtomicIsize, // starts at 1 in `Set::insert`; expected to be 0 in `Set::remove`
+    next_in_bucket: Option<Box<Entry>>, // next entry of the same bucket's list, if any
+}
+
+// Addresses are multiples of this,
+// and therefore have TAG_MASK bits unset, available for tagging.
+pub(crate) const ENTRY_ALIGNMENT: usize = 4;
+
+#[test]
+fn entry_alignment_is_sufficient() {
+    assert!(mem::align_of::<Entry>() >= ENTRY_ALIGNMENT); // keeps the low tag bits of an Entry address zero
+}
+
+/// Returns the process-wide set of dynamically interned strings, building it on first use.
+pub(crate) fn dynamic_set() -> &'static Set {
+    // NOTE: Using const initialization for buckets breaks the small-stack test.
+    // ```
+    // // buckets: [Mutex<Option<Box<Entry>>>; NB_BUCKETS],
+    // const MUTEX: Mutex<Option<Box<Entry>>> = Mutex::new(None);
+    // let buckets = Box::new([MUTEX; NB_BUCKETS]);
+    // ```
+    static DYNAMIC_SET: OnceLock<Set> = OnceLock::new();
+
+    DYNAMIC_SET.get_or_init(|| Set {
+        buckets: std::iter::repeat_with(|| Mutex::new(None)).take(NB_BUCKETS).collect(),
+    })
+}
+
+impl Set {
+    pub(crate) fn insert(&self, string: Cow<str>, hash: u32) -> NonNull<Entry> {
+        let bucket_index = (hash & BUCKET_MASK) as usize; // low bits of the hash pick the bucket
+        let mut linked_list = self.buckets[bucket_index].lock(); // guard is held until this function returns
+
+        {
+            let mut ptr: Option<&mut Box<Entry>> = linked_list.as_mut();
+
+            while let Some(entry) = ptr.take() {
+                if entry.hash == hash && *entry.string == *string {
+                    if entry.ref_count.fetch_add(1, SeqCst) > 0 { // previous count was non-zero: reuse the entry
+                        return NonNull::from(&mut **entry);
+                    }
+                    // Uh-oh. The pointer's reference count was zero, which means someone may try
+                    // to free it. (Naive attempts to defend against this, for example having the
+                    // destructor check to see whether the reference count is indeed zero, don't
+                    // work due to ABA.) Thus we need to temporarily add a duplicate string to the
+                    // list.
+                    entry.ref_count.fetch_sub(1, SeqCst); // undo the speculative increment
+                    break;
+                }
+                ptr = entry.next_in_bucket.as_mut();
+            }
+        }
+        debug_assert!(mem::align_of::<Entry>() >= ENTRY_ALIGNMENT);
+        let string = string.into_owned();
+        let mut entry = Box::new(Entry {
+            next_in_bucket: linked_list.take(), // the old head becomes the new entry's successor
+            hash,
+            ref_count: AtomicIsize::new(1),
+            string: string.into_boxed_str(),
+        });
+        let ptr = NonNull::from(&mut *entry); // address of the boxed entry, taken before the Box is moved
+        *linked_list = Some(entry); // the new entry is now the head of the bucket's list
+        ptr
+    }
+
+    pub(crate) fn remove(&self, ptr: *mut Entry) {
+        let value: &Entry = unsafe { &*ptr }; // NOTE(review): assumes `ptr` came from `insert` and is still linked
+        let bucket_index = (value.hash & BUCKET_MASK) as usize;
+
+        let mut linked_list = self.buckets[bucket_index].lock();
+        debug_assert!(value.ref_count.load(SeqCst) == 0); // callers are expected to have released the last reference
+        let mut current: &mut Option<Box<Entry>> = &mut linked_list;
+
+        while let Some(entry_ptr) = current.as_mut() {
+            let entry_ptr: *mut Entry = &mut **entry_ptr;
+            if entry_ptr == ptr { // found it: put its successor in its place, then drop it
+                mem::drop(mem::replace(current, unsafe {
+                    (*entry_ptr).next_in_bucket.take()
+                }));
+                break;
+            }
+            current = unsafe { &mut (*entry_ptr).next_in_bucket };
+        }
+    }
+}
diff --git a/vendor/string_cache/src/lib.rs b/vendor/string_cache/src/lib.rs
new file mode 100644
index 00000000..441cb4ef
--- /dev/null
+++ b/vendor/string_cache/src/lib.rs
@@ -0,0 +1,139 @@
+// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//!
+//! A library for interning things that are `AsRef<str>`.
+//!
+//! Some strings may be interned at compile time using the `string-cache-codegen` crate, or the
+//! `EmptyStaticAtomSet` may be used that has no compile-time interned strings. An `Atom` is an
+//! interned string for a given set (either `EmptyStaticAtomSet` or a generated `StaticAtomSet`).
+//!
+//! Generated `Atom`s will have associated macros to intern static strings at compile-time.
+//!
+//! # Examples
+//!
+//! Here are two examples, one with compile-time `Atom`s, and one without.
+//!
+//! ## With compile-time atoms
+//!
+//! In `Cargo.toml`:
+//! ```toml
+//! [dependencies]
+//! string_cache = "0.8"
+//!
+//! [build-dependencies]
+//! string_cache_codegen = "0.5"
+//! ```
+//!
+//! In `build.rs`:
+//!
+//! ```ignore
+//! extern crate string_cache_codegen;
+//!
+//! use std::env;
+//! use std::path::Path;
+//!
+//! fn main() {
+//!     string_cache_codegen::AtomType::new("foo::FooAtom", "foo_atom!")
+//!         .atoms(&["foo", "bar"])
+//!         .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))
+//!         .unwrap()
+//! }
+//! ```
+//!
+//! In `lib.rs`:
+//!
+//! ```ignore
+//! extern crate string_cache;
+//!
+//! mod foo {
+//!     include!(concat!(env!("OUT_DIR"), "/foo_atom.rs"));
+//! }
+//!
+//! fn use_the_atom(t: &str) {
+//!     match *t {
+//!         foo_atom!("foo") => println!("Found foo!"),
+//!         foo_atom!("bar") => println!("Found bar!"),
+//!         // foo_atom!("baz") => println!("Found baz!"), - would be a compile time error
+//!         _ => {
+//!             println!("String not interned");
+//!             // We can intern strings at runtime as well
+//!             foo::FooAtom::from(t)
+//!         }
+//!     }
+//! }
+//! ```
+//!
+//! ## No compile-time atoms
+//!
+//! ```
+//! # extern crate string_cache;
+//! use string_cache::DefaultAtom;
+//!
+//! # fn main() {
+//! let mut interned_stuff = Vec::new();
+//! let text = "here is a sentence of text that will be tokenised and
+//!             interned and some repeated tokens is of text and";
+//! for word in text.split_whitespace() {
+//!     let seen_before = interned_stuff.iter()
+//!         // We can use impl PartialEq<T> where T is anything string-like
+//!         // to compare interned strings to either other interned strings
+//!         // or actual strings. Comparing two interned strings is very fast
+//!         // (normally a single cpu operation).
+//!         .filter(|interned_word| interned_word == &word)
+//!         .count();
+//!     if seen_before > 0 {
+//!         println!(r#"Seen the word "{}" {} times"#, word, seen_before);
+//!     } else {
+//!         println!(r#"Not seen the word "{}" before"#, word);
+//!     }
+//!     // We use the impl From<(Cow<'a, str>, or &'a str, or String)> for
+//!     // Atom<Static> to intern a new string.
+//!     interned_stuff.push(DefaultAtom::from(word));
+//! }
+//! # }
+//! ```
+//!
+
+#![cfg_attr(test, deny(warnings))]
+
+// Types, such as Atom, that impl Hash must follow the hash invariant: if two objects match
+// with PartialEq, they must also have the same Hash. Clippy warns on types that derive one while
+// manually impl-ing the other, because it seems easy for the two to drift apart, causing the
+// invariant to be violated. (The lint was formerly named `clippy::derive_hash_xor_eq`.)
+//
+// But Atom is a newtype over NonZeroU64, and probably always will be, since cheap comparisons and
+// copying are this library's purpose. So we know what the PartialEq comparison is going to do.
+//
+// The `get_hash` function, seen in `atom.rs`, consults that number, plus the global string interner
+// tables. The only way for the resulting hash for two Atoms with the same inner 64-bit number to
+// differ would be if the table entry changed between invocations, and that would be really bad.
+#![allow(clippy::derived_hash_with_manual_eq)]
+
+mod atom;
+mod dynamic_set;
+mod static_sets;
+mod trivial_impls;
+
+pub use atom::Atom;
+pub use static_sets::{EmptyStaticAtomSet, PhfStrSet, StaticAtomSet};
+
+/// An [`Atom`] over [`EmptyStaticAtomSet`]; use this if you don’t care about static atoms.
+pub type DefaultAtom = Atom<EmptyStaticAtomSet>;
+
+// Some minor tests of internal layout here.
+// See ../integration-tests for much more.
+
+/// Guard against accidental changes to the sizes of things.
+#[test]
+fn assert_sizes() {
+    use std::mem::size_of;
+    assert_eq!(size_of::<DefaultAtom>(), 8); // one packed 64-bit word
+    assert_eq!(size_of::<Option<DefaultAtom>>(), size_of::<DefaultAtom>(),); // `None` must not cost extra space
+}
diff --git a/vendor/string_cache/src/static_sets.rs b/vendor/string_cache/src/static_sets.rs
new file mode 100644
index 00000000..f7f1799f
--- /dev/null
+++ b/vendor/string_cache/src/static_sets.rs
@@ -0,0 +1,64 @@
+// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/// A static set of interned strings, described by a [`PhfStrSet`].
+///
+/// This trait is implemented by static sets of interned strings generated using
+/// `string_cache_codegen`, and `EmptyStaticAtomSet` for when strings will be added dynamically.
+///
+/// It is used by the methods of [`Atom`] to check if a string is present in the static set.
+///
+/// [`Atom`]: struct.Atom.html
+pub trait StaticAtomSet: Ord {
+    /// Get the location of the static string set in the binary.
+    fn get() -> &'static PhfStrSet;
+    /// Get the index of the empty string, which is in every set and is used for `Atom::default`.
+    fn empty_string_index() -> u32;
+}
+
+/// A string set created using a [perfect hash function], specifically
+/// [Hash, Displace and Compress].
+///
+/// See the CHD document for the meaning of the struct fields.
+///
+/// [perfect hash function]: https://en.wikipedia.org/wiki/Perfect_hash_function
+/// [Hash, Displace and Compress]: http://cmph.sourceforge.net/papers/esa09.pdf
+pub struct PhfStrSet {
+    #[doc(hidden)]
+    pub key: u64, // passed to `phf_shared::hash` when a string is looked up
+    #[doc(hidden)]
+    pub disps: &'static [(u32, u32)], // passed to `phf_shared::get_index` when a string is looked up
+    #[doc(hidden)]
+    pub atoms: &'static [&'static str], // the strings themselves, addressed by static index
+    #[doc(hidden)]
+    pub hashes: &'static [u32], // what `Atom::get_hash` returns for the static atom at the same index
+}
+
+/// An empty static atom set for when only dynamic strings will be added.
+#[derive(PartialEq, Eq, PartialOrd, Ord)]
+pub struct EmptyStaticAtomSet;
+
+impl StaticAtomSet for EmptyStaticAtomSet {
+    fn get() -> &'static PhfStrSet {
+        // The name is a lie: this set is not empty (it contains the empty string)
+        // but that’s only to avoid divisions by zero in rust-phf.
+        static SET: PhfStrSet = PhfStrSet {
+            key: 0,
+            disps: &[(0, 0)],
+            atoms: &[""], // the single member: the empty string
+            // "" SipHash'd, and xored with u64_hash_to_u32.
+            hashes: &[0x3ddddef3],
+        };
+        &SET
+    }
+
+    fn empty_string_index() -> u32 {
+        0 // position of "" in `atoms` above
+    }
+}
diff --git a/vendor/string_cache/src/trivial_impls.rs b/vendor/string_cache/src/trivial_impls.rs
new file mode 100644
index 00000000..960dde0d
--- /dev/null
+++ b/vendor/string_cache/src/trivial_impls.rs
@@ -0,0 +1,119 @@
+// Copyright 2014 The Servo Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use crate::{Atom, StaticAtomSet};
+#[cfg(feature = "serde_support")]
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use std::borrow::Cow;
+use std::fmt;
+
+impl<Static: StaticAtomSet> ::precomputed_hash::PrecomputedHash for Atom<Static> {
+    fn precomputed_hash(&self) -> u32 {
+        self.get_hash() // the stored or packed-word hash; the string is not rehashed here
+    }
+}
+
+impl<'a, Static: StaticAtomSet> From<&'a Atom<Static>> for Atom<Static> {
+    fn from(atom: &'a Self) -> Self {
+        atom.clone() // `Clone` takes care of a dynamic atom's reference count
+    }
+}
+
+impl<Static: StaticAtomSet> PartialEq<str> for Atom<Static> {
+    fn eq(&self, other: &str) -> bool {
+        self[..] == *other // plain `str` comparison of the atom's contents
+    }
+}
+
+impl<Static: StaticAtomSet> PartialEq<Atom<Static>> for str {
+    fn eq(&self, other: &Atom<Static>) -> bool {
+        *self == other[..] // plain `str` comparison of the atom's contents
+    }
+}
+
+impl<Static: StaticAtomSet> PartialEq<String> for Atom<Static> {
+    fn eq(&self, other: &String) -> bool {
+        &self[..] == other.as_str() // both sides compared as string slices
+    }
+}
+
+impl<'a, Static: StaticAtomSet> From<&'a str> for Atom<Static> {
+    #[inline]
+    fn from(string_to_add: &str) -> Self {
+        Cow::Borrowed(string_to_add).into() // goes through `From<Cow<str>>` without copying first
+    }
+}
+
+impl<Static: StaticAtomSet> From<String> for Atom<Static> {
+    #[inline]
+    fn from(string_to_add: String) -> Self {
+        Cow::<str>::Owned(string_to_add).into() // goes through `From<Cow<str>>`, handing over the buffer
+    }
+}
+
+impl<Static: StaticAtomSet> fmt::Display for Atom<Static> {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Display::fmt(&**self, f) // defer to the `Display` impl of `str`
+    }
+}
+
+impl<Static: StaticAtomSet> AsRef<str> for Atom<Static> {
+    fn as_ref(&self) -> &str {
+        self // coerced to `&str` through the `Deref<Target = str>` impl
+    }
+}
+
+#[cfg(feature = "serde_support")]
+impl<Static: StaticAtomSet> Serialize for Atom<Static> {
+    /// Serializes the atom through the `Serialize` impl of its string contents.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        <str as Serialize>::serialize(self, serializer)
+    }
+}
+
+#[cfg(feature = "serde_support")]
+impl<'de, Static: StaticAtomSet> Deserialize<'de> for Atom<Static> {
+    /// Deserializes a string and interns it.
+    ///
+    /// Both borrowed and owned strings are accepted; an owned `String` is handed to
+    /// `Atom::from` as is.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        use serde::de::{Error, Visitor};
+        use std::marker::PhantomData;
+
+        /// Visitor that turns any string it is given into an atom of set `Static`.
+        struct StringInterner<Static: StaticAtomSet>(PhantomData<Static>);
+
+        impl<'v, Static: StaticAtomSet> Visitor<'v> for StringInterner<Static> {
+            type Value = Atom<Static>;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                formatter.write_str("an Atom")
+            }
+
+            fn visit_str<E: Error>(self, v: &str) -> Result<Self::Value, E> {
+                Ok(Atom::from(v))
+            }
+
+            fn visit_string<E: Error>(self, v: String) -> Result<Self::Value, E> {
+                Ok(Atom::from(v))
+            }
+        }
+
+        // Ask the deserializer for a string; the visitor accepts `&str` and `String` alike.
+        deserializer.deserialize_str(StringInterner(PhantomData))
+    }
+}
author	mo khan <mo@mokhan.ca>	2025-07-02 18:36:06 -0600
committer	mo khan <mo@mokhan.ca>	2025-07-02 18:36:06 -0600
commit	8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch)
tree	22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/string_cache/src
parent	4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff)