use std::{
borrow::Borrow,
fmt,
hash::{BuildHasherDefault, Hash, Hasher},
mem::{self, ManuallyDrop},
ptr::NonNull,
sync::OnceLock,
};
use dashmap::{DashMap, SharedValue};
use hashbrown::{hash_map::RawEntryMut, HashMap};
use rustc_hash::FxHasher;
use sptr::Strict;
use triomphe::Arc;
pub mod symbols;
const _: () = assert!(std::mem::size_of::<Box<str>>() == std::mem::size_of::<&str>());
const _: () = assert!(std::mem::align_of::<Box<str>>() == std::mem::align_of::<&str>());
const _: () = assert!(std::mem::size_of::<Arc<Box<str>>>() == std::mem::size_of::<&&str>());
const _: () = assert!(std::mem::align_of::<Arc<Box<str>>>() == std::mem::align_of::<&&str>());
const _: () =
assert!(std::mem::size_of::<*const *const str>() == std::mem::size_of::<TaggedArcPtr>());
const _: () =
assert!(std::mem::align_of::<*const *const str>() == std::mem::align_of::<TaggedArcPtr>());
const _: () = assert!(std::mem::size_of::<Arc<Box<str>>>() == std::mem::size_of::<TaggedArcPtr>());
const _: () =
assert!(std::mem::align_of::<Arc<Box<str>>>() == std::mem::align_of::<TaggedArcPtr>());
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
struct TaggedArcPtr {
packed: NonNull<*const str>,
}
unsafe impl Send for TaggedArcPtr {}
unsafe impl Sync for TaggedArcPtr {}
impl TaggedArcPtr {
const BOOL_BITS: usize = true as usize;
const fn non_arc(r: &'static &'static str) -> Self {
assert!(
mem::align_of::<&'static &'static str>().trailing_zeros() as usize > Self::BOOL_BITS
);
let packed =
unsafe { NonNull::new_unchecked((r as *const &str).cast::<*const str>().cast_mut()) };
Self { packed }
}
fn arc(arc: Arc<Box<str>>) -> Self {
assert!(
mem::align_of::<&'static &'static str>().trailing_zeros() as usize > Self::BOOL_BITS
);
Self {
packed: Self::pack_arc(
unsafe { NonNull::new_unchecked(Arc::into_raw(arc).cast_mut().cast()) },
),
}
}
#[inline]
pub(crate) unsafe fn try_as_arc_owned(self) -> Option<ManuallyDrop<Arc<Box<str>>>> {
let tag = Strict::addr(self.packed.as_ptr()) & Self::BOOL_BITS;
if tag != 0 {
Some(ManuallyDrop::new(unsafe {
Arc::from_raw(self.pointer().as_ptr().cast::<Box<str>>())
}))
} else {
None
}
}
#[inline]
fn pack_arc(ptr: NonNull<*const str>) -> NonNull<*const str> {
let packed_tag = true as usize;
let self_addr = ptr.as_ptr() as *const *const str as usize;
let addr = self_addr | packed_tag;
let dest_addr = addr as isize;
let offset = dest_addr.wrapping_sub(self_addr as isize);
unsafe { NonNull::new_unchecked(ptr.as_ptr().cast::<u8>().wrapping_offset(offset).cast()) }
}
#[inline]
pub(crate) fn pointer(self) -> NonNull<*const str> {
unsafe {
NonNull::new_unchecked(Strict::map_addr(self.packed.as_ptr(), |addr| {
addr & !Self::BOOL_BITS
}))
}
}
#[inline]
pub(crate) fn as_str(&self) -> &str {
unsafe { *self.pointer().as_ptr().cast::<&str>() }
}
}
#[derive(PartialEq, Eq, Hash)]
pub struct Symbol {
repr: TaggedArcPtr,
}
impl fmt::Debug for Symbol {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.as_str().fmt(f)
}
}
const _: () = assert!(std::mem::size_of::<Symbol>() == std::mem::size_of::<NonNull<()>>());
const _: () = assert!(std::mem::align_of::<Symbol>() == std::mem::align_of::<NonNull<()>>());
static MAP: OnceLock<DashMap<SymbolProxy, (), BuildHasherDefault<FxHasher>>> = OnceLock::new();
impl Symbol {
pub fn intern(s: &str) -> Self {
let (mut shard, hash) = Self::select_shard(s);
match shard.raw_entry_mut().from_key_hashed_nocheck(hash, s) {
RawEntryMut::Occupied(occ) => Self { repr: increase_arc_refcount(occ.key().0) },
RawEntryMut::Vacant(vac) => Self {
repr: increase_arc_refcount(
vac.insert_hashed_nocheck(
hash,
SymbolProxy(TaggedArcPtr::arc(Arc::new(Box::<str>::from(s)))),
SharedValue::new(()),
)
.0
.0,
),
},
}
}
pub fn integer(i: usize) -> Self {
match i {
0 => symbols::INTEGER_0.clone(),
1 => symbols::INTEGER_1.clone(),
2 => symbols::INTEGER_2.clone(),
3 => symbols::INTEGER_3.clone(),
4 => symbols::INTEGER_4.clone(),
5 => symbols::INTEGER_5.clone(),
6 => symbols::INTEGER_6.clone(),
7 => symbols::INTEGER_7.clone(),
8 => symbols::INTEGER_8.clone(),
9 => symbols::INTEGER_9.clone(),
10 => symbols::INTEGER_10.clone(),
11 => symbols::INTEGER_11.clone(),
12 => symbols::INTEGER_12.clone(),
13 => symbols::INTEGER_13.clone(),
14 => symbols::INTEGER_14.clone(),
15 => symbols::INTEGER_15.clone(),
i => Symbol::intern(&format!("{i}")),
}
}
pub fn empty() -> Self {
symbols::__empty.clone()
}
pub fn as_str(&self) -> &str {
self.repr.as_str()
}
#[inline]
fn select_shard(
s: &str,
) -> (
dashmap::RwLockWriteGuard<
'static,
HashMap<SymbolProxy, SharedValue<()>, BuildHasherDefault<FxHasher>>,
>,
u64,
) {
let storage = MAP.get_or_init(symbols::prefill);
let hash = {
let mut hasher = std::hash::BuildHasher::build_hasher(storage.hasher());
s.hash(&mut hasher);
hasher.finish()
};
let shard_idx = storage.determine_shard(hash as usize);
let shard = &storage.shards()[shard_idx];
(shard.write(), hash)
}
#[cold]
fn drop_slow(arc: &Arc<Box<str>>) {
let (mut shard, hash) = Self::select_shard(arc);
match Arc::count(arc) {
0 => unreachable!(),
1 => unreachable!(),
2 => (),
_ => {
return;
}
}
let ptr = match shard.raw_entry_mut().from_key_hashed_nocheck::<str>(hash, arc.as_ref()) {
RawEntryMut::Occupied(occ) => occ.remove_entry(),
RawEntryMut::Vacant(_) => unreachable!(),
}
.0
.0;
ManuallyDrop::into_inner(unsafe { ptr.try_as_arc_owned().unwrap() });
debug_assert_eq!(Arc::count(arc), 1);
if shard.len() * 2 < shard.capacity() {
shard.shrink_to_fit();
}
}
}
impl Drop for Symbol {
#[inline]
fn drop(&mut self) {
let Some(arc) = (unsafe { self.repr.try_as_arc_owned() }) else {
return;
};
if Arc::count(&arc) == 2 {
Self::drop_slow(&arc);
}
ManuallyDrop::into_inner(arc);
}
}
impl Clone for Symbol {
fn clone(&self) -> Self {
Self { repr: increase_arc_refcount(self.repr) }
}
}
fn increase_arc_refcount(repr: TaggedArcPtr) -> TaggedArcPtr {
let Some(arc) = (unsafe { repr.try_as_arc_owned() }) else {
return repr;
};
mem::forget(Arc::clone(&arc));
repr
}
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.as_str().fmt(f)
}
}
#[derive(Debug, PartialEq, Eq)]
struct SymbolProxy(TaggedArcPtr);
impl Hash for SymbolProxy {
fn hash<H: Hasher>(&self, state: &mut H) {
self.0.as_str().hash(state);
}
}
impl Borrow<str> for SymbolProxy {
fn borrow(&self) -> &str {
self.0.as_str()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn smoke_test() {
Symbol::intern("isize");
let base_len = MAP.get().unwrap().len();
let hello = Symbol::intern("hello");
let world = Symbol::intern("world");
let more_worlds = world.clone();
let bang = Symbol::intern("!");
let q = Symbol::intern("?");
assert_eq!(MAP.get().unwrap().len(), base_len + 4);
let bang2 = Symbol::intern("!");
assert_eq!(MAP.get().unwrap().len(), base_len + 4);
drop(bang2);
assert_eq!(MAP.get().unwrap().len(), base_len + 4);
drop(q);
assert_eq!(MAP.get().unwrap().len(), base_len + 3);
let default = Symbol::intern("default");
let many_worlds = world.clone();
assert_eq!(MAP.get().unwrap().len(), base_len + 3);
assert_eq!(
"hello default world!",
format!("{} {} {}{}", hello.as_str(), default.as_str(), world.as_str(), bang.as_str())
);
drop(default);
assert_eq!(
"hello world!",
format!("{} {}{}", hello.as_str(), world.as_str(), bang.as_str())
);
drop(many_worlds);
drop(more_worlds);
drop(hello);
drop(world);
drop(bang);
assert_eq!(MAP.get().unwrap().len(), base_len);
}
}