1use crate::{InternerSymbol, Symbol};
2use boxcar::Vec as LFVec;
3use bumpalo::Bump;
4use dashmap::DashMap;
5use hashbrown::hash_table;
6use std::{collections::hash_map::RandomState, hash::BuildHasher};
7use thread_local::ThreadLocal;
8
9pub(crate) type Map<S> = DashMap<MapKey, S, NoHasherBuilder>;
15pub(crate) type MapKey = (u64, &'static [u8]);
16pub(crate) type RawMapKey<S> = (MapKey, S);
17
18type Arena = ThreadLocal<Bump>;
20
21pub struct BytesInterner<S = Symbol, H = RandomState> {
25 pub(crate) map: Map<S>,
26 hash_builder: H,
27 strs: LFVec<&'static [u8]>,
28 arena: Arena,
29}
30
31impl Default for BytesInterner {
32 #[inline]
33 fn default() -> Self {
34 Self::new()
35 }
36}
37
38impl BytesInterner<Symbol, RandomState> {
39 #[inline]
41 pub fn new() -> Self {
42 Self::with_capacity(0)
43 }
44
45 #[inline]
47 pub fn with_capacity(capacity: usize) -> Self {
48 Self::with_capacity_and_hasher(capacity, Default::default())
49 }
50}
51
52impl<S: InternerSymbol, H: BuildHasher> BytesInterner<S, H> {
53 #[inline]
55 pub fn with_hasher(hash_builder: H) -> Self {
56 Self::with_capacity_and_hasher(0, hash_builder)
57 }
58
59 pub fn with_capacity_and_hasher(capacity: usize, hash_builder: H) -> Self {
61 let map = Map::with_capacity_and_hasher(capacity, Default::default());
62 let strs = LFVec::with_capacity(capacity);
63 Self { map, strs, arena: Default::default(), hash_builder }
64 }
65
66 #[inline]
68 pub fn len(&self) -> usize {
69 self.strs.count()
70 }
71
72 #[inline]
74 pub fn is_empty(&self) -> bool {
75 self.len() == 0
76 }
77
78 #[inline]
82 pub fn iter(&self) -> impl ExactSizeIterator<Item = (S, &[u8])> + Clone {
83 self.all_symbols().map(|s| (s, self.resolve(s)))
84 }
85
86 #[inline]
88 pub fn all_symbols(&self) -> impl ExactSizeIterator<Item = S> + Send + Sync + Clone {
89 (0..self.len()).map(S::from_usize)
90 }
91
92 pub fn intern(&self, s: &[u8]) -> S {
99 self.do_intern(s, alloc)
100 }
101
102 pub fn intern_mut(&mut self, s: &[u8]) -> S {
112 self.do_intern_mut(s, alloc)
113 }
114
115 pub fn intern_static<'a, 'b: 'a>(&'a self, s: &'b [u8]) -> S {
120 self.do_intern(s, no_alloc)
121 }
122
123 pub fn intern_mut_static<'a, 'b: 'a>(&'a mut self, s: &'b [u8]) -> S {
130 self.do_intern_mut(s, no_alloc)
131 }
132
133 pub fn intern_many<'a>(&self, strings: impl IntoIterator<Item = &'a [u8]>) {
141 for s in strings {
142 self.intern(s);
143 }
144 }
145
146 pub fn intern_many_mut<'a>(&mut self, strings: impl IntoIterator<Item = &'a [u8]>) {
156 for s in strings {
157 self.intern_mut(s);
158 }
159 }
160
161 pub fn intern_many_static<'a, 'b: 'a>(&'a self, strings: impl IntoIterator<Item = &'b [u8]>) {
166 for s in strings {
167 self.intern_static(s);
168 }
169 }
170
171 pub fn intern_many_mut_static<'a, 'b: 'a>(
178 &'a mut self,
179 strings: impl IntoIterator<Item = &'b [u8]>,
180 ) {
181 for s in strings {
182 self.intern_mut_static(s);
183 }
184 }
185
186 #[inline]
193 #[must_use]
194 #[cfg_attr(debug_assertions, track_caller)]
195 pub fn resolve(&self, sym: S) -> &[u8] {
196 if cfg!(debug_assertions) {
197 self.strs.get(sym.to_usize()).expect("symbol out of bounds")
198 } else {
199 unsafe { self.strs.get_unchecked(sym.to_usize()) }
200 }
201 }
202
203 #[inline]
204 fn do_intern(&self, s: &[u8], alloc: impl FnOnce(&Arena, &[u8]) -> &'static [u8]) -> S {
205 let hash = self.hash(s);
206 let shard_idx = self.map.determine_shard(hash as usize);
207 let shard = &*self.map.shards()[shard_idx];
208
209 if let Some((_, v)) = cvt(&shard.read()).find(hash, mk_eq(s)) {
210 return *v.get();
211 }
212
213 get_or_insert(&self.strs, &self.arena, s, hash, cvt_mut(&mut shard.write()), alloc)
214 }
215
216 #[inline]
217 fn do_intern_mut(&mut self, s: &[u8], alloc: impl FnOnce(&Arena, &[u8]) -> &'static [u8]) -> S {
218 let hash = self.hash(s);
219 let shard_idx = self.map.determine_shard(hash as usize);
220 let shard = &mut *self.map.shards_mut()[shard_idx];
221
222 get_or_insert(&self.strs, &self.arena, s, hash, cvt_mut(shard.get_mut()), alloc)
223 }
224
225 #[inline]
226 fn hash(&self, s: &[u8]) -> u64 {
227 use std::hash::Hasher;
229 let mut h = self.hash_builder.build_hasher();
230 h.write(s);
231 h.finish()
232 }
233}
234
235pub(crate) type NoHasherBuilder = std::hash::BuildHasherDefault<NoHasher>;
236
237pub(crate) enum NoHasher {}
238impl Default for NoHasher {
239 #[inline]
240 fn default() -> Self {
241 unreachable!()
242 }
243}
244impl std::hash::Hasher for NoHasher {
245 #[inline]
246 fn finish(&self) -> u64 {
247 match *self {}
248 }
249 #[inline]
250 fn write(&mut self, _bytes: &[u8]) {
251 match *self {}
252 }
253}
254
255#[inline]
256fn get_or_insert<S: InternerSymbol>(
257 strs: &LFVec<&'static [u8]>,
258 arena: &Arena,
259 s: &[u8],
260 hash: u64,
261 shard: &mut hash_table::HashTable<RawMapKey<dashmap::SharedValue<S>>>,
262 alloc: impl FnOnce(&Arena, &[u8]) -> &'static [u8],
263) -> S {
264 match shard.entry(hash, mk_eq(s), hasher) {
265 hash_table::Entry::Occupied(e) => *e.get().1.get(),
266 hash_table::Entry::Vacant(e) => {
267 let s = alloc(arena, s);
268 let i = strs.push(s);
269 let new_sym = S::from_usize(i);
270 e.insert(((hash, s), dashmap::SharedValue::new(new_sym)));
271 new_sym
272 }
273 }
274}
275
276#[inline]
277fn hasher<S>(((hash, _), _): &RawMapKey<S>) -> u64 {
278 *hash
279}
280
281#[inline]
282fn mk_eq<S>(s: &[u8]) -> impl Fn(&RawMapKey<S>) -> bool + Copy + '_ {
283 move |((_, ss), _): &RawMapKey<S>| s == *ss
284}
285
286#[inline]
287fn alloc(arena: &Arena, s: &[u8]) -> &'static [u8] {
288 unsafe {
290 std::mem::transmute::<&[u8], &'static [u8]>(arena.get_or_default().alloc_slice_copy(s))
291 }
292}
293
294#[inline]
295fn no_alloc(_: &Arena, s: &[u8]) -> &'static [u8] {
296 unsafe { std::mem::transmute::<&[u8], &'static [u8]>(s) }
298}
299
300#[inline]
302fn cvt<T>(old: &hashbrown::raw::RawTable<T>) -> &hash_table::HashTable<T> {
303 unsafe { std::mem::transmute(old) }
304}
305
306#[inline]
307fn cvt_mut<T>(old: &mut hashbrown::raw::RawTable<T>) -> &mut hash_table::HashTable<T> {
308 unsafe { std::mem::transmute(old) }
309}