inturn/interner/str.rs
1use crate::{BytesInterner, InternerSymbol, Symbol};
2use std::{collections::hash_map::RandomState, hash::BuildHasher};
3
4/// String interner.
5///
6/// This is a thin wrapper around [`BytesInterner`] that uses `str` instead of `[u8]`.
7///
8/// See the [crate-level docs][crate] for more details.
9pub struct Interner<S = Symbol, H = RandomState> {
10 pub(crate) inner: BytesInterner<S, H>,
11}
12
13impl Default for Interner {
14 #[inline]
15 fn default() -> Self {
16 Self::new()
17 }
18}
19
20impl Interner<Symbol, RandomState> {
21 /// Creates a new, empty `Interner` with the default symbol and hasher.
22 #[inline]
23 pub fn new() -> Self {
24 Self::with_capacity(0)
25 }
26
27 /// Creates a new `Interner` with the given capacity and default symbol and hasher.
28 #[inline]
29 pub fn with_capacity(capacity: usize) -> Self {
30 Self::with_capacity_and_hasher(capacity, Default::default())
31 }
32}
33
34impl<S: InternerSymbol, H: BuildHasher> Interner<S, H> {
35 /// Creates a new `Interner` with the given custom hasher.
36 #[inline]
37 pub fn with_hasher(hash_builder: H) -> Self {
38 Self::with_capacity_and_hasher(0, hash_builder)
39 }
40
41 /// Creates a new `Interner` with the given capacitiy and custom hasher.
42 pub fn with_capacity_and_hasher(capacity: usize, hash_builder: H) -> Self {
43 Self { inner: BytesInterner::with_capacity_and_hasher(capacity, hash_builder) }
44 }
45
46 /// Returns the number of unique strings in the interner.
47 #[inline]
48 pub fn len(&self) -> usize {
49 self.inner.len()
50 }
51
52 /// Returns `true` if the interner is empty.
53 #[inline]
54 pub fn is_empty(&self) -> bool {
55 self.len() == 0
56 }
57
58 /// Returns an iterator over the interned strings and their corresponding `Symbol`s.
59 ///
60 /// Does not guarantee that it includes symbols added after the iterator was created.
61 #[inline]
62 pub fn iter(&self) -> impl ExactSizeIterator<Item = (S, &str)> + Clone {
63 self.all_symbols().map(|s| (s, self.resolve(s)))
64 }
65
66 /// Returns an iterator over all symbols in the interner.
67 #[inline]
68 pub fn all_symbols(&self) -> impl ExactSizeIterator<Item = S> + Send + Sync + Clone {
69 (0..self.len()).map(S::from_usize)
70 }
71
72 /// Interns a string, returning its unique `Symbol`.
73 ///
74 /// Allocates the string internally if it is not already interned.
75 ///
76 /// If `s` outlives `self`, like `&'static str`, prefer using
77 /// [`intern_static`](Self::intern_static), as it will not allocate the string on the heap.
78 pub fn intern(&self, s: &str) -> S {
79 self.inner.intern(s.as_bytes())
80 }
81
82 /// Interns a string, returning its unique `Symbol`.
83 ///
84 /// Allocates the string internally if it is not already interned.
85 ///
86 /// If `s` outlives `self`, like `&'static str`, prefer using
87 /// [`intern_mut_static`](Self::intern_mut_static), as it will not allocate the string on the
88 /// heap.
89 ///
90 /// By taking `&mut self`, this never acquires any locks.
91 pub fn intern_mut(&mut self, s: &str) -> S {
92 self.inner.intern_mut(s.as_bytes())
93 }
94
95 /// Interns a static string, returning its unique `Symbol`.
96 ///
97 /// Note that this only requires that `s` outlives `self`, which means we can avoid allocating
98 /// the string.
99 pub fn intern_static<'a, 'b: 'a>(&'a self, s: &'b str) -> S {
100 self.inner.intern_static(s.as_bytes())
101 }
102
103 /// Interns a static string, returning its unique `Symbol`.
104 ///
105 /// Note that this only requires that `s` outlives `self`, which means we can avoid allocating
106 /// the string.
107 ///
108 /// By taking `&mut self`, this never acquires any locks.
109 pub fn intern_mut_static<'a, 'b: 'a>(&'a mut self, s: &'b str) -> S {
110 self.inner.intern_mut_static(s.as_bytes())
111 }
112
113 /// Interns multiple strings.
114 ///
115 /// Allocates the strings internally if they are not already interned.
116 ///
117 /// If the strings outlive `self`, like `&'static str`, prefer using
118 /// [`intern_many_static`](Self::intern_many_static), as it will not allocate the strings on the
119 /// heap.
120 pub fn intern_many<'a>(&self, strings: impl IntoIterator<Item = &'a str>) {
121 self.inner.intern_many(strings.into_iter().map(str::as_bytes));
122 }
123
124 /// Interns multiple strings.
125 ///
126 /// Allocates the strings internally if they are not already interned.
127 ///
128 /// If the strings outlive `self`, like `&'static str`, prefer using
129 /// [`intern_many_mut_static`](Self::intern_many_mut_static), as it will not allocate the
130 /// strings on the heap.
131 ///
132 /// By taking `&mut self`, this never acquires any locks.
133 pub fn intern_many_mut<'a>(&mut self, strings: impl IntoIterator<Item = &'a str>) {
134 self.inner.intern_many_mut(strings.into_iter().map(str::as_bytes));
135 }
136
137 /// Interns multiple static strings.
138 ///
139 /// Note that this only requires that the strings outlive `self`, which means we can avoid
140 /// allocating the strings.
141 pub fn intern_many_static<'a, 'b: 'a>(&'a self, strings: impl IntoIterator<Item = &'b str>) {
142 self.inner.intern_many_static(strings.into_iter().map(str::as_bytes));
143 }
144
145 /// Interns multiple static strings.
146 ///
147 /// Note that this only requires that the strings outlive `self`, which means we can avoid
148 /// allocating the strings.
149 ///
150 /// By taking `&mut self`, this never acquires any locks.
151 pub fn intern_many_mut_static<'a, 'b: 'a>(
152 &'a mut self,
153 strings: impl IntoIterator<Item = &'b str>,
154 ) {
155 self.inner.intern_many_mut_static(strings.into_iter().map(str::as_bytes));
156 }
157
158 /// Maps a `Symbol` to its string. This is a cheap, lock-free operation.
159 ///
160 /// # Panics
161 ///
162 /// Panics if `Symbol` is out of bounds of this `Interner`. You should only use `Symbol`s
163 /// created by this `Interner`.
164 #[inline]
165 #[must_use]
166 #[cfg_attr(debug_assertions, track_caller)]
167 pub fn resolve(&self, sym: S) -> &str {
168 // SAFETY: Only `str`s are interned.
169 unsafe { std::str::from_utf8_unchecked(self.inner.resolve(sym)) }
170 }
171}