inturn/interner/
str.rs

1use crate::{BytesInterner, InternerSymbol, Symbol};
2use std::{collections::hash_map::RandomState, hash::BuildHasher};
3
4/// String interner.
5///
6/// This is a thin wrapper around [`BytesInterner`] that uses `str` instead of `[u8]`.
7///
8/// See the [crate-level docs][crate] for more details.
9pub struct Interner<S = Symbol, H = RandomState> {
10    pub(crate) inner: BytesInterner<S, H>,
11}
12
13impl Default for Interner {
14    #[inline]
15    fn default() -> Self {
16        Self::new()
17    }
18}
19
20impl Interner<Symbol, RandomState> {
21    /// Creates a new, empty `Interner` with the default symbol and hasher.
22    #[inline]
23    pub fn new() -> Self {
24        Self::with_capacity(0)
25    }
26
27    /// Creates a new `Interner` with the given capacity and default symbol and hasher.
28    #[inline]
29    pub fn with_capacity(capacity: usize) -> Self {
30        Self::with_capacity_and_hasher(capacity, Default::default())
31    }
32}
33
34impl<S: InternerSymbol, H: BuildHasher> Interner<S, H> {
35    /// Creates a new `Interner` with the given custom hasher.
36    #[inline]
37    pub fn with_hasher(hash_builder: H) -> Self {
38        Self::with_capacity_and_hasher(0, hash_builder)
39    }
40
41    /// Creates a new `Interner` with the given capacitiy and custom hasher.
42    pub fn with_capacity_and_hasher(capacity: usize, hash_builder: H) -> Self {
43        Self { inner: BytesInterner::with_capacity_and_hasher(capacity, hash_builder) }
44    }
45
46    /// Returns the number of unique strings in the interner.
47    #[inline]
48    pub fn len(&self) -> usize {
49        self.inner.len()
50    }
51
52    /// Returns `true` if the interner is empty.
53    #[inline]
54    pub fn is_empty(&self) -> bool {
55        self.len() == 0
56    }
57
58    /// Returns an iterator over the interned strings and their corresponding `Symbol`s.
59    ///
60    /// Does not guarantee that it includes symbols added after the iterator was created.
61    #[inline]
62    pub fn iter(&self) -> impl ExactSizeIterator<Item = (S, &str)> + Clone {
63        self.all_symbols().map(|s| (s, self.resolve(s)))
64    }
65
66    /// Returns an iterator over all symbols in the interner.
67    #[inline]
68    pub fn all_symbols(&self) -> impl ExactSizeIterator<Item = S> + Send + Sync + Clone {
69        (0..self.len()).map(S::from_usize)
70    }
71
72    /// Interns a string, returning its unique `Symbol`.
73    ///
74    /// Allocates the string internally if it is not already interned.
75    ///
76    /// If `s` outlives `self`, like `&'static str`, prefer using
77    /// [`intern_static`](Self::intern_static), as it will not allocate the string on the heap.
78    pub fn intern(&self, s: &str) -> S {
79        self.inner.intern(s.as_bytes())
80    }
81
82    /// Interns a string, returning its unique `Symbol`.
83    ///
84    /// Allocates the string internally if it is not already interned.
85    ///
86    /// If `s` outlives `self`, like `&'static str`, prefer using
87    /// [`intern_mut_static`](Self::intern_mut_static), as it will not allocate the string on the
88    /// heap.
89    ///
90    /// By taking `&mut self`, this never acquires any locks.
91    pub fn intern_mut(&mut self, s: &str) -> S {
92        self.inner.intern_mut(s.as_bytes())
93    }
94
95    /// Interns a static string, returning its unique `Symbol`.
96    ///
97    /// Note that this only requires that `s` outlives `self`, which means we can avoid allocating
98    /// the string.
99    pub fn intern_static<'a, 'b: 'a>(&'a self, s: &'b str) -> S {
100        self.inner.intern_static(s.as_bytes())
101    }
102
103    /// Interns a static string, returning its unique `Symbol`.
104    ///
105    /// Note that this only requires that `s` outlives `self`, which means we can avoid allocating
106    /// the string.
107    ///
108    /// By taking `&mut self`, this never acquires any locks.
109    pub fn intern_mut_static<'a, 'b: 'a>(&'a mut self, s: &'b str) -> S {
110        self.inner.intern_mut_static(s.as_bytes())
111    }
112
113    /// Interns multiple strings.
114    ///
115    /// Allocates the strings internally if they are not already interned.
116    ///
117    /// If the strings outlive `self`, like `&'static str`, prefer using
118    /// [`intern_many_static`](Self::intern_many_static), as it will not allocate the strings on the
119    /// heap.
120    pub fn intern_many<'a>(&self, strings: impl IntoIterator<Item = &'a str>) {
121        self.inner.intern_many(strings.into_iter().map(str::as_bytes));
122    }
123
124    /// Interns multiple strings.
125    ///
126    /// Allocates the strings internally if they are not already interned.
127    ///
128    /// If the strings outlive `self`, like `&'static str`, prefer using
129    /// [`intern_many_mut_static`](Self::intern_many_mut_static), as it will not allocate the
130    /// strings on the heap.
131    ///
132    /// By taking `&mut self`, this never acquires any locks.
133    pub fn intern_many_mut<'a>(&mut self, strings: impl IntoIterator<Item = &'a str>) {
134        self.inner.intern_many_mut(strings.into_iter().map(str::as_bytes));
135    }
136
137    /// Interns multiple static strings.
138    ///
139    /// Note that this only requires that the strings outlive `self`, which means we can avoid
140    /// allocating the strings.
141    pub fn intern_many_static<'a, 'b: 'a>(&'a self, strings: impl IntoIterator<Item = &'b str>) {
142        self.inner.intern_many_static(strings.into_iter().map(str::as_bytes));
143    }
144
145    /// Interns multiple static strings.
146    ///
147    /// Note that this only requires that the strings outlive `self`, which means we can avoid
148    /// allocating the strings.
149    ///
150    /// By taking `&mut self`, this never acquires any locks.
151    pub fn intern_many_mut_static<'a, 'b: 'a>(
152        &'a mut self,
153        strings: impl IntoIterator<Item = &'b str>,
154    ) {
155        self.inner.intern_many_mut_static(strings.into_iter().map(str::as_bytes));
156    }
157
158    /// Maps a `Symbol` to its string. This is a cheap, lock-free operation.
159    ///
160    /// # Panics
161    ///
162    /// Panics if `Symbol` is out of bounds of this `Interner`. You should only use `Symbol`s
163    /// created by this `Interner`.
164    #[inline]
165    #[must_use]
166    #[cfg_attr(debug_assertions, track_caller)]
167    pub fn resolve(&self, sym: S) -> &str {
168        // SAFETY: Only `str`s are interned.
169        unsafe { std::str::from_utf8_unchecked(self.inner.resolve(sym)) }
170    }
171}