zerovec/varzerovec/
slice.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::components::VarZeroVecComponents;
6use super::*;
7use crate::ule::*;
8use alloc::boxed::Box;
9use alloc::vec::Vec;
10use core::cmp::{Ord, Ordering, PartialOrd};
11use core::fmt;
12use core::marker::PhantomData;
13use core::mem;
14
15use core::ops::Index;
16use core::ops::Range;
17
18/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]`
19/// where `T` is not `Sized`.
20///
21/// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain
22/// owned data and as such is ideal for deserialization since most human readable
23/// serialization formats cannot unconditionally deserialize zero-copy.
24///
25/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap):
26/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead
27/// using `VarZeroVec<ZeroSlice<T>>`.
28///
29/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
30/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
31///
32/// This type can be nested within itself to allow for multi-level nested `Vec`s.
33///
34/// # Examples
35///
36/// ## Nested Slices
37///
38/// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>`
39///
40/// ```rust
41/// use zerovec::{VarZeroSlice, VarZeroVec};
42/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"];
43/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"];
44/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"];
45/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"];
46/// let strings_12 = vec![&*strings_1, &*strings_2];
47/// let strings_34 = vec![&*strings_3, &*strings_4];
48/// let all_strings = vec![strings_12, strings_34];
49///
50/// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1);
51/// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2);
52/// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3);
53/// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4);
54/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]);
55/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]);
56/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]);
57///
58/// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all
59///     .iter()
60///     .map(|v: &VarZeroSlice<VarZeroSlice<str>>| {
61///         v.iter()
62///             .map(|x: &VarZeroSlice<_>| {
63///                 x.as_varzerovec()
64///                     .iter()
65///                     .map(|s| s.to_owned())
66///                     .collect::<Vec<String>>()
67///             })
68///             .collect::<Vec<_>>()
69///     })
70///     .collect::<Vec<_>>();
71/// assert_eq!(reconstructed, all_strings);
72///
73/// let bytes = vzv_all.as_bytes();
74/// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> =
75///     VarZeroVec::parse_byte_slice(bytes).unwrap();
76/// assert_eq!(vzv_from_bytes, vzv_all);
77/// ```
78///
79/// ## Iterate over Windows
80///
81/// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like
82/// [core::slice::Windows], this behavior can be easily modeled using an iterator:
83///
84/// ```
85/// use zerovec::VarZeroVec;
86///
87/// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]);
88/// # let mut pairs: Vec<(&str, &str)> = Vec::new();
89///
90/// let mut it = vzv.iter().peekable();
91/// while let (Some(x), Some(y)) = (it.next(), it.peek()) {
92///     // Evaluate (x, y) here.
93/// #   pairs.push((x, y));
94/// }
95/// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]);
96/// ```
97//
98// safety invariant: The slice MUST be one which parses to
99// a valid VarZeroVecComponents<T>
100#[repr(transparent)]
101pub struct VarZeroSlice<T: ?Sized, F = Index16> {
102    marker: PhantomData<(F, T)>,
103    /// The original slice this was constructed from
104    entire_slice: [u8],
105}
106
107impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
108    /// Construct a new empty VarZeroSlice
109    pub const fn new_empty() -> &'static Self {
110        // The empty VZV is special-cased to the empty slice
111        unsafe { mem::transmute(&[] as &[u8]) }
112    }
113
114    /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer
115    #[inline]
116    pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> {
117        unsafe {
118            // safety: VarZeroSlice is guaranteed to parse here
119            VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice)
120        }
121    }
122
123    /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification.
124    ///
125    /// # Safety
126    ///
127    /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
128    pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
129        // self is really just a wrapper around a byte slice
130        mem::transmute(bytes)
131    }
132
133    /// Get the number of elements in this slice
134    ///
135    /// # Example
136    ///
137    /// ```rust
138    /// # use zerovec::ule::ZeroVecError;
139    /// # use zerovec::VarZeroVec;
140    ///
141    /// let strings = vec!["foo", "bar", "baz", "quux"];
142    /// let vec = VarZeroVec::<str>::from(&strings);
143    ///
144    /// assert_eq!(vec.len(), 4);
145    /// # Ok::<(), ZeroVecError>(())
146    /// ```
147    pub fn len(&self) -> usize {
148        self.as_components().len()
149    }
150
151    /// Returns `true` if the slice contains no elements.
152    ///
153    /// # Examples
154    ///
155    /// ```
156    /// # use zerovec::ule::ZeroVecError;
157    /// # use zerovec::VarZeroVec;
158    ///
159    /// let strings: Vec<String> = vec![];
160    /// let vec = VarZeroVec::<str>::from(&strings);
161    ///
162    /// assert!(vec.is_empty());
163    /// # Ok::<(), ZeroVecError>(())
164    /// ```
165    pub fn is_empty(&self) -> bool {
166        self.as_components().is_empty()
167    }
168
169    /// Obtain an iterator over this slice's elements
170    ///
171    /// # Example
172    ///
173    /// ```rust
174    /// # use zerovec::ule::ZeroVecError;
175    /// # use zerovec::VarZeroVec;
176    ///
177    /// let strings = vec!["foo", "bar", "baz", "quux"];
178    /// let vec = VarZeroVec::<str>::from(&strings);
179    ///
180    /// let mut iter_results: Vec<&str> = vec.iter().collect();
181    /// assert_eq!(iter_results[0], "foo");
182    /// assert_eq!(iter_results[1], "bar");
183    /// assert_eq!(iter_results[2], "baz");
184    /// assert_eq!(iter_results[3], "quux");
185    /// # Ok::<(), ZeroVecError>(())
186    /// ```
187    pub fn iter<'b>(&'b self) -> impl Iterator<Item = &'b T> {
188        self.as_components().iter()
189    }
190
191    /// Get one of this slice's elements, returning `None` if the index is out of bounds
192    ///
193    /// # Example
194    ///
195    /// ```rust
196    /// # use zerovec::ule::ZeroVecError;
197    /// # use zerovec::VarZeroVec;
198    ///
199    /// let strings = vec!["foo", "bar", "baz", "quux"];
200    /// let vec = VarZeroVec::<str>::from(&strings);
201    ///
202    /// let mut iter_results: Vec<&str> = vec.iter().collect();
203    /// assert_eq!(vec.get(0), Some("foo"));
204    /// assert_eq!(vec.get(1), Some("bar"));
205    /// assert_eq!(vec.get(2), Some("baz"));
206    /// assert_eq!(vec.get(3), Some("quux"));
207    /// assert_eq!(vec.get(4), None);
208    /// # Ok::<(), ZeroVecError>(())
209    /// ```
210    pub fn get(&self, idx: usize) -> Option<&T> {
211        self.as_components().get(idx)
212    }
213
214    /// Get one of this slice's elements
215    ///
216    /// # Safety
217    ///
218    /// `index` must be in range
219    ///
220    /// # Example
221    ///
222    /// ```rust
223    /// # use zerovec::ule::ZeroVecError;
224    /// # use zerovec::VarZeroVec;
225    ///
226    /// let strings = vec!["foo", "bar", "baz", "quux"];
227    /// let vec = VarZeroVec::<str>::from(&strings);
228    ///
229    /// let mut iter_results: Vec<&str> = vec.iter().collect();
230    /// unsafe {
231    ///     assert_eq!(vec.get_unchecked(0), "foo");
232    ///     assert_eq!(vec.get_unchecked(1), "bar");
233    ///     assert_eq!(vec.get_unchecked(2), "baz");
234    ///     assert_eq!(vec.get_unchecked(3), "quux");
235    /// }
236    /// # Ok::<(), ZeroVecError>(())
237    /// ```
238    pub unsafe fn get_unchecked(&self, idx: usize) -> &T {
239        self.as_components().get_unchecked(idx)
240    }
241
242    /// Obtain an owned `Vec<Box<T>>` out of this
243    pub fn to_vec(&self) -> Vec<Box<T>> {
244        self.as_components().to_vec()
245    }
246
247    /// Get a reference to the entire encoded backing buffer of this slice
248    ///
249    /// The bytes can be passed back to [`Self::parse_byte_slice()`].
250    ///
251    /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`].
252    ///
253    /// # Example
254    ///
255    /// ```rust
256    /// # use zerovec::ule::ZeroVecError;
257    /// # use zerovec::VarZeroVec;
258    ///
259    /// let strings = vec!["foo", "bar", "baz"];
260    /// let vzv = VarZeroVec::<str>::from(&strings);
261    ///
262    /// assert_eq!(vzv, VarZeroVec::parse_byte_slice(vzv.as_bytes()).unwrap());
263    ///
264    /// # Ok::<(), ZeroVecError>(())
265    /// ```
266    #[inline]
267    pub const fn as_bytes(&self) -> &[u8] {
268        &self.entire_slice
269    }
270
271    /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`]
272    ///
273    /// If you wish to repeatedly call methods on this [`VarZeroSlice`],
274    /// it is more efficient to perform this conversion first
275    pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> {
276        VarZeroVec::Borrowed(self)
277    }
278
279    /// Parse a VarZeroSlice from a slice of the appropriate format
280    ///
281    /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]
282    pub fn parse_byte_slice<'a>(slice: &'a [u8]) -> Result<&'a Self, ZeroVecError> {
283        <Self as VarULE>::parse_byte_slice(slice)
284    }
285
286    /// Convert a `bytes` array known to represent a `VarZeroSlice` to a mutable reference to a `VarZeroSlice`
287    ///
288    /// # Safety
289    /// - `bytes` must be a valid sequence of bytes for this VarZeroVec
290    pub(crate) unsafe fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut Self {
291        // self is really just a wrapper around a byte slice
292        mem::transmute(bytes)
293    }
294
295    pub(crate) unsafe fn get_bytes_at_mut(&mut self, idx: usize) -> &mut [u8] {
296        let range = self.as_components().get_range(idx);
297        #[allow(clippy::indexing_slicing)] // get_range() is known to return in-bounds ranges
298        &mut self.entire_slice[range]
299    }
300}
301
302impl<T, F> VarZeroSlice<T, F>
303where
304    T: VarULE,
305    T: ?Sized,
306    T: Ord,
307    F: VarZeroVecFormat,
308{
309    /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see
310    /// the standard library function [`binary_search`].
311    ///
312    /// # Example
313    ///
314    /// ```
315    /// # use zerovec::ule::ZeroVecError;
316    /// # use zerovec::VarZeroVec;
317    ///
318    /// let strings = vec!["a", "b", "f", "g"];
319    /// let vec = VarZeroVec::<str>::from(&strings);
320    ///
321    /// assert_eq!(vec.binary_search("f"), Ok(2));
322    /// assert_eq!(vec.binary_search("e"), Err(2));
323    /// # Ok::<(), ZeroVecError>(())
324    /// ```
325    ///
326    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
327    #[inline]
328    pub fn binary_search(&self, x: &T) -> Result<usize, usize> {
329        self.as_components().binary_search(x)
330    }
331
332    /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range.
333    ///
334    /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
335    /// to the behavior of the standard library function [`binary_search`].
336    ///
337    /// The index is returned relative to the start of the range.
338    ///
339    /// # Example
340    ///
341    /// ```
342    /// # use zerovec::ule::ZeroVecError;
343    /// # use zerovec::VarZeroVec;
344    ///
345    /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
346    /// let vec = VarZeroVec::<str>::from(&strings);
347    ///
348    /// // Same behavior as binary_search when the range covers the whole slice:
349    /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3)));
350    /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4)));
351    ///
352    /// // Will not look outside of the range:
353    /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1)));
354    /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0)));
355    ///
356    /// // Will return indices relative to the start of the range:
357    /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2)));
358    /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3)));
359    ///
360    /// // Will return `None` if the range is out of bounds:
361    /// assert_eq!(vec.binary_search_in_range("x", 100..200), None);
362    /// assert_eq!(vec.binary_search_in_range("x", 0..200), None);
363    /// # Ok::<(), ZeroVecError>(())
364    /// ```
365    ///
366    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
367    #[inline]
368    pub fn binary_search_in_range(
369        &self,
370        x: &T,
371        range: Range<usize>,
372    ) -> Option<Result<usize, usize>> {
373        self.as_components().binary_search_in_range(x, range)
374    }
375}
376
377impl<T, F> VarZeroSlice<T, F>
378where
379    T: VarULE,
380    T: ?Sized,
381    F: VarZeroVecFormat,
382{
383    /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see
384    /// the standard library function [`binary_search_by`].
385    ///
386    /// # Example
387    ///
388    /// ```
389    /// # use zerovec::ule::ZeroVecError;
390    /// # use zerovec::VarZeroVec;
391    ///
392    /// let strings = vec!["a", "b", "f", "g"];
393    /// let vec = VarZeroVec::<str>::from(&strings);
394    ///
395    /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2));
396    /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2));
397    /// # Ok::<(), ZeroVecError>(())
398    /// ```
399    ///
400    /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by
401    #[inline]
402    pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
403        self.as_components().binary_search_by(predicate)
404    }
405
406    /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range.
407    ///
408    /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
409    /// to the behavior of the standard library function [`binary_search`].
410    ///
411    /// The index is returned relative to the start of the range.
412    ///
413    /// # Example
414    ///
415    /// ```
416    /// # use zerovec::ule::ZeroVecError;
417    /// # use zerovec::VarZeroVec;
418    ///
419    /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
420    /// let vec = VarZeroVec::<str>::from(&strings);
421    ///
422    /// // Same behavior as binary_search when the range covers the whole slice:
423    /// assert_eq!(
424    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7),
425    ///     Some(Ok(3))
426    /// );
427    /// assert_eq!(
428    ///     vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7),
429    ///     Some(Err(4))
430    /// );
431    ///
432    /// // Will not look outside of the range:
433    /// assert_eq!(
434    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1),
435    ///     Some(Err(1))
436    /// );
437    /// assert_eq!(
438    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7),
439    ///     Some(Err(0))
440    /// );
441    ///
442    /// // Will return indices relative to the start of the range:
443    /// assert_eq!(
444    ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6),
445    ///     Some(Ok(2))
446    /// );
447    /// assert_eq!(
448    ///     vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6),
449    ///     Some(Err(3))
450    /// );
451    ///
452    /// // Will return `None` if the range is out of bounds:
453    /// assert_eq!(
454    ///     vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200),
455    ///     None
456    /// );
457    /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None);
458    /// # Ok::<(), ZeroVecError>(())
459    /// ```
460    ///
461    /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
462    pub fn binary_search_in_range_by(
463        &self,
464        predicate: impl FnMut(&T) -> Ordering,
465        range: Range<usize>,
466    ) -> Option<Result<usize, usize>> {
467        self.as_components()
468            .binary_search_in_range_by(predicate, range)
469    }
470}
471// Safety (based on the safety checklist on the VarULE trait):
472//  1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a
473//     `[u8]` slice which satisfies this invariant)
474//  2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a
475//     `[u8]` slice which satisfies this invariant)
476//  3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
477//  4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
478//  5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
479//  6. `as_byte_slice()` is equivalent to a regular transmute of the underlying data
480//  7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type)
481unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> {
482    fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
483        let _: VarZeroVecComponents<T, F> = VarZeroVecComponents::parse_byte_slice(bytes)?;
484        Ok(())
485    }
486
487    unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
488        // self is really just a wrapper around a byte slice
489        mem::transmute(bytes)
490    }
491
492    fn as_byte_slice(&self) -> &[u8] {
493        &self.entire_slice
494    }
495}
496
497impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> {
498    type Output = T;
499    fn index(&self, index: usize) -> &Self::Output {
500        #[allow(clippy::panic)] // documented
501        match self.get(index) {
502            Some(x) => x,
503            None => panic!(
504                "index out of bounds: the len is {} but the index is {index}",
505                self.len()
506            ),
507        }
508    }
509}
510
511impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F>
512where
513    T: VarULE,
514    T: ?Sized,
515    T: PartialEq,
516    F: VarZeroVecFormat,
517{
518    #[inline]
519    fn eq(&self, other: &VarZeroSlice<T, F>) -> bool {
520        // VarULE has an API guarantee that this is equivalent
521        // to `T::VarULE::eq()`
522        self.entire_slice.eq(&other.entire_slice)
523    }
524}
525
526impl<T, F> Eq for VarZeroSlice<T, F>
527where
528    T: VarULE,
529    T: ?Sized,
530    T: Eq,
531    F: VarZeroVecFormat,
532{
533}
534
535impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> {
536    #[inline]
537    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
538        self.iter().partial_cmp(other.iter())
539    }
540}
541
542impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> {
543    #[inline]
544    fn cmp(&self, other: &Self) -> Ordering {
545        self.iter().cmp(other.iter())
546    }
547}
548
549impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F>
550where
551    T: fmt::Debug,
552{
553    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
554        f.debug_list().entries(self.iter()).finish()
555    }
556}
557
558impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> {
559    fn as_ref(&self) -> &VarZeroSlice<T, F> {
560        self
561    }
562}