nibble_vec/
lib.rs

1#[cfg(test)]
2mod test;
3
4use smallvec::{Array, SmallVec};
5
6use std::convert::{From, Into};
7use std::fmt::{self, Debug, Formatter};
8use std::iter::FromIterator;
9
10/// A `NibbleVec` backed by a `SmallVec` with 64 inline element slots.
11/// This will not allocate until more than 64 elements are added.
12pub type Nibblet = NibbleVec<[u8; 64]>;
13
14/// A data-structure for storing a sequence of 4-bit values.
15///
16/// Values are stored in a `Vec<u8>`, with two values per byte.
17///
18/// Values at even indices are stored in the most-significant half of their byte,
19/// while values at odd indices are stored in the least-significant half.
20///
21/// Imagine a vector of [MSB][msb-wiki] first bytes, and you'll be right.
22///
23/// n = [_ _ | _ _ | _ _]
24///
25/// [msb-wiki]: http://en.wikipedia.org/wiki/Most_significant_bit
26#[derive(Clone, Default)]
27pub struct NibbleVec<A: Array<Item = u8>> {
28    length: usize,
29    data: SmallVec<A>,
30}
31
32impl<A: Array<Item = u8>> NibbleVec<A> {
33    /// Create an empty nibble vector.
34    pub fn new() -> NibbleVec<A> {
35        NibbleVec {
36            length: 0,
37            data: SmallVec::new(),
38        }
39    }
40
41    /// Create a nibble vector from a vector of bytes.
42    ///
43    /// Each byte is split into two 4-bit entries (MSB, LSB).
44    #[inline]
45    pub fn from_byte_vec(vec: Vec<u8>) -> NibbleVec<A> {
46        let length = 2 * vec.len();
47        NibbleVec {
48            length,
49            data: SmallVec::from_iter(vec),
50        }
51    }
52
53    /// Returns a byte slice of the nibble vector's contents.
54    #[inline]
55    pub fn as_bytes(&self) -> &[u8] {
56        &self.data[..]
57    }
58
59    /// Converts a nibble vector into a byte vector.
60    ///
61    /// This consumes the nibble vector, so we do not need to copy its contents.
62    #[inline]
63    pub fn into_bytes(self) -> Vec<u8> {
64        self.data.to_vec()
65    }
66
67    /// Get the number of elements stored in the vector.
68    #[inline]
69    pub fn len(&self) -> usize {
70        self.length
71    }
72
73    /// Returns `true` if the nibble vector has a length of 0.
74    #[inline]
75    pub fn is_empty(&self) -> bool {
76        self.data.is_empty()
77    }
78
79    /// Fetch a single entry from the vector.
80    ///
81    /// Guaranteed to be a value in the interval [0, 15].
82    ///
83    /// **Panics** if `idx >= self.len()`.
84    #[inline]
85    pub fn get(&self, idx: usize) -> u8 {
86        if idx >= self.length {
87            panic!(
88                "NibbleVec index out of bounds: len is {}, index is {}",
89                self.length, idx
90            );
91        }
92        let vec_idx = idx / 2;
93        match idx % 2 {
94            // If the index is even, take the first (most significant) half of the stored byte.
95            0 => self.data[vec_idx] >> 4,
96            // If the index is odd, take the second (least significant) half.
97            _ => self.data[vec_idx] & 0x0F,
98        }
99    }
100
101    /// Add a single nibble to the vector.
102    ///
103    /// Only the 4 least-significant bits of the value are used.
104    #[inline]
105    pub fn push(&mut self, val: u8) {
106        if self.length % 2 == 0 {
107            self.data.push(val << 4);
108        } else {
109            let vec_len = self.data.len();
110
111            // Zero the second half of the last byte just to be safe.
112            self.data[vec_len - 1] &= 0xF0;
113
114            // Write the new value.
115            self.data[vec_len - 1] |= val & 0x0F;
116        }
117        self.length += 1;
118    }
119
120    /// Split the vector into two parts.
121    ///
122    /// All elements at or following the given index are returned in a new `NibbleVec`,
123    /// with exactly `idx` elements remaining in this vector.
124    ///
125    /// **Panics** if `idx > self.len()`.
126    pub fn split(&mut self, idx: usize) -> NibbleVec<A> {
127        // assert! is a few percent slower surprisingly
128        if idx > self.length {
129            panic!(
130                "attempted to split past vector end. len is {}, index is {}",
131                self.length, idx
132            );
133        } else if idx == self.length {
134            NibbleVec::new()
135        } else if idx % 2 == 0 {
136            self.split_even(idx)
137        } else {
138            self.split_odd(idx)
139        }
140    }
141
142    /// Split function for odd *indices*.
143    #[inline]
144    fn split_odd(&mut self, idx: usize) -> NibbleVec<A> {
145        let mut tail = NibbleVec::new();
146
147        // Perform an overlap copy, copying the last nibble of the original vector only if
148        // the length of the new tail is *odd*.
149        let tail_length = self.length - idx;
150        let take_last = tail_length % 2 == 1;
151        self.overlap_copy(
152            idx / 2,
153            self.data.len(),
154            &mut tail.data,
155            &mut tail.length,
156            take_last,
157        );
158
159        // Remove the copied bytes, being careful to skip the idx byte.
160        for _ in (idx / 2 + 1)..self.data.len() {
161            self.data.pop();
162        }
163
164        // Zero the second half of the index byte so as to maintain the last-nibble invariant.
165        self.data[idx / 2] &= 0xF0;
166
167        // Update the length of the first NibbleVec.
168        self.length = idx;
169
170        tail
171    }
172
173    /// Split function for even *indices*.
174    #[inline]
175    fn split_even(&mut self, idx: usize) -> NibbleVec<A> {
176        // Avoid allocating a temporary vector by copying all the bytes in order, then popping them.
177
178        // Possible to prove: l_d - ⌊i / 2⌋ = ⌊(l_v - i + 1) / 2⌋
179        //  where l_d = self.data.len()
180        //        l_v = self.length
181
182        let half_idx = idx / 2;
183        let mut tail = NibbleVec::new();
184
185        // Copy the bytes.
186        for i in half_idx..self.data.len() {
187            tail.data.push(self.data[i]);
188        }
189
190        // Pop the same bytes.
191        for _ in half_idx..self.data.len() {
192            self.data.pop();
193        }
194
195        // Update lengths.
196        tail.length = self.length - idx;
197        self.length = idx;
198
199        tail
200    }
201
202    /// Copy data between the second half of self.data[start] and
203    /// self.data[end - 1]. The second half of the last entry is included
204    /// if include_last is true.
205    #[inline]
206    fn overlap_copy(
207        &self,
208        start: usize,
209        end: usize,
210        vec: &mut SmallVec<A>,
211        length: &mut usize,
212        include_last: bool,
213    ) {
214        // Copy up to the first half of the last byte.
215        for i in start..(end - 1) {
216            // The first half is the second half of the old entry.
217            let first_half = self.data[i] & 0x0f;
218
219            // The second half is the first half of the next entry.
220            let second_half = self.data[i + 1] >> 4;
221
222            vec.push((first_half << 4) | second_half);
223            *length += 2;
224        }
225
226        if include_last {
227            let last = self.data[end - 1] & 0x0f;
228            vec.push(last << 4);
229            *length += 1;
230        }
231    }
232
233    /// Append another nibble vector whilst consuming this vector.
234    #[inline]
235    pub fn join(mut self, other: &NibbleVec<A>) -> NibbleVec<A> {
236        // If the length is even, we can append directly.
237        if self.length % 2 == 0 {
238            self.length += other.length;
239            self.data.extend_from_slice(&other.data);
240            return self;
241        }
242
243        // If the other vector is empty, bail out.
244        if other.is_empty() {
245            return self;
246        }
247
248        // If the length is odd, we have to perform an overlap copy.
249        // Copy the first half of the first element, to make the vector an even length.
250        self.push(other.get(0));
251
252        // Copy the rest of the vector using an overlap copy.
253        let take_last = other.len() % 2 == 0;
254        other.overlap_copy(
255            0,
256            other.data.len(),
257            &mut self.data,
258            &mut self.length,
259            take_last,
260        );
261
262        self
263    }
264}
265
266impl<A: Array<Item = u8>> PartialEq<NibbleVec<A>> for NibbleVec<A> {
267    #[inline]
268    fn eq(&self, other: &NibbleVec<A>) -> bool {
269        self.length == other.length && self.data == other.data
270    }
271}
272
273impl<A: Array<Item = u8>> Eq for NibbleVec<A> {}
274
275/// Compare a `NibbleVec` and a slice of bytes *element-by-element*.
276/// Bytes are **not** interpreted as two `NibbleVec` entries.
277impl<A: Array<Item = u8>> PartialEq<[u8]> for NibbleVec<A> {
278    #[inline]
279    fn eq(&self, other: &[u8]) -> bool {
280        if other.len() != self.len() {
281            return false;
282        }
283
284        for (i, x) in other.iter().enumerate() {
285            if self.get(i) != *x {
286                return false;
287            }
288        }
289        true
290    }
291}
292
293impl<A: Array<Item = u8>> Debug for NibbleVec<A> {
294    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
295        write!(fmt, "NibbleVec [")?;
296
297        if !self.is_empty() {
298            write!(fmt, "{}", self.get(0))?;
299        }
300
301        for i in 1..self.len() {
302            write!(fmt, ", {}", self.get(i))?;
303        }
304        write!(fmt, "]")
305    }
306}
307
308impl<A: Array<Item = u8>> From<Vec<u8>> for NibbleVec<A> {
309    #[inline]
310    fn from(v: Vec<u8>) -> NibbleVec<A> {
311        NibbleVec::from_byte_vec(v)
312    }
313}
314
315impl<'a, A: Array<Item = u8>> From<&'a [u8]> for NibbleVec<A> {
316    #[inline]
317    fn from(v: &[u8]) -> NibbleVec<A> {
318        NibbleVec::from_byte_vec(v.into())
319    }
320}
321
322impl<A: Array<Item = u8>> Into<Vec<u8>> for NibbleVec<A> {
323    #[inline]
324    fn into(self) -> Vec<u8> {
325        self.data.to_vec()
326    }
327}
328
329impl<'a, A: Array<Item = u8>> Into<Vec<u8>> for &'a NibbleVec<A> {
330    #[inline]
331    fn into(self) -> Vec<u8> {
332        self.data.to_vec()
333    }
334}