nibble_vec/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
#[cfg(test)]
mod test;

use smallvec::{Array, SmallVec};

use std::convert::{From, Into};
use std::fmt::{self, Debug, Formatter};
use std::iter::FromIterator;

/// A `NibbleVec` backed by a `SmallVec` with 64 inline element slots.
/// This will not allocate until more than 64 elements are added.
pub type Nibblet = NibbleVec<[u8; 64]>;

/// A data-structure for storing a sequence of 4-bit values.
///
/// Values are stored in a `Vec<u8>`, with two values per byte.
///
/// Values at even indices are stored in the most-significant half of their byte,
/// while values at odd indices are stored in the least-significant half.
///
/// Imagine a vector of [MSB][msb-wiki] first bytes, and you'll be right.
///
/// n = [_ _ | _ _ | _ _]
///
/// [msb-wiki]: http://en.wikipedia.org/wiki/Most_significant_bit
#[derive(Clone, Default)]
pub struct NibbleVec<A: Array<Item = u8>> {
    length: usize,
    data: SmallVec<A>,
}

impl<A: Array<Item = u8>> NibbleVec<A> {
    /// Create an empty nibble vector.
    pub fn new() -> NibbleVec<A> {
        NibbleVec {
            length: 0,
            data: SmallVec::new(),
        }
    }

    /// Create a nibble vector from a vector of bytes.
    ///
    /// Each byte is split into two 4-bit entries (MSB, LSB).
    #[inline]
    pub fn from_byte_vec(vec: Vec<u8>) -> NibbleVec<A> {
        let length = 2 * vec.len();
        NibbleVec {
            length,
            data: SmallVec::from_iter(vec),
        }
    }

    /// Returns a byte slice of the nibble vector's contents.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        &self.data[..]
    }

    /// Converts a nibble vector into a byte vector.
    ///
    /// This consumes the nibble vector, so we do not need to copy its contents.
    #[inline]
    pub fn into_bytes(self) -> Vec<u8> {
        self.data.to_vec()
    }

    /// Get the number of elements stored in the vector.
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns `true` if the nibble vector has a length of 0.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Fetch a single entry from the vector.
    ///
    /// Guaranteed to be a value in the interval [0, 15].
    ///
    /// **Panics** if `idx >= self.len()`.
    #[inline]
    pub fn get(&self, idx: usize) -> u8 {
        if idx >= self.length {
            panic!(
                "NibbleVec index out of bounds: len is {}, index is {}",
                self.length, idx
            );
        }
        let vec_idx = idx / 2;
        match idx % 2 {
            // If the index is even, take the first (most significant) half of the stored byte.
            0 => self.data[vec_idx] >> 4,
            // If the index is odd, take the second (least significant) half.
            _ => self.data[vec_idx] & 0x0F,
        }
    }

    /// Add a single nibble to the vector.
    ///
    /// Only the 4 least-significant bits of the value are used.
    #[inline]
    pub fn push(&mut self, val: u8) {
        if self.length % 2 == 0 {
            self.data.push(val << 4);
        } else {
            let vec_len = self.data.len();

            // Zero the second half of the last byte just to be safe.
            self.data[vec_len - 1] &= 0xF0;

            // Write the new value.
            self.data[vec_len - 1] |= val & 0x0F;
        }
        self.length += 1;
    }

    /// Split the vector into two parts.
    ///
    /// All elements at or following the given index are returned in a new `NibbleVec`,
    /// with exactly `idx` elements remaining in this vector.
    ///
    /// **Panics** if `idx > self.len()`.
    pub fn split(&mut self, idx: usize) -> NibbleVec<A> {
        // assert! is a few percent slower surprisingly
        if idx > self.length {
            panic!(
                "attempted to split past vector end. len is {}, index is {}",
                self.length, idx
            );
        } else if idx == self.length {
            NibbleVec::new()
        } else if idx % 2 == 0 {
            self.split_even(idx)
        } else {
            self.split_odd(idx)
        }
    }

    /// Split function for odd *indices*.
    #[inline]
    fn split_odd(&mut self, idx: usize) -> NibbleVec<A> {
        let mut tail = NibbleVec::new();

        // Perform an overlap copy, copying the last nibble of the original vector only if
        // the length of the new tail is *odd*.
        let tail_length = self.length - idx;
        let take_last = tail_length % 2 == 1;
        self.overlap_copy(
            idx / 2,
            self.data.len(),
            &mut tail.data,
            &mut tail.length,
            take_last,
        );

        // Remove the copied bytes, being careful to skip the idx byte.
        for _ in (idx / 2 + 1)..self.data.len() {
            self.data.pop();
        }

        // Zero the second half of the index byte so as to maintain the last-nibble invariant.
        self.data[idx / 2] &= 0xF0;

        // Update the length of the first NibbleVec.
        self.length = idx;

        tail
    }

    /// Split function for even *indices*.
    #[inline]
    fn split_even(&mut self, idx: usize) -> NibbleVec<A> {
        // Avoid allocating a temporary vector by copying all the bytes in order, then popping them.

        // Possible to prove: l_d - ⌊i / 2⌋ = ⌊(l_v - i + 1) / 2⌋
        //  where l_d = self.data.len()
        //        l_v = self.length

        let half_idx = idx / 2;
        let mut tail = NibbleVec::new();

        // Copy the bytes.
        for i in half_idx..self.data.len() {
            tail.data.push(self.data[i]);
        }

        // Pop the same bytes.
        for _ in half_idx..self.data.len() {
            self.data.pop();
        }

        // Update lengths.
        tail.length = self.length - idx;
        self.length = idx;

        tail
    }

    /// Copy data between the second half of self.data[start] and
    /// self.data[end - 1]. The second half of the last entry is included
    /// if include_last is true.
    #[inline]
    fn overlap_copy(
        &self,
        start: usize,
        end: usize,
        vec: &mut SmallVec<A>,
        length: &mut usize,
        include_last: bool,
    ) {
        // Copy up to the first half of the last byte.
        for i in start..(end - 1) {
            // The first half is the second half of the old entry.
            let first_half = self.data[i] & 0x0f;

            // The second half is the first half of the next entry.
            let second_half = self.data[i + 1] >> 4;

            vec.push((first_half << 4) | second_half);
            *length += 2;
        }

        if include_last {
            let last = self.data[end - 1] & 0x0f;
            vec.push(last << 4);
            *length += 1;
        }
    }

    /// Append another nibble vector whilst consuming this vector.
    #[inline]
    pub fn join(mut self, other: &NibbleVec<A>) -> NibbleVec<A> {
        // If the length is even, we can append directly.
        if self.length % 2 == 0 {
            self.length += other.length;
            self.data.extend_from_slice(&other.data);
            return self;
        }

        // If the other vector is empty, bail out.
        if other.is_empty() {
            return self;
        }

        // If the length is odd, we have to perform an overlap copy.
        // Copy the first half of the first element, to make the vector an even length.
        self.push(other.get(0));

        // Copy the rest of the vector using an overlap copy.
        let take_last = other.len() % 2 == 0;
        other.overlap_copy(
            0,
            other.data.len(),
            &mut self.data,
            &mut self.length,
            take_last,
        );

        self
    }
}

impl<A: Array<Item = u8>> PartialEq<NibbleVec<A>> for NibbleVec<A> {
    #[inline]
    fn eq(&self, other: &NibbleVec<A>) -> bool {
        self.length == other.length && self.data == other.data
    }
}

impl<A: Array<Item = u8>> Eq for NibbleVec<A> {}

/// Compare a `NibbleVec` and a slice of bytes *element-by-element*.
/// Bytes are **not** interpreted as two `NibbleVec` entries.
impl<A: Array<Item = u8>> PartialEq<[u8]> for NibbleVec<A> {
    #[inline]
    fn eq(&self, other: &[u8]) -> bool {
        if other.len() != self.len() {
            return false;
        }

        for (i, x) in other.iter().enumerate() {
            if self.get(i) != *x {
                return false;
            }
        }
        true
    }
}

impl<A: Array<Item = u8>> Debug for NibbleVec<A> {
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        write!(fmt, "NibbleVec [")?;

        if !self.is_empty() {
            write!(fmt, "{}", self.get(0))?;
        }

        for i in 1..self.len() {
            write!(fmt, ", {}", self.get(i))?;
        }
        write!(fmt, "]")
    }
}

impl<A: Array<Item = u8>> From<Vec<u8>> for NibbleVec<A> {
    #[inline]
    fn from(v: Vec<u8>) -> NibbleVec<A> {
        NibbleVec::from_byte_vec(v)
    }
}

impl<'a, A: Array<Item = u8>> From<&'a [u8]> for NibbleVec<A> {
    #[inline]
    fn from(v: &[u8]) -> NibbleVec<A> {
        NibbleVec::from_byte_vec(v.into())
    }
}

impl<A: Array<Item = u8>> Into<Vec<u8>> for NibbleVec<A> {
    #[inline]
    fn into(self) -> Vec<u8> {
        self.data.to_vec()
    }
}

impl<'a, A: Array<Item = u8>> Into<Vec<u8>> for &'a NibbleVec<A> {
    #[inline]
    fn into(self) -> Vec<u8> {
        self.data.to_vec()
    }
}