bitcode/
pack.rs

1use crate::coder::Result;
2use crate::consume::{consume_byte, consume_byte_arrays, consume_bytes};
3use crate::error::err;
4use crate::fast::CowSlice;
5use crate::pack_ints::SizedInt;
6use alloc::vec::Vec;
7
8/// Possible states per byte in descending order. Each packed byte will use `log2(states)` bits.
9#[repr(u8)]
10#[derive(Copy, Clone, PartialEq, PartialOrd)]
11enum Packing {
12    _256 = 0,
13    _16,
14    _6,
15    _4,
16    _3,
17    _2,
18}
19
20impl Packing {
21    fn new(max: u8) -> Self {
22        match max {
23            // We could encode max 0 as nothing, but that could allocate unbounded memory when decoding.
24            0..=1 => Self::_2,
25            2 => Self::_3,
26            3 => Self::_4,
27            4..=5 => Self::_6,
28            6..=15 => Self::_16,
29            _ => Self::_256,
30        }
31    }
32
33    fn write(self, out: &mut Vec<u8>, offset_by_min: bool) {
34        // Encoded in such a way such that 0 is `Self::_256` and higher numbers are smaller packing.
35        // Also makes `Self::_256` with offset_by_min = true is unrepresentable.
36        out.push(self as u8 * 2 - offset_by_min as u8);
37    }
38
39    fn read(input: &mut &[u8]) -> Result<(Self, bool)> {
40        let v = consume_byte(input)?;
41        let p_u8 = crate::nightly::div_ceil_u8(v, 2);
42        let offset_by_min = v & 1 != 0;
43        let p = match p_u8 {
44            0 => Self::_256,
45            1 => Self::_16,
46            2 => Self::_6,
47            3 => Self::_4,
48            4 => Self::_3,
49            5 => Self::_2,
50            _ => return invalid_packing(),
51        };
52        debug_assert_eq!(p as u8, p_u8);
53        Ok((p, offset_by_min))
54    }
55}
56
57pub(crate) fn invalid_packing<T>() -> Result<T> {
58    err("invalid packing")
59}
60
61/// Packs 8 bools per byte.
62pub fn pack_bools(bools: &[bool], out: &mut Vec<u8>) {
63    pack_arithmetic::<2>(bytemuck::cast_slice(bools), out);
64}
65
66/// Unpacks 8 bools per byte. `out` will be overwritten with the bools.
67pub fn unpack_bools(input: &mut &[u8], length: usize, out: &mut CowSlice<bool>) -> Result<()> {
68    // TODO could borrow if length == 1.
69    let mut set_owned = out.set_owned();
70    let out: &mut Vec<bool> = &mut set_owned;
71    // Safety: u8 and bool have same size/align and `out` will only contain bytes that are 0 or 1.
72    let out: &mut Vec<u8> = unsafe { core::mem::transmute(out) };
73    unpack_arithmetic::<2>(input, length, out)
74}
75
76fn skip_packing(length: usize) -> bool {
77    length <= 2 // Packing takes at least 2 bytes, so it can only expand <= 2 bytes.
78}
79
80pub trait Byte: SizedInt {}
81impl Byte for u8 {}
82impl Byte for i8 {}
83
84/// Packs multiple bytes into single bytes and writes them to `out`. This only works if
85/// `max - min < 16`, otherwise this just copies `bytes` to `out`.
86///
87/// These particular tradeoffs were selected so input bytes don't span multiple output bytes to
88/// avoid confusing bytewise compression algorithms (e.g. Deflate).
89///
90/// Mutates `bytes` to avoid copying them. The remaining `bytes` should be considered garbage.
91pub fn pack_bytes<T: Byte>(bytes: &mut [T], out: &mut Vec<u8>) {
92    if skip_packing(bytes.len()) {
93        out.extend_from_slice(bytemuck::must_cast_slice(bytes));
94        return;
95    }
96    let (min, max) = crate::pack_ints::minmax(bytes);
97
98    // i8 packs as u8 if positive.
99    let basic_packing = if min >= T::default() {
100        Packing::new(bytemuck::must_cast(max))
101    } else {
102        Packing::_256 // Any negative i8 as u8 is > 15 and can't be packed without offset_packing.
103    };
104
105    // u8::wrapping_sub == i8::wrapping_sub, so we can use u8s from here onward.
106    let min: u8 = bytemuck::must_cast(min);
107    let max: u8 = bytemuck::must_cast(max);
108    let bytes: &mut [u8] = bytemuck::must_cast_slice_mut(bytes);
109    pack_bytes_unsigned(bytes, out, basic_packing, min, max);
110}
111
112/// [`pack_bytes`] but after i8s have been cast to u8s.
113fn pack_bytes_unsigned(
114    bytes: &mut [u8],
115    out: &mut Vec<u8>,
116    basic_packing: Packing,
117    min: u8,
118    max: u8,
119) {
120    // If subtracting min from all bytes results in a better packing do it, otherwise don't bother.
121    let offset_packing = Packing::new(max.wrapping_sub(min));
122    let p = if offset_packing > basic_packing && bytes.len() > 5 {
123        for b in bytes.iter_mut() {
124            *b = b.wrapping_sub(min);
125        }
126        offset_packing.write(out, true);
127        out.push(min);
128        offset_packing
129    } else {
130        basic_packing.write(out, false);
131        basic_packing
132    };
133
134    match p {
135        Packing::_256 => out.extend_from_slice(bytes),
136        Packing::_16 => pack_arithmetic::<16>(bytes, out),
137        Packing::_6 => pack_arithmetic::<6>(bytes, out),
138        Packing::_4 => pack_arithmetic::<4>(bytes, out),
139        Packing::_3 => pack_arithmetic::<3>(bytes, out),
140        Packing::_2 => pack_arithmetic::<2>(bytes, out),
141    }
142}
143
144/// Opposite of `pack_bytes`. Needs to know the `length` in bytes. `out` is overwritten with the bytes.
145pub fn unpack_bytes<'a, T: Byte>(
146    input: &mut &'a [u8],
147    length: usize,
148    out: &mut CowSlice<'a, T>,
149) -> Result<()> {
150    unpack_bytes_unsigned(input, length, out.cast_mut())
151}
152
153/// [`unpack_bytes`] but after i8s have been cast to u8s.
154fn unpack_bytes_unsigned<'a>(
155    input: &mut &'a [u8],
156    length: usize,
157    out: &mut CowSlice<'a, u8>,
158) -> Result<()> {
159    if skip_packing(length) {
160        out.set_borrowed(consume_bytes(input, length)?);
161        return Ok(());
162    }
163
164    let (p, offset_by_min) = Packing::read(input)?;
165    let min = offset_by_min.then(|| consume_byte(input)).transpose()?;
166
167    if p == Packing::_256 {
168        debug_assert!(min.is_none()); // Packing::_256 with min should be unrepresentable.
169        out.set_borrowed(consume_bytes(input, length)?);
170        return Ok(());
171    }
172
173    let mut set_owned = out.set_owned();
174    let out = &mut *set_owned;
175    match p {
176        Packing::_16 => unpack_arithmetic::<16>(input, length, out)?,
177        Packing::_6 => unpack_arithmetic::<6>(input, length, out)?,
178        Packing::_4 => unpack_arithmetic::<4>(input, length, out)?,
179        Packing::_3 => unpack_arithmetic::<3>(input, length, out)?,
180        Packing::_2 => unpack_arithmetic::<2>(input, length, out)?,
181        Packing::_256 => unreachable!(),
182    }
183    if let Some(min) = min {
184        for v in out {
185            *v = v.wrapping_add(min);
186        }
187    }
188    Ok(())
189}
190
191/// Like `pack_bytes` but all values are less than `N` so it can avoid encoding the packing.
192pub fn pack_bytes_less_than<const N: usize>(bytes: &[u8], out: &mut Vec<u8>) {
193    debug_assert!(bytes.iter().all(|&b| (b as usize) < N));
194    match Packing::new(N.saturating_sub(1) as u8) {
195        Packing::_256 => out.extend_from_slice(bytes),
196        Packing::_16 => pack_arithmetic::<16>(bytes, out),
197        Packing::_6 => pack_arithmetic::<6>(bytes, out),
198        Packing::_4 => pack_arithmetic::<4>(bytes, out),
199        Packing::_3 => pack_arithmetic::<3>(bytes, out),
200        Packing::_2 => pack_arithmetic::<2>(bytes, out),
201    }
202}
203
204/// Like `unpack_bytes` but all values are less than `N` so it can avoid encoding the packing.
205/// Bytes returned by this function are guaranteed less than `N`.
206///
207/// If `HISTOGRAM` is set to `N` it also returns a histogram of the output bytes. This is because
208/// the histogram can be calculated much faster when operating on the packed bytes.
209///
210/// If `HISTOGRAM` is set to `0` it only checks variants < `N` and doesn't calculate a histogram.
211pub fn unpack_bytes_less_than<'a, const N: usize, const HISTOGRAM: usize>(
212    input: &mut &'a [u8],
213    length: usize,
214    out: &mut CowSlice<'a, u8>,
215) -> Result<[usize; HISTOGRAM]> {
216    assert!(HISTOGRAM == N || HISTOGRAM == 0);
217
218    /// Checks that `unpacked` bytes are less than `N`. All of `unpacked` is assumed to be < `FACTOR`.
219    /// `HISTOGRAM` must be 0.
220    fn check_less_than<const N: usize, const HISTOGRAM: usize, const FACTOR: usize>(
221        unpacked: &[u8],
222    ) -> Result<[usize; HISTOGRAM]> {
223        assert!(FACTOR >= N);
224        debug_assert!(unpacked.iter().all(|&v| (v as usize) < FACTOR));
225        if FACTOR > N && unpacked.iter().copied().max().unwrap_or(0) as usize >= N {
226            return invalid_packing();
227        }
228        Ok(core::array::from_fn(|_| unreachable!("HISTOGRAM not 0")))
229    }
230
231    /// Returns `Ok(histogram)` if buckets after `OUT` are 0.
232    fn check_histogram<const IN: usize, const OUT: usize>(
233        histogram: [usize; IN],
234    ) -> Result<[usize; OUT]> {
235        let (histogram, remaining) = histogram.split_at(OUT);
236        if remaining.iter().copied().sum::<usize>() != 0 {
237            return invalid_packing();
238        }
239        Ok(*<&[usize; OUT]>::try_from(histogram).unwrap())
240    }
241
242    let p = Packing::new(N.saturating_sub(1) as u8);
243    if p == Packing::_256 {
244        let bytes = consume_bytes(input, length)?;
245        out.set_borrowed(bytes);
246        return if HISTOGRAM == 0 {
247            check_less_than::<N, HISTOGRAM, 256>(bytes)
248        } else {
249            check_histogram(crate::histogram::histogram(bytes))
250        };
251    }
252
253    /// `FACTOR_POW_DIVISOR == (FACTOR as usize).pow(factor_to_divisor::<FACTOR>() as u32)` but as a constant.
254    fn unpack_arithmetic_less_than<
255        const N: usize,
256        const HISTOGRAM: usize,
257        const FACTOR: usize,
258        const FACTOR_POW_DIVISOR: usize,
259    >(
260        input: &mut &[u8],
261        length: usize,
262        out: &mut Vec<u8>,
263    ) -> Result<[usize; HISTOGRAM]> {
264        assert!(HISTOGRAM == N || HISTOGRAM == 0);
265        assert!(FACTOR >= 2 && FACTOR >= N);
266        let divisor = factor_to_divisor::<FACTOR>();
267        assert_eq!(FACTOR.pow(divisor as u32), FACTOR_POW_DIVISOR);
268
269        let original_input = *input;
270        unpack_arithmetic::<FACTOR>(input, length, out)?;
271        if HISTOGRAM == 0 {
272            check_less_than::<N, HISTOGRAM, FACTOR>(out)
273        } else {
274            let floor = length / divisor;
275            let ceil = crate::nightly::div_ceil_usize(length, divisor);
276            let whole = &original_input[..floor];
277
278            // Can only `partial_with_garbage % FACTOR` partial_length times as the rest are undefined garbage.
279            let partial_length = length - floor * divisor;
280            let partial_with_garbage = original_input[floor..ceil].first().copied();
281
282            // POPCNT is much faster than histogram.
283            let histogram = if FACTOR == 2 {
284                assert_eq!(N, 2);
285                assert_eq!(divisor, 8);
286                let mut one_count = 0;
287                let mut whole = whole;
288                while let Ok(chunk) = consume_byte_arrays(&mut whole, 1) {
289                    one_count += u64::from_ne_bytes(chunk[0]).count_ones() as usize;
290                }
291                for &byte in whole {
292                    one_count += byte.count_ones() as usize;
293                }
294                if let Some(partial_with_garbage) = partial_with_garbage {
295                    // Set undefined garbage bits to zero.
296                    let partial = partial_with_garbage << (divisor - partial_length);
297                    one_count += partial.count_ones() as usize;
298                }
299                Ok(core::array::from_fn(|i| match i {
300                    0 => length - one_count,
301                    1 => one_count,
302                    _ => unreachable!(),
303                }))
304            } else {
305                check_histogram(if whole.len() < 100 {
306                    // Simple path: histogram of unpacked bytes.
307                    let mut histogram = [0; FACTOR];
308                    for &v in out.iter() {
309                        // Safety: unpack_arithmetic::<FACTOR> returns bytes < FACTOR.
310                        unsafe { *histogram.get_unchecked_mut(v as usize) += 1 };
311                    }
312                    histogram
313                } else {
314                    // High throughput path: histogram of packed bytes (one time cost of ~100ns).
315                    let packed_histogram = check_histogram::<256, FACTOR_POW_DIVISOR>(
316                        crate::histogram::histogram(whole),
317                    )?;
318                    let mut histogram: [_; FACTOR] = unpack_histogram(&packed_histogram);
319                    if let Some(mut partial_with_garbage) = partial_with_garbage {
320                        // .min(divisor) does nothing, it's only improve code gen.
321                        for _ in 0..partial_length.min(divisor) {
322                            histogram[partial_with_garbage as usize % FACTOR] += 1;
323                            partial_with_garbage /= FACTOR as u8;
324                        }
325                    }
326                    histogram
327                })
328            };
329            if let Ok(h) = histogram {
330                debug_assert_eq!(
331                    h,
332                    check_histogram(crate::histogram::histogram(out)).unwrap()
333                );
334            }
335            histogram
336        }
337    }
338
339    let mut set_owned = out.set_owned();
340    let out = &mut *set_owned;
341    match p {
342        Packing::_16 => unpack_arithmetic_less_than::<N, HISTOGRAM, 16, 256>(input, length, out),
343        Packing::_6 => unpack_arithmetic_less_than::<N, HISTOGRAM, 6, 216>(input, length, out),
344        Packing::_4 => unpack_arithmetic_less_than::<N, HISTOGRAM, 4, 256>(input, length, out),
345        Packing::_3 => unpack_arithmetic_less_than::<N, HISTOGRAM, 3, 243>(input, length, out),
346        Packing::_2 => unpack_arithmetic_less_than::<N, HISTOGRAM, 2, 256>(input, length, out),
347        Packing::_256 => unreachable!(),
348    }
349}
350
351#[inline(never)]
352fn unpack_histogram<const FACTOR: usize, const FACTOR_POW_DIVISOR: usize>(
353    packed_histogram: &[usize; FACTOR_POW_DIVISOR],
354) -> [usize; FACTOR] {
355    let divisor = factor_to_divisor::<FACTOR>();
356    assert_eq!(FACTOR.pow(divisor as u32), FACTOR_POW_DIVISOR);
357    core::array::from_fn(|i| {
358        let mut sum = 0;
359        for level in 0..divisor {
360            let width = FACTOR.pow(level as u32);
361            let runs = FACTOR_POW_DIVISOR / (width * FACTOR);
362            for run in 0..runs {
363                let run_start = run * (width * FACTOR) + i * width;
364                let section = &packed_histogram[run_start..run_start + width];
365                sum += section.iter().copied().sum::<usize>();
366            }
367        }
368        sum
369    })
370}
371
372#[inline(always)]
373fn factor_to_divisor<const FACTOR: usize>() -> usize {
374    match FACTOR {
375        2 => 8,
376        3 => 5,
377        4 => 4,
378        6 => 3,
379        16 => 2,
380        _ => unreachable!(),
381    }
382}
383
384/// Packs multiple bytes into one. All the bytes must be < `FACTOR`.
385/// Factors 2,4,16 are bit packing. Factors 3,6 are arithmetic coding.
386fn pack_arithmetic<const FACTOR: usize>(bytes: &[u8], out: &mut Vec<u8>) {
387    debug_assert!(bytes.iter().all(|&v| v < FACTOR as u8));
388    let divisor = factor_to_divisor::<FACTOR>();
389
390    let floor = bytes.len() / divisor;
391    let ceil = (bytes.len() + (divisor - 1)) / divisor;
392
393    out.reserve(ceil);
394    let packed = &mut out.spare_capacity_mut()[..ceil];
395
396    for i in 0..floor {
397        unsafe {
398            packed.get_unchecked_mut(i).write(if FACTOR == 2 {
399                let chunk = u64::from_le_bytes(*(bytes.as_ptr() as *const [u8; 8]).add(i));
400                // https://stackoverflow.com/a/51750902
401                (0x0102040810204080u64.wrapping_mul(chunk) >> 56) as u8
402            } else {
403                let mut acc = 0;
404                for byte_index in 0..divisor {
405                    let byte = *bytes.get_unchecked(i * divisor + byte_index);
406                    acc += byte * (FACTOR as u8).pow(byte_index as u32);
407                }
408                acc
409            });
410        }
411    }
412    if floor < ceil {
413        let mut acc = 0;
414        for &v in bytes[floor * divisor..].iter().rev() {
415            acc *= FACTOR as u8;
416            acc += v;
417        }
418        packed[floor].write(acc);
419    }
420    // Safety: `ceil` elements after len were initialized by loops above.
421    unsafe { out.set_len(out.len() + ceil) };
422}
423
424/// Opposite of `pack_arithmetic`. `out` will be overwritten with the unpacked bytes.
425fn unpack_arithmetic<const FACTOR: usize>(
426    input: &mut &[u8],
427    unpacked_len: usize,
428    out: &mut Vec<u8>,
429) -> Result<()> {
430    let divisor = factor_to_divisor::<FACTOR>();
431
432    // TODO STRICT: check that packed.all(|&b| b < FACTOR.powi(divisor)).
433    let floor = unpacked_len / divisor;
434    let ceil = crate::nightly::div_ceil_usize(unpacked_len, divisor);
435    let packed = consume_bytes(input, ceil)?;
436
437    debug_assert!(out.is_empty());
438    out.reserve(unpacked_len);
439    let unpacked = &mut out.spare_capacity_mut()[..unpacked_len];
440
441    for i in 0..floor {
442        unsafe {
443            let mut packed = *packed.get_unchecked(i);
444            if FACTOR == 2 {
445                // https://stackoverflow.com/a/51750902
446                // Can't swap bytes of magic number to avoid swap bytes at runtime because of carries in multiply.
447                let chunk =
448                    ((0x8040201008040201u64.wrapping_mul(packed as u64) & 0x8080808080808080) >> 7)
449                        .swap_bytes();
450                *(unpacked.as_mut_ptr() as *mut [u8; 8]).add(i) = chunk.to_le_bytes();
451            } else {
452                for byte in unpacked.get_unchecked_mut(i * divisor..i * divisor + divisor) {
453                    byte.write(packed % FACTOR as u8);
454                    packed /= FACTOR as u8;
455                }
456            }
457        }
458    }
459    if floor < ceil {
460        let mut packed = packed[floor];
461        for byte in unpacked[floor * divisor..].iter_mut() {
462            byte.write(packed % FACTOR as u8);
463            packed /= FACTOR as u8;
464        }
465    }
466    // Safety: `unpacked_len` elements were initialized by the loops above.
467    unsafe { out.set_len(unpacked_len) };
468    Ok(())
469}
470
471#[cfg(test)]
472mod tests {
473    use crate::error::err;
474    use alloc::borrow::ToOwned;
475    use alloc::vec::Vec;
476    use paste::paste;
477    use test::{black_box, Bencher};
478
479    fn pack_bytes<T: super::Byte>(bytes: &[T]) -> Vec<u8> {
480        let mut out = vec![];
481        super::pack_bytes(&mut bytes.to_owned(), &mut out);
482        out
483    }
484
485    fn unpack_bytes<T: super::Byte>(mut packed: &[u8], length: usize) -> Vec<T> {
486        let mut out = crate::fast::CowSlice::default();
487        super::unpack_bytes(&mut packed, length, &mut out).unwrap();
488        assert!(packed.is_empty());
489        unsafe { out.as_slice(length).to_vec() }
490    }
491
492    #[test]
493    fn test_pack_bytes_u8() {
494        assert_eq!(pack_bytes(&[1u8, 2, 3, 4, 5, 6, 7]).len(), 5);
495        assert_eq!(pack_bytes(&[201u8, 202, 203, 204, 205, 206, 207]).len(), 6);
496
497        for max in 0..255u8 {
498            for sub in [1, 2, 3, 4, 5, 15, 255] {
499                let min = max.saturating_sub(sub);
500                let original = [min, min, min, min, min, min, min, max];
501                let packed = pack_bytes(&original);
502                let unpacked = unpack_bytes(&packed, original.len());
503                assert_eq!(original.as_slice(), unpacked.as_slice());
504            }
505        }
506    }
507
508    #[test]
509    fn test_pack_bytes_i8() {
510        assert_eq!(pack_bytes(&[1i8, 2, 3, 4, 5, 6, 7]).len(), 5);
511        assert_eq!(pack_bytes(&[-1i8, -2, -3, -4, -5, -6, -7]).len(), 6);
512        assert_eq!(pack_bytes(&[-3i8, -2, -1, 0, 1, 2, 3]).len(), 6);
513        assert_eq!(
514            pack_bytes(&[0i8, -1, 0, -1, 0, -1, 0]),
515            [9, (-1i8) as u8, 0b1010101]
516        );
517
518        for max in i8::MIN..i8::MAX {
519            for sub in [1, 2, 3, 4, 5, 15, 127] {
520                let min = max.saturating_sub(sub);
521                let original = [min, min, min, min, min, min, min, max];
522                let packed = pack_bytes(&original);
523                let unpacked = unpack_bytes(&packed, original.len());
524                assert_eq!(original.as_slice(), unpacked.as_slice());
525            }
526        }
527    }
528
529    #[test]
530    fn unpack_bytes_errors() {
531        assert_eq!(
532            super::unpack_bytes::<u8>(&mut [1].as_slice(), 5, &mut Default::default()),
533            err("EOF")
534        );
535        assert_eq!(
536            super::unpack_bytes::<u8>(&mut [255].as_slice(), 5, &mut Default::default()),
537            super::invalid_packing()
538        );
539    }
540
541    fn pack_arithmetic<const FACTOR: usize>(bytes: &[u8]) -> Vec<u8> {
542        let mut out = vec![];
543        super::pack_arithmetic::<FACTOR>(bytes, &mut out);
544        out
545    }
546
547    #[test]
548    fn test_pack_arithmetic() {
549        assert_eq!(pack_arithmetic::<2>(&[1, 0, 1, 0]), [0b0101]);
550        assert_eq!(
551            pack_arithmetic::<2>(&[1, 0, 1, 0, 1, 0, 1, 0]),
552            [0b01010101]
553        );
554        assert_eq!(
555            pack_arithmetic::<2>(&[1, 0, 1, 0, 1, 0, 1, 0, 1]),
556            [0b01010101, 0b1]
557        );
558
559        assert_eq!(pack_arithmetic::<3>(&[0]), [0]);
560        assert_eq!(pack_arithmetic::<3>(&[0, 1]), [0 + 1 * 3]);
561        assert_eq!(pack_arithmetic::<3>(&[0, 1, 2]), [0 + 1 * 3 + 2 * 3 * 3]);
562        assert_eq!(
563            pack_arithmetic::<3>(&[2, 0, 0, 0, 0, 0, 1, 2]),
564            [2, 0 + 1 * 3 + 2 * 3 * 3]
565        );
566
567        assert_eq!(pack_arithmetic::<4>(&[1, 0]), [0b0001]);
568        assert_eq!(pack_arithmetic::<4>(&[1, 0, 1, 0]), [0b00010001]);
569        assert_eq!(
570            pack_arithmetic::<4>(&[1, 0, 1, 0, 1, 0]),
571            [0b00010001, 0b0001]
572        );
573
574        assert_eq!(pack_arithmetic::<6>(&[0]), [0]);
575        assert_eq!(pack_arithmetic::<6>(&[0, 1]), [0 + 1 * 6]);
576        assert_eq!(pack_arithmetic::<6>(&[0, 1, 2]), [0 + 1 * 6 + 2 * 6 * 6]);
577        assert_eq!(
578            pack_arithmetic::<6>(&[2, 0, 0, 0, 1, 2]),
579            [2, 0 + 1 * 6 + 2 * 6 * 6]
580        );
581
582        assert_eq!(pack_arithmetic::<16>(&[1]), [0b0001]);
583        assert_eq!(pack_arithmetic::<16>(&[1, 0]), [0b00000001]);
584        assert_eq!(pack_arithmetic::<16>(&[1, 0, 1]), [0b00000001, 0b0001]);
585    }
586
587    #[test]
588    fn test_unpack_arithmetic() {
589        fn test<const FACTOR: usize>(bytes: &[u8]) {
590            let packed = pack_arithmetic::<FACTOR>(bytes);
591
592            let mut input = packed.as_slice();
593            let mut bytes2 = vec![];
594            super::unpack_arithmetic::<FACTOR>(&mut input, bytes.len(), &mut bytes2).unwrap();
595            assert!(input.is_empty());
596            assert_eq!(bytes, bytes2);
597        }
598
599        test::<2>(&[1, 0, 1, 0]);
600        test::<2>(&[1, 0, 1, 0, 1, 0, 1, 0]);
601        test::<2>(&[1, 0, 1, 0, 1, 0, 1, 0, 1]);
602
603        test::<3>(&[0]);
604        test::<3>(&[0, 1]);
605        test::<3>(&[0, 1, 2]);
606        test::<3>(&[2, 0, 0, 0, 0, 0, 1, 2]);
607
608        test::<4>(&[1, 0]);
609        test::<4>(&[1, 0, 1, 0]);
610        test::<4>(&[1, 0, 1, 0, 1, 0]);
611
612        test::<6>(&[0]);
613        test::<6>(&[0, 1]);
614        test::<6>(&[0, 1, 2]);
615        test::<6>(&[2, 0, 0, 0, 1, 2]);
616
617        test::<16>(&[1]);
618        test::<16>(&[1, 0]);
619        test::<16>(&[1, 0, 1]);
620    }
621
622    fn bench_pack_arithmetic<const FACTOR: usize>(b: &mut Bencher) {
623        let bytes = vec![0; 1000];
624        let mut out = Vec::with_capacity(bytes.len());
625        b.iter(|| {
626            out.clear();
627            super::pack_arithmetic::<FACTOR>(&bytes, black_box(&mut out));
628        });
629    }
630
631    fn bench_unpack_arithmetic<const FACTOR: usize>(b: &mut Bencher) {
632        let unpacked_len = 1000;
633        let packed = pack_arithmetic::<FACTOR>(&vec![0; unpacked_len]);
634        let mut out = Vec::with_capacity(unpacked_len);
635
636        b.iter(|| {
637            let mut input = packed.as_slice();
638            let input = black_box(&mut input);
639            let unpacked_len = black_box(unpacked_len);
640            out.clear();
641            super::unpack_arithmetic::<FACTOR>(input, unpacked_len, black_box(&mut out)).unwrap();
642        });
643    }
644
645    macro_rules! bench_n {
646        ($bench:ident, $($n:literal),+) => {
647            paste! {
648                $(
649                    #[bench]
650                    fn [<$bench $n>](b: &mut Bencher) {
651                        $bench::<$n>(b);
652                    }
653                )+
654            }
655        }
656    }
657    bench_n!(bench_pack_arithmetic, 2, 3, 4, 6, 16);
658    bench_n!(bench_unpack_arithmetic, 2, 3, 4, 6, 16);
659
660    fn test_pack_bytes_less_than_n<const N: usize, const FACTOR: usize>() {
661        for n in [1, 11, 97, 991, 10007].into_iter().flat_map(|n_prime| {
662            let divisor = if FACTOR == 256 {
663                1
664            } else {
665                super::factor_to_divisor::<FACTOR>()
666            };
667            let n_factor = crate::nightly::div_ceil_usize(n_prime, divisor) * divisor;
668            [n_factor, n_prime]
669        }) {
670            let bytes: Vec<_> = crate::random_data(n)
671                .into_iter()
672                .map(|v: usize| (v % N as usize) as u8)
673                .collect();
674            let n = bytes.len(); // random_data shrinks n on miri.
675
676            #[cfg(feature = "std")]
677            println!("n {n}, N {N}, FACTOR {FACTOR}");
678            if N != FACTOR {
679                let mut bytes = bytes.clone();
680                bytes[n - 1] = (FACTOR - 1) as u8; // Make least 1 byte is out of bounds.
681                let mut packed = vec![];
682                super::pack_bytes_less_than::<FACTOR>(&bytes, &mut packed);
683
684                assert!(super::unpack_bytes_less_than::<N, 0>(
685                    &mut packed.as_slice(),
686                    bytes.len(),
687                    &mut crate::fast::CowSlice::default()
688                )
689                .is_err());
690                assert!(super::unpack_bytes_less_than::<N, N>(
691                    &mut packed.as_slice(),
692                    bytes.len(),
693                    &mut crate::fast::CowSlice::default()
694                )
695                .is_err());
696            }
697
698            let mut packed = vec![];
699            super::pack_bytes_less_than::<N>(&bytes, &mut packed);
700
701            let mut input = packed.as_slice();
702            let mut unpacked = crate::fast::CowSlice::default();
703            super::unpack_bytes_less_than::<N, 0>(&mut input, bytes.len(), &mut unpacked).unwrap();
704            assert!(input.is_empty());
705            assert_eq!(unsafe { unpacked.as_slice(bytes.len()) }, bytes);
706
707            let mut input = packed.as_slice();
708            let mut unpacked = crate::fast::CowSlice::default();
709            let histogram =
710                super::unpack_bytes_less_than::<N, N>(&mut input, bytes.len(), &mut unpacked)
711                    .unwrap();
712            assert!(input.is_empty());
713            assert_eq!(unsafe { unpacked.as_slice(bytes.len()) }, bytes);
714            assert_eq!(
715                histogram.as_slice(),
716                &crate::histogram::histogram(&bytes)[..N]
717            );
718        }
719    }
720
721    macro_rules! test_pack_bytes_less_than_n {
722        ($($n:literal => $factor:literal),+) => {
723            $(
724                paste::paste! {
725                    #[test]
726                    fn [<test_pack_bytes_less_than_ $n>]() {
727                        test_pack_bytes_less_than_n::<$n, $factor>();
728                    }
729                }
730            )+
731        }
732    }
733    // Test factors and +/- 1 to catch off by 1 errors.
734    test_pack_bytes_less_than_n!(2 => 2, 3 => 3, 4 => 4, 5 => 6, 6 => 6, 7 => 16);
735    test_pack_bytes_less_than_n!(15 => 16, 16 => 16, 17 => 256, 255 => 256, 256 => 256);
736
737    macro_rules! bench_unpack_histogram {
738        ($($f:literal => $fpd:literal),+) => {
739            $(
740                paste::paste! {
741                    #[bench]
742                    fn [<bench_unpack_histogram $f>](b: &mut Bencher) {
743                        b.iter(|| {
744                            super::unpack_histogram::<$f, $fpd>(black_box(&[0; $fpd]))
745                        });
746                    }
747                }
748            )+
749        }
750    }
751    bench_unpack_histogram!(3 => 243, 4 => 256, 6 => 216, 16 => 256);
752
753    macro_rules! bench_unpack_bytes_less_than {
754        ($($n:literal),+) => {
755            $(
756                paste::paste! {
757                    #[bench]
758                    fn [<bench_unpack_bytes_less_than $n>](b: &mut Bencher) {
759                        let mut out = crate::fast::CowSlice::default();
760                        b.iter(|| {
761                            super::unpack_bytes_less_than::<$n, 0>(black_box(&mut [0].as_slice()), black_box(1), black_box(&mut out)).unwrap();
762                        });
763                    }
764
765                    #[bench]
766                    fn [<bench_unpack_bytes_less_than $n _histogram>](b: &mut Bencher) {
767                        let mut out = crate::fast::CowSlice::default();
768                        b.iter(|| {
769                            super::unpack_bytes_less_than::<$n, $n>(black_box(&mut [0].as_slice()), black_box(1), black_box(&mut out)).unwrap();
770                        });
771                    }
772                }
773            )+
774        }
775    }
776    bench_unpack_bytes_less_than!(2, 3, 4, 6, 16, 256);
777}