bitcode/
histogram.rs

1pub fn histogram(bytes: &[u8]) -> [usize; 256] {
2    if bytes.len() < 100 {
3        histogram_simple(bytes)
4    } else {
5        histogram_parallel(bytes)
6    }
7}
8
9fn histogram_simple(bytes: &[u8]) -> [usize; 256] {
10    let mut histogram = [0; 256];
11    for &v in bytes {
12        histogram[v as usize] += 1;
13    }
14    histogram
15}
16
17fn histogram_parallel(bytes: &[u8]) -> [usize; 256] {
18    // Summing multiple 32 bit histograms is faster than a 64 bit histogram.
19    let mut total = [0; 256];
20    for bytes in bytes.chunks(u32::MAX as usize) {
21        for (i, &v) in histogram_parallel_u32(bytes).iter().enumerate() {
22            total[i] += v as usize;
23        }
24    }
25    total
26}
27
28// Based on https://github.com/facebook/zstd/blob/1518570c62b95136b6a69714012957cae5487a9a/lib/compress/hist.c#L66
29fn histogram_parallel_u32(bytes: &[u8]) -> [u32; 256] {
30    let mut histograms = [[0; 256]; 4];
31
32    let (chunks, remainder) = bytes.split_at(bytes.len() / 16 * 16);
33    let chunks16: &[[[u8; 4]; 4]] = bytemuck::cast_slice(chunks);
34    for chunk16 in chunks16 {
35        for chunk4 in chunk16 {
36            let c = u32::from_ne_bytes(*chunk4);
37            histograms[0][c as u8 as usize] += 1;
38            histograms[1][(c >> 8) as u8 as usize] += 1;
39            histograms[2][(c >> 16) as u8 as usize] += 1;
40            histograms[3][(c >> 24) as usize] += 1;
41        }
42    }
43    for &v in remainder {
44        histograms[0][v as usize] += 1;
45    }
46
47    let (dst, src) = histograms.split_at_mut(1);
48    let dst = &mut dst[0];
49    for i in 0..256 {
50        for src in src.iter() {
51            dst[i] += src[i];
52        }
53    }
54    *dst
55}
56
57#[cfg(test)]
58mod tests {
59    use super::*;
60    use alloc::vec::Vec;
61    use rand::prelude::*;
62    use rand_chacha::ChaCha20Rng;
63    use test::{black_box, Bencher};
64
65    fn bench_data(n: usize) -> Vec<u8> {
66        let mut rng = ChaCha20Rng::from_seed(Default::default());
67        core::iter::repeat_with(|| rng.gen_range(0..2))
68            .take(crate::limit_bench_miri(n))
69            .collect()
70    }
71
72    fn bench_histogram_parallel(b: &mut Bencher, n: usize) {
73        let data = bench_data(n);
74        b.iter(|| histogram_parallel(black_box(&data)));
75    }
76
77    fn bench_histogram_simple(b: &mut Bencher, n: usize) {
78        let data = bench_data(n);
79        b.iter(|| histogram_simple(black_box(&data)));
80    }
81
82    macro_rules! bench {
83        ($name:ident, $($n:literal),+) => {
84            paste::paste! {
85                $(
86                    #[bench]
87                    fn [<$name _ $n>](b: &mut Bencher) {
88                        $name(b, $n);
89                    }
90                )+
91            }
92        }
93    }
94    bench!(bench_histogram_parallel, 10, 100, 1000, 10000);
95    bench!(bench_histogram_simple, 10, 100, 1000, 10000);
96}