p3_util/
lib.rs

//! Various simple utilities.

#![no_std]

extern crate alloc;

use alloc::string::String;
use alloc::vec::Vec;
use core::any::type_name;
use core::hint::unreachable_unchecked;
use core::mem;
use core::mem::MaybeUninit;

pub mod array_serialization;
pub mod linear_map;
pub mod transpose;
pub mod zip_eq;

/// Computes `ceil(log_2(n))`.
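///
/// A few illustrative values:
/// ```
/// assert_eq!(p3_util::log2_ceil_usize(1), 0);
/// assert_eq!(p3_util::log2_ceil_usize(3), 2);
/// assert_eq!(p3_util::log2_ceil_usize(8), 3);
/// ```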
#[must_use]
pub const fn log2_ceil_usize(n: usize) -> usize {
    (usize::BITS - n.saturating_sub(1).leading_zeros()) as usize
}

/// Computes `ceil(log_2(n))`.
#[must_use]
pub fn log2_ceil_u64(n: u64) -> u64 {
    (u64::BITS - n.saturating_sub(1).leading_zeros()).into()
}

/// Computes `log_2(n)`.
///
/// # Panics
/// Panics if `n` is not a power of two.
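///
/// A few illustrative values:
/// ```
/// assert_eq!(p3_util::log2_strict_usize(1), 0);
/// assert_eq!(p3_util::log2_strict_usize(8), 3);
/// assert_eq!(p3_util::log2_strict_usize(1 << 20), 20);
/// ```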
#[must_use]
#[inline]
pub fn log2_strict_usize(n: usize) -> usize {
    let res = n.trailing_zeros();
    assert_eq!(n.wrapping_shr(res), 1, "Not a power of two: {n}");
    // Tell the optimizer about the semantics of `log2_strict`, i.e. that it can replace `n` with
    // `1 << res` and vice versa.
    assume(n == 1 << res);
    res as usize
}

/// Returns `[0, ..., N - 1]`.
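///
/// For example:
/// ```
/// assert_eq!(p3_util::indices_arr::<4>(), [0, 1, 2, 3]);
/// ```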
#[must_use]
pub const fn indices_arr<const N: usize>() -> [usize; N] {
    let mut indices_arr = [0; N];
    let mut i = 0;
    while i < N {
        indices_arr[i] = i;
        i += 1;
    }
    indices_arr
}

/// Reverse the low `log_2(n)` bits of `x`. `n` must be a power of two.
#[inline]
pub const fn reverse_bits(x: usize, n: usize) -> usize {
    // In debug builds, check that `n` is a power of two.
    debug_assert!(n.is_power_of_two());
    reverse_bits_len(x, n.trailing_zeros() as usize)
}

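/// Reverse the low `bit_len` bits of `x`, returning the result in the low `bit_len` bits.
/// Any bits of `x` at positions `bit_len` and above do not affect the result.
///
/// A small illustrative example:
/// ```
/// use p3_util::reverse_bits_len;
/// assert_eq!(reverse_bits_len(0b0011, 4), 0b1100);
/// assert_eq!(reverse_bits_len(0b01011, 5), 0b11010);
/// ```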
#[inline]
pub const fn reverse_bits_len(x: usize, bit_len: usize) -> usize {
    // NB: The only reason we need overflowing_shr() here as opposed
    // to plain '>>' is to accommodate the case x == bit_len == 0,
    // which would become `0 >> 64`. Rust considers any shift by the
    // full bit width an overflow, even when the argument is zero.
    x.reverse_bits()
        .overflowing_shr(usize::BITS - bit_len as u32)
        .0
}

// Lookup table of 6-bit reverses.
// NB: 2^6=64 bytes is a cacheline. A smaller table wastes cache space.
#[cfg(not(target_arch = "aarch64"))]
#[rustfmt::skip]
const BIT_REVERSE_6BIT: &[u8] = &[
    0o00, 0o40, 0o20, 0o60, 0o10, 0o50, 0o30, 0o70,
    0o04, 0o44, 0o24, 0o64, 0o14, 0o54, 0o34, 0o74,
    0o02, 0o42, 0o22, 0o62, 0o12, 0o52, 0o32, 0o72,
    0o06, 0o46, 0o26, 0o66, 0o16, 0o56, 0o36, 0o76,
    0o01, 0o41, 0o21, 0o61, 0o11, 0o51, 0o31, 0o71,
    0o05, 0o45, 0o25, 0o65, 0o15, 0o55, 0o35, 0o75,
    0o03, 0o43, 0o23, 0o63, 0o13, 0o53, 0o33, 0o73,
    0o07, 0o47, 0o27, 0o67, 0o17, 0o57, 0o37, 0o77,
];

// Ensure that SMALL_ARR_SIZE >= 4 * BIG_T_SIZE.
const BIG_T_SIZE: usize = 1 << 14;
const SMALL_ARR_SIZE: usize = 1 << 16;

/// Permutes `vals` such that each index is mapped to its reverse in binary.
///
/// # Panics
/// Panics if `vals.len()` is not a power of two (the empty slice is a no-op).
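///
/// An illustrative example on a slice of length 8:
/// ```
/// use p3_util::reverse_slice_index_bits;
/// let mut xs = [0, 1, 2, 3, 4, 5, 6, 7];
/// reverse_slice_index_bits(&mut xs);
/// assert_eq!(xs, [0, 4, 2, 6, 1, 5, 3, 7]);
/// ```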
pub fn reverse_slice_index_bits<F>(vals: &mut [F]) {
    let n = vals.len();
    if n == 0 {
        return;
    }
    let log_n = log2_strict_usize(n);

    // If the whole array fits in fast cache, then the trivial algorithm is cache-friendly. Also, if
    // `F` is really big, then the trivial algorithm is cache-friendly, no matter the size of the array.
    if core::mem::size_of::<F>() << log_n <= SMALL_ARR_SIZE
        || core::mem::size_of::<F>() >= BIG_T_SIZE
    {
        reverse_slice_index_bits_small(vals, log_n);
    } else {
        debug_assert!(n >= 4); // By our choice of `BIG_T_SIZE` and `SMALL_ARR_SIZE`.

        // Algorithm:
        //
        // Treat `vals` as a `sqrt(n)` by `sqrt(n)` row-major matrix. (Assume for now that `lb_n` is
        // even, i.e., `n` is a square number.) To perform bit-order reversal we:
        //  1. Bit-reverse the order of the rows. (They are contiguous in memory, so this is
        //     basically a series of large `memcpy`s.)
        //  2. Transpose the matrix.
        //  3. Bit-reverse the order of the rows.
        //
        // This is equivalent to, for every index `0 <= i < n`:
        //  1. bit-reversing `i[lb_n / 2..lb_n]`,
        //  2. swapping `i[0..lb_n / 2]` and `i[lb_n / 2..lb_n]`,
        //  3. bit-reversing `i[lb_n / 2..lb_n]`.
        //
        // If `lb_n` is odd, i.e., `n` is not a square number, then the above procedure requires
        // slight modification. At steps 1 and 3 we bit-reverse bits `ceil(lb_n / 2)..lb_n` of the
        // index (shuffling `2^floor(lb_n / 2)` chunks of length `2^ceil(lb_n / 2)`). At step 2, we
        // perform _two_ transposes. We treat `vals` as two matrices, one where the middle bit of the
        // index is `0` and another where the middle bit is `1`; we transpose each individually.
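        //
        // As a concrete check of the per-index description above: take `lb_n = 4` and
        // `i = 0b1011`, whose full bit reversal is `0b1101`.
        //  1. Reversing the high half `10` gives `0b0111`.
        //  2. Swapping the two halves gives `0b1101`.
        //  3. Reversing the new high half `11` leaves `0b1101`, as expected.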

        let lb_num_chunks = log_n >> 1;
        let lb_chunk_size = log_n - lb_num_chunks;
        unsafe {
            reverse_slice_index_bits_chunks(vals, lb_num_chunks, lb_chunk_size);
            transpose_in_place_square(vals, lb_chunk_size, lb_num_chunks, 0);
            if lb_num_chunks != lb_chunk_size {
                // `vals` cannot be interpreted as a square matrix. We instead interpret it as a
                // `1 << lb_num_chunks` by `2` by `1 << lb_num_chunks` tensor, in row-major order.
                // The above transpose acted on `tensor[..., 0, ...]` (all indices with middle bit
                // `0`). We still need to transpose `tensor[..., 1, ...]`. To do so, we advance
                // `vals` by `1 << lb_num_chunks`, effectively adding that offset to every index.
                let vals_with_offset = &mut vals[1 << lb_num_chunks..];
                transpose_in_place_square(vals_with_offset, lb_chunk_size, lb_num_chunks, 0);
            }
            reverse_slice_index_bits_chunks(vals, lb_num_chunks, lb_chunk_size);
        }
    }
}

// Both functions below permute `vals` in place; the result is semantically equivalent to:
//     for i in 0..n {
//         result.push(vals[reverse_bits(i, n_power)]);
//     }
// where reverse_bits(i, n_power) computes the n_power-bit reverse. The complications are there
// to guide the compiler to generate optimal assembly.

#[cfg(not(target_arch = "aarch64"))]
fn reverse_slice_index_bits_small<F>(vals: &mut [F], lb_n: usize) {
    if lb_n <= 6 {
        // BIT_REVERSE_6BIT holds 6-bit reverses. This shift makes them lb_n-bit reverses.
        let dst_shr_amt = 6 - lb_n as u32;
        #[allow(clippy::needless_range_loop)]
        for src in 0..vals.len() {
            let dst = (BIT_REVERSE_6BIT[src] as usize).wrapping_shr(dst_shr_amt);
            if src < dst {
                vals.swap(src, dst);
            }
        }
    } else {
        // LLVM does not know that it does not need to reverse src at each iteration (which is
        // expensive on x86). We take advantage of the fact that the low bits of dst change rarely
        // and the high bits of dst are dependent only on the low bits of src.
        let dst_lo_shr_amt = usize::BITS - (lb_n - 6) as u32;
        let dst_hi_shl_amt = lb_n - 6;
        for src_chunk in 0..(vals.len() >> 6) {
            let src_hi = src_chunk << 6;
            let dst_lo = src_chunk.reverse_bits().wrapping_shr(dst_lo_shr_amt);
            #[allow(clippy::needless_range_loop)]
            for src_lo in 0..(1 << 6) {
                let dst_hi = (BIT_REVERSE_6BIT[src_lo] as usize) << dst_hi_shl_amt;
                let src = src_hi + src_lo;
                let dst = dst_hi + dst_lo;
                if src < dst {
                    vals.swap(src, dst);
                }
            }
        }
    }
}

#[cfg(target_arch = "aarch64")]
fn reverse_slice_index_bits_small<F>(vals: &mut [F], lb_n: usize) {
    // Aarch64 can reverse bits in one instruction, so the trivial version works best.
    for src in 0..vals.len() {
        let dst = src.reverse_bits().wrapping_shr(usize::BITS - lb_n as u32);
        if src < dst {
            vals.swap(src, dst);
        }
    }
}

/// Split `vals` into chunks and bit-reverse the order of the chunks. There are `1 << lb_num_chunks`
/// chunks, each of length `1 << lb_chunk_size`.
/// SAFETY: ensure that `vals.len() == 1 << lb_num_chunks + lb_chunk_size`.
unsafe fn reverse_slice_index_bits_chunks<F>(
    vals: &mut [F],
    lb_num_chunks: usize,
    lb_chunk_size: usize,
) {
    for i in 0..1usize << lb_num_chunks {
        // `wrapping_shr` handles the silly case when `lb_num_chunks == 0`.
        let j = i
            .reverse_bits()
            .wrapping_shr(usize::BITS - lb_num_chunks as u32);
        if i < j {
            core::ptr::swap_nonoverlapping(
                vals.get_unchecked_mut(i << lb_chunk_size),
                vals.get_unchecked_mut(j << lb_chunk_size),
                1 << lb_chunk_size,
            );
        }
    }
}

/// Transpose a square matrix in place.
/// SAFETY: ensure that `arr.len() == 1 << lb_chunk_size + lb_num_chunks`.
unsafe fn transpose_in_place_square<T>(
    arr: &mut [T],
    lb_chunk_size: usize,
    lb_num_chunks: usize,
    offset: usize,
) {
    transpose::transpose_in_place_square(arr, lb_chunk_size, lb_num_chunks, offset)
}

/// Hint to the compiler that `p` is always true.
///
/// In debug builds this asserts `p`; in release builds, calling this with `p == false` is
/// undefined behavior.
#[inline(always)]
pub fn assume(p: bool) {
    debug_assert!(p);
    if !p {
        unsafe {
            unreachable_unchecked();
        }
    }
}

/// Try to force Rust to emit a branch. Example:
///
/// ```no_run
/// let x = 100;
/// if x > 20 {
///     println!("x is big!");
///     p3_util::branch_hint();
/// } else {
///     println!("x is small!");
/// }
/// ```
///
/// This function has no semantics. It is a hint only.
#[inline(always)]
pub fn branch_hint() {
    // NOTE: These are the currently supported assembly architectures. See the
    // [nightly reference](https://doc.rust-lang.org/nightly/reference/inline-assembly.html) for
    // the most up-to-date list.
    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "riscv32",
        target_arch = "riscv64",
        target_arch = "x86",
        target_arch = "x86_64",
    ))]
    unsafe {
        core::arch::asm!("", options(nomem, nostack, preserves_flags));
    }
}

/// Convenience methods for Vec.
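///
/// An illustrative example:
/// ```
/// use p3_util::VecExt;
/// let mut v = vec![1, 2, 3];
/// *v.pushed_mut(4) += 10;
/// assert_eq!(v, vec![1, 2, 3, 14]);
/// ```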
pub trait VecExt<T> {
    /// Push `elem` and return a reference to it.
    fn pushed_ref(&mut self, elem: T) -> &T;
    /// Push `elem` and return a mutable reference to it.
    fn pushed_mut(&mut self, elem: T) -> &mut T;
}

impl<T> VecExt<T> for alloc::vec::Vec<T> {
    fn pushed_ref(&mut self, elem: T) -> &T {
        self.push(elem);
        self.last().unwrap()
    }
    fn pushed_mut(&mut self, elem: T) -> &mut T {
        self.push(elem);
        self.last_mut().unwrap()
    }
}

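/// Transpose a "vector of rows" into a "vector of columns", i.e. `result[j][i] == v[i][j]`.
///
/// An illustrative example:
/// ```
/// use p3_util::transpose_vec;
/// let rows = vec![vec![1, 2, 3], vec![4, 5, 6]];
/// assert_eq!(transpose_vec(rows), vec![vec![1, 4], vec![2, 5], vec![3, 6]]);
/// ```
///
/// # Panics
/// Panics if `v` is empty or if any inner vector is shorter than the first one.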
pub fn transpose_vec<T>(v: Vec<Vec<T>>) -> Vec<Vec<T>> {
    assert!(!v.is_empty());
    let len = v[0].len();
    let mut iters: Vec<_> = v.into_iter().map(|n| n.into_iter()).collect();
    (0..len)
        .map(|_| {
            iters
                .iter_mut()
                .map(|n| n.next().unwrap())
                .collect::<Vec<T>>()
        })
        .collect()
}

/// Return a String containing the name of `T` but with all the crate
/// and module prefixes removed.
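///
/// An illustrative example (note that the exact output of `core::any::type_name`, on which this
/// relies, is not guaranteed to be stable):
/// ```
/// let name = p3_util::pretty_name::<Option<Vec<u32>>>();
/// assert!(!name.contains("::")); // Typically "Option<Vec<u32>>".
/// ```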
pub fn pretty_name<T>() -> String {
    let name = type_name::<T>();
    let mut result = String::new();
    for qual in name.split_inclusive(&['<', '>', ',']) {
        result.push_str(qual.split("::").last().unwrap());
    }
    result
}

/// A C-style buffered input reader, similar to
/// `std::iter::Iterator::next_chunk()` from nightly.
///
/// Returns the buffer together with the number of elements actually read.
/// Unsafe because the returned array may contain uninitialised
/// elements.
#[inline]
unsafe fn iter_next_chunk<const BUFLEN: usize, I: Iterator>(
    iter: &mut I,
) -> ([I::Item; BUFLEN], usize)
where
    I::Item: Copy,
{
    let mut buf = unsafe {
        let t = [const { MaybeUninit::<I::Item>::uninit() }; BUFLEN];
        // We are forced to use `transmute_copy` here instead of
        // `transmute` because `BUFLEN` is a const generic parameter.
        // The compiler *should* be smart enough not to emit a copy though.
        core::mem::transmute_copy::<_, [I::Item; BUFLEN]>(&t)
    };
    let mut i = 0;

    // Read up to BUFLEN values from `iter` into `buf`.
    for c in iter {
        // Copy the next Item into `buf`.
        unsafe {
            *buf.get_unchecked_mut(i) = c;
            i = i.unchecked_add(1);
        }
        // Stop once `buf` is full.
        if i == BUFLEN {
            break;
        }
    }
    (buf, i)
}

/// Split an iterator into small arrays and apply `func` to each.
///
/// Repeatedly read `BUFLEN` elements from `input` into an array and
/// pass the array to `func` as a slice. If fewer than `BUFLEN`
/// elements remain, that smaller slice is passed to `func` (if it is
/// non-empty) and the function returns.
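///
/// An illustrative example:
/// ```
/// let mut chunks = Vec::new();
/// p3_util::apply_to_chunks::<3, _, _>(1..=7u8, |chunk| chunks.push(chunk.to_vec()));
/// assert_eq!(chunks, vec![vec![1, 2, 3], vec![4, 5, 6], vec![7]]);
/// ```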
#[inline]
pub fn apply_to_chunks<const BUFLEN: usize, I, H>(input: I, mut func: H)
where
    I: IntoIterator<Item = u8>,
    H: FnMut(&[I::Item]),
{
    let mut iter = input.into_iter();
    loop {
        let (buf, n) = unsafe { iter_next_chunk::<BUFLEN, _>(&mut iter) };
        if n == 0 {
            break;
        }
        func(unsafe { buf.get_unchecked(..n) });
    }
}

/// Converts a vector of one type to one of another type.
///
/// This is useful to convert between things like `Vec<u32>` and `Vec<[u32; 10]>`, for example.
/// This is roughly like a transmutation, except that we also adjust the vector's length
/// and capacity based on the sizes of the two types.
///
/// # Safety
/// In addition to the usual safety considerations around transmutation, the caller must ensure
/// that the two types have the same alignment and that one of their sizes is a multiple of the
/// other.
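///
/// An illustrative example, flattening `Vec<[u32; 2]>` into `Vec<u32>` (same alignment, and the
/// element sizes divide evenly):
/// ```
/// let pairs: Vec<[u32; 2]> = vec![[1, 2], [3, 4]];
/// let flat: Vec<u32> = unsafe { p3_util::convert_vec(pairs) };
/// assert_eq!(flat, vec![1, 2, 3, 4]);
/// ```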
#[inline(always)]
pub unsafe fn convert_vec<T, U>(mut vec: Vec<T>) -> Vec<U> {
    let ptr = vec.as_mut_ptr() as *mut U;
    let len_bytes = vec.len() * size_of::<T>();
    let cap_bytes = vec.capacity() * size_of::<T>();

    assert_eq!(align_of::<T>(), align_of::<U>());
    assert_eq!(len_bytes % size_of::<U>(), 0);
    assert_eq!(cap_bytes % size_of::<U>(), 0);

    let new_len = len_bytes / size_of::<U>();
    let new_cap = cap_bytes / size_of::<U>();
    mem::forget(vec);
    Vec::from_raw_parts(ptr, new_len, new_cap)
}

#[cfg(test)]
mod tests {
    use alloc::vec;
    use alloc::vec::Vec;

    use rand::rngs::OsRng;
    use rand::Rng;

    use super::*;

    #[test]
    fn test_reverse_bits_len() {
        assert_eq!(reverse_bits_len(0b0000000000, 10), 0b0000000000);
        assert_eq!(reverse_bits_len(0b0000000001, 10), 0b1000000000);
        assert_eq!(reverse_bits_len(0b1000000000, 10), 0b0000000001);
        assert_eq!(reverse_bits_len(0b00000, 5), 0b00000);
        assert_eq!(reverse_bits_len(0b01011, 5), 0b11010);
    }

    #[test]
    fn test_reverse_index_bits() {
        let mut arg = vec![10, 20, 30, 40];
        reverse_slice_index_bits(&mut arg);
        assert_eq!(arg, vec![10, 30, 20, 40]);

        let mut input256: Vec<u64> = (0..256).collect();
        #[rustfmt::skip]
        let output256: Vec<u64> = vec![
            0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
            0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
            0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
            0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
            0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
            0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
            0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
            0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
            0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
            0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
            0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
            0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
            0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
            0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
            0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
            0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
        ];
        reverse_slice_index_bits(&mut input256[..]);
        assert_eq!(input256, output256);
    }

    #[test]
    fn test_apply_to_chunks_exact_fit() {
        const CHUNK_SIZE: usize = 4;
        let input: Vec<u8> = vec![1, 2, 3, 4, 5, 6, 7, 8];
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert_eq!(results, vec![vec![1, 2, 3, 4], vec![5, 6, 7, 8]]);
    }

    #[test]
    fn test_apply_to_chunks_with_remainder() {
        const CHUNK_SIZE: usize = 3;
        let input: Vec<u8> = vec![1, 2, 3, 4, 5, 6, 7];
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert_eq!(results, vec![vec![1, 2, 3], vec![4, 5, 6], vec![7]]);
    }

    #[test]
    fn test_apply_to_chunks_empty_input() {
        const CHUNK_SIZE: usize = 4;
        let input: Vec<u8> = vec![];
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert!(results.is_empty());
    }

    #[test]
    fn test_apply_to_chunks_single_chunk() {
        const CHUNK_SIZE: usize = 10;
        let input: Vec<u8> = vec![1, 2, 3, 4, 5];
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert_eq!(results, vec![vec![1, 2, 3, 4, 5]]);
    }

    #[test]
    fn test_apply_to_chunks_large_chunk_size() {
        const CHUNK_SIZE: usize = 100;
        let input: Vec<u8> = vec![1, 2, 3, 4, 5, 6, 7, 8];
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert_eq!(results, vec![vec![1, 2, 3, 4, 5, 6, 7, 8]]);
    }

    #[test]
    fn test_apply_to_chunks_large_input() {
        const CHUNK_SIZE: usize = 5;
        let input: Vec<u8> = (1..=20).collect();
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert_eq!(
            results,
            vec![
                vec![1, 2, 3, 4, 5],
                vec![6, 7, 8, 9, 10],
                vec![11, 12, 13, 14, 15],
                vec![16, 17, 18, 19, 20]
            ]
        );
    }

    #[test]
    fn test_reverse_slice_index_bits_random() {
        let lengths = [32, 128, 1 << 16];
        let mut rng = OsRng;
        for _ in 0..32 {
            for &length in &lengths {
                let mut rand_list: Vec<u32> = Vec::with_capacity(length);
                rand_list.resize_with(length, || rng.gen());
                let expect = reverse_index_bits_naive(&rand_list);

                let mut actual = rand_list.clone();
                reverse_slice_index_bits(&mut actual);

                assert_eq!(actual, expect);
            }
        }
    }

    #[test]
    fn test_log2_strict_usize_edge_cases() {
        assert_eq!(log2_strict_usize(1), 0);
        assert_eq!(log2_strict_usize(2), 1);
        assert_eq!(log2_strict_usize(1 << 18), 18);
        assert_eq!(log2_strict_usize(1 << 31), 31);
        assert_eq!(
            log2_strict_usize(1 << (usize::BITS - 1)),
            usize::BITS as usize - 1
        );
    }

    #[test]
    #[should_panic]
    fn test_log2_strict_usize_zero() {
        let _ = log2_strict_usize(0);
    }

    #[test]
    #[should_panic]
    fn test_log2_strict_usize_nonpower_2() {
        let _ = log2_strict_usize(0x78c341c65ae6d262);
    }

    #[test]
    #[should_panic]
    fn test_log2_strict_usize_max() {
        let _ = log2_strict_usize(usize::MAX);
    }

    #[test]
    fn test_log2_ceil_usize_comprehensive() {
        // Powers of 2
        assert_eq!(log2_ceil_usize(0), 0);
        assert_eq!(log2_ceil_usize(1), 0);
        assert_eq!(log2_ceil_usize(2), 1);
        assert_eq!(log2_ceil_usize(1 << 18), 18);
        assert_eq!(log2_ceil_usize(1 << 31), 31);
        assert_eq!(
            log2_ceil_usize(1 << (usize::BITS - 1)),
            usize::BITS as usize - 1
        );

        // Nonpowers; want to round up
        assert_eq!(log2_ceil_usize(3), 2);
        assert_eq!(log2_ceil_usize(0x14fe901b), 29);
        assert_eq!(
            log2_ceil_usize((1 << (usize::BITS - 1)) + 1),
            usize::BITS as usize
        );
        assert_eq!(log2_ceil_usize(usize::MAX - 1), usize::BITS as usize);
        assert_eq!(log2_ceil_usize(usize::MAX), usize::BITS as usize);
    }

    fn reverse_index_bits_naive<T: Copy>(arr: &[T]) -> Vec<T> {
        let n = arr.len();
        let n_power = log2_strict_usize(n);

        let mut out = vec![None; n];
        for (i, v) in arr.iter().enumerate() {
            let dst = i.reverse_bits() >> (usize::BITS - n_power as u32);
            out[dst] = Some(*v);
        }

        out.into_iter().map(|x| x.unwrap()).collect()
    }
}