1#![no_std]
4
5extern crate alloc;
6
7use alloc::string::String;
8use alloc::vec::Vec;
9use core::any::type_name;
10use core::hint::unreachable_unchecked;
11use core::mem;
12use core::mem::MaybeUninit;
13
14pub mod array_serialization;
15pub mod linear_map;
16pub mod transpose;
17pub mod zip_eq;
18
/// Computes `ceil(log2(n))`; both `0` and `1` map to `0`.
#[must_use]
pub const fn log2_ceil_usize(n: usize) -> usize {
    // For n >= 2 the answer is the bit width of `n - 1`; 0 and 1 are
    // handled explicitly so the subtraction cannot underflow.
    match n {
        0 | 1 => 0,
        _ => (usize::BITS - (n - 1).leading_zeros()) as usize,
    }
}
24
/// Computes `ceil(log2(n))` for a `u64`; both `0` and `1` map to `0`.
///
/// Made `const` for consistency with [`log2_ceil_usize`], so it is usable
/// in constant contexts. The previous `.into()` conversion is not allowed
/// in `const fn`; the `u32 as u64` cast below is lossless and equivalent.
#[must_use]
pub const fn log2_ceil_u64(n: u64) -> u64 {
    // `saturating_sub` makes n = 0 behave like n = 1 (result 0).
    (u64::BITS - n.saturating_sub(1).leading_zeros()) as u64
}
29
/// Returns `log2(n)`, asserting — even in release builds — that `n` is an
/// exact power of two.
///
/// # Panics
/// Panics if `n` is not a power of two, including `n == 0`.
#[must_use]
#[inline]
pub fn log2_strict_usize(n: usize) -> usize {
    let res = n.trailing_zeros();
    // A power of two shifted right by its trailing-zero count is exactly 1.
    // `wrapping_shr` keeps n == 0 defined: res == usize::BITS wraps to a
    // shift of 0, leaving 0 != 1, so the assert fires as required.
    assert_eq!(n.wrapping_shr(res), 1, "Not a power of two: {n}");
    // Optimizer hint: the assert above has just established this equality,
    // so the hint is sound and lets later code drop redundant checks.
    assume(n == 1 << res);
    res as usize
}
44
/// Builds the identity index array `[0, 1, ..., N - 1]` at compile time.
#[must_use]
pub const fn indices_arr<const N: usize>() -> [usize; N] {
    // `for` loops are not permitted in `const fn`, hence the manual counter.
    let mut out = [0; N];
    let mut idx = 0;
    while idx < N {
        out[idx] = idx;
        idx += 1;
    }
    out
}
56
/// Reverses `x` within `log2(n)` bits, where `n` must be a power of two
/// (checked in debug builds only).
#[inline]
pub const fn reverse_bits(x: usize, n: usize) -> usize {
    debug_assert!(n.is_power_of_two());
    // Reverse the full machine word, then shift the interesting bits back
    // down. `overflowing_shr` keeps n == 1 (bit_len == 0) well-defined:
    // the shift amount wraps, yielding `x.reverse_bits()` unchanged.
    let bit_len = n.trailing_zeros();
    x.reverse_bits().overflowing_shr(usize::BITS - bit_len).0
}
63
/// Reverses the low `bit_len` bits of `x`.
#[inline]
pub const fn reverse_bits_len(x: usize, bit_len: usize) -> usize {
    // Reverse the whole word, then move the (now-high) bits back down.
    // `wrapping_shr` matches the original `overflowing_shr(..).0`: when
    // bit_len == 0 the shift amount wraps to 0, returning x.reverse_bits().
    x.reverse_bits().wrapping_shr(usize::BITS - bit_len as u32)
}
74
/// Lookup table mapping each 6-bit value to its bit-reversal.
///
/// Entries are written in octal: each 6-bit value splits into two 3-bit
/// digits, which makes the reversal pattern visible by eye. Unused on
/// aarch64, where `reverse_slice_index_bits_small` computes reversals
/// directly instead.
#[cfg(not(target_arch = "aarch64"))]
#[rustfmt::skip]
const BIT_REVERSE_6BIT: &[u8] = &[
    0o00, 0o40, 0o20, 0o60, 0o10, 0o50, 0o30, 0o70,
    0o04, 0o44, 0o24, 0o64, 0o14, 0o54, 0o34, 0o74,
    0o02, 0o42, 0o22, 0o62, 0o12, 0o52, 0o32, 0o72,
    0o06, 0o46, 0o26, 0o66, 0o16, 0o56, 0o36, 0o76,
    0o01, 0o41, 0o21, 0o61, 0o11, 0o51, 0o31, 0o71,
    0o05, 0o45, 0o25, 0o65, 0o15, 0o55, 0o35, 0o75,
    0o03, 0o43, 0o23, 0o63, 0o13, 0o53, 0o33, 0o73,
    0o07, 0o47, 0o27, 0o67, 0o17, 0o57, 0o37, 0o77,
];
89
// Thresholds steering `reverse_slice_index_bits` between the simple swap
// loop and the transpose-based path. Elements at least this large always
// take the simple path.
const BIG_T_SIZE: usize = 1 << 14;
// Total arrays of at most this many bytes also take the simple path.
const SMALL_ARR_SIZE: usize = 1 << 16;
93
/// Permutes `vals` in place so the element at index `i` moves to the
/// bit-reversal of `i` over `log2(vals.len())` bits.
///
/// # Panics
/// Panics (via `log2_strict_usize`) if `vals.len()` is not a power of two.
/// An empty slice is accepted and returns immediately.
pub fn reverse_slice_index_bits<F>(vals: &mut [F]) {
    let n = vals.len();
    if n == 0 {
        return;
    }
    let log_n = log2_strict_usize(n);

    // Small total footprint, or very large elements: use the simple
    // pairwise-swap routine. Otherwise take the chunked path below.
    if core::mem::size_of::<F>() << log_n <= SMALL_ARR_SIZE
        || core::mem::size_of::<F>() >= BIG_T_SIZE
    {
        reverse_slice_index_bits_small(vals, log_n);
    } else {
        // Treat the slice as a 2^lb_num_chunks x 2^lb_chunk_size matrix:
        // bit-reversing the full index is "reverse chunk index, transpose,
        // reverse chunk index again". Needs n >= 4 so both dimensions are
        // non-trivial.
        debug_assert!(n >= 4); let lb_num_chunks = log_n >> 1;
        let lb_chunk_size = log_n - lb_num_chunks;
        // SAFETY: lb_num_chunks + lb_chunk_size == log_n, so every index
        // formed from a (chunk, element) pair stays within `vals`.
        unsafe {
            reverse_slice_index_bits_chunks(vals, lb_num_chunks, lb_chunk_size);
            transpose_in_place_square(vals, lb_chunk_size, lb_num_chunks, 0);
            if lb_num_chunks != lb_chunk_size {
                // Odd log_n: the matrix decomposes into two square halves;
                // the call above transposed the first, this one handles the
                // second. NOTE(review): offset/arguments assumed to match
                // the `transpose` module's contract — confirm there.
                let vals_with_offset = &mut vals[1 << lb_num_chunks..];
                transpose_in_place_square(vals_with_offset, lb_chunk_size, lb_num_chunks, 0);
            }
            reverse_slice_index_bits_chunks(vals, lb_num_chunks, lb_chunk_size);
        }
    }
}
151
/// Bit-reversal permutation via the 6-bit lookup table.
///
/// `lb_n` is `log2(vals.len())`. Each index pair `(src, dst)` is swapped
/// exactly once, guarded by `src < dst`.
#[cfg(not(target_arch = "aarch64"))]
fn reverse_slice_index_bits_small<F>(vals: &mut [F], lb_n: usize) {
    if lb_n <= 6 {
        // The whole index fits in the table: look up the 6-bit reversal,
        // then discard the unused low bits.
        let dst_shr_amt = 6 - lb_n as u32;
        #[allow(clippy::needless_range_loop)]
        for src in 0..vals.len() {
            let dst = (BIT_REVERSE_6BIT[src] as usize).wrapping_shr(dst_shr_amt);
            if src < dst {
                vals.swap(src, dst);
            }
        }
    } else {
        // Split each index into a high part (reversed with the machine
        // instruction) and a low 6 bits (reversed via the table), then
        // recombine with the halves swapped.
        let dst_lo_shr_amt = usize::BITS - (lb_n - 6) as u32;
        let dst_hi_shl_amt = lb_n - 6;
        for src_chunk in 0..(vals.len() >> 6) {
            let src_hi = src_chunk << 6;
            // Reversed high bits become the low bits of the destination.
            let dst_lo = src_chunk.reverse_bits().wrapping_shr(dst_lo_shr_amt);
            #[allow(clippy::needless_range_loop)]
            for src_lo in 0..(1 << 6) {
                // Reversed low 6 bits become the high bits of the destination.
                let dst_hi = (BIT_REVERSE_6BIT[src_lo] as usize) << dst_hi_shl_amt;
                let src = src_hi + src_lo;
                let dst = dst_hi + dst_lo;
                if src < dst {
                    vals.swap(src, dst);
                }
            }
        }
    }
}
192
/// Bit-reversal permutation, one swap per index pair.
///
/// On aarch64 each reversal is computed directly — presumably because the
/// native bit-reverse instruction makes the lookup-table variant used on
/// other targets unnecessary.
#[cfg(target_arch = "aarch64")]
fn reverse_slice_index_bits_small<F>(vals: &mut [F], lb_n: usize) {
    // `wrapping_shr` keeps lb_n == 0 (a length-1 slice) well-defined.
    let shr_amt = usize::BITS - lb_n as u32;
    for idx in 0..vals.len() {
        let rev = idx.reverse_bits().wrapping_shr(shr_amt);
        // Guard with `<` so each pair is swapped exactly once.
        if idx < rev {
            vals.swap(idx, rev);
        }
    }
}
203
/// Swaps whole chunks of length `2^lb_chunk_size`: chunk `i` is exchanged
/// with chunk `j`, where `j` is `i` bit-reversed over `lb_num_chunks` bits.
///
/// # Safety
/// `vals` must contain at least `2^(lb_num_chunks + lb_chunk_size)`
/// elements, so every chunk start computed below is in bounds.
unsafe fn reverse_slice_index_bits_chunks<F>(
    vals: &mut [F],
    lb_num_chunks: usize,
    lb_chunk_size: usize,
) {
    for i in 0..1usize << lb_num_chunks {
        // Bit-reverse `i` within lb_num_chunks bits. `wrapping_shr` keeps
        // lb_num_chunks == 0 well-defined (single chunk, no swaps).
        let j = i
            .reverse_bits()
            .wrapping_shr(usize::BITS - lb_num_chunks as u32);
        if i < j {
            // SAFETY: i != j and chunk starts are chunk-aligned, so the two
            // ranges are disjoint; the caller guarantees both chunks lie
            // entirely within `vals`.
            core::ptr::swap_nonoverlapping(
                vals.get_unchecked_mut(i << lb_chunk_size),
                vals.get_unchecked_mut(j << lb_chunk_size),
                1 << lb_chunk_size,
            );
        }
    }
}
226
/// Thin wrapper delegating to [`transpose::transpose_in_place_square`].
///
/// # Safety
/// The caller must uphold the contract of the underlying function in the
/// `transpose` module (not visible in this file); in particular, the index
/// ranges implied by `lb_chunk_size`, `lb_num_chunks` and `offset` must
/// stay within `arr` — confirm against that module's documentation.
unsafe fn transpose_in_place_square<T>(
    arr: &mut [T],
    lb_chunk_size: usize,
    lb_num_chunks: usize,
    offset: usize,
) {
    transpose::transpose_in_place_square(arr, lb_chunk_size, lb_num_chunks, offset)
}
237
/// Promises the optimizer that `p` is true.
///
/// Debug builds verify the claim with `debug_assert!`; in release builds a
/// false `p` is undefined behavior via `unreachable_unchecked`, so callers
/// must only pass conditions they have already established.
#[inline(always)]
pub fn assume(p: bool) {
    debug_assert!(p);
    match p {
        true => {}
        // SAFETY: the caller guarantees `p` holds; reaching this arm is a
        // contract violation, which we surface to the optimizer as
        // unreachable.
        false => unsafe { unreachable_unchecked() },
    }
}
247
/// Emits an empty inline-assembly block on the listed architectures; a
/// no-op elsewhere.
///
/// NOTE(review): the empty asm acts as a compiler-level barrier,
/// presumably placed on unlikely paths to influence branch layout —
/// confirm intended usage with callers.
#[inline(always)]
pub fn branch_hint() {
    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "riscv32",
        target_arch = "riscv64",
        target_arch = "x86",
        target_arch = "x86_64",
    ))]
    // SAFETY: an empty asm template with `nomem, nostack, preserves_flags`
    // executes no instructions and touches no machine state.
    unsafe {
        core::arch::asm!("", options(nomem, nostack, preserves_flags));
    }
}
278
/// Convenience extensions for vector-like containers.
pub trait VecExt<T> {
    /// Appends `elem` and returns a shared reference to the new element.
    fn pushed_ref(&mut self, elem: T) -> &T;
    /// Appends `elem` and returns a mutable reference to the new element.
    fn pushed_mut(&mut self, elem: T) -> &mut T;
}
286
287impl<T> VecExt<T> for alloc::vec::Vec<T> {
288 fn pushed_ref(&mut self, elem: T) -> &T {
289 self.push(elem);
290 self.last().unwrap()
291 }
292 fn pushed_mut(&mut self, elem: T) -> &mut T {
293 self.push(elem);
294 self.last_mut().unwrap()
295 }
296}
297
/// Transposes a vector of rows into a vector of columns.
///
/// The output width is taken from the first row; every row must therefore
/// have at least `v[0].len()` elements, and any excess is dropped.
///
/// # Panics
/// Panics if `v` is empty, or if any row is shorter than the first.
pub fn transpose_vec<T>(v: Vec<Vec<T>>) -> Vec<Vec<T>> {
    assert!(!v.is_empty());
    let width = v[0].len();
    // One consuming iterator per row; each output row pulls the next
    // element from every one of them in turn.
    let mut row_iters: Vec<_> = v.into_iter().map(Vec::into_iter).collect();
    let mut transposed = Vec::with_capacity(width);
    for _ in 0..width {
        let column: Vec<T> = row_iters.iter_mut().map(|it| it.next().unwrap()).collect();
        transposed.push(column);
    }
    transposed
}
311
/// Returns `T`'s type name with all module paths stripped.
///
/// e.g. `alloc::vec::Vec<core::option::Option<u64>>` renders as
/// `Vec<Option<u64>>`. Generic punctuation (`<`, `>`, `,`) is preserved
/// because the name is split inclusively on those characters.
pub fn pretty_name<T>() -> String {
    type_name::<T>()
        .split_inclusive(&['<', '>', ','])
        .map(|segment| segment.split("::").last().unwrap())
        .collect()
}
322
/// Pulls up to `BUFLEN` items from `iter` into a stack buffer, returning
/// the buffer and the count of entries actually written.
///
/// # Safety
/// Only the first `count` entries of the returned buffer are initialized;
/// the caller must not read past them. The `Copy` bound keeps the
/// uninitialized tail free of drop obligations.
/// NOTE(review): assumes `BUFLEN > 0` — with `BUFLEN == 0` the first
/// unchecked write below would be out of bounds; confirm callers never
/// instantiate with 0.
#[inline]
unsafe fn iter_next_chunk<const BUFLEN: usize, I: Iterator>(
    iter: &mut I,
) -> ([I::Item; BUFLEN], usize)
where
    I::Item: Copy,
{
    // Conjure an uninitialized [Item; BUFLEN] by transmute-copying an
    // array of MaybeUninit; its bytes must not be read before being
    // written below.
    let mut buf = unsafe {
        let t = [const { MaybeUninit::<I::Item>::uninit() }; BUFLEN];
        core::mem::transmute_copy::<_, [I::Item; BUFLEN]>(&t)
    };
    let mut i = 0;

    for c in iter {
        // SAFETY: i < BUFLEN here — the break below fires as soon as
        // i reaches BUFLEN, and the unchecked_add cannot overflow since
        // i is bounded by BUFLEN.
        unsafe {
            *buf.get_unchecked_mut(i) = c;
            i = i.unchecked_add(1);
        }
        if i == BUFLEN {
            break;
        }
    }
    (buf, i)
}
358
/// Feeds `input` to `func` in chunks of at most `BUFLEN` bytes.
///
/// The final chunk may be shorter than `BUFLEN`; `func` is never invoked
/// with an empty slice (the loop breaks before the call when the iterator
/// is exhausted).
#[inline]
pub fn apply_to_chunks<const BUFLEN: usize, I, H>(input: I, mut func: H)
where
    I: IntoIterator<Item = u8>,
    H: FnMut(&[I::Item]),
{
    let mut iter = input.into_iter();
    loop {
        // SAFETY: iter_next_chunk initializes exactly the first `n`
        // entries of `buf`, and that is the only region read below.
        let (buf, n) = unsafe { iter_next_chunk::<BUFLEN, _>(&mut iter) };
        if n == 0 {
            break;
        }
        // SAFETY: n <= BUFLEN by iter_next_chunk's loop bound, so the
        // range `..n` is in bounds and fully initialized.
        func(unsafe { buf.get_unchecked(..n) });
    }
}
380
/// Reinterprets a `Vec<T>` as a `Vec<U>` in place, without copying.
///
/// # Safety
/// Every byte pattern occupied by the `T` elements must be a valid `U`.
/// NOTE(review): also relies on `Vec::from_raw_parts`'s allocation
/// requirements — equal alignment plus byte-exact length/capacity is
/// asserted below, which is what makes the reconstructed layout match the
/// original allocation; confirm against the `from_raw_parts` docs.
///
/// # Panics
/// Panics if alignments differ, or if the length/capacity in bytes is not
/// a multiple of `size_of::<U>()`. The asserts run before `mem::forget`,
/// so on panic `vec` is still dropped normally and nothing leaks.
#[inline(always)]
pub unsafe fn convert_vec<T, U>(mut vec: Vec<T>) -> Vec<U> {
    let ptr = vec.as_mut_ptr() as *mut U;
    let len_bytes = vec.len() * size_of::<T>();
    let cap_bytes = vec.capacity() * size_of::<T>();

    assert_eq!(align_of::<T>(), align_of::<U>());
    assert_eq!(len_bytes % size_of::<U>(), 0);
    assert_eq!(cap_bytes % size_of::<U>(), 0);

    let new_len = len_bytes / size_of::<U>();
    let new_cap = cap_bytes / size_of::<U>();
    // Give up ownership of the buffer before rebuilding it under the new
    // element type, so it is not freed twice.
    mem::forget(vec);
    Vec::from_raw_parts(ptr, new_len, new_cap)
}
405
// Unit tests for the bit-reversal, chunking and log2 helpers above.
#[cfg(test)]
mod tests {
    use alloc::vec;
    use alloc::vec::Vec;

    use rand::rngs::OsRng;
    use rand::Rng;

    use super::*;

    #[test]
    fn test_reverse_bits_len() {
        assert_eq!(reverse_bits_len(0b0000000000, 10), 0b0000000000);
        assert_eq!(reverse_bits_len(0b0000000001, 10), 0b1000000000);
        assert_eq!(reverse_bits_len(0b1000000000, 10), 0b0000000001);
        assert_eq!(reverse_bits_len(0b00000, 5), 0b00000);
        assert_eq!(reverse_bits_len(0b01011, 5), 0b11010);
    }

    #[test]
    fn test_reverse_index_bits() {
        let mut arg = vec![10, 20, 30, 40];
        reverse_slice_index_bits(&mut arg);
        assert_eq!(arg, vec![10, 30, 20, 40]);

        let mut input256: Vec<u64> = (0..256).collect();
        // Each index 0..256 bit-reversed within 8 bits.
        #[rustfmt::skip]
        let output256: Vec<u64> = vec![
            0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
            0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
            0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
            0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
            0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
            0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
            0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
            0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
            0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
            0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
            0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
            0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
            0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
            0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
            0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
            0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
        ];
        reverse_slice_index_bits(&mut input256[..]);
        assert_eq!(input256, output256);
    }

    #[test]
    fn test_apply_to_chunks_exact_fit() {
        const CHUNK_SIZE: usize = 4;
        let input: Vec<u8> = vec![1, 2, 3, 4, 5, 6, 7, 8];
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert_eq!(results, vec![vec![1, 2, 3, 4], vec![5, 6, 7, 8]]);
    }

    #[test]
    fn test_apply_to_chunks_with_remainder() {
        const CHUNK_SIZE: usize = 3;
        let input: Vec<u8> = vec![1, 2, 3, 4, 5, 6, 7];
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert_eq!(results, vec![vec![1, 2, 3], vec![4, 5, 6], vec![7]]);
    }

    #[test]
    fn test_apply_to_chunks_empty_input() {
        const CHUNK_SIZE: usize = 4;
        let input: Vec<u8> = vec![];
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        // The callback must never fire on an empty input.
        assert!(results.is_empty());
    }

    #[test]
    fn test_apply_to_chunks_single_chunk() {
        const CHUNK_SIZE: usize = 10;
        let input: Vec<u8> = vec![1, 2, 3, 4, 5];
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert_eq!(results, vec![vec![1, 2, 3, 4, 5]]);
    }

    #[test]
    fn test_apply_to_chunks_large_chunk_size() {
        const CHUNK_SIZE: usize = 100;
        let input: Vec<u8> = vec![1, 2, 3, 4, 5, 6, 7, 8];
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert_eq!(results, vec![vec![1, 2, 3, 4, 5, 6, 7, 8]]);
    }

    #[test]
    fn test_apply_to_chunks_large_input() {
        const CHUNK_SIZE: usize = 5;
        let input: Vec<u8> = (1..=20).collect();
        let mut results: Vec<Vec<u8>> = Vec::new();

        apply_to_chunks::<CHUNK_SIZE, _, _>(input, |chunk| {
            results.push(chunk.to_vec());
        });

        assert_eq!(
            results,
            vec![
                vec![1, 2, 3, 4, 5],
                vec![6, 7, 8, 9, 10],
                vec![11, 12, 13, 14, 15],
                vec![16, 17, 18, 19, 20]
            ]
        );
    }

    #[test]
    fn test_reverse_slice_index_bits_random() {
        // 1 << 16 elements of u32 exceeds SMALL_ARR_SIZE, exercising the
        // transpose-based path as well as the small path.
        let lengths = [32, 128, 1 << 16];
        let mut rng = OsRng;
        for _ in 0..32 {
            for &length in &lengths {
                let mut rand_list: Vec<u32> = Vec::with_capacity(length);
                rand_list.resize_with(length, || rng.gen());
                let expect = reverse_index_bits_naive(&rand_list);

                let mut actual = rand_list.clone();
                reverse_slice_index_bits(&mut actual);

                assert_eq!(actual, expect);
            }
        }
    }

    #[test]
    fn test_log2_strict_usize_edge_cases() {
        assert_eq!(log2_strict_usize(1), 0);
        assert_eq!(log2_strict_usize(2), 1);
        assert_eq!(log2_strict_usize(1 << 18), 18);
        assert_eq!(log2_strict_usize(1 << 31), 31);
        assert_eq!(
            log2_strict_usize(1 << (usize::BITS - 1)),
            usize::BITS as usize - 1
        );
    }

    #[test]
    #[should_panic]
    fn test_log2_strict_usize_zero() {
        let _ = log2_strict_usize(0);
    }

    #[test]
    #[should_panic]
    fn test_log2_strict_usize_nonpower_2() {
        let _ = log2_strict_usize(0x78c341c65ae6d262);
    }

    #[test]
    #[should_panic]
    fn test_log2_strict_usize_max() {
        let _ = log2_strict_usize(usize::MAX);
    }

    #[test]
    fn test_log2_ceil_usize_comprehensive() {
        // Powers of two (and the 0/1 special cases).
        assert_eq!(log2_ceil_usize(0), 0);
        assert_eq!(log2_ceil_usize(1), 0);
        assert_eq!(log2_ceil_usize(2), 1);
        assert_eq!(log2_ceil_usize(1 << 18), 18);
        assert_eq!(log2_ceil_usize(1 << 31), 31);
        assert_eq!(
            log2_ceil_usize(1 << (usize::BITS - 1)),
            usize::BITS as usize - 1
        );

        // Non-powers of two round up.
        assert_eq!(log2_ceil_usize(3), 2);
        assert_eq!(log2_ceil_usize(0x14fe901b), 29);
        assert_eq!(
            log2_ceil_usize((1 << (usize::BITS - 1)) + 1),
            usize::BITS as usize
        );
        assert_eq!(log2_ceil_usize(usize::MAX - 1), usize::BITS as usize);
        assert_eq!(log2_ceil_usize(usize::MAX), usize::BITS as usize);
    }

    // Straightforward O(n) reference implementation used to validate the
    // optimized `reverse_slice_index_bits`.
    fn reverse_index_bits_naive<T: Copy>(arr: &[T]) -> Vec<T> {
        let n = arr.len();
        let n_power = log2_strict_usize(n);

        let mut out = vec![None; n];
        for (i, v) in arr.iter().enumerate() {
            let dst = i.reverse_bits() >> (usize::BITS - n_power as u32);
            out[dst] = Some(*v);
        }

        out.into_iter().map(|x| x.unwrap()).collect()
    }
}