strength_reduce/
lib.rs

1//! `strength_reduce` implements integer division and modulo via "arithmetic strength reduction"
2//!
3//! Modern processors can do multiplication and shifts much faster than division, and "arithmetic strength reduction" is an algorithm to transform divisions into multiplications and shifts.
4//! Compilers already perform this optimization for divisors that are known at compile time; this library enables this optimization for divisors that are only known at runtime.
5//!
6//! Benchmarking shows a 5-10x speedup or integer division and modulo operations.
7//!
8//! # Example:
9//! ```
10//! use strength_reduce::StrengthReducedU64;
11//! 
12//! let mut my_array: Vec<u64> = (0..500).collect();
13//! let divisor = 3;
14//! let modulo = 14;
15//!
16//! // slow naive division and modulo
17//! for element in &mut my_array {
18//!     *element = (*element / divisor) % modulo;
19//! }
20//!
21//! // fast strength-reduced division and modulo
22//! let reduced_divisor = StrengthReducedU64::new(divisor);
23//! let reduced_modulo = StrengthReducedU64::new(modulo);
24//! for element in &mut my_array {
25//!     *element = (*element / reduced_divisor) % reduced_modulo;
26//! }
27//! ```
28//!
29//! This library is intended for hot loops like the example above, where a division is repeated many times in a loop with the divisor remaining unchanged. 
30//! There is a setup cost associated with creating stength-reduced division instances, so using strength-reduced division for 1-2 divisions is not worth the setup cost.
31//! The break-even point differs by use-case, but is typically low: Benchmarking has shown that takes 3 to 4 repeated divisions with the same StengthReduced## instance to be worth it.
32//! 
33//! `strength_reduce` is `#![no_std]`
34//!
35//! The optimizations that this library provides are inherently dependent on architecture, compiler, and platform,
36//! so test before you use. 
37#![no_std]
38
39#[cfg(test)]
40extern crate num_bigint;
41#[cfg(test)]
42extern crate rand;
43
44use core::ops::{Div, Rem};
45
46mod long_division;
47mod long_multiplication;
48
49/// Implements unsigned division and modulo via mutiplication and shifts.
50///
51/// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated,
52/// this version will be several times faster than naive division.
53#[derive(Clone, Copy, Debug)]
54pub struct StrengthReducedU8 {
55    multiplier: u16,
56    divisor: u8,
57}
58impl StrengthReducedU8 {
59    /// Creates a new divisor instance.
60    ///
61    /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop.
62    ///
63    /// # Panics:
64    /// 
65    /// Panics if `divisor` is 0
66    #[inline]
67    pub fn new(divisor: u8) -> Self {
68        assert!(divisor > 0);
69
70        if divisor.is_power_of_two() { 
71            Self{ multiplier: 0, divisor }
72        } else {
73            let divided = core::u16::MAX / (divisor as u16);
74            Self{ multiplier: divided + 1, divisor }
75        }
76    }
77
78    /// Simultaneous truncated integer division and modulus.
79    /// Returns `(quotient, remainder)`.
80    #[inline]
81    pub fn div_rem(numerator: u8, denom: Self) -> (u8, u8) {
82        let quotient = numerator / denom;
83        let remainder = numerator % denom;
84        (quotient, remainder)
85    }
86
87    /// Retrieve the value used to create this struct
88    #[inline]
89    pub fn get(&self) -> u8 {
90        self.divisor
91    }
92}
93
94impl Div<StrengthReducedU8> for u8 {
95    type Output = u8;
96
97    #[inline]
98    fn div(self, rhs: StrengthReducedU8) -> Self::Output {
99        if rhs.multiplier == 0 {
100            (self as u16 >> rhs.divisor.trailing_zeros()) as u8
101        } else {
102            let numerator = self as u16;
103            let multiplied_hi = numerator * (rhs.multiplier >> 8);
104            let multiplied_lo = (numerator * rhs.multiplier as u8 as u16) >> 8;
105
106            ((multiplied_hi + multiplied_lo) >> 8) as u8
107        }
108    }
109}
110
111impl Rem<StrengthReducedU8> for u8 {
112    type Output = u8;
113
114    #[inline]
115    fn rem(self, rhs: StrengthReducedU8) -> Self::Output {
116        if rhs.multiplier == 0 {
117            self & (rhs.divisor - 1)
118        } else {
119            let product = rhs.multiplier.wrapping_mul(self as u16) as u32;
120            let divisor = rhs.divisor as u32;
121
122            let shifted = (product * divisor) >> 16;
123            shifted as u8
124        }
125    }
126}
127
128// small types prefer to do work in the intermediate type
129macro_rules! strength_reduced_u16 {
130    ($struct_name:ident, $primitive_type:ident) => (
131        /// Implements unsigned division and modulo via mutiplication and shifts.
132        ///
133        /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated,
134        /// this version will be several times faster than naive division.
135        #[derive(Clone, Copy, Debug)]
136        pub struct $struct_name {
137            multiplier: u32,
138            divisor: $primitive_type,
139        }
140        impl $struct_name {
141            /// Creates a new divisor instance.
142            ///
143            /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop.
144            ///
145            /// # Panics:
146            /// 
147            /// Panics if `divisor` is 0
148            #[inline]
149            pub fn new(divisor: $primitive_type) -> Self {
150                assert!(divisor > 0);
151
152                if divisor.is_power_of_two() { 
153                    Self{ multiplier: 0, divisor }
154                } else {
155                    let divided = core::u32::MAX / (divisor as u32);
156                    Self{ multiplier: divided + 1, divisor }
157                }
158            }
159
160            /// Simultaneous truncated integer division and modulus.
161            /// Returns `(quotient, remainder)`.
162            #[inline]
163            pub fn div_rem(numerator: $primitive_type, denom: Self) -> ($primitive_type, $primitive_type) {
164                let quotient = numerator / denom;
165                let remainder = numerator - quotient * denom.divisor;
166                (quotient, remainder)
167            }
168
169            /// Retrieve the value used to create this struct
170            #[inline]
171            pub fn get(&self) -> $primitive_type {
172                self.divisor
173            }
174        }
175
176        impl Div<$struct_name> for $primitive_type {
177            type Output = $primitive_type;
178
179            #[inline]
180            fn div(self, rhs: $struct_name) -> Self::Output {
181                if rhs.multiplier == 0 {
182                    self >> rhs.divisor.trailing_zeros()
183                } else {
184                    let numerator = self as u32;
185                    let multiplied_hi = numerator * (rhs.multiplier >> 16);
186                    let multiplied_lo = (numerator * rhs.multiplier as u16 as u32) >> 16;
187
188                    ((multiplied_hi + multiplied_lo) >> 16) as $primitive_type
189                }
190            }
191        }
192
193        impl Rem<$struct_name> for $primitive_type {
194            type Output = $primitive_type;
195
196            #[inline]
197            fn rem(self, rhs: $struct_name) -> Self::Output {
198                if rhs.multiplier == 0 {
199                    self & (rhs.divisor - 1)
200                } else {
201                    let quotient = self / rhs;
202                    self - quotient * rhs.divisor
203                }
204            }
205        }
206    )
207}
208
209// small types prefer to do work in the intermediate type
210macro_rules! strength_reduced_u32 {
211    ($struct_name:ident, $primitive_type:ident) => (
212        /// Implements unsigned division and modulo via mutiplication and shifts.
213        ///
214        /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated,
215        /// this version will be several times faster than naive division.
216        #[derive(Clone, Copy, Debug)]
217        pub struct $struct_name {
218            multiplier: u64,
219            divisor: $primitive_type,
220        }
221        impl $struct_name {
222            /// Creates a new divisor instance.
223            ///
224            /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop.
225            ///
226            /// # Panics:
227            /// 
228            /// Panics if `divisor` is 0
229            #[inline]
230            pub fn new(divisor: $primitive_type) -> Self {
231                assert!(divisor > 0);
232
233                if divisor.is_power_of_two() { 
234                    Self{ multiplier: 0, divisor }
235                } else {
236                    let divided = core::u64::MAX / (divisor as u64);
237                    Self{ multiplier: divided + 1, divisor }
238                }
239            }
240
241            /// Simultaneous truncated integer division and modulus.
242            /// Returns `(quotient, remainder)`.
243            #[inline]
244            pub fn div_rem(numerator: $primitive_type, denom: Self) -> ($primitive_type, $primitive_type) {
245                if denom.multiplier == 0 {
246                    (numerator >> denom.divisor.trailing_zeros(), numerator & (denom.divisor - 1))
247                }
248                else {
249                    let numerator64 = numerator as u64;
250                    let multiplied_hi = numerator64 * (denom.multiplier >> 32);
251                    let multiplied_lo = numerator64 * (denom.multiplier as u32 as u64) >> 32;
252
253                    let quotient = ((multiplied_hi + multiplied_lo) >> 32) as $primitive_type;
254                    let remainder = numerator - quotient * denom.divisor;
255                    (quotient, remainder)
256                }
257            }
258
259            /// Retrieve the value used to create this struct
260            #[inline]
261            pub fn get(&self) -> $primitive_type {
262                self.divisor
263            }
264        }
265
266        impl Div<$struct_name> for $primitive_type {
267            type Output = $primitive_type;
268
269            #[inline]
270            fn div(self, rhs: $struct_name) -> Self::Output {
271                if rhs.multiplier == 0 {
272                    self >> rhs.divisor.trailing_zeros()
273                } else {
274                    let numerator = self as u64;
275                    let multiplied_hi = numerator * (rhs.multiplier >> 32);
276                    let multiplied_lo = numerator * (rhs.multiplier as u32 as u64) >> 32;
277
278                    ((multiplied_hi + multiplied_lo) >> 32) as $primitive_type
279                }
280            }
281        }
282
283        impl Rem<$struct_name> for $primitive_type {
284            type Output = $primitive_type;
285
286            #[inline]
287            fn rem(self, rhs: $struct_name) -> Self::Output {
288                if rhs.multiplier == 0 {
289                    self & (rhs.divisor - 1)
290                } else {
291                    let product = rhs.multiplier.wrapping_mul(self as u64) as u128;
292                    let divisor = rhs.divisor as u128;
293
294                    let shifted = (product * divisor) >> 64;
295                    shifted as $primitive_type
296                }
297            }
298        }
299    )
300}
301
302macro_rules! strength_reduced_u64 {
303    ($struct_name:ident, $primitive_type:ident) => (
304        /// Implements unsigned division and modulo via mutiplication and shifts.
305        ///
306        /// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated,
307        /// this version will be several times faster than naive division.
308        #[derive(Clone, Copy, Debug)]
309        pub struct $struct_name {
310            multiplier: u128,
311            divisor: $primitive_type,
312        }
313        impl $struct_name {
314            /// Creates a new divisor instance.
315            ///
316            /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop.
317            ///
318            /// # Panics:
319            /// 
320            /// Panics if `divisor` is 0
321            #[inline]
322            pub fn new(divisor: $primitive_type) -> Self {
323                assert!(divisor > 0);
324
325                if divisor.is_power_of_two() { 
326                    Self{ multiplier: 0, divisor }
327                } else {
328                    let quotient = long_division::divide_128_max_by_64(divisor as u64);
329                    Self{ multiplier: quotient + 1, divisor }
330                }
331            }
332            /// Simultaneous truncated integer division and modulus.
333            /// Returns `(quotient, remainder)`.
334            #[inline]
335            pub fn div_rem(numerator: $primitive_type, denom: Self) -> ($primitive_type, $primitive_type) {
336                if denom.multiplier == 0 {
337                    (numerator >> denom.divisor.trailing_zeros(), numerator & (denom.divisor - 1))
338                }
339                else {
340                    let numerator128 = numerator as u128;
341                    let multiplied_hi = numerator128 * (denom.multiplier >> 64);
342                    let multiplied_lo = numerator128 * (denom.multiplier as u64 as u128) >> 64;
343
344                    let quotient = ((multiplied_hi + multiplied_lo) >> 64) as $primitive_type;
345                    let remainder = numerator - quotient * denom.divisor;
346                    (quotient, remainder)
347                }
348            }
349
350            /// Retrieve the value used to create this struct
351            #[inline]
352            pub fn get(&self) -> $primitive_type {
353                self.divisor
354            }
355        }
356
357        impl Div<$struct_name> for $primitive_type {
358            type Output = $primitive_type;
359
360            #[inline]
361            fn div(self, rhs: $struct_name) -> Self::Output {
362                if rhs.multiplier == 0 {
363                    self >> rhs.divisor.trailing_zeros()
364                } else {
365                    let numerator = self as u128;
366                    let multiplied_hi = numerator * (rhs.multiplier >> 64);
367                    let multiplied_lo = numerator * (rhs.multiplier as u64 as u128) >> 64;
368
369                    ((multiplied_hi + multiplied_lo) >> 64) as $primitive_type
370                }
371            }
372        }
373
374        impl Rem<$struct_name> for $primitive_type {
375            type Output = $primitive_type;
376
377            #[inline]
378            fn rem(self, rhs: $struct_name) -> Self::Output {
379                if rhs.multiplier == 0 {
380                    self & (rhs.divisor - 1)
381                } else {
382                    let quotient = self / rhs;
383                    self - quotient * rhs.divisor
384                }
385            }
386        }
387    )
388}
389
390/// Implements unsigned division and modulo via mutiplication and shifts.
391///
392/// Creating a an instance of this struct is more expensive than a single division, but if the division is repeated,
393/// this version will be several times faster than naive division.
394#[derive(Clone, Copy, Debug)]
395pub struct StrengthReducedU128 {
396    multiplier_hi: u128,
397    multiplier_lo: u128,
398    divisor: u128,
399}
400impl StrengthReducedU128 {
401    /// Creates a new divisor instance.
402    ///
403    /// If possible, avoid calling new() from an inner loop: The intended usage is to create an instance of this struct outside the loop, and use it for divison and remainders inside the loop.
404    ///
405    /// # Panics:
406    /// 
407    /// Panics if `divisor` is 0
408    #[inline]
409    pub fn new(divisor: u128) -> Self {
410        assert!(divisor > 0);
411
412        if divisor.is_power_of_two() { 
413            Self{ multiplier_hi: 0, multiplier_lo: 0, divisor }
414        } else {
415            let (quotient_hi, quotient_lo) = long_division::divide_256_max_by_128(divisor);
416            let multiplier_lo = quotient_lo.wrapping_add(1);
417            let multiplier_hi = if multiplier_lo == 0 { quotient_hi + 1 } else { quotient_hi };
418            Self{ multiplier_hi, multiplier_lo, divisor }
419        }
420    }
421
422    /// Simultaneous truncated integer division and modulus.
423    /// Returns `(quotient, remainder)`.
424    #[inline]
425    pub fn div_rem(numerator: u128, denom: Self) -> (u128, u128) {
426        let quotient = numerator / denom;
427        let remainder = numerator - quotient * denom.divisor;
428        (quotient, remainder)
429    }
430
431    /// Retrieve the value used to create this struct
432    #[inline]
433    pub fn get(&self) -> u128 {
434        self.divisor
435    }
436}
437
438impl Div<StrengthReducedU128> for u128 {
439    type Output = u128;
440
441    #[inline]
442    fn div(self, rhs: StrengthReducedU128) -> Self::Output {
443        if rhs.multiplier_hi == 0 {
444            self >> rhs.divisor.trailing_zeros()
445        } else {
446            long_multiplication::multiply_256_by_128_upperbits(rhs.multiplier_hi, rhs.multiplier_lo, self)
447        }
448    }
449}
450
451impl Rem<StrengthReducedU128> for u128 {
452    type Output = u128;
453
454    #[inline]
455    fn rem(self, rhs: StrengthReducedU128) -> Self::Output {
456        if rhs.multiplier_hi == 0 {
457            self & (rhs.divisor - 1)
458        } else {
459             let quotient = long_multiplication::multiply_256_by_128_upperbits(rhs.multiplier_hi, rhs.multiplier_lo, self);
460             self - quotient * rhs.divisor
461        }
462    }
463}
464
465// We just hardcoded u8 and u128 since they will never be a usize. for the rest, we have macros, so we can reuse the same code for usize
466strength_reduced_u16!(StrengthReducedU16, u16);
467strength_reduced_u32!(StrengthReducedU32, u32);
468strength_reduced_u64!(StrengthReducedU64, u64);
469
470// Our definition for usize will depend on how big usize is
471#[cfg(target_pointer_width = "16")]
472strength_reduced_u16!(StrengthReducedUsize, usize);
473#[cfg(target_pointer_width = "32")]
474strength_reduced_u32!(StrengthReducedUsize, usize);
475#[cfg(target_pointer_width = "64")]
476strength_reduced_u64!(StrengthReducedUsize, usize);
477
478#[cfg(test)]
479mod unit_tests {
480    use super::*;
481
482    macro_rules! reduction_test {
483        ($test_name:ident, $struct_name:ident, $primitive_type:ident) => (
484            #[test]
485            fn $test_name() {
486                let max = core::$primitive_type::MAX;
487                let divisors = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,max-1,max];
488                let numerators = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20];
489
490                for &divisor in &divisors {
491                    let reduced_divisor = $struct_name::new(divisor);
492                    for &numerator in &numerators {
493                        let expected_div = numerator / divisor;
494                        let expected_rem = numerator % divisor;
495
496                        let reduced_div = numerator / reduced_divisor;
497
498                        assert_eq!(expected_div, reduced_div, "Divide failed with numerator: {}, divisor: {}", numerator, divisor);
499                        let reduced_rem = numerator % reduced_divisor;
500
501                        let (reduced_combined_div, reduced_combined_rem) = $struct_name::div_rem(numerator, reduced_divisor);
502
503                        
504                        assert_eq!(expected_rem, reduced_rem, "Modulo failed with numerator: {}, divisor: {}", numerator, divisor);
505                        assert_eq!(expected_div, reduced_combined_div, "div_rem divide failed with numerator: {}, divisor: {}", numerator, divisor);
506                        assert_eq!(expected_rem, reduced_combined_rem, "div_rem modulo failed with numerator: {}, divisor: {}", numerator, divisor);
507                    }
508                }
509            }
510        )
511    }
512
513    reduction_test!(test_strength_reduced_u8, StrengthReducedU8, u8);
514    reduction_test!(test_strength_reduced_u16, StrengthReducedU16, u16);
515    reduction_test!(test_strength_reduced_u32, StrengthReducedU32, u32);
516    reduction_test!(test_strength_reduced_u64, StrengthReducedU64, u64);
517    reduction_test!(test_strength_reduced_usize, StrengthReducedUsize, usize);
518    reduction_test!(test_strength_reduced_u128, StrengthReducedU128, u128);
519}