ruint/algorithms/div/
knuth.rs

1//! Knuth division
2
3use super::{reciprocal::reciprocal_2, small::div_3x2, DoubleWord};
4use crate::{
5    algorithms::{add::adc_n, mul::submul_nx1},
6    utils::{likely, unlikely},
7};
8
9/// ⚠️ In-place Knuth normalized long division with reciprocals.
10///
11/// Requires
12/// * the highest bit of the divisor to be set,
13/// * the `divisor` and `numerator` to be at least two limbs, and
14/// * `numerator` is at least as long as `divisor`.
15///
16/// # Panics
17///
18/// May panic if the above requirements are not met.
19#[inline]
20#[allow(clippy::many_single_char_names)]
21pub fn div_nxm_normalized(numerator: &mut [u64], divisor: &[u64]) {
22    debug_assert!(divisor.len() >= 2);
23    debug_assert!(numerator.len() >= divisor.len());
24    debug_assert!(*divisor.last().unwrap() >= (1 << 63));
25
26    let n = divisor.len();
27    let m = numerator.len() - n - 1;
28
29    // Compute the divisor double limb and reciprocal
30    let d = u128::join(divisor[n - 1], divisor[n - 2]);
31    let v = reciprocal_2(d);
32
33    // Compute the quotient one limb at a time.
34    for j in (0..=m).rev() {
35        // Fetch the first three limbs of the numerator.
36        let n21 = u128::join(numerator[j + n], numerator[j + n - 1]);
37        let n0 = numerator[j + n - 2];
38        debug_assert!(n21 <= d);
39
40        // Overflow case
41        if unlikely(n21 == d) {
42            let q = u64::MAX;
43            let _carry = submul_nx1(&mut numerator[j..j + n], divisor, q);
44            numerator[j + n] = q;
45            continue;
46        }
47
48        // Calculate 3x2 approximate quotient word.
49        // By using 3x2 limbs we get a quotient that is very likely correct
50        // and at most one too large. In the process we also get the first
51        // two remainder limbs.
52        let (mut q, r) = div_3x2(n21, n0, d, v);
53
54        // Subtract the quotient times the divisor from the remainder.
55        // We already have the highest two limbs, so we can reduce the
56        // computation. We still need to carry propagate into these limbs.
57        let borrow = submul_nx1(&mut numerator[j..j + n - 2], &divisor[..n - 2], q);
58        let (r, borrow) = r.overflowing_sub(u128::from(borrow));
59        numerator[j + n - 2] = r.low();
60        numerator[j + n - 1] = r.high();
61
62        // If we have a carry then the quotient was one too large.
63        // We correct by decrementing the quotient and adding one divisor back.
64        if unlikely(borrow) {
65            q = q.wrapping_sub(1);
66            let carry = adc_n(&mut numerator[j..j + n], &divisor[..n], 0);
67            // Expect carry because we flip sign back to positive.
68            debug_assert_eq!(carry, 1);
69        }
70
71        // Store quotient in the unused bits of numerator
72        numerator[j + n] = q;
73    }
74}
75
76/// ⚠️ In-place Knuth long division with implicit normalization and reciprocals.
77///
78/// Requires
79/// * the highest limb of the divisor to be non-zero,
80/// * the `divisor` and `numerator` to be at least two limbs, and
81/// * `numerator` is at least as long as `divisor`.
82///
83/// # Panics
84///
85/// May panic if the above requirements are not met.
86#[inline]
87#[allow(clippy::many_single_char_names)]
88pub fn div_nxm(numerator: &mut [u64], divisor: &mut [u64]) {
89    debug_assert!(divisor.len() >= 3);
90    debug_assert!(numerator.len() >= divisor.len());
91    debug_assert!(*divisor.last().unwrap() >= 1);
92
93    let n = divisor.len();
94    let m = numerator.len() - n;
95
96    // Compute normalized divisor double-word and reciprocal.
97    // TODO: Delegate to div_nxm_normalized if normalized.
98    let (d, shift) = {
99        let d = u128::join(divisor[n - 1], divisor[n - 2]);
100        let shift = d.high().leading_zeros();
101        (
102            if shift == 0 {
103                d
104            } else {
105                (d << shift) | u128::from(divisor[n - 3] >> (64 - shift))
106            },
107            shift,
108        )
109    };
110    debug_assert!(d >= 1 << 127);
111    let v = reciprocal_2(d);
112
113    // Compute the quotient one limb at a time.
114    let mut q_high = 0;
115    for j in (0..=m).rev() {
116        // Fetch the first three limbs of the shifted numerator starting at `j + n`.
117        let (n21, n0) = {
118            let n2 = numerator.get(j + n).copied().unwrap_or_default();
119            let n21 = u128::join(n2, numerator[j + n - 1]);
120            let n0 = numerator[j + n - 2];
121            if shift == 0 {
122                (n21, n0)
123            } else {
124                (
125                    (n21 << shift) | u128::from(n0 >> (64 - shift)),
126                    (n0 << shift) | (numerator[j + n - 3] >> (64 - shift)),
127                )
128            }
129        };
130        debug_assert!(n21 <= d);
131
132        // Compute the quotient
133        let q = if likely(n21 < d) {
134            // Calculate 3x2 approximate quotient word.
135            // By using 3x2 limbs we get a quotient that is very likely correct
136            // and at most one too large. In the process we also get the first
137            // two remainder limbs.
138            let (mut q, r) = div_3x2(n21, n0, d, v);
139
140            if q != 0 {
141                // Subtract the quotient times the divisor from the remainder.
142                // We already have the highest 128 bit, so we can reduce the
143                // computation. We still need to carry propagate into these limbs.
144                let borrow = if shift == 0 {
145                    let borrow = submul_nx1(&mut numerator[j..j + n - 2], &divisor[..n - 2], q);
146                    let (r, borrow) = r.overflowing_sub(u128::from(borrow));
147                    numerator[j + n - 2] = r.low();
148                    numerator[j + n - 1] = r.high();
149                    borrow
150                } else {
151                    // OPT: Can we re-use `r` here somehow? The problem is we can not just
152                    // shift the `r` or `borrow` because we need to accurately reproduce
153                    // the remainder and carry in the middle of a limb.
154                    let borrow = submul_nx1(&mut numerator[j..j + n], divisor, q);
155                    let n2 = numerator.get(j + n).copied().unwrap_or_default();
156                    borrow != n2
157                };
158
159                // If we have a carry then the quotient was one too large.
160                // We correct by decrementing the quotient and adding one divisor back.
161                if unlikely(borrow) {
162                    q = q.wrapping_sub(1);
163                    let carry = adc_n(&mut numerator[j..j + n], &divisor[..n], 0);
164                    // Expect carry because we flip sign back to positive.
165                    debug_assert_eq!(carry, 1);
166                }
167            }
168            q
169        } else {
170            // Overflow case
171            let q = u64::MAX;
172            let _carry = submul_nx1(&mut numerator[j..j + n], divisor, q);
173            q
174        };
175
176        // Store the quotient in the processed bits of numerator plus `q_high`.
177        if j + n < numerator.len() {
178            numerator[j + n] = q;
179        } else {
180            q_high = q;
181        }
182    }
183
184    // Copy remainder to `divisor` and `quotient` to numerator.
185    divisor.copy_from_slice(&numerator[..n]);
186    numerator.copy_within(n.., 0);
187    numerator[m] = q_high;
188    numerator[m + 1..].fill(0);
189}
190
191#[cfg(test)]
192mod tests {
193    use super::*;
194    use crate::algorithms::{addmul, cmp, sbb_n};
195    use core::cmp::Ordering;
196    use proptest::{
197        collection, num, proptest,
198        strategy::{Just, Strategy},
199    };
200
201    #[allow(unused_imports)]
202    use alloc::vec::Vec;
203
204    // Basic test without exceptional paths
205    #[test]
206    fn test_divrem_8by4() {
207        let mut numerator = [
208            0x3000000000000000,
209            0xd4e15e75fd4e6516,
210            0x593a70aa5daf127b,
211            0xff0a216ae9c215f1,
212            0xa78c7ad6fea10429,
213            0x18276b093f5d1dac,
214            0xfe2e0bccb9e6d8b3,
215            0x1bebfb3bc05d9347,
216        ];
217        let divisor = [
218            0x800000000000000,
219            0x580c0c40583c55b5,
220            0x6b16b3fb5bd85ed3,
221            0xcc958c207ce3c96f,
222        ];
223        let expected_quotient = [
224            0x9128464e61d6b5b3_u64,
225            0xd9eea4fc30c5ac6c_u64,
226            0x944a2d832d5a6a08_u64,
227            0x22f06722e8d883b1_u64,
228        ];
229        let expected_remainder = [
230            0x9800000000000000,
231            0x70efd2d3f528c8d9,
232            0x6dad759fcd6af14a,
233            0x5fe38801c609f277,
234        ];
235        div_nxm_normalized(&mut numerator, &divisor);
236        let (remainder, quotient) = numerator.split_at(divisor.len());
237        assert_eq!(remainder, expected_remainder);
238        assert_eq!(quotient, expected_quotient);
239    }
240
241    // Test case that forces the `unlikely(borrow)` branch.
242    #[test]
243    fn test_div_rollback() {
244        let mut numerator = [
245            0x1656178c14142000,
246            0x821415dfe9e81612,
247            0x1616561616161616,
248            0x96000016820016,
249        ];
250        let divisor = [0x1415dfe9e8161414, 0x1656161616161682, 0x9600001682001616];
251        let expected_quotient = [0xffffffffffffff];
252        let expected_remainder = [0x166bf775fc2a3414, 0x1656161616161680, 0x9600001682001616];
253        div_nxm_normalized(&mut numerator, &divisor);
254        let (remainder, quotient) = numerator.split_at(divisor.len());
255        assert_eq!(remainder, expected_remainder);
256        assert_eq!(quotient, expected_quotient);
257    }
258
259    // Test case that forces the `unlikely(borrow)` branch.
260    #[test]
261    fn test_div_rollback_2() {
262        let mut numerator = [
263            0x100100000,
264            0x81000,
265            0x1000000000000000,
266            0x0,
267            0x0,
268            0xfffff00000000000,
269            0xffffffffffffffff,
270            0xdfffffffffffff,
271        ];
272        let divisor = [
273            0xfffffffffff00000,
274            0xffffffffffffffff,
275            0xfffffffffffff3ff,
276            0xffffffffffffffff,
277            0xdfffffffffffffff,
278        ];
279        let expected_quotient = [0xffffedb6db6db6e9, 0xffffffffffffffff, 0xffffffffffffff];
280        let expected_remainder = [
281            0xdb6db6dc6ea00000,
282            0x80ffe,
283            0xf2492492492ec00,
284            0x1000,
285            0x2000000000000000,
286        ];
287        div_nxm_normalized(&mut numerator, &divisor);
288        let (remainder, quotient) = numerator.split_at(divisor.len());
289        assert_eq!(quotient, expected_quotient);
290        assert_eq!(remainder, expected_remainder);
291    }
292
293    #[test]
294    fn test_div_overflow() {
295        let mut numerator = [0xb200000000000002, 0x1, 0x0, 0xfdffffff00000000];
296        let divisor = [0x10002, 0x0, 0xfdffffff00000000];
297        let expected_quotient = [0xffffffffffffffff];
298        let expected_remainder = [0xb200000000010004, 0xfffffffffffeffff, 0xfdfffffeffffffff];
299        div_nxm_normalized(&mut numerator, &divisor);
300        let (remainder, quotient) = numerator.split_at(divisor.len());
301        assert_eq!(quotient, expected_quotient);
302        assert_eq!(remainder, expected_remainder);
303    }
304
305    // Proptest without forced exceptional paths
306    #[test]
307    fn test_div_nxm_normalized() {
308        let quotient = collection::vec(num::u64::ANY, 1..10);
309        let divisor = collection::vec(num::u64::ANY, 2..10).prop_map(|mut vec| {
310            *vec.last_mut().unwrap() |= 1 << 63;
311            vec
312        });
313        let dr = divisor.prop_flat_map(|divisor| {
314            let d = divisor.clone();
315            let remainder =
316                collection::vec(num::u64::ANY, divisor.len()).prop_map(move |mut vec| {
317                    if cmp(&vec, &d) != Ordering::Less {
318                        let carry = sbb_n(&mut vec, &d, 0);
319                        assert_eq!(carry, 0);
320                    }
321                    vec
322                });
323            (Just(divisor), remainder)
324        });
325        proptest!(|(quotient in quotient, (divisor, remainder) in dr)| {
326            let mut numerator: Vec<u64> = vec![0; divisor.len() + quotient.len()];
327            numerator[..remainder.len()].copy_from_slice(&remainder);
328            addmul(&mut numerator, quotient.as_slice(), divisor.as_slice());
329
330            div_nxm_normalized(numerator.as_mut_slice(), divisor.as_slice());
331            let (r, q) = numerator.split_at(divisor.len());
332            assert_eq!(r, remainder);
333            assert_eq!(q, quotient);
334        });
335    }
336
337    // Basic test without exceptional paths (with shift == 0)
338    #[test]
339    fn test_div_nxm_8by4_noshift() {
340        let mut numerator = [
341            0x3000000000000000,
342            0xd4e15e75fd4e6516,
343            0x593a70aa5daf127b,
344            0xff0a216ae9c215f1,
345            0xa78c7ad6fea10429,
346            0x18276b093f5d1dac,
347            0xfe2e0bccb9e6d8b3,
348            0x1bebfb3bc05d9347,
349        ];
350        let mut divisor = [
351            0x800000000000000,
352            0x580c0c40583c55b5,
353            0x6b16b3fb5bd85ed3,
354            0xcc958c207ce3c96f,
355        ];
356        let quotient = [
357            0x9128464e61d6b5b3,
358            0xd9eea4fc30c5ac6c,
359            0x944a2d832d5a6a08,
360            0x22f06722e8d883b1,
361        ];
362        let remainder = [
363            0x9800000000000000,
364            0x70efd2d3f528c8d9,
365            0x6dad759fcd6af14a,
366            0x5fe38801c609f277,
367        ];
368        div_nxm(&mut numerator, &mut divisor);
369        assert_eq!(&numerator[..quotient.len()], quotient);
370        assert_eq!(divisor, remainder);
371    }
372
373    // Basic test without exceptional paths (with shift > 0)
374    #[test]
375    fn test_div_nxm_8by4_shift() {
376        let mut numerator = [
377            0xc59c28364a491d22,
378            0x1ab240e2a2a91050,
379            0xe497baaf4e4b16cb,
380            0xd21643d231c590d6,
381            0xda918cd26803c7f1,
382            0xb445074f770b5261,
383            0x37aff2aa32059516,
384            0x3cf254c1,
385        ];
386        let mut divisor = [
387            0xc91e935375a97723,
388            0x86a9ded3044ec12b,
389            0xc7d2c4d3b53bff51,
390            0x6ef0530d,
391        ];
392        let quotient = [
393            0x456b1581ef1a759a,
394            0x88702c90bbe2ef3c,
395            0xff8492ee85dec642,
396            0x8ca39da4ca785f36,
397        ];
398        let remainder = [
399            0x82c3522848567314,
400            0xeaba6edb18db568e,
401            0x18f16cfde22dcefe,
402            0x11296685,
403        ];
404        div_nxm(&mut numerator, &mut divisor);
405        assert_eq!(&numerator[..quotient.len()], quotient);
406        assert_eq!(divisor, remainder);
407    }
408
409    // Basic test without exceptional paths (with q_high > 0)
410    #[test]
411    fn test_div_nxm_8by4_q_high() {
412        let mut numerator = [
413            0x39ea76324ed952cc,
414            0x89b7a0d30e2df1be,
415            0x7011596e8b3f301f,
416            0x11930a89eca68640,
417            0x36a34eca4f73d0e4,
418            0x86d53c52b1108c90,
419            0x6093338b7b667e03,
420        ];
421        let mut divisor = [
422            0x439702d44a8f62a4,
423            0x8dfa6ea7fc41f689,
424            0xc79723ff4dd060e0,
425            0x7d13e204,
426        ];
427        let quotient = [
428            0x181cecbb64efa48b,
429            0x1e97056793a15125,
430            0xe8145d63cd312d02,
431            0xc5a9aced,
432        ];
433        let remainder = [
434            0x682e10f8d0b1b3c0,
435            0xbf46c8b0e5ac8a62,
436            0x5abe292d53acf085,
437            0x699fc911,
438        ];
439        div_nxm(&mut numerator, &mut divisor);
440        assert_eq!(&numerator[..quotient.len()], quotient);
441        assert_eq!(divisor, remainder);
442    }
443
444    // Basic test with numerator and divisor the same size.
445    #[test]
446    fn test_div_nxm_4by4() {
447        let mut numerator = [
448            0x55a8f128f187ecee,
449            0xe059a1f3fe52e559,
450            0x570ab3b2aac5c5d9,
451            0xf7ea0c73b80ddca1,
452        ];
453        let mut divisor = [
454            0x6c8cd670adcae7da,
455            0x458d6428c7fd36d3,
456            0x4a73ad64cc703a1d,
457            0x33bf790f92ed51fe,
458        ];
459        let quotient = [0x4];
460        let remainder = [
461            0xa37597663a5c4d86,
462            0xca241150de5e0a0b,
463            0x2d3bfe1f7904dd64,
464            0x28ec28356c5894a8,
465        ];
466        div_nxm(&mut numerator, &mut divisor);
467        assert_eq!(&numerator[..quotient.len()], quotient);
468        assert_eq!(divisor, remainder);
469    }
470
471    #[test]
472    fn test_div_nxm_4by3() {
473        let mut numerator = [
474            0x8000000000000000,
475            0x8000000000000000,
476            0x8000000000000000,
477            0x8000000000000001,
478        ];
479        let mut divisor = [0x8000000000000000, 0x8000000000000000, 0x8000000000000000];
480        let quotient = [0x1, 0x1];
481        let remainder = [0x0, 0x8000000000000000, 0x7fffffffffffffff];
482        div_nxm(&mut numerator, &mut divisor);
483        assert_eq!(&numerator[..quotient.len()], quotient);
484        assert_eq!(divisor, remainder);
485    }
486
487    // Proptest without forced exceptional paths
488    #[test]
489    fn test_div_nxm() {
490        let quotient = collection::vec(num::u64::ANY, 1..10);
491        let divisor = collection::vec(num::u64::ANY, 3..10);
492        let dr = divisor.prop_flat_map(|divisor| {
493            let d = divisor.clone();
494            let remainder =
495                collection::vec(num::u64::ANY, divisor.len()).prop_map(move |mut vec| {
496                    *vec.last_mut().unwrap() %= d.last().unwrap();
497                    vec
498                });
499            (Just(divisor), remainder)
500        });
501        proptest!(|(quotient in quotient, (mut divisor, remainder) in dr)| {
502            let mut numerator: Vec<u64> = vec![0; divisor.len() + quotient.len()];
503            numerator[..remainder.len()].copy_from_slice(&remainder);
504            addmul(&mut numerator, quotient.as_slice(), divisor.as_slice());
505
506            div_nxm(numerator.as_mut_slice(), divisor.as_mut_slice());
507            assert_eq!(&numerator[..quotient.len()], quotient);
508            assert_eq!(divisor, remainder);
509            assert!(numerator[quotient.len()..].iter().all(|&x| x == 0));
510        });
511    }
512}