transpose/
in_place.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

use strength_reduce::StrengthReducedUsize;
use num_integer;

fn multiplicative_inverse(a: usize, n: usize) -> usize {
    // we're going to use a modified version extended euclidean algorithm
    // we only need half the output

    let mut t = 0;
    let mut t_new = 1;

    let mut r = n;
    let mut r_new = a;

    while r_new > 0 {
        let quotient = r / r_new;

        r = r - quotient * r_new;
        core::mem::swap(&mut r, &mut r_new);

        // t might go negative here, so we have to do a checked subtract
        // if it underflows, wrap it around to the other end of the modulo
        // IE, 3 - 4 mod 5  =  -1 mod 5  =  4
        let t_subtract = quotient * t_new;
        t = if t_subtract < t {
            t - t_subtract
        } else {
            n - (t_subtract - t) % n
        };
     	core::mem::swap(&mut t, &mut t_new);
    }

    t
}

/// Transpose the input array in-place.
///
/// Given an input array of size input_width * input_height, representing flattened 2D data stored in row-major order,
/// transpose the rows and columns of that input array, in-place.
///
/// Despite being in-place, this algorithm requires max(width, height) in scratch space.
///
/// ```
/// // row-major order: the rows of our 2D array are contiguous,
/// // and the columns are strided
/// let original_array = vec![ 1, 2, 3,
/// 						   4, 5, 6];
/// let mut input_array = original_array.clone();
///
/// // Treat our 6-element array as a 2D 3x2 array, and transpose it to a 2x3 array
/// // transpose_inplace requires max(width, height) scratch space, which is in this case 3
/// let mut scratch = vec![0; 3];
/// transpose::transpose_inplace(&mut input_array, &mut scratch, 3, 2);
///
/// // The rows have become the columns, and the columns have become the rows
/// let expected_array =  vec![ 1, 4,
///								2, 5,
///								3, 6];
/// assert_eq!(input_array, expected_array);
///
/// // If we transpose it again, we should get our original data back.
/// transpose::transpose_inplace(&mut input_array, &mut scratch, 2, 3);
/// assert_eq!(original_array, input_array);
/// ```
///
/// # Panics
///
/// Panics if `input.len() != input_width * input_height` or if `scratch.len() != max(width, height)`
pub fn transpose_inplace<T: Copy>(buffer: &mut [T], scratch: &mut [T], width: usize, height: usize) {
	assert_eq!(width.checked_mul(height), Some(buffer.len()));
	assert_eq!(core::cmp::max(width, height), scratch.len());

	let gcd = StrengthReducedUsize::new(num_integer::gcd(width, height));
	let a = StrengthReducedUsize::new(height / gcd);
	let b = StrengthReducedUsize::new(width / gcd);
	let a_inverse = multiplicative_inverse(a.get(), b.get());
	let strength_reduced_height = StrengthReducedUsize::new(height);

	let index_fn = |x, y| x + y * width;

	if gcd.get() > 1 {
		for x in 0..width {
			let column_offset = (x / b) % strength_reduced_height;
			let wrapping_point = height - column_offset;

			// wrapped rotation -- do the "right half" of the array, then the "left half"
	        for y in 0..wrapping_point {
	            scratch[y] = buffer[index_fn(x, y + column_offset)];
	        }
	        for y in wrapping_point..height {
	            scratch[y] = buffer[index_fn(x, y + column_offset - height)];
	        }

	        // copy the data back into the column
	        for y in 0..height {
	            buffer[index_fn(x, y)] = scratch[y];
	        }
	    }
	}

	// Permute the rows
	{
		let row_scratch = &mut scratch[0..width];
		
		for (y, row) in buffer.chunks_exact_mut(width).enumerate() {
		    for x in 0..width {
		    	let helper_val = if y <= height + x%gcd - gcd.get() { x + y*(width-1) } else { x + y*(width-1) + height };
		    	let (helper_div, helper_mod) = StrengthReducedUsize::div_rem(helper_val, gcd);

		    	let gather_x = (a_inverse * helper_div)%b + b.get()*helper_mod;
		        row_scratch[x] = row[gather_x];
		    }

		    row.copy_from_slice(row_scratch);
		}
	}

	// Permute the columns
	for x in 0..width {
		let column_offset = x % strength_reduced_height;
		let wrapping_point = height - column_offset;

		// wrapped rotation -- do the "right half" of the array, then the "left half"
	    for y in 0..wrapping_point {
	        scratch[y] = buffer[index_fn(x, y + column_offset)];
	    }
	    for y in wrapping_point..height {
            scratch[y] = buffer[index_fn(x, y + column_offset - height)];
        }

        // Copy the data back to the buffer, but shuffle it as we do so
	    for y in 0..height {
	    	let shuffled_y = (y * width - (y / a)) % strength_reduced_height;
	        buffer[index_fn(x, y)] = scratch[shuffled_y];
	    }
	}
}