ppv_lite86/generic.rs

#![allow(non_camel_case_types)]

use crate::soft::{x2, x4};
use crate::types::*;
use core::ops::*;
use zerocopy::{AsBytes, FromBytes, FromZeroes};

#[repr(C)]
#[derive(Clone, Copy, FromBytes, AsBytes, FromZeroes)]
pub union vec128_storage {
    d: [u32; 4],
    q: [u64; 2],
}
impl From<[u32; 4]> for vec128_storage {
    #[inline(always)]
    fn from(d: [u32; 4]) -> Self {
        Self { d }
    }
}
impl From<vec128_storage> for [u32; 4] {
    #[inline(always)]
    fn from(d: vec128_storage) -> Self {
        unsafe { d.d }
    }
}
impl From<[u64; 2]> for vec128_storage {
    #[inline(always)]
    fn from(q: [u64; 2]) -> Self {
        Self { q }
    }
}
impl From<vec128_storage> for [u64; 2] {
    #[inline(always)]
    fn from(q: vec128_storage) -> Self {
        unsafe { q.q }
    }
}
impl Default for vec128_storage {
    #[inline(always)]
    fn default() -> Self {
        Self { q: [0, 0] }
    }
}
impl Eq for vec128_storage {}
impl PartialEq<vec128_storage> for vec128_storage {
    #[inline(always)]
    fn eq(&self, rhs: &Self) -> bool {
        unsafe { self.q == rhs.q }
    }
}
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec256_storage {
    v128: [vec128_storage; 2],
}
impl vec256_storage {
    #[inline(always)]
    pub fn new128(v128: [vec128_storage; 2]) -> Self {
        Self { v128 }
    }
    #[inline(always)]
    pub fn split128(self) -> [vec128_storage; 2] {
        self.v128
    }
}
impl From<vec256_storage> for [u64; 4] {
    #[inline(always)]
    fn from(q: vec256_storage) -> Self {
        let [a, b]: [u64; 2] = q.v128[0].into();
        let [c, d]: [u64; 2] = q.v128[1].into();
        [a, b, c, d]
    }
}
impl From<[u64; 4]> for vec256_storage {
    #[inline(always)]
    fn from([a, b, c, d]: [u64; 4]) -> Self {
        Self {
            v128: [[a, b].into(), [c, d].into()],
        }
    }
}
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec512_storage {
    v128: [vec128_storage; 4],
}
impl vec512_storage {
    #[inline(always)]
    pub fn new128(v128: [vec128_storage; 4]) -> Self {
        Self { v128 }
    }
    #[inline(always)]
    pub fn split128(self) -> [vec128_storage; 4] {
        self.v128
    }
}
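
// Illustrative sketch: the storage types above are plain wrappers over scalar
// arrays, so the From conversions in both directions are lossless round trips.
#[test]
fn test_storage_roundtrip_sketch() {
    let s: vec128_storage = [1u32, 2, 3, 4].into();
    let d: [u32; 4] = s.into();
    assert_eq!(d, [1, 2, 3, 4]);

    let w: vec256_storage = [1u64, 2, 3, 4].into();
    let q: [u64; 4] = w.into();
    assert_eq!(q, [1, 2, 3, 4]);
}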

#[inline(always)]
fn dmap<T, F>(t: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u32) -> u32,
{
    let t: vec128_storage = t.into();
    let d = unsafe { t.d };
    let d = vec128_storage {
        d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
    };
    unsafe { T::unpack(d) }
}

#[inline(always)]
fn dmap2<T, F>(a: T, b: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u32, u32) -> u32,
{
    let a: vec128_storage = a.into();
    let b: vec128_storage = b.into();
    let ao = unsafe { a.d };
    let bo = unsafe { b.d };
    let d = vec128_storage {
        d: [
            f(ao[0], bo[0]),
            f(ao[1], bo[1]),
            f(ao[2], bo[2]),
            f(ao[3], bo[3]),
        ],
    };
    unsafe { T::unpack(d) }
}

#[inline(always)]
fn qmap<T, F>(t: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u64) -> u64,
{
    let t: vec128_storage = t.into();
    let q = unsafe { t.q };
    let q = vec128_storage {
        q: [f(q[0]), f(q[1])],
    };
    unsafe { T::unpack(q) }
}

#[inline(always)]
fn qmap2<T, F>(a: T, b: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u64, u64) -> u64,
{
    let a: vec128_storage = a.into();
    let b: vec128_storage = b.into();
    let ao = unsafe { a.q };
    let bo = unsafe { b.q };
    let q = vec128_storage {
        q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
    };
    unsafe { T::unpack(q) }
}

#[inline(always)]
fn o_of_q(q: [u64; 2]) -> u128 {
    u128::from(q[0]) | (u128::from(q[1]) << 64)
}

#[inline(always)]
fn q_of_o(o: u128) -> [u64; 2] {
    [o as u64, (o >> 64) as u64]
}
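
// Illustrative sketch: o_of_q packs q[0] into the low 64 bits and q[1] into the
// high 64 bits of the u128; q_of_o is its inverse.
#[test]
fn test_o_of_q_roundtrip_sketch() {
    let q = [0x0123_4567_89ab_cdef_u64, 0xfedc_ba98_7654_3210];
    let o = o_of_q(q);
    assert_eq!(o as u64, q[0]);
    assert_eq!((o >> 64) as u64, q[1]);
    assert_eq!(q_of_o(o), q);
}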

#[inline(always)]
fn omap<T, F>(a: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u128) -> u128,
{
    let a: vec128_storage = a.into();
    let ao = o_of_q(unsafe { a.q });
    let o = vec128_storage { q: q_of_o(f(ao)) };
    unsafe { T::unpack(o) }
}

#[inline(always)]
fn omap2<T, F>(a: T, b: T, f: F) -> T
where
    T: Store<vec128_storage> + Into<vec128_storage>,
    F: Fn(u128, u128) -> u128,
{
    let a: vec128_storage = a.into();
    let b: vec128_storage = b.into();
    let ao = o_of_q(unsafe { a.q });
    let bo = o_of_q(unsafe { b.q });
    let o = vec128_storage {
        q: q_of_o(f(ao, bo)),
    };
    unsafe { T::unpack(o) }
}
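
// Illustrative sketch: the *map helpers lift scalar closures to lane-wise
// operations; dmap2 applies `f` to each pair of u32 lanes, qmap to each u64 lane.
#[test]
fn test_lane_map_sketch() {
    let a = u32x4_generic([1, 2, 3, 4]);
    let b = u32x4_generic([10, 20, 30, 40]);
    assert_eq!(dmap2(a, b, |x, y| x.wrapping_add(y)).0, [11, 22, 33, 44]);

    let q = u64x2_generic([1, u64::MAX]);
    assert_eq!(qmap(q, |x| x.wrapping_add(1)).0, [2, 0]);
}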

impl RotateEachWord128 for u128x1_generic {}
impl BitOps128 for u128x1_generic {}
impl BitOps64 for u128x1_generic {}
impl BitOps64 for u64x2_generic {}
impl BitOps32 for u128x1_generic {}
impl BitOps32 for u64x2_generic {}
impl BitOps32 for u32x4_generic {}
impl BitOps0 for u128x1_generic {}
impl BitOps0 for u64x2_generic {}
impl BitOps0 for u32x4_generic {}

macro_rules! impl_bitops {
    ($vec:ident) => {
        impl Not for $vec {
            type Output = Self;
            #[inline(always)]
            fn not(self) -> Self::Output {
                omap(self, |x| !x)
            }
        }
        impl BitAnd for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitand(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x & y)
            }
        }
        impl BitOr for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x | y)
            }
        }
        impl BitXor for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x ^ y)
            }
        }
        impl AndNot for $vec {
            type Output = Self;
            #[inline(always)]
            fn andnot(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| !x & y)
            }
        }
        impl BitAndAssign for $vec {
            #[inline(always)]
            fn bitand_assign(&mut self, rhs: Self) {
                *self = *self & rhs
            }
        }
        impl BitOrAssign for $vec {
            #[inline(always)]
            fn bitor_assign(&mut self, rhs: Self) {
                *self = *self | rhs
            }
        }
        impl BitXorAssign for $vec {
            #[inline(always)]
            fn bitxor_assign(&mut self, rhs: Self) {
                *self = *self ^ rhs
            }
        }

        impl Swap64 for $vec {
            #[inline(always)]
            fn swap1(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
                })
            }
            #[inline(always)]
            fn swap2(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
                })
            }
            #[inline(always)]
            fn swap4(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
                })
            }
            #[inline(always)]
            fn swap8(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
                })
            }
            #[inline(always)]
            fn swap16(self) -> Self {
                dmap(self, |x| x.rotate_left(16))
            }
            #[inline(always)]
            fn swap32(self) -> Self {
                qmap(self, |x| x.rotate_left(32))
            }
            #[inline(always)]
            fn swap64(self) -> Self {
                omap(self, |x| (x << 64) | (x >> 64))
            }
        }
    };
}
impl_bitops!(u32x4_generic);
impl_bitops!(u64x2_generic);
impl_bitops!(u128x1_generic);
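
// Illustrative sketch: `andnot` computes `!self & rhs` lane-wise, and the Swap64
// methods exchange adjacent bit groups within each lane (swap8 swaps
// neighbouring bytes, for example).
#[test]
fn test_bitops_sketch() {
    let a = u32x4_generic([0xff00_ff00; 4]);
    let b = u32x4_generic([0xffff_0000; 4]);
    assert_eq!(a.andnot(b).0, [0x00ff_0000; 4]);

    let x = u64x2_generic([0x1122_3344_5566_7788, 0]);
    assert_eq!(x.swap8().0, [0x2211_4433_6655_8877, 0]);
}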

impl RotateEachWord32 for u32x4_generic {
    #[inline(always)]
    fn rotate_each_word_right7(self) -> Self {
        dmap(self, |x| x.rotate_right(7))
    }
    #[inline(always)]
    fn rotate_each_word_right8(self) -> Self {
        dmap(self, |x| x.rotate_right(8))
    }
    #[inline(always)]
    fn rotate_each_word_right11(self) -> Self {
        dmap(self, |x| x.rotate_right(11))
    }
    #[inline(always)]
    fn rotate_each_word_right12(self) -> Self {
        dmap(self, |x| x.rotate_right(12))
    }
    #[inline(always)]
    fn rotate_each_word_right16(self) -> Self {
        dmap(self, |x| x.rotate_right(16))
    }
    #[inline(always)]
    fn rotate_each_word_right20(self) -> Self {
        dmap(self, |x| x.rotate_right(20))
    }
    #[inline(always)]
    fn rotate_each_word_right24(self) -> Self {
        dmap(self, |x| x.rotate_right(24))
    }
    #[inline(always)]
    fn rotate_each_word_right25(self) -> Self {
        dmap(self, |x| x.rotate_right(25))
    }
}

impl RotateEachWord32 for u64x2_generic {
    #[inline(always)]
    fn rotate_each_word_right7(self) -> Self {
        qmap(self, |x| x.rotate_right(7))
    }
    #[inline(always)]
    fn rotate_each_word_right8(self) -> Self {
        qmap(self, |x| x.rotate_right(8))
    }
    #[inline(always)]
    fn rotate_each_word_right11(self) -> Self {
        qmap(self, |x| x.rotate_right(11))
    }
    #[inline(always)]
    fn rotate_each_word_right12(self) -> Self {
        qmap(self, |x| x.rotate_right(12))
    }
    #[inline(always)]
    fn rotate_each_word_right16(self) -> Self {
        qmap(self, |x| x.rotate_right(16))
    }
    #[inline(always)]
    fn rotate_each_word_right20(self) -> Self {
        qmap(self, |x| x.rotate_right(20))
    }
    #[inline(always)]
    fn rotate_each_word_right24(self) -> Self {
        qmap(self, |x| x.rotate_right(24))
    }
    #[inline(always)]
    fn rotate_each_word_right25(self) -> Self {
        qmap(self, |x| x.rotate_right(25))
    }
}
impl RotateEachWord64 for u64x2_generic {
    #[inline(always)]
    fn rotate_each_word_right32(self) -> Self {
        qmap(self, |x| x.rotate_right(32))
    }
}

// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    (x >> i) | (x << (128 - i))
}
#[test]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    assert_eq!(rotate_u128_right(X, 17), X.rotate_right(17));
}

impl RotateEachWord32 for u128x1_generic {
    #[inline(always)]
    fn rotate_each_word_right7(self) -> Self {
        Self([rotate_u128_right(self.0[0], 7)])
    }
    #[inline(always)]
    fn rotate_each_word_right8(self) -> Self {
        Self([rotate_u128_right(self.0[0], 8)])
    }
    #[inline(always)]
    fn rotate_each_word_right11(self) -> Self {
        Self([rotate_u128_right(self.0[0], 11)])
    }
    #[inline(always)]
    fn rotate_each_word_right12(self) -> Self {
        Self([rotate_u128_right(self.0[0], 12)])
    }
    #[inline(always)]
    fn rotate_each_word_right16(self) -> Self {
        Self([rotate_u128_right(self.0[0], 16)])
    }
    #[inline(always)]
    fn rotate_each_word_right20(self) -> Self {
        Self([rotate_u128_right(self.0[0], 20)])
    }
    #[inline(always)]
    fn rotate_each_word_right24(self) -> Self {
        Self([rotate_u128_right(self.0[0], 24)])
    }
    #[inline(always)]
    fn rotate_each_word_right25(self) -> Self {
        Self([rotate_u128_right(self.0[0], 25)])
    }
}
impl RotateEachWord64 for u128x1_generic {
    #[inline(always)]
    fn rotate_each_word_right32(self) -> Self {
        Self([rotate_u128_right(self.0[0], 32)])
    }
}
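
// Illustrative sketch: every rotate_each_word_right* method rotates each lane
// independently by the fixed amount in its name, matching the scalar rotate_right.
#[test]
fn test_rotate_each_word_sketch() {
    let x = u32x4_generic([0x8000_0001, 0x0000_0100, 0xdead_beef, 0]);
    let r = x.rotate_each_word_right8();
    assert_eq!(
        r.0,
        [
            0x8000_0001u32.rotate_right(8),
            0x0000_0100u32.rotate_right(8),
            0xdead_beefu32.rotate_right(8),
            0,
        ]
    );
}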

#[derive(Copy, Clone)]
pub struct GenericMachine;
impl Machine for GenericMachine {
    type u32x4 = u32x4_generic;
    type u64x2 = u64x2_generic;
    type u128x1 = u128x1_generic;
    type u32x4x2 = u32x4x2_generic;
    type u64x2x2 = u64x2x2_generic;
    type u64x4 = u64x4_generic;
    type u128x2 = u128x2_generic;
    type u32x4x4 = u32x4x4_generic;
    type u64x2x4 = u64x2x4_generic;
    type u128x4 = u128x4_generic;
    #[inline(always)]
    unsafe fn instance() -> Self {
        Self
    }
}

#[derive(Copy, Clone, Debug, PartialEq, FromBytes, AsBytes, FromZeroes)]
#[repr(transparent)]
pub struct u32x4_generic([u32; 4]);
#[derive(Copy, Clone, Debug, PartialEq, FromBytes, AsBytes, FromZeroes)]
#[repr(transparent)]
pub struct u64x2_generic([u64; 2]);
#[derive(Copy, Clone, Debug, PartialEq, FromBytes, AsBytes, FromZeroes)]
#[repr(transparent)]
pub struct u128x1_generic([u128; 1]);

impl From<u32x4_generic> for vec128_storage {
    #[inline(always)]
    fn from(d: u32x4_generic) -> Self {
        Self { d: d.0 }
    }
}
impl From<u64x2_generic> for vec128_storage {
    #[inline(always)]
    fn from(q: u64x2_generic) -> Self {
        Self { q: q.0 }
    }
}
impl From<u128x1_generic> for vec128_storage {
    #[inline(always)]
    fn from(o: u128x1_generic) -> Self {
        Self { q: q_of_o(o.0[0]) }
    }
}

impl Store<vec128_storage> for u32x4_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        Self(s.d)
    }
}
impl Store<vec128_storage> for u64x2_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        Self(s.q)
    }
}
impl Store<vec128_storage> for u128x1_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        Self([o_of_q(s.q); 1])
    }
}
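
// Illustrative sketch: packing into vec128_storage and unpacking back through
// `Store` is a lossless round trip; `unpack` is unsafe because it reinterprets
// the union's bytes as whichever lane layout the caller requests.
#[test]
fn test_store_roundtrip_sketch() {
    let x = u32x4_generic([1, 2, 3, 4]);
    let s: vec128_storage = x.into();
    let y = unsafe { u32x4_generic::unpack(s) };
    assert_eq!(x, y);
}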

impl ArithOps for u32x4_generic {}
impl ArithOps for u64x2_generic {}
impl ArithOps for u128x1_generic {}

impl Add for u32x4_generic {
    type Output = Self;
    #[inline(always)]
    fn add(self, rhs: Self) -> Self::Output {
        dmap2(self, rhs, |x, y| x.wrapping_add(y))
    }
}
impl Add for u64x2_generic {
    type Output = Self;
    #[inline(always)]
    fn add(self, rhs: Self) -> Self::Output {
        qmap2(self, rhs, |x, y| x.wrapping_add(y))
    }
}
impl Add for u128x1_generic {
    type Output = Self;
    #[inline(always)]
    fn add(self, rhs: Self) -> Self::Output {
        omap2(self, rhs, |x, y| x.wrapping_add(y))
    }
}
impl AddAssign for u32x4_generic {
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
impl AddAssign for u64x2_generic {
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
impl AddAssign for u128x1_generic {
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
impl BSwap for u32x4_generic {
    #[inline(always)]
    fn bswap(self) -> Self {
        dmap(self, |x| x.swap_bytes())
    }
}
impl BSwap for u64x2_generic {
    #[inline(always)]
    fn bswap(self) -> Self {
        qmap(self, |x| x.swap_bytes())
    }
}
impl BSwap for u128x1_generic {
    #[inline(always)]
    fn bswap(self) -> Self {
        omap(self, |x| x.swap_bytes())
    }
}
impl StoreBytes for u32x4_generic {
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        let x = u32x4_generic::read_from(input).unwrap();
        dmap(x, |x| x.to_le())
    }
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        let x = u32x4_generic::read_from(input).unwrap();
        dmap(x, |x| x.to_be())
    }
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        let x = dmap(self, |x| x.to_le());
        x.write_to(out).unwrap();
    }
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        let x = dmap(self, |x| x.to_be());
        x.write_to(out).unwrap();
    }
}
impl StoreBytes for u64x2_generic {
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        let x = u64x2_generic::read_from(input).unwrap();
        qmap(x, |x| x.to_le())
    }
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        let x = u64x2_generic::read_from(input).unwrap();
        qmap(x, |x| x.to_be())
    }
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        let x = qmap(self, |x| x.to_le());
        x.write_to(out).unwrap();
    }
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        let x = qmap(self, |x| x.to_be());
        x.write_to(out).unwrap();
    }
}
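
// Illustrative sketch: write_le emits each lane in little-endian byte order
// regardless of the host, and unsafe_read_le reverses it; here the buffer must
// be exactly 16 bytes or the zerocopy read/write unwraps panic.
#[test]
fn test_store_bytes_sketch() {
    let x = u32x4_generic([0x0403_0201, 0x0807_0605, 0x0c0b_0a09, 0x100f_0e0d]);
    let mut buf = [0u8; 16];
    x.write_le(&mut buf);
    assert_eq!(buf, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
    let y = unsafe { u32x4_generic::unsafe_read_le(&buf) };
    assert_eq!(x, y);
}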

#[derive(Copy, Clone)]
pub struct G0;
#[derive(Copy, Clone)]
pub struct G1;
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
pub type u64x4_generic = x2<u64x2_generic, G1>;
pub type u128x2_generic = x2<u128x1_generic, G0>;
pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;

impl Vector<[u32; 16]> for u32x4x4_generic {
    fn to_scalars(self) -> [u32; 16] {
        let [a, b, c, d] = self.0;
        let a = a.0;
        let b = b.0;
        let c = c.0;
        let d = d.0;
        [
            a[0], a[1], a[2], a[3], //
            b[0], b[1], b[2], b[3], //
            c[0], c[1], c[2], c[3], //
            d[0], d[1], d[2], d[3], //
        ]
    }
}
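
// Illustrative sketch: to_scalars flattens the four u32x4 lanes of a u32x4x4
// into one [u32; 16] in lane order. Construction via MultiLane::from_lanes
// assumes the x4 wrapper's MultiLane impl from crate::soft.
#[test]
fn test_to_scalars_sketch() {
    let v = u32x4x4_generic::from_lanes([
        u32x4_generic([0, 1, 2, 3]),
        u32x4_generic([4, 5, 6, 7]),
        u32x4_generic([8, 9, 10, 11]),
        u32x4_generic([12, 13, 14, 15]),
    ]);
    assert_eq!(
        v.to_scalars(),
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
    );
}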

impl MultiLane<[u32; 4]> for u32x4_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u32; 4] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(xs: [u32; 4]) -> Self {
        Self(xs)
    }
}
impl MultiLane<[u64; 2]> for u64x2_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u64; 2] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(xs: [u64; 2]) -> Self {
        Self(xs)
    }
}
impl MultiLane<[u64; 4]> for u64x4_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u64; 4] {
        let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
        [a[0], a[1], b[0], b[1]]
    }
    #[inline(always)]
    fn from_lanes(xs: [u64; 4]) -> Self {
        let (a, b) = (
            u64x2_generic::from_lanes([xs[0], xs[1]]),
            u64x2_generic::from_lanes([xs[2], xs[3]]),
        );
        x2::new([a, b])
    }
}
impl MultiLane<[u128; 1]> for u128x1_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u128; 1] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(xs: [u128; 1]) -> Self {
        Self(xs)
    }
}
impl Vec4<u32> for u32x4_generic {
    #[inline(always)]
    fn extract(self, i: u32) -> u32 {
        self.0[i as usize]
    }
    #[inline(always)]
    fn insert(mut self, v: u32, i: u32) -> Self {
        self.0[i as usize] = v;
        self
    }
}
impl Vec4<u64> for u64x4_generic {
    #[inline(always)]
    fn extract(self, i: u32) -> u64 {
        let d: [u64; 4] = self.to_lanes();
        d[i as usize]
    }
    #[inline(always)]
    fn insert(mut self, v: u64, i: u32) -> Self {
        // Vec2::insert returns a new value rather than mutating in place, so the
        // result must be written back into the selected half.
        self.0[(i / 2) as usize] = self.0[(i / 2) as usize].insert(v, i % 2);
        self
    }
}
impl Vec2<u64> for u64x2_generic {
    #[inline(always)]
    fn extract(self, i: u32) -> u64 {
        self.0[i as usize]
    }
    #[inline(always)]
    fn insert(mut self, v: u64, i: u32) -> Self {
        self.0[i as usize] = v;
        self
    }
}
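
// Illustrative sketch: extract/insert address lanes by index. For u64x4 the
// index is split across the two u64x2 halves; the fully qualified Vec4 calls
// below avoid any method-name overlap with the half-vector traits.
#[test]
fn test_extract_insert_sketch() {
    let v = u32x4_generic([10, 20, 30, 40]);
    assert_eq!(v.extract(2), 30);
    assert_eq!(v.insert(99, 1).0, [10, 99, 30, 40]);

    let q = u64x2_generic([5, 6]);
    assert_eq!(q.extract(1), 6);
    assert_eq!(q.insert(9, 0).0, [9, 6]);

    let w: u64x4_generic = MultiLane::from_lanes([1u64, 2, 3, 4]);
    let w = Vec4::insert(w, 7, 2);
    assert_eq!(Vec4::extract(w, 2), 7);
}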

impl Words4 for u32x4_generic {
    #[inline(always)]
    fn shuffle2301(self) -> Self {
        self.swap64()
    }
    #[inline(always)]
    fn shuffle1230(self) -> Self {
        let x = self.0;
        Self([x[3], x[0], x[1], x[2]])
    }
    #[inline(always)]
    fn shuffle3012(self) -> Self {
        let x = self.0;
        Self([x[1], x[2], x[3], x[0]])
    }
}
impl LaneWords4 for u32x4_generic {
    #[inline(always)]
    fn shuffle_lane_words2301(self) -> Self {
        self.shuffle2301()
    }
    #[inline(always)]
    fn shuffle_lane_words1230(self) -> Self {
        self.shuffle1230()
    }
    #[inline(always)]
    fn shuffle_lane_words3012(self) -> Self {
        self.shuffle3012()
    }
}
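
// Illustrative sketch: the concrete lane order each shuffle produces, checked
// against the array-based implementations above.
#[test]
fn test_shuffle_sketch() {
    let v = u32x4_generic([0, 1, 2, 3]);
    assert_eq!(v.shuffle2301().0, [2, 3, 0, 1]);
    assert_eq!(v.shuffle1230().0, [3, 0, 1, 2]);
    assert_eq!(v.shuffle3012().0, [1, 2, 3, 0]);
}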

impl Words4 for u64x4_generic {
    #[inline(always)]
    fn shuffle2301(self) -> Self {
        x2::new([self.0[1], self.0[0]])
    }
    #[inline(always)]
    fn shuffle1230(self) -> Self {
        unimplemented!()
    }
    #[inline(always)]
    fn shuffle3012(self) -> Self {
        unimplemented!()
    }
}

impl u32x4<GenericMachine> for u32x4_generic {}
impl u64x2<GenericMachine> for u64x2_generic {}
impl u128x1<GenericMachine> for u128x1_generic {}
impl u32x4x2<GenericMachine> for u32x4x2_generic {}
impl u64x2x2<GenericMachine> for u64x2x2_generic {}
impl u64x4<GenericMachine> for u64x4_generic {}
impl u128x2<GenericMachine> for u128x2_generic {}
impl u32x4x4<GenericMachine> for u32x4x4_generic {}
impl u64x2x4<GenericMachine> for u64x2x4_generic {}
impl u128x4<GenericMachine> for u128x4_generic {}

#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
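
// Illustrative sketch: how a caller might wrap a vector kernel with `dispatch!`.
// On this backend the macro simply binds the machine ident to GenericMachine and
// calls a monomorphized inner function. The module and function names here
// (`dispatch_example`, `xor_columns`) are hypothetical.
#[cfg(test)]
mod dispatch_example {
    dispatch!(m, Mach, {
        fn xor_columns(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
            let a: Mach::u32x4 = m.vec(a);
            let b: Mach::u32x4 = m.vec(b);
            (a ^ b).to_lanes()
        }
    });

    #[test]
    fn test_dispatch_sketch() {
        assert_eq!(xor_columns([1, 2, 3, 4], [4, 4, 4, 4]), [5, 6, 7, 0]);
    }
}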

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_bswap32() {
        let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let m = unsafe { GenericMachine::instance() };

        let x: <GenericMachine as Machine>::u32x4 = m.vec(xs);
        let x = x.bswap();

        let y = m.vec(ys);
        assert_eq!(x, y);
    }
}