1#![allow(non_camel_case_types)]
2
3use crate::soft::{x2, x4};
4use crate::types::*;
5use core::ops::*;
6use zerocopy::{AsBytes, FromBytes, FromZeroes};
7
/// 128 bits of plain storage, viewable as four `u32` words or as two `u64` words.
///
/// Both fields overlay the same bytes of this `#[repr(C)]` union.
#[repr(C)]
#[derive(Clone, Copy, FromBytes, AsBytes, FromZeroes)]
pub union vec128_storage {
    // View of the storage as four 32-bit words.
    d: [u32; 4],
    // View of the storage as two 64-bit words.
    q: [u64; 2],
}
14impl From<[u32; 4]> for vec128_storage {
15 #[inline(always)]
16 fn from(d: [u32; 4]) -> Self {
17 Self { d }
18 }
19}
20impl From<vec128_storage> for [u32; 4] {
21 #[inline(always)]
22 fn from(d: vec128_storage) -> Self {
23 unsafe { d.d }
24 }
25}
26impl From<[u64; 2]> for vec128_storage {
27 #[inline(always)]
28 fn from(q: [u64; 2]) -> Self {
29 Self { q }
30 }
31}
32impl From<vec128_storage> for [u64; 2] {
33 #[inline(always)]
34 fn from(q: vec128_storage) -> Self {
35 unsafe { q.q }
36 }
37}
38impl Default for vec128_storage {
39 #[inline(always)]
40 fn default() -> Self {
41 Self { q: [0, 0] }
42 }
43}
44impl Eq for vec128_storage {}
45impl PartialEq<vec128_storage> for vec128_storage {
46 #[inline(always)]
47 fn eq(&self, rhs: &Self) -> bool {
48 unsafe { self.q == rhs.q }
49 }
50}
51#[derive(Clone, Copy, PartialEq, Eq, Default)]
52pub struct vec256_storage {
53 v128: [vec128_storage; 2],
54}
55impl vec256_storage {
56 #[inline(always)]
57 pub fn new128(v128: [vec128_storage; 2]) -> Self {
58 Self { v128 }
59 }
60 #[inline(always)]
61 pub fn split128(self) -> [vec128_storage; 2] {
62 self.v128
63 }
64}
65impl From<vec256_storage> for [u64; 4] {
66 #[inline(always)]
67 fn from(q: vec256_storage) -> Self {
68 let [a, b]: [u64; 2] = q.v128[0].into();
69 let [c, d]: [u64; 2] = q.v128[1].into();
70 [a, b, c, d]
71 }
72}
73impl From<[u64; 4]> for vec256_storage {
74 #[inline(always)]
75 fn from([a, b, c, d]: [u64; 4]) -> Self {
76 Self {
77 v128: [[a, b].into(), [c, d].into()],
78 }
79 }
80}
81#[derive(Clone, Copy, PartialEq, Eq, Default)]
82pub struct vec512_storage {
83 v128: [vec128_storage; 4],
84}
85impl vec512_storage {
86 #[inline(always)]
87 pub fn new128(v128: [vec128_storage; 4]) -> Self {
88 Self { v128 }
89 }
90 #[inline(always)]
91 pub fn split128(self) -> [vec128_storage; 4] {
92 self.v128
93 }
94}
95
96#[inline(always)]
97fn dmap<T, F>(t: T, f: F) -> T
98where
99 T: Store<vec128_storage> + Into<vec128_storage>,
100 F: Fn(u32) -> u32,
101{
102 let t: vec128_storage = t.into();
103 let d = unsafe { t.d };
104 let d = vec128_storage {
105 d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
106 };
107 unsafe { T::unpack(d) }
108}
109
110fn dmap2<T, F>(a: T, b: T, f: F) -> T
111where
112 T: Store<vec128_storage> + Into<vec128_storage>,
113 F: Fn(u32, u32) -> u32,
114{
115 let a: vec128_storage = a.into();
116 let b: vec128_storage = b.into();
117 let ao = unsafe { a.d };
118 let bo = unsafe { b.d };
119 let d = vec128_storage {
120 d: [
121 f(ao[0], bo[0]),
122 f(ao[1], bo[1]),
123 f(ao[2], bo[2]),
124 f(ao[3], bo[3]),
125 ],
126 };
127 unsafe { T::unpack(d) }
128}
129
130#[inline(always)]
131fn qmap<T, F>(t: T, f: F) -> T
132where
133 T: Store<vec128_storage> + Into<vec128_storage>,
134 F: Fn(u64) -> u64,
135{
136 let t: vec128_storage = t.into();
137 let q = unsafe { t.q };
138 let q = vec128_storage {
139 q: [f(q[0]), f(q[1])],
140 };
141 unsafe { T::unpack(q) }
142}
143
144#[inline(always)]
145fn qmap2<T, F>(a: T, b: T, f: F) -> T
146where
147 T: Store<vec128_storage> + Into<vec128_storage>,
148 F: Fn(u64, u64) -> u64,
149{
150 let a: vec128_storage = a.into();
151 let b: vec128_storage = b.into();
152 let ao = unsafe { a.q };
153 let bo = unsafe { b.q };
154 let q = vec128_storage {
155 q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
156 };
157 unsafe { T::unpack(q) }
158}
159
/// Combines two 64-bit words into one `u128`; `q[0]` becomes the low half and
/// `q[1]` the high half.
#[inline(always)]
fn o_of_q(q: [u64; 2]) -> u128 {
    let [low, high] = q;
    (u128::from(high) << 64) | u128::from(low)
}
164
/// Splits a `u128` into two 64-bit words: `[low half, high half]`.
#[inline(always)]
fn q_of_o(o: u128) -> [u64; 2] {
    // The `as u64` casts intentionally keep only the low 64 bits of each operand.
    let low = o as u64;
    let high = (o >> 64) as u64;
    [low, high]
}
169
170#[inline(always)]
171fn omap<T, F>(a: T, f: F) -> T
172where
173 T: Store<vec128_storage> + Into<vec128_storage>,
174 F: Fn(u128) -> u128,
175{
176 let a: vec128_storage = a.into();
177 let ao = o_of_q(unsafe { a.q });
178 let o = vec128_storage { q: q_of_o(f(ao)) };
179 unsafe { T::unpack(o) }
180}
181
182#[inline(always)]
183fn omap2<T, F>(a: T, b: T, f: F) -> T
184where
185 T: Store<vec128_storage> + Into<vec128_storage>,
186 F: Fn(u128, u128) -> u128,
187{
188 let a: vec128_storage = a.into();
189 let b: vec128_storage = b.into();
190 let ao = o_of_q(unsafe { a.q });
191 let bo = o_of_q(unsafe { b.q });
192 let o = vec128_storage {
193 q: q_of_o(f(ao, bo)),
194 };
195 unsafe { T::unpack(o) }
196}
197
// Marker-trait impls with empty bodies: each vector type opts in to the
// bit-operation trait families from `crate::types`.
impl RotateEachWord128 for u128x1_generic {}
impl BitOps128 for u128x1_generic {}
impl BitOps64 for u128x1_generic {}
impl BitOps64 for u64x2_generic {}
impl BitOps32 for u128x1_generic {}
impl BitOps32 for u64x2_generic {}
impl BitOps32 for u32x4_generic {}
impl BitOps0 for u128x1_generic {}
impl BitOps0 for u64x2_generic {}
impl BitOps0 for u32x4_generic {}
208
209macro_rules! impl_bitops {
210 ($vec:ident) => {
211 impl Not for $vec {
212 type Output = Self;
213 #[inline(always)]
214 fn not(self) -> Self::Output {
215 omap(self, |x| !x)
216 }
217 }
218 impl BitAnd for $vec {
219 type Output = Self;
220 #[inline(always)]
221 fn bitand(self, rhs: Self) -> Self::Output {
222 omap2(self, rhs, |x, y| x & y)
223 }
224 }
225 impl BitOr for $vec {
226 type Output = Self;
227 #[inline(always)]
228 fn bitor(self, rhs: Self) -> Self::Output {
229 omap2(self, rhs, |x, y| x | y)
230 }
231 }
232 impl BitXor for $vec {
233 type Output = Self;
234 #[inline(always)]
235 fn bitxor(self, rhs: Self) -> Self::Output {
236 omap2(self, rhs, |x, y| x ^ y)
237 }
238 }
239 impl AndNot for $vec {
240 type Output = Self;
241 #[inline(always)]
242 fn andnot(self, rhs: Self) -> Self::Output {
243 omap2(self, rhs, |x, y| !x & y)
244 }
245 }
246 impl BitAndAssign for $vec {
247 #[inline(always)]
248 fn bitand_assign(&mut self, rhs: Self) {
249 *self = *self & rhs
250 }
251 }
252 impl BitOrAssign for $vec {
253 #[inline(always)]
254 fn bitor_assign(&mut self, rhs: Self) {
255 *self = *self | rhs
256 }
257 }
258 impl BitXorAssign for $vec {
259 #[inline(always)]
260 fn bitxor_assign(&mut self, rhs: Self) {
261 *self = *self ^ rhs
262 }
263 }
264
265 impl Swap64 for $vec {
266 #[inline(always)]
267 fn swap1(self) -> Self {
268 qmap(self, |x| {
269 ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
270 })
271 }
272 #[inline(always)]
273 fn swap2(self) -> Self {
274 qmap(self, |x| {
275 ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
276 })
277 }
278 #[inline(always)]
279 fn swap4(self) -> Self {
280 qmap(self, |x| {
281 ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
282 })
283 }
284 #[inline(always)]
285 fn swap8(self) -> Self {
286 qmap(self, |x| {
287 ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
288 })
289 }
290 #[inline(always)]
291 fn swap16(self) -> Self {
292 dmap(self, |x| x.rotate_left(16))
293 }
294 #[inline(always)]
295 fn swap32(self) -> Self {
296 qmap(self, |x| x.rotate_left(32))
297 }
298 #[inline(always)]
299 fn swap64(self) -> Self {
300 omap(self, |x| (x << 64) | (x >> 64))
301 }
302 }
303 };
304}
305impl_bitops!(u32x4_generic);
306impl_bitops!(u64x2_generic);
307impl_bitops!(u128x1_generic);
308
309impl RotateEachWord32 for u32x4_generic {
310 #[inline(always)]
311 fn rotate_each_word_right7(self) -> Self {
312 dmap(self, |x| x.rotate_right(7))
313 }
314 #[inline(always)]
315 fn rotate_each_word_right8(self) -> Self {
316 dmap(self, |x| x.rotate_right(8))
317 }
318 #[inline(always)]
319 fn rotate_each_word_right11(self) -> Self {
320 dmap(self, |x| x.rotate_right(11))
321 }
322 #[inline(always)]
323 fn rotate_each_word_right12(self) -> Self {
324 dmap(self, |x| x.rotate_right(12))
325 }
326 #[inline(always)]
327 fn rotate_each_word_right16(self) -> Self {
328 dmap(self, |x| x.rotate_right(16))
329 }
330 #[inline(always)]
331 fn rotate_each_word_right20(self) -> Self {
332 dmap(self, |x| x.rotate_right(20))
333 }
334 #[inline(always)]
335 fn rotate_each_word_right24(self) -> Self {
336 dmap(self, |x| x.rotate_right(24))
337 }
338 #[inline(always)]
339 fn rotate_each_word_right25(self) -> Self {
340 dmap(self, |x| x.rotate_right(25))
341 }
342}
343
344impl RotateEachWord32 for u64x2_generic {
345 #[inline(always)]
346 fn rotate_each_word_right7(self) -> Self {
347 qmap(self, |x| x.rotate_right(7))
348 }
349 #[inline(always)]
350 fn rotate_each_word_right8(self) -> Self {
351 qmap(self, |x| x.rotate_right(8))
352 }
353 #[inline(always)]
354 fn rotate_each_word_right11(self) -> Self {
355 qmap(self, |x| x.rotate_right(11))
356 }
357 #[inline(always)]
358 fn rotate_each_word_right12(self) -> Self {
359 qmap(self, |x| x.rotate_right(12))
360 }
361 #[inline(always)]
362 fn rotate_each_word_right16(self) -> Self {
363 qmap(self, |x| x.rotate_right(16))
364 }
365 #[inline(always)]
366 fn rotate_each_word_right20(self) -> Self {
367 qmap(self, |x| x.rotate_right(20))
368 }
369 #[inline(always)]
370 fn rotate_each_word_right24(self) -> Self {
371 qmap(self, |x| x.rotate_right(24))
372 }
373 #[inline(always)]
374 fn rotate_each_word_right25(self) -> Self {
375 qmap(self, |x| x.rotate_right(25))
376 }
377}
378impl RotateEachWord64 for u64x2_generic {
379 #[inline(always)]
380 fn rotate_each_word_right32(self) -> Self {
381 qmap(self, |x| x.rotate_right(32))
382 }
383}
384
/// Rotates `x` right by `i` bits, wrapping the bits shifted out back in at the top.
///
/// Delegates to [`u128::rotate_right`], which is defined for every `i`. The
/// previous shift-and-or formulation evaluated `x << (128 - i)`, a shift by the
/// full bit width when `i == 0`, which panics in debug builds.
#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    x.rotate_right(i)
}
/// Checks `rotate_u128_right` against the standard library rotation for one sample value.
#[test]
fn test_rotate_u128() {
    const SAMPLE: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    let expected = SAMPLE.rotate_right(17);
    let actual = rotate_u128_right(SAMPLE, 17);
    assert_eq!(actual, expected);
}
395
396impl RotateEachWord32 for u128x1_generic {
397 #[inline(always)]
398 fn rotate_each_word_right7(self) -> Self {
399 Self([rotate_u128_right(self.0[0], 7)])
400 }
401 #[inline(always)]
402 fn rotate_each_word_right8(self) -> Self {
403 Self([rotate_u128_right(self.0[0], 8)])
404 }
405 #[inline(always)]
406 fn rotate_each_word_right11(self) -> Self {
407 Self([rotate_u128_right(self.0[0], 11)])
408 }
409 #[inline(always)]
410 fn rotate_each_word_right12(self) -> Self {
411 Self([rotate_u128_right(self.0[0], 12)])
412 }
413 #[inline(always)]
414 fn rotate_each_word_right16(self) -> Self {
415 Self([rotate_u128_right(self.0[0], 16)])
416 }
417 #[inline(always)]
418 fn rotate_each_word_right20(self) -> Self {
419 Self([rotate_u128_right(self.0[0], 20)])
420 }
421 #[inline(always)]
422 fn rotate_each_word_right24(self) -> Self {
423 Self([rotate_u128_right(self.0[0], 24)])
424 }
425 #[inline(always)]
426 fn rotate_each_word_right25(self) -> Self {
427 Self([rotate_u128_right(self.0[0], 25)])
428 }
429}
430impl RotateEachWord64 for u128x1_generic {
431 #[inline(always)]
432 fn rotate_each_word_right32(self) -> Self {
433 Self([rotate_u128_right(self.0[0], 32)])
434 }
435}
436
437#[derive(Copy, Clone)]
438pub struct GenericMachine;
439impl Machine for GenericMachine {
440 type u32x4 = u32x4_generic;
441 type u64x2 = u64x2_generic;
442 type u128x1 = u128x1_generic;
443 type u32x4x2 = u32x4x2_generic;
444 type u64x2x2 = u64x2x2_generic;
445 type u64x4 = u64x4_generic;
446 type u128x2 = u128x2_generic;
447 type u32x4x4 = u32x4x4_generic;
448 type u64x2x4 = u64x2x4_generic;
449 type u128x4 = u128x4_generic;
450 #[inline(always)]
451 unsafe fn instance() -> Self {
452 Self
453 }
454}
455
/// Four 32-bit lanes stored as a plain `[u32; 4]`.
#[derive(Copy, Clone, Debug, PartialEq, FromBytes, AsBytes, FromZeroes)]
#[repr(transparent)]
pub struct u32x4_generic([u32; 4]);
/// Two 64-bit lanes stored as a plain `[u64; 2]`.
#[derive(Copy, Clone, Debug, PartialEq, FromBytes, AsBytes, FromZeroes)]
#[repr(transparent)]
pub struct u64x2_generic([u64; 2]);
/// A single 128-bit lane stored as a plain `[u128; 1]`.
#[derive(Copy, Clone, Debug, PartialEq, FromBytes, AsBytes, FromZeroes)]
#[repr(transparent)]
pub struct u128x1_generic([u128; 1]);
465
466impl From<u32x4_generic> for vec128_storage {
467 #[inline(always)]
468 fn from(d: u32x4_generic) -> Self {
469 Self { d: d.0 }
470 }
471}
472impl From<u64x2_generic> for vec128_storage {
473 #[inline(always)]
474 fn from(q: u64x2_generic) -> Self {
475 Self { q: q.0 }
476 }
477}
478impl From<u128x1_generic> for vec128_storage {
479 #[inline(always)]
480 fn from(o: u128x1_generic) -> Self {
481 Self { q: q_of_o(o.0[0]) }
482 }
483}
484
485impl Store<vec128_storage> for u32x4_generic {
486 #[inline(always)]
487 unsafe fn unpack(s: vec128_storage) -> Self {
488 Self(s.d)
489 }
490}
491impl Store<vec128_storage> for u64x2_generic {
492 #[inline(always)]
493 unsafe fn unpack(s: vec128_storage) -> Self {
494 Self(s.q)
495 }
496}
497impl Store<vec128_storage> for u128x1_generic {
498 #[inline(always)]
499 unsafe fn unpack(s: vec128_storage) -> Self {
500 Self([o_of_q(s.q); 1])
501 }
502}
503
// Marker-trait impls with empty bodies: all three 128-bit vector types opt in to `ArithOps`.
impl ArithOps for u32x4_generic {}
impl ArithOps for u64x2_generic {}
impl ArithOps for u128x1_generic {}
507
508impl Add for u32x4_generic {
509 type Output = Self;
510 #[inline(always)]
511 fn add(self, rhs: Self) -> Self::Output {
512 dmap2(self, rhs, |x, y| x.wrapping_add(y))
513 }
514}
515impl Add for u64x2_generic {
516 type Output = Self;
517 #[inline(always)]
518 fn add(self, rhs: Self) -> Self::Output {
519 qmap2(self, rhs, |x, y| x.wrapping_add(y))
520 }
521}
522impl Add for u128x1_generic {
523 type Output = Self;
524 #[inline(always)]
525 fn add(self, rhs: Self) -> Self::Output {
526 omap2(self, rhs, |x, y| x.wrapping_add(y))
527 }
528}
529impl AddAssign for u32x4_generic {
530 #[inline(always)]
531 fn add_assign(&mut self, rhs: Self) {
532 *self = *self + rhs
533 }
534}
535impl AddAssign for u64x2_generic {
536 #[inline(always)]
537 fn add_assign(&mut self, rhs: Self) {
538 *self = *self + rhs
539 }
540}
541impl AddAssign for u128x1_generic {
542 #[inline(always)]
543 fn add_assign(&mut self, rhs: Self) {
544 *self = *self + rhs
545 }
546}
547impl BSwap for u32x4_generic {
548 #[inline(always)]
549 fn bswap(self) -> Self {
550 dmap(self, |x| x.swap_bytes())
551 }
552}
553impl BSwap for u64x2_generic {
554 #[inline(always)]
555 fn bswap(self) -> Self {
556 qmap(self, |x| x.swap_bytes())
557 }
558}
559impl BSwap for u128x1_generic {
560 #[inline(always)]
561 fn bswap(self) -> Self {
562 omap(self, |x| x.swap_bytes())
563 }
564}
565impl StoreBytes for u32x4_generic {
566 #[inline(always)]
567 unsafe fn unsafe_read_le(input: &[u8]) -> Self {
568 let x = u32x4_generic::read_from(input).unwrap();
569 dmap(x, |x| x.to_le())
570 }
571 #[inline(always)]
572 unsafe fn unsafe_read_be(input: &[u8]) -> Self {
573 let x = u32x4_generic::read_from(input).unwrap();
574 dmap(x, |x| x.to_be())
575 }
576 #[inline(always)]
577 fn write_le(self, out: &mut [u8]) {
578 let x = dmap(self, |x| x.to_le());
579 x.write_to(out).unwrap();
580 }
581 #[inline(always)]
582 fn write_be(self, out: &mut [u8]) {
583 let x = dmap(self, |x| x.to_be());
584 x.write_to(out).unwrap();
585 }
586}
587impl StoreBytes for u64x2_generic {
588 #[inline(always)]
589 unsafe fn unsafe_read_le(input: &[u8]) -> Self {
590 let x = u64x2_generic::read_from(input).unwrap();
591 qmap(x, |x| x.to_le())
592 }
593 #[inline(always)]
594 unsafe fn unsafe_read_be(input: &[u8]) -> Self {
595 let x = u64x2_generic::read_from(input).unwrap();
596 qmap(x, |x| x.to_be())
597 }
598 #[inline(always)]
599 fn write_le(self, out: &mut [u8]) {
600 let x = qmap(self, |x| x.to_le());
601 x.write_to(out).unwrap();
602 }
603 #[inline(always)]
604 fn write_be(self, out: &mut [u8]) {
605 let x = qmap(self, |x| x.to_be());
606 x.write_to(out).unwrap();
607 }
608}
609
/// Marker type used as the second type parameter of `x2` in the aliases below
/// (`u32x4x2_generic`, `u64x2x2_generic`, `u128x2_generic`).
#[derive(Copy, Clone)]
pub struct G0;
/// Marker type used as the second type parameter of `x2` for `u64x4_generic`,
/// which makes it a distinct type from `u64x2x2_generic`.
#[derive(Copy, Clone)]
pub struct G1;
// Wider vectors are built from pairs (`x2`) or quads (`x4`) of the 128-bit types.
// `u64x2x2_generic` and `u64x4_generic` wrap the same element type and differ only
// in the marker parameter (`G0` vs `G1`).
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
pub type u64x4_generic = x2<u64x2_generic, G1>;
pub type u128x2_generic = x2<u128x1_generic, G0>;
pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;
621
622impl Vector<[u32; 16]> for u32x4x4_generic {
623 fn to_scalars(self) -> [u32; 16] {
624 let [a, b, c, d] = self.0;
625 let a = a.0;
626 let b = b.0;
627 let c = c.0;
628 let d = d.0;
629 [
630 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3], d[0], d[1], d[2], d[3], ]
635 }
636}
637
638impl MultiLane<[u32; 4]> for u32x4_generic {
639 #[inline(always)]
640 fn to_lanes(self) -> [u32; 4] {
641 self.0
642 }
643 #[inline(always)]
644 fn from_lanes(xs: [u32; 4]) -> Self {
645 Self(xs)
646 }
647}
648impl MultiLane<[u64; 2]> for u64x2_generic {
649 #[inline(always)]
650 fn to_lanes(self) -> [u64; 2] {
651 self.0
652 }
653 #[inline(always)]
654 fn from_lanes(xs: [u64; 2]) -> Self {
655 Self(xs)
656 }
657}
658impl MultiLane<[u64; 4]> for u64x4_generic {
659 #[inline(always)]
660 fn to_lanes(self) -> [u64; 4] {
661 let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
662 [a[0], a[1], b[0], b[1]]
663 }
664 #[inline(always)]
665 fn from_lanes(xs: [u64; 4]) -> Self {
666 let (a, b) = (
667 u64x2_generic::from_lanes([xs[0], xs[1]]),
668 u64x2_generic::from_lanes([xs[2], xs[3]]),
669 );
670 x2::new([a, b])
671 }
672}
673impl MultiLane<[u128; 1]> for u128x1_generic {
674 #[inline(always)]
675 fn to_lanes(self) -> [u128; 1] {
676 self.0
677 }
678 #[inline(always)]
679 fn from_lanes(xs: [u128; 1]) -> Self {
680 Self(xs)
681 }
682}
683impl Vec4<u32> for u32x4_generic {
684 #[inline(always)]
685 fn extract(self, i: u32) -> u32 {
686 self.0[i as usize]
687 }
688 #[inline(always)]
689 fn insert(mut self, v: u32, i: u32) -> Self {
690 self.0[i as usize] = v;
691 self
692 }
693}
694impl Vec4<u64> for u64x4_generic {
695 #[inline(always)]
696 fn extract(self, i: u32) -> u64 {
697 let d: [u64; 4] = self.to_lanes();
698 d[i as usize]
699 }
700 #[inline(always)]
701 fn insert(self, v: u64, i: u32) -> Self {
702 self.0[(i / 2) as usize].insert(v, i % 2);
703 self
704 }
705}
706impl Vec2<u64> for u64x2_generic {
707 #[inline(always)]
708 fn extract(self, i: u32) -> u64 {
709 self.0[i as usize]
710 }
711 #[inline(always)]
712 fn insert(mut self, v: u64, i: u32) -> Self {
713 self.0[i as usize] = v;
714 self
715 }
716}
717
718impl Words4 for u32x4_generic {
719 #[inline(always)]
720 fn shuffle2301(self) -> Self {
721 self.swap64()
722 }
723 #[inline(always)]
724 fn shuffle1230(self) -> Self {
725 let x = self.0;
726 Self([x[3], x[0], x[1], x[2]])
727 }
728 #[inline(always)]
729 fn shuffle3012(self) -> Self {
730 let x = self.0;
731 Self([x[1], x[2], x[3], x[0]])
732 }
733}
734impl LaneWords4 for u32x4_generic {
735 #[inline(always)]
736 fn shuffle_lane_words2301(self) -> Self {
737 self.shuffle2301()
738 }
739 #[inline(always)]
740 fn shuffle_lane_words1230(self) -> Self {
741 self.shuffle1230()
742 }
743 #[inline(always)]
744 fn shuffle_lane_words3012(self) -> Self {
745 self.shuffle3012()
746 }
747}
748
749impl Words4 for u64x4_generic {
750 #[inline(always)]
751 fn shuffle2301(self) -> Self {
752 x2::new([self.0[1], self.0[0]])
753 }
754 #[inline(always)]
755 fn shuffle1230(self) -> Self {
756 unimplemented!()
757 }
758 #[inline(always)]
759 fn shuffle3012(self) -> Self {
760 unimplemented!()
761 }
762}
763
// Empty impls of the per-width vector traits for `GenericMachine`: each generic
// type is declared to be that machine's vector of the corresponding shape.
impl u32x4<GenericMachine> for u32x4_generic {}
impl u64x2<GenericMachine> for u64x2_generic {}
impl u128x1<GenericMachine> for u128x1_generic {}
impl u32x4x2<GenericMachine> for u32x4x2_generic {}
impl u64x2x2<GenericMachine> for u64x2x2_generic {}
impl u64x4<GenericMachine> for u64x4_generic {}
impl u128x2<GenericMachine> for u128x2_generic {}
impl u32x4x4<GenericMachine> for u32x4x4_generic {}
impl u64x2x4<GenericMachine> for u64x2x4_generic {}
impl u128x4<GenericMachine> for u128x4_generic {}
774
/// Defines a function whose body is written generically over a `Machine` type.
///
/// The generated function obtains a `GenericMachine` instance and forwards its
/// arguments to an inner generic `fn_impl` holding the supplied body. The second
/// arm accepts a function without a return type and re-invokes the macro with
/// `-> ()`. The recursive call is `$crate`-qualified so the macro also works when
/// the caller invokes it by path without importing it.
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same expansion as `dispatch!` in this backend: defines a function whose body is
/// generic over a `Machine` type and runs it with a `GenericMachine` instance.
///
/// The arm without a return type re-invokes this macro with `-> ()` through a
/// `$crate`-qualified path, so it no longer needs a separate `dispatch!` macro to
/// be in scope at the call site.
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch_light128!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same expansion as `dispatch!` in this backend: defines a function whose body is
/// generic over a `Machine` type and runs it with a `GenericMachine` instance.
///
/// The arm without a return type re-invokes this macro with `-> ()` through a
/// `$crate`-qualified path, so it no longer needs a separate `dispatch!` macro to
/// be in scope at the call site.
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch_light256!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same expansion as `dispatch!` in this backend: defines a function whose body is
/// generic over a `Machine` type and runs it with a `GenericMachine` instance.
///
/// The arm without a return type re-invokes this macro with `-> ()` through a
/// `$crate`-qualified path, so it no longer needs a separate `dispatch!` macro to
/// be in scope at the call site.
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch_light512!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
843
#[cfg(test)]
mod test {
    use super::*;

    /// `bswap` on a `u32x4` must reverse the bytes inside each 32-bit lane.
    #[test]
    fn test_bswap32() {
        let machine = unsafe { GenericMachine::instance() };

        let input = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let expected = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let loaded: <GenericMachine as Machine>::u32x4 = machine.vec(input);
        let swapped = loaded.bswap();

        let want = machine.vec(expected);
        assert_eq!(swapped, want);
    }
}