vsimd/
unified.rs

1#![allow(clippy::collapsible_if, clippy::too_many_lines)]
2
3use crate::isa::InstructionSet;
4use crate::pod::POD;
5use crate::tools::transmute_copy as tc;
6use crate::vector::{V128, V256};
7
8#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
9use crate::isa::{AVX2, SSE2, SSE41};
10
11#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
12use crate::isa::NEON;
13
14#[cfg(target_arch = "wasm32")]
15use crate::isa::WASM128;
16
17#[cfg(target_arch = "x86")]
18use core::arch::x86::*;
19
20#[cfg(target_arch = "x86_64")]
21use core::arch::x86_64::*;
22
23#[cfg(all(feature = "unstable", target_arch = "arm"))]
24use core::arch::arm::*;
25
26#[cfg(target_arch = "aarch64")]
27use core::arch::aarch64::*;
28
29#[cfg(target_arch = "wasm32")]
30use core::arch::wasm32::*;
31
32#[inline(always)]
33pub fn splat<S: InstructionSet, T: POD, V: POD>(s: S, x: T) -> V {
34    if is_pod_type!(V, V256) {
35        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
36        if matches_isa!(S, AVX2) {
37            if is_pod_type!(T, u8 | i8) {
38                return unsafe { tc(&_mm256_set1_epi8(tc(&x))) };
39            }
40        }
41        {
42            let c = splat::<S, T, V128>(s, x).x2();
43            return unsafe { tc(&c) };
44        }
45    }
46    if is_pod_type!(V, V128) {
47        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
48        if matches_isa!(S, SSE2) {
49            if is_pod_type!(T, u8 | i8) {
50                return unsafe { tc(&_mm_set1_epi8(tc(&x))) };
51            }
52            if is_pod_type!(T, u16 | i16) {
53                return unsafe { tc(&_mm_set1_epi16(tc(&x))) };
54            }
55            if is_pod_type!(T, u32 | i32) {
56                return unsafe { tc(&_mm_set1_epi32(tc(&x))) };
57            }
58            if is_pod_type!(T, u64 | i64) {
59                return unsafe { tc(&_mm_set1_epi64x(tc(&x))) };
60            }
61        }
62        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
63        if matches_isa!(S, NEON) {
64            if is_pod_type!(T, u8 | i8) {
65                return unsafe { tc(&vld1q_dup_u8(&tc(&x))) };
66            }
67            if is_pod_type!(T, u16 | i16) {
68                return unsafe { tc(&vld1q_dup_u16(&tc(&x))) };
69            }
70            if is_pod_type!(T, u32 | i32) {
71                return unsafe { tc(&vld1q_dup_u32(&tc(&x))) };
72            }
73            if is_pod_type!(T, u64 | i64) {
74                return unsafe { tc(&vld1q_dup_u64(&tc(&x))) };
75            }
76        }
77        #[cfg(target_arch = "wasm32")]
78        if matches_isa!(S, WASM128) {
79            if is_pod_type!(T, u8 | i8) {
80                return unsafe { tc(&u8x16_splat(tc(&x))) };
81            }
82            if is_pod_type!(T, u16 | i16) {
83                return unsafe { tc(&u16x8_splat(tc(&x))) };
84            }
85            if is_pod_type!(T, u32 | i32) {
86                return unsafe { tc(&u32x4_splat(tc(&x))) };
87            }
88            if is_pod_type!(T, u64 | i64) {
89                return unsafe { tc(&u64x2_splat(tc(&x))) };
90            }
91        }
92    }
93    {
94        let _ = (s, x);
95        unreachable!()
96    }
97}
98
99#[inline(always)]
100pub fn add<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
101    if is_pod_type!(V, V256) {
102        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
103        if matches_isa!(S, AVX2) {
104            if is_pod_type!(T, u8 | i8) {
105                return unsafe { tc(&_mm256_add_epi8(tc(&a), tc(&b))) };
106            }
107            if is_pod_type!(T, u16 | i16) {
108                return unsafe { tc(&_mm256_add_epi16(tc(&a), tc(&b))) };
109            }
110            if is_pod_type!(T, u32 | i32) {
111                return unsafe { tc(&_mm256_add_epi32(tc(&a), tc(&b))) };
112            }
113            if is_pod_type!(T, u64 | i64) {
114                return unsafe { tc(&_mm256_add_epi64(tc(&a), tc(&b))) };
115            }
116        }
117        {
118            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
119            let (a, b) = (a.to_v128x2(), b.to_v128x2());
120            let c0 = add::<S, T, V128>(s, a.0, b.0);
121            let c1 = add::<S, T, V128>(s, a.1, b.1);
122            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
123        }
124    }
125    if is_pod_type!(V, V128) {
126        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
127        if matches_isa!(S, SSE2) {
128            if is_pod_type!(T, u8 | i8) {
129                return unsafe { tc(&_mm_add_epi8(tc(&a), tc(&b))) };
130            }
131            if is_pod_type!(T, u16 | i16) {
132                return unsafe { tc(&_mm_add_epi16(tc(&a), tc(&b))) };
133            }
134            if is_pod_type!(T, u32 | i32) {
135                return unsafe { tc(&_mm_add_epi32(tc(&a), tc(&b))) };
136            }
137            if is_pod_type!(T, u64 | i64) {
138                return unsafe { tc(&_mm_add_epi64(tc(&a), tc(&b))) };
139            }
140        }
141        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
142        if matches_isa!(S, NEON) {
143            if is_pod_type!(T, u8 | i8) {
144                return unsafe { tc(&vaddq_u8(tc(&a), tc(&b))) };
145            }
146            if is_pod_type!(T, u16 | i16) {
147                return unsafe { tc(&vaddq_u16(tc(&a), tc(&b))) };
148            }
149            if is_pod_type!(T, u32 | i32) {
150                return unsafe { tc(&vaddq_u32(tc(&a), tc(&b))) };
151            }
152            if is_pod_type!(T, u64 | i64) {
153                return unsafe { tc(&vaddq_u64(tc(&a), tc(&b))) };
154            }
155        }
156        #[cfg(target_arch = "wasm32")]
157        if matches_isa!(S, WASM128) {
158            if is_pod_type!(T, u8 | i8) {
159                return unsafe { tc(&u8x16_add(tc(&a), tc(&b))) };
160            }
161            if is_pod_type!(T, u16 | i16) {
162                return unsafe { tc(&u16x8_add(tc(&a), tc(&b))) };
163            }
164            if is_pod_type!(T, u32 | i32) {
165                return unsafe { tc(&u32x4_add(tc(&a), tc(&b))) };
166            }
167            if is_pod_type!(T, u64 | i64) {
168                return unsafe { tc(&u64x2_add(tc(&a), tc(&b))) };
169            }
170        }
171    }
172    {
173        let _ = (s, a, b);
174        unreachable!()
175    }
176}
177
178#[inline(always)]
179pub fn sub<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
180    if is_pod_type!(V, V256) {
181        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
182        if matches_isa!(S, AVX2) {
183            if is_pod_type!(T, u8 | i8) {
184                return unsafe { tc(&_mm256_sub_epi8(tc(&a), tc(&b))) };
185            }
186            if is_pod_type!(T, u16 | i16) {
187                return unsafe { tc(&_mm256_sub_epi16(tc(&a), tc(&b))) };
188            }
189            if is_pod_type!(T, u32 | i32) {
190                return unsafe { tc(&_mm256_sub_epi32(tc(&a), tc(&b))) };
191            }
192            if is_pod_type!(T, u64 | i64) {
193                return unsafe { tc(&_mm256_sub_epi64(tc(&a), tc(&b))) };
194            }
195        }
196        {
197            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
198            let (a, b) = (a.to_v128x2(), b.to_v128x2());
199            let c0 = sub::<S, T, V128>(s, a.0, b.0);
200            let c1 = sub::<S, T, V128>(s, a.1, b.1);
201            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
202        }
203    }
204    if is_pod_type!(V, V128) {
205        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
206        if matches_isa!(S, SSE2) {
207            if is_pod_type!(T, u8 | i8) {
208                return unsafe { tc(&_mm_sub_epi8(tc(&a), tc(&b))) };
209            }
210            if is_pod_type!(T, u16 | i16) {
211                return unsafe { tc(&_mm_sub_epi16(tc(&a), tc(&b))) };
212            }
213            if is_pod_type!(T, u32 | i32) {
214                return unsafe { tc(&_mm_sub_epi32(tc(&a), tc(&b))) };
215            }
216            if is_pod_type!(T, u64 | i64) {
217                return unsafe { tc(&_mm_sub_epi64(tc(&a), tc(&b))) };
218            }
219        }
220        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
221        if matches_isa!(S, NEON) {
222            if is_pod_type!(T, u8 | i8) {
223                return unsafe { tc(&vsubq_u8(tc(&a), tc(&b))) };
224            }
225            if is_pod_type!(T, u16 | i16) {
226                return unsafe { tc(&vsubq_u16(tc(&a), tc(&b))) };
227            }
228            if is_pod_type!(T, u32 | i32) {
229                return unsafe { tc(&vsubq_u32(tc(&a), tc(&b))) };
230            }
231            if is_pod_type!(T, u64 | i64) {
232                return unsafe { tc(&vsubq_u64(tc(&a), tc(&b))) };
233            }
234        }
235        #[cfg(target_arch = "wasm32")]
236        if matches_isa!(S, WASM128) {
237            if is_pod_type!(T, u8 | i8) {
238                return unsafe { tc(&u8x16_sub(tc(&a), tc(&b))) };
239            }
240            if is_pod_type!(T, u16 | i16) {
241                return unsafe { tc(&u16x8_sub(tc(&a), tc(&b))) };
242            }
243            if is_pod_type!(T, u32 | i32) {
244                return unsafe { tc(&u32x4_sub(tc(&a), tc(&b))) };
245            }
246            if is_pod_type!(T, u64 | i64) {
247                return unsafe { tc(&u64x2_sub(tc(&a), tc(&b))) };
248            }
249        }
250    }
251    {
252        let _ = (s, a, b);
253        unreachable!()
254    }
255}
256
257#[inline(always)]
258pub fn eq<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
259    if is_pod_type!(V, V256) {
260        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
261        if matches_isa!(S, AVX2) {
262            if is_pod_type!(T, u8 | i8) {
263                return unsafe { tc(&_mm256_cmpeq_epi8(tc(&a), tc(&b))) };
264            }
265            if is_pod_type!(T, u16 | i16) {
266                return unsafe { tc(&_mm256_cmpeq_epi16(tc(&a), tc(&b))) };
267            }
268            if is_pod_type!(T, u32 | i32) {
269                return unsafe { tc(&_mm256_cmpeq_epi32(tc(&a), tc(&b))) };
270            }
271            if is_pod_type!(T, u64 | i64) {
272                return unsafe { tc(&_mm256_cmpeq_epi64(tc(&a), tc(&b))) };
273            }
274        }
275        {
276            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
277            let (a, b) = (a.to_v128x2(), b.to_v128x2());
278            let c0 = eq::<S, T, V128>(s, a.0, b.0);
279            let c1 = eq::<S, T, V128>(s, a.1, b.1);
280            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
281        }
282    }
283    if is_pod_type!(V, V128) {
284        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
285        if matches_isa!(S, SSE2) {
286            if is_pod_type!(T, u8 | i8) {
287                return unsafe { tc(&_mm_cmpeq_epi8(tc(&a), tc(&b))) };
288            }
289            if is_pod_type!(T, u16 | i16) {
290                return unsafe { tc(&_mm_cmpeq_epi16(tc(&a), tc(&b))) };
291            }
292            if is_pod_type!(T, u32 | i32) {
293                return unsafe { tc(&_mm_cmpeq_epi32(tc(&a), tc(&b))) };
294            }
295        }
296        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
297        if matches_isa!(S, NEON) {
298            if is_pod_type!(T, u8 | i8) {
299                return unsafe { tc(&vceqq_u8(tc(&a), tc(&b))) };
300            }
301            if is_pod_type!(T, u16 | i16) {
302                return unsafe { tc(&vceqq_u16(tc(&a), tc(&b))) };
303            }
304            if is_pod_type!(T, u32 | i32) {
305                return unsafe { tc(&vceqq_u32(tc(&a), tc(&b))) };
306            }
307            #[cfg(target_arch = "aarch64")]
308            if is_pod_type!(T, u64 | i64) {
309                return unsafe { tc(&vceqq_u64(tc(&a), tc(&b))) };
310            }
311        }
312        #[cfg(target_arch = "wasm32")]
313        if matches_isa!(S, WASM128) {
314            if is_pod_type!(T, u8 | i8) {
315                return unsafe { tc(&u8x16_eq(tc(&a), tc(&b))) };
316            }
317            if is_pod_type!(T, u16 | i16) {
318                return unsafe { tc(&u16x8_eq(tc(&a), tc(&b))) };
319            }
320            if is_pod_type!(T, u32 | i32) {
321                return unsafe { tc(&u32x4_eq(tc(&a), tc(&b))) };
322            }
323            if is_pod_type!(T, u64 | i64) {
324                return unsafe { tc(&u64x2_eq(tc(&a), tc(&b))) };
325            }
326        }
327    }
328    {
329        let _ = (s, a, b);
330        unreachable!()
331    }
332}
333
334#[inline(always)]
335pub fn lt<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
336    if is_pod_type!(V, V256) {
337        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
338        if matches_isa!(S, AVX2) {
339            if is_pod_type!(T, i8) {
340                return unsafe { tc(&_mm256_cmpgt_epi8(tc(&b), tc(&a))) };
341            }
342            if is_pod_type!(T, i16) {
343                return unsafe { tc(&_mm256_cmpgt_epi16(tc(&b), tc(&a))) };
344            }
345            if is_pod_type!(T, i32) {
346                return unsafe { tc(&_mm256_cmpgt_epi32(tc(&b), tc(&a))) };
347            }
348            if is_pod_type!(T, i64) {
349                return unsafe { tc(&_mm256_cmpgt_epi64(tc(&b), tc(&a))) };
350            }
351            if is_pod_type!(T, u8) {
352                return unsafe {
353                    let (a, b) = (tc(&a), tc(&b));
354                    let c = _mm256_cmpeq_epi8(a, _mm256_max_epu8(a, b));
355                    tc(&_mm256_xor_si256(c, _mm256_cmpeq_epi8(a, a)))
356                };
357            }
358            if is_pod_type!(T, u16) {
359                return unsafe {
360                    let (a, b) = (tc(&a), tc(&b));
361                    let c = _mm256_cmpeq_epi16(a, _mm256_max_epu16(a, b));
362                    tc(&_mm256_xor_si256(c, _mm256_cmpeq_epi16(a, a)))
363                };
364            }
365            if is_pod_type!(T, u32) {
366                return unsafe {
367                    let (a, b) = (tc(&a), tc(&b));
368                    let c = _mm256_cmpeq_epi32(a, _mm256_max_epu32(a, b));
369                    tc(&_mm256_xor_si256(c, _mm256_cmpeq_epi32(a, a)))
370                };
371            }
372        }
373        {
374            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
375            let (a, b) = (a.to_v128x2(), b.to_v128x2());
376            let c0 = lt::<S, T, V128>(s, a.0, b.0);
377            let c1 = lt::<S, T, V128>(s, a.1, b.1);
378            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
379        }
380    }
381    if is_pod_type!(V, V128) {
382        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
383        if matches_isa!(S, SSE2) {
384            if is_pod_type!(T, i8) {
385                return unsafe { tc(&_mm_cmplt_epi8(tc(&a), tc(&b))) };
386            }
387            if is_pod_type!(T, i16) {
388                return unsafe { tc(&_mm_cmplt_epi16(tc(&a), tc(&b))) };
389            }
390            if is_pod_type!(T, i32) {
391                return unsafe { tc(&_mm_cmplt_epi32(tc(&a), tc(&b))) };
392            }
393            if is_pod_type!(T, u8) {
394                return unsafe {
395                    let (a, b) = (tc(&a), tc(&b));
396                    let c = _mm_cmpeq_epi8(a, _mm_max_epu8(a, b));
397                    tc(&_mm_xor_si128(c, _mm_cmpeq_epi8(a, a)))
398                };
399            }
400            if is_pod_type!(T, u16) {
401                return unsafe {
402                    let m = _mm_set1_epi16(i16::MIN);
403                    let a = _mm_xor_si128(tc(&a), m);
404                    let b = _mm_xor_si128(tc(&b), m);
405                    tc(&_mm_cmplt_epi16(a, b))
406                };
407            }
408            if is_pod_type!(T, u32) {
409                return unsafe {
410                    let m = _mm_set1_epi32(i32::MIN);
411                    let a = _mm_xor_si128(tc(&a), m);
412                    let b = _mm_xor_si128(tc(&b), m);
413                    tc(&_mm_cmplt_epi32(a, b))
414                };
415            }
416        }
417        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
418        if matches_isa!(S, NEON) {
419            if is_pod_type!(T, i8) {
420                return unsafe { tc(&vcltq_s8(tc(&a), tc(&b))) };
421            }
422            if is_pod_type!(T, i16) {
423                return unsafe { tc(&vcltq_s16(tc(&a), tc(&b))) };
424            }
425            if is_pod_type!(T, i32) {
426                return unsafe { tc(&vcltq_s32(tc(&a), tc(&b))) };
427            }
428            #[cfg(target_arch = "aarch64")]
429            if is_pod_type!(T, i64) {
430                return unsafe { tc(&vcltq_s64(tc(&a), tc(&b))) };
431            }
432            if is_pod_type!(T, u8) {
433                return unsafe { tc(&vcltq_u8(tc(&a), tc(&b))) };
434            }
435            if is_pod_type!(T, u16) {
436                return unsafe { tc(&vcltq_u16(tc(&a), tc(&b))) };
437            }
438            if is_pod_type!(T, u32) {
439                return unsafe { tc(&vcltq_u32(tc(&a), tc(&b))) };
440            }
441            #[cfg(target_arch = "aarch64")]
442            if is_pod_type!(T, u64) {
443                return unsafe { tc(&vcltq_u64(tc(&a), tc(&b))) };
444            }
445        }
446        #[cfg(target_arch = "wasm32")]
447        if matches_isa!(S, WASM128) {
448            if is_pod_type!(T, i8) {
449                return unsafe { tc(&i8x16_lt(tc(&a), tc(&b))) };
450            }
451            if is_pod_type!(T, i16) {
452                return unsafe { tc(&i16x8_lt(tc(&a), tc(&b))) };
453            }
454            if is_pod_type!(T, i32) {
455                return unsafe { tc(&i32x4_lt(tc(&a), tc(&b))) };
456            }
457            if is_pod_type!(T, i64) {
458                return unsafe { tc(&i64x2_lt(tc(&a), tc(&b))) };
459            }
460            if is_pod_type!(T, u8) {
461                return unsafe { tc(&u8x16_lt(tc(&a), tc(&b))) };
462            }
463            if is_pod_type!(T, u16) {
464                return unsafe { tc(&u16x8_lt(tc(&a), tc(&b))) };
465            }
466            if is_pod_type!(T, u32) {
467                return unsafe { tc(&u32x4_lt(tc(&a), tc(&b))) };
468            }
469            // if is_pod_type!(T, u64) {
470            //     return unsafe { tc(&u64x2_lt(tc(&a), tc(&b))) };
471            // }
472        }
473    }
474    {
475        let _ = (s, a, b);
476        unreachable!()
477    }
478}
479
480#[inline(always)]
481pub fn add_sat<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
482    if is_pod_type!(V, V256) {
483        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
484        if matches_isa!(S, AVX2) {
485            if is_pod_type!(T, i8) {
486                return unsafe { tc(&_mm256_adds_epi8(tc(&a), tc(&b))) };
487            }
488            if is_pod_type!(T, i16) {
489                return unsafe { tc(&_mm256_adds_epi16(tc(&a), tc(&b))) };
490            }
491            if is_pod_type!(T, u8) {
492                return unsafe { tc(&_mm256_adds_epu8(tc(&a), tc(&b))) };
493            }
494            if is_pod_type!(T, u16) {
495                return unsafe { tc(&_mm256_adds_epu16(tc(&a), tc(&b))) };
496            }
497        }
498        {
499            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
500            let (a, b) = (a.to_v128x2(), b.to_v128x2());
501            let c0 = add_sat::<S, T, V128>(s, a.0, b.0);
502            let c1 = add_sat::<S, T, V128>(s, a.1, b.1);
503            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
504        }
505    }
506    if is_pod_type!(V, V128) {
507        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
508        if matches_isa!(S, SSE2) {
509            if is_pod_type!(T, i8) {
510                return unsafe { tc(&_mm_adds_epi8(tc(&a), tc(&b))) };
511            }
512            if is_pod_type!(T, i16) {
513                return unsafe { tc(&_mm_adds_epi16(tc(&a), tc(&b))) };
514            }
515            if is_pod_type!(T, u8) {
516                return unsafe { tc(&_mm_adds_epu8(tc(&a), tc(&b))) };
517            }
518            if is_pod_type!(T, u16) {
519                return unsafe { tc(&_mm_adds_epu16(tc(&a), tc(&b))) };
520            }
521        }
522        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
523        if matches_isa!(S, NEON) {
524            if is_pod_type!(T, i8) {
525                return unsafe { tc(&vqaddq_s8(tc(&a), tc(&b))) };
526            }
527            if is_pod_type!(T, i16) {
528                return unsafe { tc(&vqaddq_s16(tc(&a), tc(&b))) };
529            }
530            if is_pod_type!(T, i32) {
531                return unsafe { tc(&vqaddq_s32(tc(&a), tc(&b))) };
532            }
533            if is_pod_type!(T, u8) {
534                return unsafe { tc(&vqaddq_u8(tc(&a), tc(&b))) };
535            }
536            if is_pod_type!(T, u16) {
537                return unsafe { tc(&vqaddq_u16(tc(&a), tc(&b))) };
538            }
539            if is_pod_type!(T, u32) {
540                return unsafe { tc(&vqaddq_u32(tc(&a), tc(&b))) };
541            }
542        }
543        #[cfg(target_arch = "wasm32")]
544        if matches_isa!(S, WASM128) {
545            if is_pod_type!(T, i8) {
546                return unsafe { tc(&i8x16_add_sat(tc(&a), tc(&b))) };
547            }
548            if is_pod_type!(T, i16) {
549                return unsafe { tc(&i16x8_add_sat(tc(&a), tc(&b))) };
550            }
551            if is_pod_type!(T, u8) {
552                return unsafe { tc(&u8x16_add_sat(tc(&a), tc(&b))) };
553            }
554            if is_pod_type!(T, u16) {
555                return unsafe { tc(&u16x8_add_sat(tc(&a), tc(&b))) };
556            }
557        }
558    }
559    {
560        let _ = (s, a, b);
561        unreachable!()
562    }
563}
564
565#[inline(always)]
566pub fn sub_sat<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
567    if is_pod_type!(V, V256) {
568        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
569        if matches_isa!(S, AVX2) {
570            if is_pod_type!(T, i8) {
571                return unsafe { tc(&_mm256_subs_epi8(tc(&a), tc(&b))) };
572            }
573            if is_pod_type!(T, i16) {
574                return unsafe { tc(&_mm256_subs_epi16(tc(&a), tc(&b))) };
575            }
576            if is_pod_type!(T, u8) {
577                return unsafe { tc(&_mm256_subs_epu8(tc(&a), tc(&b))) };
578            }
579            if is_pod_type!(T, u16) {
580                return unsafe { tc(&_mm256_subs_epu16(tc(&a), tc(&b))) };
581            }
582        }
583        {
584            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
585            let (a, b) = (a.to_v128x2(), b.to_v128x2());
586            let c0 = sub_sat::<S, T, V128>(s, a.0, b.0);
587            let c1 = sub_sat::<S, T, V128>(s, a.1, b.1);
588            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
589        }
590    }
591    if is_pod_type!(V, V128) {
592        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
593        if matches_isa!(S, SSE2) {
594            if is_pod_type!(T, i8) {
595                return unsafe { tc(&_mm_subs_epi8(tc(&a), tc(&b))) };
596            }
597            if is_pod_type!(T, i16) {
598                return unsafe { tc(&_mm_subs_epi16(tc(&a), tc(&b))) };
599            }
600            if is_pod_type!(T, u8) {
601                return unsafe { tc(&_mm_subs_epu8(tc(&a), tc(&b))) };
602            }
603            if is_pod_type!(T, u16) {
604                return unsafe { tc(&_mm_subs_epu16(tc(&a), tc(&b))) };
605            }
606        }
607        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
608        if matches_isa!(S, NEON) {
609            if is_pod_type!(T, i8) {
610                return unsafe { tc(&vqsubq_s8(tc(&a), tc(&b))) };
611            }
612            if is_pod_type!(T, i16) {
613                return unsafe { tc(&vqsubq_s16(tc(&a), tc(&b))) };
614            }
615            if is_pod_type!(T, i32) {
616                return unsafe { tc(&vqsubq_s32(tc(&a), tc(&b))) };
617            }
618            if is_pod_type!(T, u8) {
619                return unsafe { tc(&vqsubq_u8(tc(&a), tc(&b))) };
620            }
621            if is_pod_type!(T, u16) {
622                return unsafe { tc(&vqsubq_u16(tc(&a), tc(&b))) };
623            }
624            if is_pod_type!(T, u32) {
625                return unsafe { tc(&vqsubq_u32(tc(&a), tc(&b))) };
626            }
627        }
628        #[cfg(target_arch = "wasm32")]
629        if matches_isa!(S, WASM128) {
630            if is_pod_type!(T, i8) {
631                return unsafe { tc(&i8x16_sub_sat(tc(&a), tc(&b))) };
632            }
633            if is_pod_type!(T, i16) {
634                return unsafe { tc(&i16x8_sub_sat(tc(&a), tc(&b))) };
635            }
636            if is_pod_type!(T, u8) {
637                return unsafe { tc(&u8x16_sub_sat(tc(&a), tc(&b))) };
638            }
639            if is_pod_type!(T, u16) {
640                return unsafe { tc(&u16x8_sub_sat(tc(&a), tc(&b))) };
641            }
642        }
643    }
644    {
645        let _ = (s, a, b);
646        unreachable!()
647    }
648}
649
650pub fn max<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
651    if is_pod_type!(V, V256) {
652        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
653        if matches_isa!(S, AVX2) {
654            if is_pod_type!(T, i8) {
655                return unsafe { tc(&_mm256_max_epi8(tc(&a), tc(&b))) };
656            }
657            if is_pod_type!(T, i16) {
658                return unsafe { tc(&_mm256_max_epi16(tc(&a), tc(&b))) };
659            }
660            if is_pod_type!(T, i32) {
661                return unsafe { tc(&_mm256_max_epi32(tc(&a), tc(&b))) };
662            }
663            if is_pod_type!(T, u8) {
664                return unsafe { tc(&_mm256_max_epu8(tc(&a), tc(&b))) };
665            }
666            if is_pod_type!(T, u16) {
667                return unsafe { tc(&_mm256_max_epu16(tc(&a), tc(&b))) };
668            }
669            if is_pod_type!(T, u32) {
670                return unsafe { tc(&_mm256_max_epu32(tc(&a), tc(&b))) };
671            }
672            if is_pod_type!(T, f32) {
673                return unsafe { tc(&_mm256_max_ps(tc(&a), tc(&b))) };
674            }
675            if is_pod_type!(T, f64) {
676                return unsafe { tc(&_mm256_max_pd(tc(&a), tc(&b))) };
677            }
678        }
679        {
680            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
681            let (a, b) = (a.to_v128x2(), b.to_v128x2());
682            let c0 = max::<S, T, V128>(s, a.0, b.0);
683            let c1 = max::<S, T, V128>(s, a.1, b.1);
684            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
685        }
686    }
687    if is_pod_type!(V, V128) {
688        #[cfg(miri)]
689        {
690            if is_pod_type!(T, u8) {
691                return unsafe { tc(&crate::simulation::u8x16_max(tc(&a), tc(&b))) };
692            }
693        }
694        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
695        if matches_isa!(S, SSE41) {
696            if is_pod_type!(T, i8) {
697                return unsafe { tc(&_mm_max_epi8(tc(&a), tc(&b))) };
698            }
699            if is_pod_type!(T, i32) {
700                return unsafe { tc(&_mm_max_epi32(tc(&a), tc(&b))) };
701            }
702            if is_pod_type!(T, u16) {
703                return unsafe { tc(&_mm_max_epu16(tc(&a), tc(&b))) };
704            }
705            if is_pod_type!(T, u32) {
706                return unsafe { tc(&_mm_max_epu32(tc(&a), tc(&b))) };
707            }
708        }
709        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
710        if matches_isa!(S, SSE2) {
711            if is_pod_type!(T, i16) {
712                return unsafe { tc(&_mm_max_epi16(tc(&a), tc(&b))) };
713            }
714            if is_pod_type!(T, u8) {
715                return unsafe { tc(&_mm_max_epu8(tc(&a), tc(&b))) };
716            }
717            if is_pod_type!(T, f32) {
718                return unsafe { tc(&_mm_max_ps(tc(&a), tc(&b))) };
719            }
720            if is_pod_type!(T, f64) {
721                return unsafe { tc(&_mm_max_pd(tc(&a), tc(&b))) };
722            }
723        }
724        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
725        if matches_isa!(S, NEON) {
726            if is_pod_type!(T, i8) {
727                return unsafe { tc(&vmaxq_s8(tc(&a), tc(&b))) };
728            }
729            if is_pod_type!(T, i16) {
730                return unsafe { tc(&vmaxq_s16(tc(&a), tc(&b))) };
731            }
732            if is_pod_type!(T, i32) {
733                return unsafe { tc(&vmaxq_s32(tc(&a), tc(&b))) };
734            }
735            if is_pod_type!(T, u8) {
736                return unsafe { tc(&vmaxq_u8(tc(&a), tc(&b))) };
737            }
738            if is_pod_type!(T, u16) {
739                return unsafe { tc(&vmaxq_u16(tc(&a), tc(&b))) };
740            }
741            if is_pod_type!(T, u32) {
742                return unsafe { tc(&vmaxq_u32(tc(&a), tc(&b))) };
743            }
744            if is_pod_type!(T, f32) {
745                return unsafe { tc(&vmaxq_f32(tc(&a), tc(&b))) };
746            }
747        }
748        #[cfg(target_arch = "wasm32")]
749        if matches_isa!(S, WASM128) {
750            if is_pod_type!(T, i8) {
751                return unsafe { tc(&i8x16_max(tc(&a), tc(&b))) };
752            }
753            if is_pod_type!(T, i16) {
754                return unsafe { tc(&i16x8_max(tc(&a), tc(&b))) };
755            }
756            if is_pod_type!(T, i32) {
757                return unsafe { tc(&i32x4_max(tc(&a), tc(&b))) };
758            }
759            if is_pod_type!(T, u8) {
760                return unsafe { tc(&u8x16_max(tc(&a), tc(&b))) };
761            }
762            if is_pod_type!(T, u16) {
763                return unsafe { tc(&u16x8_max(tc(&a), tc(&b))) };
764            }
765            if is_pod_type!(T, u32) {
766                return unsafe { tc(&u32x4_max(tc(&a), tc(&b))) };
767            }
768            if is_pod_type!(T, f32) {
769                return unsafe { tc(&f32x4_max(tc(&a), tc(&b))) };
770            }
771        }
772    }
773    {
774        let _ = (s, a, b);
775        unreachable!()
776    }
777}
778
779#[inline(always)]
780pub fn min<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
781    if is_pod_type!(V, V256) {
782        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
783        if matches_isa!(S, AVX2) {
784            if is_pod_type!(T, i8) {
785                return unsafe { tc(&_mm256_min_epi8(tc(&a), tc(&b))) };
786            }
787            if is_pod_type!(T, i16) {
788                return unsafe { tc(&_mm256_min_epi16(tc(&a), tc(&b))) };
789            }
790            if is_pod_type!(T, i32) {
791                return unsafe { tc(&_mm256_min_epi32(tc(&a), tc(&b))) };
792            }
793            if is_pod_type!(T, u8) {
794                return unsafe { tc(&_mm256_min_epu8(tc(&a), tc(&b))) };
795            }
796            if is_pod_type!(T, u16) {
797                return unsafe { tc(&_mm256_min_epu16(tc(&a), tc(&b))) };
798            }
799            if is_pod_type!(T, u32) {
800                return unsafe { tc(&_mm256_min_epu32(tc(&a), tc(&b))) };
801            }
802            if is_pod_type!(T, f32) {
803                return unsafe { tc(&_mm256_min_ps(tc(&a), tc(&b))) };
804            }
805            if is_pod_type!(T, f64) {
806                return unsafe { tc(&_mm256_min_pd(tc(&a), tc(&b))) };
807            }
808        }
809        {
810            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
811            let (a, b) = (a.to_v128x2(), b.to_v128x2());
812            let c0 = min::<S, T, V128>(s, a.0, b.0);
813            let c1 = min::<S, T, V128>(s, a.1, b.1);
814            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
815        }
816    }
817    if is_pod_type!(V, V128) {
818        #[cfg(miri)]
819        {
820            if is_pod_type!(T, u8) {
821                return unsafe { tc(&crate::simulation::u8x16_min(tc(&a), tc(&b))) };
822            }
823        }
824        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
825        if matches_isa!(S, SSE41) {
826            if is_pod_type!(T, i8) {
827                return unsafe { tc(&_mm_min_epi8(tc(&a), tc(&b))) };
828            }
829            if is_pod_type!(T, i32) {
830                return unsafe { tc(&_mm_min_epi32(tc(&a), tc(&b))) };
831            }
832
833            if is_pod_type!(T, u16) {
834                return unsafe { tc(&_mm_min_epu16(tc(&a), tc(&b))) };
835            }
836            if is_pod_type!(T, u32) {
837                return unsafe { tc(&_mm_min_epu32(tc(&a), tc(&b))) };
838            }
839        }
840        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
841        if matches_isa!(S, SSE2) {
842            if is_pod_type!(T, i16) {
843                return unsafe { tc(&_mm_min_epi16(tc(&a), tc(&b))) };
844            }
845            if is_pod_type!(T, u8) {
846                return unsafe { tc(&_mm_min_epu8(tc(&a), tc(&b))) };
847            }
848            if is_pod_type!(T, f32) {
849                return unsafe { tc(&_mm_min_ps(tc(&a), tc(&b))) };
850            }
851            if is_pod_type!(T, f64) {
852                return unsafe { tc(&_mm_min_pd(tc(&a), tc(&b))) };
853            }
854        }
855        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
856        if matches_isa!(S, NEON) {
857            if is_pod_type!(T, i8) {
858                return unsafe { tc(&vminq_s8(tc(&a), tc(&b))) };
859            }
860            if is_pod_type!(T, i16) {
861                return unsafe { tc(&vminq_s16(tc(&a), tc(&b))) };
862            }
863            if is_pod_type!(T, i32) {
864                return unsafe { tc(&vminq_s32(tc(&a), tc(&b))) };
865            }
866            if is_pod_type!(T, u8) {
867                return unsafe { tc(&vminq_u8(tc(&a), tc(&b))) };
868            }
869            if is_pod_type!(T, u16) {
870                return unsafe { tc(&vminq_u16(tc(&a), tc(&b))) };
871            }
872            if is_pod_type!(T, u32) {
873                return unsafe { tc(&vminq_u32(tc(&a), tc(&b))) };
874            }
875            if is_pod_type!(T, f32) {
876                return unsafe { tc(&vminq_f32(tc(&a), tc(&b))) };
877            }
878        }
879        #[cfg(target_arch = "wasm32")]
880        if matches_isa!(S, WASM128) {
881            if is_pod_type!(T, i8) {
882                return unsafe { tc(&i8x16_min(tc(&a), tc(&b))) };
883            }
884            if is_pod_type!(T, i16) {
885                return unsafe { tc(&i16x8_min(tc(&a), tc(&b))) };
886            }
887            if is_pod_type!(T, i32) {
888                return unsafe { tc(&i32x4_min(tc(&a), tc(&b))) };
889            }
890            if is_pod_type!(T, u8) {
891                return unsafe { tc(&u8x16_min(tc(&a), tc(&b))) };
892            }
893            if is_pod_type!(T, u16) {
894                return unsafe { tc(&u16x8_min(tc(&a), tc(&b))) };
895            }
896            if is_pod_type!(T, u32) {
897                return unsafe { tc(&u32x4_min(tc(&a), tc(&b))) };
898            }
899            if is_pod_type!(T, f32) {
900                return unsafe { tc(&f32x4_min(tc(&a), tc(&b))) };
901            }
902        }
903    }
904    {
905        let _ = (s, a, b);
906        unreachable!()
907    }
908}
909
910#[inline(always)]
911pub fn and<S, V>(s: S, a: V, b: V) -> V
912where
913    S: InstructionSet,
914    V: POD,
915{
916    if is_pod_type!(V, V256) {
917        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
918        if matches_isa!(S, AVX2) {
919            return unsafe { tc(&_mm256_and_si256(tc(&a), tc(&b))) };
920        }
921        {
922            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
923            let (a, b) = (a.to_v128x2(), b.to_v128x2());
924            let c0 = and::<S, V128>(s, a.0, b.0);
925            let c1 = and::<S, V128>(s, a.1, b.1);
926            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
927        }
928    }
929    if is_pod_type!(V, V128) {
930        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
931        if matches_isa!(S, SSE2) {
932            return unsafe { tc(&_mm_and_si128(tc(&a), tc(&b))) };
933        }
934        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
935        if matches_isa!(S, NEON) {
936            return unsafe { tc(&vandq_u8(tc(&a), tc(&b))) };
937        }
938        #[cfg(target_arch = "wasm32")]
939        if matches_isa!(S, WASM128) {
940            return unsafe { tc(&v128_and(tc(&a), tc(&b))) };
941        }
942    }
943    {
944        let _ = (s, a, b);
945        unreachable!()
946    }
947}
948
949#[inline(always)]
950pub fn or<S, V>(s: S, a: V, b: V) -> V
951where
952    S: InstructionSet,
953    V: POD,
954{
955    if is_pod_type!(V, V256) {
956        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
957        if matches_isa!(S, AVX2) {
958            return unsafe { tc(&_mm256_or_si256(tc(&a), tc(&b))) };
959        }
960        {
961            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
962            let (a, b) = (a.to_v128x2(), b.to_v128x2());
963            let c0 = or::<S, V128>(s, a.0, b.0);
964            let c1 = or::<S, V128>(s, a.1, b.1);
965            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
966        }
967    }
968    if is_pod_type!(V, V128) {
969        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
970        if matches_isa!(S, SSE2) {
971            return unsafe { tc(&_mm_or_si128(tc(&a), tc(&b))) };
972        }
973        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
974        if matches_isa!(S, NEON) {
975            return unsafe { tc(&vorrq_u8(tc(&a), tc(&b))) };
976        }
977        #[cfg(target_arch = "wasm32")]
978        if matches_isa!(S, WASM128) {
979            return unsafe { tc(&v128_or(tc(&a), tc(&b))) };
980        }
981    }
982    {
983        let _ = (s, a, b);
984        unreachable!()
985    }
986}
987
988#[inline(always)]
989pub fn xor<S, V>(s: S, a: V, b: V) -> V
990where
991    S: InstructionSet,
992    V: POD,
993{
994    if is_pod_type!(V, V256) {
995        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
996        if matches_isa!(S, AVX2) {
997            return unsafe { tc(&_mm256_xor_si256(tc(&a), tc(&b))) };
998        }
999        {
1000            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
1001            let (a, b) = (a.to_v128x2(), b.to_v128x2());
1002            let c0 = xor::<S, V128>(s, a.0, b.0);
1003            let c1 = xor::<S, V128>(s, a.1, b.1);
1004            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
1005        }
1006    }
1007    if is_pod_type!(V, V128) {
1008        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1009        if matches_isa!(S, SSE2) {
1010            return unsafe { tc(&_mm_xor_si128(tc(&a), tc(&b))) };
1011        }
1012        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
1013        if matches_isa!(S, NEON) {
1014            return unsafe { tc(&veorq_u8(tc(&a), tc(&b))) };
1015        }
1016        #[cfg(target_arch = "wasm32")]
1017        if matches_isa!(S, WASM128) {
1018            return unsafe { tc(&v128_xor(tc(&a), tc(&b))) };
1019        }
1020    }
1021    {
1022        let _ = (s, a, b);
1023        unreachable!()
1024    }
1025}
1026
1027#[inline(always)]
1028pub fn andnot<S, V>(s: S, a: V, b: V) -> V
1029where
1030    S: InstructionSet,
1031    V: POD,
1032{
1033    if is_pod_type!(V, V256) {
1034        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1035        if matches_isa!(S, AVX2) {
1036            let (a, b) = (b, a);
1037            return unsafe { tc(&_mm256_andnot_si256(tc(&a), tc(&b))) };
1038        }
1039        {
1040            let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
1041            let (a, b) = (a.to_v128x2(), b.to_v128x2());
1042            let c0 = andnot::<S, V128>(s, a.0, b.0);
1043            let c1 = andnot::<S, V128>(s, a.1, b.1);
1044            return unsafe { tc(&V256::from_v128x2((c0, c1))) };
1045        }
1046    }
1047    if is_pod_type!(V, V128) {
1048        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1049        if matches_isa!(S, SSE2) {
1050            let (a, b) = (b, a);
1051            return unsafe { tc(&_mm_andnot_si128(tc(&a), tc(&b))) };
1052        }
1053        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
1054        if matches_isa!(S, NEON) {
1055            return unsafe { tc(&vbicq_u8(tc(&a), tc(&b))) };
1056        }
1057        #[cfg(target_arch = "wasm32")]
1058        if matches_isa!(S, WASM128) {
1059            return unsafe { tc(&v128_andnot(tc(&a), tc(&b))) };
1060        }
1061    }
1062    {
1063        let _ = (s, a, b);
1064        unreachable!()
1065    }
1066}