1#![allow(clippy::collapsible_if, clippy::too_many_lines)]
2
3use crate::isa::InstructionSet;
4use crate::pod::POD;
5use crate::tools::transmute_copy as tc;
6use crate::vector::{V128, V256};
7
8#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
9use crate::isa::{AVX2, SSE2, SSE41};
10
11#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
12use crate::isa::NEON;
13
14#[cfg(target_arch = "wasm32")]
15use crate::isa::WASM128;
16
17#[cfg(target_arch = "x86")]
18use core::arch::x86::*;
19
20#[cfg(target_arch = "x86_64")]
21use core::arch::x86_64::*;
22
23#[cfg(all(feature = "unstable", target_arch = "arm"))]
24use core::arch::arm::*;
25
26#[cfg(target_arch = "aarch64")]
27use core::arch::aarch64::*;
28
29#[cfg(target_arch = "wasm32")]
30use core::arch::wasm32::*;
31
32#[inline(always)]
33pub fn splat<S: InstructionSet, T: POD, V: POD>(s: S, x: T) -> V {
34 if is_pod_type!(V, V256) {
35 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
36 if matches_isa!(S, AVX2) {
37 if is_pod_type!(T, u8 | i8) {
38 return unsafe { tc(&_mm256_set1_epi8(tc(&x))) };
39 }
40 }
41 {
42 let c = splat::<S, T, V128>(s, x).x2();
43 return unsafe { tc(&c) };
44 }
45 }
46 if is_pod_type!(V, V128) {
47 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
48 if matches_isa!(S, SSE2) {
49 if is_pod_type!(T, u8 | i8) {
50 return unsafe { tc(&_mm_set1_epi8(tc(&x))) };
51 }
52 if is_pod_type!(T, u16 | i16) {
53 return unsafe { tc(&_mm_set1_epi16(tc(&x))) };
54 }
55 if is_pod_type!(T, u32 | i32) {
56 return unsafe { tc(&_mm_set1_epi32(tc(&x))) };
57 }
58 if is_pod_type!(T, u64 | i64) {
59 return unsafe { tc(&_mm_set1_epi64x(tc(&x))) };
60 }
61 }
62 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
63 if matches_isa!(S, NEON) {
64 if is_pod_type!(T, u8 | i8) {
65 return unsafe { tc(&vld1q_dup_u8(&tc(&x))) };
66 }
67 if is_pod_type!(T, u16 | i16) {
68 return unsafe { tc(&vld1q_dup_u16(&tc(&x))) };
69 }
70 if is_pod_type!(T, u32 | i32) {
71 return unsafe { tc(&vld1q_dup_u32(&tc(&x))) };
72 }
73 if is_pod_type!(T, u64 | i64) {
74 return unsafe { tc(&vld1q_dup_u64(&tc(&x))) };
75 }
76 }
77 #[cfg(target_arch = "wasm32")]
78 if matches_isa!(S, WASM128) {
79 if is_pod_type!(T, u8 | i8) {
80 return unsafe { tc(&u8x16_splat(tc(&x))) };
81 }
82 if is_pod_type!(T, u16 | i16) {
83 return unsafe { tc(&u16x8_splat(tc(&x))) };
84 }
85 if is_pod_type!(T, u32 | i32) {
86 return unsafe { tc(&u32x4_splat(tc(&x))) };
87 }
88 if is_pod_type!(T, u64 | i64) {
89 return unsafe { tc(&u64x2_splat(tc(&x))) };
90 }
91 }
92 }
93 {
94 let _ = (s, x);
95 unreachable!()
96 }
97}
98
99#[inline(always)]
100pub fn add<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
101 if is_pod_type!(V, V256) {
102 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
103 if matches_isa!(S, AVX2) {
104 if is_pod_type!(T, u8 | i8) {
105 return unsafe { tc(&_mm256_add_epi8(tc(&a), tc(&b))) };
106 }
107 if is_pod_type!(T, u16 | i16) {
108 return unsafe { tc(&_mm256_add_epi16(tc(&a), tc(&b))) };
109 }
110 if is_pod_type!(T, u32 | i32) {
111 return unsafe { tc(&_mm256_add_epi32(tc(&a), tc(&b))) };
112 }
113 if is_pod_type!(T, u64 | i64) {
114 return unsafe { tc(&_mm256_add_epi64(tc(&a), tc(&b))) };
115 }
116 }
117 {
118 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
119 let (a, b) = (a.to_v128x2(), b.to_v128x2());
120 let c0 = add::<S, T, V128>(s, a.0, b.0);
121 let c1 = add::<S, T, V128>(s, a.1, b.1);
122 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
123 }
124 }
125 if is_pod_type!(V, V128) {
126 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
127 if matches_isa!(S, SSE2) {
128 if is_pod_type!(T, u8 | i8) {
129 return unsafe { tc(&_mm_add_epi8(tc(&a), tc(&b))) };
130 }
131 if is_pod_type!(T, u16 | i16) {
132 return unsafe { tc(&_mm_add_epi16(tc(&a), tc(&b))) };
133 }
134 if is_pod_type!(T, u32 | i32) {
135 return unsafe { tc(&_mm_add_epi32(tc(&a), tc(&b))) };
136 }
137 if is_pod_type!(T, u64 | i64) {
138 return unsafe { tc(&_mm_add_epi64(tc(&a), tc(&b))) };
139 }
140 }
141 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
142 if matches_isa!(S, NEON) {
143 if is_pod_type!(T, u8 | i8) {
144 return unsafe { tc(&vaddq_u8(tc(&a), tc(&b))) };
145 }
146 if is_pod_type!(T, u16 | i16) {
147 return unsafe { tc(&vaddq_u16(tc(&a), tc(&b))) };
148 }
149 if is_pod_type!(T, u32 | i32) {
150 return unsafe { tc(&vaddq_u32(tc(&a), tc(&b))) };
151 }
152 if is_pod_type!(T, u64 | i64) {
153 return unsafe { tc(&vaddq_u64(tc(&a), tc(&b))) };
154 }
155 }
156 #[cfg(target_arch = "wasm32")]
157 if matches_isa!(S, WASM128) {
158 if is_pod_type!(T, u8 | i8) {
159 return unsafe { tc(&u8x16_add(tc(&a), tc(&b))) };
160 }
161 if is_pod_type!(T, u16 | i16) {
162 return unsafe { tc(&u16x8_add(tc(&a), tc(&b))) };
163 }
164 if is_pod_type!(T, u32 | i32) {
165 return unsafe { tc(&u32x4_add(tc(&a), tc(&b))) };
166 }
167 if is_pod_type!(T, u64 | i64) {
168 return unsafe { tc(&u64x2_add(tc(&a), tc(&b))) };
169 }
170 }
171 }
172 {
173 let _ = (s, a, b);
174 unreachable!()
175 }
176}
177
178#[inline(always)]
179pub fn sub<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
180 if is_pod_type!(V, V256) {
181 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
182 if matches_isa!(S, AVX2) {
183 if is_pod_type!(T, u8 | i8) {
184 return unsafe { tc(&_mm256_sub_epi8(tc(&a), tc(&b))) };
185 }
186 if is_pod_type!(T, u16 | i16) {
187 return unsafe { tc(&_mm256_sub_epi16(tc(&a), tc(&b))) };
188 }
189 if is_pod_type!(T, u32 | i32) {
190 return unsafe { tc(&_mm256_sub_epi32(tc(&a), tc(&b))) };
191 }
192 if is_pod_type!(T, u64 | i64) {
193 return unsafe { tc(&_mm256_sub_epi64(tc(&a), tc(&b))) };
194 }
195 }
196 {
197 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
198 let (a, b) = (a.to_v128x2(), b.to_v128x2());
199 let c0 = sub::<S, T, V128>(s, a.0, b.0);
200 let c1 = sub::<S, T, V128>(s, a.1, b.1);
201 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
202 }
203 }
204 if is_pod_type!(V, V128) {
205 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
206 if matches_isa!(S, SSE2) {
207 if is_pod_type!(T, u8 | i8) {
208 return unsafe { tc(&_mm_sub_epi8(tc(&a), tc(&b))) };
209 }
210 if is_pod_type!(T, u16 | i16) {
211 return unsafe { tc(&_mm_sub_epi16(tc(&a), tc(&b))) };
212 }
213 if is_pod_type!(T, u32 | i32) {
214 return unsafe { tc(&_mm_sub_epi32(tc(&a), tc(&b))) };
215 }
216 if is_pod_type!(T, u64 | i64) {
217 return unsafe { tc(&_mm_sub_epi64(tc(&a), tc(&b))) };
218 }
219 }
220 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
221 if matches_isa!(S, NEON) {
222 if is_pod_type!(T, u8 | i8) {
223 return unsafe { tc(&vsubq_u8(tc(&a), tc(&b))) };
224 }
225 if is_pod_type!(T, u16 | i16) {
226 return unsafe { tc(&vsubq_u16(tc(&a), tc(&b))) };
227 }
228 if is_pod_type!(T, u32 | i32) {
229 return unsafe { tc(&vsubq_u32(tc(&a), tc(&b))) };
230 }
231 if is_pod_type!(T, u64 | i64) {
232 return unsafe { tc(&vsubq_u64(tc(&a), tc(&b))) };
233 }
234 }
235 #[cfg(target_arch = "wasm32")]
236 if matches_isa!(S, WASM128) {
237 if is_pod_type!(T, u8 | i8) {
238 return unsafe { tc(&u8x16_sub(tc(&a), tc(&b))) };
239 }
240 if is_pod_type!(T, u16 | i16) {
241 return unsafe { tc(&u16x8_sub(tc(&a), tc(&b))) };
242 }
243 if is_pod_type!(T, u32 | i32) {
244 return unsafe { tc(&u32x4_sub(tc(&a), tc(&b))) };
245 }
246 if is_pod_type!(T, u64 | i64) {
247 return unsafe { tc(&u64x2_sub(tc(&a), tc(&b))) };
248 }
249 }
250 }
251 {
252 let _ = (s, a, b);
253 unreachable!()
254 }
255}
256
257#[inline(always)]
258pub fn eq<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
259 if is_pod_type!(V, V256) {
260 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
261 if matches_isa!(S, AVX2) {
262 if is_pod_type!(T, u8 | i8) {
263 return unsafe { tc(&_mm256_cmpeq_epi8(tc(&a), tc(&b))) };
264 }
265 if is_pod_type!(T, u16 | i16) {
266 return unsafe { tc(&_mm256_cmpeq_epi16(tc(&a), tc(&b))) };
267 }
268 if is_pod_type!(T, u32 | i32) {
269 return unsafe { tc(&_mm256_cmpeq_epi32(tc(&a), tc(&b))) };
270 }
271 if is_pod_type!(T, u64 | i64) {
272 return unsafe { tc(&_mm256_cmpeq_epi64(tc(&a), tc(&b))) };
273 }
274 }
275 {
276 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
277 let (a, b) = (a.to_v128x2(), b.to_v128x2());
278 let c0 = eq::<S, T, V128>(s, a.0, b.0);
279 let c1 = eq::<S, T, V128>(s, a.1, b.1);
280 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
281 }
282 }
283 if is_pod_type!(V, V128) {
284 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
285 if matches_isa!(S, SSE2) {
286 if is_pod_type!(T, u8 | i8) {
287 return unsafe { tc(&_mm_cmpeq_epi8(tc(&a), tc(&b))) };
288 }
289 if is_pod_type!(T, u16 | i16) {
290 return unsafe { tc(&_mm_cmpeq_epi16(tc(&a), tc(&b))) };
291 }
292 if is_pod_type!(T, u32 | i32) {
293 return unsafe { tc(&_mm_cmpeq_epi32(tc(&a), tc(&b))) };
294 }
295 }
296 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
297 if matches_isa!(S, NEON) {
298 if is_pod_type!(T, u8 | i8) {
299 return unsafe { tc(&vceqq_u8(tc(&a), tc(&b))) };
300 }
301 if is_pod_type!(T, u16 | i16) {
302 return unsafe { tc(&vceqq_u16(tc(&a), tc(&b))) };
303 }
304 if is_pod_type!(T, u32 | i32) {
305 return unsafe { tc(&vceqq_u32(tc(&a), tc(&b))) };
306 }
307 #[cfg(target_arch = "aarch64")]
308 if is_pod_type!(T, u64 | i64) {
309 return unsafe { tc(&vceqq_u64(tc(&a), tc(&b))) };
310 }
311 }
312 #[cfg(target_arch = "wasm32")]
313 if matches_isa!(S, WASM128) {
314 if is_pod_type!(T, u8 | i8) {
315 return unsafe { tc(&u8x16_eq(tc(&a), tc(&b))) };
316 }
317 if is_pod_type!(T, u16 | i16) {
318 return unsafe { tc(&u16x8_eq(tc(&a), tc(&b))) };
319 }
320 if is_pod_type!(T, u32 | i32) {
321 return unsafe { tc(&u32x4_eq(tc(&a), tc(&b))) };
322 }
323 if is_pod_type!(T, u64 | i64) {
324 return unsafe { tc(&u64x2_eq(tc(&a), tc(&b))) };
325 }
326 }
327 }
328 {
329 let _ = (s, a, b);
330 unreachable!()
331 }
332}
333
334#[inline(always)]
335pub fn lt<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
336 if is_pod_type!(V, V256) {
337 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
338 if matches_isa!(S, AVX2) {
339 if is_pod_type!(T, i8) {
340 return unsafe { tc(&_mm256_cmpgt_epi8(tc(&b), tc(&a))) };
341 }
342 if is_pod_type!(T, i16) {
343 return unsafe { tc(&_mm256_cmpgt_epi16(tc(&b), tc(&a))) };
344 }
345 if is_pod_type!(T, i32) {
346 return unsafe { tc(&_mm256_cmpgt_epi32(tc(&b), tc(&a))) };
347 }
348 if is_pod_type!(T, i64) {
349 return unsafe { tc(&_mm256_cmpgt_epi64(tc(&b), tc(&a))) };
350 }
351 if is_pod_type!(T, u8) {
352 return unsafe {
353 let (a, b) = (tc(&a), tc(&b));
354 let c = _mm256_cmpeq_epi8(a, _mm256_max_epu8(a, b));
355 tc(&_mm256_xor_si256(c, _mm256_cmpeq_epi8(a, a)))
356 };
357 }
358 if is_pod_type!(T, u16) {
359 return unsafe {
360 let (a, b) = (tc(&a), tc(&b));
361 let c = _mm256_cmpeq_epi16(a, _mm256_max_epu16(a, b));
362 tc(&_mm256_xor_si256(c, _mm256_cmpeq_epi16(a, a)))
363 };
364 }
365 if is_pod_type!(T, u32) {
366 return unsafe {
367 let (a, b) = (tc(&a), tc(&b));
368 let c = _mm256_cmpeq_epi32(a, _mm256_max_epu32(a, b));
369 tc(&_mm256_xor_si256(c, _mm256_cmpeq_epi32(a, a)))
370 };
371 }
372 }
373 {
374 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
375 let (a, b) = (a.to_v128x2(), b.to_v128x2());
376 let c0 = lt::<S, T, V128>(s, a.0, b.0);
377 let c1 = lt::<S, T, V128>(s, a.1, b.1);
378 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
379 }
380 }
381 if is_pod_type!(V, V128) {
382 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
383 if matches_isa!(S, SSE2) {
384 if is_pod_type!(T, i8) {
385 return unsafe { tc(&_mm_cmplt_epi8(tc(&a), tc(&b))) };
386 }
387 if is_pod_type!(T, i16) {
388 return unsafe { tc(&_mm_cmplt_epi16(tc(&a), tc(&b))) };
389 }
390 if is_pod_type!(T, i32) {
391 return unsafe { tc(&_mm_cmplt_epi32(tc(&a), tc(&b))) };
392 }
393 if is_pod_type!(T, u8) {
394 return unsafe {
395 let (a, b) = (tc(&a), tc(&b));
396 let c = _mm_cmpeq_epi8(a, _mm_max_epu8(a, b));
397 tc(&_mm_xor_si128(c, _mm_cmpeq_epi8(a, a)))
398 };
399 }
400 if is_pod_type!(T, u16) {
401 return unsafe {
402 let m = _mm_set1_epi16(i16::MIN);
403 let a = _mm_xor_si128(tc(&a), m);
404 let b = _mm_xor_si128(tc(&b), m);
405 tc(&_mm_cmplt_epi16(a, b))
406 };
407 }
408 if is_pod_type!(T, u32) {
409 return unsafe {
410 let m = _mm_set1_epi32(i32::MIN);
411 let a = _mm_xor_si128(tc(&a), m);
412 let b = _mm_xor_si128(tc(&b), m);
413 tc(&_mm_cmplt_epi32(a, b))
414 };
415 }
416 }
417 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
418 if matches_isa!(S, NEON) {
419 if is_pod_type!(T, i8) {
420 return unsafe { tc(&vcltq_s8(tc(&a), tc(&b))) };
421 }
422 if is_pod_type!(T, i16) {
423 return unsafe { tc(&vcltq_s16(tc(&a), tc(&b))) };
424 }
425 if is_pod_type!(T, i32) {
426 return unsafe { tc(&vcltq_s32(tc(&a), tc(&b))) };
427 }
428 #[cfg(target_arch = "aarch64")]
429 if is_pod_type!(T, i64) {
430 return unsafe { tc(&vcltq_s64(tc(&a), tc(&b))) };
431 }
432 if is_pod_type!(T, u8) {
433 return unsafe { tc(&vcltq_u8(tc(&a), tc(&b))) };
434 }
435 if is_pod_type!(T, u16) {
436 return unsafe { tc(&vcltq_u16(tc(&a), tc(&b))) };
437 }
438 if is_pod_type!(T, u32) {
439 return unsafe { tc(&vcltq_u32(tc(&a), tc(&b))) };
440 }
441 #[cfg(target_arch = "aarch64")]
442 if is_pod_type!(T, u64) {
443 return unsafe { tc(&vcltq_u64(tc(&a), tc(&b))) };
444 }
445 }
446 #[cfg(target_arch = "wasm32")]
447 if matches_isa!(S, WASM128) {
448 if is_pod_type!(T, i8) {
449 return unsafe { tc(&i8x16_lt(tc(&a), tc(&b))) };
450 }
451 if is_pod_type!(T, i16) {
452 return unsafe { tc(&i16x8_lt(tc(&a), tc(&b))) };
453 }
454 if is_pod_type!(T, i32) {
455 return unsafe { tc(&i32x4_lt(tc(&a), tc(&b))) };
456 }
457 if is_pod_type!(T, i64) {
458 return unsafe { tc(&i64x2_lt(tc(&a), tc(&b))) };
459 }
460 if is_pod_type!(T, u8) {
461 return unsafe { tc(&u8x16_lt(tc(&a), tc(&b))) };
462 }
463 if is_pod_type!(T, u16) {
464 return unsafe { tc(&u16x8_lt(tc(&a), tc(&b))) };
465 }
466 if is_pod_type!(T, u32) {
467 return unsafe { tc(&u32x4_lt(tc(&a), tc(&b))) };
468 }
469 }
473 }
474 {
475 let _ = (s, a, b);
476 unreachable!()
477 }
478}
479
480#[inline(always)]
481pub fn add_sat<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
482 if is_pod_type!(V, V256) {
483 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
484 if matches_isa!(S, AVX2) {
485 if is_pod_type!(T, i8) {
486 return unsafe { tc(&_mm256_adds_epi8(tc(&a), tc(&b))) };
487 }
488 if is_pod_type!(T, i16) {
489 return unsafe { tc(&_mm256_adds_epi16(tc(&a), tc(&b))) };
490 }
491 if is_pod_type!(T, u8) {
492 return unsafe { tc(&_mm256_adds_epu8(tc(&a), tc(&b))) };
493 }
494 if is_pod_type!(T, u16) {
495 return unsafe { tc(&_mm256_adds_epu16(tc(&a), tc(&b))) };
496 }
497 }
498 {
499 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
500 let (a, b) = (a.to_v128x2(), b.to_v128x2());
501 let c0 = add_sat::<S, T, V128>(s, a.0, b.0);
502 let c1 = add_sat::<S, T, V128>(s, a.1, b.1);
503 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
504 }
505 }
506 if is_pod_type!(V, V128) {
507 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
508 if matches_isa!(S, SSE2) {
509 if is_pod_type!(T, i8) {
510 return unsafe { tc(&_mm_adds_epi8(tc(&a), tc(&b))) };
511 }
512 if is_pod_type!(T, i16) {
513 return unsafe { tc(&_mm_adds_epi16(tc(&a), tc(&b))) };
514 }
515 if is_pod_type!(T, u8) {
516 return unsafe { tc(&_mm_adds_epu8(tc(&a), tc(&b))) };
517 }
518 if is_pod_type!(T, u16) {
519 return unsafe { tc(&_mm_adds_epu16(tc(&a), tc(&b))) };
520 }
521 }
522 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
523 if matches_isa!(S, NEON) {
524 if is_pod_type!(T, i8) {
525 return unsafe { tc(&vqaddq_s8(tc(&a), tc(&b))) };
526 }
527 if is_pod_type!(T, i16) {
528 return unsafe { tc(&vqaddq_s16(tc(&a), tc(&b))) };
529 }
530 if is_pod_type!(T, i32) {
531 return unsafe { tc(&vqaddq_s32(tc(&a), tc(&b))) };
532 }
533 if is_pod_type!(T, u8) {
534 return unsafe { tc(&vqaddq_u8(tc(&a), tc(&b))) };
535 }
536 if is_pod_type!(T, u16) {
537 return unsafe { tc(&vqaddq_u16(tc(&a), tc(&b))) };
538 }
539 if is_pod_type!(T, u32) {
540 return unsafe { tc(&vqaddq_u32(tc(&a), tc(&b))) };
541 }
542 }
543 #[cfg(target_arch = "wasm32")]
544 if matches_isa!(S, WASM128) {
545 if is_pod_type!(T, i8) {
546 return unsafe { tc(&i8x16_add_sat(tc(&a), tc(&b))) };
547 }
548 if is_pod_type!(T, i16) {
549 return unsafe { tc(&i16x8_add_sat(tc(&a), tc(&b))) };
550 }
551 if is_pod_type!(T, u8) {
552 return unsafe { tc(&u8x16_add_sat(tc(&a), tc(&b))) };
553 }
554 if is_pod_type!(T, u16) {
555 return unsafe { tc(&u16x8_add_sat(tc(&a), tc(&b))) };
556 }
557 }
558 }
559 {
560 let _ = (s, a, b);
561 unreachable!()
562 }
563}
564
565#[inline(always)]
566pub fn sub_sat<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
567 if is_pod_type!(V, V256) {
568 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
569 if matches_isa!(S, AVX2) {
570 if is_pod_type!(T, i8) {
571 return unsafe { tc(&_mm256_subs_epi8(tc(&a), tc(&b))) };
572 }
573 if is_pod_type!(T, i16) {
574 return unsafe { tc(&_mm256_subs_epi16(tc(&a), tc(&b))) };
575 }
576 if is_pod_type!(T, u8) {
577 return unsafe { tc(&_mm256_subs_epu8(tc(&a), tc(&b))) };
578 }
579 if is_pod_type!(T, u16) {
580 return unsafe { tc(&_mm256_subs_epu16(tc(&a), tc(&b))) };
581 }
582 }
583 {
584 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
585 let (a, b) = (a.to_v128x2(), b.to_v128x2());
586 let c0 = sub_sat::<S, T, V128>(s, a.0, b.0);
587 let c1 = sub_sat::<S, T, V128>(s, a.1, b.1);
588 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
589 }
590 }
591 if is_pod_type!(V, V128) {
592 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
593 if matches_isa!(S, SSE2) {
594 if is_pod_type!(T, i8) {
595 return unsafe { tc(&_mm_subs_epi8(tc(&a), tc(&b))) };
596 }
597 if is_pod_type!(T, i16) {
598 return unsafe { tc(&_mm_subs_epi16(tc(&a), tc(&b))) };
599 }
600 if is_pod_type!(T, u8) {
601 return unsafe { tc(&_mm_subs_epu8(tc(&a), tc(&b))) };
602 }
603 if is_pod_type!(T, u16) {
604 return unsafe { tc(&_mm_subs_epu16(tc(&a), tc(&b))) };
605 }
606 }
607 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
608 if matches_isa!(S, NEON) {
609 if is_pod_type!(T, i8) {
610 return unsafe { tc(&vqsubq_s8(tc(&a), tc(&b))) };
611 }
612 if is_pod_type!(T, i16) {
613 return unsafe { tc(&vqsubq_s16(tc(&a), tc(&b))) };
614 }
615 if is_pod_type!(T, i32) {
616 return unsafe { tc(&vqsubq_s32(tc(&a), tc(&b))) };
617 }
618 if is_pod_type!(T, u8) {
619 return unsafe { tc(&vqsubq_u8(tc(&a), tc(&b))) };
620 }
621 if is_pod_type!(T, u16) {
622 return unsafe { tc(&vqsubq_u16(tc(&a), tc(&b))) };
623 }
624 if is_pod_type!(T, u32) {
625 return unsafe { tc(&vqsubq_u32(tc(&a), tc(&b))) };
626 }
627 }
628 #[cfg(target_arch = "wasm32")]
629 if matches_isa!(S, WASM128) {
630 if is_pod_type!(T, i8) {
631 return unsafe { tc(&i8x16_sub_sat(tc(&a), tc(&b))) };
632 }
633 if is_pod_type!(T, i16) {
634 return unsafe { tc(&i16x8_sub_sat(tc(&a), tc(&b))) };
635 }
636 if is_pod_type!(T, u8) {
637 return unsafe { tc(&u8x16_sub_sat(tc(&a), tc(&b))) };
638 }
639 if is_pod_type!(T, u16) {
640 return unsafe { tc(&u16x8_sub_sat(tc(&a), tc(&b))) };
641 }
642 }
643 }
644 {
645 let _ = (s, a, b);
646 unreachable!()
647 }
648}
649
650pub fn max<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
651 if is_pod_type!(V, V256) {
652 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
653 if matches_isa!(S, AVX2) {
654 if is_pod_type!(T, i8) {
655 return unsafe { tc(&_mm256_max_epi8(tc(&a), tc(&b))) };
656 }
657 if is_pod_type!(T, i16) {
658 return unsafe { tc(&_mm256_max_epi16(tc(&a), tc(&b))) };
659 }
660 if is_pod_type!(T, i32) {
661 return unsafe { tc(&_mm256_max_epi32(tc(&a), tc(&b))) };
662 }
663 if is_pod_type!(T, u8) {
664 return unsafe { tc(&_mm256_max_epu8(tc(&a), tc(&b))) };
665 }
666 if is_pod_type!(T, u16) {
667 return unsafe { tc(&_mm256_max_epu16(tc(&a), tc(&b))) };
668 }
669 if is_pod_type!(T, u32) {
670 return unsafe { tc(&_mm256_max_epu32(tc(&a), tc(&b))) };
671 }
672 if is_pod_type!(T, f32) {
673 return unsafe { tc(&_mm256_max_ps(tc(&a), tc(&b))) };
674 }
675 if is_pod_type!(T, f64) {
676 return unsafe { tc(&_mm256_max_pd(tc(&a), tc(&b))) };
677 }
678 }
679 {
680 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
681 let (a, b) = (a.to_v128x2(), b.to_v128x2());
682 let c0 = max::<S, T, V128>(s, a.0, b.0);
683 let c1 = max::<S, T, V128>(s, a.1, b.1);
684 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
685 }
686 }
687 if is_pod_type!(V, V128) {
688 #[cfg(miri)]
689 {
690 if is_pod_type!(T, u8) {
691 return unsafe { tc(&crate::simulation::u8x16_max(tc(&a), tc(&b))) };
692 }
693 }
694 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
695 if matches_isa!(S, SSE41) {
696 if is_pod_type!(T, i8) {
697 return unsafe { tc(&_mm_max_epi8(tc(&a), tc(&b))) };
698 }
699 if is_pod_type!(T, i32) {
700 return unsafe { tc(&_mm_max_epi32(tc(&a), tc(&b))) };
701 }
702 if is_pod_type!(T, u16) {
703 return unsafe { tc(&_mm_max_epu16(tc(&a), tc(&b))) };
704 }
705 if is_pod_type!(T, u32) {
706 return unsafe { tc(&_mm_max_epu32(tc(&a), tc(&b))) };
707 }
708 }
709 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
710 if matches_isa!(S, SSE2) {
711 if is_pod_type!(T, i16) {
712 return unsafe { tc(&_mm_max_epi16(tc(&a), tc(&b))) };
713 }
714 if is_pod_type!(T, u8) {
715 return unsafe { tc(&_mm_max_epu8(tc(&a), tc(&b))) };
716 }
717 if is_pod_type!(T, f32) {
718 return unsafe { tc(&_mm_max_ps(tc(&a), tc(&b))) };
719 }
720 if is_pod_type!(T, f64) {
721 return unsafe { tc(&_mm_max_pd(tc(&a), tc(&b))) };
722 }
723 }
724 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
725 if matches_isa!(S, NEON) {
726 if is_pod_type!(T, i8) {
727 return unsafe { tc(&vmaxq_s8(tc(&a), tc(&b))) };
728 }
729 if is_pod_type!(T, i16) {
730 return unsafe { tc(&vmaxq_s16(tc(&a), tc(&b))) };
731 }
732 if is_pod_type!(T, i32) {
733 return unsafe { tc(&vmaxq_s32(tc(&a), tc(&b))) };
734 }
735 if is_pod_type!(T, u8) {
736 return unsafe { tc(&vmaxq_u8(tc(&a), tc(&b))) };
737 }
738 if is_pod_type!(T, u16) {
739 return unsafe { tc(&vmaxq_u16(tc(&a), tc(&b))) };
740 }
741 if is_pod_type!(T, u32) {
742 return unsafe { tc(&vmaxq_u32(tc(&a), tc(&b))) };
743 }
744 if is_pod_type!(T, f32) {
745 return unsafe { tc(&vmaxq_f32(tc(&a), tc(&b))) };
746 }
747 }
748 #[cfg(target_arch = "wasm32")]
749 if matches_isa!(S, WASM128) {
750 if is_pod_type!(T, i8) {
751 return unsafe { tc(&i8x16_max(tc(&a), tc(&b))) };
752 }
753 if is_pod_type!(T, i16) {
754 return unsafe { tc(&i16x8_max(tc(&a), tc(&b))) };
755 }
756 if is_pod_type!(T, i32) {
757 return unsafe { tc(&i32x4_max(tc(&a), tc(&b))) };
758 }
759 if is_pod_type!(T, u8) {
760 return unsafe { tc(&u8x16_max(tc(&a), tc(&b))) };
761 }
762 if is_pod_type!(T, u16) {
763 return unsafe { tc(&u16x8_max(tc(&a), tc(&b))) };
764 }
765 if is_pod_type!(T, u32) {
766 return unsafe { tc(&u32x4_max(tc(&a), tc(&b))) };
767 }
768 if is_pod_type!(T, f32) {
769 return unsafe { tc(&f32x4_max(tc(&a), tc(&b))) };
770 }
771 }
772 }
773 {
774 let _ = (s, a, b);
775 unreachable!()
776 }
777}
778
779#[inline(always)]
780pub fn min<S: InstructionSet, T: POD, V: POD>(s: S, a: V, b: V) -> V {
781 if is_pod_type!(V, V256) {
782 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
783 if matches_isa!(S, AVX2) {
784 if is_pod_type!(T, i8) {
785 return unsafe { tc(&_mm256_min_epi8(tc(&a), tc(&b))) };
786 }
787 if is_pod_type!(T, i16) {
788 return unsafe { tc(&_mm256_min_epi16(tc(&a), tc(&b))) };
789 }
790 if is_pod_type!(T, i32) {
791 return unsafe { tc(&_mm256_min_epi32(tc(&a), tc(&b))) };
792 }
793 if is_pod_type!(T, u8) {
794 return unsafe { tc(&_mm256_min_epu8(tc(&a), tc(&b))) };
795 }
796 if is_pod_type!(T, u16) {
797 return unsafe { tc(&_mm256_min_epu16(tc(&a), tc(&b))) };
798 }
799 if is_pod_type!(T, u32) {
800 return unsafe { tc(&_mm256_min_epu32(tc(&a), tc(&b))) };
801 }
802 if is_pod_type!(T, f32) {
803 return unsafe { tc(&_mm256_min_ps(tc(&a), tc(&b))) };
804 }
805 if is_pod_type!(T, f64) {
806 return unsafe { tc(&_mm256_min_pd(tc(&a), tc(&b))) };
807 }
808 }
809 {
810 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
811 let (a, b) = (a.to_v128x2(), b.to_v128x2());
812 let c0 = min::<S, T, V128>(s, a.0, b.0);
813 let c1 = min::<S, T, V128>(s, a.1, b.1);
814 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
815 }
816 }
817 if is_pod_type!(V, V128) {
818 #[cfg(miri)]
819 {
820 if is_pod_type!(T, u8) {
821 return unsafe { tc(&crate::simulation::u8x16_min(tc(&a), tc(&b))) };
822 }
823 }
824 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
825 if matches_isa!(S, SSE41) {
826 if is_pod_type!(T, i8) {
827 return unsafe { tc(&_mm_min_epi8(tc(&a), tc(&b))) };
828 }
829 if is_pod_type!(T, i32) {
830 return unsafe { tc(&_mm_min_epi32(tc(&a), tc(&b))) };
831 }
832
833 if is_pod_type!(T, u16) {
834 return unsafe { tc(&_mm_min_epu16(tc(&a), tc(&b))) };
835 }
836 if is_pod_type!(T, u32) {
837 return unsafe { tc(&_mm_min_epu32(tc(&a), tc(&b))) };
838 }
839 }
840 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
841 if matches_isa!(S, SSE2) {
842 if is_pod_type!(T, i16) {
843 return unsafe { tc(&_mm_min_epi16(tc(&a), tc(&b))) };
844 }
845 if is_pod_type!(T, u8) {
846 return unsafe { tc(&_mm_min_epu8(tc(&a), tc(&b))) };
847 }
848 if is_pod_type!(T, f32) {
849 return unsafe { tc(&_mm_min_ps(tc(&a), tc(&b))) };
850 }
851 if is_pod_type!(T, f64) {
852 return unsafe { tc(&_mm_min_pd(tc(&a), tc(&b))) };
853 }
854 }
855 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
856 if matches_isa!(S, NEON) {
857 if is_pod_type!(T, i8) {
858 return unsafe { tc(&vminq_s8(tc(&a), tc(&b))) };
859 }
860 if is_pod_type!(T, i16) {
861 return unsafe { tc(&vminq_s16(tc(&a), tc(&b))) };
862 }
863 if is_pod_type!(T, i32) {
864 return unsafe { tc(&vminq_s32(tc(&a), tc(&b))) };
865 }
866 if is_pod_type!(T, u8) {
867 return unsafe { tc(&vminq_u8(tc(&a), tc(&b))) };
868 }
869 if is_pod_type!(T, u16) {
870 return unsafe { tc(&vminq_u16(tc(&a), tc(&b))) };
871 }
872 if is_pod_type!(T, u32) {
873 return unsafe { tc(&vminq_u32(tc(&a), tc(&b))) };
874 }
875 if is_pod_type!(T, f32) {
876 return unsafe { tc(&vminq_f32(tc(&a), tc(&b))) };
877 }
878 }
879 #[cfg(target_arch = "wasm32")]
880 if matches_isa!(S, WASM128) {
881 if is_pod_type!(T, i8) {
882 return unsafe { tc(&i8x16_min(tc(&a), tc(&b))) };
883 }
884 if is_pod_type!(T, i16) {
885 return unsafe { tc(&i16x8_min(tc(&a), tc(&b))) };
886 }
887 if is_pod_type!(T, i32) {
888 return unsafe { tc(&i32x4_min(tc(&a), tc(&b))) };
889 }
890 if is_pod_type!(T, u8) {
891 return unsafe { tc(&u8x16_min(tc(&a), tc(&b))) };
892 }
893 if is_pod_type!(T, u16) {
894 return unsafe { tc(&u16x8_min(tc(&a), tc(&b))) };
895 }
896 if is_pod_type!(T, u32) {
897 return unsafe { tc(&u32x4_min(tc(&a), tc(&b))) };
898 }
899 if is_pod_type!(T, f32) {
900 return unsafe { tc(&f32x4_min(tc(&a), tc(&b))) };
901 }
902 }
903 }
904 {
905 let _ = (s, a, b);
906 unreachable!()
907 }
908}
909
910#[inline(always)]
911pub fn and<S, V>(s: S, a: V, b: V) -> V
912where
913 S: InstructionSet,
914 V: POD,
915{
916 if is_pod_type!(V, V256) {
917 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
918 if matches_isa!(S, AVX2) {
919 return unsafe { tc(&_mm256_and_si256(tc(&a), tc(&b))) };
920 }
921 {
922 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
923 let (a, b) = (a.to_v128x2(), b.to_v128x2());
924 let c0 = and::<S, V128>(s, a.0, b.0);
925 let c1 = and::<S, V128>(s, a.1, b.1);
926 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
927 }
928 }
929 if is_pod_type!(V, V128) {
930 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
931 if matches_isa!(S, SSE2) {
932 return unsafe { tc(&_mm_and_si128(tc(&a), tc(&b))) };
933 }
934 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
935 if matches_isa!(S, NEON) {
936 return unsafe { tc(&vandq_u8(tc(&a), tc(&b))) };
937 }
938 #[cfg(target_arch = "wasm32")]
939 if matches_isa!(S, WASM128) {
940 return unsafe { tc(&v128_and(tc(&a), tc(&b))) };
941 }
942 }
943 {
944 let _ = (s, a, b);
945 unreachable!()
946 }
947}
948
949#[inline(always)]
950pub fn or<S, V>(s: S, a: V, b: V) -> V
951where
952 S: InstructionSet,
953 V: POD,
954{
955 if is_pod_type!(V, V256) {
956 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
957 if matches_isa!(S, AVX2) {
958 return unsafe { tc(&_mm256_or_si256(tc(&a), tc(&b))) };
959 }
960 {
961 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
962 let (a, b) = (a.to_v128x2(), b.to_v128x2());
963 let c0 = or::<S, V128>(s, a.0, b.0);
964 let c1 = or::<S, V128>(s, a.1, b.1);
965 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
966 }
967 }
968 if is_pod_type!(V, V128) {
969 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
970 if matches_isa!(S, SSE2) {
971 return unsafe { tc(&_mm_or_si128(tc(&a), tc(&b))) };
972 }
973 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
974 if matches_isa!(S, NEON) {
975 return unsafe { tc(&vorrq_u8(tc(&a), tc(&b))) };
976 }
977 #[cfg(target_arch = "wasm32")]
978 if matches_isa!(S, WASM128) {
979 return unsafe { tc(&v128_or(tc(&a), tc(&b))) };
980 }
981 }
982 {
983 let _ = (s, a, b);
984 unreachable!()
985 }
986}
987
988#[inline(always)]
989pub fn xor<S, V>(s: S, a: V, b: V) -> V
990where
991 S: InstructionSet,
992 V: POD,
993{
994 if is_pod_type!(V, V256) {
995 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
996 if matches_isa!(S, AVX2) {
997 return unsafe { tc(&_mm256_xor_si256(tc(&a), tc(&b))) };
998 }
999 {
1000 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
1001 let (a, b) = (a.to_v128x2(), b.to_v128x2());
1002 let c0 = xor::<S, V128>(s, a.0, b.0);
1003 let c1 = xor::<S, V128>(s, a.1, b.1);
1004 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
1005 }
1006 }
1007 if is_pod_type!(V, V128) {
1008 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1009 if matches_isa!(S, SSE2) {
1010 return unsafe { tc(&_mm_xor_si128(tc(&a), tc(&b))) };
1011 }
1012 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
1013 if matches_isa!(S, NEON) {
1014 return unsafe { tc(&veorq_u8(tc(&a), tc(&b))) };
1015 }
1016 #[cfg(target_arch = "wasm32")]
1017 if matches_isa!(S, WASM128) {
1018 return unsafe { tc(&v128_xor(tc(&a), tc(&b))) };
1019 }
1020 }
1021 {
1022 let _ = (s, a, b);
1023 unreachable!()
1024 }
1025}
1026
1027#[inline(always)]
1028pub fn andnot<S, V>(s: S, a: V, b: V) -> V
1029where
1030 S: InstructionSet,
1031 V: POD,
1032{
1033 if is_pod_type!(V, V256) {
1034 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1035 if matches_isa!(S, AVX2) {
1036 let (a, b) = (b, a);
1037 return unsafe { tc(&_mm256_andnot_si256(tc(&a), tc(&b))) };
1038 }
1039 {
1040 let (a, b): (V256, V256) = unsafe { (tc(&a), tc(&b)) };
1041 let (a, b) = (a.to_v128x2(), b.to_v128x2());
1042 let c0 = andnot::<S, V128>(s, a.0, b.0);
1043 let c1 = andnot::<S, V128>(s, a.1, b.1);
1044 return unsafe { tc(&V256::from_v128x2((c0, c1))) };
1045 }
1046 }
1047 if is_pod_type!(V, V128) {
1048 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1049 if matches_isa!(S, SSE2) {
1050 let (a, b) = (b, a);
1051 return unsafe { tc(&_mm_andnot_si128(tc(&a), tc(&b))) };
1052 }
1053 #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
1054 if matches_isa!(S, NEON) {
1055 return unsafe { tc(&vbicq_u8(tc(&a), tc(&b))) };
1056 }
1057 #[cfg(target_arch = "wasm32")]
1058 if matches_isa!(S, WASM128) {
1059 return unsafe { tc(&v128_andnot(tc(&a), tc(&b))) };
1060 }
1061 }
1062 {
1063 let _ = (s, a, b);
1064 unreachable!()
1065 }
1066}