httparse/simd/
neon.rs
1use crate::iter::Bytes;
2use core::arch::aarch64::*;
3
4#[inline]
5pub fn match_header_name_vectored(bytes: &mut Bytes) {
6 while bytes.as_ref().len() >= 16 {
7 unsafe {
9 let advance = match_header_name_char_16_neon(bytes.as_ref().as_ptr());
10 bytes.advance(advance);
11
12 if advance != 16 {
13 return;
14 }
15 }
16 }
17 super::swar::match_header_name_vectored(bytes);
18}
19
20#[inline]
21pub fn match_header_value_vectored(bytes: &mut Bytes) {
22 while bytes.as_ref().len() >= 16 {
23 unsafe {
25 let advance = match_header_value_char_16_neon(bytes.as_ref().as_ptr());
26 bytes.advance(advance);
27
28 if advance != 16 {
29 return;
30 }
31 }
32 }
33 super::swar::match_header_value_vectored(bytes);
34}
35
36#[inline]
37pub fn match_uri_vectored(bytes: &mut Bytes) {
38 while bytes.as_ref().len() >= 16 {
39 unsafe {
41 let advance = match_url_char_16_neon(bytes.as_ref().as_ptr());
42 bytes.advance(advance);
43
44 if advance != 16 {
45 return;
46 }
47 }
48 }
49 super::swar::match_uri_vectored(bytes);
50}
51
/// Returns `true` when `x` belongs to the header-name character set used to
/// build the lookup bitmap: ASCII letters, ASCII digits, and the punctuation
/// bytes ``! # $ % & ' * + - . ^ _ ` | ~``.
const fn bit_set(x: u8) -> bool {
    let alphanumeric = matches!(x, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z');
    let punctuation = matches!(
        x,
        b'!' | b'#'
            | b'$'
            | b'%'
            | b'&'
            | b'\''
            | b'*'
            | b'+'
            | b'-'
            | b'.'
            | b'^'
            | b'_'
            | b'`'
            | b'|'
            | b'~'
    );
    alphanumeric || punctuation
}
57
58const fn build_bitmap() -> ([u8; 16], [u8; 16]) {
62 let mut bitmap_0_7 = [0u8; 16]; let mut bitmap_8_15 = [0u8; 16]; let mut i = 0;
65 while i < 256 {
66 if bit_set(i as u8) {
67 let (lo, hi) = (i & 0x0F, i >> 4);
69 if i < 128 {
70 bitmap_0_7[lo] |= 1 << hi;
71 } else {
72 bitmap_8_15[lo] |= 1 << hi;
73 }
74 }
75 i += 1;
76 }
77 (bitmap_0_7, bitmap_8_15)
78}
79
80const BITMAPS: ([u8; 16], [u8; 16]) = build_bitmap();
81
82#[inline]
84unsafe fn match_header_name_char_16_neon(ptr: *const u8) -> usize {
85 let bitmaps = BITMAPS;
86 let (bitmap_0_7, _bitmap_8_15) = bitmaps;
88 let bitmap_0_7 = vld1q_u8(bitmap_0_7.as_ptr());
89 const BITMASK_LOOKUP_DATA: [u8; 16] =
93 [1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128];
94 let bitmask_lookup = vld1q_u8(BITMASK_LOOKUP_DATA.as_ptr());
95
96 let input = vld1q_u8(ptr);
98
99 let indices_0_7 = vandq_u8(input, vdupq_n_u8(0x8F)); let row_0_7 = vqtbl1q_u8(bitmap_0_7, indices_0_7);
108 let bitmask = vqtbl1q_u8(bitmask_lookup, vshrq_n_u8(input, 4));
112
113 let bitsets = row_0_7;
116
117 let tmp = vandq_u8(bitsets, bitmask);
119 let result = vceqq_u8(tmp, bitmask);
120
121 offsetz(result) as usize
122}
123
124#[inline]
125unsafe fn match_url_char_16_neon(ptr: *const u8) -> usize {
126 let input = vld1q_u8(ptr);
127
128 let result = vcleq_u8(vdupq_n_u8(b'!'), input);
130
131 let del = vceqq_u8(input, vdupq_n_u8(0x7F));
133 let result = vbicq_u8(result, del);
134
135 offsetz(result) as usize
136}
137
138#[inline]
139unsafe fn match_header_value_char_16_neon(ptr: *const u8) -> usize {
140 let input = vld1q_u8(ptr);
141
142 let result = vcleq_u8(vdupq_n_u8(b' '), input);
144
145 let tab = vceqq_u8(input, vdupq_n_u8(0x09));
147 let result = vorrq_u8(result, tab);
148
149 let del = vceqq_u8(input, vdupq_n_u8(0x7F));
151 let result = vbicq_u8(result, del);
152
153 offsetz(result) as usize
154}
155
156#[inline]
157unsafe fn offsetz(x: uint8x16_t) -> u32 {
158 offsetnz(vmvnq_u8(x))
160}
161
162#[inline]
163unsafe fn offsetnz(x: uint8x16_t) -> u32 {
164 let x = vreinterpretq_u64_u8(x);
166 let low: u64 = vgetq_lane_u64::<0>(x);
168 let high: u64 = vgetq_lane_u64::<1>(x);
169
170 #[inline]
171 fn clz(x: u64) -> u32 {
172 for (i, b) in x.to_ne_bytes().iter().copied().enumerate() {
175 if b != 0 {
176 return i as u32;
177 }
178 }
179 8 }
181
182 if low != 0 {
183 clz(low)
184 } else if high != 0 {
185 return 8 + clz(high);
186 } else {
187 return 16;
188 }
189}
190
/// Checks that the NEON URI scanner agrees with every entry of
/// `crate::URI_MAP`.
#[test]
fn neon_code_matches_uri_chars_table() {
    #[allow(clippy::undocumented_unsafe_blocks)]
    unsafe {
        // The filler byte used by `byte_is_allowed` must itself be accepted,
        // otherwise the probe position could not be told apart.
        assert!(byte_is_allowed(b'_', match_uri_vectored));

        for (b, &allowed) in crate::URI_MAP.iter().enumerate() {
            let verdict = byte_is_allowed(b as u8, match_uri_vectored);
            assert_eq!(
                verdict, allowed,
                "byte_is_allowed({:?}) should be {:?}",
                b, allowed,
            );
        }
    }
}
208
/// Checks that the NEON header-value scanner agrees with every entry of
/// `crate::HEADER_VALUE_MAP`.
#[test]
fn neon_code_matches_header_value_chars_table() {
    #[allow(clippy::undocumented_unsafe_blocks)]
    unsafe {
        // The filler byte used by `byte_is_allowed` must itself be accepted,
        // otherwise the probe position could not be told apart.
        assert!(byte_is_allowed(b'_', match_header_value_vectored));

        for (b, &allowed) in crate::HEADER_VALUE_MAP.iter().enumerate() {
            let verdict = byte_is_allowed(b as u8, match_header_value_vectored);
            assert_eq!(
                verdict, allowed,
                "byte_is_allowed({:?}) should be {:?}",
                b, allowed,
            );
        }
    }
}
226
/// Checks that the NEON header-name scanner agrees with every entry of
/// `crate::TOKEN_MAP`.
#[test]
fn neon_code_matches_header_name_chars_table() {
    #[allow(clippy::undocumented_unsafe_blocks)]
    unsafe {
        // The filler byte used by `byte_is_allowed` must itself be accepted,
        // otherwise the probe position could not be told apart.
        assert!(byte_is_allowed(b'_', match_header_name_vectored));

        for (b, &allowed) in crate::TOKEN_MAP.iter().enumerate() {
            let verdict = byte_is_allowed(b as u8, match_header_name_vectored);
            assert_eq!(
                verdict, allowed,
                "byte_is_allowed({:?}) should be {:?}",
                b, allowed,
            );
        }
    }
}
244
/// Test helper: reports whether the scanner `f` accepts `byte`.
///
/// A 16-byte buffer of `b'_'` is built with `byte` placed at index 10 and `f`
/// is run over it. A final position of 16 means the whole buffer was accepted
/// (`true`); a final position of 10 means the scan stopped on `byte`
/// (`false`).
///
/// # Panics
///
/// Panics if the scan ends at any position other than 10 or 16.
///
/// # Safety
///
/// `f` is invoked through an `unsafe fn` pointer; the caller must ensure it is
/// sound to call on a 16-byte buffer.
#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
    let slice = {
        let mut filled = [b'_'; 16];
        filled[10] = byte;
        filled
    };
    let mut bytes = Bytes::new(&slice);

    f(&mut bytes);

    let stopped_at = bytes.pos();
    if stopped_at == 16 {
        true
    } else if stopped_at == 10 {
        false
    } else {
        panic!("unexpected pos: {}", stopped_at)
    }
}
258}