Vector Optimized Library of Kernels 3.3.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_32f_s32f_convert_8i.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
59
60#ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H
61#define INCLUDED_volk_32f_s32f_convert_8i_u_H
62
#include <inttypes.h>
#include <math.h>
64
65static inline void volk_32f_s32f_convert_8i_single(int8_t* out, const float in)
66{
67 const float min_val = INT8_MIN;
68 const float max_val = INT8_MAX;
69 if (in > max_val) {
70 *out = (int8_t)(max_val);
71 } else if (in < min_val) {
72 *out = (int8_t)(min_val);
73 } else {
74 *out = (int8_t)(rintf(in));
75 }
76}
77
78#ifdef LV_HAVE_GENERIC
79
/*!
 * \brief Portable reference kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * \param outputVector Destination buffer of num_points int8_t samples.
 * \param inputVector  Source buffer of num_points floats.
 * \param scalar       Gain applied to every sample before conversion.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_32f_s32f_convert_8i_generic(int8_t* outputVector,
                                                    const float* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    for (unsigned int idx = 0; idx < num_points; idx++) {
        const float scaled = inputVector[idx] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[idx], scaled);
    }
}
92
93#endif /* LV_HAVE_GENERIC */
94
95
96#ifdef LV_HAVE_AVX2
97#include <immintrin.h>
98
/*!
 * \brief Unaligned AVX2 kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * Processes 32 samples per iteration; the remainder is handled by the
 * scalar helper. _mm256_cvtps_epi32 rounds to nearest (ties-to-even under
 * the default MXCSR rounding mode), matching rintf() in the tail loop.
 */
static inline void volk_32f_s32f_convert_8i_u_avx2(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    // Number of complete 32-sample chunks handled by the SIMD loop.
    const unsigned int thirtysecondPoints = num_points / 32;

    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = outputVector;

    // Clamp bounds applied in the float domain so the int conversion below
    // cannot overflow int8 range after the saturating packs.
    const float min_val = INT8_MIN;
    const float max_val = INT8_MAX;
    const __m256 vmin_val = _mm256_set1_ps(min_val);
    const __m256 vmax_val = _mm256_set1_ps(max_val);

    const __m256 vScalar = _mm256_set1_ps(scalar);

    for (unsigned int number = 0; number < thirtysecondPoints; number++) {
        // Load 4 x 8 floats with unaligned loads.
        __m256 inputVal1 = _mm256_loadu_ps(inputVectorPtr);
        inputVectorPtr += 8;
        __m256 inputVal2 = _mm256_loadu_ps(inputVectorPtr);
        inputVectorPtr += 8;
        __m256 inputVal3 = _mm256_loadu_ps(inputVectorPtr);
        inputVectorPtr += 8;
        __m256 inputVal4 = _mm256_loadu_ps(inputVectorPtr);
        inputVectorPtr += 8;

        // Scale and clip to [INT8_MIN, INT8_MAX].
        inputVal1 = _mm256_max_ps(
            _mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
        inputVal2 = _mm256_max_ps(
            _mm256_min_ps(_mm256_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
        inputVal3 = _mm256_max_ps(
            _mm256_min_ps(_mm256_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
        inputVal4 = _mm256_max_ps(
            _mm256_min_ps(_mm256_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);

        // Convert to int32 with round-to-nearest.
        __m256i intInputVal1 = _mm256_cvtps_epi32(inputVal1);
        __m256i intInputVal2 = _mm256_cvtps_epi32(inputVal2);
        __m256i intInputVal3 = _mm256_cvtps_epi32(inputVal3);
        __m256i intInputVal4 = _mm256_cvtps_epi32(inputVal4);

        // Saturating pack int32 -> int16. AVX2 packs operate per 128-bit
        // lane, so a 64-bit-element permute (0b11011000) restores
        // sequential sample order after each pack.
        intInputVal1 = _mm256_packs_epi32(intInputVal1, intInputVal2);
        intInputVal1 = _mm256_permute4x64_epi64(intInputVal1, 0b11011000);
        intInputVal3 = _mm256_packs_epi32(intInputVal3, intInputVal4);
        intInputVal3 = _mm256_permute4x64_epi64(intInputVal3, 0b11011000);

        // Saturating pack int16 -> int8, then fix the lane order once more.
        intInputVal1 = _mm256_packs_epi16(intInputVal1, intInputVal3);
        const __m256i intInputVal = _mm256_permute4x64_epi64(intInputVal1, 0b11011000);

        _mm256_storeu_si256((__m256i*)outputVectorPtr, intInputVal);
        outputVectorPtr += 32;
    }

    // Scalar tail for the remaining (num_points % 32) samples.
    for (unsigned int number = thirtysecondPoints * 32; number < num_points; number++) {
        float r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
157
158#endif /* LV_HAVE_AVX2 */
159
160#ifdef LV_HAVE_AVX512F
161#include <immintrin.h>
162
/*!
 * \brief Unaligned AVX-512F kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * Processes 32 samples per iteration (two 16-float vectors); the remainder
 * is handled by the scalar helper. _mm512_cvtps_epi32 rounds to nearest
 * under the default rounding mode, matching rintf() in the tail loop.
 */
static inline void volk_32f_s32f_convert_8i_u_avx512(int8_t* outputVector,
                                                     const float* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    unsigned int number = 0;

    // Number of complete 32-sample chunks handled by the SIMD loop.
    const unsigned int thirtysecondPoints = num_points / 32;

    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = outputVector;

    // Clamp bounds applied in the float domain before conversion.
    float min_val = INT8_MIN;
    float max_val = INT8_MAX;
    float r;

    __m512 vScalar = _mm512_set1_ps(scalar);
    __m512 inputVal1, inputVal2;
    __m512i intInputVal1, intInputVal2;
    __m512 vmin_val = _mm512_set1_ps(min_val);
    __m512 vmax_val = _mm512_set1_ps(max_val);
    __m128i packed_result;

    for (; number < thirtysecondPoints; number++) {
        inputVal1 = _mm512_loadu_ps(inputVectorPtr);
        inputVectorPtr += 16;
        inputVal2 = _mm512_loadu_ps(inputVectorPtr);
        inputVectorPtr += 16;

        // Scale and clip to [INT8_MIN, INT8_MAX].
        inputVal1 = _mm512_max_ps(
            _mm512_min_ps(_mm512_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
        inputVal2 = _mm512_max_ps(
            _mm512_min_ps(_mm512_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);

        // Convert to int32 with round-to-nearest.
        intInputVal1 = _mm512_cvtps_epi32(inputVal1);
        intInputVal2 = _mm512_cvtps_epi32(inputVal2);

        // _mm512_cvtsepi32_epi8 narrows int32 -> int8 directly with signed
        // saturation; no intermediate int16 pack is needed.
        packed_result = _mm512_cvtsepi32_epi8(intInputVal1);
        _mm_storeu_si128((__m128i*)outputVectorPtr, packed_result);
        outputVectorPtr += 16;

        packed_result = _mm512_cvtsepi32_epi8(intInputVal2);
        _mm_storeu_si128((__m128i*)outputVectorPtr, packed_result);
        outputVectorPtr += 16;
    }

    // Scalar tail for the remaining (num_points % 32) samples.
    number = thirtysecondPoints * 32;
    for (; number < num_points; number++) {
        r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
216
217#endif /* LV_HAVE_AVX512F */
218
219
220#ifdef LV_HAVE_SSE2
221#include <emmintrin.h>
222
/*!
 * \brief Unaligned SSE2 kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * Processes 16 samples per iteration; the remainder is handled by the
 * scalar helper. _mm_cvtps_epi32 rounds to nearest (ties-to-even under the
 * default MXCSR rounding mode), matching rintf() in the tail loop.
 */
static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    // Number of complete 16-sample chunks handled by the SIMD loop.
    const unsigned int sixteenthPoints = num_points / 16;

    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = outputVector;

    // Clamp bounds applied in the float domain before conversion.
    const float min_val = INT8_MIN;
    const float max_val = INT8_MAX;
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);

    const __m128 vScalar = _mm_set_ps1(scalar);

    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        // Load 4 x 4 floats with unaligned loads.
        __m128 inputVal1 = _mm_loadu_ps(inputVectorPtr);
        inputVectorPtr += 4;
        __m128 inputVal2 = _mm_loadu_ps(inputVectorPtr);
        inputVectorPtr += 4;
        __m128 inputVal3 = _mm_loadu_ps(inputVectorPtr);
        inputVectorPtr += 4;
        __m128 inputVal4 = _mm_loadu_ps(inputVectorPtr);
        inputVectorPtr += 4;

        // Scale and clip to [INT8_MIN, INT8_MAX].
        inputVal1 =
            _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
        inputVal2 =
            _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
        inputVal3 =
            _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
        inputVal4 =
            _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);

        // Convert to int32 with round-to-nearest.
        __m128i intInputVal1 = _mm_cvtps_epi32(inputVal1);
        __m128i intInputVal2 = _mm_cvtps_epi32(inputVal2);
        __m128i intInputVal3 = _mm_cvtps_epi32(inputVal3);
        __m128i intInputVal4 = _mm_cvtps_epi32(inputVal4);

        // Saturating pack int32 -> int16 -> int8. SSE2 packs cover the full
        // 128-bit register, so no reordering is needed afterwards.
        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
        intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);

        intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);

        _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
        outputVectorPtr += 16;
    }

    // Scalar tail for the remaining (num_points % 16) samples.
    for (unsigned int number = sixteenthPoints * 16; number < num_points; number++) {
        const float r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
278
279#endif /* LV_HAVE_SSE2 */
280
281
282#ifdef LV_HAVE_SSE
283#include <xmmintrin.h>
284
/*!
 * \brief Unaligned SSE kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * SSE has no packed float->int8 path, so four samples are scaled/clipped
 * with SIMD, spilled to an aligned staging buffer, and rounded one at a
 * time with rintf(). The remainder is handled by the scalar helper.
 */
static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector,
                                                  const float* inputVector,
                                                  const float scalar,
                                                  unsigned int num_points)
{
    // Number of complete 4-sample chunks handled by the SIMD loop.
    const unsigned int quarterPoints = num_points / 4;

    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = outputVector;

    // Clamp bounds applied in the float domain before rounding.
    const float min_val = INT8_MIN;
    const float max_val = INT8_MAX;
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);

    const __m128 vScalar = _mm_set_ps1(scalar);

    // 16-byte-aligned staging buffer for the aligned _mm_store_ps below.
    __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];

    for (unsigned int number = 0; number < quarterPoints; number++) {
        __m128 ret = _mm_loadu_ps(inputVectorPtr);
        inputVectorPtr += 4;

        // Scale and clip to [INT8_MIN, INT8_MAX].
        ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);

        // Round each clipped value to the nearest integer in scalar code.
        _mm_store_ps(outputFloatBuffer, ret);
        for (size_t inner_loop = 0; inner_loop < 4; inner_loop++) {
            *outputVectorPtr++ = (int8_t)(rintf(outputFloatBuffer[inner_loop]));
        }
    }

    // Scalar tail for the remaining (num_points % 4) samples.
    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        const float r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
321
322#endif /* LV_HAVE_SSE */
323
324
325#endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */
326#ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H
327#define INCLUDED_volk_32f_s32f_convert_8i_a_H
328
329#include <inttypes.h>
330
331#ifdef LV_HAVE_AVX2
332#include <immintrin.h>
333
/*!
 * \brief Aligned AVX2 kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * Identical to the unaligned variant except for aligned loads/stores:
 * inputVector and outputVector must be 32-byte aligned. Processes 32
 * samples per iteration; the remainder is handled by the scalar helper.
 */
static inline void volk_32f_s32f_convert_8i_a_avx2(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    // Number of complete 32-sample chunks handled by the SIMD loop.
    const unsigned int thirtysecondPoints = num_points / 32;

    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = outputVector;

    // Clamp bounds applied in the float domain before conversion.
    const float min_val = INT8_MIN;
    const float max_val = INT8_MAX;
    const __m256 vmin_val = _mm256_set1_ps(min_val);
    const __m256 vmax_val = _mm256_set1_ps(max_val);

    const __m256 vScalar = _mm256_set1_ps(scalar);

    for (unsigned int number = 0; number < thirtysecondPoints; number++) {
        // Load 4 x 8 floats with aligned loads.
        __m256 inputVal1 = _mm256_load_ps(inputVectorPtr);
        inputVectorPtr += 8;
        __m256 inputVal2 = _mm256_load_ps(inputVectorPtr);
        inputVectorPtr += 8;
        __m256 inputVal3 = _mm256_load_ps(inputVectorPtr);
        inputVectorPtr += 8;
        __m256 inputVal4 = _mm256_load_ps(inputVectorPtr);
        inputVectorPtr += 8;

        // Scale and clip to [INT8_MIN, INT8_MAX].
        inputVal1 = _mm256_max_ps(
            _mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
        inputVal2 = _mm256_max_ps(
            _mm256_min_ps(_mm256_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
        inputVal3 = _mm256_max_ps(
            _mm256_min_ps(_mm256_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
        inputVal4 = _mm256_max_ps(
            _mm256_min_ps(_mm256_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);

        // Convert to int32 with round-to-nearest.
        __m256i intInputVal1 = _mm256_cvtps_epi32(inputVal1);
        __m256i intInputVal2 = _mm256_cvtps_epi32(inputVal2);
        __m256i intInputVal3 = _mm256_cvtps_epi32(inputVal3);
        __m256i intInputVal4 = _mm256_cvtps_epi32(inputVal4);

        // Saturating pack int32 -> int16. AVX2 packs operate per 128-bit
        // lane, so a 64-bit-element permute (0b11011000) restores
        // sequential sample order after each pack.
        intInputVal1 = _mm256_packs_epi32(intInputVal1, intInputVal2);
        intInputVal1 = _mm256_permute4x64_epi64(intInputVal1, 0b11011000);
        intInputVal3 = _mm256_packs_epi32(intInputVal3, intInputVal4);
        intInputVal3 = _mm256_permute4x64_epi64(intInputVal3, 0b11011000);

        // Saturating pack int16 -> int8, then fix the lane order once more.
        intInputVal1 = _mm256_packs_epi16(intInputVal1, intInputVal3);
        __m256i intInputVal = _mm256_permute4x64_epi64(intInputVal1, 0b11011000);

        _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal);
        outputVectorPtr += 32;
    }

    // Scalar tail for the remaining (num_points % 32) samples.
    for (unsigned int number = thirtysecondPoints * 32; number < num_points; number++) {
        const float r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
392
393#endif /* LV_HAVE_AVX2 */
394
395#ifdef LV_HAVE_AVX512F
396#include <immintrin.h>
397
/*!
 * \brief Aligned AVX-512F kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * Identical to the unaligned variant except for aligned loads/stores:
 * inputVector must be 64-byte aligned and outputVector 16-byte aligned for
 * the _mm_store_si128 below. Processes 32 samples per iteration.
 */
static inline void volk_32f_s32f_convert_8i_a_avx512(int8_t* outputVector,
                                                     const float* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    unsigned int number = 0;

    // Number of complete 32-sample chunks handled by the SIMD loop.
    const unsigned int thirtysecondPoints = num_points / 32;

    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = outputVector;

    // Clamp bounds applied in the float domain before conversion.
    float min_val = INT8_MIN;
    float max_val = INT8_MAX;
    float r;

    __m512 vScalar = _mm512_set1_ps(scalar);
    __m512 inputVal1, inputVal2;
    __m512i intInputVal1, intInputVal2;
    __m512 vmin_val = _mm512_set1_ps(min_val);
    __m512 vmax_val = _mm512_set1_ps(max_val);
    __m128i packed_result;

    for (; number < thirtysecondPoints; number++) {
        inputVal1 = _mm512_load_ps(inputVectorPtr);
        inputVectorPtr += 16;
        inputVal2 = _mm512_load_ps(inputVectorPtr);
        inputVectorPtr += 16;

        // Scale and clip to [INT8_MIN, INT8_MAX].
        inputVal1 = _mm512_max_ps(
            _mm512_min_ps(_mm512_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
        inputVal2 = _mm512_max_ps(
            _mm512_min_ps(_mm512_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);

        // Convert to int32 with round-to-nearest.
        intInputVal1 = _mm512_cvtps_epi32(inputVal1);
        intInputVal2 = _mm512_cvtps_epi32(inputVal2);

        // _mm512_cvtsepi32_epi8 narrows int32 -> int8 directly with signed
        // saturation; no intermediate int16 pack is needed.
        packed_result = _mm512_cvtsepi32_epi8(intInputVal1);
        _mm_store_si128((__m128i*)outputVectorPtr, packed_result);
        outputVectorPtr += 16;

        packed_result = _mm512_cvtsepi32_epi8(intInputVal2);
        _mm_store_si128((__m128i*)outputVectorPtr, packed_result);
        outputVectorPtr += 16;
    }

    // Scalar tail for the remaining (num_points % 32) samples.
    number = thirtysecondPoints * 32;
    for (; number < num_points; number++) {
        r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
451
452#endif /* LV_HAVE_AVX512F */
453
454
455#ifdef LV_HAVE_SSE2
456#include <emmintrin.h>
457
/*!
 * \brief Aligned SSE2 kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * Identical to the unaligned variant except for aligned loads/stores:
 * inputVector and outputVector must be 16-byte aligned. Processes 16
 * samples per iteration; the remainder is handled by the scalar helper.
 */
static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    // Number of complete 16-sample chunks handled by the SIMD loop.
    const unsigned int sixteenthPoints = num_points / 16;

    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = outputVector;

    // Clamp bounds applied in the float domain before conversion.
    const float min_val = INT8_MIN;
    const float max_val = INT8_MAX;
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);

    const __m128 vScalar = _mm_set_ps1(scalar);

    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        // Load 4 x 4 floats with aligned loads.
        __m128 inputVal1 = _mm_load_ps(inputVectorPtr);
        inputVectorPtr += 4;
        __m128 inputVal2 = _mm_load_ps(inputVectorPtr);
        inputVectorPtr += 4;
        __m128 inputVal3 = _mm_load_ps(inputVectorPtr);
        inputVectorPtr += 4;
        __m128 inputVal4 = _mm_load_ps(inputVectorPtr);
        inputVectorPtr += 4;

        // Scale and clip to [INT8_MIN, INT8_MAX].
        inputVal1 =
            _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
        inputVal2 =
            _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
        inputVal3 =
            _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
        inputVal4 =
            _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);

        // Convert to int32 with round-to-nearest.
        __m128i intInputVal1 = _mm_cvtps_epi32(inputVal1);
        __m128i intInputVal2 = _mm_cvtps_epi32(inputVal2);
        __m128i intInputVal3 = _mm_cvtps_epi32(inputVal3);
        __m128i intInputVal4 = _mm_cvtps_epi32(inputVal4);

        // Saturating pack int32 -> int16 -> int8. SSE2 packs cover the full
        // 128-bit register, so no reordering is needed afterwards.
        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
        intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);

        intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);

        _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
        outputVectorPtr += 16;
    }

    // Scalar tail for the remaining (num_points % 16) samples.
    for (unsigned int number = sixteenthPoints * 16; number < num_points; number++) {
        const float r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
513#endif /* LV_HAVE_SSE2 */
514
515
516#ifdef LV_HAVE_SSE
517#include <xmmintrin.h>
518
/*!
 * \brief Aligned SSE kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * Identical to the unaligned variant except that inputVector must be
 * 16-byte aligned. SSE has no packed float->int8 path, so four samples are
 * scaled/clipped with SIMD, spilled to an aligned staging buffer, and
 * rounded one at a time with rintf().
 */
static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector,
                                                  const float* inputVector,
                                                  const float scalar,
                                                  unsigned int num_points)
{
    // Number of complete 4-sample chunks handled by the SIMD loop.
    const unsigned int quarterPoints = num_points / 4;

    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = outputVector;

    // Clamp bounds applied in the float domain before rounding.
    const float min_val = INT8_MIN;
    const float max_val = INT8_MAX;
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);

    const __m128 vScalar = _mm_set_ps1(scalar);

    // 16-byte-aligned staging buffer for the aligned _mm_store_ps below.
    __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];

    for (unsigned int number = 0; number < quarterPoints; number++) {
        __m128 ret = _mm_load_ps(inputVectorPtr);
        inputVectorPtr += 4;

        // Scale and clip to [INT8_MIN, INT8_MAX].
        ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);

        // Round each clipped value to the nearest integer in scalar code.
        _mm_store_ps(outputFloatBuffer, ret);
        for (size_t inner_loop = 0; inner_loop < 4; inner_loop++) {
            *outputVectorPtr++ = (int8_t)(rintf(outputFloatBuffer[inner_loop]));
        }
    }

    // Scalar tail for the remaining (num_points % 4) samples.
    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        const float r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
555
556#endif /* LV_HAVE_SSE */
557
558
559#ifdef LV_HAVE_NEON
560#include <arm_neon.h>
561
/*!
 * \brief NEON (ARMv7) kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * Processes 16 samples per iteration; the remainder is handled by the
 * scalar helper.
 *
 * NOTE(review): vcvtq_s32_f32 truncates toward zero, so this kernel adds
 * copysign(0.5, x) first, giving round-half-away-from-zero. That differs
 * from the tail's rintf() (ties-to-even) for inputs exactly halfway
 * between integers -- confirm this tolerance is acceptable.
 */
static inline void volk_32f_s32f_convert_8i_neon(int8_t* outputVector,
                                                 const float* inputVector,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    // Number of complete 16-sample chunks handled by the SIMD loop.
    const unsigned int sixteenthPoints = num_points / 16;

    const float* inputVectorPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;

    // Clamp bounds applied in the float domain before conversion.
    const float min_val = INT8_MIN;
    const float max_val = INT8_MAX;

    float32x4_t vScalar = vdupq_n_f32(scalar);
    float32x4_t vmin_val = vdupq_n_f32(min_val);
    float32x4_t vmax_val = vdupq_n_f32(max_val);
    // Constants for the sign-aware pre-rounding offset.
    float32x4_t half = vdupq_n_f32(0.5f);
    float32x4_t neg_half = vdupq_n_f32(-0.5f);
    float32x4_t zero = vdupq_n_f32(0.0f);

    for (; number < sixteenthPoints; number++) {
        float32x4_t inputVal0 = vld1q_f32(inputVectorPtr);
        float32x4_t inputVal1 = vld1q_f32(inputVectorPtr + 4);
        float32x4_t inputVal2 = vld1q_f32(inputVectorPtr + 8);
        float32x4_t inputVal3 = vld1q_f32(inputVectorPtr + 12);
        inputVectorPtr += 16;

        // Scale and clip
        float32x4_t ret0 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal0, vScalar), vmax_val), vmin_val);
        float32x4_t ret1 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal1, vScalar), vmax_val), vmin_val);
        float32x4_t ret2 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal2, vScalar), vmax_val), vmin_val);
        float32x4_t ret3 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal3, vScalar), vmax_val), vmin_val);

        // Round to nearest: add copysign(0.5, x) before truncating
        uint32x4_t neg0 = vcltq_f32(ret0, zero);
        uint32x4_t neg1 = vcltq_f32(ret1, zero);
        uint32x4_t neg2 = vcltq_f32(ret2, zero);
        uint32x4_t neg3 = vcltq_f32(ret3, zero);
        ret0 = vaddq_f32(ret0, vbslq_f32(neg0, neg_half, half));
        ret1 = vaddq_f32(ret1, vbslq_f32(neg1, neg_half, half));
        ret2 = vaddq_f32(ret2, vbslq_f32(neg2, neg_half, half));
        ret3 = vaddq_f32(ret3, vbslq_f32(neg3, neg_half, half));

        // Convert to int32 (truncates towards zero, but we pre-rounded)
        int32x4_t intVal0 = vcvtq_s32_f32(ret0);
        int32x4_t intVal1 = vcvtq_s32_f32(ret1);
        int32x4_t intVal2 = vcvtq_s32_f32(ret2);
        int32x4_t intVal3 = vcvtq_s32_f32(ret3);

        // Narrow to int16 with saturation
        int16x4_t narrow16_0 = vqmovn_s32(intVal0);
        int16x4_t narrow16_1 = vqmovn_s32(intVal1);
        int16x4_t narrow16_2 = vqmovn_s32(intVal2);
        int16x4_t narrow16_3 = vqmovn_s32(intVal3);
        int16x8_t wide16_0 = vcombine_s16(narrow16_0, narrow16_1);
        int16x8_t wide16_1 = vcombine_s16(narrow16_2, narrow16_3);

        // Narrow to int8 with saturation
        int8x8_t narrow8_0 = vqmovn_s16(wide16_0);
        int8x8_t narrow8_1 = vqmovn_s16(wide16_1);
        int8x16_t result = vcombine_s8(narrow8_0, narrow8_1);

        vst1q_s8(outputVectorPtr, result);
        outputVectorPtr += 16;
    }

    // Scalar tail for the remaining (num_points % 16) samples.
    number = sixteenthPoints * 16;
    for (; number < num_points; number++) {
        float r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
639#endif /* LV_HAVE_NEON */
640
641
642#ifdef LV_HAVE_NEONV8
643#include <arm_neon.h>
644
/*!
 * \brief NEON (ARMv8) kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * Processes 32 samples per iteration; the remainder is handled by the
 * scalar helper. vcvtnq_s32_f32 rounds to nearest with ties-to-even,
 * matching rintf() in the tail loop.
 */
static inline void volk_32f_s32f_convert_8i_neonv8(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    unsigned int number = 0;
    // Number of complete 32-sample chunks handled by the SIMD loop.
    const unsigned int thirtysecondPoints = num_points / 32;

    const float* inputVectorPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;

    // Clamp bounds applied in the float domain before conversion.
    const float min_val = INT8_MIN;
    const float max_val = INT8_MAX;

    float32x4_t vScalar = vdupq_n_f32(scalar);
    float32x4_t vmin_val = vdupq_n_f32(min_val);
    float32x4_t vmax_val = vdupq_n_f32(max_val);

    for (; number < thirtysecondPoints; number++) {
        // Load 8 x 4 floats and prefetch the next chunk.
        float32x4_t inputVal0 = vld1q_f32(inputVectorPtr);
        float32x4_t inputVal1 = vld1q_f32(inputVectorPtr + 4);
        float32x4_t inputVal2 = vld1q_f32(inputVectorPtr + 8);
        float32x4_t inputVal3 = vld1q_f32(inputVectorPtr + 12);
        float32x4_t inputVal4 = vld1q_f32(inputVectorPtr + 16);
        float32x4_t inputVal5 = vld1q_f32(inputVectorPtr + 20);
        float32x4_t inputVal6 = vld1q_f32(inputVectorPtr + 24);
        float32x4_t inputVal7 = vld1q_f32(inputVectorPtr + 28);
        __VOLK_PREFETCH(inputVectorPtr + 32);
        inputVectorPtr += 32;

        // Scale and clip
        float32x4_t ret0 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal0, vScalar), vmax_val), vmin_val);
        float32x4_t ret1 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal1, vScalar), vmax_val), vmin_val);
        float32x4_t ret2 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal2, vScalar), vmax_val), vmin_val);
        float32x4_t ret3 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal3, vScalar), vmax_val), vmin_val);
        float32x4_t ret4 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal4, vScalar), vmax_val), vmin_val);
        float32x4_t ret5 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal5, vScalar), vmax_val), vmin_val);
        float32x4_t ret6 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal6, vScalar), vmax_val), vmin_val);
        float32x4_t ret7 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal7, vScalar), vmax_val), vmin_val);

        // Convert to int32 using round-to-nearest (ARMv8)
        int32x4_t intVal0 = vcvtnq_s32_f32(ret0);
        int32x4_t intVal1 = vcvtnq_s32_f32(ret1);
        int32x4_t intVal2 = vcvtnq_s32_f32(ret2);
        int32x4_t intVal3 = vcvtnq_s32_f32(ret3);
        int32x4_t intVal4 = vcvtnq_s32_f32(ret4);
        int32x4_t intVal5 = vcvtnq_s32_f32(ret5);
        int32x4_t intVal6 = vcvtnq_s32_f32(ret6);
        int32x4_t intVal7 = vcvtnq_s32_f32(ret7);

        // Narrow to int16 with saturation
        int16x4_t narrow16_0 = vqmovn_s32(intVal0);
        int16x4_t narrow16_1 = vqmovn_s32(intVal1);
        int16x4_t narrow16_2 = vqmovn_s32(intVal2);
        int16x4_t narrow16_3 = vqmovn_s32(intVal3);
        int16x4_t narrow16_4 = vqmovn_s32(intVal4);
        int16x4_t narrow16_5 = vqmovn_s32(intVal5);
        int16x4_t narrow16_6 = vqmovn_s32(intVal6);
        int16x4_t narrow16_7 = vqmovn_s32(intVal7);

        int16x8_t wide16_0 = vcombine_s16(narrow16_0, narrow16_1);
        int16x8_t wide16_1 = vcombine_s16(narrow16_2, narrow16_3);
        int16x8_t wide16_2 = vcombine_s16(narrow16_4, narrow16_5);
        int16x8_t wide16_3 = vcombine_s16(narrow16_6, narrow16_7);

        // Narrow to int8 with saturation
        int8x8_t narrow8_0 = vqmovn_s16(wide16_0);
        int8x8_t narrow8_1 = vqmovn_s16(wide16_1);
        int8x8_t narrow8_2 = vqmovn_s16(wide16_2);
        int8x8_t narrow8_3 = vqmovn_s16(wide16_3);

        int8x16_t result0 = vcombine_s8(narrow8_0, narrow8_1);
        int8x16_t result1 = vcombine_s8(narrow8_2, narrow8_3);

        vst1q_s8(outputVectorPtr, result0);
        vst1q_s8(outputVectorPtr + 16, result1);
        outputVectorPtr += 32;
    }

    // Scalar tail for the remaining (num_points % 32) samples.
    number = thirtysecondPoints * 32;
    for (; number < num_points; number++) {
        float r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
738#endif /* LV_HAVE_NEONV8 */
739
740
741#ifdef LV_HAVE_RVV
742#include <riscv_vector.h>
743
/*!
 * \brief RISC-V Vector kernel: out[i] = saturate_int8(in[i] * scalar).
 *
 * NOTE(review): __riscv_vfncvt_x narrows f32 -> i16 (saturating on
 * overflow, using the current floating-point rounding mode) and
 * __riscv_vnclip with shift 0 narrows i16 -> i8 with signed saturation --
 * confirm the active rounding modes match the rintf() behavior of the
 * generic kernel.
 */
static inline void volk_32f_s32f_convert_8i_rvv(int8_t* outputVector,
                                                const float* inputVector,
                                                const float scalar,
                                                unsigned int num_points)
{
    size_t n = num_points;
    // Strip-mining loop: process vl lanes per pass until all samples are done.
    for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t v = __riscv_vle32_v_f32m8(inputVector, vl);
        vint16m4_t vi = __riscv_vfncvt_x(__riscv_vfmul(v, scalar, vl), vl);
        __riscv_vse8(outputVector, __riscv_vnclip(vi, 0, 0, vl), vl);
    }
}
757#endif /*LV_HAVE_RVV*/
758
759#endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */