51#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
52#define INCLUDED_volk_32i_s32f_convert_32f_u_H
/*
 * AVX-512 kernel, unaligned variant: converts each 32-bit signed integer of
 * inputVector to float, multiplies it by 1/scalar and writes the result to
 * outputVector. num_points is the number of elements to process.
 *
 * NOTE(review): this listing appears to be missing lines. `scalar`,
 * `inputVal` and `ret` are used below without a visible declaration, the
 * function braces are not visible, `inputPtr` is never visibly advanced
 * inside the vector loop, and every line carries a fused numeric prefix.
 * Confirm against the complete file before relying on this text.
 */
60static inline void volk_32i_s32f_convert_32f_u_avx512f(
float* outputVector,
61 const int32_t* inputVector,
63 unsigned int num_points)
65 unsigned int number = 0;
// Number of full 16-element groups handled by the vector loop.
66 const unsigned int onesixteenthPoints = num_points / 16;
68 float* outputVectorPtr = outputVector;
// The reciprocal is computed once so both loops multiply instead of divide.
// The literal 1.0 is a double, so the division is not done in single
// precision; the result is then narrowed to float.
69 const float iScalar = 1.0 / scalar;
// Broadcast the reciprocal to every float lane of a 512-bit register.
70 __m512 invScalar = _mm512_set1_ps(iScalar);
// The cast drops the const qualifier; the visible code only reads through it.
71 int32_t* inputPtr = (int32_t*)inputVector;
75 for (; number < onesixteenthPoints; number++) {
// Unaligned 512-bit load (16 x int32).
77 inputVal = _mm512_loadu_si512((__m512i*)inputPtr);
// Convert int32 lanes to float, then scale by the reciprocal.
79 ret = _mm512_cvtepi32_ps(inputVal);
80 ret = _mm512_mul_ps(ret, invScalar);
// Unaligned store of 16 floats.
82 _mm512_storeu_ps(outputVectorPtr, ret);
84 outputVectorPtr += 16;
// Scalar tail: handles the num_points % 16 elements left after the vector loop.
88 number = onesixteenthPoints * 16;
89 for (; number < num_points; number++) {
90 outputVector[number] = ((float)(inputVector[number])) * iScalar;
/*
 * 256-bit kernel, unaligned variant: converts each 32-bit signed integer of
 * inputVector to float, multiplies it by 1/scalar and writes the result to
 * outputVector. num_points is the number of elements to process.
 *
 * NOTE(review): this listing appears to be missing lines. `scalar`,
 * `inputVal` and `ret` are used below without a visible declaration, the
 * function braces are not visible, `inputPtr` is never visibly advanced
 * inside the vector loop, and every line carries a fused numeric prefix.
 * Confirm against the complete file before relying on this text.
 */
99static inline void volk_32i_s32f_convert_32f_u_avx2(
float* outputVector,
100 const int32_t* inputVector,
102 unsigned int num_points)
104 unsigned int number = 0;
// Number of full 8-element groups handled by the vector loop.
105 const unsigned int oneEightPoints = num_points / 8;
107 float* outputVectorPtr = outputVector;
// The reciprocal is computed once so both loops multiply instead of divide.
// The literal 1.0 is a double, so the division is not done in single
// precision; the result is then narrowed to float.
108 const float iScalar = 1.0 / scalar;
// Broadcast the reciprocal to every float lane of a 256-bit register.
109 __m256 invScalar = _mm256_set1_ps(iScalar);
// The cast drops the const qualifier; the visible code only reads through it.
110 int32_t* inputPtr = (int32_t*)inputVector;
114 for (; number < oneEightPoints; number++) {
// Unaligned 256-bit load (8 x int32).
116 inputVal = _mm256_loadu_si256((__m256i*)inputPtr);
// Convert int32 lanes to float, then scale by the reciprocal.
118 ret = _mm256_cvtepi32_ps(inputVal);
119 ret = _mm256_mul_ps(ret, invScalar);
// Unaligned store of 8 floats.
121 _mm256_storeu_ps(outputVectorPtr, ret);
123 outputVectorPtr += 8;
// Scalar tail: handles the num_points % 8 elements left after the vector loop.
127 number = oneEightPoints * 8;
128 for (; number < num_points; number++) {
129 outputVector[number] = ((float)(inputVector[number])) * iScalar;
136#include <emmintrin.h>
/*
 * 128-bit SSE2 kernel body, unaligned variant: converts each 32-bit signed
 * integer of inputVector to float, multiplies it by 1/scalar and writes the
 * result to outputVector. num_points is the number of elements to process.
 *
 * NOTE(review): the line carrying the function name and first parameter is
 * not visible in this excerpt, so the identifier cannot be confirmed here.
 * `scalar`, `inputVal` and `ret` are used without a visible declaration, the
 * function braces are not visible, `inputPtr` is never visibly advanced
 * inside the vector loop, and every line carries a fused numeric prefix.
 * Confirm against the complete file before relying on this text.
 */
139 const int32_t* inputVector,
141 unsigned int num_points)
143 unsigned int number = 0;
// Number of full 4-element groups handled by the vector loop.
144 const unsigned int quarterPoints = num_points / 4;
146 float* outputVectorPtr = outputVector;
// The reciprocal is computed once so both loops multiply instead of divide.
// The literal 1.0 is a double, so the division is not done in single
// precision; the result is then narrowed to float.
147 const float iScalar = 1.0 / scalar;
// Broadcast the reciprocal to all four float lanes.
148 __m128 invScalar = _mm_set_ps1(iScalar);
// The cast drops the const qualifier; the visible code only reads through it.
149 int32_t* inputPtr = (int32_t*)inputVector;
153 for (; number < quarterPoints; number++) {
// Unaligned 128-bit load (4 x int32).
155 inputVal = _mm_loadu_si128((__m128i*)inputPtr);
// Convert int32 lanes to float, then scale by the reciprocal.
157 ret = _mm_cvtepi32_ps(inputVal);
158 ret = _mm_mul_ps(ret, invScalar);
// Unaligned store of 4 floats.
160 _mm_storeu_ps(outputVectorPtr, ret);
162 outputVectorPtr += 4;
// Scalar tail: handles the num_points % 4 elements left after the vector loop.
166 number = quarterPoints * 4;
167 for (; number < num_points; number++) {
168 outputVector[number] = ((float)(inputVector[number])) * iScalar;
174#ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel body: converts each 32-bit signed integer of
 * inputVector to float, multiplies it by 1/scalar and writes the result to
 * outputVector, one element at a time, for num_points elements.
 *
 * NOTE(review): the line carrying the function name and first parameter is
 * not visible in this excerpt, so the identifier cannot be confirmed here.
 * `scalar` is used without a visible declaration, the function braces are
 * not visible, and every line carries a fused numeric prefix. Confirm
 * against the complete file before relying on this text.
 */
177 const int32_t* inputVector,
179 unsigned int num_points)
// Walking pointers for the output and input arrays.
181 float* outputVectorPtr = outputVector;
182 const int32_t* inputVectorPtr = inputVector;
// This initializer is redundant: the for loop below resets number to 0.
183 unsigned int number = 0;
// The reciprocal is computed once so the loop multiplies instead of divides.
// The literal 1.0 is a double, so the division is not done in single
// precision; the result is then narrowed to float.
184 const float iScalar = 1.0 / scalar;
186 for (number = 0; number < num_points; number++) {
// Read one int32, convert to float, scale, store; both pointers advance by one.
187 *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
195#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
196#define INCLUDED_volk_32i_s32f_convert_32f_a_H
201#ifdef LV_HAVE_AVX512F
202#include <immintrin.h>
/*
 * AVX-512 kernel, aligned variant: converts each 32-bit signed integer of
 * inputVector to float, multiplies it by 1/scalar and writes the result to
 * outputVector. num_points is the number of elements to process.
 *
 * The vector loop uses the aligned load/store intrinsics, which per the
 * Intel intrinsics documentation require 64-byte-aligned addresses; nothing
 * in the visible code checks alignment.
 *
 * NOTE(review): this listing appears to be missing lines. `scalar`,
 * `inputVal` and `ret` are used below without a visible declaration, the
 * function braces are not visible, `inputPtr` is never visibly advanced
 * inside the vector loop, and every line carries a fused numeric prefix.
 * Confirm against the complete file before relying on this text.
 */
204static inline void volk_32i_s32f_convert_32f_a_avx512f(
float* outputVector,
205 const int32_t* inputVector,
207 unsigned int num_points)
209 unsigned int number = 0;
// Number of full 16-element groups handled by the vector loop.
210 const unsigned int onesixteenthPoints = num_points / 16;
212 float* outputVectorPtr = outputVector;
// The reciprocal is computed once so both loops multiply instead of divide.
// The literal 1.0 is a double, so the division is not done in single
// precision; the result is then narrowed to float.
213 const float iScalar = 1.0 / scalar;
// Broadcast the reciprocal to every float lane of a 512-bit register.
214 __m512 invScalar = _mm512_set1_ps(iScalar);
// The cast drops the const qualifier; the visible code only reads through it.
215 int32_t* inputPtr = (int32_t*)inputVector;
219 for (; number < onesixteenthPoints; number++) {
// Aligned 512-bit load (16 x int32).
221 inputVal = _mm512_load_si512((__m512i*)inputPtr);
// Convert int32 lanes to float, then scale by the reciprocal.
223 ret = _mm512_cvtepi32_ps(inputVal);
224 ret = _mm512_mul_ps(ret, invScalar);
// Aligned store of 16 floats.
226 _mm512_store_ps(outputVectorPtr, ret);
228 outputVectorPtr += 16;
// Scalar tail: handles the num_points % 16 elements left after the vector loop.
232 number = onesixteenthPoints * 16;
233 for (; number < num_points; number++) {
234 outputVector[number] = ((float)(inputVector[number])) * iScalar;
240#include <immintrin.h>
/*
 * 256-bit kernel, aligned variant: converts each 32-bit signed integer of
 * inputVector to float, multiplies it by 1/scalar and writes the result to
 * outputVector. num_points is the number of elements to process.
 *
 * The vector loop uses the aligned load/store intrinsics, which per the
 * Intel intrinsics documentation require 32-byte-aligned addresses; nothing
 * in the visible code checks alignment.
 *
 * NOTE(review): this listing appears to be missing lines. `scalar`,
 * `inputVal` and `ret` are used below without a visible declaration, the
 * function braces are not visible, `inputPtr` is never visibly advanced
 * inside the vector loop, and every line carries a fused numeric prefix.
 * Confirm against the complete file before relying on this text.
 */
242static inline void volk_32i_s32f_convert_32f_a_avx2(
float* outputVector,
243 const int32_t* inputVector,
245 unsigned int num_points)
247 unsigned int number = 0;
// Number of full 8-element groups handled by the vector loop.
248 const unsigned int oneEightPoints = num_points / 8;
250 float* outputVectorPtr = outputVector;
// The reciprocal is computed once so both loops multiply instead of divide.
// The literal 1.0 is a double, so the division is not done in single
// precision; the result is then narrowed to float.
251 const float iScalar = 1.0 / scalar;
// Broadcast the reciprocal to every float lane of a 256-bit register.
252 __m256 invScalar = _mm256_set1_ps(iScalar);
// The cast drops the const qualifier; the visible code only reads through it.
253 int32_t* inputPtr = (int32_t*)inputVector;
257 for (; number < oneEightPoints; number++) {
// Aligned 256-bit load (8 x int32).
259 inputVal = _mm256_load_si256((__m256i*)inputPtr);
// Convert int32 lanes to float, then scale by the reciprocal.
261 ret = _mm256_cvtepi32_ps(inputVal);
262 ret = _mm256_mul_ps(ret, invScalar);
// Aligned store of 8 floats.
264 _mm256_store_ps(outputVectorPtr, ret);
266 outputVectorPtr += 8;
// Scalar tail: handles the num_points % 8 elements left after the vector loop.
270 number = oneEightPoints * 8;
271 for (; number < num_points; number++) {
272 outputVector[number] = ((float)(inputVector[number])) * iScalar;
279#include <emmintrin.h>
/*
 * 128-bit SSE2 kernel body, aligned variant: converts each 32-bit signed
 * integer of inputVector to float, multiplies it by 1/scalar and writes the
 * result to outputVector. num_points is the number of elements to process.
 *
 * The vector loop uses the aligned load/store intrinsics, which per the
 * Intel intrinsics documentation require 16-byte-aligned addresses; nothing
 * in the visible code checks alignment.
 *
 * NOTE(review): the line carrying the function name and first parameter is
 * not visible in this excerpt, so the identifier cannot be confirmed here.
 * `scalar`, `inputVal` and `ret` are used without a visible declaration, the
 * function braces are not visible, `inputPtr` is never visibly advanced
 * inside the vector loop, and every line carries a fused numeric prefix.
 * Confirm against the complete file before relying on this text.
 */
282 const int32_t* inputVector,
284 unsigned int num_points)
286 unsigned int number = 0;
// Number of full 4-element groups handled by the vector loop.
287 const unsigned int quarterPoints = num_points / 4;
289 float* outputVectorPtr = outputVector;
// The reciprocal is computed once so both loops multiply instead of divide.
// The literal 1.0 is a double, so the division is not done in single
// precision; the result is then narrowed to float.
290 const float iScalar = 1.0 / scalar;
// Broadcast the reciprocal to all four float lanes.
291 __m128 invScalar = _mm_set_ps1(iScalar);
// The cast drops the const qualifier; the visible code only reads through it.
292 int32_t* inputPtr = (int32_t*)inputVector;
296 for (; number < quarterPoints; number++) {
// Aligned 128-bit load (4 x int32).
298 inputVal = _mm_load_si128((__m128i*)inputPtr);
// Convert int32 lanes to float, then scale by the reciprocal.
300 ret = _mm_cvtepi32_ps(inputVal);
301 ret = _mm_mul_ps(ret, invScalar);
// Aligned store of 4 floats.
303 _mm_store_ps(outputVectorPtr, ret);
305 outputVectorPtr += 4;
// Scalar tail: handles the num_points % 4 elements left after the vector loop.
309 number = quarterPoints * 4;
310 for (; number < num_points; number++) {
311 outputVector[number] = ((float)(inputVector[number])) * iScalar;
/*
 * NEON kernel body: converts each 32-bit signed integer of inputVector to
 * float, multiplies it by 1/scalar and writes the result to outputVector,
 * four elements per vector iteration. num_points is the number of elements
 * to process.
 *
 * NOTE(review): the line carrying the function name and first parameter is
 * not visible in this excerpt, so the identifier cannot be confirmed here.
 * `scalar` is used without a visible declaration, the function braces are
 * not visible, `inputPtr` is never visibly advanced inside the vector loop,
 * and every line carries a fused numeric prefix. Confirm against the
 * complete file before relying on this text.
 */
321 const int32_t* inputVector,
323 unsigned int num_points)
325 unsigned int number = 0;
// Number of full 4-element groups handled by the vector loop.
326 const unsigned int quarterPoints = num_points / 4;
328 float* outputVectorPtr = outputVector;
329 const int32_t* inputPtr = inputVector;
// The reciprocal is computed once so both loops multiply instead of divide.
330 const float iScalar = 1.0f / scalar;
// Broadcast the reciprocal to all four float lanes.
331 float32x4_t invScalar = vdupq_n_f32(iScalar);
333 for (; number < quarterPoints; number++) {
// Load 4 x int32, convert to float, scale, store 4 floats.
334 int32x4_t inputVal = vld1q_s32(inputPtr);
335 float32x4_t ret = vcvtq_f32_s32(inputVal);
336 ret = vmulq_f32(ret, invScalar);
337 vst1q_f32(outputVectorPtr, ret);
340 outputVectorPtr += 4;
// Scalar tail: handles the num_points % 4 elements left after the vector loop.
343 number = quarterPoints * 4;
344 for (; number < num_points; number++) {
345 outputVector[number] = ((float)(inputVector[number])) * iScalar;
/*
 * NEON kernel processing eight elements per iteration as two independent
 * 4-lane vectors: converts each 32-bit signed integer of inputVector to
 * float, multiplies it by 1/scalar and writes the result to outputVector.
 * num_points is the number of elements to process.
 *
 * NOTE(review): this listing appears to be missing lines. `scalar` is used
 * below without a visible declaration, the function braces are not visible,
 * `inputPtr` is never visibly advanced inside the vector loop, and every
 * line carries a fused numeric prefix. Confirm against the complete file
 * before relying on this text.
 */
354static inline void volk_32i_s32f_convert_32f_neonv8(
float* outputVector,
355 const int32_t* inputVector,
357 unsigned int num_points)
359 unsigned int number = 0;
// Number of full 8-element groups handled by the vector loop.
360 const unsigned int eighthPoints = num_points / 8;
362 float* outputVectorPtr = outputVector;
363 const int32_t* inputPtr = inputVector;
// The reciprocal is computed once so both loops multiply instead of divide.
364 const float iScalar = 1.0f / scalar;
// Broadcast the reciprocal to all four float lanes.
365 float32x4_t invScalar = vdupq_n_f32(iScalar);
367 for (; number < eighthPoints; number++) {
// Two adjacent 4 x int32 loads covering elements 0..3 and 4..7 of the group.
368 int32x4_t inputVal0 = vld1q_s32(inputPtr);
369 int32x4_t inputVal1 = vld1q_s32(inputPtr + 4);
// Convert both halves from int32 to float.
373 float32x4_t ret0 = vcvtq_f32_s32(inputVal0);
374 float32x4_t ret1 = vcvtq_f32_s32(inputVal1);
// Scale both halves by the reciprocal.
376 ret0 = vmulq_f32(ret0, invScalar);
377 ret1 = vmulq_f32(ret1, invScalar);
// Store the two halves back to back, then step the output by 8 floats.
379 vst1q_f32(outputVectorPtr, ret0);
380 vst1q_f32(outputVectorPtr + 4, ret1);
381 outputVectorPtr += 8;
// Scalar tail: handles the num_points % 8 elements left after the vector loop.
384 number = eighthPoints * 8;
385 for (; number < num_points; number++) {
386 outputVector[number] = ((float)(inputVector[number])) * iScalar;
393#include <riscv_vector.h>
/*
 * RISC-V Vector kernel: converts each 32-bit signed integer of inputVector
 * to float, multiplies it by 1/scalar and writes the result to outputVector.
 * num_points is the number of elements to process. The loop has no separate
 * scalar tail; it runs until the remaining count n reaches zero.
 *
 * NOTE(review): this listing appears to be missing lines. `scalar` is used
 * below without a visible declaration, the function braces and loop closing
 * brace are not visible, and every line carries a fused numeric prefix.
 * Confirm against the complete file before relying on this text.
 */
395static inline void volk_32i_s32f_convert_32f_rvv(
float* outputVector,
396 const int32_t* inputVector,
398 unsigned int num_points)
// n counts the elements still to be processed.
400 size_t n = num_points;
// Each pass consumes vl elements and advances both parameter pointers by vl.
401 for (
size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) {
// vl is the element count for this pass, chosen by vsetvl for 32-bit
// elements at LMUL=8 from the remaining count n.
402 vl = __riscv_vsetvl_e32m8(n);
// Load vl int32 values and convert them to float.
403 vfloat32m8_t v = __riscv_vfcvt_f(__riscv_vle32_v_i32m8(inputVector, vl), vl);
// Multiply by the reciprocal and store vl floats. The expression
// 1.0f / scalar is written inside the loop body rather than hoisted.
404 __riscv_vse32(outputVector, __riscv_vfmul(v, 1.0f / scalar, vl), vl);