58#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
59#define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
71 unsigned int num_points)
/* Body fragment of a kernel that walks a complex float input and writes one
 * float per complex point. The scalar tail computes re*re + im*im; the vector
 * loop presumably produces the same quantity 8 points at a time.
 * NOTE(review): the function name, the parameters before num_points, the
 * braces, and the statement that assigns `result` are not visible in this
 * excerpt -- confirm against the full file before editing. */
73 unsigned int number = 0;
/* Number of full vector iterations; each one writes 8 outputs. */
74 const unsigned int eighthPoints = num_points / 8;
/* Treat the complex input as a flat float array (real first, then imaginary,
 * which is the order the scalar tail below reads it in). */
76 const float* complexVectorPtr = (
float*)complexVector;
77 float* magnitudeVectorPtr = magnitudeVector;
79 __m256 cplxValue1, cplxValue2, result;
81 for (; number < eighthPoints; number++) {
/* Two unaligned 8-float loads together cover 8 complex points. */
82 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
83 cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
/* NOTE(review): `result` is stored here, but the statement computing it from
 * cplxValue1/cplxValue2 is missing from this excerpt. */
85 _mm256_storeu_ps(magnitudeVectorPtr, result);
/* Step past the 16 input floats consumed and the 8 outputs written. */
87 complexVectorPtr += 16;
88 magnitudeVectorPtr += 8;
/* Scalar tail: process the num_points % 8 points the vector loop left over. */
91 number = eighthPoints * 8;
92 for (; number < num_points; number++) {
93 float val1Real = *complexVectorPtr++;
94 float val1Imag = *complexVectorPtr++;
95 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
102#include <pmmintrin.h>
107 unsigned int num_points)
/* Body fragment of a 128-bit kernel using unaligned loads and stores. The
 * scalar tail computes re*re + im*im per complex point; the vector loop
 * presumably produces the same quantity 4 points at a time.
 * NOTE(review): the function name, the parameters before num_points, the
 * braces, and the statement that assigns `result` are not visible in this
 * excerpt -- confirm against the full file before editing. */
109 unsigned int number = 0;
/* Number of full vector iterations; each one writes 4 outputs. */
110 const unsigned int quarterPoints = num_points / 4;
/* Treat the complex input as a flat float array (real, then imaginary). */
112 const float* complexVectorPtr = (
float*)complexVector;
113 float* magnitudeVectorPtr = magnitudeVector;
115 __m128 cplxValue1, cplxValue2, result;
116 for (; number < quarterPoints; number++) {
/* Two unaligned 4-float loads together cover 4 complex points; the input
 * pointer is advanced right after each load. */
117 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
118 complexVectorPtr += 4;
120 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
121 complexVectorPtr += 4;
/* NOTE(review): `result` is stored here, but the statement computing it is
 * missing from this excerpt. */
124 _mm_storeu_ps(magnitudeVectorPtr, result);
125 magnitudeVectorPtr += 4;
/* Scalar tail: process the num_points % 4 points the vector loop left over. */
128 number = quarterPoints * 4;
129 for (; number < num_points; number++) {
130 float val1Real = *complexVectorPtr++;
131 float val1Imag = *complexVectorPtr++;
132 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
140#include <xmmintrin.h>
144 unsigned int num_points)
/* Body fragment of a second 128-bit kernel using unaligned loads and stores.
 * The visible statements match the preceding 128-bit unaligned fragment; any
 * difference would be in the `result` computation.
 * NOTE(review): the function name, the parameters before num_points, the
 * braces, and the statement that assigns `result` are not visible in this
 * excerpt -- confirm against the full file before editing. */
146 unsigned int number = 0;
/* Number of full vector iterations; each one writes 4 outputs. */
147 const unsigned int quarterPoints = num_points / 4;
/* Treat the complex input as a flat float array (real, then imaginary). */
149 const float* complexVectorPtr = (
float*)complexVector;
150 float* magnitudeVectorPtr = magnitudeVector;
152 __m128 cplxValue1, cplxValue2, result;
154 for (; number < quarterPoints; number++) {
/* Load 8 consecutive floats (4 complex points) as two unaligned vectors. */
155 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
156 complexVectorPtr += 4;
158 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
159 complexVectorPtr += 4;
/* NOTE(review): `result` is stored here, but the statement computing it is
 * missing from this excerpt. */
162 _mm_storeu_ps(magnitudeVectorPtr, result);
163 magnitudeVectorPtr += 4;
/* Scalar tail: process the num_points % 4 points the vector loop left over. */
166 number = quarterPoints * 4;
167 for (; number < num_points; number++) {
168 float val1Real = *complexVectorPtr++;
169 float val1Imag = *complexVectorPtr++;
170 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
176#ifdef LV_HAVE_GENERIC
180 unsigned int num_points)
/* Body fragment of the plain scalar implementation: for each of num_points
 * complex points it reads two consecutive floats (real, imaginary) and writes
 * real*real + imag*imag to the output.
 * NOTE(review): the function name, the parameters before num_points, and the
 * braces are not visible in this excerpt. */
182 const float* complexVectorPtr = (
float*)complexVector;
183 float* magnitudeVectorPtr = magnitudeVector;
184 unsigned int number = 0;
185 for (number = 0; number < num_points; number++) {
/* Each point consumes two input floats and produces one output float. */
186 const float real = *complexVectorPtr++;
187 const float imag = *complexVectorPtr++;
188 *magnitudeVectorPtr++ = (real * real) + (imag * imag);
195#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
196#define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
203#include <immintrin.h>
208 unsigned int num_points)
/* Body fragment of a 256-bit kernel using the aligned load/store intrinsics
 * (_mm256_load_ps / _mm256_store_ps), which require 32-byte-aligned
 * addresses. The scalar tail computes re*re + im*im per complex point; the
 * vector loop presumably produces the same quantity 8 points at a time.
 * NOTE(review): the function name, the parameters before num_points, the
 * braces, and the statement that assigns `result` are not visible in this
 * excerpt -- confirm against the full file before editing. */
210 unsigned int number = 0;
/* Number of full vector iterations; each one writes 8 outputs. */
211 const unsigned int eighthPoints = num_points / 8;
/* Treat the complex input as a flat float array (real, then imaginary). */
213 const float* complexVectorPtr = (
float*)complexVector;
214 float* magnitudeVectorPtr = magnitudeVector;
216 __m256 cplxValue1, cplxValue2, result;
217 for (; number < eighthPoints; number++) {
/* Two aligned 8-float loads together cover 8 complex points; the input
 * pointer is advanced right after each load. */
218 cplxValue1 = _mm256_load_ps(complexVectorPtr);
219 complexVectorPtr += 8;
221 cplxValue2 = _mm256_load_ps(complexVectorPtr);
222 complexVectorPtr += 8;
/* NOTE(review): `result` is stored here, but the statement computing it is
 * missing from this excerpt. */
225 _mm256_store_ps(magnitudeVectorPtr, result);
226 magnitudeVectorPtr += 8;
/* Scalar tail: process the num_points % 8 points the vector loop left over. */
229 number = eighthPoints * 8;
230 for (; number < num_points; number++) {
231 float val1Real = *complexVectorPtr++;
232 float val1Imag = *complexVectorPtr++;
233 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
240#include <pmmintrin.h>
245 unsigned int num_points)
/* Body fragment of a 128-bit kernel using the aligned load/store intrinsics
 * (_mm_load_ps / _mm_store_ps), which require 16-byte-aligned addresses. The
 * scalar tail computes re*re + im*im per complex point; the vector loop
 * presumably produces the same quantity 4 points at a time.
 * NOTE(review): the function name, the parameters before num_points, the
 * braces, and the statement that assigns `result` are not visible in this
 * excerpt -- confirm against the full file before editing. */
247 unsigned int number = 0;
/* Number of full vector iterations; each one writes 4 outputs. */
248 const unsigned int quarterPoints = num_points / 4;
/* Treat the complex input as a flat float array (real, then imaginary). */
250 const float* complexVectorPtr = (
float*)complexVector;
251 float* magnitudeVectorPtr = magnitudeVector;
253 __m128 cplxValue1, cplxValue2, result;
254 for (; number < quarterPoints; number++) {
/* Two aligned 4-float loads together cover 4 complex points. */
255 cplxValue1 = _mm_load_ps(complexVectorPtr);
256 complexVectorPtr += 4;
258 cplxValue2 = _mm_load_ps(complexVectorPtr);
259 complexVectorPtr += 4;
/* NOTE(review): `result` is stored here, but the statement computing it is
 * missing from this excerpt. */
262 _mm_store_ps(magnitudeVectorPtr, result);
263 magnitudeVectorPtr += 4;
/* Scalar tail: process the num_points % 4 points the vector loop left over. */
266 number = quarterPoints * 4;
267 for (; number < num_points; number++) {
268 float val1Real = *complexVectorPtr++;
269 float val1Imag = *complexVectorPtr++;
270 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
278#include <xmmintrin.h>
282 unsigned int num_points)
/* Body fragment of a second 128-bit kernel using aligned loads and stores
 * (16-byte-aligned addresses required by _mm_load_ps / _mm_store_ps). The
 * visible statements match the preceding 128-bit aligned fragment; any
 * difference would be in the `result` computation.
 * NOTE(review): the function name, the parameters before num_points, the
 * braces, and the statement that assigns `result` are not visible in this
 * excerpt -- confirm against the full file before editing. */
284 unsigned int number = 0;
/* Number of full vector iterations; each one writes 4 outputs. */
285 const unsigned int quarterPoints = num_points / 4;
/* Treat the complex input as a flat float array (real, then imaginary). */
287 const float* complexVectorPtr = (
float*)complexVector;
288 float* magnitudeVectorPtr = magnitudeVector;
290 __m128 cplxValue1, cplxValue2, result;
291 for (; number < quarterPoints; number++) {
/* Load 8 consecutive floats (4 complex points) as two aligned vectors. */
292 cplxValue1 = _mm_load_ps(complexVectorPtr);
293 complexVectorPtr += 4;
295 cplxValue2 = _mm_load_ps(complexVectorPtr);
296 complexVectorPtr += 4;
/* NOTE(review): `result` is stored here, but the statement computing it is
 * missing from this excerpt. */
299 _mm_store_ps(magnitudeVectorPtr, result);
300 magnitudeVectorPtr += 4;
/* Scalar tail: process the num_points % 4 points the vector loop left over. */
303 number = quarterPoints * 4;
304 for (; number < num_points; number++) {
305 float val1Real = *complexVectorPtr++;
306 float val1Imag = *complexVectorPtr++;
307 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
318 unsigned int num_points)
/* Body fragment of a NEON kernel that handles 4 complex points per vector
 * iteration and finishes with a scalar re*re + im*im tail.
 * NOTE(review): the function name, the parameters before num_points, the
 * braces, the declaration of `result`, and the left-hand sides of the three
 * arithmetic expressions in the loop are not visible in this excerpt --
 * confirm against the full file before editing. */
320 unsigned int number = 0;
/* Number of full vector iterations; each one writes 4 outputs. */
321 const unsigned int quarterPoints = num_points / 4;
/* Treat the complex input as a flat float array (real, then imaginary). */
323 const float* complexVectorPtr = (
float*)complexVector;
324 float* magnitudeVectorPtr = magnitudeVector;
326 float32x4x2_t cmplx_val;
328 for (; number < quarterPoints; number++) {
/* De-interleaving load of 8 floats: val[0] receives the even-indexed floats
 * and val[1] the odd-indexed ones, i.e. the reals and imaginaries given the
 * layout the scalar tail reads. */
329 cmplx_val = vld2q_f32(complexVectorPtr);
330 complexVectorPtr += 8;
/* Square each half, then add the two squares. NOTE(review): the assignment
 * targets of these three expressions are missing from this excerpt; the sum
 * is presumably what ends up in `result`. */
333 vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]);
335 vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]);
338 vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]);
340 vst1q_f32(magnitudeVectorPtr, result);
341 magnitudeVectorPtr += 4;
/* Scalar tail: process the num_points % 4 points the vector loop left over. */
344 number = quarterPoints * 4;
345 for (; number < num_points; number++) {
346 float val1Real = *complexVectorPtr++;
347 float val1Imag = *complexVectorPtr++;
348 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
/* NEON kernel fragment computing squared magnitudes with pairwise adds
 * instead of a de-interleaving load. Three stages are visible: a 4-point
 * vector step, a 2-point vector step, and a scalar step.
 * NOTE(review): the `complexVector` parameter declaration, every loop or
 * branch header, the stores of `mag`, the pointer/counter updates, the
 * definitions of `re`/`im`, and the braces are not visible in this excerpt --
 * confirm against the full file before editing. */
357static inline void volk_32fc_magnitude_squared_32f_neonv8(
float* magnitudeVector,
359 unsigned int num_points)
361 unsigned int n = num_points;
/* Flat float view of the complex input, plus the output cursor. */
362 const float* in = (
const float*)complexVector;
363 float* out = magnitudeVector;
/* 4-point step: load 8 consecutive floats as two vectors and square every
 * element in place. */
371 float32x4_t v0 = vld1q_f32(in);
372 float32x4_t v1 = vld1q_f32(in + 4);
376 v0 = vmulq_f32(v0, v0);
377 v1 = vmulq_f32(v1, v1);
/* Pairwise add sums adjacent lanes, so each output lane is the sum of two
 * neighbouring squared floats: 4 results from v0 and v1 together. */
380 float32x4_t mag = vpaddq_f32(v0, v1);
/* 2-point step: same idea on a single 4-float vector. `v0` and `mag` are
 * declared again here, so this presumably sits in a separate scope. */
391 float32x4_t v0 = vld1q_f32(in);
392 v0 = vmulq_f32(v0, v0);
/* Pairwise add of the low and high halves yields 2 results. */
393 float32x2_t mag = vpadd_f32(vget_low_f32(v0), vget_high_f32(v0));
/* Scalar step: one output from one (re, im) pair. */
404 *out++ = (re * re) + (im * im);
412#include <riscv_vector.h>
/* RISC-V Vector kernel: loads each complex point as one 64-bit element,
 * splits it into its two 32-bit halves with narrowing shifts, and stores
 * vi*vi + vr*vr for every point.
 * NOTE(review): the `complexVector` parameter declaration and the braces are
 * not visible in this excerpt; from the pointer arithmetic and the uint64_t
 * cast its element type is presumably 8 bytes wide -- confirm. */
414static inline void volk_32fc_magnitude_squared_32f_rvv(
float* magnitudeVector,
416 unsigned int num_points)
418 size_t n = num_points;
/* Strip-mined loop: each pass handles `vl` points, then shrinks the remaining
 * count and advances both pointers by `vl` elements. */
419 for (
size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) {
/* Ask the hardware how many 32-bit elements (LMUL=4) to process this pass. */
420 vl = __riscv_vsetvl_e32m4(n);
/* Load `vl` complex points as raw 64-bit integers. */
421 vuint64m8_t vc = __riscv_vle64_v_u64m8((
const uint64_t*)complexVector, vl);
/* Narrowing shift by 0 keeps the low 32 bits of each element, by 32 the high
 * 32 bits; both are reinterpreted as floats. Presumably low = real and
 * high = imaginary on a little-endian layout. */
422 vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl));
423 vfloat32m4_t vi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl));
/* Multiply-accumulate: (vi * vi) + vr * vr. */
424 vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl);
425 __riscv_vse32(magnitudeVector, v, vl);
431#include <riscv_vector.h>
/* RISC-V Vector kernel using a two-field segment load: the load itself
 * separates the interleaved floats into two vectors, and vi*vi + vr*vr is
 * stored for every point.
 * NOTE(review): the `complexVector` parameter declaration and the braces are
 * not visible in this excerpt -- confirm against the full file. */
433static inline void volk_32fc_magnitude_squared_32f_rvvseg(
float* magnitudeVector,
435 unsigned int num_points)
437 size_t n = num_points;
/* Strip-mined loop: each pass handles `vl` points, then shrinks the remaining
 * count and advances both pointers by `vl` elements. */
438 for (
size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) {
/* Ask the hardware how many 32-bit elements (LMUL=4) to process this pass. */
439 vl = __riscv_vsetvl_e32m4(n);
/* Segment load of `vl` two-float segments: field 0 of every segment goes to
 * one vector, field 1 to the other. */
440 vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((
const float*)complexVector, vl);
/* Field 0 is the first float of each pair, field 1 the second. */
441 vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0);
442 vfloat32m4_t vi = __riscv_vget_f32m4(vc, 1);
/* Multiply-accumulate: (vi * vi) + vr * vr. */
443 vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl);
444 __riscv_vse32(magnitudeVector, v, vl);