60#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
61#define INCLUDED_volk_32f_x2_interleave_32fc_a_H
// NOTE(review): this is a fragment. The function name and the parameters that
// precede num_points are not visible in this excerpt, and the integer at the
// start of each line appears to be an extraction artifact, not code.
//
// Purpose (from the visible body): interleave floats read through iBuffer and
// qBuffer into the float view of complexVector, 8 points per iteration using
// 256-bit aligned loads/stores, then finish the remainder one point at a time.
72 unsigned int num_points)
74 unsigned int number = 0;
// The output is written as a flat float array: two floats per point.
75 float* complexVectorPtr = (
float*)complexVector;
76 const float* iBufferPtr = iBuffer;
77 const float* qBufferPtr = qBuffer;
// Number of full 8-point iterations.
79 const uint64_t eighthPoints = num_points / 8;
81 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
82 for (; number < eighthPoints; number++) {
// Aligned 8-float loads.
83 iValue = _mm256_load_ps(iBufferPtr);
84 qValue = _mm256_load_ps(qBufferPtr);
// Per 128-bit lane: i0 q0 i1 q1 | i4 q4 i5 q5
87 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
// Per 128-bit lane: i2 q2 i3 q3 | i6 q6 i7 q7
89 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
// 0x20 joins the low lanes of both operands: points 0..3 in order.
91 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
92 _mm256_store_ps(complexVectorPtr, cplxValue);
93 complexVectorPtr += 8;
// 0x31 joins the high lanes of both operands: points 4..7 in order.
95 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
96 _mm256_store_ps(complexVectorPtr, cplxValue);
97 complexVectorPtr += 8;
// NOTE(review): no advance of iBufferPtr/qBufferPtr and no loop-closing brace
// are visible here; the numbering gap suggests lines are missing from this
// excerpt -- confirm against the full file.
// Scalar tail for the num_points % 8 leftover points.
103 number = eighthPoints * 8;
104 for (; number < num_points; number++) {
105 *complexVectorPtr++ = *iBufferPtr++;
106 *complexVectorPtr++ = *qBufferPtr++;
113#include <xmmintrin.h>
// NOTE(review): this is a fragment. The function name and its first parameter
// are not visible in this excerpt, and the integer at the start of each line
// appears to be an extraction artifact, not code.
//
// Purpose (from the visible body): interleave floats from iBuffer and qBuffer
// into the float view of complexVector, 4 points per iteration using 128-bit
// aligned loads/stores, then finish the remainder one point at a time.
116 const float* iBuffer,
117 const float* qBuffer,
118 unsigned int num_points)
120 unsigned int number = 0;
// The output is written as a flat float array: two floats per point.
121 float* complexVectorPtr = (
float*)complexVector;
122 const float* iBufferPtr = iBuffer;
123 const float* qBufferPtr = qBuffer;
// Number of full 4-point iterations.
125 const uint64_t quarterPoints = num_points / 4;
127 __m128 iValue, qValue, cplxValue;
128 for (; number < quarterPoints; number++) {
// Aligned 4-float loads.
129 iValue = _mm_load_ps(iBufferPtr);
130 qValue = _mm_load_ps(qBufferPtr);
// Low halves interleaved: i0 q0 i1 q1
133 cplxValue = _mm_unpacklo_ps(iValue, qValue);
134 _mm_store_ps(complexVectorPtr, cplxValue);
135 complexVectorPtr += 4;
// High halves interleaved: i2 q2 i3 q3
138 cplxValue = _mm_unpackhi_ps(iValue, qValue);
139 _mm_store_ps(complexVectorPtr, cplxValue);
140 complexVectorPtr += 4;
// NOTE(review): no advance of iBufferPtr/qBufferPtr and no loop-closing brace
// are visible here; the numbering gap suggests lines are missing from this
// excerpt -- confirm against the full file.
// Scalar tail for the num_points % 4 leftover points.
146 number = quarterPoints * 4;
147 for (; number < num_points; number++) {
148 *complexVectorPtr++ = *iBufferPtr++;
149 *complexVectorPtr++ = *qBufferPtr++;
// NOTE(review): this is a fragment. The function name and its first parameter
// are not visible in this excerpt, and the integer at the start of each line
// appears to be an extraction artifact, not code.
//
// Purpose (from the visible body): interleave floats from iBuffer and qBuffer
// into the float view of complexVector, 4 points per iteration with NEON, then
// finish the remainder one point at a time.
159 const float* iBuffer,
160 const float* qBuffer,
161 unsigned int num_points)
// Number of full 4-point iterations.
163 unsigned int quarter_points = num_points / 4;
// NOTE(review): `number` is used below but its declaration is not visible in
// this excerpt -- confirm against the full file.
165 float* complexVectorPtr = (
float*)complexVector;
167 float32x4x2_t complex_vec;
168 for (number = 0; number < quarter_points; ++number) {
169 complex_vec.val[0] = vld1q_f32(iBuffer);
170 complex_vec.val[1] = vld1q_f32(qBuffer);
// vst2q_f32 stores val[0] and val[1] element-interleaved: i0 q0 i1 q1 ...
171 vst2q_f32(complexVectorPtr, complex_vec);
// NOTE(review): no advance of iBuffer/qBuffer is visible in this loop; the
// numbering gap suggests lines are missing -- confirm against the full file.
// 4 points written = 8 floats.
174 complexVectorPtr += 8;
// Scalar tail for the num_points % 4 leftover points.
177 for (number = quarter_points * 4; number < num_points; ++number) {
178 *complexVectorPtr++ = *iBuffer++;
179 *complexVectorPtr++ = *qBuffer++;
187static inline void volk_32f_x2_interleave_32fc_neonv8(
lv_32fc_t* complexVector,
188 const float* iBuffer,
189 const float* qBuffer,
190 unsigned int num_points)
192 const unsigned int eighthPoints = num_points / 8;
194 float* outPtr = (
float*)complexVector;
195 const float* iPtr = iBuffer;
196 const float* qPtr = qBuffer;
198 for (
unsigned int number = 0; number < eighthPoints; number++) {
199 float32x4x2_t cplx0, cplx1;
200 cplx0.val[0] = vld1q_f32(iPtr);
201 cplx0.val[1] = vld1q_f32(qPtr);
202 cplx1.val[0] = vld1q_f32(iPtr + 4);
203 cplx1.val[1] = vld1q_f32(qPtr + 4);
207 vst2q_f32(outPtr, cplx0);
208 vst2q_f32(outPtr + 8, cplx1);
215 for (
unsigned int number = eighthPoints * 8; number < num_points; number++) {
223#ifdef LV_HAVE_GENERIC
// NOTE(review): this is a fragment. The function name and its first parameter
// are not visible in this excerpt, and the integer at the start of each line
// appears to be an extraction artifact, not code.
//
// Purpose (from the visible body): portable scalar loop that writes, for each
// of num_points points, one float from iBuffer followed by one float from
// qBuffer into the float view of complexVector.
226 const float* iBuffer,
227 const float* qBuffer,
228 unsigned int num_points)
// The output is written as a flat float array: two floats per point.
230 float* complexVectorPtr = (
float*)complexVector;
231 const float* iBufferPtr = iBuffer;
232 const float* qBufferPtr = qBuffer;
// NOTE(review): `number` is used below but its declaration is not visible in
// this excerpt -- confirm against the full file.
235 for (number = 0; number < num_points; number++) {
236 *complexVectorPtr++ = *iBufferPtr++;
237 *complexVectorPtr++ = *qBufferPtr++;
245#ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
246#define INCLUDED_volk_32f_x2_interleave_32fc_u_H
252#include <immintrin.h>
// NOTE(review): this is a fragment. The function name and its first parameter
// are not visible in this excerpt, and the integer at the start of each line
// appears to be an extraction artifact, not code.
//
// Purpose (from the visible body): interleave floats from iBuffer and qBuffer
// into the float view of complexVector, 8 points per iteration using 256-bit
// unaligned loads/stores, then finish the remainder one point at a time.
255 const float* iBuffer,
256 const float* qBuffer,
257 unsigned int num_points)
259 unsigned int number = 0;
// The output is written as a flat float array: two floats per point.
260 float* complexVectorPtr = (
float*)complexVector;
261 const float* iBufferPtr = iBuffer;
262 const float* qBufferPtr = qBuffer;
// Number of full 8-point iterations.
264 const uint64_t eighthPoints = num_points / 8;
266 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
267 for (; number < eighthPoints; number++) {
// Unaligned 8-float loads: no alignment requirement on the input pointers.
268 iValue = _mm256_loadu_ps(iBufferPtr);
269 qValue = _mm256_loadu_ps(qBufferPtr);
// Per 128-bit lane: i0 q0 i1 q1 | i4 q4 i5 q5
272 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
// Per 128-bit lane: i2 q2 i3 q3 | i6 q6 i7 q7
274 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
// 0x20 joins the low lanes of both operands: points 0..3 in order.
276 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
277 _mm256_storeu_ps(complexVectorPtr, cplxValue);
278 complexVectorPtr += 8;
// 0x31 joins the high lanes of both operands: points 4..7 in order.
280 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
281 _mm256_storeu_ps(complexVectorPtr, cplxValue);
282 complexVectorPtr += 8;
// NOTE(review): no advance of iBufferPtr/qBufferPtr and no loop-closing brace
// are visible here; the numbering gap suggests lines are missing from this
// excerpt -- confirm against the full file.
// Scalar tail for the num_points % 8 leftover points.
288 number = eighthPoints * 8;
289 for (; number < num_points; number++) {
290 *complexVectorPtr++ = *iBufferPtr++;
291 *complexVectorPtr++ = *qBufferPtr++;
297#include <riscv_vector.h>
299static inline void volk_32f_x2_interleave_32fc_rvv(
lv_32fc_t* complexVector,
300 const float* iBuffer,
301 const float* qBuffer,
302 unsigned int num_points)
304 uint64_t* out = (uint64_t*)complexVector;
305 size_t n = num_points;
306 for (
size_t vl; n > 0; n -= vl, out += vl, iBuffer += vl, qBuffer += vl) {
307 vl = __riscv_vsetvl_e32m4(n);
308 vuint32m4_t vr = __riscv_vle32_v_u32m4((
const uint32_t*)iBuffer, vl);
309 vuint32m4_t vi = __riscv_vle32_v_u32m4((
const uint32_t*)qBuffer, vl);
311 __riscv_vwmaccu(__riscv_vwaddu_vv(vr, vi, vl), 0xFFFFFFFF, vi, vl);
312 __riscv_vse64(out, vc, vl);
318#include <riscv_vector.h>
320static inline void volk_32f_x2_interleave_32fc_rvvseg(
lv_32fc_t* complexVector,
321 const float* iBuffer,
322 const float* qBuffer,
323 unsigned int num_points)
325 size_t n = num_points;
326 for (
size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) {
327 vl = __riscv_vsetvl_e32m4(n);
328 vfloat32m4_t vr = __riscv_vle32_v_f32m4(iBuffer, vl);
329 vfloat32m4_t vi = __riscv_vle32_v_f32m4(qBuffer, vl);
330 __riscv_vsseg2e32((
float*)complexVector, __riscv_vcreate_v_f32m4x2(vr, vi), vl);