57#ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
58#define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
68 unsigned int num_points)
70 unsigned int number = 0;
71 const unsigned int eighthPoints = num_points / 8;
72 const float* complexVectorPtr = (
const float*)complexVector;
73 float* qBufferPtr = qBuffer;
75 __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
76 for (; number < eighthPoints; number++) {
78 cplxValue1 = _mm256_load_ps(complexVectorPtr);
79 complexVectorPtr += 8;
81 cplxValue2 = _mm256_load_ps(complexVectorPtr);
82 complexVectorPtr += 8;
84 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
85 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
88 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
90 _mm256_store_ps(qBufferPtr, qValue);
95 number = eighthPoints * 8;
96 for (; number < num_points; number++) {
98 *qBufferPtr++ = *complexVectorPtr++;
104#include <xmmintrin.h>
108 unsigned int num_points)
110 unsigned int number = 0;
111 const unsigned int quarterPoints = num_points / 4;
113 const float* complexVectorPtr = (
const float*)complexVector;
114 float* qBufferPtr = qBuffer;
116 __m128 cplxValue1, cplxValue2, iValue;
117 for (; number < quarterPoints; number++) {
119 cplxValue1 = _mm_load_ps(complexVectorPtr);
120 complexVectorPtr += 4;
122 cplxValue2 = _mm_load_ps(complexVectorPtr);
123 complexVectorPtr += 4;
126 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
128 _mm_store_ps(qBufferPtr, iValue);
133 number = quarterPoints * 4;
134 for (; number < num_points; number++) {
136 *qBufferPtr++ = *complexVectorPtr++;
146 unsigned int num_points)
148 unsigned int number = 0;
149 unsigned int quarter_points = num_points / 4;
150 const float* complexVectorPtr = (
float*)complexVector;
151 float* qBufferPtr = qBuffer;
152 float32x4x2_t complexInput;
154 for (number = 0; number < quarter_points; number++) {
155 complexInput = vld2q_f32(complexVectorPtr);
156 vst1q_f32(qBufferPtr, complexInput.val[1]);
157 complexVectorPtr += 8;
161 for (number = quarter_points * 4; number < num_points; number++) {
163 *qBufferPtr++ = *complexVectorPtr++;
171static inline void volk_32fc_deinterleave_imag_32f_neonv8(
float* qBuffer,
173 unsigned int num_points)
175 const unsigned int eighthPoints = num_points / 8;
176 const float* complexVectorPtr = (
float*)complexVector;
177 float* qBufferPtr = qBuffer;
179 for (
unsigned int number = 0; number < eighthPoints; number++) {
180 float32x4x2_t cplx0 = vld2q_f32(complexVectorPtr);
181 float32x4x2_t cplx1 = vld2q_f32(complexVectorPtr + 8);
184 vst1q_f32(qBufferPtr, cplx0.val[1]);
185 vst1q_f32(qBufferPtr + 4, cplx1.val[1]);
187 complexVectorPtr += 16;
191 for (
unsigned int number = eighthPoints * 8; number < num_points; number++) {
193 *qBufferPtr++ = *complexVectorPtr++;
#ifdef LV_HAVE_GENERIC

/*
 * Portable scalar implementation: copy the imaginary (Q) component of each
 * interleaved complex sample in complexVector into qBuffer.
 *
 * qBuffer       - output, receives num_points floats.
 * complexVector - input, read as 2 * num_points floats laid out I,Q,I,Q,...
 * num_points    - number of complex samples to process.
 *
 * NOTE(review): the function name and the complexVector parameter type were
 * not visible in the reviewed listing; they follow the naming pattern of the
 * sibling kernels in this header and should be confirmed.
 */
static inline void volk_32fc_deinterleave_imag_32f_generic(float* qBuffer,
                                                           const lv_32fc_t* complexVector,
                                                           unsigned int num_points)
{
    const float* complexVectorPtr = (const float*)complexVector;
    float* qBufferPtr = qBuffer;

    for (unsigned int number = 0; number < num_points; number++) {
        complexVectorPtr++; /* skip the real part */
        *qBufferPtr++ = *complexVectorPtr++;
    }
}

#endif /* LV_HAVE_GENERIC */
217#ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
218#define INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
224#include <immintrin.h>
228 unsigned int num_points)
230 unsigned int number = 0;
231 const unsigned int eighthPoints = num_points / 8;
232 const float* complexVectorPtr = (
const float*)complexVector;
233 float* qBufferPtr = qBuffer;
235 __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
236 for (; number < eighthPoints; number++) {
238 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
239 complexVectorPtr += 8;
241 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
242 complexVectorPtr += 8;
244 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
245 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
248 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
250 _mm256_storeu_ps(qBufferPtr, qValue);
255 number = eighthPoints * 8;
256 for (; number < num_points; number++) {
258 *qBufferPtr++ = *complexVectorPtr++;
264#include <riscv_vector.h>
266static inline void volk_32fc_deinterleave_imag_32f_rvv(
float* qBuffer,
268 unsigned int num_points)
270 const uint64_t* in = (
const uint64_t*)complexVector;
271 size_t n = num_points;
272 for (
size_t vl; n > 0; n -= vl, in += vl, qBuffer += vl) {
273 vl = __riscv_vsetvl_e64m8(n);
274 vuint64m8_t vc = __riscv_vle64_v_u64m8(in, vl);
275 __riscv_vse32((uint32_t*)qBuffer, __riscv_vnsrl(vc, 32, vl), vl);