55#ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
56#define INCLUDED_volk_32fc_conjugate_32fc_u_H
/*
 * Fragment of a 256-bit x86 kernel that flips the sign bit of every
 * odd-indexed float read through `a` and writes the result through `c`,
 * using unaligned loads and stores.
 * NOTE(review): the function name, the other parameters and the
 * declarations of `a`, `c` and `x` are not visible in this excerpt.
 * Presumably `a`/`c` walk interleaved (re, im) float pairs, which would make
 * this a complex conjugate -- confirm against the full signature.
 */
68 unsigned int num_points)
70 unsigned int number = 0;
/* Number of full vector iterations; one iteration moves one __m256 (8 floats). */
71 const unsigned int quarterPoints = num_points / 4;
/* -0.f has only the sign bit set, so XOR with this mask negates lanes 1, 3, 5, 7
 * and leaves lanes 0, 2, 4, 6 untouched. */
77 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
79 for (; number < quarterPoints; number++) {
/* Unaligned 8-float load from the current input position. */
81 x = _mm256_loadu_ps((
float*)a);
/* Flip the sign of the odd lanes. */
83 x = _mm256_xor_ps(x, conjugator);
/* Unaligned 8-float store to the current output position. */
85 _mm256_storeu_ps((
float*)c, x);
/* Index of the first point not covered by the vector loop. */
91 number = quarterPoints * 4;
/* Tail loop over the remaining points (body not visible in this excerpt). */
93 for (; number < num_points; number++) {
100#include <pmmintrin.h>
/*
 * Fragment of a 128-bit x86 kernel that flips the sign bit of every
 * odd-indexed float read through `a` and writes the result through `c`,
 * using unaligned loads and stores.
 * NOTE(review): the function name, the other parameters and the
 * declarations of `a`, `c` and `x` are not visible in this excerpt.
 */
104 unsigned int num_points)
106 unsigned int number = 0;
/* Number of full vector iterations; one iteration moves one __m128 (4 floats). */
107 const unsigned int halfPoints = num_points / 2;
/* -0.f has only the sign bit set, so XOR with this mask negates lanes 1 and 3
 * and leaves lanes 0 and 2 untouched. */
113 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
115 for (; number < halfPoints; number++) {
/* Unaligned 4-float load from the current input position. */
117 x = _mm_loadu_ps((
float*)a);
/* Flip the sign of the odd lanes. */
119 x = _mm_xor_ps(x, conjugator);
/* Unaligned 4-float store to the current output position. */
121 _mm_storeu_ps((
float*)c, x);
/* An odd num_points leaves exactly one point for the scalar path
 * (its body is not visible in this excerpt). */
127 if ((num_points % 2) != 0) {
133#ifdef LV_HAVE_GENERIC
/*
 * Fragment of the kernel that follows the LV_HAVE_GENERIC guard.
 * NOTE(review): the function name, the other parameters and the loop body
 * are not visible in this excerpt.
 */
137 unsigned int num_points)
141 unsigned int number = 0;
/* Visits indices 0 .. num_points - 1 in order, one per iteration. */
143 for (number = 0; number < num_points; number++) {
151#ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
152#define INCLUDED_volk_32fc_conjugate_32fc_a_H
160#include <immintrin.h>
/*
 * Fragment of a 256-bit x86 kernel that flips the sign bit of every
 * odd-indexed float read through `a` and writes the result through `c`.
 * It uses the aligned load/store intrinsics, so every address passed to them
 * must be 32-byte aligned.
 * NOTE(review): the function name, the other parameters and the
 * declarations of `a`, `c` and `x` are not visible in this excerpt.
 */
164 unsigned int num_points)
166 unsigned int number = 0;
/* Number of full vector iterations; one iteration moves one __m256 (8 floats). */
167 const unsigned int quarterPoints = num_points / 4;
/* -0.f has only the sign bit set, so XOR with this mask negates lanes 1, 3, 5, 7
 * and leaves lanes 0, 2, 4, 6 untouched. */
173 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
175 for (; number < quarterPoints; number++) {
/* Aligned 8-float load from the current input position. */
177 x = _mm256_load_ps((
float*)a);
/* Flip the sign of the odd lanes. */
179 x = _mm256_xor_ps(x, conjugator);
/* Aligned 8-float store to the current output position. */
181 _mm256_store_ps((
float*)c, x);
/* Index of the first point not covered by the vector loop. */
187 number = quarterPoints * 4;
/* Tail loop over the remaining points (body not visible in this excerpt). */
189 for (; number < num_points; number++) {
196#include <pmmintrin.h>
/*
 * Fragment of a 128-bit x86 kernel that flips the sign bit of every
 * odd-indexed float read through `a` and writes the result through `c`.
 * It uses the aligned load/store intrinsics, so every address passed to them
 * must be 16-byte aligned.
 * NOTE(review): the function name, the other parameters and the
 * declarations of `a`, `c` and `x` are not visible in this excerpt.
 */
200 unsigned int num_points)
202 unsigned int number = 0;
/* Number of full vector iterations; one iteration moves one __m128 (4 floats). */
203 const unsigned int halfPoints = num_points / 2;
/* -0.f has only the sign bit set, so XOR with this mask negates lanes 1 and 3
 * and leaves lanes 0 and 2 untouched. */
209 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
211 for (; number < halfPoints; number++) {
/* Aligned 4-float load from the current input position. */
213 x = _mm_load_ps((
float*)a);
/* Flip the sign of the odd lanes. */
215 x = _mm_xor_ps(x, conjugator);
/* Aligned 4-float store to the current output position. */
217 _mm_store_ps((
float*)c, x);
/* An odd num_points leaves exactly one point for the scalar path
 * (its body is not visible in this excerpt). */
223 if ((num_points % 2) != 0) {
/*
 * Fragment of a NEON kernel that negates every odd-indexed float read through
 * `a` and writes the result through `c`, 8 floats per iteration.
 * NOTE(review): the function name, the other parameters and the
 * declarations of `a`, `c`, `x` and `number` are not visible in this excerpt.
 */
234 unsigned int num_points)
/* Number of full vector iterations of the loop below. */
237 const unsigned int quarterPoints = num_points / 4;
243 for (number = 0; number < quarterPoints; number++) {
/* De-interleaving load: even-indexed floats go to x.val[0], odd-indexed
 * floats go to x.val[1]. */
245 x = vld2q_f32((
float*)a);
/* Negate only the odd-indexed floats. */
248 x.val[1] = vnegq_f32(x.val[1]);
/* Re-interleave val[0]/val[1] while storing. */
250 vst2q_f32((
float*)c, x);
/* Tail loop starting at the first point not covered by the vector loop
 * (body not visible in this excerpt). */
256 for (number = quarterPoints * 4; number < num_points; number++) {
/*
 * Fragment of the NEONv8 kernel: two 128-bit registers are loaded, the sign
 * bit of lanes 1 and 3 in each is flipped with an integer XOR, and both are
 * stored to the output.
 * NOTE(review): the input-pointer parameter, the declarations of `a` and `c`,
 * the enclosing loop header and any tail handling are not visible in this
 * excerpt.
 */
266static inline void volk_32fc_conjugate_32fc_neonv8(
lv_32fc_t* cVector,
268 unsigned int num_points)
270 unsigned int n = num_points;
/* Bit pattern of { 0.0f, -0.0f, 0.0f, -0.0f }: only the sign bits of lanes 1
 * and 3 are set. */
275 const uint32x4_t sign_mask =
276 vreinterpretq_u32_f32((float32x4_t){ 0.0f, -0.0f, 0.0f, -0.0f });
/* Two independent 4-float loads. Presumably `a` points at lv_32fc_t elements,
 * so `a + 2` is the next 4 floats -- confirm against the declaration of `a`. */
280 float32x4_t v0 = vld1q_f32((
const float*)a);
281 float32x4_t v1 = vld1q_f32((
const float*)(a + 2));
/* Reinterpret as integers, XOR with the mask to flip the sign of lanes 1 and
 * 3, and reinterpret back to float. */
285 v0 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v0), sign_mask));
286 v1 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v1), sign_mask));
/* Store both registers at the matching output offsets. */
288 vst1q_f32((
float*)c, v0);
289 vst1q_f32((
float*)(c + 2), v1);
307#include <riscv_vector.h>
/*
 * Fragment of the RISC-V Vector kernel: the data behind aVector is read as
 * 64-bit words, bit 63 of each word is flipped, and the result is written to
 * cVector.
 * NOTE(review): the declaration of `aVector` and the closing braces are not
 * visible in this excerpt. Presumably each 64-bit word is one (re, im) float
 * pair with the imaginary part in the upper half, so bit 63 is its sign bit --
 * confirm the memory layout of lv_32fc_t.
 */
309static inline void volk_32fc_conjugate_32fc_rvv(
lv_32fc_t* cVector,
311 unsigned int num_points)
/* Elements still to process. */
313 size_t n = num_points;
/* Mask vector with only bit 63 set in every 64-bit element, built once at the
 * maximum vector length for e64/m8. */
314 vuint64m8_t m = __riscv_vmv_v_x_u64m8(1ull << 63, __riscv_vsetvlmax_e64m8());
/* Strip-mined loop: each pass handles `vl` elements and advances the
 * remaining count and both pointers by that amount. */
315 for (
size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) {
/* Element count for this pass, as returned by vsetvl for `n` remaining. */
316 vl = __riscv_vsetvl_e64m8(n);
317 vuint64m8_t v = __riscv_vle64_v_u64m8((
const uint64_t*)aVector, vl);
/* XOR with the mask flips bit 63 of each element; store the result. */
318 __riscv_vse64((uint64_t*)cVector, __riscv_vxor(v, m, vl), vl);