Vector Optimized Library of Kernels 3.3.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_32fc_s32f_power_spectrum_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
39
40#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
41#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
42
43#include <inttypes.h>
44#include <math.h>
45#include <stdio.h>
46
47#ifdef LV_HAVE_GENERIC
48
49static inline void
51 const lv_32fc_t* complexFFTInput,
52 const float normalizationFactor,
53 unsigned int num_points)
54{
55 // Calculate the Power of the complex point
56 const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
57
58 // Calculate dBm
59 // 50 ohm load assumption
60 // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
61 // 75 ohm load assumption
62 // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
63
64 /*
65 * For generic reference, the code below is a volk-optimized
66 * approach that also leverages a faster log2 calculation
67 * to calculate the log10:
68 * n*log10(x) = n*log2(x)/log2(10) = (n/log2(10)) * log2(x)
69 *
70 * Generic code:
71 *
72 * const float real = *inputPtr++ * iNormalizationFactor;
73 * const float imag = *inputPtr++ * iNormalizationFactor;
74 * realFFTDataPointsPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
75 * realFFTDataPointsPtr++;
76 *
77 */
78
79 // Calc mag^2
80 volk_32fc_magnitude_squared_32f(logPowerOutput, complexFFTInput, num_points);
81
82 // Finish ((real * real) + (imag * imag)) calculation:
83 volk_32f_s32f_multiply_32f(logPowerOutput, logPowerOutput, normFactSq, num_points);
84
85 // The following calculates 10*log10(x) = 10*log2(x)/log2(10) = (10/log2(10))
86 // * log2(x)
87 volk_32f_log2_32f(logPowerOutput, logPowerOutput, num_points);
88 volk_32f_s32f_multiply_32f(
89 logPowerOutput, logPowerOutput, volk_log2to10factor, num_points);
90}
91#endif /* LV_HAVE_GENERIC */
92
93#ifdef LV_HAVE_NEON
94#include <arm_neon.h>
96
97static inline void
99 const lv_32fc_t* complexFFTInput,
100 const float normalizationFactor,
101 unsigned int num_points)
102{
103 float* logPowerOutputPtr = logPowerOutput;
104 const lv_32fc_t* complexFFTInputPtr = complexFFTInput;
105 const float iNormalizationFactor = 1.0 / normalizationFactor;
106 unsigned int number;
107 unsigned int quarter_points = num_points / 4;
108 float32x4x2_t fft_vec;
109 float32x4_t log_pwr_vec;
110 float32x4_t mag_squared_vec;
111
112 const float inv_ln10_10 = 4.34294481903f; // 10.0/ln(10.)
113
114 for (number = 0; number < quarter_points; number++) {
115 // Load
116 fft_vec = vld2q_f32((float*)complexFFTInputPtr);
117 // Prefetch next 4
118 __VOLK_PREFETCH(complexFFTInputPtr + 4);
119 // Normalize
120 fft_vec.val[0] = vmulq_n_f32(fft_vec.val[0], iNormalizationFactor);
121 fft_vec.val[1] = vmulq_n_f32(fft_vec.val[1], iNormalizationFactor);
122 mag_squared_vec = _vmagnitudesquaredq_f32(fft_vec);
123 log_pwr_vec = vmulq_n_f32(_vlogq_f32(mag_squared_vec), inv_ln10_10);
124 // Store
125 vst1q_f32(logPowerOutputPtr, log_pwr_vec);
126 // Move pointers ahead
127 complexFFTInputPtr += 4;
128 logPowerOutputPtr += 4;
129 }
130
131 // deal with the rest
132 for (number = quarter_points * 4; number < num_points; number++) {
133 const float real = lv_creal(*complexFFTInputPtr) * iNormalizationFactor;
134 const float imag = lv_cimag(*complexFFTInputPtr) * iNormalizationFactor;
135
136 *logPowerOutputPtr =
137 volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
138 complexFFTInputPtr++;
139 logPowerOutputPtr++;
140 }
141}
142
143#endif /* LV_HAVE_NEON */
144
145
146#ifdef LV_HAVE_RVV
147#include <riscv_vector.h>
148
149#ifndef LOG_POLY_DEGREE
150#define LOG_POLY_DEGREE 6
151#endif
152
153static inline void volk_32fc_s32f_power_spectrum_32f_rvv(float* logPowerOutput,
154 const lv_32fc_t* complexFFTInput,
155 const float normalizationFactor,
156 unsigned int num_points)
157{
158 size_t vlmax = __riscv_vsetvlmax_e32m2();
159
160#if LOG_POLY_DEGREE == 6
161 const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(3.1157899f, vlmax);
162 const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(-3.3241990f, vlmax);
163 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.5988452f, vlmax);
164 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.2315303f, vlmax);
165 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(3.1821337e-1f, vlmax);
166 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-3.4436006e-2f, vlmax);
167#elif LOG_POLY_DEGREE == 5
168 const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(2.8882704548164776201f, vlmax);
169 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-2.52074962577807006663f, vlmax);
170 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(1.48116647521213171641f, vlmax);
171 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-0.465725644288844778798f, vlmax);
172 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.0596515482674574969533f, vlmax);
173#elif LOG_POLY_DEGREE == 4
174 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.61761038894603480148f, vlmax);
175 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.75647175389045657003f, vlmax);
176 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(0.688243882994381274313f, vlmax);
177 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-0.107254423828329604454f, vlmax);
178#elif LOG_POLY_DEGREE == 3
179 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(2.28330284476918490682f, vlmax);
180 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-1.04913055217340124191f, vlmax);
181 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.204446009836232697516f, vlmax);
182#else
183#error
184#endif
185
186 const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax);
187 const vint32m2_t m1 = __riscv_vreinterpret_i32m2(cf1);
188 const vint32m2_t m2 = __riscv_vmv_v_x_i32m2(0x7FFFFF, vlmax);
189 const vint32m2_t c127 = __riscv_vmv_v_x_i32m2(127, vlmax);
190
191 const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
192
193 size_t n = num_points;
194 for (size_t vl; n > 0; n -= vl, complexFFTInput += vl, logPowerOutput += vl) {
195 vl = __riscv_vsetvl_e32m2(n);
196 vuint64m4_t vc = __riscv_vle64_v_u64m4((const uint64_t*)complexFFTInput, vl);
197 vfloat32m2_t vr = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vc, 0, vl));
198 vfloat32m2_t vi = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vc, 32, vl));
199 vfloat32m2_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl);
200 v = __riscv_vfmul(v, normFactSq, vl);
201
202 vfloat32m2_t a = __riscv_vfabs(v, vl);
203 vfloat32m2_t exp = __riscv_vfcvt_f(
204 __riscv_vsub(__riscv_vsra(__riscv_vreinterpret_i32m2(a), 23, vl), c127, vl),
205 vl);
206 vfloat32m2_t frac = __riscv_vreinterpret_f32m2(
207 __riscv_vor(__riscv_vand(__riscv_vreinterpret_i32m2(v), m2, vl), m1, vl));
208
209 vfloat32m2_t mant = c0;
210 mant = __riscv_vfmadd(mant, frac, c1, vl);
211 mant = __riscv_vfmadd(mant, frac, c2, vl);
212#if LOG_POLY_DEGREE >= 4
213 mant = __riscv_vfmadd(mant, frac, c3, vl);
214#if LOG_POLY_DEGREE >= 5
215 mant = __riscv_vfmadd(mant, frac, c4, vl);
216#if LOG_POLY_DEGREE >= 6
217 mant = __riscv_vfmadd(mant, frac, c5, vl);
218#endif
219#endif
220#endif
221 v = __riscv_vfmacc(exp, mant, __riscv_vfsub(frac, cf1, vl), vl);
222 v = __riscv_vfmul(v, volk_log2to10factor, vl);
223
224 __riscv_vse32(logPowerOutput, v, vl);
225 }
226}
227#endif /*LV_HAVE_RVV*/
228
229
230#ifdef LV_HAVE_RVVSEG
231#include <riscv_vector.h>
232
233#ifndef LOG_POLY_DEGREE
234#define LOG_POLY_DEGREE 6
235#endif
236
237static inline void
238volk_32fc_s32f_power_spectrum_32f_rvvseg(float* logPowerOutput,
239 const lv_32fc_t* complexFFTInput,
240 const float normalizationFactor,
241 unsigned int num_points)
242{
243 size_t vlmax = __riscv_vsetvlmax_e32m2();
244
245#if LOG_POLY_DEGREE == 6
246 const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(3.1157899f, vlmax);
247 const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(-3.3241990f, vlmax);
248 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.5988452f, vlmax);
249 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.2315303f, vlmax);
250 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(3.1821337e-1f, vlmax);
251 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-3.4436006e-2f, vlmax);
252#elif LOG_POLY_DEGREE == 5
253 const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(2.8882704548164776201f, vlmax);
254 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-2.52074962577807006663f, vlmax);
255 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(1.48116647521213171641f, vlmax);
256 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-0.465725644288844778798f, vlmax);
257 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.0596515482674574969533f, vlmax);
258#elif LOG_POLY_DEGREE == 4
259 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.61761038894603480148f, vlmax);
260 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.75647175389045657003f, vlmax);
261 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(0.688243882994381274313f, vlmax);
262 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-0.107254423828329604454f, vlmax);
263#elif LOG_POLY_DEGREE == 3
264 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(2.28330284476918490682f, vlmax);
265 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-1.04913055217340124191f, vlmax);
266 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.204446009836232697516f, vlmax);
267#else
268#error
269#endif
270
271 const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax);
272 const vint32m2_t m1 = __riscv_vreinterpret_i32m2(cf1);
273 const vint32m2_t m2 = __riscv_vmv_v_x_i32m2(0x7FFFFF, vlmax);
274 const vint32m2_t c127 = __riscv_vmv_v_x_i32m2(127, vlmax);
275
276 const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
277
278 size_t n = num_points;
279 for (size_t vl; n > 0; n -= vl, complexFFTInput += vl, logPowerOutput += vl) {
280 vl = __riscv_vsetvl_e32m2(n);
281 vfloat32m2x2_t vc =
282 __riscv_vlseg2e32_v_f32m2x2((const float*)complexFFTInput, vl);
283 vfloat32m2_t vr = __riscv_vget_f32m2(vc, 0);
284 vfloat32m2_t vi = __riscv_vget_f32m2(vc, 1);
285 vfloat32m2_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl);
286 v = __riscv_vfmul(v, normFactSq, vl);
287
288 vfloat32m2_t a = __riscv_vfabs(v, vl);
289 vfloat32m2_t exp = __riscv_vfcvt_f(
290 __riscv_vsub(__riscv_vsra(__riscv_vreinterpret_i32m2(a), 23, vl), c127, vl),
291 vl);
292 vfloat32m2_t frac = __riscv_vreinterpret_f32m2(
293 __riscv_vor(__riscv_vand(__riscv_vreinterpret_i32m2(v), m2, vl), m1, vl));
294
295 vfloat32m2_t mant = c0;
296 mant = __riscv_vfmadd(mant, frac, c1, vl);
297 mant = __riscv_vfmadd(mant, frac, c2, vl);
298#if LOG_POLY_DEGREE >= 4
299 mant = __riscv_vfmadd(mant, frac, c3, vl);
300#if LOG_POLY_DEGREE >= 5
301 mant = __riscv_vfmadd(mant, frac, c4, vl);
302#if LOG_POLY_DEGREE >= 6
303 mant = __riscv_vfmadd(mant, frac, c5, vl);
304#endif
305#endif
306#endif
307 v = __riscv_vfmacc(exp, mant, __riscv_vfsub(frac, cf1, vl), vl);
308 v = __riscv_vfmul(v, volk_log2to10factor, vl);
309
310 __riscv_vse32(logPowerOutput, v, vl);
311 }
312}
313
314#endif /*LV_HAVE_RVVSEG*/
315
316#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */