48#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H
49#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H
// Aligned 256-bit AVX variant of the spectral noise floor kernel: averages the
// samples that do not exceed (mean of all samples + spectralExclusionValue).
// NOTE(review): the line with the function name and first parameter, several
// declarations, the pointer increments and the closing braces are not shown in
// this excerpt; confirm against the full file.
//
// realDataPoints         - input samples, traversed twice (mean pass, filter pass).
// spectralExclusionValue - offset added to the mean to form the rejection threshold.
// num_points             - number of floats in realDataPoints.
// Result is written through noiseFloorAmplitude at the end.
62 const float* realDataPoints,
63 const float spectralExclusionValue,
64 const unsigned int num_points)
66 unsigned int number = 0;
// Count of complete 8-float groups handled by the vector loops.
67 const unsigned int eighthPoints = num_points / 8;
69 const float* dataPointsPtr = realDataPoints;
73 __m256 avgPointsVal = _mm256_setzero_ps();
// Pass 1: accumulate eight running lane sums over all complete groups.
75 for (; number < eighthPoints; number++) {
// Aligned load: dataPointsPtr must be 32-byte aligned.
77 dataPointsVal = _mm256_load_ps(dataPointsPtr);
81 avgPointsVal = _mm256_add_ps(avgPointsVal, dataPointsVal);
// Spill the lane sums to memory and fold them into the scalar total.
84 _mm256_store_ps(avgPointsVector, avgPointsVal);
87 sumMean += avgPointsVector[0];
88 sumMean += avgPointsVector[1];
89 sumMean += avgPointsVector[2];
90 sumMean += avgPointsVector[3];
91 sumMean += avgPointsVector[4];
92 sumMean += avgPointsVector[5];
93 sumMean += avgPointsVector[6];
94 sumMean += avgPointsVector[7];
// Scalar tail: the (num_points % 8) samples the vector loop did not cover.
96 number = eighthPoints * 8;
97 for (; number < num_points; number++) {
98 sumMean += realDataPoints[number];
// Rejection threshold: mean of all samples plus the caller-supplied offset.
104 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
// Rewind to the start of the input for the second pass.
106 dataPointsPtr = realDataPoints;
107 __m256 vMeanAmplitudeVector = _mm256_set1_ps(meanAmplitude);
108 __m256 vOnesVector = _mm256_set1_ps(1.0);
109 __m256 vValidBinCount = _mm256_setzero_ps();
110 avgPointsVal = _mm256_setzero_ps();
// Pass 2: sum and count only the samples at or below the threshold.
// NOTE(review): resetting `number` to 0 before this loop is not shown here.
114 for (; number < eighthPoints; number++) {
116 dataPointsVal = _mm256_load_ps(dataPointsPtr);
// Lane mask is all-ones where sample <= threshold; _CMP_LE_OQ is an ordered
// compare, so NaN samples yield a zero mask and are excluded.
121 compareMask = _mm256_cmp_ps(dataPointsVal, vMeanAmplitudeVector, _CMP_LE_OQ);
// AND with the mask zeroes rejected samples before they are added.
// NOTE(review): the assignment target is on a line not shown - presumably avgPointsVal.
126 _mm256_add_ps(avgPointsVal, _mm256_and_ps(compareMask, dataPointsVal));
// Masked 1.0 per accepted lane gives a per-lane count of accepted samples.
// NOTE(review): assignment target not shown - presumably vValidBinCount.
130 _mm256_add_ps(vValidBinCount, _mm256_and_ps(compareMask, vOnesVector));
// Fold the eight accepted-sample lane sums into sumMean.
// NOTE(review): sumMean must be zeroed before this point; that line is not shown.
134 _mm256_store_ps(avgPointsVector, avgPointsVal);
137 sumMean += avgPointsVector[0];
138 sumMean += avgPointsVector[1];
139 sumMean += avgPointsVector[2];
140 sumMean += avgPointsVector[3];
141 sumMean += avgPointsVector[4];
142 sumMean += avgPointsVector[5];
143 sumMean += avgPointsVector[6];
144 sumMean += avgPointsVector[7];
// Fold the eight per-lane counts into one scalar count (kept as float).
148 _mm256_store_ps(validBinCountVector, vValidBinCount);
150 float validBinCount = 0;
151 validBinCount += validBinCountVector[0];
152 validBinCount += validBinCountVector[1];
153 validBinCount += validBinCountVector[2];
154 validBinCount += validBinCountVector[3];
155 validBinCount += validBinCountVector[4];
156 validBinCount += validBinCountVector[5];
157 validBinCount += validBinCountVector[6];
158 validBinCount += validBinCountVector[7];
// Scalar tail of pass 2: same accept test for the remaining samples.
160 number = eighthPoints * 8;
161 for (; number < num_points; number++) {
162 if (realDataPoints[number] <= meanAmplitude) {
163 sumMean += realDataPoints[number];
164 validBinCount += 1.0;
// Noise floor is the mean of the accepted samples when at least one was accepted.
168 float localNoiseFloorAmplitude = 0;
169 if (validBinCount > 0.0) {
170 localNoiseFloorAmplitude = sumMean / validBinCount;
// NOTE(review): the fallback value assigned when nothing was accepted is cut off here.
172 localNoiseFloorAmplitude =
176 *noiseFloorAmplitude = localNoiseFloorAmplitude;
181#include <xmmintrin.h>
// Aligned 128-bit SSE variant of the spectral noise floor kernel: averages the
// samples that do not exceed (mean of all samples + spectralExclusionValue).
// NOTE(review): the function-name line, some declarations, the pointer
// increments and the closing braces are not shown in this excerpt.
//
// realDataPoints         - input samples, traversed twice.
// spectralExclusionValue - offset added to the mean to form the rejection threshold.
// num_points             - number of floats in realDataPoints.
// Result is written through noiseFloorAmplitude at the end.
185 const float* realDataPoints,
186 const float spectralExclusionValue,
187 const unsigned int num_points)
189 unsigned int number = 0;
// Count of complete 4-float groups handled by the vector loops.
190 const unsigned int quarterPoints = num_points / 4;
192 const float* dataPointsPtr = realDataPoints;
195 __m128 dataPointsVal;
196 __m128 avgPointsVal = _mm_setzero_ps();
// Pass 1: accumulate four running lane sums over all complete groups.
198 for (; number < quarterPoints; number++) {
// Aligned load: dataPointsPtr must be 16-byte aligned.
200 dataPointsVal = _mm_load_ps(dataPointsPtr);
204 avgPointsVal = _mm_add_ps(avgPointsVal, dataPointsVal);
// Spill the lane sums and fold them into the scalar total.
207 _mm_store_ps(avgPointsVector, avgPointsVal);
210 sumMean += avgPointsVector[0];
211 sumMean += avgPointsVector[1];
212 sumMean += avgPointsVector[2];
213 sumMean += avgPointsVector[3];
// Scalar tail: the (num_points % 4) samples the vector loop did not cover.
215 number = quarterPoints * 4;
216 for (; number < num_points; number++) {
217 sumMean += realDataPoints[number];
// Rejection threshold: mean of all samples plus the caller-supplied offset.
223 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
// Rewind to the start of the input for the second pass.
225 dataPointsPtr = realDataPoints;
226 __m128 vMeanAmplitudeVector = _mm_set_ps1(meanAmplitude);
227 __m128 vOnesVector = _mm_set_ps1(1.0);
228 __m128 vValidBinCount = _mm_setzero_ps();
229 avgPointsVal = _mm_setzero_ps();
// Pass 2: sum and count only the samples at or below the threshold.
// NOTE(review): resetting `number` to 0 before this loop is not shown here.
233 for (; number < quarterPoints; number++) {
235 dataPointsVal = _mm_load_ps(dataPointsPtr);
// Lane mask is all-ones where sample <= threshold, all-zeros otherwise.
240 compareMask = _mm_cmple_ps(dataPointsVal, vMeanAmplitudeVector);
// AND with the mask zeroes rejected samples before they are added.
244 avgPointsVal = _mm_add_ps(avgPointsVal, _mm_and_ps(compareMask, dataPointsVal));
// Masked 1.0 per accepted lane gives a per-lane count of accepted samples.
247 vValidBinCount = _mm_add_ps(vValidBinCount, _mm_and_ps(compareMask, vOnesVector));
// Fold the four accepted-sample lane sums into sumMean.
// NOTE(review): sumMean must be zeroed before this point; that line is not shown.
251 _mm_store_ps(avgPointsVector, avgPointsVal);
254 sumMean += avgPointsVector[0];
255 sumMean += avgPointsVector[1];
256 sumMean += avgPointsVector[2];
257 sumMean += avgPointsVector[3];
// Fold the four per-lane counts into one scalar count (kept as float).
261 _mm_store_ps(validBinCountVector, vValidBinCount);
263 float validBinCount = 0;
264 validBinCount += validBinCountVector[0];
265 validBinCount += validBinCountVector[1];
266 validBinCount += validBinCountVector[2];
267 validBinCount += validBinCountVector[3];
// Scalar tail of pass 2: same accept test for the remaining samples.
269 number = quarterPoints * 4;
270 for (; number < num_points; number++) {
271 if (realDataPoints[number] <= meanAmplitude) {
272 sumMean += realDataPoints[number];
273 validBinCount += 1.0;
// Noise floor is the mean of the accepted samples when at least one was accepted.
277 float localNoiseFloorAmplitude = 0;
278 if (validBinCount > 0.0) {
279 localNoiseFloorAmplitude = sumMean / validBinCount;
// NOTE(review): the fallback value assigned when nothing was accepted is cut off here.
281 localNoiseFloorAmplitude =
285 *noiseFloorAmplitude = localNoiseFloorAmplitude;
290#ifdef LV_HAVE_GENERIC
// Portable scalar variant (compiled under LV_HAVE_GENERIC): averages the samples
// that do not exceed (mean of all samples + spectralExclusionValue).
// NOTE(review): the function-name line, the declarations of sumMean/number and
// the closing braces are not shown in this excerpt.
//
// realDataPoints         - input samples, traversed twice.
// spectralExclusionValue - offset added to the mean to form the rejection threshold.
// num_points             - number of floats in realDataPoints.
// Result is written through noiseFloorAmplitude at the end.
294 const float* realDataPoints,
295 const float spectralExclusionValue,
296 const unsigned int num_points)
// Pass 1: total of all samples.
301 for (number = 0; number < num_points; number++) {
303 sumMean += realDataPoints[number];
// Rejection threshold: mean of all samples plus the caller-supplied offset.
309 const float meanAmplitude = (sumMean / num_points) + spectralExclusionValue;
// Start from "all points accepted".
// NOTE(review): the decrement for rejected samples and the reset of sumMean
// before the loop are on lines not shown here.
313 unsigned int newNumDataPoints = num_points;
// Pass 2: re-accumulate only the samples at or below the threshold.
314 for (number = 0; number < num_points; number++) {
315 if (realDataPoints[number] <= meanAmplitude)
316 sumMean += realDataPoints[number];
321 float localNoiseFloorAmplitude = 0.0;
// With no accepted samples, fall back to the threshold to avoid dividing by zero.
322 if (newNumDataPoints == 0)
323 localNoiseFloorAmplitude = meanAmplitude;
// Otherwise the noise floor is the mean of the accepted samples
// (presumably the else branch; the `else` line is not shown).
325 localNoiseFloorAmplitude = sumMean / ((float)newNumDataPoints);
327 *noiseFloorAmplitude = localNoiseFloorAmplitude;
// NEON variant processing four floats per iteration: averages the samples that
// do not exceed (mean of all samples + spectralExclusionValue).
// NOTE(review): the function-name line, the pointer increments, the reset of
// `number` before the second loop and the closing braces are not shown here.
//
// realDataPoints         - input samples, traversed twice.
// spectralExclusionValue - offset added to the mean to form the rejection threshold.
// num_points             - number of floats in realDataPoints.
// Result is written through noiseFloorAmplitude at the end.
336 const float* realDataPoints,
337 const float spectralExclusionValue,
338 const unsigned int num_points)
340 unsigned int number = 0;
// Count of complete 4-float groups handled by the vector loops.
341 const unsigned int quarterPoints = num_points / 4;
343 const float* dataPointsPtr = realDataPoints;
344 float32x4_t avgPointsVal = vdupq_n_f32(0.0f);
// Pass 1: accumulate four running lane sums over all complete groups.
347 for (; number < quarterPoints; number++) {
348 float32x4_t dataPointsVal = vld1q_f32(dataPointsPtr);
350 avgPointsVal = vaddq_f32(avgPointsVal, dataPointsVal);
// Horizontal reduction: add low and high halves, then a pairwise add leaves
// the four-lane total in lane 0.
354 float32x2_t sum2 = vadd_f32(vget_low_f32(avgPointsVal), vget_high_f32(avgPointsVal));
355 float sumMean = vget_lane_f32(vpadd_f32(sum2, sum2), 0);
// Scalar tail: the (num_points % 4) samples the vector loop did not cover.
357 number = quarterPoints * 4;
358 for (; number < num_points; number++) {
359 sumMean += realDataPoints[number];
// Rejection threshold: mean of all samples plus the caller-supplied offset.
363 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
// Rewind to the start of the input for the second pass.
365 dataPointsPtr = realDataPoints;
366 float32x4_t vMeanAmplitudeVector = vdupq_n_f32(meanAmplitude);
367 float32x4_t vOnesVector = vdupq_n_f32(1.0f);
368 float32x4_t vValidBinCount = vdupq_n_f32(0.0f);
369 avgPointsVal = vdupq_n_f32(0.0f);
// Pass 2: sum and count only the samples at or below the threshold.
373 for (; number < quarterPoints; number++) {
374 float32x4_t dataPointsVal = vld1q_f32(dataPointsPtr);
// Lane mask is all-ones where sample <= threshold.
378 uint32x4_t compareMask = vcleq_f32(dataPointsVal, vMeanAmplitudeVector);
// Bit-select keeps accepted samples and substitutes 0.0 for rejected ones.
381 float32x4_t maskedData = vbslq_f32(compareMask, dataPointsVal, vdupq_n_f32(0.0f));
382 avgPointsVal = vaddq_f32(avgPointsVal, maskedData);
// Same selection on 1.0 yields a per-lane count of accepted samples.
385 float32x4_t maskedOnes = vbslq_f32(compareMask, vOnesVector, vdupq_n_f32(0.0f));
386 vValidBinCount = vaddq_f32(vValidBinCount, maskedOnes);
// Overwrite sumMean with the horizontal total of the accepted samples.
390 sum2 = vadd_f32(vget_low_f32(avgPointsVal), vget_high_f32(avgPointsVal));
391 sumMean = vget_lane_f32(vpadd_f32(sum2, sum2), 0);
// Horizontal total of the per-lane counts.
// NOTE(review): the declaration receiving this value (cnt2) is on a line not shown.
394 vadd_f32(vget_low_f32(vValidBinCount), vget_high_f32(vValidBinCount));
395 float validBinCount = vget_lane_f32(vpadd_f32(cnt2, cnt2), 0);
// Scalar tail of pass 2: same accept test for the remaining samples.
397 number = quarterPoints * 4;
398 for (; number < num_points; number++) {
399 if (realDataPoints[number] <= meanAmplitude) {
400 sumMean += realDataPoints[number];
401 validBinCount += 1.0f;
// Noise floor is the mean of the accepted samples when at least one was accepted.
405 float localNoiseFloorAmplitude = 0;
406 if (validBinCount > 0.0f) {
407 localNoiseFloorAmplitude = sumMean / validBinCount;
// Fallback to the threshold (presumably the else branch; `else` line not shown).
409 localNoiseFloorAmplitude = meanAmplitude;
412 *noiseFloorAmplitude = localNoiseFloorAmplitude;
// NEONv8 variant: averages the samples that do not exceed
// (mean of all samples + spectralExclusionValue). Processes eight floats per
// iteration using two independent 4-lane accumulators and reduces them with
// the across-vector add vaddvq_f32.
// NOTE(review): the return-type line, the pointer increments, the reset of
// `number` before the second loop and the closing braces are not shown here.
//
// noiseFloorAmplitude    - output; receives the computed noise floor.
// realDataPoints         - input samples, traversed twice.
// spectralExclusionValue - offset added to the mean to form the rejection threshold.
// num_points             - number of floats in realDataPoints.
420volk_32f_s32f_calc_spectral_noise_floor_32f_neonv8(
float* noiseFloorAmplitude,
421 const float* realDataPoints,
422 const float spectralExclusionValue,
423 const unsigned int num_points)
425 unsigned int number = 0;
// Count of complete 8-float groups handled by the vector loops.
426 const unsigned int eighthPoints = num_points / 8;
428 const float* dataPointsPtr = realDataPoints;
429 float32x4_t avgPointsVal0 = vdupq_n_f32(0.0f);
430 float32x4_t avgPointsVal1 = vdupq_n_f32(0.0f);
// Pass 1: accumulate the two halves of each 8-float group separately.
433 for (; number < eighthPoints; number++) {
435 float32x4_t dataPointsVal0 = vld1q_f32(dataPointsPtr);
436 float32x4_t dataPointsVal1 = vld1q_f32(dataPointsPtr + 4);
438 avgPointsVal0 = vaddq_f32(avgPointsVal0, dataPointsVal0);
439 avgPointsVal1 = vaddq_f32(avgPointsVal1, dataPointsVal1);
// Merge the two accumulators, then sum across lanes to a scalar.
443 float32x4_t avgPointsVal = vaddq_f32(avgPointsVal0, avgPointsVal1);
444 float sumMean = vaddvq_f32(avgPointsVal);
// Scalar tail: the (num_points % 8) samples the vector loop did not cover.
446 number = eighthPoints * 8;
447 for (; number < num_points; number++) {
448 sumMean += realDataPoints[number];
// Rejection threshold: mean of all samples plus the caller-supplied offset.
452 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
// Rewind the input and clear the accumulators for the second pass.
454 dataPointsPtr = realDataPoints;
455 float32x4_t vMeanAmplitudeVector = vdupq_n_f32(meanAmplitude);
456 float32x4_t vOnesVector = vdupq_n_f32(1.0f);
457 float32x4_t vValidBinCount0 = vdupq_n_f32(0.0f);
458 float32x4_t vValidBinCount1 = vdupq_n_f32(0.0f);
459 avgPointsVal0 = vdupq_n_f32(0.0f);
460 avgPointsVal1 = vdupq_n_f32(0.0f);
// Pass 2: sum and count only the samples at or below the threshold.
464 for (; number < eighthPoints; number++) {
466 float32x4_t dataPointsVal0 = vld1q_f32(dataPointsPtr);
467 float32x4_t dataPointsVal1 = vld1q_f32(dataPointsPtr + 4);
// Lane masks are all-ones where sample <= threshold.
471 uint32x4_t compareMask0 = vcleq_f32(dataPointsVal0, vMeanAmplitudeVector);
472 uint32x4_t compareMask1 = vcleq_f32(dataPointsVal1, vMeanAmplitudeVector);
// Bit-select keeps accepted samples and substitutes 0.0 for rejected ones.
475 float32x4_t maskedData0 =
476 vbslq_f32(compareMask0, dataPointsVal0, vdupq_n_f32(0.0f));
477 float32x4_t maskedData1 =
478 vbslq_f32(compareMask1, dataPointsVal1, vdupq_n_f32(0.0f));
479 avgPointsVal0 = vaddq_f32(avgPointsVal0, maskedData0);
480 avgPointsVal1 = vaddq_f32(avgPointsVal1, maskedData1);
// Same selection on 1.0 yields per-lane counts of accepted samples.
483 float32x4_t maskedOnes0 = vbslq_f32(compareMask0, vOnesVector, vdupq_n_f32(0.0f));
484 float32x4_t maskedOnes1 = vbslq_f32(compareMask1, vOnesVector, vdupq_n_f32(0.0f));
485 vValidBinCount0 = vaddq_f32(vValidBinCount0, maskedOnes0);
486 vValidBinCount1 = vaddq_f32(vValidBinCount1, maskedOnes1);
// Overwrite sumMean with the total of the accepted samples.
490 avgPointsVal = vaddq_f32(avgPointsVal0, avgPointsVal1);
491 sumMean = vaddvq_f32(avgPointsVal);
// Total number of accepted samples (kept as float).
493 float32x4_t vValidBinCount = vaddq_f32(vValidBinCount0, vValidBinCount1);
494 float validBinCount = vaddvq_f32(vValidBinCount);
// Scalar tail of pass 2: same accept test for the remaining samples.
496 number = eighthPoints * 8;
497 for (; number < num_points; number++) {
498 if (realDataPoints[number] <= meanAmplitude) {
499 sumMean += realDataPoints[number];
500 validBinCount += 1.0f;
// Noise floor is the mean of the accepted samples when at least one was accepted.
504 float localNoiseFloorAmplitude = 0;
505 if (validBinCount > 0.0f) {
506 localNoiseFloorAmplitude = sumMean / validBinCount;
// Fallback to the threshold (presumably the else branch; `else` line not shown).
508 localNoiseFloorAmplitude = meanAmplitude;
511 *noiseFloorAmplitude = localNoiseFloorAmplitude;
517#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_u_H
518#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_u_H
525#include <immintrin.h>
// Unaligned 256-bit AVX variant of the spectral noise floor kernel: averages the
// samples that do not exceed (mean of all samples + spectralExclusionValue).
// Uses loadu/storeu, so no alignment is required of the buffers it touches.
// NOTE(review): the function-name line, several declarations, the pointer
// increments and the closing braces are not shown in this excerpt.
//
// realDataPoints         - input samples, traversed twice.
// spectralExclusionValue - offset added to the mean to form the rejection threshold.
// num_points             - number of floats in realDataPoints.
// Result is written through noiseFloorAmplitude at the end.
529 const float* realDataPoints,
530 const float spectralExclusionValue,
531 const unsigned int num_points)
533 unsigned int number = 0;
// Count of complete 8-float groups handled by the vector loops.
534 const unsigned int eighthPoints = num_points / 8;
536 const float* dataPointsPtr = realDataPoints;
539 __m256 dataPointsVal;
540 __m256 avgPointsVal = _mm256_setzero_ps();
// Pass 1: accumulate eight running lane sums over all complete groups.
542 for (; number < eighthPoints; number++) {
544 dataPointsVal = _mm256_loadu_ps(dataPointsPtr);
548 avgPointsVal = _mm256_add_ps(avgPointsVal, dataPointsVal);
// Spill the lane sums to memory and fold them into the scalar total.
551 _mm256_storeu_ps(avgPointsVector, avgPointsVal);
554 sumMean += avgPointsVector[0];
555 sumMean += avgPointsVector[1];
556 sumMean += avgPointsVector[2];
557 sumMean += avgPointsVector[3];
558 sumMean += avgPointsVector[4];
559 sumMean += avgPointsVector[5];
560 sumMean += avgPointsVector[6];
561 sumMean += avgPointsVector[7];
// Scalar tail: the (num_points % 8) samples the vector loop did not cover.
563 number = eighthPoints * 8;
564 for (; number < num_points; number++) {
565 sumMean += realDataPoints[number];
// Rejection threshold: mean of all samples plus the caller-supplied offset.
571 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
// Rewind to the start of the input for the second pass.
573 dataPointsPtr = realDataPoints;
574 __m256 vMeanAmplitudeVector = _mm256_set1_ps(meanAmplitude);
575 __m256 vOnesVector = _mm256_set1_ps(1.0);
576 __m256 vValidBinCount = _mm256_setzero_ps();
577 avgPointsVal = _mm256_setzero_ps();
// Pass 2: sum and count only the samples at or below the threshold.
// NOTE(review): resetting `number` to 0 before this loop is not shown here.
581 for (; number < eighthPoints; number++) {
583 dataPointsVal = _mm256_loadu_ps(dataPointsPtr);
// Lane mask is all-ones where sample <= threshold; _CMP_LE_OQ is an ordered
// compare, so NaN samples yield a zero mask and are excluded.
588 compareMask = _mm256_cmp_ps(dataPointsVal, vMeanAmplitudeVector, _CMP_LE_OQ);
// AND with the mask zeroes rejected samples before they are added.
// NOTE(review): the assignment target is on a line not shown - presumably avgPointsVal.
593 _mm256_add_ps(avgPointsVal, _mm256_and_ps(compareMask, dataPointsVal));
// Masked 1.0 per accepted lane gives a per-lane count of accepted samples.
// NOTE(review): assignment target not shown - presumably vValidBinCount.
597 _mm256_add_ps(vValidBinCount, _mm256_and_ps(compareMask, vOnesVector));
// Fold the eight accepted-sample lane sums into sumMean.
// NOTE(review): sumMean must be zeroed before this point; that line is not shown.
601 _mm256_storeu_ps(avgPointsVector, avgPointsVal);
604 sumMean += avgPointsVector[0];
605 sumMean += avgPointsVector[1];
606 sumMean += avgPointsVector[2];
607 sumMean += avgPointsVector[3];
608 sumMean += avgPointsVector[4];
609 sumMean += avgPointsVector[5];
610 sumMean += avgPointsVector[6];
611 sumMean += avgPointsVector[7];
// Fold the eight per-lane counts into one scalar count (kept as float).
615 _mm256_storeu_ps(validBinCountVector, vValidBinCount);
617 float validBinCount = 0;
618 validBinCount += validBinCountVector[0];
619 validBinCount += validBinCountVector[1];
620 validBinCount += validBinCountVector[2];
621 validBinCount += validBinCountVector[3];
622 validBinCount += validBinCountVector[4];
623 validBinCount += validBinCountVector[5];
624 validBinCount += validBinCountVector[6];
625 validBinCount += validBinCountVector[7];
// Scalar tail of pass 2: same accept test for the remaining samples.
627 number = eighthPoints * 8;
628 for (; number < num_points; number++) {
629 if (realDataPoints[number] <= meanAmplitude) {
630 sumMean += realDataPoints[number];
631 validBinCount += 1.0;
// Noise floor is the mean of the accepted samples when at least one was accepted.
635 float localNoiseFloorAmplitude = 0;
636 if (validBinCount > 0.0) {
637 localNoiseFloorAmplitude = sumMean / validBinCount;
// NOTE(review): the fallback value assigned when nothing was accepted is cut off here.
639 localNoiseFloorAmplitude =
643 *noiseFloorAmplitude = localNoiseFloorAmplitude;
648#include <riscv_vector.h>
// RISC-V Vector variant: averages the samples that do not exceed
// (mean of all samples + spectralExclusionValue).
// NOTE(review): the return-type line, the declaration of `sum`, the definition
// of `v` used in the final reduction and the closing braces are not shown here.
//
// noiseFloorAmplitude    - output; receives the computed noise floor.
// realDataPoints         - input samples; the pointer is advanced inside the loop.
// spectralExclusionValue - offset added to the mean to form the rejection threshold.
// num_points             - number of floats in realDataPoints.
651volk_32f_s32f_calc_spectral_noise_floor_32f_rvv(
float* noiseFloorAmplitude,
652 const float* realDataPoints,
653 const float spectralExclusionValue,
654 const unsigned int num_points)
// Pass 1 is delegated to the accumulator kernel; `sum` is used below as the
// total of all samples (helper semantics assumed from its use - confirm).
657 volk_32f_accumulator_s32f_rvv(&sum, realDataPoints, num_points);
// Rejection threshold: mean of all samples plus the caller-supplied offset.
658 float meanAmplitude = sum / num_points + spectralExclusionValue;
// Broadcast the threshold and zero the per-lane running sums at maximum VL.
660 vfloat32m8_t vbin = __riscv_vfmv_v_f_f32m8(meanAmplitude, __riscv_vsetvlmax_e32m8());
661 vfloat32m8_t vsum = __riscv_vfmv_v_f_f32m8(0, __riscv_vsetvlmax_e32m8());
662 size_t n = num_points, binCount = 0;
// Pass 2: strip-mined loop; each iteration consumes vl samples.
663 for (
size_t vl; n > 0; n -= vl, realDataPoints += vl) {
664 vl = __riscv_vsetvl_e32m8(n);
665 vfloat32m8_t v = __riscv_vle32_v_f32m8(realDataPoints, vl);
// Mask bit is set where sample <= threshold.
666 vbool4_t m = __riscv_vmfle(v, vbin, vl);
// Population count of the mask = number of accepted samples in this strip.
667 binCount += __riscv_vcpop(m, vl);
// Masked add with tail-undisturbed/mask-undisturbed policy: rejected lanes and
// lanes beyond vl keep their previous running sum.
668 vsum = __riscv_vfadd_tumu(m, vsum, vsum, v, vl);
// Final reduction at LMUL=1: unordered sum of `v` seeded with a zero scalar.
// NOTE(review): `v` here is defined on a line not shown (presumably vsum
// narrowed to m1).
670 size_t vl = __riscv_vsetvlmax_e32m1();
672 vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl);
673 sum = __riscv_vfmv_f(__riscv_vfredusum(v, z, vl));
// With no accepted samples, fall back to the threshold instead of dividing by zero.
675 *noiseFloorAmplitude = binCount == 0 ? meanAmplitude : sum / binCount;