Vector Optimized Library of Kernels 3.3.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
kernel_tests.h
Go to the documentation of this file.
/* -*- c++ -*- */
/*
 * Copyright 2014 - 2021 Free Software Foundation, Inc.
 * Copyright 2023 - 2025 Magnus Lundmark <magnuslundmark@gmail.com>
 *
 * This file is part of VOLK
 *
 * SPDX-License-Identifier: LGPL-3.0-or-later
 */

#include "qa_utils.h"

#include <volk/volk.h>
#include <cmath>
#include <complex>
#include <limits>
#include <string>
#include <vector>

// Macros for initializing volk_test_case_t. Macros are needed to generate
// function names of the pattern kernel_name_*

// For puppets we need to get all the func_variants for the puppet and just
// keep track of the actual function name to write to results.
//
// Expands to a volk_test_case_t(...) expression built from:
//   - the result of calling func##_get_func_desc()
//   - func##_manual, cast to the generic function-pointer type void (*)()
//   - the stringified puppet name (#func)
//   - the stringified puppet-master name (#puppet_master_func)
//   - test_params, passed through unchanged
#define VOLK_INIT_PUPP(func, puppet_master_func, test_params) \
    volk_test_case_t(func##_get_func_desc(),                  \
                     (void (*)())func##_manual,               \
                     std::string(#func),                      \
                     std::string(#puppet_master_func),        \
                     test_params)

// Regular (non-puppet) kernel test case.
//
// Expands to a volk_test_case_t(...) expression built from:
//   - the result of calling func##_get_func_desc()
//   - func##_manual, cast to the generic function-pointer type void (*)()
//   - the stringified kernel name (#func)
//   - test_params, passed through unchanged
#define VOLK_INIT_TEST(func, test_params)       \
    volk_test_case_t(func##_get_func_desc(),    \
                     (void (*)())func##_manual, \
                     std::string(#func),        \
                     test_params)

// Appends `test` to a variable named `test_cases` that must be in scope at the
// point of use. The trailing semicolon is part of the macro, so call sites are
// written without one.
#define QA(test) test_cases.push_back(test);
37std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
38{
39 const float inf = std::numeric_limits<float>::infinity();
40 const float nan = std::nanf("");
41
42 // Some kernels need a lower tolerance
43 volk_test_params_t test_params_inacc = test_params.make_tol(1e-2);
44 volk_test_params_t test_params_inacc_tenth = test_params.make_tol(1e-1);
45
46 volk_test_params_t test_params_power(test_params);
47 test_params_power.set_scalar(2.5);
48
49 volk_test_params_t test_params_clamp(test_params);
50 test_params_clamp.set_scalar(-.5f);
51
52 volk_test_params_t test_params_rotator(test_params);
53 test_params_rotator.set_scalar(std::polar(1.0f, 0.1f));
54 test_params_rotator.set_tol(1e-3);
55
56 volk_test_params_t test_params_snf(test_params);
57 test_params_snf.set_scalar(0.5);
58 test_params_snf.set_tol(1e-4);
59
60 std::vector<volk_test_case_t> test_cases;
61 QA(VOLK_INIT_PUPP(volk_64u_popcntpuppet_64u, volk_64u_popcnt, test_params))
62 QA(VOLK_INIT_PUPP(volk_16u_byteswappuppet_16u, volk_16u_byteswap, test_params))
63 QA(VOLK_INIT_PUPP(volk_32u_byteswappuppet_32u, volk_32u_byteswap, test_params))
64 QA(VOLK_INIT_PUPP(volk_32u_popcntpuppet_32u, volk_32u_popcnt, test_params))
65 QA(VOLK_INIT_PUPP(volk_64u_byteswappuppet_64u, volk_64u_byteswap, test_params))
66 QA(VOLK_INIT_PUPP(volk_32fc_s32fc_rotator2puppet_32fc,
67 volk_32fc_s32fc_x2_rotator2_32fc,
68 test_params_rotator))
70 volk_8u_conv_k7_r2puppet_8u, volk_8u_x4_conv_k7_r2_8u, test_params.make_tol(0)))
71 QA(VOLK_INIT_PUPP(volk_32f_x2_fm_detectpuppet_32f,
72 volk_32f_s32f_32f_fm_detect_32f,
73 test_params.make_absolute(1e-6)))
74 QA(VOLK_INIT_TEST(volk_16ic_s32f_deinterleave_real_32f, test_params))
75 QA(VOLK_INIT_TEST(volk_16ic_deinterleave_real_8i, test_params))
76 QA(VOLK_INIT_TEST(volk_16ic_deinterleave_16i_x2, test_params))
77 QA(VOLK_INIT_TEST(volk_16ic_s32f_deinterleave_32f_x2, test_params))
78 QA(VOLK_INIT_TEST(volk_16ic_deinterleave_real_16i, test_params))
79 QA(VOLK_INIT_TEST(volk_16ic_magnitude_16i, test_params))
80 QA(VOLK_INIT_TEST(volk_16ic_s32f_magnitude_32f, test_params))
81 QA(VOLK_INIT_TEST(volk_16ic_convert_32fc, test_params))
82 QA(VOLK_INIT_TEST(volk_16ic_x2_multiply_16ic, test_params))
83 QA(VOLK_INIT_TEST(volk_16ic_x2_dot_prod_16ic, test_params))
84 QA(VOLK_INIT_TEST(volk_16i_s32f_convert_32f, test_params))
85 QA(VOLK_INIT_TEST(volk_16i_convert_8i, test_params))
86 QA(VOLK_INIT_TEST(volk_16i_32fc_dot_prod_32fc, test_params.make_absolute(1e-1)))
87 QA(VOLK_INIT_TEST(volk_32f_accumulator_s32f, test_params.make_absolute(2e-2)))
88 QA(VOLK_INIT_TEST(volk_32f_x2_add_32f, test_params))
89
90 // Index kernels need identical values to test tie-breaking (first index wins)
91 volk_test_params_t test_params_index(test_params.make_tol(0));
92 test_params_index.add_float_edge_cases({
93 1.0f,
94 1.0f,
95 1.0f,
96 1.0f, // 4 identical (SSE lane width)
97 1.0f,
98 1.0f,
99 1.0f,
100 1.0f, // 8 total (AVX lane width)
101 1.0f,
102 1.0f,
103 1.0f,
104 1.0f, // 12
105 1.0f,
106 1.0f,
107 1.0f,
108 1.0f, // 16 total (AVX512 lane width)
109 });
110 QA(VOLK_INIT_TEST(volk_32f_index_max_16u, test_params_index))
111 QA(VOLK_INIT_TEST(volk_32f_index_max_32u, test_params_index))
112 QA(VOLK_INIT_TEST(volk_32f_index_min_16u, test_params_index))
113 QA(VOLK_INIT_TEST(volk_32f_index_min_32u, test_params_index))
114 QA(VOLK_INIT_TEST(volk_32fc_32f_multiply_32fc, test_params))
115 QA(VOLK_INIT_TEST(volk_32fc_32f_add_32fc, test_params))
116
117 volk_test_params_t test_params_log2(test_params.make_absolute(5e-6));
118 test_params_log2.add_float_edge_cases({ -1.f, 0.f, inf, 65536.f });
119 QA(VOLK_INIT_TEST(volk_32f_log2_32f, test_params_log2))
120
121 QA(VOLK_INIT_TEST(volk_32f_expfast_32f, test_params_inacc_tenth))
122 QA(VOLK_INIT_TEST(volk_32f_sin_32f, test_params))
123 QA(VOLK_INIT_TEST(volk_32f_cos_32f, test_params))
124 QA(VOLK_INIT_TEST(volk_32f_sincos_32f_x2, test_params))
125 QA(VOLK_INIT_TEST(volk_32f_tan_32f, test_params_inacc))
126
127 volk_test_params_t test_params_atan(test_params);
128 test_params_atan.add_float_edge_cases({ std::nanf(""),
129 std::numeric_limits<float>::infinity(),
130 -std::numeric_limits<float>::infinity(),
131 0.0f,
132 -0.0f,
133 1e10f,
134 -1e10f,
135 1.0f,
136 -1.0f });
137 QA(VOLK_INIT_TEST(volk_32f_atan_32f, test_params_atan))
138
139 volk_test_params_t test_params_asin(test_params);
140 test_params_asin.set_tol(1e-5);
141 test_params_asin.add_float_edge_cases({ std::nanf(""),
142 1.0f,
143 -1.0f,
144 0.0f,
145 -0.0f,
146 0.5f,
147 -0.5f,
148 0.99f,
149 -0.99f,
150 0.707107f,
151 -0.707107f });
152 QA(VOLK_INIT_TEST(volk_32f_asin_32f, test_params_asin))
153 QA(VOLK_INIT_TEST(volk_32f_acos_32f, test_params_asin))
154 QA(VOLK_INIT_TEST(volk_32fc_s32f_power_32fc, test_params_power))
155 QA(VOLK_INIT_TEST(volk_32f_s32f_calc_spectral_noise_floor_32f, test_params_snf))
156
157 volk_test_params_t test_params_atan2(test_params);
158 test_params_atan2.add_complex_edge_cases(
159 { lv_cmake(0.0f, 0.0f), // atan2(0, 0) = 0
160 lv_cmake(0.0f, -0.0f), // atan2(-0, 0) = -0 (preserve sign)
161 lv_cmake(0.0f, 1.0f), // atan2(1, 0) = π/2
162 lv_cmake(0.0f, -1.0f), // atan2(-1, 0) = -π/2
163 lv_cmake(1.0f, 0.0f), // atan2(0, 1) = 0
164 lv_cmake(-1.0f, 0.0f), // atan2(0, -1) = π
165 lv_cmake(1.0f, 1.0f), // atan2(1, 1) = π/4
166 lv_cmake(-1.0f, 1.0f), // atan2(1, -1) = 3π/4
167 lv_cmake(-1.0f, -1.0f), // atan2(-1, -1) = -3π/4
168 lv_cmake(1.0f, -1.0f), // atan2(-1, 1) = -π/4
169 lv_cmake(inf, inf), // atan2(inf, inf) = π/4
170 lv_cmake(inf, -inf), // atan2(-inf, inf) = -π/4
171 lv_cmake(-inf, inf), // atan2(inf, -inf) = 3π/4
172 lv_cmake(-inf, -inf), // atan2(-inf, -inf) = -3π/4
173 lv_cmake(inf, 0.0f), // atan2(0, inf) = 0
174 lv_cmake(-inf, 0.0f), // atan2(0, -inf) = π
175 lv_cmake(1.0f, inf), // atan2(inf, 1) = π/2
176 lv_cmake(1.0f, -inf), // atan2(-inf, 1) = -π/2
177 lv_cmake(nan, 1.0f), // atan2(1, nan) = nan (propagate)
178 lv_cmake(1.0f, nan) }); // atan2(nan, 1) = nan (propagate)
179 QA(VOLK_INIT_TEST(volk_32fc_s32f_atan2_32f, test_params_atan2))
180 QA(VOLK_INIT_TEST(volk_32fc_x2_conjugate_dot_prod_32fc,
181 test_params.make_absolute(2e-2)))
182 QA(VOLK_INIT_TEST(volk_32fc_deinterleave_32f_x2, test_params))
183 QA(VOLK_INIT_TEST(volk_32fc_accumulator_s32fc, test_params.make_absolute(3e-2)))
184 QA(VOLK_INIT_TEST(volk_32fc_deinterleave_64f_x2, test_params))
185 QA(VOLK_INIT_TEST(volk_32fc_s32f_deinterleave_real_16i, test_params.make_tol(1)))
186 QA(VOLK_INIT_TEST(volk_32fc_deinterleave_imag_32f, test_params))
187 QA(VOLK_INIT_TEST(volk_32fc_deinterleave_real_32f, test_params))
188 QA(VOLK_INIT_TEST(volk_32fc_deinterleave_real_64f, test_params))
189 QA(VOLK_INIT_TEST(volk_32fc_x2_dot_prod_32fc, test_params.make_absolute(2e-2)))
190 QA(VOLK_INIT_TEST(volk_32fc_32f_dot_prod_32fc, test_params.make_absolute(1e-2)))
191
192 // Complex index kernels: same magnitude values to test tie-breaking
193 volk_test_params_t test_params_index_fc(test_params.make_tol(0));
194 test_params_index_fc.add_complex_edge_cases({
195 lv_cmake(1.0f, 0.0f),
196 lv_cmake(1.0f, 0.0f), // 2 same magnitude
197 lv_cmake(0.0f, 1.0f),
198 lv_cmake(0.0f, 1.0f), // 4 (all |z|=1)
199 lv_cmake(1.0f, 0.0f),
200 lv_cmake(1.0f, 0.0f), // 6
201 lv_cmake(0.0f, 1.0f),
202 lv_cmake(0.0f, 1.0f), // 8 (covers AVX 8-wide)
203 });
204 QA(VOLK_INIT_TEST(volk_32fc_index_max_16u, test_params_index_fc))
205 QA(VOLK_INIT_TEST(volk_32fc_index_max_32u, test_params_index_fc))
206 QA(VOLK_INIT_TEST(volk_32fc_index_min_16u, test_params_index_fc))
207 QA(VOLK_INIT_TEST(volk_32fc_index_min_32u, test_params_index_fc))
208 QA(VOLK_INIT_TEST(volk_32fc_s32f_magnitude_16i, test_params.make_tol(1)))
209 QA(VOLK_INIT_TEST(volk_32fc_magnitude_32f, test_params_inacc_tenth))
210 QA(VOLK_INIT_TEST(volk_32fc_magnitude_squared_32f, test_params))
211 QA(VOLK_INIT_TEST(volk_32fc_x2_add_32fc, test_params))
212 QA(VOLK_INIT_TEST(volk_32fc_x2_multiply_32fc, test_params))
213 QA(VOLK_INIT_TEST(volk_32fc_x2_multiply_conjugate_32fc, test_params))
214 QA(VOLK_INIT_TEST(volk_32fc_x2_divide_32fc, test_params))
215 QA(VOLK_INIT_TEST(volk_32fc_conjugate_32fc, test_params))
216 QA(VOLK_INIT_TEST(volk_32f_s32f_convert_16i, test_params.make_tol(1)))
217 QA(VOLK_INIT_TEST(volk_32f_s32f_convert_32i, test_params.make_tol(1)))
218 QA(VOLK_INIT_TEST(volk_32f_convert_64f, test_params))
219 QA(VOLK_INIT_TEST(volk_32f_s32f_convert_8i, test_params.make_tol(1)))
220 QA(VOLK_INIT_TEST(volk_32fc_convert_16ic, test_params.make_tol(1)))
221 QA(VOLK_INIT_TEST(volk_32fc_s32f_power_spectrum_32f, test_params.make_tol(2e-6)))
222 QA(VOLK_INIT_TEST(volk_32fc_x2_square_dist_32f, test_params))
223 QA(VOLK_INIT_TEST(volk_32fc_x2_s32f_square_dist_scalar_mult_32f, test_params))
224 QA(VOLK_INIT_TEST(volk_32f_x2_divide_32f, test_params))
225 QA(VOLK_INIT_TEST(volk_32f_x2_dot_prod_32f, test_params.make_absolute(1.5e-2)))
226 QA(VOLK_INIT_TEST(volk_32f_x2_s32f_interleave_16ic, test_params.make_tol(1)))
227 QA(VOLK_INIT_TEST(volk_32f_x2_interleave_32fc, test_params))
228 QA(VOLK_INIT_TEST(volk_32f_x2_max_32f, test_params))
229 QA(VOLK_INIT_TEST(volk_32f_x2_min_32f, test_params))
230 QA(VOLK_INIT_TEST(volk_32f_x2_multiply_32f, test_params))
231 QA(VOLK_INIT_TEST(volk_32f_64f_multiply_64f, test_params))
232 QA(VOLK_INIT_TEST(volk_32f_64f_add_64f, test_params))
233 QA(VOLK_INIT_TEST(volk_32f_s32f_normalize, test_params))
234 QA(VOLK_INIT_TEST(volk_32f_s32f_power_32f, test_params))
235 QA(VOLK_INIT_TEST(volk_32f_reciprocal_32f, test_params.make_tol(6.15e-5)))
236 QA(VOLK_INIT_TEST(volk_32f_sqrt_32f, test_params_inacc))
237
238 volk_test_params_t test_params_invsqrt(test_params.make_tol(1e-6));
239 test_params_invsqrt.add_float_edge_cases(
240 { -1.f, 1.f, 0.f, inf, 1e-2f, 1e2f, 1e-10, 1e10 });
241 QA(VOLK_INIT_TEST(volk_32f_invsqrt_32f, test_params_invsqrt))
242 QA(VOLK_INIT_TEST(volk_32f_s32f_stddev_32f, test_params_inacc))
243 QA(VOLK_INIT_TEST(volk_32f_stddev_and_mean_32f_x2, test_params.make_absolute(1e-5)))
244 QA(VOLK_INIT_TEST(volk_32f_x2_subtract_32f, test_params))
245 QA(VOLK_INIT_TEST(volk_32f_x3_sum_of_poly_32f, test_params.make_absolute(1e+3)))
246 QA(VOLK_INIT_TEST(volk_32i_x2_and_32i, test_params))
247 QA(VOLK_INIT_TEST(volk_32i_s32f_convert_32f, test_params))
248 QA(VOLK_INIT_TEST(volk_32i_x2_or_32i, test_params))
249 QA(VOLK_INIT_TEST(volk_32f_x2_dot_prod_16i, test_params.make_tol(1)))
250 QA(VOLK_INIT_TEST(volk_64f_convert_32f, test_params))
251 QA(VOLK_INIT_TEST(volk_64f_x2_max_64f, test_params))
252 QA(VOLK_INIT_TEST(volk_64f_x2_min_64f, test_params))
253 QA(VOLK_INIT_TEST(volk_64f_x2_multiply_64f, test_params))
254 QA(VOLK_INIT_TEST(volk_64f_x2_add_64f, test_params))
255 QA(VOLK_INIT_TEST(volk_64f_x2_dot_prod_64f, test_params))
256 QA(VOLK_INIT_TEST(volk_8ic_deinterleave_16i_x2, test_params))
257 QA(VOLK_INIT_TEST(volk_8ic_s32f_deinterleave_32f_x2, test_params))
258 QA(VOLK_INIT_TEST(volk_8ic_deinterleave_real_16i, test_params))
259 QA(VOLK_INIT_TEST(volk_8ic_s32f_deinterleave_real_32f, test_params))
260 QA(VOLK_INIT_TEST(volk_8ic_deinterleave_real_8i, test_params))
261 QA(VOLK_INIT_TEST(volk_8ic_x2_multiply_conjugate_16ic, test_params))
262 QA(VOLK_INIT_TEST(volk_8ic_x2_s32f_multiply_conjugate_32fc, test_params))
263 QA(VOLK_INIT_TEST(volk_8i_convert_16i, test_params))
264 QA(VOLK_INIT_TEST(volk_8i_s32f_convert_32f, test_params))
265 QA(VOLK_INIT_TEST(volk_8i_x2_add_saturated_8i, test_params))
266 QA(VOLK_INIT_TEST(volk_8u_x2_add_saturated_8u, test_params))
267 QA(VOLK_INIT_TEST(volk_16i_x2_add_saturated_16i, test_params))
268 QA(VOLK_INIT_TEST(volk_16u_x2_add_saturated_16u, test_params))
269 QA(VOLK_INIT_TEST(volk_32fc_s32fc_multiply2_32fc, test_params))
270 QA(VOLK_INIT_TEST(volk_32f_s32f_multiply_32f, test_params))
271 QA(VOLK_INIT_TEST(volk_32f_s32f_add_32f, test_params))
272 QA(VOLK_INIT_TEST(volk_32f_binary_slicer_32i, test_params))
273 QA(VOLK_INIT_TEST(volk_32f_binary_slicer_8i, test_params))
274 QA(VOLK_INIT_TEST(volk_32u_reverse_32u, test_params))
275 QA(VOLK_INIT_TEST(volk_32f_tanh_32f, test_params_inacc))
276 QA(VOLK_INIT_TEST(volk_32fc_x2_s32fc_multiply_conjugate_add2_32fc, test_params))
277 QA(VOLK_INIT_TEST(volk_32f_exp_32f, test_params))
278 QA(VOLK_INIT_PUPP(volk_32f_x2_powpuppet_32f, volk_32f_x2_pow_32f, test_params_inacc))
280 volk_32f_s32f_mod_rangepuppet_32f, volk_32f_s32f_s32f_mod_range_32f, test_params))
282 volk_8u_x3_encodepolarpuppet_8u, volk_8u_x3_encodepolar_8u_x2, test_params))
283 QA(VOLK_INIT_PUPP(volk_32f_8u_polarbutterflypuppet_32f,
284 volk_32f_8u_polarbutterfly_32f,
285 test_params))
286 QA(VOLK_INIT_PUPP(volk_32fc_s32f_power_spectral_densitypuppet_32f,
287 volk_32fc_s32f_x2_power_spectral_density_32f,
288 test_params))
290 volk_32f_s32f_clamppuppet_32f, volk_32f_s32f_x2_clamp_32f, test_params_clamp))
291 QA(VOLK_INIT_PUPP(volk_32f_s32f_convertpuppet_8u,
292 volk_32f_s32f_x2_convert_8u,
293 test_params.make_tol(1)))
294 // no one uses these, so don't test them
295 // VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results,
296 // benchmark_mode, kernel_regex); VOLK_PROFILE(volk_16i_branch_4_state_8, 1e-4, 2046,
297 // 10000, &results, benchmark_mode, kernel_regex); VOLK_PROFILE(volk_16i_max_star_16i,
298 // 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
299 // VOLK_PROFILE(volk_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, &results,
300 // benchmark_mode, kernel_regex); VOLK_PROFILE(volk_16i_permute_and_scalar_add, 1e-4,
301 // 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
302 // VOLK_PROFILE(volk_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, &results,
303 // benchmark_mode, kernel_regex);
304
305 return test_cases;
306}