/* Polynomial coefficients, each broadcast to all eight float lanes.
 * In the Horner chain below a1..a11 end up multiplying x^1, x^3, ..., x^11.
 * The leading values are close to 1, -1/3 and 1/5 and the accumulator is
 * named `arctan`, so this looks like an odd-polynomial arctangent
 * approximation -- NOTE(review): the enclosing function and its valid input
 * range are not visible in this excerpt; confirm there. */
28 const __m256 a1 = _mm256_set1_ps(+0x1.ffffeap-1f);
29 const __m256 a3 = _mm256_set1_ps(-0x1.55437p-2f);
30 const __m256 a5 = _mm256_set1_ps(+0x1.972be6p-3f);
31 const __m256 a7 = _mm256_set1_ps(-0x1.1436ap-3f);
32 const __m256 a9 = _mm256_set1_ps(+0x1.5785aap-4f);
33 const __m256 a11 = _mm256_set1_ps(-0x1.2f3004p-5f);
/* a13 is not referenced in the visible lines; presumably it seeds `arctan`
 * on a line missing from this excerpt -- TODO confirm. */
34 const __m256 a13 = _mm256_set1_ps(+0x1.01a37cp-7f);
/* x^2 is the variable of the Horner recurrence. */
36 const __m256 x_times_x = _mm256_mul_ps(x, x);
/* Horner steps: each fused multiply-add computes x_times_x * arctan + a_k,
 * from the highest visible coefficient (a11) down to a1. The declaration
 * and initial value of `arctan` are outside this excerpt. */
39 arctan = _mm256_fmadd_ps(x_times_x, arctan, a11);
40 arctan = _mm256_fmadd_ps(x_times_x, arctan, a9);
41 arctan = _mm256_fmadd_ps(x_times_x, arctan, a7);
42 arctan = _mm256_fmadd_ps(x_times_x, arctan, a5);
43 arctan = _mm256_fmadd_ps(x_times_x, arctan, a3);
44 arctan = _mm256_fmadd_ps(x_times_x, arctan, a1);
/* A final multiply by x turns the polynomial in x^2 into an odd polynomial in x. */
45 arctan = _mm256_mul_ps(x, arctan);
/* Coefficients of a polynomial in u = x^2, broadcast to all eight lanes.
 * All four are positive; c0 is close to 1 and c1 is close to 1/6.
 * NOTE(review): the function being approximated is not identifiable from
 * this excerpt (the enclosing signature is missing) -- confirm at the
 * definition. */
59 const __m256 c0 = _mm256_set1_ps(0x1.ffffcep-1f);
60 const __m256 c1 = _mm256_set1_ps(0x1.55b648p-3f);
61 const __m256 c2 = _mm256_set1_ps(0x1.24d192p-4f);
/* c3 is not referenced in the visible lines; presumably `p` is seeded with
 * it on a line missing from this excerpt -- TODO confirm. */
62 const __m256 c3 = _mm256_set1_ps(0x1.0a788p-4f);
/* u = x^2 is the variable of the Horner recurrence. */
64 const __m256 u = _mm256_mul_ps(x, x);
/* Horner steps: p = u * p + c_k for k = 2, 1, 0. The declaration of `p` is
 * outside this excerpt. */
66 p = _mm256_fmadd_ps(u, p, c2);
67 p = _mm256_fmadd_ps(u, p, c1);
68 p = _mm256_fmadd_ps(u, p, c0);
/* The result is x * p(u), i.e. an odd polynomial in x. */
70 return _mm256_mul_ps(x, p);
/* Evaluates x + x^3 * (s1 + x^2 * (s2 + x^2 * s3)) on eight float lanes.
 * s1, s2, s3 are close to -1/6, +1/120 and -1/5040, the Taylor coefficients
 * of sine, so this looks like a degree-7 sine approximation --
 * NOTE(review): the enclosing function and the input range it is valid for
 * are not visible in this excerpt; confirm there. */
81 const __m256 s1 = _mm256_set1_ps(-0x1.555552p-3f);
82 const __m256 s2 = _mm256_set1_ps(+0x1.110be2p-7f);
83 const __m256 s3 = _mm256_set1_ps(-0x1.9ab22ap-13f);
/* Powers of x needed below: x^2 for the Horner recurrence, x^3 for the
 * final correction term. */
85 const __m256 x2 = _mm256_mul_ps(x, x);
86 const __m256 x3 = _mm256_mul_ps(x2, x);
/* Horner evaluation in x^2: poly = s1 + x2 * (s2 + x2 * s3). */
88 __m256 poly = _mm256_fmadd_ps(x2, s3, s2);
89 poly = _mm256_fmadd_ps(x2, poly, s1);
/* x is added unscaled (its coefficient is exactly 1): result = x3 * poly + x. */
90 return _mm256_fmadd_ps(x3, poly, x);
/* Evaluates 1 + x^2 * (c1 + x^2 * (c2 + x^2 * c3)) on eight float lanes.
 * c1, c2, c3 are close to -1/2, +1/24 and -1/720, the Taylor coefficients
 * of cosine, so this looks like a degree-6 cosine approximation --
 * NOTE(review): the enclosing function and the input range it is valid for
 * are not visible in this excerpt; confirm there. */
101 const __m256 c1 = _mm256_set1_ps(-0x1.fffff4p-2f);
102 const __m256 c2 = _mm256_set1_ps(+0x1.554a46p-5f);
103 const __m256 c3 = _mm256_set1_ps(-0x1.661be2p-10f);
/* The constant term of the polynomial is exactly 1. */
104 const __m256 one = _mm256_set1_ps(1.0f);
/* x^2 is the variable of the Horner recurrence (the polynomial is even in x). */
106 const __m256 x2 = _mm256_mul_ps(x, x);
/* Horner evaluation: poly = c1 + x2 * (c2 + x2 * c3). */
108 __m256 poly = _mm256_fmadd_ps(x2, c3, c2);
109 poly = _mm256_fmadd_ps(x2, poly, c1);
/* result = x2 * poly + 1 */
110 return _mm256_fmadd_ps(x2, poly, one);
/* Coefficients c0..c6 with alternating signs, broadcast to all eight lanes.
 * In the Horner chain below c0..c5 multiply x^0..x^5.
 * NOTE(review): which function this polynomial approximates, and over what
 * input range, cannot be determined from this excerpt -- see the enclosing
 * definition. */
123 const __m256 c0 = _mm256_set1_ps(+0x1.a8a726p+1f);
124 const __m256 c1 = _mm256_set1_ps(-0x1.0b7f7ep+2f);
125 const __m256 c2 = _mm256_set1_ps(+0x1.05d9ccp+2f);
126 const __m256 c3 = _mm256_set1_ps(-0x1.4d476cp+1f);
127 const __m256 c4 = _mm256_set1_ps(+0x1.04fc3ap+0f);
128 const __m256 c5 = _mm256_set1_ps(-0x1.c97982p-3f);
/* c6 is not referenced in the visible lines; presumably it seeds `poly` on
 * a line missing from this excerpt -- TODO confirm. */
129 const __m256 c6 = _mm256_set1_ps(+0x1.57aa42p-6f);
/* Horner steps directly in x (not x^2): poly = poly * x + c_k for
 * k = 5 down to 0. The declaration of `poly` and whatever is done with the
 * final value are outside this excerpt. */
133 poly = _mm256_fmadd_ps(poly, x, c5);
134 poly = _mm256_fmadd_ps(poly, x, c4);
135 poly = _mm256_fmadd_ps(poly, x, c3);
136 poly = _mm256_fmadd_ps(poly, x, c2);
137 poly = _mm256_fmadd_ps(poly, x, c1);
138 poly = _mm256_fmadd_ps(poly, x, c0);