#ifdef LV_HAVE_GENERIC

/*
 * Portable scalar kernel: writes aVector[i] + scalar to cVector[i] for every
 * i in [0, num_points).
 *
 * cVector    - output buffer; must hold at least num_points floats
 * aVector    - input buffer; must hold at least num_points floats
 * scalar     - value added to each input element
 * num_points - number of elements to process; 0 leaves cVector untouched
 */
static inline void volk_32f_s32f_add_32f_generic(float* cVector,
                                                 const float* aVector,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const float* inputPtr = aVector;
    float* outputPtr = cVector;
    for (number = 0; number < num_points; number++) {
        *outputPtr = (*inputPtr) + scalar;
        /* Step both cursors; without this every iteration would rewrite
         * element 0 only. */
        inputPtr++;
        outputPtr++;
    }
}

#endif /* LV_HAVE_GENERIC */
#ifndef INCLUDED_volk_32f_s32f_add_32f_u_H
#define INCLUDED_volk_32f_s32f_add_32f_u_H

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*
 * SSE kernel using unaligned loads/stores: cVector[i] = aVector[i] + scalar
 * for every i in [0, num_points).
 *
 * cVector    - output buffer; must hold at least num_points floats
 * aVector    - input buffer; must hold at least num_points floats
 * scalar     - value added to each input element
 * num_points - number of elements to process
 */
static inline void volk_32f_s32f_add_32f_u_sse(float* cVector,
                                               const float* aVector,
                                               const float scalar,
                                               unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    __m128 aVal, bVal, cVal;
    bVal = _mm_set_ps1(scalar); /* scalar replicated into all four lanes */
    for (; number < quarterPoints; number++) {
        aVal = _mm_loadu_ps(aPtr);

        cVal = _mm_add_ps(aVal, bVal);

        _mm_storeu_ps(cPtr, cVal);

        /* Move on to the next group of four floats. */
        aPtr += 4;
        cPtr += 4;
    }

    /* Scalar pass over the num_points % 4 elements the vector loop left. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + scalar;
    }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * AVX kernel using unaligned loads/stores: cVector[i] = aVector[i] + scalar
 * for every i in [0, num_points).
 *
 * cVector    - output buffer; must hold at least num_points floats
 * aVector    - input buffer; must hold at least num_points floats
 * scalar     - value added to each input element
 * num_points - number of elements to process
 */
static inline void volk_32f_s32f_add_32f_u_avx(float* cVector,
                                               const float* aVector,
                                               const float scalar,
                                               unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    __m256 aVal, bVal, cVal;
    bVal = _mm256_set1_ps(scalar); /* scalar replicated into all eight lanes */
    for (; number < eighthPoints; number++) {

        aVal = _mm256_loadu_ps(aPtr);

        cVal = _mm256_add_ps(aVal, bVal);

        _mm256_storeu_ps(cPtr, cVal);

        /* Move on to the next group of eight floats. */
        aPtr += 8;
        cPtr += 8;
    }

    /* Scalar pass over the num_points % 8 elements the vector loop left. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + scalar;
    }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*
 * NEON kernel: cVector[i] = aVector[i] + scalar for every i in
 * [0, num_points).
 *
 * cVector    - output buffer; must hold at least num_points floats
 * aVector    - input buffer; must hold at least num_points floats
 * scalar     - value added to each input element
 * num_points - number of elements to process
 */
static inline void volk_32f_s32f_add_32f_u_neon(float* cVector,
                                                const float* aVector,
                                                const float scalar,
                                                unsigned int num_points)
{
    unsigned int number = 0;
    const float* inputPtr = aVector;
    float* outputPtr = cVector;
    const unsigned int quarterPoints = num_points / 4;

    float32x4_t aVal, cVal, scalarvec;

    scalarvec = vdupq_n_f32(scalar); /* scalar replicated into all four lanes */

    for (number = 0; number < quarterPoints; number++) {
        aVal = vld1q_f32(inputPtr);
        cVal = vaddq_f32(aVal, scalarvec);
        vst1q_f32(outputPtr, cVal);
        /* Move on to the next group of four floats. */
        inputPtr += 4;
        outputPtr += 4;
    }

    /* Scalar pass over the num_points % 4 elements the vector loop left. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *outputPtr++ = (*inputPtr++) + scalar;
    }
}
#endif /* LV_HAVE_NEON */

#endif /* INCLUDED_volk_32f_s32f_add_32f_u_H */
#ifndef INCLUDED_volk_32f_s32f_add_32f_a_H
#define INCLUDED_volk_32f_s32f_add_32f_a_H

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*
 * SSE kernel using aligned loads/stores: cVector[i] = aVector[i] + scalar
 * for every i in [0, num_points).
 *
 * _mm_load_ps/_mm_store_ps require 16-byte aligned addresses, so both
 * buffers must be 16-byte aligned.
 *
 * cVector    - output buffer; must hold at least num_points floats
 * aVector    - input buffer; must hold at least num_points floats
 * scalar     - value added to each input element
 * num_points - number of elements to process
 */
static inline void volk_32f_s32f_add_32f_a_sse(float* cVector,
                                               const float* aVector,
                                               const float scalar,
                                               unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    __m128 aVal, bVal, cVal;
    bVal = _mm_set_ps1(scalar); /* scalar replicated into all four lanes */
    for (; number < quarterPoints; number++) {
        aVal = _mm_load_ps(aPtr);

        cVal = _mm_add_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal);

        /* Move on to the next group of four floats. */
        aPtr += 4;
        cPtr += 4;
    }

    /* Scalar pass over the num_points % 4 elements the vector loop left. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + scalar;
    }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * AVX kernel using aligned loads/stores: cVector[i] = aVector[i] + scalar
 * for every i in [0, num_points).
 *
 * _mm256_load_ps/_mm256_store_ps require 32-byte aligned addresses, so both
 * buffers must be 32-byte aligned.
 *
 * cVector    - output buffer; must hold at least num_points floats
 * aVector    - input buffer; must hold at least num_points floats
 * scalar     - value added to each input element
 * num_points - number of elements to process
 */
static inline void volk_32f_s32f_add_32f_a_avx(float* cVector,
                                               const float* aVector,
                                               const float scalar,
                                               unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    __m256 aVal, bVal, cVal;
    bVal = _mm256_set1_ps(scalar); /* scalar replicated into all eight lanes */
    for (; number < eighthPoints; number++) {
        aVal = _mm256_load_ps(aPtr);

        cVal = _mm256_add_ps(aVal, bVal);

        _mm256_store_ps(cPtr, cVal);

        /* Move on to the next group of eight floats. */
        aPtr += 8;
        cPtr += 8;
    }

    /* Scalar pass over the num_points % 8 elements the vector loop left. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + scalar;
    }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_ORC

/* Implemented outside this header; only the prototype is visible here. */
extern void volk_32f_s32f_add_32f_a_orc_impl(float* dst,
                                             const float* src,
                                             const float scalar,
                                             unsigned int num_points);

/*
 * Thin wrapper that forwards its arguments unchanged to
 * volk_32f_s32f_add_32f_a_orc_impl.
 *
 * NOTE(review): the wrapper is named "_u_" while the implementation it calls
 * is named "_a_"; confirm the implementation has no alignment requirement.
 */
static inline void volk_32f_s32f_add_32f_u_orc(float* cVector,
                                               const float* aVector,
                                               const float scalar,
                                               unsigned int num_points)
{
    volk_32f_s32f_add_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
#endif /* LV_HAVE_ORC */

#endif /* INCLUDED_volk_32f_s32f_add_32f_a_H */
/*
 * Cross-reference index carried over from the generated source listing:
 *
 * static void volk_32f_s32f_add_32f_u_sse(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
 *     Definition: volk_32f_s32f_add_32f.h:95
 * static void volk_32f_s32f_add_32f_generic(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
 *     Definition: volk_32f_s32f_add_32f.h:73
 * static void volk_32f_s32f_add_32f_a_sse(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
 *     Definition: volk_32f_s32f_add_32f.h:197
 * static void volk_32f_s32f_add_32f_u_avx(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
 *     Definition: volk_32f_s32f_add_32f.h:127
 * static void volk_32f_s32f_add_32f_a_avx(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
 *     Definition: volk_32f_s32f_add_32f.h:229
 * static void volk_32f_s32f_add_32f_u_neon(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
 *     Definition: volk_32f_s32f_add_32f.h:160
 */