Vector Optimized Library of Kernels  2.4
Architecture-tuned implementations of math kernels
volk_64f_x2_add_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2018 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_64f_x2_add_64f_H
72 #define INCLUDED_volk_64f_x2_add_64f_H
73 
74 #include <inttypes.h>
75 
76 
77 #ifdef LV_HAVE_GENERIC
78 
/*
 * Portable element-wise addition of two double-precision vectors.
 *
 * cVector    - output; receives aVector[i] + bVector[i] for each i
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of elements read from each input and written to the
 *              output; nothing is touched when it is zero
 *
 * Elements are processed one at a time in ascending index order.
 */
static inline void volk_64f_x2_add_64f_generic(double* cVector,
                                               const double* aVector,
                                               const double* bVector,
                                               unsigned int num_points)
{
    unsigned int i;

    for (i = 0; i < num_points; ++i) {
        cVector[i] = aVector[i] + bVector[i];
    }
}
93 
94 #endif /* LV_HAVE_GENERIC */
95 
96 /*
97  * Unaligned versions
98  */
99 
100 #ifdef LV_HAVE_SSE2
101 
102 #include <emmintrin.h>
103 
/*
 * SSE2 element-wise addition of two double-precision vectors, using
 * unaligned loads and stores.
 *
 * cVector    - output; receives aVector[i] + bVector[i] for each i
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of elements to process
 *
 * Two doubles are added per vector iteration; when num_points is odd the
 * final element is handled by a scalar addition.
 */
static inline void volk_64f_x2_add_64f_u_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    /* Largest multiple of 2 that does not exceed num_points. */
    const unsigned int vector_end = num_points & ~1u;
    unsigned int i;

    for (i = 0; i < vector_end; i += 2) {
        const __m128d lhs = _mm_loadu_pd(aVector + i);
        const __m128d rhs = _mm_loadu_pd(bVector + i);

        _mm_storeu_pd(cVector + i, _mm_add_pd(lhs, rhs));
    }

    /* Scalar tail: at most one leftover element. */
    for (; i < num_points; ++i) {
        cVector[i] = aVector[i] + bVector[i];
    }
}
135 
136 #endif /* LV_HAVE_SSE2 */
137 
138 
139 #ifdef LV_HAVE_AVX
140 
141 #include <immintrin.h>
142 
/*
 * AVX element-wise addition of two double-precision vectors, using
 * unaligned loads and stores.
 *
 * cVector    - output; receives aVector[i] + bVector[i] for each i
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of elements to process
 *
 * Four doubles are added per vector iteration; the remaining
 * num_points % 4 elements are handled by scalar additions.
 */
static inline void volk_64f_x2_add_64f_u_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    /* Largest multiple of 4 that does not exceed num_points. */
    const unsigned int vector_end = num_points & ~3u;
    unsigned int i;

    for (i = 0; i < vector_end; i += 4) {
        const __m256d lhs = _mm256_loadu_pd(aVector + i);
        const __m256d rhs = _mm256_loadu_pd(bVector + i);

        _mm256_storeu_pd(cVector + i, _mm256_add_pd(lhs, rhs));
    }

    /* Scalar tail: at most three leftover elements. */
    for (; i < num_points; ++i) {
        cVector[i] = aVector[i] + bVector[i];
    }
}
175 
176 #endif /* LV_HAVE_AVX */
177 
178 /*
179  * Aligned versions
180  */
181 
182 #ifdef LV_HAVE_SSE2
183 
184 #include <emmintrin.h>
185 
/*
 * SSE2 element-wise addition of two double-precision vectors, using
 * aligned loads and stores.
 *
 * cVector    - output; receives aVector[i] + bVector[i] for each i
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of elements to process
 *
 * The vector loop uses _mm_load_pd / _mm_store_pd, which require their
 * addresses to be 16-byte aligned, so all three pointers must be 16-byte
 * aligned whenever num_points >= 2. Two doubles are added per vector
 * iteration; when num_points is odd the final element is handled by a
 * scalar addition.
 */
static inline void volk_64f_x2_add_64f_a_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    /* Largest multiple of 2 that does not exceed num_points. */
    const unsigned int vector_end = num_points & ~1u;
    unsigned int i;

    for (i = 0; i < vector_end; i += 2) {
        const __m128d lhs = _mm_load_pd(aVector + i);
        const __m128d rhs = _mm_load_pd(bVector + i);

        _mm_store_pd(cVector + i, _mm_add_pd(lhs, rhs));
    }

    /* Scalar tail: at most one leftover element. */
    for (; i < num_points; ++i) {
        cVector[i] = aVector[i] + bVector[i];
    }
}
217 
218 #endif /* LV_HAVE_SSE2 */
219 
220 
221 #ifdef LV_HAVE_AVX
222 
223 #include <immintrin.h>
224 
/*
 * AVX element-wise addition of two double-precision vectors, using
 * aligned loads and stores.
 *
 * cVector    - output; receives aVector[i] + bVector[i] for each i
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of elements to process
 *
 * The vector loop uses _mm256_load_pd / _mm256_store_pd, which require
 * their addresses to be 32-byte aligned, so all three pointers must be
 * 32-byte aligned whenever num_points >= 4. Four doubles are added per
 * vector iteration; the remaining num_points % 4 elements are handled by
 * scalar additions.
 */
static inline void volk_64f_x2_add_64f_a_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    /* Largest multiple of 4 that does not exceed num_points. */
    const unsigned int vector_end = num_points & ~3u;
    unsigned int i;

    for (i = 0; i < vector_end; i += 4) {
        const __m256d lhs = _mm256_load_pd(aVector + i);
        const __m256d rhs = _mm256_load_pd(bVector + i);

        _mm256_store_pd(cVector + i, _mm256_add_pd(lhs, rhs));
    }

    /* Scalar tail: at most three leftover elements. */
    for (; i < num_points; ++i) {
        cVector[i] = aVector[i] + bVector[i];
    }
}
257 
258 #endif /* LV_HAVE_AVX */
259 
260 #endif /* INCLUDED_volk_64f_x2_add_64f_H */
static void volk_64f_x2_add_64f_u_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_add_64f.h:104
static void volk_64f_x2_add_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_add_64f.h:186
static void volk_64f_x2_add_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_add_64f.h:225
static void volk_64f_x2_add_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_add_64f.h:79
static void volk_64f_x2_add_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_add_64f.h:143