ZenLib
MemoryUtils.h
Go to the documentation of this file.
00001 /*  Copyright (c) MediaArea.net SARL. All Rights Reserved.
00002  *
00003  *  Use of this source code is governed by a zlib-style license that can
00004  *  be found in the License.txt file in the root of the source tree.
00005  */
00006 
00007 //---------------------------------------------------------------------------
00008 #ifndef ZenLib_MemoryUtilsH
00009 #define ZenLib_MemoryUtilsH
00010 //---------------------------------------------------------------------------
00011 
00012 //---------------------------------------------------------------------------
#include "ZenLib/Conf.h"
#include "ZenLib/Conf.h"
//---------------------------------------------------------------------------

#include <cstdlib>
#include <cstring>
#ifdef ZENLIB_MEMUTILS_SSE2
    #include <emmintrin.h>
#endif //ZENLIB_MEMUTILS_SSE2
00021 
00022 namespace ZenLib
00023 {
00024 
00025 #ifndef ZENLIB_MEMUTILS_SSE2
00026     //-----------------------------------------------------------------------
00027     // Memory alloc/free
00028     #define malloc_Aligned128 (size) \
00029             malloc (size)
00030     #define free_Aligned128 (ptr) \
00031             free (ptr)
00032 
00033     //-----------------------------------------------------------------------
00034     // Arbitrary size - To Unaligned
00035     #define memcpy_Unaligned_Unaligned memcpy
00036     #define memcpy_Aligned128_Unaligned memcpy
00037 
00038     //-----------------------------------------------------------------------
00039     // Arbitrary size - To Aligned 128 bits (16 bytes)
00040     #define memcpy_Unaligned_Aligned128 memcpy
00041     #define memcpy_Aligned128_Aligned128 memcpy
00042 
00043     //-----------------------------------------------------------------------
00044     // 128 bits - To Unaligned
00045     #define memcpy_Unaligned_Unaligned_Once128 memcpy
00046 
00047     //-----------------------------------------------------------------------
00048     // 128 bits - To Aligned 128 bits (16 bytes)
00049     #define memcpy_Aligned128_Aligned128_Once128 memcpy
00050 
00051     //-----------------------------------------------------------------------
00052     // 1024 bits - To Unaligned
00053     #define memcpy_Unaligned_Unaligned_Once1024 memcpy
00054 
00055     //-----------------------------------------------------------------------
00056     // 1024 bits - To Aligned 128 bits (16 bytes)
00057     #define memcpy_Aligned128_Aligned128_Once1024 memcpy
00058 
00059     //-----------------------------------------------------------------------
00060     // 128-bit multiple - To Aligned 128 bits (16 bytes)
00061     #define memcpy_Unaligned_Aligned128_Size128 memcpy
00062     #define memcpy_Aligned128_Aligned128_Size128 memcpy
00063 
00064 #else // ZENLIB_MEMUTILS_SSE2
00065 
00066     //-----------------------------------------------------------------------
00067     // Memory alloc/free
00068 
00069     inline void*    malloc_Aligned128 (size_t size)
00070     {
00071         return _aligned_malloc (size, 16); //aligned_alloc in C11
00072     }
00073 
00074     inline void     free_Aligned128 ( void *ptr )
00075     {
00076         _aligned_free (ptr); //free in C11
00077     }
00078 
00079     //-----------------------------------------------------------------------
00080     // Arbitrary size - To Unaligned
00081 
00082     inline void memcpy_Unaligned_Unaligned (void* destination, const void* source, size_t num)
00083     {
00084         size_t extra=num&0xF;
00085         __m128i* destination16=(__m128i*)destination;
00086         const __m128i* source16=(const __m128i*)source;
00087 
00088         num>>=4;
00089         while (num--)
00090             _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++));
00091 
00092         char* destination1=(char*)destination16;
00093         char* source1=(char*)source16;
00094         while (extra--)
00095             *destination1++=*source1++;
00096     }
00097 
00098     inline void memcpy_Aligned128_Unaligned (void* destination, const void* source, size_t num)
00099     {
00100         size_t extra=num&0xF;
00101         __m128i* destination16=(__m128i*)destination;
00102         const __m128i* source16=(const __m128i*)source;
00103 
00104         num>>=4;
00105         while (num--)
00106             _mm_storeu_si128 (destination16++, _mm_load_si128(source16++));
00107 
00108         char* destination1=(char*)destination16;
00109         char* source1=(char*)source16;
00110         while (extra--)
00111             *destination1++=*source1++;
00112     }
00113 
00114     //-----------------------------------------------------------------------
00115     // Arbitrary size - To Aligned 128 bits (16 bytes)
00116 
00117     inline void memcpy_Unaligned_Aligned128 (void* destination, const void* source, size_t num)
00118     {
00119         size_t extra=num&0xF;
00120         __m128i* destination16=(__m128i*)destination;
00121         const __m128i* source16=(const __m128i*)source;
00122 
00123         num>>=4;
00124         while (num--)
00125             _mm_stream_si128 (destination16++, _mm_loadu_si128(source16++));
00126 
00127         char* destination1=(char*)destination16;
00128         char* source1=(char*)source16;
00129         while (extra--)
00130             *destination1++=*source1++;
00131     }
00132 
00133     //-----------------------------------------------------------------------
00134     // 128 bits - To Unaligned
00135 
00136     inline void memcpy_Unaligned_Unaligned_Once128 (void* destination, const void* source)
00137     {
00138         _mm_storeu_si128 ((__m128i*)destination, _mm_loadu_si128((const __m128i*)source));
00139     }
00140 
00141     //-----------------------------------------------------------------------
00142     // Arbitrary size - To Aligned 128 bits (16 bytes)
00143 
00144     inline void memcpy_Aligned128_Aligned128 (void* destination, const void* source, size_t num)
00145     {
00146         size_t extra=num&0xF;
00147         __m128i* destination16=(__m128i*)destination;
00148         const __m128i* source16=(const __m128i*)source;
00149 
00150         num>>=4;
00151         while (num--)
00152             _mm_stream_si128 (destination16++, _mm_load_si128(source16++));
00153 
00154         char* destination1=(char*)destination16;
00155         char* source1=(char*)source16;
00156         while (extra--)
00157             *destination1++=*source1++;
00158     }
00159 
00160     inline void memcpy_Aligned128_Aligned128_Size128 (void* destination, const void* source, size_t num)
00161     {
00162         __m128i* destination16=(__m128i*)destination;
00163         const __m128i* source16=(__m128i*)source;
00164 
00165         num>>=4;
00166         while (num--)
00167             _mm_stream_si128 (destination16++, _mm_load_si128(source16++));
00168     }
00169 
00170     //-----------------------------------------------------------------------
00171     // 1024 bits - To Unaligned
00172 
00173     inline void memcpy_Unaligned_Unaligned_Once1024 (void* destination, const void* source, size_t)
00174     {
00175         __m128i* destination16=(__m128i*)destination;
00176         const __m128i* source16=(__m128i*)source;
00177 
00178         size_t num=8;
00179         while (num--)
00180             _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++));
00181     }
00182 
00183     //-----------------------------------------------------------------------
00184     // 128 bits - To Aligned 128 bits (16 bytes)
00185 
00186     inline void memcpy_Aligned128_Aligned128_Once128 (void* destination, const void* source)
00187     {
00188         _mm_stream_si128 ((__m128i*)destination, _mm_load_si128((const __m128i*)source));
00189     }
00190 
00191     //-----------------------------------------------------------------------
00192     // 128-bit multiple - To Unaligned (16 bytes)
00193 
00194     inline void memcpy_Unaligned_Unaligned_Size128 (void* destination, const void* source, size_t num)
00195     {
00196         __m128i* destination16=(__m128i*)destination;
00197         const __m128i* source16=(const __m128i*)source;
00198 
00199         num>>=4;
00200         while (num--)
00201             _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++));
00202     }
00203 
00204     inline void memcpy_Aligned128_Unaligned_Size128 (void* destination, const void* source, size_t num)
00205     {
00206         __m128i* destination16=(__m128i*)destination;
00207         const __m128i* source16=(__m128i*)source;
00208 
00209         num>>=4;
00210         while (num--)
00211             _mm_storeu_si128 (destination16++, _mm_load_si128(source16++));
00212     }
00213 
00214     //-----------------------------------------------------------------------
00215     // 128-bit multiple - To Aligned 128 bits (16 bytes)
00216 
00217     inline void memcpy_Unaligned_Aligned128_Size128 (void* destination, const void* source, size_t num)
00218     {
00219         __m128i* destination16=(__m128i*)destination;
00220         const __m128i* source16=(__m128i*)source;
00221 
00222         num>>=4;
00223         while (num--)
00224             _mm_stream_si128 (destination16++, _mm_loadu_si128(source16++));
00225     }
00226 
00227 
00228     /* Slower
00229     inline void memcpy_Aligned128_Aligned128_Once1024 (void* destination, const void* source)
00230     {
00231         __m128i* destination16=(__m128i*)destination;
00232         const __m128i* source16=(__m128i*)source;
00233 
00234         size_t num=8;
00235         while (num--)
00236             _mm_stream_si128 (destination16++, _mm_load_si128(source16++));
00237     }
00238     */
00239 
00240     /*
00241     inline void memcpy_Aligned256_Aligned256 (void* destination, const void* source, size_t num) //with AVX, actually slower
00242     {
00243         size_t extra=num&0x1F;
00244         __m256i* destination16=(__m256i*)destination;
00245         const __m256i* source16=(const __m256i*)source;
00246 
00247         num>>=5;
00248         while (num--)
00249             _mm256_storeu_si256 (destination16++, _mm256_loadu_si256(source16++));
00250 
00251         char* destination1=(char*)destination16;
00252         char* source1=(char*)source16;
00253         while (extra--)
00254             *destination1++=*source1++;
00255     }
00256     */
00257 
00258 #endif // ZENLIB_MEMUTILS_SSE2
00259 
00260 } //NameSpace
00261 
00262 #endif