include/SSEPlus_platform.h

Go to the documentation of this file.
00001 //
00002 // Copyright (c) 2006-2008 Advanced Micro Devices, Inc. All Rights Reserved.
00003 // This software is subject to the Apache v2.0 License.
00004 //
00005 
00006 // This file should contain all platform specific code. 
00007 
00008 #ifndef __SSEPLUS_PLATFORM_H__
00009 #define __SSEPLUS_PLATFORM_H__
00010 
00011 //TODO: Detect 32/64
00012 
00013 
00014 
00015 
00016 //---------------------------------------
00017 // Microsoft Visual Studio
00018 //---------------------------------------
00019 #if defined( _MSC_VER )
00020 #define SSP_MSVC
00021 
// With _MSC_VER at 1500 or above, flag the SSSE3, SSE4a, SSE4.1 and SSE4.2
// intrinsic sets as available from this compiler.
#if _MSC_VER >= 1500
#   define SSP_COMPILER_SUPPORTS_SSSE3
#   define SSP_COMPILER_SUPPORTS_SSE41
#   define SSP_COMPILER_SUPPORTS_SSE42
#   define SSP_COMPILER_SUPPORTS_SSE4a
#endif
00028 
// MSVC spelling of the inlining keyword.
#define SSP_FORCEINLINE __forceinline

// Header to include for each instruction-set family when building with MSVC.
// <intrin.h> serves both SSE3 and SSE4a; SSE5 is mapped to a project header
// instead of a compiler header.
#define SSP_INCLUDE_FILE_SSE3        <intrin.h>
#define SSP_INCLUDE_FILE_SSE4a       <intrin.h>
#define SSP_INCLUDE_FILE_SSE5        "SSEPlus_NoSSE5.h"
#define SSP_INCLUDE_FILE_SSE4_1_SSE5 <smmintrin.h>   // shared by SSE4.1 and SSE5
00034 
00035 #include <intrin.h> // CPUID
00036 
00037 //---------------------------------------
00038 // GCC
00039 //---------------------------------------
00040 #elif defined( __GNUC__ )
00041 #define SSP_GNUC
00042 
// Feature flags for GCC 4.3 and every later release.
//
// The version must be compared as a (major, minor) pair: testing
// "__GNUC__ >= 4" and "__GNUC_MINOR__ >= 3" independently rejects any later
// major release whose minor number is 0..2 (e.g. 5.0, 12.2, 13.1).
//
// NOTE(review): SSP_COMPILER_SUPPORTS_SSE5 is kept under the same condition as
// the other flags, as before. Whether recent GCC releases still ship the SSE5
// header named by SSP_INCLUDE_FILE_SSE5 should be confirmed separately.
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 3 ) )
#define SSP_COMPILER_SUPPORTS_SSSE3
#define SSP_COMPILER_SUPPORTS_SSE4a
#define SSP_COMPILER_SUPPORTS_SSE41
#define SSP_COMPILER_SUPPORTS_SSE42
#define SSP_COMPILER_SUPPORTS_SSE5
#endif
00052 
// GCC spelling of "force inline". A bare __inline__ is only a hint that the
// optimizer may ignore (and does ignore at -O0); the always_inline attribute
// is what makes it mandatory, matching the MSVC branch's __forceinline.
#define SSP_FORCEINLINE                 __inline__ __attribute__((__always_inline__))

// Header to include for each instruction-set family when building with GCC.
#define SSP_INCLUDE_FILE_SSE3           <pmmintrin.h>           // SSE3
#define SSP_INCLUDE_FILE_SSE4a          <ammintrin.h>           // SSE4a
#define SSP_INCLUDE_FILE_SSE5           <bmmintrin.h>           // SSE5
#define SSP_INCLUDE_FILE_SSE4_1_SSE5    <mmintrin-common.h>     // Functions common to SSE4.1 and SSE5
00058 
// CPUID
//
// __cpuid( CPUInfo, InfoType ) executes the CPUID instruction with
// EAX = InfoType and ECX = 0, then stores the resulting EAX, EBX, ECX and EDX
// as four consecutive 32-bit values at CPUInfo[0] .. CPUInfo[3]. CPUInfo and
// InfoType are each evaluated exactly once.
//
// The results are returned through asm output operands, so the compiler knows
// which memory is written. The earlier hand-written stores went through
// %rdi/%edi with no "memory" clobber, and the 64-bit variant pushed onto the
// stack from inside the asm, which overwrites the red zone.
//
// Variant selection: an explicit SYS64 / SYS32 definition wins. When neither
// is defined, the compiler's own __x86_64__ / __i386__ macros are used.
#if defined( SYS64 ) || ( !defined( SYS32 ) && defined( __x86_64__ ) )
    #define __cpuid(CPUInfo, InfoType)                                          \
        do {                                                                    \
            int *ssp_cpuid_out_ = (int *)(CPUInfo);                             \
            __asm__ __volatile__( "cpuid"                                       \
                                  : "=a" (ssp_cpuid_out_[0]),                   \
                                    "=b" (ssp_cpuid_out_[1]),                   \
                                    "=c" (ssp_cpuid_out_[2]),                   \
                                    "=d" (ssp_cpuid_out_[3])                    \
                                  : "0" ((int)(InfoType)), "2" (0) );           \
        } while( 0 )

// 32-bit: EBX may be reserved (it is the PIC base register on i386), so it is
// swapped with a scratch register around CPUID instead of being named as an
// output operand.
#elif defined( SYS32 ) || defined( __i386__ )
    #define __cpuid(CPUInfo, InfoType)                                          \
        do {                                                                    \
            int *ssp_cpuid_out_ = (int *)(CPUInfo);                             \
            __asm__ __volatile__( "xchgl %%ebx, %1\n\t"                         \
                                  "cpuid\n\t"                                   \
                                  "xchgl %%ebx, %1"                             \
                                  : "=a"  (ssp_cpuid_out_[0]),                  \
                                    "=&r" (ssp_cpuid_out_[1]),                  \
                                    "=c"  (ssp_cpuid_out_[2]),                  \
                                    "=d"  (ssp_cpuid_out_[3])                   \
                                  : "0" ((int)(InfoType)), "2" (0) );           \
        } while( 0 )
#endif
00086 #endif 
00087 
00088 //---------------------------------------
00089 // Microsoft Visual Studio Initialization
00090 //---------------------------------------
00091 
// All-ones bit patterns, defined for every compiler (not only MSVC).
#define SSP_ALL_SET_32I   0xFFFFFFFF           // 32 bits set
#define SSP_ALL_SET_64I   0xFFFFFFFFFFFFFFFF   // 64 bits set
00094 
00095 
00096 #if defined(SSP_MSVC)
00097 //#undef SSP_MSVC       
00098 
// 8-bit lanes (MSVC): brace initializers made of sixteen unsigned __int8 values.
//
// SETR: arguments are emitted in the order given (b0 is the first initializer).
#define SSP_CONST_SETR_8I( b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 ) \
    { (unsigned __int8)(b0),  (unsigned __int8)(b1),  (unsigned __int8)(b2),  (unsigned __int8)(b3),  \
      (unsigned __int8)(b4),  (unsigned __int8)(b5),  (unsigned __int8)(b6),  (unsigned __int8)(b7),  \
      (unsigned __int8)(b8),  (unsigned __int8)(b9),  (unsigned __int8)(b10), (unsigned __int8)(b11), \
      (unsigned __int8)(b12), (unsigned __int8)(b13), (unsigned __int8)(b14), (unsigned __int8)(b15) }

// SET: same as SETR with the argument order reversed (last argument comes first).
#define SSP_CONST_SET_8I( b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0 ) \
    SSP_CONST_SETR_8I( (b0), (b1), (b2),  (b3),  (b4),  (b5),  (b6),  (b7), \
                       (b8), (b9), (b10), (b11), (b12), (b13), (b14), (b15) )

// SET1: the same value in all sixteen lanes.
#define SSP_CONST_SET1_8I( v ) \
    SSP_CONST_SETR_8I( (v), (v), (v), (v), (v), (v), (v), (v), \
                       (v), (v), (v), (v), (v), (v), (v), (v) )

// SETZERO: all sixteen lanes zero.
#define SSP_CONST_SETZERO_8I() \
    SSP_CONST_SET1_8I( 0 )
00113 
// 16-bit lanes (MSVC), expressed as byte initializers.
//
// Helper: splits one 16-bit value into two unsigned __int8 initializers,
// low byte first.
#define __CNST16I28I_( w ) \
    ((unsigned __int8)((w) & 0xFF)), \
    ((unsigned __int8)(((w) >> 8) & 0xFF))

// SETR: eight words emitted in the order given.
#define SSP_CONST_SETR_16I( w0, w1, w2, w3, w4, w5, w6, w7 ) \
    { __CNST16I28I_((w0)), __CNST16I28I_((w1)), __CNST16I28I_((w2)), __CNST16I28I_((w3)), \
      __CNST16I28I_((w4)), __CNST16I28I_((w5)), __CNST16I28I_((w6)), __CNST16I28I_((w7)) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_16I( w7, w6, w5, w4, w3, w2, w1, w0 ) \
    SSP_CONST_SETR_16I( (w0), (w1), (w2), (w3), (w4), (w5), (w6), (w7) )

// SET1: the same word in all eight lanes.
#define SSP_CONST_SET1_16I( v ) \
    SSP_CONST_SETR_16I( (v), (v), (v), (v), (v), (v), (v), (v) )

// SETZERO: defers to the 8-bit all-zero initializer.
#define SSP_CONST_SETZERO_16I() \
    SSP_CONST_SETZERO_8I()
00128 
// 32-bit lanes (MSVC), expressed as byte initializers.
//
// Helper: splits one 32-bit value into four unsigned __int8 initializers,
// least-significant byte first.
#define __CNST32I28I_( d ) \
    ((unsigned __int8)((d) & 0xFF)), \
    ((unsigned __int8)(((d) >> 8) & 0xFF)), \
    ((unsigned __int8)(((d) >> 16) & 0xFF)), \
    ((unsigned __int8)(((d) >> 24) & 0xFF))

// SETR: four dwords emitted in the order given.
#define SSP_CONST_SETR_32I( d0, d1, d2, d3 ) \
    { __CNST32I28I_((d0)), __CNST32I28I_((d1)), __CNST32I28I_((d2)), __CNST32I28I_((d3)) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_32I( d3, d2, d1, d0 ) \
    SSP_CONST_SETR_32I( (d0), (d1), (d2), (d3) )

// SET1: the same dword in all four lanes.
#define SSP_CONST_SET1_32I( v ) \
    SSP_CONST_SETR_32I( (v), (v), (v), (v) )

// SETZERO: defers to the 8-bit all-zero initializer.
#define SSP_CONST_SETZERO_32I() \
    SSP_CONST_SETZERO_8I()
00143 
// 64-bit lanes (MSVC), expressed as byte initializers.
//
// Helper: splits one 64-bit value into eight unsigned __int8 initializers,
// least-significant byte first.
#define __CNST64I28I_( q ) \
    ((unsigned __int8)((q) & 0xFF)), \
    ((unsigned __int8)(((q) >> 8) & 0xFF)), \
    ((unsigned __int8)(((q) >> 16) & 0xFF)), \
    ((unsigned __int8)(((q) >> 24) & 0xFF)), \
    ((unsigned __int8)(((q) >> 32) & 0xFF)), \
    ((unsigned __int8)(((q) >> 40) & 0xFF)), \
    ((unsigned __int8)(((q) >> 48) & 0xFF)), \
    ((unsigned __int8)(((q) >> 56) & 0xFF))

// SETR: two qwords emitted in the order given.
#define SSP_CONST_SETR_64I( q0, q1 ) \
    { __CNST64I28I_((q0)), __CNST64I28I_((q1)) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_64I( q1, q0 ) \
    SSP_CONST_SETR_64I( (q0), (q1) )

// SET1: the same qword in both lanes.
#define SSP_CONST_SET1_64I( v ) \
    SSP_CONST_SETR_64I( (v), (v) )

// Generic all-zero initializer.
#define SSP_CONST_SETZERO_I() \
        { 0 }

// SETZERO: defers to the 8-bit all-zero initializer.
#define SSP_CONST_SETZERO_64I() \
    SSP_CONST_SETZERO_8I()
00161 
// Single-precision lanes (MSVC): plain four-element brace initializers.
//
// SETR: values emitted in the order given.
#define SSP_CONST_SETR_32F( f0, f1, f2, f3 ) \
    { (f0), (f1), (f2), (f3) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_32F( f3, f2, f1, f0 ) \
    SSP_CONST_SETR_32F( (f0), (f1), (f2), (f3) )

// SET1: the same value in all four lanes.
#define SSP_CONST_SET1_32F( v ) \
    SSP_CONST_SETR_32F( (v), (v), (v), (v) )

// SETZERO: all four lanes zero.
#define SSP_CONST_SETZERO_32F() \
    SSP_CONST_SET1_32F( 0 )
00173 
// Double-precision lanes (MSVC): plain two-element brace initializers.
//
// SETR: values emitted in the order given.
#define SSP_CONST_SETR_64F( d0, d1 ) \
    { (d0), (d1) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_64F( d1, d0 ) \
    SSP_CONST_SETR_64F( (d0), (d1) )

// SET1: the same value in both lanes.
#define SSP_CONST_SET1_64F( v ) \
    SSP_CONST_SETR_64F( (v), (v) )

// SETZERO: both lanes zero.
#define SSP_CONST_SETZERO_64F() \
    SSP_CONST_SET1_64F( 0 )
00185 
00186 #endif // SSP_MSVC
00187 
00188 
00189 //---------------------------------------
00190 // GCC Initialization
00191 //---------------------------------------
00192 #if defined(SSP_GNUC)
00193 //#undef SSP_GNUC
00194 
// 8-bit lanes (GCC): brace initializers made of two 64-bit values.
//
// Helper: packs eight byte-sized values into one 64-bit word; `a` lands in the
// least-significant byte and `h` in the most-significant one. Each argument is
// masked to 8 bits before being shifted. Callers must pass arguments of an
// unsigned 64-bit type so that the shift by 56 cannot overflow.
#define __CNST8TO64_( a, b, c, d, e, f, g, h ) \
        ( (((h)&0xff)<<56) | (((g)&0xff)<<48) | (((f)&0xff)<<40) | (((e)&0xff)<<32) | \
        (((d)&0xff)<<24) | (((c)&0xff)<<16) | (((b)&0xff)<<8) | ((a)&0xff) )

// SETR: bytes a..h form the first 64-bit element, i..p the second.
//
// The bytes are packed as ssp_u64, matching the 16- and 32-bit variants.
// Packing them as signed ssp_s64 shifts a byte >= 0x80 into the sign bit,
// which is undefined behaviour in C. The packed word is cast back to ssp_s64
// so the initializer elements keep their previous type.
#define SSP_CONST_SETR_8I( a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p ) \
    { (ssp_s64)__CNST8TO64_( (ssp_u64)(a), (ssp_u64)(b), (ssp_u64)(c), (ssp_u64)(d), \
                             (ssp_u64)(e), (ssp_u64)(f), (ssp_u64)(g), (ssp_u64)(h) ), \
      (ssp_s64)__CNST8TO64_( (ssp_u64)(i), (ssp_u64)(j), (ssp_u64)(k), (ssp_u64)(l), \
                             (ssp_u64)(m), (ssp_u64)(n), (ssp_u64)(o), (ssp_u64)(p) ) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_8I( a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p ) \
    SSP_CONST_SETR_8I( (p), (o), (n), (m), (l), (k), (j), (i), (h), (g), (f), (e), (d), (c), (b), (a) )

// SET1: the same byte in all sixteen lanes.
#define SSP_CONST_SET1_8I( x ) \
    SSP_CONST_SET_8I( (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x) )

// SETZERO: both 64-bit elements zero.
#define SSP_CONST_SETZERO_8I() \
    { 0, 0 }
00213 
// 16-bit lanes (GCC): brace initializers made of two 64-bit values.

// Helper: packs four 16-bit values into one 64-bit word, w0 in the low 16 bits
// and w3 in the high 16 bits. w0..w2 are masked to 16 bits; w3 is only shifted.
#define __CNST16TO64_( w0, w1, w2, w3 ) \
        ( ((w3)<<48) | \
          (((w2)&0xFFFF)<<32) | \
          (((w1)&0xFFFF)<<16) | \
          ((w0)&0xFFFF) )

// SETR: words w0..w3 form the first 64-bit element, w4..w7 the second.
// Every argument is cast to ssp_u64 before packing.
#define SSP_CONST_SETR_16I( w0, w1, w2, w3, w4, w5, w6, w7 ) \
    { __CNST16TO64_( (ssp_u64)(w0), (ssp_u64)(w1), (ssp_u64)(w2), (ssp_u64)(w3) ), \
      __CNST16TO64_( (ssp_u64)(w4), (ssp_u64)(w5), (ssp_u64)(w6), (ssp_u64)(w7) ) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_16I( w7, w6, w5, w4, w3, w2, w1, w0 ) \
    SSP_CONST_SETR_16I( (w0), (w1), (w2), (w3), (w4), (w5), (w6), (w7) )

// SET1: the same word in all eight lanes.
#define SSP_CONST_SET1_16I( v ) \
    SSP_CONST_SETR_16I( (v), (v), (v), (v), (v), (v), (v), (v) )

// SETZERO: defers to the 8-bit all-zero initializer.
#define SSP_CONST_SETZERO_16I() \
    SSP_CONST_SETZERO_8I()
00233 
// 32-bit lanes (GCC): brace initializers made of two 64-bit values.

// Helper: packs two 32-bit values into one 64-bit word, lo in the low half
// (masked to 32 bits) and hi shifted into the high half.
#define __CNST32TO64_( lo, hi ) \
        ( ((hi)<<32) | ((lo) & 0xFFFFFFFF) )

// SETR: d0/d1 form the first 64-bit element, d2/d3 the second.
// Every argument is cast to ssp_u64 before packing.
#define SSP_CONST_SETR_32I( d0, d1, d2, d3 ) \
    { __CNST32TO64_( (ssp_u64)(d0), (ssp_u64)(d1) ), \
      __CNST32TO64_( (ssp_u64)(d2), (ssp_u64)(d3) ) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_32I( d3, d2, d1, d0 ) \
    SSP_CONST_SETR_32I( (d0), (d1), (d2), (d3) )

// SET1: the same dword in all four lanes.
#define SSP_CONST_SET1_32I( v ) \
    SSP_CONST_SETR_32I( (v), (v), (v), (v) )

// SETZERO: defers to the 8-bit all-zero initializer.
#define SSP_CONST_SETZERO_32I() \
    SSP_CONST_SETZERO_8I()
00251 
// 64-bit lanes (GCC): the two values are used directly as initializers.

// SETR: values emitted in the order given.
#define SSP_CONST_SETR_64I( q0, q1 ) \
    { (q0), (q1) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_64I( q1, q0 ) \
    SSP_CONST_SETR_64I( (q0), (q1) )

// SET1: the same value in both lanes.
#define SSP_CONST_SET1_64I( v ) \
    SSP_CONST_SETR_64I( (v), (v) )

// SETZERO: defers to the 8-bit all-zero initializer.
#define SSP_CONST_SETZERO_64I() \
    SSP_CONST_SETZERO_8I()
00265 
00266 
// Single-precision lanes (GCC): plain four-element brace initializers.

// SETR: values emitted in the order given.
#define SSP_CONST_SETR_32F( f0, f1, f2, f3 ) \
    { (f0), (f1), (f2), (f3) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_32F( f3, f2, f1, f0 ) \
    SSP_CONST_SETR_32F( (f0), (f1), (f2), (f3) )

// SET1: the same value in all four lanes.
#define SSP_CONST_SET1_32F( v ) \
    SSP_CONST_SETR_32F( (v), (v), (v), (v) )

// SETZERO: all four lanes zero.
#define SSP_CONST_SETZERO_32F() \
    SSP_CONST_SET1_32F( 0 )
00280 
// Double-precision lanes (GCC): plain two-element brace initializers.

// SETR: values emitted in the order given.
#define SSP_CONST_SETR_64F( d0, d1 ) \
    { (d0), (d1) }

// SET: same as SETR with the argument order reversed.
#define SSP_CONST_SET_64F( d1, d0 ) \
    SSP_CONST_SETR_64F( (d0), (d1) )

// SET1: the same value in both lanes.
#define SSP_CONST_SET1_64F( v ) \
    SSP_CONST_SETR_64F( (v), (v) )

// SETZERO: both lanes zero.
#define SSP_CONST_SETZERO_64F() \
    SSP_CONST_SET1_64F( 0 )
00293 
00294 #endif // SSP_GNUC
00295 #endif // __SSEPLUS_PLATFORM_H__

Generated on Wed May 21 13:44:11 2008 for "SSEPlus" by  doxygen 1.5.4