00001
00002
00003
00004
00005
00006
00007
00008 #ifndef __SSEPLUS_PLATFORM_H__
00009 #define __SSEPLUS_PLATFORM_H__
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #if defined( _MSC_VER )
// Marks the build as using the Microsoft compiler.
#define SSP_MSVC

// From _MSC_VER 1500 upwards the compiler is flagged as supporting the
// SSSE3, SSE4a, SSE4.1 and SSE4.2 families.
#if _MSC_VER >= 1500
#   define SSP_COMPILER_SUPPORTS_SSSE3
#   define SSP_COMPILER_SUPPORTS_SSE4a
#   define SSP_COMPILER_SUPPORTS_SSE41
#   define SSP_COMPILER_SUPPORTS_SSE42
#endif
00028
// Inlining keyword used on this compiler.
#define SSP_FORCEINLINE                 __forceinline

// Header to include for each intrinsic family.
#define SSP_INCLUDE_FILE_SSE3           <intrin.h>          // every intrinsic, SSE3 included
#define SSP_INCLUDE_FILE_SSE4a          <intrin.h>          // every intrinsic, SSE4a included
#define SSP_INCLUDE_FILE_SSE5           "SSEPlus_NoSSE5.h"  // SSE5
#define SSP_INCLUDE_FILE_SSE4_1_SSE5    <smmintrin.h>       // functions shared by SSE4.1 and SSE5
00034
00035 #include <intrin.h>
00036
00037
00038
00039
00040 #elif defined( __GNUC__ )
// Marks the build as using a GNU-compatible compiler.
#define SSP_GNUC

// Flag the newer instruction-set families from GCC 4.3 onwards.
// Major and minor are compared together: checking `__GNUC_MINOR__ >= 3` on
// its own rejects every later major release whose minor number is below 3
// (for example 5.1 or 12.2).
// NOTE(review): SSP_COMPILER_SUPPORTS_SSE5 is still set for every version that
// passes this check, exactly as before - confirm that the SSE5 header is
// available on the compilers actually targeted.
#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 3 ) )
#define SSP_COMPILER_SUPPORTS_SSSE3
#define SSP_COMPILER_SUPPORTS_SSE4a
#define SSP_COMPILER_SUPPORTS_SSE41
#define SSP_COMPILER_SUPPORTS_SSE42
#define SSP_COMPILER_SUPPORTS_SSE5
#endif
00052
// Inlining keyword used on this compiler.  A bare `__inline__` is only a hint,
// so the always_inline attribute is added to make the request binding, in line
// with the `__forceinline` keyword used on the MSVC branch.
#define SSP_FORCEINLINE __inline__ __attribute__((__always_inline__))

// Header to include for each intrinsic family.
#define SSP_INCLUDE_FILE_SSE3 <pmmintrin.h> // SSE3
#define SSP_INCLUDE_FILE_SSE4a <ammintrin.h> // All intrinsics, including SSE4a
#define SSP_INCLUDE_FILE_SSE5 <bmmintrin.h> // SSE5
#define SSP_INCLUDE_FILE_SSE4_1_SSE5 <mmintrin-common.h> // Functions common to SSE4.1 and SSE5
00058
00059
// __cpuid( CPUInfo, InfoType ): executes CPUID with EAX = InfoType and ECX = 0,
// then stores EAX, EBX, ECX and EDX as four consecutive 32-bit values starting
// at CPUInfo.  Defined only when SYS64 or SYS32 is set.
//
// Both variants:
//  - clear ECX before CPUID so that leaves selecting a sub-leaf through ECX do
//    not depend on whatever the register happened to hold;
//  - list "memory" as clobbered because the results are written through the
//    CPUInfo pointer rather than through output operands;
//  - preserve RBX/EBX around CPUID by hand instead of listing it as clobbered.
#if defined( SYS64 )
// RBX is parked in R8 rather than pushed: a push inside inline assembly would
// overwrite the red zone below the stack pointer that the x86-64 System V ABI
// lets the compiler use for locals.
#define __cpuid(CPUInfo, InfoType)                                          \
    __asm__ __volatile__( "movq %%rbx, %%r8\n\t"                            \
                          "movl %%esi, %%eax\n\t"                           \
                          "xorl %%ecx, %%ecx\n\t"                           \
                          "cpuid\n\t"                                       \
                          "movl %%eax, 0x0(%%rdi)\n\t"                      \
                          "movl %%ebx, 0x4(%%rdi)\n\t"                      \
                          "movl %%ecx, 0x8(%%rdi)\n\t"                      \
                          "movl %%edx, 0xc(%%rdi)\n\t"                      \
                          "movq %%r8, %%rbx"                                \
                          :                                                 \
                          : "D" (CPUInfo), "S" (InfoType)                   \
                          : "rax", "rcx", "rdx", "r8", "memory", "cc" )

#elif defined( SYS32 )
#define __cpuid(CPUInfo, InfoType)                                          \
    __asm__ __volatile__( "pushl %%ebx\n\t"                                 \
                          "movl %%esi, %%eax\n\t"                           \
                          "xorl %%ecx, %%ecx\n\t"                           \
                          "cpuid\n\t"                                       \
                          "movl %%eax, 0x0(%%edi)\n\t"                      \
                          "movl %%ebx, 0x4(%%edi)\n\t"                      \
                          "movl %%ecx, 0x8(%%edi)\n\t"                      \
                          "movl %%edx, 0xc(%%edi)\n\t"                      \
                          "popl %%ebx"                                      \
                          :                                                 \
                          : "D" (CPUInfo), "S" (InfoType)                   \
                          : "eax", "ecx", "edx", "memory", "cc" )
#endif
00086 #endif
00087
00088
00089
00090
00091
// Constants with every bit set, 32 and 64 bits wide.
#define SSP_ALL_SET_32I     0xffffffff
#define SSP_ALL_SET_64I     0xffffffffffffffff
00094
00095
00096 #if defined(SSP_MSVC)
00097
00098
// Brace-enclosed initializer holding sixteen 8-bit values in argument order;
// every argument is converted to unsigned __int8.
#define SSP_CONST_SETR_8I( b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 ) \
    { (unsigned __int8)(b0),  (unsigned __int8)(b1),  \
      (unsigned __int8)(b2),  (unsigned __int8)(b3),  \
      (unsigned __int8)(b4),  (unsigned __int8)(b5),  \
      (unsigned __int8)(b6),  (unsigned __int8)(b7),  \
      (unsigned __int8)(b8),  (unsigned __int8)(b9),  \
      (unsigned __int8)(b10), (unsigned __int8)(b11), \
      (unsigned __int8)(b12), (unsigned __int8)(b13), \
      (unsigned __int8)(b14), (unsigned __int8)(b15) }

// Same initializer with the arguments taken in reverse order: the last
// argument becomes the first element.
#define SSP_CONST_SET_8I( b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0 ) \
    SSP_CONST_SETR_8I( (b0), (b1), (b2),  (b3),  (b4),  (b5),  (b6),  (b7), \
                       (b8), (b9), (b10), (b11), (b12), (b13), (b14), (b15) )

// All sixteen elements set to x.
#define SSP_CONST_SET1_8I( x ) \
    SSP_CONST_SETR_8I( (x), (x), (x), (x), (x), (x), (x), (x), \
                       (x), (x), (x), (x), (x), (x), (x), (x) )

// All sixteen elements set to zero.
#define SSP_CONST_SETZERO_8I() \
    SSP_CONST_SET1_8I( 0 )
00113
// Expands one 16-bit value into two unsigned __int8 initializers, low byte
// first.
#define __CNST16I28I_( v )                      \
    ((unsigned __int8)( (v)       & 0xFF)),     \
    ((unsigned __int8)(((v) >> 8) & 0xFF))

// Brace-enclosed initializer holding eight 16-bit values in argument order,
// written out byte by byte.
#define SSP_CONST_SETR_16I( w0, w1, w2, w3, w4, w5, w6, w7 )    \
    { __CNST16I28I_((w0)), __CNST16I28I_((w1)),                 \
      __CNST16I28I_((w2)), __CNST16I28I_((w3)),                 \
      __CNST16I28I_((w4)), __CNST16I28I_((w5)),                 \
      __CNST16I28I_((w6)), __CNST16I28I_((w7)) }

// Same initializer with the arguments taken in reverse order.
#define SSP_CONST_SET_16I( w7, w6, w5, w4, w3, w2, w1, w0 ) \
    SSP_CONST_SETR_16I( (w0), (w1), (w2), (w3), (w4), (w5), (w6), (w7) )

// All eight 16-bit elements set to x.
#define SSP_CONST_SET1_16I( x ) \
    SSP_CONST_SETR_16I( (x), (x), (x), (x), (x), (x), (x), (x) )

// All elements zero; forwards to the 8-bit zero initializer.
#define SSP_CONST_SETZERO_16I() \
    SSP_CONST_SETZERO_8I()
00128
// Expands one 32-bit value into four unsigned __int8 initializers, lowest
// byte first.
#define __CNST32I28I_( v )                      \
    ((unsigned __int8)( (v)        & 0xFF)),    \
    ((unsigned __int8)(((v) >>  8) & 0xFF)),    \
    ((unsigned __int8)(((v) >> 16) & 0xFF)),    \
    ((unsigned __int8)(((v) >> 24) & 0xFF))

// Brace-enclosed initializer holding four 32-bit values in argument order,
// written out byte by byte.
#define SSP_CONST_SETR_32I( d0, d1, d2, d3 )        \
    { __CNST32I28I_((d0)), __CNST32I28I_((d1)),     \
      __CNST32I28I_((d2)), __CNST32I28I_((d3)) }

// Same initializer with the arguments taken in reverse order.
#define SSP_CONST_SET_32I( d3, d2, d1, d0 ) \
    SSP_CONST_SETR_32I( (d0), (d1), (d2), (d3) )

// All four 32-bit elements set to x.
#define SSP_CONST_SET1_32I( x ) \
    SSP_CONST_SETR_32I( (x), (x), (x), (x) )

// All elements zero; forwards to the 8-bit zero initializer.
#define SSP_CONST_SETZERO_32I() \
    SSP_CONST_SETZERO_8I()
00143
// Expands one 64-bit value into eight unsigned __int8 initializers, lowest
// byte first.  The argument is widened to unsigned __int64 before any shift:
// shifting a narrower value (for example a plain int literal) by 32 or more
// bits is undefined.  Negative narrow arguments are sign-extended by that
// conversion, so -1 yields eight 0xFF bytes.
#define __CNST64I28I_( x )                                              \
    ((unsigned __int8)( ((unsigned __int64)(x))        & 0xFF)),        \
    ((unsigned __int8)((((unsigned __int64)(x)) >>  8) & 0xFF)),        \
    ((unsigned __int8)((((unsigned __int64)(x)) >> 16) & 0xFF)),        \
    ((unsigned __int8)((((unsigned __int64)(x)) >> 24) & 0xFF)),        \
    ((unsigned __int8)((((unsigned __int64)(x)) >> 32) & 0xFF)),        \
    ((unsigned __int8)((((unsigned __int64)(x)) >> 40) & 0xFF)),        \
    ((unsigned __int8)((((unsigned __int64)(x)) >> 48) & 0xFF)),        \
    ((unsigned __int8)((((unsigned __int64)(x)) >> 56) & 0xFF))

// Brace-enclosed initializer holding two 64-bit values in argument order,
// written out byte by byte.
#define SSP_CONST_SETR_64I( a, b ) \
    { __CNST64I28I_((a)), __CNST64I28I_((b)) }

// Same initializer with the two arguments swapped.
#define SSP_CONST_SET_64I( a, b ) \
    SSP_CONST_SETR_64I( (b), (a) )

// Both 64-bit elements set to x.
#define SSP_CONST_SET1_64I( x ) \
    SSP_CONST_SET_64I( (x), (x) )

// Initializer with a single zero; remaining elements are zero-initialized by
// the language rules for aggregates.
#define SSP_CONST_SETZERO_I() \
    { 0 }

// All elements zero; forwards to the 8-bit zero initializer.
#define SSP_CONST_SETZERO_64I() \
    SSP_CONST_SETZERO_8I()
00161
// Brace-enclosed initializer holding four values in argument order.
#define SSP_CONST_SETR_32F( f0, f1, f2, f3 ) \
    { (f0), (f1), (f2), (f3) }

// Same initializer with the arguments taken in reverse order.
#define SSP_CONST_SET_32F( f3, f2, f1, f0 ) \
    SSP_CONST_SETR_32F( (f0), (f1), (f2), (f3) )

// All four elements set to x.
#define SSP_CONST_SET1_32F( x ) \
    SSP_CONST_SETR_32F( (x), (x), (x), (x) )

// All four elements set to zero.
#define SSP_CONST_SETZERO_32F() \
    SSP_CONST_SET1_32F( 0 )
00173
// Brace-enclosed initializer holding two values in argument order.
#define SSP_CONST_SETR_64F( d0, d1 ) \
    { (d0), (d1) }

// Same initializer with the two arguments swapped.
#define SSP_CONST_SET_64F( d1, d0 ) \
    SSP_CONST_SETR_64F( (d0), (d1) )

// Both elements set to x.
#define SSP_CONST_SET1_64F( x ) \
    SSP_CONST_SETR_64F( (x), (x) )

// Both elements set to zero.
#define SSP_CONST_SETZERO_64F() \
    SSP_CONST_SET1_64F( 0 )
00185
00186 #endif // SSP_MSVC
00187
00188
00189
00190
00191
00192 #if defined(SSP_GNUC)
00193
00194
// Packs eight values into one 64-bit quantity: each argument is masked to its
// low 8 bits, `a` ends up in the least significant byte and `h` in the most
// significant one.  The arithmetic is done in the type of the arguments.
#define __CNST8TO64_( a, b, c, d, e, f, g, h ) \
    ( (((h)&0xff)<<56) | (((g)&0xff)<<48) | (((f)&0xff)<<40) | (((e)&0xff)<<32) | \
      (((d)&0xff)<<24) | (((c)&0xff)<<16) | (((b)&0xff)<<8)  |  ((a)&0xff) )

// Brace-enclosed initializer of two 64-bit values built from sixteen bytes in
// argument order (a..h form the first value, i..p the second).
// The bytes are packed as ssp_u64, like the 16-bit and 32-bit variants do:
// packing in a signed type shifts a byte of 0x80 or above into the sign bit,
// which is undefined.  Each packed value is converted back to ssp_s64 so the
// initializer elements keep their original type.
#define SSP_CONST_SETR_8I( a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p ) \
    { (ssp_s64)__CNST8TO64_( (ssp_u64)(a), (ssp_u64)(b), (ssp_u64)(c), (ssp_u64)(d), \
                             (ssp_u64)(e), (ssp_u64)(f), (ssp_u64)(g), (ssp_u64)(h) ), \
      (ssp_s64)__CNST8TO64_( (ssp_u64)(i), (ssp_u64)(j), (ssp_u64)(k), (ssp_u64)(l), \
                             (ssp_u64)(m), (ssp_u64)(n), (ssp_u64)(o), (ssp_u64)(p) ) }

// Same initializer with the arguments taken in reverse order.
#define SSP_CONST_SET_8I( a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p ) \
    SSP_CONST_SETR_8I( (p), (o), (n), (m), (l), (k), (j), (i), (h), (g), (f), (e), (d), (c), (b), (a) )

// All sixteen bytes set to x.
#define SSP_CONST_SET1_8I( x ) \
    SSP_CONST_SET_8I( (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x), (x) )

// Both 64-bit values zero.
#define SSP_CONST_SETZERO_8I() \
    { 0, 0 }
00213
00214
00215
// Packs four values into one 64-bit quantity, w0 in the lowest 16 bits and w3
// in the highest.  w0..w2 are masked to 16 bits; w3 is only shifted.
#define __CNST16TO64_( w0, w1, w2, w3 )     \
    (  ((w0) & 0xFFFF)                      \
    | (((w1) & 0xFFFF) << 16)               \
    | (((w2) & 0xFFFF) << 32)               \
    |  ((w3) << 48) )

// Brace-enclosed initializer of two 64-bit values built from eight 16-bit
// values in argument order; every argument is converted to ssp_u64 first.
#define SSP_CONST_SETR_16I( w0, w1, w2, w3, w4, w5, w6, w7 )                            \
    { __CNST16TO64_( (ssp_u64)(w0), (ssp_u64)(w1), (ssp_u64)(w2), (ssp_u64)(w3) ),      \
      __CNST16TO64_( (ssp_u64)(w4), (ssp_u64)(w5), (ssp_u64)(w6), (ssp_u64)(w7) ) }

// Same initializer with the arguments taken in reverse order.
#define SSP_CONST_SET_16I( w7, w6, w5, w4, w3, w2, w1, w0 ) \
    SSP_CONST_SETR_16I( (w0), (w1), (w2), (w3), (w4), (w5), (w6), (w7) )

// All eight 16-bit values set to x.
#define SSP_CONST_SET1_16I( x ) \
    SSP_CONST_SETR_16I( (x), (x), (x), (x), (x), (x), (x), (x) )

// All elements zero; forwards to the 8-bit zero initializer.
#define SSP_CONST_SETZERO_16I() \
    SSP_CONST_SETZERO_8I()
00233
00234
00235
// Packs two values into one 64-bit quantity: lo masked to 32 bits in the low
// half, hi shifted into the high half.
#define __CNST32TO64_( lo, hi ) \
    ( ((lo) & 0xFFFFFFFF) | ((hi) << 32) )

// Brace-enclosed initializer of two 64-bit values built from four 32-bit
// values in argument order; every argument is converted to ssp_u64 first.
#define SSP_CONST_SETR_32I( d0, d1, d2, d3 )                \
    { __CNST32TO64_( (ssp_u64)(d0), (ssp_u64)(d1) ),        \
      __CNST32TO64_( (ssp_u64)(d2), (ssp_u64)(d3) ) }

// Same initializer with the arguments taken in reverse order.
#define SSP_CONST_SET_32I( d3, d2, d1, d0 ) \
    SSP_CONST_SETR_32I( (d0), (d1), (d2), (d3) )

// All four 32-bit values set to x.
#define SSP_CONST_SET1_32I( x ) \
    SSP_CONST_SETR_32I( (x), (x), (x), (x) )

// All elements zero; forwards to the 8-bit zero initializer.
#define SSP_CONST_SETZERO_32I() \
    SSP_CONST_SETZERO_8I()
00251
00252
00253
// Brace-enclosed initializer holding two values in argument order.
#define SSP_CONST_SETR_64I( q0, q1 ) \
    { (q0), (q1) }

// Same initializer with the two arguments swapped.
#define SSP_CONST_SET_64I( q1, q0 ) \
    SSP_CONST_SETR_64I( (q0), (q1) )

// Both elements set to x.
#define SSP_CONST_SET1_64I( x ) \
    SSP_CONST_SETR_64I( (x), (x) )

// All elements zero; forwards to the 8-bit zero initializer.
#define SSP_CONST_SETZERO_64I() \
    SSP_CONST_SETZERO_8I()
00265
00266
00267
00268
// Brace-enclosed initializer holding four values in argument order.
#define SSP_CONST_SETR_32F( f0, f1, f2, f3 ) \
    { (f0), (f1), (f2), (f3) }

// Same initializer with the arguments taken in reverse order.
#define SSP_CONST_SET_32F( f3, f2, f1, f0 ) \
    SSP_CONST_SETR_32F( (f0), (f1), (f2), (f3) )

// All four elements set to x.
#define SSP_CONST_SET1_32F( x ) \
    SSP_CONST_SETR_32F( (x), (x), (x), (x) )

// All four elements set to zero.
#define SSP_CONST_SETZERO_32F() \
    SSP_CONST_SET1_32F( 0 )
00280
00281
// Brace-enclosed initializer holding two values in argument order.
#define SSP_CONST_SETR_64F( d0, d1 ) \
    { (d0), (d1) }

// Same initializer with the two arguments swapped.
#define SSP_CONST_SET_64F( d1, d0 ) \
    SSP_CONST_SETR_64F( (d0), (d1) )

// Both elements set to x.
#define SSP_CONST_SET1_64F( x ) \
    SSP_CONST_SETR_64F( (x), (x) )

// Both elements set to zero.
#define SSP_CONST_SETZERO_64F() \
    SSP_CONST_SET1_64F( 0 )
00293
00294 #endif // SSP_GNUC
00295 #endif // __SSEPLUS_PLATFORM_H__