include/native/SSEPlus_native_SSE2.h

Go to the documentation of this file.
00001 //
00002 // Copyright (c) 2006-2008 Advanced Micro Devices, Inc. All Rights Reserved.
00003 // This software is subject to the Apache v2.0 License.
00004 //
00005 #ifndef __SSEPLUS_NATIVE_SSE2_H__
00006 #define __SSEPLUS_NATIVE_SSE2_H__
00007 
00008 #include "../SSEPlus_base.h"
00009 #include <emmintrin.h>  // SSE2
00010 
00018 SSP_FORCEINLINE __m128i ssp_add_epi16_SSE2( __m128i a, __m128i b )
00019 {
00020     return _mm_add_epi16( a, b );
00021 }
00023 SSP_FORCEINLINE __m128i ssp_add_epi32_SSE2( __m128i a, __m128i b )
00024 {
00025     return _mm_add_epi32( a, b );
00026 }
00028 SSP_FORCEINLINE __m128i ssp_add_epi64_SSE2( __m128i a, __m128i b )
00029 {
00030     return _mm_add_epi64( a, b );
00031 }
00033 SSP_FORCEINLINE __m128i ssp_add_epi8_SSE2( __m128i a, __m128i b )
00034 {
00035     return _mm_add_epi8( a, b );
00036 }
00038 SSP_FORCEINLINE __m128d ssp_add_pd_SSE2( __m128d a, __m128d b )
00039 {
00040     return _mm_add_pd( a, b );
00041 }
00043 SSP_FORCEINLINE __m128d ssp_add_sd_SSE2( __m128d a, __m128d b )
00044 {
00045     return _mm_add_sd( a, b );
00046 }
00048 SSP_FORCEINLINE __m64 ssp_add_si64_SSE2( __m64 a, __m64 b)
00049 {
00050     return _mm_add_si64( a, b );
00051 }
00053 SSP_FORCEINLINE __m128i ssp_adds_epi16_SSE2( __m128i a, __m128i b )
00054 {
00055     return _mm_adds_epi16( a, b );
00056 }
00058 SSP_FORCEINLINE __m128i ssp_adds_epi8_SSE2( __m128i a, __m128i b )
00059 {
00060     return _mm_adds_epi8( a, b );
00061 }
00063 SSP_FORCEINLINE __m128i ssp_adds_epu16_SSE2( __m128i a, __m128i b )
00064 {
00065     return _mm_adds_epu16( a, b );
00066 }
00068 SSP_FORCEINLINE __m128i ssp_adds_epu8_SSE2( __m128i a, __m128i b )
00069 {
00070     return _mm_adds_epu8( a, b );
00071 }
00073 SSP_FORCEINLINE __m128d ssp_and_pd_SSE2( __m128d a, __m128d b )
00074 {
00075     return _mm_and_pd( a, b );
00076 }
00078 SSP_FORCEINLINE __m128i ssp_and_si128_SSE2( __m128i a, __m128i b )
00079 {
00080     return _mm_and_si128( a, b );
00081 }
00083 SSP_FORCEINLINE __m128d ssp_andnot_pd_SSE2( __m128d a, __m128d b )
00084 {
00085     return _mm_andnot_pd( a, b );
00086 }
00088 SSP_FORCEINLINE __m128i ssp_andnot_si128_SSE2( __m128i a, __m128i b )
00089 {
00090     return _mm_andnot_si128( a, b );
00091 }
00093 SSP_FORCEINLINE __m128i ssp_avg_epu16_SSE2( __m128i a, __m128i b )
00094 {
00095     return _mm_avg_epu16( a, b );
00096 }
00098 SSP_FORCEINLINE __m128i ssp_avg_epu8_SSE2( __m128i a, __m128i b )
00099 {
00100     return _mm_avg_epu8( a, b );
00101 }
00102 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00103 //SSE2{Native,_mm_castpd_ps}
00104 //SSP_FORCEINLINE __m128 ssp_castpd_ps_SSE2( __m128d a )
00105 //{
00106 //    return _mm_castpd_ps( a );
00107 //}
00108 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00109 //SSE2{Native,_mm_castpd_si128}
00110 //SSP_FORCEINLINE __m128i ssp_castpd_si128_SSE2( __m128d a )
00111 //{
00112 //    return _mm_castpd_si128( a );
00113 //}
00114 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00115 //SSE2{Native,_mm_castps_pd}  
00116 //SSP_FORCEINLINE __m128d ssp_castps_pd_SSE2( __m128 a )
00117 //{
00118 //    return _mm_castps_pd( a );
00119 //}
00120 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00121 //SSE2{Native,_mm_castps_si128} 
00122 //SSP_FORCEINLINE __m128i ssp_castps_si128_SSE2( __m128 a )
00123 //{
00124 //    return _mm_castps_si128( a );
00125 //}
00126 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00127 //SSE2{Native,_mm_castsi128_pd} 
00128 //SSP_FORCEINLINE __m128d ssp_castsi128_pd_SSE2( __m128i a )
00129 //{
00130 //    return _mm_castsi128_pd( a );
00131 //}
00132 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00133 //SSE2{Native,_mm_castsi128_ps} 
00134 //SSP_FORCEINLINE __m128 ssp_castsi128_ps_SSE2( __m128i a )
00135 //{
00136 //    return _mm_castsi128_ps( a );
00137 //}
00138 
00140 SSP_FORCEINLINE void ssp_clflush_SSE2( void const *p )
00141 {
00142     _mm_clflush( p );
00143 }
00145 SSP_FORCEINLINE __m128i ssp_cmpeq_epi16_SSE2( __m128i a, __m128i b )
00146 {
00147     return _mm_cmpeq_epi16( a, b );
00148 }
00150 SSP_FORCEINLINE __m128i ssp_cmpeq_epi32_SSE2( __m128i a, __m128i b )
00151 {
00152     return _mm_cmpeq_epi32( a, b );
00153 }
00155 SSP_FORCEINLINE __m128i ssp_cmpeq_epi8_SSE2( __m128i a, __m128i b )
00156 {
00157     return _mm_cmpeq_epi8( a, b );
00158 }
00160 SSP_FORCEINLINE __m128d ssp_cmpeq_pd_SSE2( __m128d a, __m128d b )
00161 {
00162     return _mm_cmpeq_pd( a, b );
00163 }
00165 SSP_FORCEINLINE __m128d ssp_cmpeq_sd_SSE2( __m128d a, __m128d b )
00166 {
00167     return _mm_cmpeq_sd( a, b );
00168 }
00170 SSP_FORCEINLINE __m128d ssp_cmpge_pd_SSE2( __m128d a, __m128d b )
00171 {
00172     return _mm_cmpge_pd( a, b );
00173 }
00175 SSP_FORCEINLINE __m128d ssp_cmpge_sd_SSE2( __m128d a, __m128d b )
00176 {
00177     return _mm_cmpge_sd( a, b );
00178 }
00180 SSP_FORCEINLINE __m128i ssp_cmpgt_epi16_SSE2( __m128i a, __m128i b )
00181 {
00182     return _mm_cmpgt_epi16( a, b );
00183 }
00185 SSP_FORCEINLINE __m128i ssp_cmpgt_epi32_SSE2( __m128i a, __m128i b )
00186 {
00187     return _mm_cmpgt_epi32( a, b );
00188 }
00190 SSP_FORCEINLINE __m128i ssp_cmpgt_epi8_SSE2( __m128i a, __m128i b )
00191 {
00192     return _mm_cmpgt_epi8( a, b );
00193 }
00195 SSP_FORCEINLINE __m128d ssp_cmpgt_pd_SSE2( __m128d a, __m128d b )
00196 {
00197     return _mm_cmpgt_pd( a, b );
00198 }
00200 SSP_FORCEINLINE __m128d ssp_cmpgt_sd_SSE2( __m128d a, __m128d b )
00201 {
00202     return _mm_cmpgt_sd( a, b );
00203 }
00205 SSP_FORCEINLINE __m128d ssp_cmple_pd_SSE2( __m128d a, __m128d b )
00206 {
00207     return _mm_cmple_pd( a, b );
00208 }
00210 SSP_FORCEINLINE __m128d ssp_cmple_sd_SSE2( __m128d a, __m128d b )
00211 {
00212     return _mm_cmple_sd( a, b );
00213 }
00215 SSP_FORCEINLINE __m128i ssp_cmplt_epi16_SSE2( __m128i a, __m128i b )
00216 {
00217     return _mm_cmplt_epi16( a, b );
00218 }
00220 SSP_FORCEINLINE __m128i ssp_cmplt_epi32_SSE2( __m128i a, __m128i b )
00221 {
00222     return _mm_cmplt_epi32( a, b );
00223 }
00225 SSP_FORCEINLINE __m128i ssp_cmplt_epi8_SSE2( __m128i a, __m128i b )
00226 {
00227     return _mm_cmplt_epi8( a, b );
00228 }
00230 SSP_FORCEINLINE __m128d ssp_cmplt_pd_SSE2( __m128d a, __m128d b )
00231 {
00232     return _mm_cmplt_pd( a, b );
00233 }
00235 SSP_FORCEINLINE __m128d ssp_cmplt_sd_SSE2( __m128d a, __m128d b )
00236 {
00237     return _mm_cmplt_sd( a, b );
00238 }
00240 SSP_FORCEINLINE __m128d ssp_cmpneq_pd_SSE2( __m128d a, __m128d b )
00241 {
00242     return _mm_cmpneq_pd( a, b );
00243 }
00245 SSP_FORCEINLINE __m128d ssp_cmpneq_sd_SSE2( __m128d a, __m128d b )
00246 {
00247     return _mm_cmpneq_sd( a, b );
00248 }
00250 SSP_FORCEINLINE __m128d ssp_cmpnge_pd_SSE2( __m128d a, __m128d b )
00251 {
00252     return _mm_cmpnge_pd( a, b );
00253 }
00255 SSP_FORCEINLINE __m128d ssp_cmpnge_sd_SSE2( __m128d a, __m128d b )
00256 {
00257     return _mm_cmpnge_sd( a, b );
00258 }
00260 SSP_FORCEINLINE __m128d ssp_cmpngt_pd_SSE2( __m128d a, __m128d b )
00261 {
00262     return _mm_cmpngt_pd( a, b );
00263 }
00265 SSP_FORCEINLINE __m128d ssp_cmpngt_sd_SSE2( __m128d a, __m128d b )
00266 {
00267     return _mm_cmpngt_sd( a, b );
00268 }
00270 SSP_FORCEINLINE __m128d ssp_cmpnle_pd_SSE2( __m128d a, __m128d b )
00271 {
00272     return _mm_cmpnle_pd( a, b );
00273 }
00275 SSP_FORCEINLINE __m128d ssp_cmpnle_sd_SSE2( __m128d a, __m128d b )
00276 {
00277     return _mm_cmpnle_sd( a, b );
00278 }
00280 SSP_FORCEINLINE __m128d ssp_cmpnlt_pd_SSE2( __m128d a, __m128d b )
00281 {
00282     return _mm_cmpnlt_pd( a, b );
00283 }
00285 SSP_FORCEINLINE __m128d ssp_cmpnlt_sd_SSE2( __m128d a, __m128d b )
00286 {
00287     return _mm_cmpnlt_sd( a, b );
00288 }
00290 SSP_FORCEINLINE __m128d ssp_cmpord_pd_SSE2( __m128d a, __m128d b )
00291 {
00292     return _mm_cmpord_pd( a, b );
00293 }
00295 SSP_FORCEINLINE __m128d ssp_cmpord_sd_SSE2( __m128d a, __m128d b )
00296 {
00297     return _mm_cmpord_sd( a, b );
00298 }
00300 SSP_FORCEINLINE __m128d ssp_cmpunord_pd_SSE2( __m128d a, __m128d b )
00301 {
00302     return _mm_cmpunord_pd( a, b );
00303 }
00305 SSP_FORCEINLINE __m128d ssp_cmpunord_sd_SSE2( __m128d a, __m128d b )
00306 {
00307     return _mm_cmpunord_sd( a, b );
00308 }
00310 SSP_FORCEINLINE int ssp_comieq_sd_SSE2( __m128d a, __m128d b )
00311 {
00312     return _mm_comieq_sd( a, b );
00313 }
00315 SSP_FORCEINLINE int ssp_comige_sd_SSE2( __m128d a, __m128d b )
00316 {
00317     return _mm_comige_sd( a, b );
00318 }
00320 SSP_FORCEINLINE int ssp_comigt_sd_SSE2( __m128d a, __m128d b )
00321 {
00322     return _mm_comigt_sd( a, b );
00323 }
00325 SSP_FORCEINLINE int ssp_comile_sd_SSE2( __m128d a, __m128d b )
00326 {
00327     return _mm_comile_sd( a, b );
00328 }
00330 SSP_FORCEINLINE int ssp_comilt_sd_SSE2( __m128d a, __m128d b )
00331 {
00332     return _mm_comilt_sd( a, b );
00333 }
00335 SSP_FORCEINLINE int ssp_comineq_sd_SSE2( __m128d a, __m128d b )
00336 {
00337     return _mm_comineq_sd( a, b );
00338 }
00340 SSP_FORCEINLINE __m128d ssp_cvtepi32_pd_SSE2( __m128i a )
00341 {
00342     return _mm_cvtepi32_pd( a );
00343 }
00345 SSP_FORCEINLINE __m128 ssp_cvtepi32_ps_SSE2( __m128i a )
00346 {
00347     return _mm_cvtepi32_ps( a );
00348 }
00350 SSP_FORCEINLINE __m128i ssp_cvtpd_epi32_SSE2( __m128d a )
00351 {
00352     return _mm_cvtpd_epi32( a );
00353 }
00355 SSP_FORCEINLINE __m64 ssp_cvtpd_pi32_SSE2( __m128d a )
00356 {
00357     return _mm_cvtpd_pi32( a );
00358 }
00360 SSP_FORCEINLINE __m128 ssp_cvtpd_ps_SSE2( __m128d a )
00361 {
00362     return _mm_cvtpd_ps( a );
00363 }
00365 SSP_FORCEINLINE __m128d ssp_cvtpi32_pd_SSE2( __m64 a )
00366 {
00367     return _mm_cvtpi32_pd( a );
00368 }
00370 SSP_FORCEINLINE __m128i ssp_cvtps_epi32_SSE2( __m128 a )
00371 {
00372     return _mm_cvtps_epi32( a );
00373 }
00375 SSP_FORCEINLINE __m128d ssp_cvtps_pd_SSE2( __m128 a )
00376 {
00377     return _mm_cvtps_pd( a );
00378 }
00379 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00380 //SSE2{Native,_mm_cvtsd_f64}
00381 //SSP_FORCEINLINE double ssp_cvtsd_f64_SSE2( __m128d a )
00382 //{
00383 //    return _mm_cvtsd_f64( a );
00384 //}
00385 
00387 SSP_FORCEINLINE int ssp_cvtsd_si32_SSE2( __m128d a )
00388 {
00389     return _mm_cvtsd_si32( a );
00390 }
00391 #ifdef SYS64
00394 //SSP_FORCEINLINE __int64 ssp_cvtsd_si64_SSE2( __m128d a )
00395 //{
00396 //    return _mm_cvtsd_si64( a );
00397 //}
00398 #endif
00399 
00400 SSP_FORCEINLINE __m128 ssp_cvtsd_ss_SSE2( __m128 a, __m128d b )
00401 {
00402     return _mm_cvtsd_ss( a, b );
00403 }
00405 SSP_FORCEINLINE int ssp_cvtsi128_si32_SSE2( __m128i a )
00406 {
00407     return _mm_cvtsi128_si32( a );
00408 }
00409 #ifdef SYS64
00410 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00412 //SSP_FORCEINLINE __int64 ssp_cvtsi128_si64_SSE2( __m128i a )
00413 //{
00414 //    return _mm_cvtsi128_si64( a );
00415 //}
00416 #endif
00417 
00418 SSP_FORCEINLINE __m128d ssp_cvtsi32_sd_SSE2( __m128d a, int b )
00419 {
00420     return _mm_cvtsi32_sd( a, b );
00421 }
00423 SSP_FORCEINLINE __m128i ssp_cvtsi32_si128_SSE2( int a )
00424 {
00425     return _mm_cvtsi32_si128( a );
00426 }
00427 #ifdef SYS64
00428 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00430 //SSP_FORCEINLINE __m128d ssp_cvtsi64_sd_SSE2( __m128d a, __int64 b )
00431 //{
00432 //    return _mm_cvtsi64_sd( a, b );
00433 //}
00434 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00436 //SSP_FORCEINLINE __m128i ssp_cvtsi64_si128_SSE2( __int64 a )
00437 //{
00438 //    return _mm_cvtsi64_si128( a );
00439 //}
00440 #endif
00441 
00442 SSP_FORCEINLINE __m128d ssp_cvtss_sd_SSE2( __m128d a, __m128 b )
00443 {
00444     return _mm_cvtss_sd( a, b );
00445 }
00447 SSP_FORCEINLINE __m128i ssp_cvttpd_epi32_SSE2( __m128d a )
00448 {
00449     return _mm_cvttpd_epi32( a );
00450 }
00452 SSP_FORCEINLINE __m64 ssp_cvttpd_pi32_SSE2( __m128d a )
00453 {
00454     return _mm_cvttpd_pi32( a );
00455 }
00457 SSP_FORCEINLINE __m128i ssp_cvttps_epi32_SSE2( __m128 a )
00458 {
00459     return _mm_cvttps_epi32( a );
00460 }
00462 SSP_FORCEINLINE int ssp_cvttsd_si32_SSE2( __m128d a )
00463 {
00464     return _mm_cvttsd_si32( a );
00465 }
00466 #ifdef SYS64
00467 // *** Microsoft Specific Intrinsic TODO: Write Reference for VS8
00469 //SSP_FORCEINLINE __int64 ssp_cvttsd_si64_SSE2( __m128d a )
00470 //{
00471 //    return _mm_cvttsd_si64( a );
00472 //}
00473 #endif
00474 
00475 SSP_FORCEINLINE __m128d ssp_div_pd_SSE2( __m128d a, __m128d b )
00476 {
00477     return _mm_div_pd( a, b );
00478 }
00480 SSP_FORCEINLINE __m128d ssp_div_sd_SSE2( __m128d a, __m128d b )
00481 {
00482     return _mm_div_sd( a, b );
00483 }
00485 SSP_FORCEINLINE int ssp_extract_epi16_SSE2( __m128i a, int imm )
00486 {
00487     switch( imm & 0x7 )
00488     {
00489         CASE_8( _mm_extract_epi16, a );
00490     }
00491 }
00493 SSP_FORCEINLINE __m128i ssp_insert_epi16_SSE2( __m128i a, int b, int imm )
00494 {
00495     switch( imm & 0x7 )
00496     {
00497         CASE_8( _mm_insert_epi16, a, b );
00498     }
00499 }
00501 SSP_FORCEINLINE void ssp_lfence_SSE2( void )
00502 {
00503     _mm_lfence();
00504 }
00506 SSP_FORCEINLINE __m128d ssp_load_pd_SSE2( double const*dp )
00507 {
00508     return _mm_load_pd( dp );
00509 }
00511 SSP_FORCEINLINE __m128d ssp_load_sd_SSE2( double const*dp )
00512 {
00513     return _mm_load_sd( dp );
00514 }
00516 SSP_FORCEINLINE __m128i ssp_load_si128_SSE2( __m128i const*p )
00517 {
00518     return _mm_load_si128( p );
00519 }
00521 SSP_FORCEINLINE __m128d ssp_load1_pd_SSE2( double const*dp )
00522 {
00523     return _mm_load1_pd( dp );
00524 }
00526 SSP_FORCEINLINE __m128d ssp_loadh_pd_SSE2( __m128d a, double const*dp )
00527 {
00528     return _mm_loadh_pd( a, dp );
00529 }
00531 SSP_FORCEINLINE __m128i ssp_loadl_epi64_SSE2( __m128i const*p)
00532 {
00533     return _mm_loadl_epi64( p );
00534 }
00536 SSP_FORCEINLINE __m128d ssp_loadl_pd_SSE2( __m128d a, double const*dp )
00537 {
00538     return _mm_loadl_pd( a, dp );
00539 }
00541 SSP_FORCEINLINE __m128d ssp_loadr_pd_SSE2( double const*dp )
00542 {
00543     return _mm_loadr_pd( dp );
00544 }
00546 SSP_FORCEINLINE __m128d ssp_loadu_pd_SSE2( double const*dp )
00547 {
00548     return _mm_loadu_pd( dp );
00549 }
00551 SSP_FORCEINLINE __m128i ssp_loadu_si128_SSE2( __m128i const*p )
00552 {
00553     return _mm_loadu_si128( p );
00554 }
00556 SSP_FORCEINLINE __m128i ssp_madd_epi16_SSE2( __m128i a, __m128i b )
00557 {
00558     return _mm_madd_epi16( a, b );
00559 }
00561 SSP_FORCEINLINE void ssp_maskmoveu_si128_SSE2( __m128i a, __m128i b, char *c )
00562 {
00563     _mm_maskmoveu_si128( a, b, c );
00564 }
00566 SSP_FORCEINLINE __m128i ssp_max_epi16_SSE2( __m128i a, __m128i b )
00567 {
00568     return _mm_max_epi16( a, b );
00569 }
00571 SSP_FORCEINLINE __m128i ssp_max_epu8_SSE2( __m128i a, __m128i b )
00572 {
00573     return _mm_max_epu8( a, b );
00574 }
00576 SSP_FORCEINLINE __m128d ssp_max_pd_SSE2( __m128d a, __m128d b )
00577 {
00578     return _mm_max_pd( a, b );
00579 }
00581 SSP_FORCEINLINE __m128d ssp_max_sd_SSE2( __m128d a, __m128d b )
00582 {
00583     return _mm_max_sd( a, b );
00584 }
00586 SSP_FORCEINLINE void ssp_mfence_SSE2( void )
00587 {
00588     _mm_mfence( );
00589 }
00591 SSP_FORCEINLINE __m128i ssp_min_epi16_SSE2( __m128i a, __m128i b )
00592 {
00593     return _mm_min_epi16( a, b );
00594 }
00596 SSP_FORCEINLINE __m128i ssp_min_epu8_SSE2( __m128i a, __m128i b )
00597 {
00598     return _mm_min_epu8( a, b );
00599 }
00601 SSP_FORCEINLINE __m128d ssp_min_pd_SSE2( __m128d a, __m128d b )
00602 {
00603     return _mm_min_pd( a, b );
00604 }
00606 SSP_FORCEINLINE __m128d ssp_min_sd_SSE2( __m128d a, __m128d b )
00607 {
00608     return _mm_min_sd( a, b );
00609 }
00611 SSP_FORCEINLINE __m128i ssp_move_epi64_SSE2( __m128i a )
00612 {
00613     return _mm_move_epi64( a );
00614 }
00616 SSP_FORCEINLINE __m128d ssp_move_sd_SSE2( __m128d a, __m128d b )
00617 {
00618     return _mm_move_sd( a, b );
00619 }
00621 SSP_FORCEINLINE int ssp_movemask_epi8_SSE2( __m128i a )
00622 {
00623     return _mm_movemask_epi8( a );
00624 }
00626 SSP_FORCEINLINE int ssp_movemask_pd_SSE2( __m128d a )
00627 {
00628     return _mm_movemask_pd( a );
00629 }
00631 SSP_FORCEINLINE __m64 ssp_movepi64_pi64_SSE2( __m128i a )
00632 {
00633     return _mm_movepi64_pi64( a );
00634 }
00636 SSP_FORCEINLINE __m128i ssp_movpi64_epi64_SSE2( __m64 a )
00637 {
00638     return _mm_movpi64_epi64( a );
00639 }
00641 SSP_FORCEINLINE __m128i ssp_mul_epu32_SSE2( __m128i a, __m128i b )
00642 {
00643     return _mm_mul_epu32( a, b );
00644 }
00646 SSP_FORCEINLINE __m128d ssp_mul_pd_SSE2( __m128d a, __m128d b )
00647 {
00648     return _mm_mul_pd( a, b );
00649 }
00651 SSP_FORCEINLINE __m128d ssp_mul_sd_SSE2( __m128d a, __m128d b )
00652 {
00653     return _mm_mul_sd( a, b );
00654 }
00656 SSP_FORCEINLINE __m64 ssp_mul_su32_SSE2( __m64 a, __m64 b)
00657 {
00658     return _mm_mul_su32( a, b );
00659 }
00661 SSP_FORCEINLINE __m128i ssp_mulhi_epi16_SSE2( __m128i a, __m128i b )
00662 {
00663     return _mm_mulhi_epi16( a, b );
00664 }
00666 SSP_FORCEINLINE __m128i ssp_mulhi_epu16_SSE2( __m128i a, __m128i b )
00667 {
00668     return _mm_mulhi_epu16( a, b );
00669 }
00671 SSP_FORCEINLINE __m128i ssp_mullo_epi16_SSE2( __m128i a, __m128i b )
00672 {
00673     return _mm_mullo_epi16( a, b );
00674 }
00676 SSP_FORCEINLINE __m128d ssp_or_pd_SSE2( __m128d a, __m128d b )
00677 {
00678     return _mm_or_pd( a, b );
00679 }
00681 SSP_FORCEINLINE __m128i ssp_or_si128_SSE2( __m128i a, __m128i b )
00682 {
00683     return _mm_or_si128( a, b );
00684 }
00686 SSP_FORCEINLINE __m128i ssp_packs_epi16_SSE2( __m128i a, __m128i b )
00687 {
00688     return _mm_packs_epi16( a, b );
00689 }
00691 SSP_FORCEINLINE __m128i ssp_packs_epi32_SSE2( __m128i a, __m128i b )
00692 {
00693     return _mm_packs_epi32( a, b );
00694 }
00696 SSP_FORCEINLINE __m128i ssp_packus_epi16_SSE2( __m128i a, __m128i b )
00697 {
00698     return _mm_packus_epi16( a, b );
00699 }
00701 SSP_FORCEINLINE void ssp_pause_SSE2( void )
00702 {
00703     _mm_pause();
00704 }
00706 SSP_FORCEINLINE __m128i ssp_sad_epu8_SSE2( __m128i a, __m128i b )
00707 {
00708     return _mm_sad_epu8( a, b );
00709 }
00711 SSP_FORCEINLINE __m128i ssp_set_epi16_SSE2( short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0 )
00712 {
00713     return _mm_set_epi16( w7, w6, w5, w4, w3, w2, w1, w0 );
00714 }
00716 SSP_FORCEINLINE __m128i ssp_set_epi32_SSE2( int i3, int i2, int i1, int i0 )
00717 {
00718     return _mm_set_epi32( i3, i2, i1, i0 );
00719 }
00721 SSP_FORCEINLINE __m128i ssp_set_epi64_SSE2( __m64 a1, __m64 a0 )
00722 {
00723     return _mm_set_epi64( a1, a0 );
00724 }
00726 SSP_FORCEINLINE __m128i ssp_set_epi8_SSE2( char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0 )
00727 {
00728     return _mm_set_epi8( b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0 );
00729 }
00731 SSP_FORCEINLINE __m128d ssp_set_pd_SSE2( double a1, double a0)
00732 {
00733     return _mm_set_pd( a1, a0 );
00734 }
00736 SSP_FORCEINLINE __m128d ssp_set_sd_SSE2( double w)
00737 {
00738     return _mm_set_sd( w );
00739 }
00741 SSP_FORCEINLINE __m128i ssp_set1_epi16_SSE2( short w)
00742 {
00743     return _mm_set1_epi16( w );
00744 }
00746 SSP_FORCEINLINE __m128i ssp_set1_epi32_SSE2( int i )
00747 {
00748     return _mm_set1_epi32( i );
00749 }
00751 SSP_FORCEINLINE __m128i ssp_set1_epi64_SSE2( __m64 a )
00752 {
00753     return _mm_set1_epi64( a );
00754 }
00756 SSP_FORCEINLINE __m128i ssp_set1_epi8_SSE2( char b )
00757 {
00758     return _mm_set1_epi8( b );
00759 }
00761 SSP_FORCEINLINE __m128d ssp_set1_pd_SSE2( double a )
00762 {
00763     return _mm_set1_pd( a );
00764 }
00765 // Composite intrinsic not supported in GCC
00766 #ifdef SSP_MSVC
00767 
00768 SSP_FORCEINLINE __m128i ssp_setl_epi64_SSE2( __m128i a )
00769 {
00770     return _mm_setl_epi64( a );
00771 }
00772 #endif
00773 
00774 SSP_FORCEINLINE __m128i ssp_setr_epi16_SSE2( short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7 )
00775 {
00776     return _mm_setr_epi16( w0, w1, w2, w3, w4, w5, w6, w7 );
00777 }
00779 SSP_FORCEINLINE __m128i ssp_setr_epi32_SSE2( int i0, int i1, int i2, int i3)
00780 {
00781     return _mm_setr_epi32( i0, i1, i2, i3);
00782 }
00784 SSP_FORCEINLINE __m128i ssp_setr_epi64_SSE2( __m64 a0, __m64 a1)
00785 {
00786     return _mm_setr_epi64( a0, a1);
00787 }
00789 SSP_FORCEINLINE __m128i ssp_setr_epi8_SSE2( char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0 )
00790 {
00791     return _mm_setr_epi8( b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0 );
00792 }
00794 SSP_FORCEINLINE __m128d ssp_setr_pd_SSE2( double a0, double a1 )
00795 {
00796     return _mm_setr_pd( a0, a1);
00797 }
00799 SSP_FORCEINLINE __m128d ssp_setzero_pd_SSE2( void )
00800 {
00801     return _mm_setzero_pd( );
00802 }
00804 SSP_FORCEINLINE __m128i ssp_setzero_si128_SSE2( void )
00805 {
00806     return _mm_setzero_si128( );
00807 }
00809 SSP_FORCEINLINE __m128i ssp_shuffle_epi32_SSE2( __m128i a, int imm )
00810 {
00811     switch( imm & 0xFF )
00812     {
00813         CASE_256( _mm_shuffle_epi32, a );
00814     }
00815 }
00817 SSP_FORCEINLINE __m128d ssp_shuffle_pd_SSE2( __m128d a, __m128d b, int imm )
00818 {
00819     switch( imm & 0xFF )
00820     {
00821         CASE_4( _mm_shuffle_pd, a, b );
00822     }
00823 }
00825 SSP_FORCEINLINE __m128i ssp_shufflehi_epi16_SSE2( __m128i a, int imm )
00826 {
00827     switch( imm & 0xFF )
00828     {
00829         CASE_256( _mm_shufflehi_epi16, a );
00830     }
00831 }
00833 SSP_FORCEINLINE __m128i ssp_shufflelo_epi16_SSE2( __m128i a, int imm )
00834 {
00835     switch( imm & 0xFF )
00836     {
00837         CASE_256( _mm_shufflelo_epi16, a );
00838     }
00839 }
00841 SSP_FORCEINLINE __m128i ssp_sll_epi16_SSE2( __m128i a, __m128i count )
00842 {
00843     return _mm_sll_epi16( a, count );
00844 }
00846 SSP_FORCEINLINE __m128i ssp_sll_epi32_SSE2( __m128i a, __m128i count )
00847 {
00848     return _mm_sll_epi32( a, count );
00849 }
00851 SSP_FORCEINLINE __m128i ssp_sll_epi64_SSE2( __m128i a, __m128i count )
00852 {
00853     return _mm_sll_epi64( a, count );
00854 }
00856 SSP_FORCEINLINE __m128i ssp_slli_epi16_SSE2( __m128i a, int count )
00857 {
00858     return _mm_slli_epi16( a, count );
00859 }
00861 SSP_FORCEINLINE __m128i ssp_slli_epi32_SSE2( __m128i a, int count )
00862 {
00863     return _mm_slli_epi32( a, count );
00864 }
00866 SSP_FORCEINLINE __m128i ssp_slli_epi64_SSE2( __m128i a, int count )
00867 {
00868     return _mm_slli_epi64( a, count );
00869 }
00871 SSP_FORCEINLINE __m128i ssp_slli_si128_SSE2( __m128i a, int imm )
00872 {
00873     switch( imm & 0x7F )
00874     {
00875         CASE_128( _mm_slli_si128, a );
00876     }
00877 }
00879 SSP_FORCEINLINE __m128d ssp_sqrt_pd_SSE2( __m128d a )
00880 {
00881     return _mm_sqrt_pd( a );
00882 }
00884 SSP_FORCEINLINE __m128d ssp_sqrt_sd_SSE2( __m128d a, __m128d b )
00885 {
00886     return _mm_sqrt_sd( a, b );
00887 }
00889 SSP_FORCEINLINE __m128i ssp_sra_epi16_SSE2( __m128i a, __m128i count )
00890 {
00891     return _mm_sra_epi16( a, count );
00892 }
00894 SSP_FORCEINLINE __m128i ssp_sra_epi32_SSE2( __m128i a, __m128i count )
00895 {
00896     return _mm_sra_epi32( a, count );
00897 }
00899 SSP_FORCEINLINE __m128i ssp_srai_epi16_SSE2( __m128i a, int count )
00900 {
00901     return _mm_srai_epi16( a, count );
00902 }
00904 SSP_FORCEINLINE __m128i ssp_srai_epi32_SSE2( __m128i a, int count )
00905 {
00906     return _mm_srai_epi32( a, count );
00907 }
00909 SSP_FORCEINLINE __m128i ssp_srl_epi16_SSE2( __m128i a, __m128i count )
00910 {
00911     return _mm_srl_epi16( a, count );
00912 }
00914 SSP_FORCEINLINE __m128i ssp_srl_epi32_SSE2( __m128i a, __m128i count )
00915 {
00916     return _mm_srl_epi32( a, count );
00917 }
00919 SSP_FORCEINLINE __m128i ssp_srl_epi64_SSE2( __m128i a, __m128i count )
00920 {
00921     return _mm_srl_epi64( a, count );
00922 }
00924 SSP_FORCEINLINE __m128i ssp_srli_epi16_SSE2( __m128i a, int count )
00925 {
00926     return _mm_srli_epi16( a, count );
00927 }
00929 SSP_FORCEINLINE __m128i ssp_srli_epi32_SSE2( __m128i a, int count )
00930 {
00931     return _mm_srli_epi32( a, count );
00932 }
00934 SSP_FORCEINLINE __m128i ssp_srli_epi64_SSE2( __m128i a, int count )
00935 {
00936     return _mm_srli_epi64( a, count );
00937 }
00939 SSP_FORCEINLINE __m128i ssp_srli_si128_SSE2( __m128i a, int imm )
00940 {
00941     switch( imm & 0x7F )
00942     {
00943         CASE_128( _mm_srli_si128, a );
00944     }
00945 }
00947 SSP_FORCEINLINE void ssp_store_pd_SSE2( double *dp, __m128d a )
00948 {
00949     _mm_store_pd( dp, a );
00950 }
00952 SSP_FORCEINLINE void ssp_store_sd_SSE2( double *dp, __m128d a )
00953 {
00954     _mm_store_sd( dp, a );
00955 }
00957 SSP_FORCEINLINE void ssp_store_si128_SSE2( __m128i *p, __m128i b )
00958 {
00959     _mm_store_si128( p, b );
00960 }
00962 SSP_FORCEINLINE void ssp_store1_pd_SSE2( double *dp, __m128d a )
00963 {
00964     _mm_store1_pd( dp, a );
00965 }
00967 SSP_FORCEINLINE void ssp_storeh_pd_SSE2( double *dp, __m128d a )
00968 {
00969     _mm_storeh_pd( dp, a );
00970 }
00972 SSP_FORCEINLINE void ssp_storel_epi64_SSE2( __m128i *p, __m128i b )
00973 {
00974     _mm_storel_epi64( p, b );
00975 }
00977 SSP_FORCEINLINE void ssp_storel_pd_SSE2( double *dp, __m128d a )
00978 {
00979     _mm_storel_pd( dp, a );
00980 }
00982 SSP_FORCEINLINE void ssp_storer_pd_SSE2( double *dp, __m128d a )
00983 {
00984     _mm_storer_pd( dp, a );
00985 }
00987 SSP_FORCEINLINE void ssp_storeu_pd_SSE2( double *dp, __m128d a )
00988 {
00989     _mm_storeu_pd( dp, a );
00990 }
00992 SSP_FORCEINLINE void ssp_storeu_si128_SSE2( __m128i *p, __m128i b )
00993 {
00994     _mm_storeu_si128( p, b );
00995 }
00997 SSP_FORCEINLINE void ssp_stream_pd_SSE2( double *dp, __m128d a )
00998 {
00999     _mm_stream_pd( dp, a );
01000 }
01002 SSP_FORCEINLINE void ssp_stream_si128_SSE2( __m128i *p, __m128i a )
01003 {
01004     _mm_stream_si128( p, a );
01005 }
01007 SSP_FORCEINLINE void ssp_stream_si32_SSE2( int *p, int i )
01008 {
01009     _mm_stream_si32( p, i );
01010 }
01012 SSP_FORCEINLINE __m128i ssp_sub_epi16_SSE2( __m128i a, __m128i b )
01013 {
01014     return _mm_sub_epi16( a, b );
01015 }
01017 SSP_FORCEINLINE __m128i ssp_sub_epi32_SSE2( __m128i a, __m128i b )
01018 {
01019     return _mm_sub_epi32( a, b );
01020 }
01022 SSP_FORCEINLINE __m128i ssp_sub_epi64_SSE2( __m128i a, __m128i b )
01023 {
01024     return _mm_sub_epi64( a, b );
01025 }
01027 SSP_FORCEINLINE __m128i ssp_sub_epi8_SSE2( __m128i a, __m128i b )
01028 {
01029     return _mm_sub_epi8( a, b );
01030 }
01032 SSP_FORCEINLINE __m128d ssp_sub_pd_SSE2( __m128d a, __m128d b )
01033 {
01034     return _mm_sub_pd( a, b );
01035 }
01037 SSP_FORCEINLINE __m128d ssp_sub_sd_SSE2( __m128d a, __m128d b )
01038 {
01039     return _mm_sub_sd( a, b );
01040 }
01042 SSP_FORCEINLINE __m64 ssp_sub_si64_SSE2( __m64 a, __m64 b)
01043 {
01044     return _mm_sub_si64( a, b );
01045 }
01047 SSP_FORCEINLINE __m128i ssp_subs_epi16_SSE2( __m128i a, __m128i b )
01048 {
01049     return _mm_subs_epi16( a, b );
01050 }
01052 SSP_FORCEINLINE __m128i ssp_subs_epi8_SSE2( __m128i a, __m128i b )
01053 {
01054     return _mm_subs_epi8( a, b );
01055 }
01057 SSP_FORCEINLINE __m128i ssp_subs_epu16_SSE2( __m128i a, __m128i b )
01058 {
01059     return _mm_subs_epu16( a, b );
01060 }
01062 SSP_FORCEINLINE __m128i ssp_subs_epu8_SSE2( __m128i a, __m128i b )
01063 {
01064     return _mm_subs_epu8( a, b );
01065 }
01067 SSP_FORCEINLINE int ssp_ucomieq_sd_SSE2( __m128d a, __m128d b )
01068 {
01069     return _mm_ucomieq_sd( a, b );
01070 }
01072 SSP_FORCEINLINE int ssp_ucomige_sd_SSE2( __m128d a, __m128d b )
01073 {
01074     return _mm_ucomige_sd( a, b );
01075 }
01077 SSP_FORCEINLINE int ssp_ucomigt_sd_SSE2( __m128d a, __m128d b )
01078 {
01079     return _mm_ucomigt_sd( a, b );
01080 }
01082 SSP_FORCEINLINE int ssp_ucomile_sd_SSE2( __m128d a, __m128d b )
01083 {
01084     return _mm_ucomile_sd( a, b );
01085 }
01087 SSP_FORCEINLINE int ssp_ucomilt_sd_SSE2( __m128d a, __m128d b )
01088 {
01089     return _mm_ucomilt_sd( a, b );
01090 }
01092 SSP_FORCEINLINE int ssp_ucomineq_sd_SSE2( __m128d a, __m128d b )
01093 {
01094     return _mm_ucomineq_sd( a, b );
01095 }
01097 SSP_FORCEINLINE __m128i ssp_unpackhi_epi16_SSE2( __m128i a, __m128i b )
01098 {
01099     return _mm_unpackhi_epi16( a, b );
01100 }
01102 SSP_FORCEINLINE __m128i ssp_unpackhi_epi32_SSE2( __m128i a, __m128i b )
01103 {
01104     return _mm_unpackhi_epi32( a, b );
01105 }
01107 SSP_FORCEINLINE __m128i ssp_unpackhi_epi64_SSE2( __m128i a, __m128i b )
01108 {
01109     return _mm_unpackhi_epi64( a, b );
01110 }
01112 SSP_FORCEINLINE __m128i ssp_unpackhi_epi8_SSE2( __m128i a, __m128i b )
01113 {
01114     return _mm_unpackhi_epi8( a, b );
01115 }
01117 SSP_FORCEINLINE __m128d ssp_unpackhi_pd_SSE2( __m128d a, __m128d b )
01118 {
01119     return _mm_unpackhi_pd( a, b );
01120 }
01122 SSP_FORCEINLINE __m128i ssp_unpacklo_epi16_SSE2( __m128i a, __m128i b )
01123 {
01124     return _mm_unpacklo_epi16( a, b );
01125 }
01127 SSP_FORCEINLINE __m128i ssp_unpacklo_epi32_SSE2( __m128i a, __m128i b )
01128 {
01129     return _mm_unpacklo_epi32( a, b );
01130 }
01132 SSP_FORCEINLINE __m128i ssp_unpacklo_epi64_SSE2( __m128i a, __m128i b )
01133 {
01134     return _mm_unpacklo_epi64( a, b );
01135 }
01137 SSP_FORCEINLINE __m128i ssp_unpacklo_epi8_SSE2( __m128i a, __m128i b )
01138 {
01139     return _mm_unpacklo_epi8( a, b );
01140 }
01142 SSP_FORCEINLINE __m128d ssp_unpacklo_pd_SSE2( __m128d a, __m128d b )
01143 {
01144     return _mm_unpacklo_pd( a, b );
01145 }
01147 SSP_FORCEINLINE __m128d ssp_xor_pd_SSE2( __m128d a, __m128d b )
01148 {
01149     return _mm_xor_pd( a, b );
01150 }
01152 SSP_FORCEINLINE __m128i ssp_xor_si128_SSE2( __m128i a, __m128i b )
01153 {
01154     return _mm_xor_si128( a, b );
01155 }
01156 
01158 
01159 
01160 #endif // __SSEPLUS_NATIVE_SSE2_H__

Generated on Wed May 21 13:44:11 2008 for "SSEPlus" by  doxygen 1.5.4