00001
00002
00003
00004
00005 #ifndef __SSEPLUS_NATIVE_SSE2_H__
00006 #define __SSEPLUS_NATIVE_SSE2_H__
00007
00008 #include "../SSEPlus_base.h"
00009 #include <emmintrin.h>
00010
00018 SSP_FORCEINLINE __m128i ssp_add_epi16_SSE2( __m128i a, __m128i b )
00019 {
00020 return _mm_add_epi16( a, b );
00021 }
00023 SSP_FORCEINLINE __m128i ssp_add_epi32_SSE2( __m128i a, __m128i b )
00024 {
00025 return _mm_add_epi32( a, b );
00026 }
00028 SSP_FORCEINLINE __m128i ssp_add_epi64_SSE2( __m128i a, __m128i b )
00029 {
00030 return _mm_add_epi64( a, b );
00031 }
00033 SSP_FORCEINLINE __m128i ssp_add_epi8_SSE2( __m128i a, __m128i b )
00034 {
00035 return _mm_add_epi8( a, b );
00036 }
00038 SSP_FORCEINLINE __m128d ssp_add_pd_SSE2( __m128d a, __m128d b )
00039 {
00040 return _mm_add_pd( a, b );
00041 }
00043 SSP_FORCEINLINE __m128d ssp_add_sd_SSE2( __m128d a, __m128d b )
00044 {
00045 return _mm_add_sd( a, b );
00046 }
00048 SSP_FORCEINLINE __m64 ssp_add_si64_SSE2( __m64 a, __m64 b)
00049 {
00050 return _mm_add_si64( a, b );
00051 }
00053 SSP_FORCEINLINE __m128i ssp_adds_epi16_SSE2( __m128i a, __m128i b )
00054 {
00055 return _mm_adds_epi16( a, b );
00056 }
00058 SSP_FORCEINLINE __m128i ssp_adds_epi8_SSE2( __m128i a, __m128i b )
00059 {
00060 return _mm_adds_epi8( a, b );
00061 }
00063 SSP_FORCEINLINE __m128i ssp_adds_epu16_SSE2( __m128i a, __m128i b )
00064 {
00065 return _mm_adds_epu16( a, b );
00066 }
00068 SSP_FORCEINLINE __m128i ssp_adds_epu8_SSE2( __m128i a, __m128i b )
00069 {
00070 return _mm_adds_epu8( a, b );
00071 }
00073 SSP_FORCEINLINE __m128d ssp_and_pd_SSE2( __m128d a, __m128d b )
00074 {
00075 return _mm_and_pd( a, b );
00076 }
00078 SSP_FORCEINLINE __m128i ssp_and_si128_SSE2( __m128i a, __m128i b )
00079 {
00080 return _mm_and_si128( a, b );
00081 }
00083 SSP_FORCEINLINE __m128d ssp_andnot_pd_SSE2( __m128d a, __m128d b )
00084 {
00085 return _mm_andnot_pd( a, b );
00086 }
00088 SSP_FORCEINLINE __m128i ssp_andnot_si128_SSE2( __m128i a, __m128i b )
00089 {
00090 return _mm_andnot_si128( a, b );
00091 }
00093 SSP_FORCEINLINE __m128i ssp_avg_epu16_SSE2( __m128i a, __m128i b )
00094 {
00095 return _mm_avg_epu16( a, b );
00096 }
00098 SSP_FORCEINLINE __m128i ssp_avg_epu8_SSE2( __m128i a, __m128i b )
00099 {
00100 return _mm_avg_epu8( a, b );
00101 }
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00140 SSP_FORCEINLINE void ssp_clflush_SSE2( void const *p )
00141 {
00142 _mm_clflush( p );
00143 }
00145 SSP_FORCEINLINE __m128i ssp_cmpeq_epi16_SSE2( __m128i a, __m128i b )
00146 {
00147 return _mm_cmpeq_epi16( a, b );
00148 }
00150 SSP_FORCEINLINE __m128i ssp_cmpeq_epi32_SSE2( __m128i a, __m128i b )
00151 {
00152 return _mm_cmpeq_epi32( a, b );
00153 }
00155 SSP_FORCEINLINE __m128i ssp_cmpeq_epi8_SSE2( __m128i a, __m128i b )
00156 {
00157 return _mm_cmpeq_epi8( a, b );
00158 }
00160 SSP_FORCEINLINE __m128d ssp_cmpeq_pd_SSE2( __m128d a, __m128d b )
00161 {
00162 return _mm_cmpeq_pd( a, b );
00163 }
00165 SSP_FORCEINLINE __m128d ssp_cmpeq_sd_SSE2( __m128d a, __m128d b )
00166 {
00167 return _mm_cmpeq_sd( a, b );
00168 }
00170 SSP_FORCEINLINE __m128d ssp_cmpge_pd_SSE2( __m128d a, __m128d b )
00171 {
00172 return _mm_cmpge_pd( a, b );
00173 }
00175 SSP_FORCEINLINE __m128d ssp_cmpge_sd_SSE2( __m128d a, __m128d b )
00176 {
00177 return _mm_cmpge_sd( a, b );
00178 }
00180 SSP_FORCEINLINE __m128i ssp_cmpgt_epi16_SSE2( __m128i a, __m128i b )
00181 {
00182 return _mm_cmpgt_epi16( a, b );
00183 }
00185 SSP_FORCEINLINE __m128i ssp_cmpgt_epi32_SSE2( __m128i a, __m128i b )
00186 {
00187 return _mm_cmpgt_epi32( a, b );
00188 }
00190 SSP_FORCEINLINE __m128i ssp_cmpgt_epi8_SSE2( __m128i a, __m128i b )
00191 {
00192 return _mm_cmpgt_epi8( a, b );
00193 }
00195 SSP_FORCEINLINE __m128d ssp_cmpgt_pd_SSE2( __m128d a, __m128d b )
00196 {
00197 return _mm_cmpgt_pd( a, b );
00198 }
00200 SSP_FORCEINLINE __m128d ssp_cmpgt_sd_SSE2( __m128d a, __m128d b )
00201 {
00202 return _mm_cmpgt_sd( a, b );
00203 }
00205 SSP_FORCEINLINE __m128d ssp_cmple_pd_SSE2( __m128d a, __m128d b )
00206 {
00207 return _mm_cmple_pd( a, b );
00208 }
00210 SSP_FORCEINLINE __m128d ssp_cmple_sd_SSE2( __m128d a, __m128d b )
00211 {
00212 return _mm_cmple_sd( a, b );
00213 }
00215 SSP_FORCEINLINE __m128i ssp_cmplt_epi16_SSE2( __m128i a, __m128i b )
00216 {
00217 return _mm_cmplt_epi16( a, b );
00218 }
00220 SSP_FORCEINLINE __m128i ssp_cmplt_epi32_SSE2( __m128i a, __m128i b )
00221 {
00222 return _mm_cmplt_epi32( a, b );
00223 }
00225 SSP_FORCEINLINE __m128i ssp_cmplt_epi8_SSE2( __m128i a, __m128i b )
00226 {
00227 return _mm_cmplt_epi8( a, b );
00228 }
00230 SSP_FORCEINLINE __m128d ssp_cmplt_pd_SSE2( __m128d a, __m128d b )
00231 {
00232 return _mm_cmplt_pd( a, b );
00233 }
00235 SSP_FORCEINLINE __m128d ssp_cmplt_sd_SSE2( __m128d a, __m128d b )
00236 {
00237 return _mm_cmplt_sd( a, b );
00238 }
00240 SSP_FORCEINLINE __m128d ssp_cmpneq_pd_SSE2( __m128d a, __m128d b )
00241 {
00242 return _mm_cmpneq_pd( a, b );
00243 }
00245 SSP_FORCEINLINE __m128d ssp_cmpneq_sd_SSE2( __m128d a, __m128d b )
00246 {
00247 return _mm_cmpneq_sd( a, b );
00248 }
00250 SSP_FORCEINLINE __m128d ssp_cmpnge_pd_SSE2( __m128d a, __m128d b )
00251 {
00252 return _mm_cmpnge_pd( a, b );
00253 }
00255 SSP_FORCEINLINE __m128d ssp_cmpnge_sd_SSE2( __m128d a, __m128d b )
00256 {
00257 return _mm_cmpnge_sd( a, b );
00258 }
00260 SSP_FORCEINLINE __m128d ssp_cmpngt_pd_SSE2( __m128d a, __m128d b )
00261 {
00262 return _mm_cmpngt_pd( a, b );
00263 }
00265 SSP_FORCEINLINE __m128d ssp_cmpngt_sd_SSE2( __m128d a, __m128d b )
00266 {
00267 return _mm_cmpngt_sd( a, b );
00268 }
00270 SSP_FORCEINLINE __m128d ssp_cmpnle_pd_SSE2( __m128d a, __m128d b )
00271 {
00272 return _mm_cmpnle_pd( a, b );
00273 }
00275 SSP_FORCEINLINE __m128d ssp_cmpnle_sd_SSE2( __m128d a, __m128d b )
00276 {
00277 return _mm_cmpnle_sd( a, b );
00278 }
00280 SSP_FORCEINLINE __m128d ssp_cmpnlt_pd_SSE2( __m128d a, __m128d b )
00281 {
00282 return _mm_cmpnlt_pd( a, b );
00283 }
00285 SSP_FORCEINLINE __m128d ssp_cmpnlt_sd_SSE2( __m128d a, __m128d b )
00286 {
00287 return _mm_cmpnlt_sd( a, b );
00288 }
00290 SSP_FORCEINLINE __m128d ssp_cmpord_pd_SSE2( __m128d a, __m128d b )
00291 {
00292 return _mm_cmpord_pd( a, b );
00293 }
00295 SSP_FORCEINLINE __m128d ssp_cmpord_sd_SSE2( __m128d a, __m128d b )
00296 {
00297 return _mm_cmpord_sd( a, b );
00298 }
00300 SSP_FORCEINLINE __m128d ssp_cmpunord_pd_SSE2( __m128d a, __m128d b )
00301 {
00302 return _mm_cmpunord_pd( a, b );
00303 }
00305 SSP_FORCEINLINE __m128d ssp_cmpunord_sd_SSE2( __m128d a, __m128d b )
00306 {
00307 return _mm_cmpunord_sd( a, b );
00308 }
00310 SSP_FORCEINLINE int ssp_comieq_sd_SSE2( __m128d a, __m128d b )
00311 {
00312 return _mm_comieq_sd( a, b );
00313 }
00315 SSP_FORCEINLINE int ssp_comige_sd_SSE2( __m128d a, __m128d b )
00316 {
00317 return _mm_comige_sd( a, b );
00318 }
00320 SSP_FORCEINLINE int ssp_comigt_sd_SSE2( __m128d a, __m128d b )
00321 {
00322 return _mm_comigt_sd( a, b );
00323 }
00325 SSP_FORCEINLINE int ssp_comile_sd_SSE2( __m128d a, __m128d b )
00326 {
00327 return _mm_comile_sd( a, b );
00328 }
00330 SSP_FORCEINLINE int ssp_comilt_sd_SSE2( __m128d a, __m128d b )
00331 {
00332 return _mm_comilt_sd( a, b );
00333 }
00335 SSP_FORCEINLINE int ssp_comineq_sd_SSE2( __m128d a, __m128d b )
00336 {
00337 return _mm_comineq_sd( a, b );
00338 }
00340 SSP_FORCEINLINE __m128d ssp_cvtepi32_pd_SSE2( __m128i a )
00341 {
00342 return _mm_cvtepi32_pd( a );
00343 }
00345 SSP_FORCEINLINE __m128 ssp_cvtepi32_ps_SSE2( __m128i a )
00346 {
00347 return _mm_cvtepi32_ps( a );
00348 }
00350 SSP_FORCEINLINE __m128i ssp_cvtpd_epi32_SSE2( __m128d a )
00351 {
00352 return _mm_cvtpd_epi32( a );
00353 }
00355 SSP_FORCEINLINE __m64 ssp_cvtpd_pi32_SSE2( __m128d a )
00356 {
00357 return _mm_cvtpd_pi32( a );
00358 }
00360 SSP_FORCEINLINE __m128 ssp_cvtpd_ps_SSE2( __m128d a )
00361 {
00362 return _mm_cvtpd_ps( a );
00363 }
00365 SSP_FORCEINLINE __m128d ssp_cvtpi32_pd_SSE2( __m64 a )
00366 {
00367 return _mm_cvtpi32_pd( a );
00368 }
00370 SSP_FORCEINLINE __m128i ssp_cvtps_epi32_SSE2( __m128 a )
00371 {
00372 return _mm_cvtps_epi32( a );
00373 }
00375 SSP_FORCEINLINE __m128d ssp_cvtps_pd_SSE2( __m128 a )
00376 {
00377 return _mm_cvtps_pd( a );
00378 }
00379
00380
00381
00382
00383
00384
00385
00387 SSP_FORCEINLINE int ssp_cvtsd_si32_SSE2( __m128d a )
00388 {
00389 return _mm_cvtsd_si32( a );
00390 }
00391 #ifdef SYS64
00394 //SSP_FORCEINLINE __int64 ssp_cvtsd_si64_SSE2( __m128d a )
00395
00396
00397
00398 #endif
00399
00400 SSP_FORCEINLINE __m128 ssp_cvtsd_ss_SSE2( __m128 a, __m128d b )
00401 {
00402 return _mm_cvtsd_ss( a, b );
00403 }
00405 SSP_FORCEINLINE int ssp_cvtsi128_si32_SSE2( __m128i a )
00406 {
00407 return _mm_cvtsi128_si32( a );
00408 }
00409 #ifdef SYS64
00410
00412
00413
00414
00415
00416 #endif
00417
00418 SSP_FORCEINLINE __m128d ssp_cvtsi32_sd_SSE2( __m128d a, int b )
00419 {
00420 return _mm_cvtsi32_sd( a, b );
00421 }
00423 SSP_FORCEINLINE __m128i ssp_cvtsi32_si128_SSE2( int a )
00424 {
00425 return _mm_cvtsi32_si128( a );
00426 }
00427 #ifdef SYS64
00428
00430
00431
00432
00433
00434
00436
00437
00438
00439
00440 #endif
00441
00442 SSP_FORCEINLINE __m128d ssp_cvtss_sd_SSE2( __m128d a, __m128 b )
00443 {
00444 return _mm_cvtss_sd( a, b );
00445 }
00447 SSP_FORCEINLINE __m128i ssp_cvttpd_epi32_SSE2( __m128d a )
00448 {
00449 return _mm_cvttpd_epi32( a );
00450 }
00452 SSP_FORCEINLINE __m64 ssp_cvttpd_pi32_SSE2( __m128d a )
00453 {
00454 return _mm_cvttpd_pi32( a );
00455 }
00457 SSP_FORCEINLINE __m128i ssp_cvttps_epi32_SSE2( __m128 a )
00458 {
00459 return _mm_cvttps_epi32( a );
00460 }
00462 SSP_FORCEINLINE int ssp_cvttsd_si32_SSE2( __m128d a )
00463 {
00464 return _mm_cvttsd_si32( a );
00465 }
00466 #ifdef SYS64
00467
00469
00470
00471
00472
00473 #endif
00474
00475 SSP_FORCEINLINE __m128d ssp_div_pd_SSE2( __m128d a, __m128d b )
00476 {
00477 return _mm_div_pd( a, b );
00478 }
00480 SSP_FORCEINLINE __m128d ssp_div_sd_SSE2( __m128d a, __m128d b )
00481 {
00482 return _mm_div_sd( a, b );
00483 }
00485 SSP_FORCEINLINE int ssp_extract_epi16_SSE2( __m128i a, int imm )
00486 {
00487 switch( imm & 0x7 )
00488 {
00489 CASE_8( _mm_extract_epi16, a );
00490 }
00491 }
00493 SSP_FORCEINLINE __m128i ssp_insert_epi16_SSE2( __m128i a, int b, int imm )
00494 {
00495 switch( imm & 0x7 )
00496 {
00497 CASE_8( _mm_insert_epi16, a, b );
00498 }
00499 }
00501 SSP_FORCEINLINE void ssp_lfence_SSE2( void )
00502 {
00503 _mm_lfence();
00504 }
00506 SSP_FORCEINLINE __m128d ssp_load_pd_SSE2( double const*dp )
00507 {
00508 return _mm_load_pd( dp );
00509 }
00511 SSP_FORCEINLINE __m128d ssp_load_sd_SSE2( double const*dp )
00512 {
00513 return _mm_load_sd( dp );
00514 }
00516 SSP_FORCEINLINE __m128i ssp_load_si128_SSE2( __m128i const*p )
00517 {
00518 return _mm_load_si128( p );
00519 }
00521 SSP_FORCEINLINE __m128d ssp_load1_pd_SSE2( double const*dp )
00522 {
00523 return _mm_load1_pd( dp );
00524 }
00526 SSP_FORCEINLINE __m128d ssp_loadh_pd_SSE2( __m128d a, double const*dp )
00527 {
00528 return _mm_loadh_pd( a, dp );
00529 }
00531 SSP_FORCEINLINE __m128i ssp_loadl_epi64_SSE2( __m128i const*p)
00532 {
00533 return _mm_loadl_epi64( p );
00534 }
00536 SSP_FORCEINLINE __m128d ssp_loadl_pd_SSE2( __m128d a, double const*dp )
00537 {
00538 return _mm_loadl_pd( a, dp );
00539 }
00541 SSP_FORCEINLINE __m128d ssp_loadr_pd_SSE2( double const*dp )
00542 {
00543 return _mm_loadr_pd( dp );
00544 }
00546 SSP_FORCEINLINE __m128d ssp_loadu_pd_SSE2( double const*dp )
00547 {
00548 return _mm_loadu_pd( dp );
00549 }
00551 SSP_FORCEINLINE __m128i ssp_loadu_si128_SSE2( __m128i const*p )
00552 {
00553 return _mm_loadu_si128( p );
00554 }
00556 SSP_FORCEINLINE __m128i ssp_madd_epi16_SSE2( __m128i a, __m128i b )
00557 {
00558 return _mm_madd_epi16( a, b );
00559 }
00561 SSP_FORCEINLINE void ssp_maskmoveu_si128_SSE2( __m128i a, __m128i b, char *c )
00562 {
00563 _mm_maskmoveu_si128( a, b, c );
00564 }
00566 SSP_FORCEINLINE __m128i ssp_max_epi16_SSE2( __m128i a, __m128i b )
00567 {
00568 return _mm_max_epi16( a, b );
00569 }
00571 SSP_FORCEINLINE __m128i ssp_max_epu8_SSE2( __m128i a, __m128i b )
00572 {
00573 return _mm_max_epu8( a, b );
00574 }
00576 SSP_FORCEINLINE __m128d ssp_max_pd_SSE2( __m128d a, __m128d b )
00577 {
00578 return _mm_max_pd( a, b );
00579 }
00581 SSP_FORCEINLINE __m128d ssp_max_sd_SSE2( __m128d a, __m128d b )
00582 {
00583 return _mm_max_sd( a, b );
00584 }
00586 SSP_FORCEINLINE void ssp_mfence_SSE2( void )
00587 {
00588 _mm_mfence( );
00589 }
00591 SSP_FORCEINLINE __m128i ssp_min_epi16_SSE2( __m128i a, __m128i b )
00592 {
00593 return _mm_min_epi16( a, b );
00594 }
00596 SSP_FORCEINLINE __m128i ssp_min_epu8_SSE2( __m128i a, __m128i b )
00597 {
00598 return _mm_min_epu8( a, b );
00599 }
00601 SSP_FORCEINLINE __m128d ssp_min_pd_SSE2( __m128d a, __m128d b )
00602 {
00603 return _mm_min_pd( a, b );
00604 }
00606 SSP_FORCEINLINE __m128d ssp_min_sd_SSE2( __m128d a, __m128d b )
00607 {
00608 return _mm_min_sd( a, b );
00609 }
00611 SSP_FORCEINLINE __m128i ssp_move_epi64_SSE2( __m128i a )
00612 {
00613 return _mm_move_epi64( a );
00614 }
00616 SSP_FORCEINLINE __m128d ssp_move_sd_SSE2( __m128d a, __m128d b )
00617 {
00618 return _mm_move_sd( a, b );
00619 }
00621 SSP_FORCEINLINE int ssp_movemask_epi8_SSE2( __m128i a )
00622 {
00623 return _mm_movemask_epi8( a );
00624 }
00626 SSP_FORCEINLINE int ssp_movemask_pd_SSE2( __m128d a )
00627 {
00628 return _mm_movemask_pd( a );
00629 }
00631 SSP_FORCEINLINE __m64 ssp_movepi64_pi64_SSE2( __m128i a )
00632 {
00633 return _mm_movepi64_pi64( a );
00634 }
00636 SSP_FORCEINLINE __m128i ssp_movpi64_epi64_SSE2( __m64 a )
00637 {
00638 return _mm_movpi64_epi64( a );
00639 }
00641 SSP_FORCEINLINE __m128i ssp_mul_epu32_SSE2( __m128i a, __m128i b )
00642 {
00643 return _mm_mul_epu32( a, b );
00644 }
00646 SSP_FORCEINLINE __m128d ssp_mul_pd_SSE2( __m128d a, __m128d b )
00647 {
00648 return _mm_mul_pd( a, b );
00649 }
00651 SSP_FORCEINLINE __m128d ssp_mul_sd_SSE2( __m128d a, __m128d b )
00652 {
00653 return _mm_mul_sd( a, b );
00654 }
00656 SSP_FORCEINLINE __m64 ssp_mul_su32_SSE2( __m64 a, __m64 b)
00657 {
00658 return _mm_mul_su32( a, b );
00659 }
00661 SSP_FORCEINLINE __m128i ssp_mulhi_epi16_SSE2( __m128i a, __m128i b )
00662 {
00663 return _mm_mulhi_epi16( a, b );
00664 }
00666 SSP_FORCEINLINE __m128i ssp_mulhi_epu16_SSE2( __m128i a, __m128i b )
00667 {
00668 return _mm_mulhi_epu16( a, b );
00669 }
00671 SSP_FORCEINLINE __m128i ssp_mullo_epi16_SSE2( __m128i a, __m128i b )
00672 {
00673 return _mm_mullo_epi16( a, b );
00674 }
00676 SSP_FORCEINLINE __m128d ssp_or_pd_SSE2( __m128d a, __m128d b )
00677 {
00678 return _mm_or_pd( a, b );
00679 }
00681 SSP_FORCEINLINE __m128i ssp_or_si128_SSE2( __m128i a, __m128i b )
00682 {
00683 return _mm_or_si128( a, b );
00684 }
00686 SSP_FORCEINLINE __m128i ssp_packs_epi16_SSE2( __m128i a, __m128i b )
00687 {
00688 return _mm_packs_epi16( a, b );
00689 }
00691 SSP_FORCEINLINE __m128i ssp_packs_epi32_SSE2( __m128i a, __m128i b )
00692 {
00693 return _mm_packs_epi32( a, b );
00694 }
00696 SSP_FORCEINLINE __m128i ssp_packus_epi16_SSE2( __m128i a, __m128i b )
00697 {
00698 return _mm_packus_epi16( a, b );
00699 }
00701 SSP_FORCEINLINE void ssp_pause_SSE2( void )
00702 {
00703 _mm_pause();
00704 }
00706 SSP_FORCEINLINE __m128i ssp_sad_epu8_SSE2( __m128i a, __m128i b )
00707 {
00708 return _mm_sad_epu8( a, b );
00709 }
00711 SSP_FORCEINLINE __m128i ssp_set_epi16_SSE2( short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0 )
00712 {
00713 return _mm_set_epi16( w7, w6, w5, w4, w3, w2, w1, w0 );
00714 }
00716 SSP_FORCEINLINE __m128i ssp_set_epi32_SSE2( int i3, int i2, int i1, int i0 )
00717 {
00718 return _mm_set_epi32( i3, i2, i1, i0 );
00719 }
00721 SSP_FORCEINLINE __m128i ssp_set_epi64_SSE2( __m64 a1, __m64 a0 )
00722 {
00723 return _mm_set_epi64( a1, a0 );
00724 }
00726 SSP_FORCEINLINE __m128i ssp_set_epi8_SSE2( char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0 )
00727 {
00728 return _mm_set_epi8( b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0 );
00729 }
00731 SSP_FORCEINLINE __m128d ssp_set_pd_SSE2( double a1, double a0)
00732 {
00733 return _mm_set_pd( a1, a0 );
00734 }
00736 SSP_FORCEINLINE __m128d ssp_set_sd_SSE2( double w)
00737 {
00738 return _mm_set_sd( w );
00739 }
00741 SSP_FORCEINLINE __m128i ssp_set1_epi16_SSE2( short w)
00742 {
00743 return _mm_set1_epi16( w );
00744 }
00746 SSP_FORCEINLINE __m128i ssp_set1_epi32_SSE2( int i )
00747 {
00748 return _mm_set1_epi32( i );
00749 }
00751 SSP_FORCEINLINE __m128i ssp_set1_epi64_SSE2( __m64 a )
00752 {
00753 return _mm_set1_epi64( a );
00754 }
00756 SSP_FORCEINLINE __m128i ssp_set1_epi8_SSE2( char b )
00757 {
00758 return _mm_set1_epi8( b );
00759 }
00761 SSP_FORCEINLINE __m128d ssp_set1_pd_SSE2( double a )
00762 {
00763 return _mm_set1_pd( a );
00764 }
00765
00766 #ifdef SSP_MSVC
00767
00768 SSP_FORCEINLINE __m128i ssp_setl_epi64_SSE2( __m128i a )
00769 {
00770 return _mm_setl_epi64( a );
00771 }
00772 #endif
00773
00774 SSP_FORCEINLINE __m128i ssp_setr_epi16_SSE2( short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7 )
00775 {
00776 return _mm_setr_epi16( w0, w1, w2, w3, w4, w5, w6, w7 );
00777 }
00779 SSP_FORCEINLINE __m128i ssp_setr_epi32_SSE2( int i0, int i1, int i2, int i3)
00780 {
00781 return _mm_setr_epi32( i0, i1, i2, i3);
00782 }
00784 SSP_FORCEINLINE __m128i ssp_setr_epi64_SSE2( __m64 a0, __m64 a1)
00785 {
00786 return _mm_setr_epi64( a0, a1);
00787 }
00789 SSP_FORCEINLINE __m128i ssp_setr_epi8_SSE2( char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0 )
00790 {
00791 return _mm_setr_epi8( b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0 );
00792 }
00794 SSP_FORCEINLINE __m128d ssp_setr_pd_SSE2( double a0, double a1 )
00795 {
00796 return _mm_setr_pd( a0, a1);
00797 }
00799 SSP_FORCEINLINE __m128d ssp_setzero_pd_SSE2( void )
00800 {
00801 return _mm_setzero_pd( );
00802 }
00804 SSP_FORCEINLINE __m128i ssp_setzero_si128_SSE2( void )
00805 {
00806 return _mm_setzero_si128( );
00807 }
00809 SSP_FORCEINLINE __m128i ssp_shuffle_epi32_SSE2( __m128i a, int imm )
00810 {
00811 switch( imm & 0xFF )
00812 {
00813 CASE_256( _mm_shuffle_epi32, a );
00814 }
00815 }
00817 SSP_FORCEINLINE __m128d ssp_shuffle_pd_SSE2( __m128d a, __m128d b, int imm )
00818 {
00819 switch( imm & 0xFF )
00820 {
00821 CASE_4( _mm_shuffle_pd, a, b );
00822 }
00823 }
00825 SSP_FORCEINLINE __m128i ssp_shufflehi_epi16_SSE2( __m128i a, int imm )
00826 {
00827 switch( imm & 0xFF )
00828 {
00829 CASE_256( _mm_shufflehi_epi16, a );
00830 }
00831 }
00833 SSP_FORCEINLINE __m128i ssp_shufflelo_epi16_SSE2( __m128i a, int imm )
00834 {
00835 switch( imm & 0xFF )
00836 {
00837 CASE_256( _mm_shufflelo_epi16, a );
00838 }
00839 }
00841 SSP_FORCEINLINE __m128i ssp_sll_epi16_SSE2( __m128i a, __m128i count )
00842 {
00843 return _mm_sll_epi16( a, count );
00844 }
00846 SSP_FORCEINLINE __m128i ssp_sll_epi32_SSE2( __m128i a, __m128i count )
00847 {
00848 return _mm_sll_epi32( a, count );
00849 }
00851 SSP_FORCEINLINE __m128i ssp_sll_epi64_SSE2( __m128i a, __m128i count )
00852 {
00853 return _mm_sll_epi64( a, count );
00854 }
00856 SSP_FORCEINLINE __m128i ssp_slli_epi16_SSE2( __m128i a, int count )
00857 {
00858 return _mm_slli_epi16( a, count );
00859 }
00861 SSP_FORCEINLINE __m128i ssp_slli_epi32_SSE2( __m128i a, int count )
00862 {
00863 return _mm_slli_epi32( a, count );
00864 }
00866 SSP_FORCEINLINE __m128i ssp_slli_epi64_SSE2( __m128i a, int count )
00867 {
00868 return _mm_slli_epi64( a, count );
00869 }
00871 SSP_FORCEINLINE __m128i ssp_slli_si128_SSE2( __m128i a, int imm )
00872 {
00873 switch( imm & 0x7F )
00874 {
00875 CASE_128( _mm_slli_si128, a );
00876 }
00877 }
00879 SSP_FORCEINLINE __m128d ssp_sqrt_pd_SSE2( __m128d a )
00880 {
00881 return _mm_sqrt_pd( a );
00882 }
00884 SSP_FORCEINLINE __m128d ssp_sqrt_sd_SSE2( __m128d a, __m128d b )
00885 {
00886 return _mm_sqrt_sd( a, b );
00887 }
00889 SSP_FORCEINLINE __m128i ssp_sra_epi16_SSE2( __m128i a, __m128i count )
00890 {
00891 return _mm_sra_epi16( a, count );
00892 }
00894 SSP_FORCEINLINE __m128i ssp_sra_epi32_SSE2( __m128i a, __m128i count )
00895 {
00896 return _mm_sra_epi32( a, count );
00897 }
00899 SSP_FORCEINLINE __m128i ssp_srai_epi16_SSE2( __m128i a, int count )
00900 {
00901 return _mm_srai_epi16( a, count );
00902 }
00904 SSP_FORCEINLINE __m128i ssp_srai_epi32_SSE2( __m128i a, int count )
00905 {
00906 return _mm_srai_epi32( a, count );
00907 }
00909 SSP_FORCEINLINE __m128i ssp_srl_epi16_SSE2( __m128i a, __m128i count )
00910 {
00911 return _mm_srl_epi16( a, count );
00912 }
00914 SSP_FORCEINLINE __m128i ssp_srl_epi32_SSE2( __m128i a, __m128i count )
00915 {
00916 return _mm_srl_epi32( a, count );
00917 }
00919 SSP_FORCEINLINE __m128i ssp_srl_epi64_SSE2( __m128i a, __m128i count )
00920 {
00921 return _mm_srl_epi64( a, count );
00922 }
00924 SSP_FORCEINLINE __m128i ssp_srli_epi16_SSE2( __m128i a, int count )
00925 {
00926 return _mm_srli_epi16( a, count );
00927 }
00929 SSP_FORCEINLINE __m128i ssp_srli_epi32_SSE2( __m128i a, int count )
00930 {
00931 return _mm_srli_epi32( a, count );
00932 }
00934 SSP_FORCEINLINE __m128i ssp_srli_epi64_SSE2( __m128i a, int count )
00935 {
00936 return _mm_srli_epi64( a, count );
00937 }
00939 SSP_FORCEINLINE __m128i ssp_srli_si128_SSE2( __m128i a, int imm )
00940 {
00941 switch( imm & 0x7F )
00942 {
00943 CASE_128( _mm_srli_si128, a );
00944 }
00945 }
00947 SSP_FORCEINLINE void ssp_store_pd_SSE2( double *dp, __m128d a )
00948 {
00949 _mm_store_pd( dp, a );
00950 }
00952 SSP_FORCEINLINE void ssp_store_sd_SSE2( double *dp, __m128d a )
00953 {
00954 _mm_store_sd( dp, a );
00955 }
00957 SSP_FORCEINLINE void ssp_store_si128_SSE2( __m128i *p, __m128i b )
00958 {
00959 _mm_store_si128( p, b );
00960 }
00962 SSP_FORCEINLINE void ssp_store1_pd_SSE2( double *dp, __m128d a )
00963 {
00964 _mm_store1_pd( dp, a );
00965 }
00967 SSP_FORCEINLINE void ssp_storeh_pd_SSE2( double *dp, __m128d a )
00968 {
00969 _mm_storeh_pd( dp, a );
00970 }
00972 SSP_FORCEINLINE void ssp_storel_epi64_SSE2( __m128i *p, __m128i b )
00973 {
00974 _mm_storel_epi64( p, b );
00975 }
00977 SSP_FORCEINLINE void ssp_storel_pd_SSE2( double *dp, __m128d a )
00978 {
00979 _mm_storel_pd( dp, a );
00980 }
00982 SSP_FORCEINLINE void ssp_storer_pd_SSE2( double *dp, __m128d a )
00983 {
00984 _mm_storer_pd( dp, a );
00985 }
00987 SSP_FORCEINLINE void ssp_storeu_pd_SSE2( double *dp, __m128d a )
00988 {
00989 _mm_storeu_pd( dp, a );
00990 }
00992 SSP_FORCEINLINE void ssp_storeu_si128_SSE2( __m128i *p, __m128i b )
00993 {
00994 _mm_storeu_si128( p, b );
00995 }
00997 SSP_FORCEINLINE void ssp_stream_pd_SSE2( double *dp, __m128d a )
00998 {
00999 _mm_stream_pd( dp, a );
01000 }
01002 SSP_FORCEINLINE void ssp_stream_si128_SSE2( __m128i *p, __m128i a )
01003 {
01004 _mm_stream_si128( p, a );
01005 }
01007 SSP_FORCEINLINE void ssp_stream_si32_SSE2( int *p, int i )
01008 {
01009 _mm_stream_si32( p, i );
01010 }
01012 SSP_FORCEINLINE __m128i ssp_sub_epi16_SSE2( __m128i a, __m128i b )
01013 {
01014 return _mm_sub_epi16( a, b );
01015 }
01017 SSP_FORCEINLINE __m128i ssp_sub_epi32_SSE2( __m128i a, __m128i b )
01018 {
01019 return _mm_sub_epi32( a, b );
01020 }
01022 SSP_FORCEINLINE __m128i ssp_sub_epi64_SSE2( __m128i a, __m128i b )
01023 {
01024 return _mm_sub_epi64( a, b );
01025 }
01027 SSP_FORCEINLINE __m128i ssp_sub_epi8_SSE2( __m128i a, __m128i b )
01028 {
01029 return _mm_sub_epi8( a, b );
01030 }
01032 SSP_FORCEINLINE __m128d ssp_sub_pd_SSE2( __m128d a, __m128d b )
01033 {
01034 return _mm_sub_pd( a, b );
01035 }
01037 SSP_FORCEINLINE __m128d ssp_sub_sd_SSE2( __m128d a, __m128d b )
01038 {
01039 return _mm_sub_sd( a, b );
01040 }
01042 SSP_FORCEINLINE __m64 ssp_sub_si64_SSE2( __m64 a, __m64 b)
01043 {
01044 return _mm_sub_si64( a, b );
01045 }
01047 SSP_FORCEINLINE __m128i ssp_subs_epi16_SSE2( __m128i a, __m128i b )
01048 {
01049 return _mm_subs_epi16( a, b );
01050 }
01052 SSP_FORCEINLINE __m128i ssp_subs_epi8_SSE2( __m128i a, __m128i b )
01053 {
01054 return _mm_subs_epi8( a, b );
01055 }
01057 SSP_FORCEINLINE __m128i ssp_subs_epu16_SSE2( __m128i a, __m128i b )
01058 {
01059 return _mm_subs_epu16( a, b );
01060 }
01062 SSP_FORCEINLINE __m128i ssp_subs_epu8_SSE2( __m128i a, __m128i b )
01063 {
01064 return _mm_subs_epu8( a, b );
01065 }
01067 SSP_FORCEINLINE int ssp_ucomieq_sd_SSE2( __m128d a, __m128d b )
01068 {
01069 return _mm_ucomieq_sd( a, b );
01070 }
01072 SSP_FORCEINLINE int ssp_ucomige_sd_SSE2( __m128d a, __m128d b )
01073 {
01074 return _mm_ucomige_sd( a, b );
01075 }
01077 SSP_FORCEINLINE int ssp_ucomigt_sd_SSE2( __m128d a, __m128d b )
01078 {
01079 return _mm_ucomigt_sd( a, b );
01080 }
01082 SSP_FORCEINLINE int ssp_ucomile_sd_SSE2( __m128d a, __m128d b )
01083 {
01084 return _mm_ucomile_sd( a, b );
01085 }
01087 SSP_FORCEINLINE int ssp_ucomilt_sd_SSE2( __m128d a, __m128d b )
01088 {
01089 return _mm_ucomilt_sd( a, b );
01090 }
01092 SSP_FORCEINLINE int ssp_ucomineq_sd_SSE2( __m128d a, __m128d b )
01093 {
01094 return _mm_ucomineq_sd( a, b );
01095 }
01097 SSP_FORCEINLINE __m128i ssp_unpackhi_epi16_SSE2( __m128i a, __m128i b )
01098 {
01099 return _mm_unpackhi_epi16( a, b );
01100 }
01102 SSP_FORCEINLINE __m128i ssp_unpackhi_epi32_SSE2( __m128i a, __m128i b )
01103 {
01104 return _mm_unpackhi_epi32( a, b );
01105 }
01107 SSP_FORCEINLINE __m128i ssp_unpackhi_epi64_SSE2( __m128i a, __m128i b )
01108 {
01109 return _mm_unpackhi_epi64( a, b );
01110 }
01112 SSP_FORCEINLINE __m128i ssp_unpackhi_epi8_SSE2( __m128i a, __m128i b )
01113 {
01114 return _mm_unpackhi_epi8( a, b );
01115 }
01117 SSP_FORCEINLINE __m128d ssp_unpackhi_pd_SSE2( __m128d a, __m128d b )
01118 {
01119 return _mm_unpackhi_pd( a, b );
01120 }
01122 SSP_FORCEINLINE __m128i ssp_unpacklo_epi16_SSE2( __m128i a, __m128i b )
01123 {
01124 return _mm_unpacklo_epi16( a, b );
01125 }
01127 SSP_FORCEINLINE __m128i ssp_unpacklo_epi32_SSE2( __m128i a, __m128i b )
01128 {
01129 return _mm_unpacklo_epi32( a, b );
01130 }
01132 SSP_FORCEINLINE __m128i ssp_unpacklo_epi64_SSE2( __m128i a, __m128i b )
01133 {
01134 return _mm_unpacklo_epi64( a, b );
01135 }
01137 SSP_FORCEINLINE __m128i ssp_unpacklo_epi8_SSE2( __m128i a, __m128i b )
01138 {
01139 return _mm_unpacklo_epi8( a, b );
01140 }
01142 SSP_FORCEINLINE __m128d ssp_unpacklo_pd_SSE2( __m128d a, __m128d b )
01143 {
01144 return _mm_unpacklo_pd( a, b );
01145 }
01147 SSP_FORCEINLINE __m128d ssp_xor_pd_SSE2( __m128d a, __m128d b )
01148 {
01149 return _mm_xor_pd( a, b );
01150 }
01152 SSP_FORCEINLINE __m128i ssp_xor_si128_SSE2( __m128i a, __m128i b )
01153 {
01154 return _mm_xor_si128( a, b );
01155 }
01156
01158
01159
01160 #endif // __SSEPLUS_NATIVE_SSE2_H__