include/emulation/SSEPlus_emulation_comps_SSE2.h

Go to the documentation of this file.
00001 //
00002 // Copyright (c) 2006-2008 Advanced Micro Devices, Inc. All Rights Reserved.
00003 // This software is subject to the Apache v2.0 License.
00004 //
00005 #ifndef __SSEPLUS_EMULATION_COMPS_SSE2_H__
00006 #define __SSEPLUS_EMULATION_COMPS_SSE2_H__
00007 
00008 #include "../SSEPlus_SSE2.h"
00009 
00010 
00016 //----------------------------------------
00017 // COMEQ (Condition 0)
00018 //----------------------------------------
00019 
00021 SSP_FORCEINLINE __m128i ssp_comeq_epi16_SSE2(__m128i a, __m128i b)
00022 {
00023     a = _mm_cmpeq_epi16( a, b );
00024     return a;   
00025 }
00026 
00028 SSP_FORCEINLINE __m128i ssp_comeq_epi32_SSE2(__m128i a, __m128i b)
00029 {
00030     a = _mm_cmpeq_epi32( a, b );
00031     return a;   
00032 }
00033 
00035 SSP_FORCEINLINE __m128i ssp_comeq_epi64_SSE2(__m128i a, __m128i b)
00036 {
00037     ssp_m128 A, B;
00038     A.i = a;
00039     B.i = b;
00040     A.i = _mm_cmpeq_epi32( A.i, B.i );  // A0=B0,  A1=B1, A2=B2,  A3=B3
00041     B.f = _mm_movehdup_ps( A.f );       // A1=B1,  A1=B1, A3=B3,  A3=B3
00042     A.f = _mm_moveldup_ps( A.f );       // A0=B0,  A0=B0, A2=B2,  A2=B2
00043     A.i = _mm_and_si128  ( A.i, B.i );  // A0=B0 & A1=B1, A2=B2 & A3=B3   
00044     return A.i;
00045 }
00046 
00048 SSP_FORCEINLINE __m128i ssp_comeq_epi8_SSE2(__m128i a, __m128i b)
00049 {
00050     a = _mm_cmpeq_epi8( a, b );
00051     return a;
00052 }
00053 
00055 SSP_FORCEINLINE __m128i ssp_comeq_epu16_SSE2(__m128i a, __m128i b)
00056 {
00057     a = _mm_cmpeq_epi16( a, b );
00058     return a; 
00059 }
00060 
00062 SSP_FORCEINLINE __m128i ssp_comeq_epu32_SSE2(__m128i a, __m128i b)
00063 {
00064     a = _mm_cmpeq_epi32( a, b );
00065     return a; 
00066 }
00067 
00069 SSP_FORCEINLINE __m128i ssp_comeq_epu64_SSE2(__m128i a, __m128i b)
00070 {
00071     a = ssp_comeq_epi64_SSE2( a, b );  
00072     return a;
00073 }
00074 
00076 SSP_FORCEINLINE __m128i ssp_comeq_epu8_SSE2(__m128i a, __m128i b)
00077 {
00078     a = _mm_cmpeq_epi8( a, b );
00079     return a;
00080 }
00081 
00083 SSP_FORCEINLINE __m128d ssp_comeq_pd_SSE2(__m128d a, __m128d b)
00084 {
00085     a = _mm_cmpeq_pd( a, b );
00086     return a;
00087 }
00088 
00090 SSP_FORCEINLINE __m128 ssp_comeq_ps_SSE2(__m128 a, __m128 b)
00091 {
00092     a = _mm_cmpeq_ps( a, b );
00093     return a;
00094 }
00095 
00097 SSP_FORCEINLINE __m128d ssp_comeq_sd_SSE2(__m128d a, __m128d b)
00098 {
00099     a = _mm_cmpeq_sd( a, b );
00100     return a;
00101 }
00102 
00104 SSP_FORCEINLINE __m128 ssp_comeq_ss_SSE2(__m128 a, __m128 b)
00105 {
00106     a = _mm_cmpeq_ss( a, b );
00107     return a;
00108 }
00109 
00110 //----------------------------------------
00111 // COMLT (Condition 1)
00112 //----------------------------------------
00113 
00115 SSP_FORCEINLINE __m128i ssp_comlt_epi16_SSE2(__m128i a, __m128i b)
00116 {
00117     a = _mm_cmplt_epi16( a, b );
00118     return a;
00119 }
00120 
00122 SSP_FORCEINLINE __m128i ssp_comlt_epi32_SSE2(__m128i a, __m128i b)
00123 {
00124     a = _mm_cmplt_epi32( a, b );
00125     return a;
00126 }
00127   //TODO:SSE2
00129 SSP_FORCEINLINE __m128i ssp_comlt_epi64_SSE2(__m128i a, __m128i b)
00130 {
00131     a = ssp_comlt_epi64_REF( a, b );
00132     return a;
00133 }
00134 
00136 SSP_FORCEINLINE __m128i ssp_comlt_epi8_SSE2(__m128i a, __m128i b)
00137 {
00138     a = _mm_cmplt_epi8( a, b );
00139     return a;
00140 }
00141 
00143 SSP_FORCEINLINE __m128i ssp_comlt_epu16_SSE2(__m128i a, __m128i b)
00144 {
00145     __m128i signMask, mask;
00146 
00147     mask     = _mm_cmplt_epi16( a, b );              // FFFF where a < b (signed)
00148     signMask = _mm_xor_si128  ( a, b );              // Signbit is 1 where signs differ 
00149     signMask = _mm_srai_epi16 ( signMask, 15 );      // fill all fields with sign bit     
00150     mask     = _mm_xor_si128  ( mask, signMask );    // Invert output where signs differed
00151     return mask;
00152 }
00153 
00155 SSP_FORCEINLINE __m128i ssp_comlt_epu32_SSE2(__m128i a, __m128i b)
00156 {
00157     __m128i signMask, mask;
00158 
00159     mask     = _mm_cmplt_epi32( a, b );              // FFFF where a < b (signed)
00160     signMask = _mm_xor_si128  ( a, b );              // Signbit is 1 where signs differ 
00161     signMask = _mm_srai_epi32 ( signMask, 31 );      // fill all fields with sign bit     
00162     mask     = _mm_xor_si128  ( mask, signMask );    // Invert output where signs differed
00163     return mask;
00164 }
00165   // TODO: SSE2
00167 SSP_FORCEINLINE __m128i ssp_comlt_epu64_SSE2(__m128i a, __m128i b)
00168 {
00169     a = ssp_comlt_epu64_REF( a, b );
00170     return a;
00171 }
00172   //TODO:SSE2
00174 SSP_FORCEINLINE __m128i ssp_comlt_epu8_SSE2(__m128i a, __m128i b)
00175 {
00176     a = ssp_comlt_epu8_REF( a, b );
00177     return a;
00178 }
00179 
00181 SSP_FORCEINLINE __m128d ssp_comlt_pd_SSE2(__m128d a, __m128d b)
00182 {
00183     a = _mm_cmplt_pd( a, b );
00184     return a;
00185 }
00186 
00188 SSP_FORCEINLINE __m128 ssp_comlt_ps_SSE2(__m128 a, __m128 b)
00189 {
00190     a = _mm_cmplt_ps( a, b );
00191     return a;
00192 }
00193 
00195 SSP_FORCEINLINE __m128d ssp_comlt_sd_SSE2(__m128d a, __m128d b)
00196 {
00197     a = _mm_cmplt_sd( a, b );
00198     return a;
00199 }
00200 
00202 SSP_FORCEINLINE __m128 ssp_comlt_ss_SSE2(__m128 a, __m128 b)
00203 {
00204     a = _mm_cmplt_ss( a, b );
00205     return a;
00206 }
00207 
00208 //----------------------------------------
00209 // COMLE (Condition 2)
00210 //----------------------------------------
00211 
00213 SSP_FORCEINLINE __m128i ssp_comle_epi16_SSE2(__m128i a, __m128i b)
00214 {
00215     __m128i c;    
00216     c = _mm_cmplt_epi16( a, b );
00217     a = _mm_cmpeq_epi16( a, b );
00218     a = _mm_or_si128   ( a, c );
00219     return a;
00220 }
00221 
00223 SSP_FORCEINLINE __m128i ssp_comle_epi32_SSE2(__m128i a, __m128i b)
00224 {
00225     __m128i c;    
00226     c = _mm_cmplt_epi32( a, b );
00227     a = _mm_cmpeq_epi32( a, b );
00228     a = _mm_or_si128   ( a, c );
00229     return a;
00230 }
00231   //TODO:SSE2
00233 SSP_FORCEINLINE __m128i ssp_comle_epi64_SSE2(__m128i a, __m128i b)
00234 {
00235     a = ssp_comle_epi64_REF( a, b );
00236     return a;
00237 }
00238 
00240 SSP_FORCEINLINE __m128i ssp_comle_epi8_SSE2(__m128i a, __m128i b)
00241 {
00242     __m128i c;    
00243     c = _mm_cmplt_epi8( a, b );
00244     a = _mm_cmpeq_epi8( a, b );
00245     a = _mm_or_si128  ( a, c );
00246     return a;
00247 }
00248   //TODO:SSE2
00250 SSP_FORCEINLINE __m128i ssp_comle_epu16_SSE2(__m128i a, __m128i b)
00251 {
00252     a = ssp_comle_epu16_REF( a, b );
00253     return a;
00254 }
00255   //TODO:SSE2
00257 SSP_FORCEINLINE __m128i ssp_comle_epu32_SSE2(__m128i a, __m128i b)
00258 {
00259     a = ssp_comle_epu32_REF( a, b );
00260     return a;
00261 }
00262   //TODO:SSE2
00264 SSP_FORCEINLINE __m128i ssp_comle_epu64_SSE2(__m128i a, __m128i b)
00265 {
00266     a = ssp_comle_epu64_REF( a, b );
00267     return a;
00268 }
00269   //TODO:SSE2
00271 SSP_FORCEINLINE __m128i ssp_comle_epu8_SSE2(__m128i a, __m128i b)
00272 {
00273     a = ssp_comle_epu8_REF( a, b );
00274     return a;
00275 }
00276 
00278 SSP_FORCEINLINE __m128d ssp_comle_pd_SSE2(__m128d a, __m128d b)
00279 {
00280    a = _mm_cmple_pd( a, b );
00281    return a;
00282 }
00283 
00285 SSP_FORCEINLINE __m128 ssp_comle_ps_SSE2(__m128 a, __m128 b)
00286 {
00287    a = _mm_cmple_ps( a, b );
00288    return a;
00289 }
00290 
00292 SSP_FORCEINLINE __m128d ssp_comle_sd_SSE2(__m128d a, __m128d b)
00293 {
00294    a = _mm_cmple_sd( a, b );
00295    return a;
00296 }
00297 
00299 SSP_FORCEINLINE __m128 ssp_comle_ss_SSE2(__m128 a, __m128 b)
00300 {
00301    a = _mm_cmple_ss( a, b );
00302    return a;
00303 }
00304 
00305 //----------------------------------------
00306 // COMUNORD (Condition 3)
00307 //----------------------------------------
00308 
00310 SSP_FORCEINLINE __m128d ssp_comunord_pd_SSE2(__m128d a, __m128d b)
00311 {
00312     a = _mm_or_pd    ( a, b );
00313     a = _mm_cmpneq_pd( a, a );
00314     return a;   
00315 }
00316 
00318 SSP_FORCEINLINE __m128 ssp_comunord_ps_SSE2(__m128 a, __m128 b)
00319 {
00320     a = _mm_or_ps    ( a, b );
00321     a = _mm_cmpneq_ps( a, a );
00322     return a;      
00323 }
00324 
00326 SSP_FORCEINLINE __m128d ssp_comunord_sd_SSE2(__m128d a, __m128d b)
00327 {
00328     b = _mm_or_pd    ( a, b );
00329     a = _mm_cmpneq_sd( a, b );
00330     return a; 
00331 }
00332 
00334 SSP_FORCEINLINE __m128 ssp_comunord_ss_SSE2(__m128 a, __m128 b)
00335 {
00336     b = _mm_or_ps    ( a, b );
00337     a = _mm_cmpneq_ss( a, b );
00338     return a; 
00339 }
00340 
00341 
00342 //----------------------------------------
00343 // COMNEQ (Condition 4)
00344 //----------------------------------------
00345 
00347 SSP_FORCEINLINE __m128i ssp_comneq_epi16_SSE2(__m128i a, __m128i b)
00348 {
00349     a = ssp_comeq_epi16_SSE2( a, b );
00350     a = ssp_logical_invert_si128_SSE2( a );
00351     return a;   
00352 }
00353 
00355 SSP_FORCEINLINE __m128i ssp_comneq_epi32_SSE2(__m128i a, __m128i b)
00356 {
00357     a = ssp_comeq_epi32_SSE2( a, b );
00358     a = ssp_logical_invert_si128_SSE2( a );
00359     return a;   
00360 }
00361 
00363 SSP_FORCEINLINE __m128i ssp_comneq_epi64_SSE2(__m128i a, __m128i b)
00364 {
00365     a = ssp_comeq_epi64_SSE2( a, b );
00366     a = ssp_logical_invert_si128_SSE2( a );
00367     return a;   
00368 }
00369 
00371 SSP_FORCEINLINE __m128i ssp_comneq_epi8_SSE2(__m128i a, __m128i b)
00372 {
00373     a = ssp_comeq_epi8_SSE2( a, b );
00374     a = ssp_logical_invert_si128_SSE2( a );
00375     return a;   
00376 }
00377 
00379 SSP_FORCEINLINE __m128i ssp_comneq_epu16_SSE2(__m128i a, __m128i b)
00380 {
00381     a = ssp_comeq_epu16_SSE2( a, b );
00382     a = ssp_logical_invert_si128_SSE2( a );
00383     return a;   
00384 }
00385 
00387 SSP_FORCEINLINE __m128i ssp_comneq_epu32_SSE2(__m128i a, __m128i b)
00388 {
00389     a = ssp_comeq_epu32_SSE2( a, b );
00390     a = ssp_logical_invert_si128_SSE2( a );
00391     return a;   
00392 }
00393 
00395 SSP_FORCEINLINE __m128i ssp_comneq_epu64_SSE2(__m128i a, __m128i b)
00396 {
00397     a = ssp_comeq_epu64_SSE2( a, b );
00398     a = ssp_logical_invert_si128_SSE2( a );
00399     return a;   
00400 }
00401 
00403 SSP_FORCEINLINE __m128i ssp_comneq_epu8_SSE2(__m128i a, __m128i b)
00404 {
00405     a = ssp_comeq_epu8_SSE2( a, b );
00406     a = ssp_logical_invert_si128_SSE2( a );
00407     return a;   
00408 }
00409 
00411 SSP_FORCEINLINE __m128d ssp_comneq_pd_SSE2(__m128d a, __m128d b)
00412 {
00413     a = _mm_cmpneq_pd( a, b );
00414     return a;
00415 }
00416 
00418 SSP_FORCEINLINE __m128 ssp_comneq_ps_SSE2(__m128 a, __m128 b)
00419 {
00420     a = _mm_cmpneq_ps( a, b );
00421     return a;
00422 }
00423 
00425 SSP_FORCEINLINE __m128d ssp_comneq_sd_SSE2(__m128d a, __m128d b)
00426 {
00427     a = _mm_cmpneq_sd( a, b );
00428     return a;
00429 }
00430 
00432 SSP_FORCEINLINE __m128 ssp_comneq_ss_SSE2(__m128 a, __m128 b)
00433 {
00434     a = _mm_cmpneq_ss( a, b );
00435     return a;
00436 }
00437 
00438 //----------------------------------------
00439 // COMNLT (Condition 5)
00440 //----------------------------------------
00441 
00443 SSP_FORCEINLINE __m128d ssp_comnlt_pd_SSE2(__m128d a, __m128d b)
00444 {
00445     a = _mm_cmpnlt_pd( a, b );    
00446     return a;
00447 }
00448 
00450 SSP_FORCEINLINE __m128 ssp_comnlt_ps_SSE2(__m128 a, __m128 b)
00451 {
00452     a = _mm_cmpnlt_ps( a, b );    
00453     return a;
00454 }
00455 
00457 SSP_FORCEINLINE __m128d ssp_comnlt_sd_SSE2(__m128d a, __m128d b)
00458 {
00459     a = _mm_cmpnlt_sd( a, b );    
00460     return a;
00461 }
00462 
00464 SSP_FORCEINLINE __m128 ssp_comnlt_ss_SSE2(__m128 a, __m128 b)
00465 {
00466     a = _mm_cmpnlt_ss( a, b );    
00467     return a;
00468 }
00469 
00470 
00471 //----------------------------------------
00472 // COMNLE (Condition 6)
00473 //----------------------------------------
00474 
00476 SSP_FORCEINLINE __m128d ssp_comnle_pd_SSE2(__m128d a, __m128d b)
00477 {    
00478     a = _mm_cmpnle_pd( a, b );
00479     return a;
00480 }
00481 
00483 SSP_FORCEINLINE __m128 ssp_comnle_ps_SSE2(__m128 a, __m128 b)
00484 {
00485     a = _mm_cmpnle_ps( a, b );
00486     return a;
00487 }
00488 
00490 SSP_FORCEINLINE __m128d ssp_comnle_sd_SSE2(__m128d a, __m128d b)
00491 {
00492     a = _mm_cmpnle_sd( a, b );
00493     return a;
00494 }
00495 
00497 SSP_FORCEINLINE __m128 ssp_comnle_ss_SSE2(__m128 a, __m128 b)
00498 {
00499     a = _mm_cmpnle_ss( a, b );
00500     return a;
00501 }
00502 
00503 
00504 //----------------------------------------
00505 // COMORD (Condition 7)
00506 //----------------------------------------
00507 
00509 SSP_FORCEINLINE __m128d ssp_comord_pd_SSE2(__m128d a, __m128d b)
00510 {
00511     a = _mm_cmpord_pd( a, b );
00512     return a;
00513 }
00514 
00516 SSP_FORCEINLINE __m128 ssp_comord_ps_SSE2(__m128 a, __m128 b)
00517 {
00518     a = _mm_cmpord_ps( a, b );
00519     return a;
00520 }
00521 
00523 SSP_FORCEINLINE __m128d ssp_comord_sd_SSE2(__m128d a, __m128d b)
00524 {
00525     a = _mm_cmpord_sd( a, b );
00526     return a;
00527 }
00528   //TODO:SSE2
00530 SSP_FORCEINLINE __m128 ssp_comord_ss_SSE2(__m128 a, __m128 b)
00531 {
00532     a = _mm_cmpord_ss( a, b );
00533     return a;
00534 }
00535 
00536 
00537 //----------------------------------------
00538 // COMUEQ (Condition 8)
00539 //----------------------------------------
00540 
00542 SSP_FORCEINLINE __m128d ssp_comueq_pd_SSE2(__m128d a, __m128d b)
00543 {
00544     __m128d c;
00545     c = _mm_cmpunord_pd( a, b );
00546     a = _mm_cmpeq_pd   ( a, b );
00547     a = _mm_or_pd      ( a, c );
00548     return a;   
00549 }
00550 
00552 SSP_FORCEINLINE __m128 ssp_comueq_ps_SSE2(__m128 a, __m128 b)
00553 {
00554     __m128 c;
00555     c = _mm_cmpunord_ps( a, b );
00556     a = _mm_cmpeq_ps   ( a, b );
00557     a = _mm_or_ps      ( a, c );
00558     return a;   
00559 }
00560 
00562 SSP_FORCEINLINE __m128d ssp_comueq_sd_SSE2(__m128d a, __m128d b)
00563 {
00564     __m128d c;
00565     c = _mm_cmpunord_sd( a, b );
00566     b = _mm_cmpeq_sd   ( a, b );
00567     b = _mm_or_pd      ( b, c );
00568     a = _mm_move_sd    ( a, b );
00569     return a;   
00570 }
00571 
00573 SSP_FORCEINLINE __m128 ssp_comueq_ss_SSE2(__m128 a, __m128 b)
00574 {
00575     __m128 c;
00576     c = _mm_cmpunord_ss( a, b );
00577     b = _mm_cmpeq_ss   ( a, b );
00578     b = _mm_or_ps      ( a, c );
00579     a = _mm_move_ss    ( a, b );
00580     return a;   
00581 }
00582 
00583 
00584 //----------------------------------------
00585 // COMNGE (Condition 9)
00586 //----------------------------------------
00587 
00589 SSP_FORCEINLINE __m128d ssp_comnge_pd_SSE2(__m128d a, __m128d b)
00590 {
00591     a = _mm_cmpnge_pd( a, b );
00592     return a;
00593 }
00594 
00596 SSP_FORCEINLINE __m128 ssp_comnge_ps_SSE2(__m128 a, __m128 b)
00597 {
00598     a = _mm_cmpnge_ps( a, b );
00599     return a;
00600 }
00601 
00603 SSP_FORCEINLINE __m128d ssp_comnge_sd_SSE2(__m128d a, __m128d b)
00604 {
00605     a = _mm_cmpnge_sd( a, b );
00606     return a;
00607 }
00608 
00610 SSP_FORCEINLINE __m128 ssp_comnge_ss_SSE2(__m128 a, __m128 b)
00611 {
00612     a = _mm_cmpnge_ss( a, b );
00613     return a;
00614 }
00615 
00616 
00617 //----------------------------------------
00618 // COMNGT (Condition 10)
00619 //----------------------------------------
00620 
00622 SSP_FORCEINLINE __m128d ssp_comngt_pd_SSE2(__m128d a, __m128d b)
00623 {
00624     a = _mm_cmpngt_pd( a, b );
00625     return a;
00626 }
00627 
00629 SSP_FORCEINLINE __m128 ssp_comngt_ps_SSE2(__m128 a, __m128 b)
00630 {
00631     a = _mm_cmpngt_ps( a, b );
00632     return a;
00633 }
00634 
00636 SSP_FORCEINLINE __m128d ssp_comngt_sd_SSE2(__m128d a, __m128d b)
00637 {
00638     a = _mm_cmpngt_sd( a, b );
00639     return a;
00640 }
00641 
00643 SSP_FORCEINLINE __m128 ssp_comngt_ss_SSE2(__m128 a, __m128 b)
00644 {
00645     a = _mm_cmpngt_ss( a, b );
00646     return a;
00647 }
00648 
00649 
00650 //----------------------------------------
00651 // COMFALSE (Condition 11)
00652 //----------------------------------------
00653 
00655 SSP_FORCEINLINE __m128i ssp_comfalse_epi16_SSE2(__m128i a, __m128i b)
00656 {
00657         return _mm_setzero_si128();
00658 }
00659 
00661 SSP_FORCEINLINE __m128i ssp_comfalse_epi32_SSE2(__m128i a, __m128i b)
00662 {
00663         return _mm_setzero_si128();
00664 }
00665 
00667 SSP_FORCEINLINE __m128i ssp_comfalse_epi64_SSE2(__m128i a, __m128i b)
00668 {
00669         return _mm_setzero_si128();
00670 }
00671 
00673 SSP_FORCEINLINE __m128i ssp_comfalse_epi8_SSE2(__m128i a, __m128i b)
00674 {
00675         return _mm_setzero_si128();
00676 }
00677 
00679 SSP_FORCEINLINE __m128i ssp_comfalse_epu16_SSE2(__m128i a, __m128i b)
00680 {
00681         return _mm_setzero_si128();
00682 }
00683 
00685 SSP_FORCEINLINE __m128i ssp_comfalse_epu32_SSE2(__m128i a, __m128i b)
00686 {
00687         return _mm_setzero_si128();
00688 }
00689 
00691 SSP_FORCEINLINE __m128i ssp_comfalse_epu64_SSE2(__m128i a, __m128i b)
00692 {
00693         return _mm_setzero_si128();
00694 }
00695 
00697 SSP_FORCEINLINE __m128i ssp_comfalse_epu8_SSE2(__m128i a, __m128i b)
00698 {
00699         return _mm_setzero_si128();
00700 }
00701 
00703 SSP_FORCEINLINE __m128d ssp_comfalse_pd_SSE2(__m128d a, __m128d b)
00704 {
00705         return _mm_setzero_pd();
00706 }
00707 
00709 SSP_FORCEINLINE __m128 ssp_comfalse_ps_SSE2(__m128 a, __m128 b)
00710 {
00711         return _mm_setzero_ps();
00712 }
00713 
00715 SSP_FORCEINLINE __m128d ssp_comfalse_sd_SSE2(__m128d a, __m128d b)
00716 {
00717         ssp_m128 B;
00718         B.i = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
00719         return _mm_and_pd(a, B.d);
00720 }
00721 
00723 SSP_FORCEINLINE __m128 ssp_comfalse_ss_SSE2(__m128 a, __m128 b)
00724 {
00725         ssp_m128 B;
00726         B.i = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
00727         return _mm_and_ps(a, B.f);
00728 }
00729 
00730 
00731 //----------------------------------------
00732 // COMONEQ (Condition 12)
00733 //----------------------------------------
00734 
00736 SSP_FORCEINLINE __m128d ssp_comoneq_pd_SSE2(__m128d a, __m128d b)
00737 {
00738     __m128d c;
00739     c = _mm_cmpord_pd( a, b );
00740     a = _mm_cmpneq_pd( a, b );
00741     a = _mm_and_pd   ( a, c );
00742     return a;
00743 }
00744 
00746 SSP_FORCEINLINE __m128 ssp_comoneq_ps_SSE2(__m128 a, __m128 b)
00747 {
00748     __m128 c;
00749     c = _mm_cmpord_ps( a, b );
00750     a = _mm_cmpneq_ps( a, b );
00751     a = _mm_and_ps   ( a, c );
00752     return a;
00753 }
00754 
00755 
00757 SSP_FORCEINLINE __m128d ssp_comoneq_sd_SSE2(__m128d a, __m128d b)
00758 {
00759     __m128d c;
00760     c = _mm_cmpord_pd( a, b );
00761     b = _mm_cmpneq_pd( a, b );
00762     b = _mm_and_pd   ( b, c );
00763     a = _mm_move_sd  ( a, b );
00764     return a;   
00765 }
00766 
00768 SSP_FORCEINLINE __m128 ssp_comoneq_ss_SSE2(__m128 a, __m128 b)
00769 {
00770     __m128 c;
00771     c = _mm_cmpord_ps( a, b );
00772     b = _mm_cmpneq_ps( a, b );
00773     b = _mm_and_ps   ( b, c );
00774     a = _mm_move_ss  ( a, b );
00775     return a; 
00776 }
00777 
00778 
00779 //----------------------------------------
00780 // COMGE (Condition 13)
00781 //----------------------------------------
00782 
00784 SSP_FORCEINLINE __m128i ssp_comge_epi16_SSE2(__m128i a, __m128i b)
00785 {
00786     __m128i c;
00787     c = _mm_cmpgt_epi16( a, b );
00788     a = _mm_cmpeq_epi16( a, b );
00789     a = _mm_or_si128  ( a, c );
00790     return a;
00791 }
00792 
00794 SSP_FORCEINLINE __m128i ssp_comge_epi32_SSE2(__m128i a, __m128i b)
00795 {
00796     __m128i c;
00797     c = _mm_cmpgt_epi32( a, b );
00798     a = _mm_cmpeq_epi32( a, b );
00799     a = _mm_or_si128   ( a, c );
00800     return a;
00801 }
00802   //TODO:SSE2
00804 SSP_FORCEINLINE __m128i ssp_comge_epi64_SSE2(__m128i a, __m128i b)
00805 {
00806     a = ssp_comge_epi64_REF( a, b );
00807     return a;
00808 }
00809 
00811 SSP_FORCEINLINE __m128i ssp_comge_epi8_SSE2(__m128i a, __m128i b)
00812 {
00813     __m128i c;
00814     c = _mm_cmpgt_epi8( a, b );
00815     a = _mm_cmpeq_epi8( a, b );
00816     a = _mm_or_si128  ( a, c );
00817     return a;
00818 }
00819 
00820 
00822 SSP_FORCEINLINE __m128i ssp_comge_epu16_SSE2(__m128i a, __m128i b)
00823 {
00824     __m128i mask;
00825     mask = ssp_comge_epi16_SSE2( a, b );         // FFFF where a < b (signed)
00826     mask = ssp_logical_signinvert_16_SSE2( mask, a, b );
00827     return mask;
00828 }
00829 
00831 SSP_FORCEINLINE __m128i ssp_comge_epu32_SSE2(__m128i a, __m128i b)
00832 {
00833     __m128i mask;
00834     mask = ssp_comge_epi32_SSE2( a, b );         // FFFF where a < b (signed)
00835     mask = ssp_logical_signinvert_32_SSE2( mask, a, b );
00836     return mask;
00837 }
00838   //TODO: SSE2
00840 SSP_FORCEINLINE __m128i ssp_comge_epu64_SSE2(__m128i a, __m128i b)
00841 {
00842     a = ssp_comge_epu64_REF( a, b );
00843     return a;
00844 }
00845   //TODO:SSE2
00847 SSP_FORCEINLINE __m128i ssp_comge_epu8_SSE2(__m128i a, __m128i b)
00848 {
00849     a = ssp_comge_epu8_REF( a, b );
00850     return a;
00851 }
00852 
00854 SSP_FORCEINLINE __m128d ssp_comge_pd_SSE2(__m128d a, __m128d b)
00855 {
00856     a = _mm_cmpge_pd( a, b );
00857     return a;    
00858 }
00859 
00861 SSP_FORCEINLINE __m128 ssp_comge_ps_SSE2(__m128 a, __m128 b)
00862 {
00863     a = _mm_cmpge_ps( a, b );
00864     return a;   
00865 }
00866 
00868 SSP_FORCEINLINE __m128d ssp_comge_sd_SSE2(__m128d a, __m128d b)
00869 {
00870     a = _mm_cmpge_sd( a, b );
00871     return a;   
00872 }
00873 
00875 SSP_FORCEINLINE __m128 ssp_comge_ss_SSE2(__m128 a, __m128 b)
00876 {
00877     a = _mm_cmpge_ss( a, b );
00878     return a;   
00879 }
00880 
00881 
00882 //----------------------------------------
00883 // COMGT (Condition 14)
00884 //----------------------------------------
00885 
00887 SSP_FORCEINLINE __m128i ssp_comgt_epi16_SSE2(__m128i a, __m128i b)
00888 {
00889     a = _mm_cmpgt_epi16( a, b );
00890     return a;  
00891 }
00892 
00894 SSP_FORCEINLINE __m128i ssp_comgt_epi32_SSE2(__m128i a, __m128i b)
00895 {
00896     a = _mm_cmpgt_epi32( a, b );
00897     return a;  
00898 }
00899   //TODO: SSE2
00901 SSP_FORCEINLINE __m128i ssp_comgt_epi64_SSE2(__m128i a, __m128i b)
00902 {
00903     a = ssp_comgt_epi64_REF( a, b );
00904     return a;
00905 }
00906 
00908 SSP_FORCEINLINE __m128i ssp_comgt_epi8_SSE2(__m128i a, __m128i b)
00909 {
00910      a = _mm_cmpgt_epi8( a, b );
00911     return a;  
00912 }
00913 
00915 SSP_FORCEINLINE __m128i ssp_comgt_epu16_SSE2(__m128i a, __m128i b)
00916 {
00917     __m128i signMask, mask;
00918 
00919     mask     = _mm_cmpgt_epi16( a, b );              // FFFF where a > b (signed)
00920     signMask = _mm_xor_si128  ( a, b );              // Signbit is 1 where signs differ 
00921     signMask = _mm_srai_epi16 ( signMask, 15 );      // fill all fields with sign bit     
00922     mask     = _mm_xor_si128  ( mask, signMask );    // Invert output where signs differed
00923     return mask;
00924 }
00925 
00927 SSP_FORCEINLINE __m128i ssp_comgt_epu32_SSE2(__m128i a, __m128i b)
00928 {
00929     __m128i signMask, mask;
00930 
00931     mask     = _mm_cmpgt_epi32( a, b );              // FFFF where a < b (signed)
00932     signMask = _mm_xor_si128  ( a, b );              // Signbit is 1 where signs differ 
00933     signMask = _mm_srai_epi32 ( signMask, 31 );      // fill all fields with sign bit     
00934     mask     = _mm_xor_si128  ( mask, signMask );    // Invert output where signs differed
00935     return mask;
00936 }
00937   //TODO:SSE2
00939 SSP_FORCEINLINE __m128i ssp_comgt_epu64_SSE2(__m128i a, __m128i b)
00940 {
00941     a = ssp_comgt_epu64_REF( a, b );
00942     return a;
00943 }
00944   //TODO:SSE2
00946 SSP_FORCEINLINE __m128i ssp_comgt_epu8_SSE2(__m128i a, __m128i b)
00947 {
00948     a = ssp_comgt_epu8_REF( a, b );
00949     return a;
00950 }
00951 
00953 SSP_FORCEINLINE __m128d ssp_comgt_pd_SSE2(__m128d a, __m128d b)
00954 {
00955     a = _mm_cmpgt_pd( a, b );
00956     return a;
00957 }
00958 
00960 SSP_FORCEINLINE __m128 ssp_comgt_ps_SSE2(__m128 a, __m128 b)
00961 {
00962     a = _mm_cmpgt_ps( a, b );
00963     return a;
00964 }
00965 
00967 SSP_FORCEINLINE __m128d ssp_comgt_sd_SSE2(__m128d a, __m128d b)
00968 {
00969     a = _mm_cmpgt_sd( a, b );
00970     return a;
00971 }
00972 
00974 SSP_FORCEINLINE __m128 ssp_comgt_ss_SSE2(__m128 a, __m128 b)
00975 {
00976     a = _mm_cmpgt_ss( a, b );
00977     return a;
00978 }
00979 
00980 
00981 //----------------------------------------
00982 // COMTRUE (Condition 15)
00983 //----------------------------------------
00984 
00986 SSP_FORCEINLINE __m128i ssp_comtrue_epi16_SSE2(__m128i a, __m128i b)
00987 {
00988         return _mm_set1_epi32(0xFFFFFFFF);
00989 }
00990 
00992 SSP_FORCEINLINE __m128i ssp_comtrue_epi32_SSE2(__m128i a, __m128i b)
00993 {
00994         return _mm_set1_epi32(0xFFFFFFFF);
00995 }
00996 
00998 SSP_FORCEINLINE __m128i ssp_comtrue_epi64_SSE2(__m128i a, __m128i b)
00999 {
01000         return _mm_set1_epi32(0xFFFFFFFF);
01001 }
01002 
01004 SSP_FORCEINLINE __m128i ssp_comtrue_epi8_SSE2(__m128i a, __m128i b)
01005 {
01006         return _mm_set1_epi32(0xFFFFFFFF);
01007 }
01008 
01010 SSP_FORCEINLINE __m128i ssp_comtrue_epu16_SSE2(__m128i a, __m128i b)
01011 {
01012         return _mm_set1_epi32(0xFFFFFFFF);
01013 }
01014 
01016 SSP_FORCEINLINE __m128i ssp_comtrue_epu32_SSE2(__m128i a, __m128i b)
01017 {
01018         return _mm_set1_epi32(0xFFFFFFFF);
01019 }
01020 
01022 SSP_FORCEINLINE __m128i ssp_comtrue_epu64_SSE2(__m128i a, __m128i b)
01023 {
01024         return _mm_set1_epi32(0xFFFFFFFF);
01025 }
01026 
01028 SSP_FORCEINLINE __m128i ssp_comtrue_epu8_SSE2(__m128i a, __m128i b)
01029 {
01030         return _mm_set1_epi32(0xFFFFFFFF);
01031 }
01032 
01034 SSP_FORCEINLINE __m128d ssp_comtrue_pd_SSE2(__m128d a, __m128d b)
01035 {
01036         ssp_m128 B;
01037         B.i = _mm_set1_epi32(0xFFFFFFFF);
01038         return B.d;
01039 }
01040 
01042 SSP_FORCEINLINE __m128 ssp_comtrue_ps_SSE2(__m128 a, __m128 b)
01043 {
01044         ssp_m128 B;
01045         B.i = _mm_set1_epi32(0xFFFFFFFF);
01046         return B.f;
01047 }
01048 
01050 SSP_FORCEINLINE __m128d ssp_comtrue_sd_SSE2(__m128d a, __m128d b)
01051 {
01052         ssp_m128 B;
01053         B.i = _mm_set_epi32(0, 0, 0xFFFFFFFF, 0xFFFFFFFF);
01054         return _mm_or_pd(a, B.d);
01055 }
01056 
01058 SSP_FORCEINLINE __m128 ssp_comtrue_ss_SSE2(__m128 a, __m128 b)
01059 {
01060         ssp_m128 B;
01061         B.i = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF);
01062         return _mm_or_ps(a, B.f);
01063 }
01064 
01065 
01071 #endif // __SSEPLUS_EMULATION_COMPS_SSE2_H__

Generated on Wed May 21 13:44:11 2008 for "SSEPlus" by  doxygen 1.5.4