include/emulation/SSEPlus_emulation_comps_REF.h

Go to the documentation of this file.
00001 //
00002 // Copyright (c) 2006-2008 Advanced Micro Devices, Inc. All Rights Reserved.
00003 // This software is subject to the Apache v2.0 License.
00004 //
00005 #ifndef __SSEPLUS_EMULATION_COMPS_REF_H__
00006 #define __SSEPLUS_EMULATION_COMPS_REF_H__
00007 
00008 #include "../SSEPlus_base.h"
00009 
00015 //----------------------------------------
00016 // COMEQ (Condition 0)
00017 //----------------------------------------
00018 
00020 SSP_FORCEINLINE __m128i ssp_comeq_epi16_REF(__m128i a, __m128i b)
00021 {
00022     ssp_m128 A,B;
00023     A.i = a;
00024     B.i = b;
00025     A.u16[0] = (A.s16[0]==B.s16[0]) ? 0xFFFF : 0;
00026     A.u16[1] = (A.s16[1]==B.s16[1]) ? 0xFFFF : 0;
00027     A.u16[2] = (A.s16[2]==B.s16[2]) ? 0xFFFF : 0;
00028     A.u16[3] = (A.s16[3]==B.s16[3]) ? 0xFFFF : 0;
00029     A.u16[4] = (A.s16[4]==B.s16[4]) ? 0xFFFF : 0;
00030     A.u16[5] = (A.s16[5]==B.s16[5]) ? 0xFFFF : 0;
00031     A.u16[6] = (A.s16[6]==B.s16[6]) ? 0xFFFF : 0;
00032     A.u16[7] = (A.s16[7]==B.s16[7]) ? 0xFFFF : 0;
00033     return A.i;
00034 }
00035 
00037 SSP_FORCEINLINE __m128i ssp_comeq_epi32_REF(__m128i a, __m128i b)
00038 {
00039     ssp_m128 A,B;
00040     A.i = a;
00041     B.i = b;
00042     A.u32[0] = (A.s32[0]==B.s32[0]) ? 0xFFFFFFFF : 0;
00043     A.u32[1] = (A.s32[1]==B.s32[1]) ? 0xFFFFFFFF : 0;
00044     A.u32[2] = (A.s32[2]==B.s32[2]) ? 0xFFFFFFFF : 0;
00045     A.u32[3] = (A.s32[3]==B.s32[3]) ? 0xFFFFFFFF : 0;
00046     return A.i;
00047 }
00048 
00050 SSP_FORCEINLINE __m128i ssp_comeq_epi64_REF(__m128i a, __m128i b)
00051 {
00052     ssp_m128 A,B;
00053     A.i = a;
00054     B.i = b;
00055     A.u64[0] = (A.s64[0]==B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00056     A.u64[1] = (A.s64[1]==B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00057     return A.i;
00058 }
00059 
00061 SSP_FORCEINLINE __m128i ssp_comeq_epi8_REF(__m128i a, __m128i b)
00062 {
00063     ssp_m128 A,B;
00064     A.i = a;
00065     B.i = b;
00066     A.u8[ 0] = (A.s8[ 0]==B.s8[ 0]) ? 0xFF : 0;
00067     A.u8[ 1] = (A.s8[ 1]==B.s8[ 1]) ? 0xFF : 0;
00068     A.u8[ 2] = (A.s8[ 2]==B.s8[ 2]) ? 0xFF : 0;
00069     A.u8[ 3] = (A.s8[ 3]==B.s8[ 3]) ? 0xFF : 0;
00070     A.u8[ 4] = (A.s8[ 4]==B.s8[ 4]) ? 0xFF : 0;
00071     A.u8[ 5] = (A.s8[ 5]==B.s8[ 5]) ? 0xFF : 0;
00072     A.u8[ 6] = (A.s8[ 6]==B.s8[ 6]) ? 0xFF : 0;
00073     A.u8[ 7] = (A.s8[ 7]==B.s8[ 7]) ? 0xFF : 0; 
00074         A.u8[ 8] = (A.s8[ 8]==B.s8[ 8]) ? 0xFF : 0;
00075     A.u8[ 9] = (A.s8[ 9]==B.s8[ 9]) ? 0xFF : 0;
00076     A.u8[10]= (A.s8[10]==B.s8[10]) ? 0xFF : 0;
00077     A.u8[11] = (A.s8[11]==B.s8[11]) ? 0xFF : 0;
00078     A.u8[12] = (A.s8[12]==B.s8[12]) ? 0xFF : 0;
00079     A.u8[13] = (A.s8[13]==B.s8[13]) ? 0xFF : 0;
00080     A.u8[14] = (A.s8[14]==B.s8[14]) ? 0xFF : 0;
00081     A.u8[15] = (A.s8[15]==B.s8[15]) ? 0xFF : 0;
00082     return A.i;
00083 }
00084 
00086 SSP_FORCEINLINE __m128i ssp_comeq_epu16_REF(__m128i a, __m128i b)
00087 {
00088     ssp_m128 A,B;
00089     A.i = a;
00090     B.i = b;
00091     A.u16[0] = (A.u16[0]==B.u16[0]) ? 0xFFFF : 0;
00092     A.u16[1] = (A.u16[1]==B.u16[1]) ? 0xFFFF : 0;
00093     A.u16[2] = (A.u16[2]==B.u16[2]) ? 0xFFFF : 0;
00094     A.u16[3] = (A.u16[3]==B.u16[3]) ? 0xFFFF : 0;
00095     A.u16[4] = (A.u16[4]==B.u16[4]) ? 0xFFFF : 0;
00096     A.u16[5] = (A.u16[5]==B.u16[5]) ? 0xFFFF : 0;
00097     A.u16[6] = (A.u16[6]==B.u16[6]) ? 0xFFFF : 0;
00098     A.u16[7] = (A.u16[7]==B.u16[7]) ? 0xFFFF : 0;
00099     return A.i;
00100 }
00101 
00103 SSP_FORCEINLINE __m128i ssp_comeq_epu32_REF(__m128i a, __m128i b)
00104 {
00105     ssp_m128 A,B;
00106     A.i = a;
00107     B.i = b;
00108     A.u32[0] = (A.u32[0]==B.u32[0]) ? 0xFFFFFFFF : 0;
00109     A.u32[1] = (A.u32[1]==B.u32[1]) ? 0xFFFFFFFF : 0;
00110     A.u32[2] = (A.u32[2]==B.u32[2]) ? 0xFFFFFFFF : 0;
00111     A.u32[3] = (A.u32[3]==B.u32[3]) ? 0xFFFFFFFF : 0;
00112     return A.i;
00113 }
00114 
00116 SSP_FORCEINLINE __m128i ssp_comeq_epu64_REF(__m128i a, __m128i b)
00117 {
00118     ssp_m128 A,B;
00119     A.i = a;
00120     B.i = b;
00121     A.u64[0] = (A.u64[0]==B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00122     A.u64[1] = (A.u64[1]==B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00123     return A.i;
00124 }
00125 
00127 SSP_FORCEINLINE __m128i ssp_comeq_epu8_REF(__m128i a, __m128i b)
00128 {
00129     ssp_m128 A,B;
00130     A.i = a;
00131     B.i = b;
00132     A.u8[ 0] = (A.u8[ 0]==B.u8[ 0]) ? 0xFF : 0;
00133     A.u8[ 1] = (A.u8[ 1]==B.u8[ 1]) ? 0xFF : 0;
00134     A.u8[ 2] = (A.u8[ 2]==B.u8[ 2]) ? 0xFF : 0;
00135     A.u8[ 3] = (A.u8[ 3]==B.u8[ 3]) ? 0xFF : 0;
00136     A.u8[ 4] = (A.u8[ 4]==B.u8[ 4]) ? 0xFF : 0;
00137     A.u8[ 5] = (A.u8[ 5]==B.u8[ 5]) ? 0xFF : 0;
00138     A.u8[ 6] = (A.u8[ 6]==B.u8[ 6]) ? 0xFF : 0;
00139     A.u8[ 7] = (A.u8[ 7]==B.u8[ 7]) ? 0xFF : 0; 
00140         A.u8[ 8] = (A.u8[ 8]==B.u8[ 8]) ? 0xFF : 0;
00141     A.u8[ 9] = (A.u8[ 9]==B.u8[ 9]) ? 0xFF : 0;
00142     A.u8[10] = (A.u8[10]==B.u8[10]) ? 0xFF : 0;
00143     A.u8[11] = (A.u8[11]==B.u8[11]) ? 0xFF : 0;
00144     A.u8[12] = (A.u8[12]==B.u8[12]) ? 0xFF : 0;
00145     A.u8[13] = (A.u8[13]==B.u8[13]) ? 0xFF : 0;
00146     A.u8[14] = (A.u8[14]==B.u8[14]) ? 0xFF : 0;
00147     A.u8[15] = (A.u8[15]==B.u8[15]) ? 0xFF : 0;
00148     return A.i;
00149 }
00150 
00152 SSP_FORCEINLINE __m128d ssp_comeq_pd_REF(__m128d a, __m128d b)
00153 {
00154     ssp_m128 A,B;
00155     A.d = a;
00156     B.d = b;
00157     A.u64[0] = (A.f64[0]==B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00158     A.u64[1] = (A.f64[1]==B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00159     return A.d;
00160 }
00161 
00163 SSP_FORCEINLINE __m128 ssp_comeq_ps_REF(__m128 a, __m128 b)
00164 {
00165     ssp_m128 A,B;
00166     A.f = a;
00167     B.f = b;
00168     A.u32[0] = (A.f32[0]==B.f32[0]) ? 0xFFFFFFFF : 0;
00169     A.u32[1] = (A.f32[1]==B.f32[1]) ? 0xFFFFFFFF : 0;
00170     A.u32[2] = (A.f32[2]==B.f32[2]) ? 0xFFFFFFFF : 0;
00171     A.u32[3] = (A.f32[3]==B.f32[3]) ? 0xFFFFFFFF : 0;
00172     return A.f;
00173 }
00174 
00176 SSP_FORCEINLINE __m128d ssp_comeq_sd_REF(__m128d a, __m128d b)
00177 {
00178     ssp_m128 A,B;
00179     A.d = a;
00180     B.d = b;
00181     A.u64[0] = (A.f64[0]==B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00182     return A.d;
00183 }
00184 
00186 SSP_FORCEINLINE __m128 ssp_comeq_ss_REF(__m128 a, __m128 b)
00187 {
00188     ssp_m128 A,B;
00189     A.f = a;
00190     B.f = b;
00191     A.u32[0] = (A.f32[0]==B.f32[0]) ? 0xFFFFFFFF : 0;
00192     return A.f;
00193 }
00194 
00195 //----------------------------------------
00196 // COMLT (Condition 1)
00197 //----------------------------------------
00198 
00200 SSP_FORCEINLINE __m128i ssp_comlt_epi16_REF(__m128i a, __m128i b)
00201 {
00202     ssp_m128 A,B;
00203     A.i = a;
00204     B.i = b;
00205     A.u16[0] = (A.s16[0]<B.s16[0]) ? 0xFFFF : 0;
00206     A.u16[1] = (A.s16[1]<B.s16[1]) ? 0xFFFF : 0;
00207     A.u16[2] = (A.s16[2]<B.s16[2]) ? 0xFFFF : 0;
00208     A.u16[3] = (A.s16[3]<B.s16[3]) ? 0xFFFF : 0;
00209     A.u16[4] = (A.s16[4]<B.s16[4]) ? 0xFFFF : 0;
00210     A.u16[5] = (A.s16[5]<B.s16[5]) ? 0xFFFF : 0;
00211     A.u16[6] = (A.s16[6]<B.s16[6]) ? 0xFFFF : 0;
00212     A.u16[7] = (A.s16[7]<B.s16[7]) ? 0xFFFF : 0;
00213     return A.i;
00214 }
00215 
00217 SSP_FORCEINLINE __m128i ssp_comlt_epi32_REF(__m128i a, __m128i b)
00218 {
00219     ssp_m128 A,B;
00220     A.i = a;
00221     B.i = b;
00222     A.u32[0] = (A.s32[0]<B.s32[0]) ? 0xFFFFFFFF : 0;
00223     A.u32[1] = (A.s32[1]<B.s32[1]) ? 0xFFFFFFFF : 0;
00224     A.u32[2] = (A.s32[2]<B.s32[2]) ? 0xFFFFFFFF : 0;
00225     A.u32[3] = (A.s32[3]<B.s32[3]) ? 0xFFFFFFFF : 0;
00226     return A.i;
00227 }
00228 
00230 SSP_FORCEINLINE __m128i ssp_comlt_epi64_REF(__m128i a, __m128i b)
00231 {
00232     ssp_m128 A,B;
00233     A.i = a;
00234     B.i = b;
00235     A.u64[0] = (A.s64[0]<B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00236     A.u64[1] = (A.s64[1]<B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00237     return A.i;
00238 }
00239 
00241 SSP_FORCEINLINE __m128i ssp_comlt_epi8_REF(__m128i a, __m128i b)
00242 {
00243     ssp_m128 A,B;
00244     A.i = a;
00245     B.i = b;
00246     A.u8[ 0] = (A.s8[ 0]<B.s8[ 0]) ? 0xFF : 0;
00247     A.u8[ 1] = (A.s8[ 1]<B.s8[ 1]) ? 0xFF : 0;
00248     A.u8[ 2] = (A.s8[ 2]<B.s8[ 2]) ? 0xFF : 0;
00249     A.u8[ 3] = (A.s8[ 3]<B.s8[ 3]) ? 0xFF : 0;
00250     A.u8[ 4] = (A.s8[ 4]<B.s8[ 4]) ? 0xFF : 0;
00251     A.u8[ 5] = (A.s8[ 5]<B.s8[ 5]) ? 0xFF : 0;
00252     A.u8[ 6] = (A.s8[ 6]<B.s8[ 6]) ? 0xFF : 0;
00253     A.u8[ 7] = (A.s8[ 7]<B.s8[ 7]) ? 0xFF : 0; 
00254         A.u8[ 8] = (A.s8[ 8]<B.s8[ 8]) ? 0xFF : 0;
00255     A.u8[ 9] = (A.s8[ 9]<B.s8[ 9]) ? 0xFF : 0;
00256     A.u8[10] = (A.s8[10]<B.s8[10]) ? 0xFF : 0;
00257     A.u8[11] = (A.s8[11]<B.s8[11]) ? 0xFF : 0;
00258     A.u8[12] = (A.s8[12]<B.s8[12]) ? 0xFF : 0;
00259     A.u8[13] = (A.s8[13]<B.s8[13]) ? 0xFF : 0;
00260     A.u8[14] = (A.s8[14]<B.s8[14]) ? 0xFF : 0;
00261     A.u8[15] = (A.s8[15]<B.s8[15]) ? 0xFF : 0;
00262     return A.i;
00263 }
00264 
00266 SSP_FORCEINLINE __m128i ssp_comlt_epu16_REF(__m128i a, __m128i b)
00267 {
00268     ssp_m128 A,B;
00269     A.i = a;
00270     B.i = b;
00271     A.u16[0] = (A.u16[0]<B.u16[0]) ? 0xFFFF : 0;
00272     A.u16[1] = (A.u16[1]<B.u16[1]) ? 0xFFFF : 0;
00273     A.u16[2] = (A.u16[2]<B.u16[2]) ? 0xFFFF : 0;
00274     A.u16[3] = (A.u16[3]<B.u16[3]) ? 0xFFFF : 0;
00275     A.u16[4] = (A.u16[4]<B.u16[4]) ? 0xFFFF : 0;
00276     A.u16[5] = (A.u16[5]<B.u16[5]) ? 0xFFFF : 0;
00277     A.u16[6] = (A.u16[6]<B.u16[6]) ? 0xFFFF : 0;
00278     A.u16[7] = (A.u16[7]<B.u16[7]) ? 0xFFFF : 0;
00279     return A.i;
00280 }
00281 
00283 SSP_FORCEINLINE __m128i ssp_comlt_epu32_REF(__m128i a, __m128i b)
00284 {
00285     ssp_m128 A,B;
00286     A.i = a;
00287     B.i = b;
00288     A.u32[0] = (A.u32[0]<B.u32[0]) ? 0xFFFFFFFF : 0;
00289     A.u32[1] = (A.u32[1]<B.u32[1]) ? 0xFFFFFFFF : 0;
00290     A.u32[2] = (A.u32[2]<B.u32[2]) ? 0xFFFFFFFF : 0;
00291     A.u32[3] = (A.u32[3]<B.u32[3]) ? 0xFFFFFFFF : 0;
00292     return A.i;
00293 }
00294 
00296 SSP_FORCEINLINE __m128i ssp_comlt_epu64_REF(__m128i a, __m128i b)
00297 {
00298     ssp_m128 A,B;
00299     A.i = a;
00300     B.i = b;
00301     A.u64[0] = (A.u64[0]<B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00302     A.u64[1] = (A.u64[1]<B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00303     return A.i;
00304 }
00305 
00307 SSP_FORCEINLINE __m128i ssp_comlt_epu8_REF(__m128i a, __m128i b)
00308 {
00309     ssp_m128 A,B;
00310     A.i = a;
00311     B.i = b;
00312     A.u8[ 0] = (A.u8[ 0]<B.u8[0]) ? 0xFF : 0;
00313     A.u8[ 1] = (A.u8[ 1]<B.u8[1]) ? 0xFF : 0;
00314     A.u8[ 2] = (A.u8[ 2]<B.u8[2]) ? 0xFF : 0;
00315     A.u8[ 3] = (A.u8[ 3]<B.u8[3]) ? 0xFF : 0;
00316     A.u8[ 4] = (A.u8[ 4]<B.u8[4]) ? 0xFF : 0;
00317     A.u8[ 5] = (A.u8[ 5]<B.u8[5]) ? 0xFF : 0;
00318     A.u8[ 6] = (A.u8[ 6]<B.u8[6]) ? 0xFF : 0;
00319     A.u8[ 7] = (A.u8[ 7]<B.u8[7]) ? 0xFF : 0; 
00320         A.u8[ 8] = (A.u8[ 8]<B.u8[8]) ? 0xFF : 0;
00321     A.u8[ 9] = (A.u8[ 9]<B.u8[9]) ? 0xFF : 0;
00322     A.u8[10] = (A.u8[10]<B.u8[10]) ? 0xFF : 0;
00323     A.u8[11] = (A.u8[11]<B.u8[11]) ? 0xFF : 0;
00324     A.u8[12] = (A.u8[12]<B.u8[12]) ? 0xFF : 0;
00325     A.u8[13] = (A.u8[13]<B.u8[13]) ? 0xFF : 0;
00326     A.u8[14] = (A.u8[14]<B.u8[14]) ? 0xFF : 0;
00327     A.u8[15] = (A.u8[15]<B.u8[15]) ? 0xFF : 0;
00328     return A.i;
00329 }
00330 
00332 SSP_FORCEINLINE __m128d ssp_comlt_pd_REF(__m128d a, __m128d b)
00333 {
00334     ssp_m128 A,B;
00335     A.d = a;
00336     B.d = b;
00337         A.u64[0] = (A.f64[0]<B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00338     A.u64[1] = (A.f64[1]<B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00339     return A.d;
00340 }
00341 
00343 SSP_FORCEINLINE __m128 ssp_comlt_ps_REF(__m128 a, __m128 b)
00344 {
00345     ssp_m128 A,B;
00346     A.f = a;
00347     B.f = b;
00348     A.u32[0] = (A.f32[0]<B.f32[0]) ? 0xFFFFFFFF : 0;
00349     A.u32[1] = (A.f32[1]<B.f32[1]) ? 0xFFFFFFFF : 0;
00350     A.u32[2] = (A.f32[2]<B.f32[2]) ? 0xFFFFFFFF : 0;
00351     A.u32[3] = (A.f32[3]<B.f32[3]) ? 0xFFFFFFFF : 0;
00352     return A.f;
00353 }
00354 
00356 SSP_FORCEINLINE __m128d ssp_comlt_sd_REF(__m128d a, __m128d b)
00357 {
00358     ssp_m128 A,B;
00359     A.d = a;
00360     B.d = b;
00361         A.u64[0] = (A.f64[0]<B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00362     return A.d;
00363 }
00364 
00366 SSP_FORCEINLINE __m128 ssp_comlt_ss_REF(__m128 a, __m128 b)
00367 {
00368     ssp_m128 A,B;
00369     A.f = a;
00370     B.f = b;
00371         A.u32[0] = (A.f32[0]<B.f32[0]) ? 0xFFFFFFFF : 0;
00372     return A.f;
00373 }
00374 
00375 //----------------------------------------
00376 // COMLE (Condition 2)
00377 //----------------------------------------
00378 
00380 SSP_FORCEINLINE __m128i ssp_comle_epi16_REF(__m128i a, __m128i b)
00381 {
00382     ssp_m128 A,B;
00383     A.i = a;
00384     B.i = b;
00385     A.u16[0] = (A.s16[0]<=B.s16[0]) ? 0xFFFF : 0;
00386     A.u16[1] = (A.s16[1]<=B.s16[1]) ? 0xFFFF : 0;
00387     A.u16[2] = (A.s16[2]<=B.s16[2]) ? 0xFFFF : 0;
00388     A.u16[3] = (A.s16[3]<=B.s16[3]) ? 0xFFFF : 0;
00389     A.u16[4] = (A.s16[4]<=B.s16[4]) ? 0xFFFF : 0;
00390     A.u16[5] = (A.s16[5]<=B.s16[5]) ? 0xFFFF : 0;
00391     A.u16[6] = (A.s16[6]<=B.s16[6]) ? 0xFFFF : 0;
00392     A.u16[7] = (A.s16[7]<=B.s16[7]) ? 0xFFFF : 0;
00393     return A.i;
00394 }
00395 
00397 SSP_FORCEINLINE __m128i ssp_comle_epi32_REF(__m128i a, __m128i b)
00398 {
00399     ssp_m128 A,B;
00400     A.i = a;
00401     B.i = b;
00402     A.u32[0] = (A.s32[0]<=B.s32[0]) ? 0xFFFFFFFF : 0;
00403     A.u32[1] = (A.s32[1]<=B.s32[1]) ? 0xFFFFFFFF : 0;
00404     A.u32[2] = (A.s32[2]<=B.s32[2]) ? 0xFFFFFFFF : 0;
00405     A.u32[3] = (A.s32[3]<=B.s32[3]) ? 0xFFFFFFFF : 0;
00406     return A.i;
00407 }
00408 
00410 SSP_FORCEINLINE __m128i ssp_comle_epi64_REF(__m128i a, __m128i b)
00411 {
00412     ssp_m128 A,B;
00413     A.i = a;
00414     B.i = b;
00415     A.u64[0] = (A.s64[0]<=B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00416     A.u64[1] = (A.s64[1]<=B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00417     return A.i;
00418 }
00419 
00421 SSP_FORCEINLINE __m128i ssp_comle_epi8_REF(__m128i a, __m128i b)
00422 {
00423     ssp_m128 A,B;
00424     A.i = a;
00425     B.i = b;
00426     A.u8[ 0] = (A.s8[ 0]<=B.s8[ 0]) ? 0xFF : 0;
00427     A.u8[ 1] = (A.s8[ 1]<=B.s8[ 1]) ? 0xFF : 0;
00428     A.u8[ 2] = (A.s8[ 2]<=B.s8[ 2]) ? 0xFF : 0;
00429     A.u8[ 3] = (A.s8[ 3]<=B.s8[ 3]) ? 0xFF : 0;
00430     A.u8[ 4] = (A.s8[ 4]<=B.s8[ 4]) ? 0xFF : 0;
00431     A.u8[ 5] = (A.s8[ 5]<=B.s8[ 5]) ? 0xFF : 0;
00432     A.u8[ 6] = (A.s8[ 6]<=B.s8[ 6]) ? 0xFF : 0;
00433     A.u8[ 7] = (A.s8[ 7]<=B.s8[ 7]) ? 0xFF : 0; 
00434         A.u8[ 8] = (A.s8[ 8]<=B.s8[ 8]) ? 0xFF : 0;
00435     A.u8[ 9] = (A.s8[ 9]<=B.s8[ 9]) ? 0xFF : 0;
00436     A.u8[10] = (A.s8[10]<=B.s8[10]) ? 0xFF : 0;
00437     A.u8[11] = (A.s8[11]<=B.s8[11]) ? 0xFF : 0;
00438     A.u8[12] = (A.s8[12]<=B.s8[12]) ? 0xFF : 0;
00439     A.u8[13] = (A.s8[13]<=B.s8[13]) ? 0xFF : 0;
00440     A.u8[14] = (A.s8[14]<=B.s8[14]) ? 0xFF : 0;
00441     A.u8[15] = (A.s8[15]<=B.s8[15]) ? 0xFF : 0;
00442     return A.i;
00443 }
00444 
00446 SSP_FORCEINLINE __m128i ssp_comle_epu16_REF(__m128i a, __m128i b)
00447 {
00448     ssp_m128 A,B;
00449     A.i = a;
00450     B.i = b;
00451     A.u16[0] = (A.u16[0]<=B.u16[0]) ? 0xFFFF : 0;
00452     A.u16[1] = (A.u16[1]<=B.u16[1]) ? 0xFFFF : 0;
00453     A.u16[2] = (A.u16[2]<=B.u16[2]) ? 0xFFFF : 0;
00454     A.u16[3] = (A.u16[3]<=B.u16[3]) ? 0xFFFF : 0;
00455     A.u16[4] = (A.u16[4]<=B.u16[4]) ? 0xFFFF : 0;
00456     A.u16[5] = (A.u16[5]<=B.u16[5]) ? 0xFFFF : 0;
00457     A.u16[6] = (A.u16[6]<=B.u16[6]) ? 0xFFFF : 0;
00458     A.u16[7] = (A.u16[7]<=B.u16[7]) ? 0xFFFF : 0;
00459     return A.i;
00460 }
00461 
00463 SSP_FORCEINLINE __m128i ssp_comle_epu32_REF(__m128i a, __m128i b)
00464 {
00465     ssp_m128 A,B;
00466     A.i = a;
00467     B.i = b;
00468     A.u32[0] = (A.u32[0]<=B.u32[0]) ? 0xFFFFFFFF : 0;
00469     A.u32[1] = (A.u32[1]<=B.u32[1]) ? 0xFFFFFFFF : 0;
00470     A.u32[2] = (A.u32[2]<=B.u32[2]) ? 0xFFFFFFFF : 0;
00471     A.u32[3] = (A.u32[3]<=B.u32[3]) ? 0xFFFFFFFF : 0;
00472     return A.i;
00473 }
00474 
00476 SSP_FORCEINLINE __m128i ssp_comle_epu64_REF(__m128i a, __m128i b)
00477 {
00478     ssp_m128 A,B;
00479     A.i = a;
00480     B.i = b;
00481     A.u64[0] = (A.u64[0]<=B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00482     A.u64[1] = (A.u64[1]<=B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00483     return A.i;
00484 }
00485 
00487 SSP_FORCEINLINE __m128i ssp_comle_epu8_REF(__m128i a, __m128i b)
00488 {
00489     ssp_m128 A,B;
00490     A.i = a;
00491     B.i = b;
00492     A.u8[ 0] = (A.u8[ 0]<=B.u8[ 0]) ? 0xFF : 0;
00493     A.u8[ 1] = (A.u8[ 1]<=B.u8[ 1]) ? 0xFF : 0;
00494     A.u8[ 2] = (A.u8[ 2]<=B.u8[ 2]) ? 0xFF : 0;
00495     A.u8[ 3] = (A.u8[ 3]<=B.u8[ 3]) ? 0xFF : 0;
00496     A.u8[ 4] = (A.u8[ 4]<=B.u8[ 4]) ? 0xFF : 0;
00497     A.u8[ 5] = (A.u8[ 5]<=B.u8[ 5]) ? 0xFF : 0;
00498     A.u8[ 6] = (A.u8[ 6]<=B.u8[ 6]) ? 0xFF : 0;
00499     A.u8[ 7] = (A.u8[ 7]<=B.u8[ 7]) ? 0xFF : 0; 
00500         A.u8[ 8] = (A.u8[ 8]<=B.u8[ 8]) ? 0xFF : 0;
00501     A.u8[ 9] = (A.u8[ 9]<=B.u8[ 9]) ? 0xFF : 0;
00502     A.u8[10] = (A.u8[10]<=B.u8[10]) ? 0xFF : 0;
00503     A.u8[11] = (A.u8[11]<=B.u8[11]) ? 0xFF : 0;
00504     A.u8[12] = (A.u8[12]<=B.u8[12]) ? 0xFF : 0;
00505     A.u8[13] = (A.u8[13]<=B.u8[13]) ? 0xFF : 0;
00506     A.u8[14] = (A.u8[14]<=B.u8[14]) ? 0xFF : 0;
00507     A.u8[15] = (A.u8[15]<=B.u8[15]) ? 0xFF : 0;
00508     return A.i;
00509 }
00510 
00512 SSP_FORCEINLINE __m128d ssp_comle_pd_REF(__m128d a, __m128d b)
00513 {
00514     ssp_m128 A,B;
00515     A.d = a;
00516     B.d = b;
00517    
00518         A.u64[0] = (A.f64[0]<=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00519     A.u64[1] = (A.f64[1]<=B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00520     return A.d;
00521 }
00522 
00524 SSP_FORCEINLINE __m128 ssp_comle_ps_REF(__m128 a, __m128 b)
00525 {
00526     ssp_m128 A,B;
00527     A.f = a;
00528     B.f = b;
00529     A.u32[0] = (A.f32[0]<=B.f32[0]) ? 0xFFFFFFFF : 0;
00530     A.u32[1] = (A.f32[1]<=B.f32[1]) ? 0xFFFFFFFF : 0;
00531     A.u32[2] = (A.f32[2]<=B.f32[2]) ? 0xFFFFFFFF : 0;
00532     A.u32[3] = (A.f32[3]<=B.f32[3]) ? 0xFFFFFFFF : 0;
00533     return A.f;
00534 }
00535 
00537 SSP_FORCEINLINE __m128d ssp_comle_sd_REF(__m128d a, __m128d b)
00538 {
00539     ssp_m128 A,B;
00540     A.d = a;
00541     B.d = b;
00542    
00543         A.u64[0] = (A.f64[0]<=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00544     return A.d;
00545 }
00546 
00548 SSP_FORCEINLINE __m128 ssp_comle_ss_REF(__m128 a, __m128 b)
00549 {
00550     ssp_m128 A,B;
00551     A.f = a;
00552     B.f = b;
00553         A.u32[0] = (A.f32[0]<=B.f32[0]) ? 0xFFFFFFFF : 0;
00554     return A.f;
00555 }
00556 
00557 //----------------------------------------
00558 // COMUNORD (Condition 3)
00559 //----------------------------------------
00560 
00562 SSP_FORCEINLINE __m128d ssp_comunord_pd_REF(__m128d a, __m128d b)
00563 {
00564     ssp_m128 A,B;
00565     A.d = a;
00566     B.d = b; // NAN(A)              || NAN(B)         
00567     A.u64[0] = ((A.f64[0]!=A.f64[0]) || (B.f64[0]!=B.f64[0])) ? 0xFFFFFFFFFFFFFFFF : 0;
00568     A.u64[1] = ((A.f64[1]!=A.f64[1]) || (B.f64[1]!=B.f64[1])) ? 0xFFFFFFFFFFFFFFFF : 0;
00569     return A.d;
00570 }
00571 
00573 SSP_FORCEINLINE __m128 ssp_comunord_ps_REF(__m128 a, __m128 b)
00574 {
00575     ssp_m128 A,B;
00576     A.f = a;
00577     B.f = b; // NAN(A)              || NAN(B)         
00578     A.u32[0] = (A.f32[0]!=A.f32[0]) || (B.f32[0]!=B.f32[0]) ? 0xFFFFFFFF : 0;
00579     A.u32[1] = (A.f32[1]!=A.f32[1]) || (B.f32[1]!=B.f32[1]) ? 0xFFFFFFFF : 0;
00580     A.u32[2] = (A.f32[2]!=A.f32[2]) || (B.f32[2]!=B.f32[2]) ? 0xFFFFFFFF : 0;
00581     A.u32[3] = (A.f32[3]!=A.f32[3]) || (B.f32[3]!=B.f32[3]) ? 0xFFFFFFFF : 0;
00582     return A.f;
00583 }
00584 
00586 SSP_FORCEINLINE __m128d ssp_comunord_sd_REF(__m128d a, __m128d b)
00587 {
00588     ssp_m128 A,B;
00589     A.d = a;
00590     B.d = b; // NAN(A)              || NAN(B)         
00591     A.u64[0] = ((A.f64[0]!=A.f64[0]) || (B.f64[0]!=B.f64[0])) ? 0xFFFFFFFFFFFFFFFF : 0;
00592     return A.d;
00593 }
00594 
00596 SSP_FORCEINLINE __m128 ssp_comunord_ss_REF(__m128 a, __m128 b)
00597 {
00598     ssp_m128 A,B;
00599     A.f = a;
00600     B.f = b; // NAN(A)              || NAN(B)         
00601     A.u32[0] = (A.f32[0]!=A.f32[0]) || (B.f32[0]!=B.f32[0]) ? 0xFFFFFFFF : 0;
00602     return A.f;
00603 }
00604 
00605 
00606 //----------------------------------------
00607 // COMNEQ (Condition 4)
00608 //----------------------------------------
00609 
00611 SSP_FORCEINLINE __m128i ssp_comneq_epi16_REF(__m128i a, __m128i b)
00612 {
00613     ssp_m128 A,B;
00614     A.i = a;
00615     B.i = b;
00616     A.u16[0] = (A.s16[0]!=B.s16[0]) ? 0xFFFF : 0;
00617     A.u16[1] = (A.s16[1]!=B.s16[1]) ? 0xFFFF : 0;
00618     A.u16[2] = (A.s16[2]!=B.s16[2]) ? 0xFFFF : 0;
00619     A.u16[3] = (A.s16[3]!=B.s16[3]) ? 0xFFFF : 0;
00620     A.u16[4] = (A.s16[4]!=B.s16[4]) ? 0xFFFF : 0;
00621     A.u16[5] = (A.s16[5]!=B.s16[5]) ? 0xFFFF : 0;
00622     A.u16[6] = (A.s16[6]!=B.s16[6]) ? 0xFFFF : 0;
00623     A.u16[7] = (A.s16[7]!=B.s16[7]) ? 0xFFFF : 0;
00624     return A.i;
00625 }
00626 
00628 SSP_FORCEINLINE __m128i ssp_comneq_epi32_REF(__m128i a, __m128i b)
00629 {
00630     ssp_m128 A,B;
00631     A.i = a;
00632     B.i = b;
00633     A.u32[0] = (A.s32[0]!=B.s32[0]) ? 0xFFFFFFFF : 0;
00634     A.u32[1] = (A.s32[1]!=B.s32[1]) ? 0xFFFFFFFF : 0;
00635     A.u32[2] = (A.s32[2]!=B.s32[2]) ? 0xFFFFFFFF : 0;
00636     A.u32[3] = (A.s32[3]!=B.s32[3]) ? 0xFFFFFFFF : 0;
00637     return A.i;
00638 }
00639 
00641 SSP_FORCEINLINE __m128i ssp_comneq_epi64_REF(__m128i a, __m128i b)
00642 {
00643     ssp_m128 A,B;
00644     A.i = a;
00645     B.i = b;
00646     A.u64[0] = (A.s64[0]!=B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00647     A.u64[1] = (A.s64[1]!=B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00648     return A.i;
00649 }
00650 
00652 SSP_FORCEINLINE __m128i ssp_comneq_epi8_REF(__m128i a, __m128i b)
00653 {
00654     ssp_m128 A,B;
00655     A.i = a;
00656     B.i = b;
00657     A.u8[ 0] = (A.s8[ 0]!=B.s8[ 0]) ? 0xFF : 0;
00658     A.u8[ 1] = (A.s8[ 1]!=B.s8[ 1]) ? 0xFF : 0;
00659     A.u8[ 2] = (A.s8[ 2]!=B.s8[ 2]) ? 0xFF : 0;
00660     A.u8[ 3] = (A.s8[ 3]!=B.s8[ 3]) ? 0xFF : 0;
00661     A.u8[ 4] = (A.s8[ 4]!=B.s8[ 4]) ? 0xFF : 0;
00662     A.u8[ 5] = (A.s8[ 5]!=B.s8[ 5]) ? 0xFF : 0;
00663     A.u8[ 6] = (A.s8[ 6]!=B.s8[ 6]) ? 0xFF : 0;
00664     A.u8[ 7] = (A.s8[ 7]!=B.s8[ 7]) ? 0xFF : 0; 
00665         A.u8[ 8] = (A.s8[ 8]!=B.s8[ 8]) ? 0xFF : 0;
00666     A.u8[ 9] = (A.s8[ 9]!=B.s8[ 9]) ? 0xFF : 0;
00667     A.u8[10] = (A.s8[10]!=B.s8[10]) ? 0xFF : 0;
00668     A.u8[11] = (A.s8[11]!=B.s8[11]) ? 0xFF : 0;
00669     A.u8[12] = (A.s8[12]!=B.s8[12]) ? 0xFF : 0;
00670     A.u8[13] = (A.s8[13]!=B.s8[13]) ? 0xFF : 0;
00671     A.u8[14] = (A.s8[14]!=B.s8[14]) ? 0xFF : 0;
00672     A.u8[15] = (A.s8[15]!=B.s8[15]) ? 0xFF : 0;
00673     return A.i;
00674 }
00675 
00677 SSP_FORCEINLINE __m128i ssp_comneq_epu16_REF(__m128i a, __m128i b)
00678 {
00679     ssp_m128 A,B;
00680     A.i = a;
00681     B.i = b;
00682     A.u16[0] = (A.u16[0]!=B.u16[0]) ? 0xFFFF : 0;
00683     A.u16[1] = (A.u16[1]!=B.u16[1]) ? 0xFFFF : 0;
00684     A.u16[2] = (A.u16[2]!=B.u16[2]) ? 0xFFFF : 0;
00685     A.u16[3] = (A.u16[3]!=B.u16[3]) ? 0xFFFF : 0;
00686     A.u16[4] = (A.u16[4]!=B.u16[4]) ? 0xFFFF : 0;
00687     A.u16[5] = (A.u16[5]!=B.u16[5]) ? 0xFFFF : 0;
00688     A.u16[6] = (A.u16[6]!=B.u16[6]) ? 0xFFFF : 0;
00689     A.u16[7] = (A.u16[7]!=B.u16[7]) ? 0xFFFF : 0;
00690     return A.i;
00691 }
00692 
00694 SSP_FORCEINLINE __m128i ssp_comneq_epu32_REF(__m128i a, __m128i b)
00695 {
00696     ssp_m128 A,B;
00697     A.i = a;
00698     B.i = b;
00699     A.u32[0] = (A.u32[0]!=B.u32[0]) ? 0xFFFFFFFF : 0;
00700     A.u32[1] = (A.u32[1]!=B.u32[1]) ? 0xFFFFFFFF : 0;
00701     A.u32[2] = (A.u32[2]!=B.u32[2]) ? 0xFFFFFFFF : 0;
00702     A.u32[3] = (A.u32[3]!=B.u32[3]) ? 0xFFFFFFFF : 0;
00703     return A.i;
00704 }
00705 
00707 SSP_FORCEINLINE __m128i ssp_comneq_epu64_REF(__m128i a, __m128i b)
00708 {
00709     ssp_m128 A,B;
00710     A.i = a;
00711     B.i = b;
00712     A.u64[0] = (A.u64[0]!=B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00713     A.u64[1] = (A.u64[1]!=B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00714     return A.i;
00715 }
00716 
00718 SSP_FORCEINLINE __m128i ssp_comneq_epu8_REF(__m128i a, __m128i b)
00719 {
00720     ssp_m128 A,B;
00721     A.i = a;
00722     B.i = b;
00723     A.u8[ 0] = (A.u8[ 0]!=B.u8[ 0]) ? 0xFF : 0;
00724     A.u8[ 1] = (A.u8[ 1]!=B.u8[ 1]) ? 0xFF : 0;
00725     A.u8[ 2] = (A.u8[ 2]!=B.u8[ 2]) ? 0xFF : 0;
00726     A.u8[ 3] = (A.u8[ 3]!=B.u8[ 3]) ? 0xFF : 0;
00727     A.u8[ 4] = (A.u8[ 4]!=B.u8[ 4]) ? 0xFF : 0;
00728     A.u8[ 5] = (A.u8[ 5]!=B.u8[ 5]) ? 0xFF : 0;
00729     A.u8[ 6] = (A.u8[ 6]!=B.u8[ 6]) ? 0xFF : 0;
00730     A.u8[ 7] = (A.u8[ 7]!=B.u8[ 7]) ? 0xFF : 0; 
00731         A.u8[ 8] = (A.u8[ 8]!=B.u8[ 8]) ? 0xFF : 0;
00732     A.u8[ 9] = (A.u8[ 9]!=B.u8[ 9]) ? 0xFF : 0;
00733     A.u8[10] = (A.u8[10]!=B.u8[10]) ? 0xFF : 0;
00734     A.u8[11] = (A.u8[11]!=B.u8[11]) ? 0xFF : 0;
00735     A.u8[12] = (A.u8[12]!=B.u8[12]) ? 0xFF : 0;
00736     A.u8[13] = (A.u8[13]!=B.u8[13]) ? 0xFF : 0;
00737     A.u8[14] = (A.u8[14]!=B.u8[14]) ? 0xFF : 0;
00738     A.u8[15] = (A.u8[15]!=B.u8[15]) ? 0xFF : 0;
00739     return A.i;
00740 }
00741 
00743 SSP_FORCEINLINE __m128d ssp_comneq_pd_REF(__m128d a, __m128d b)
00744 {
00745     ssp_m128 A,B;
00746     A.d = a;
00747     B.d = b;
00748     
00749         A.u64[0] = (A.f64[0]!=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00750     A.u64[1] = (A.f64[1]!=B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00751     return A.d;
00752 }
00753 
00755 SSP_FORCEINLINE __m128 ssp_comneq_ps_REF(__m128 a, __m128 b)
00756 {
00757     ssp_m128 A,B;
00758     A.f = a;
00759     B.f = b;
00760     A.u32[0] = (A.f32[0]!=B.f32[0]) ? 0xFFFFFFFF : 0;
00761     A.u32[1] = (A.f32[1]!=B.f32[1]) ? 0xFFFFFFFF : 0;
00762     A.u32[2] = (A.f32[2]!=B.f32[2]) ? 0xFFFFFFFF : 0;
00763     A.u32[3] = (A.f32[3]!=B.f32[3]) ? 0xFFFFFFFF : 0;
00764     return A.f;
00765 }
00766 
00768 SSP_FORCEINLINE __m128d ssp_comneq_sd_REF(__m128d a, __m128d b)
00769 {
00770     ssp_m128 A,B;
00771     A.d = a;
00772     B.d = b;
00773     
00774         A.u64[0] = (A.f64[0]!=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00775     return A.d;
00776 }
00777 
00779 SSP_FORCEINLINE __m128 ssp_comneq_ss_REF(__m128 a, __m128 b)
00780 {
00781     ssp_m128 A,B;
00782     A.f = a;
00783     B.f = b;
00784         A.u32[0] = (A.f32[0]!=B.f32[0]) ? 0xFFFFFFFF : 0;
00785     return A.f;
00786 }
00787 
00788 
00789 //----------------------------------------
00790 // COMNLT (Condition 5)
00791 //----------------------------------------
00792 
00794 SSP_FORCEINLINE __m128d ssp_comnlt_pd_REF(__m128d a, __m128d b)
00795 {
00796     ssp_m128 A,B;
00797     A.d = a;
00798     B.d = b;
00799  
00800     A.u64[0] = (A.f64[0]<B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF;
00801     A.u64[1] = (A.f64[1]<B.f64[1]) ? 0 : 0xFFFFFFFFFFFFFFFF;
00802  
00803     return A.d;
00804 }
00805 
00807 SSP_FORCEINLINE __m128 ssp_comnlt_ps_REF(__m128 a, __m128 b)
00808 {
00809     ssp_m128 A,B;
00810     A.f = a;
00811     B.f = b;
00812     A.u32[0] = (A.f32[0]<B.f32[0]) ? 0 : 0xFFFFFFFF;
00813     A.u32[1] = (A.f32[1]<B.f32[1]) ? 0 : 0xFFFFFFFF;
00814     A.u32[2] = (A.f32[2]<B.f32[2]) ? 0 : 0xFFFFFFFF;
00815     A.u32[3] = (A.f32[3]<B.f32[3]) ? 0 : 0xFFFFFFFF;
00816     return A.f;
00817 }
00818 
00820 SSP_FORCEINLINE __m128d ssp_comnlt_sd_REF(__m128d a, __m128d b)
00821 {
00822     ssp_m128 A,B;
00823     A.d = a;
00824     B.d = b;
00825  
00826     A.u64[0] = (A.f64[0]<B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF;
00827  
00828     return A.d;
00829 }
00830 
00832 SSP_FORCEINLINE __m128 ssp_comnlt_ss_REF(__m128 a, __m128 b)
00833 {
00834     ssp_m128 A,B;
00835     A.f = a;
00836     B.f = b;
00837     A.u32[0] = (A.f32[0]<B.f32[0]) ? 0 : 0xFFFFFFFF;
00838     return A.f;
00839 }
00840 
00841 
00842 //----------------------------------------
00843 // COMNLE (Condition 6)
00844 //----------------------------------------
00845 
00847 SSP_FORCEINLINE __m128d ssp_comnle_pd_REF(__m128d a, __m128d b)
00848 {
00849     ssp_m128 A,B;
00850     A.d = a;
00851     B.d = b;
00852     A.u64[0] = (A.f64[0]<=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF;
00853     A.u64[1] = (A.f64[1]<=B.f64[1]) ? 0 : 0xFFFFFFFFFFFFFFFF;
00854  
00855     return A.d;
00856 }
00857 
00859 SSP_FORCEINLINE __m128 ssp_comnle_ps_REF(__m128 a, __m128 b)
00860 {
00861     ssp_m128 A,B;
00862     A.f = a;
00863     B.f = b;
00864     A.u32[0] = (A.f32[0]<=B.f32[0]) ? 0 : 0xFFFFFFFF;
00865     A.u32[1] = (A.f32[1]<=B.f32[1]) ? 0 : 0xFFFFFFFF;
00866     A.u32[2] = (A.f32[2]<=B.f32[2]) ? 0 : 0xFFFFFFFF;
00867     A.u32[3] = (A.f32[3]<=B.f32[3]) ? 0 : 0xFFFFFFFF;
00868     return A.f;
00869 }
00870 
00872 SSP_FORCEINLINE __m128d ssp_comnle_sd_REF(__m128d a, __m128d b)
00873 {
00874     ssp_m128 A,B;
00875     A.d = a;
00876     B.d = b;
00877     A.u64[0] = (A.f64[0]<=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF;
00878  
00879     return A.d;
00880 }
00881 
00883 SSP_FORCEINLINE __m128 ssp_comnle_ss_REF(__m128 a, __m128 b)
00884 {
00885     ssp_m128 A,B;
00886     A.f = a;
00887     B.f = b;
00888         A.u32[0] = (A.f32[0]<=B.f32[0]) ? 0 : 0xFFFFFFFF;
00889     return A.f;
00890 }
00891 
00892 
00893 //----------------------------------------
00894 // COMORD (Condition 7)
00895 //----------------------------------------
00896 
00898 SSP_FORCEINLINE __m128d ssp_comord_pd_REF(__m128d a, __m128d b)
00899 {
00900     ssp_m128 A,B;
00901     A.d = a;
00902     B.d = b; // NAN(A)              || NAN(B)         
00903     A.u64[0] = (A.f64[0]!=A.f64[0]) || (B.f64[0]!=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF;
00904     A.u64[1] = (A.f64[1]!=A.f64[1]) || (B.f64[1]!=B.f64[1]) ? 0 : 0xFFFFFFFFFFFFFFFF;
00905     return A.d;
00906 }
00907 
00909 SSP_FORCEINLINE __m128 ssp_comord_ps_REF(__m128 a, __m128 b)
00910 {
00911     ssp_m128 A,B;
00912     A.f = a;
00913     B.f = b; // NAN(A)              || NAN(B)         
00914     A.u32[0] = (A.f32[0]!=A.f32[0]) || (B.f32[0]!=B.f32[0]) ? 0 : 0xFFFFFFFF;
00915     A.u32[1] = (A.f32[1]!=A.f32[1]) || (B.f32[1]!=B.f32[1]) ? 0 : 0xFFFFFFFF;
00916     A.u32[2] = (A.f32[2]!=A.f32[2]) || (B.f32[2]!=B.f32[2]) ? 0 : 0xFFFFFFFF;
00917     A.u32[3] = (A.f32[3]!=A.f32[3]) || (B.f32[3]!=B.f32[3]) ? 0 : 0xFFFFFFFF;
00918     return A.f;
00919 }
00920 
00922 SSP_FORCEINLINE __m128d ssp_comord_sd_REF(__m128d a, __m128d b)
00923 {
00924     ssp_m128 A,B;
00925     A.d = a;
00926     B.d = b; // NAN(A)              || NAN(B)         
00927     A.u64[0] = (A.f64[0]!=A.f64[0]) || (B.f64[0]!=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF;
00928     return A.d;
00929 }
00930 
00932 SSP_FORCEINLINE __m128 ssp_comord_ss_REF(__m128 a, __m128 b)
00933 {
00934     ssp_m128 A,B;
00935     A.f = a;
00936     B.f = b; // NAN(A)              || NAN(B)         
00937     A.u32[0] = (A.f32[0]!=A.f32[0]) || (B.f32[0]!=B.f32[0]) ? 0 : 0xFFFFFFFF;
00938     return A.f;
00939 }
00940 
00941 
00942 //----------------------------------------
00943 // COMUEQ (Condition 8)
00944 //----------------------------------------
00945 
00947 SSP_FORCEINLINE __m128d ssp_comueq_pd_REF(__m128d a, __m128d b)
00948 {
00949     ssp_m128 A,B;
00950     A.d = a;
00951     B.d = b;
00952         A.u64[0] = ((A.f64[0]<B.f64[0]) || (A.f64[0]>B.f64[0])) ? 0 : 0xFFFFFFFFFFFFFFFF;
00953     A.u64[1] = ((A.f64[1]<B.f64[1]) || (A.f64[1]>B.f64[1])) ? 0 : 0xFFFFFFFFFFFFFFFF;
00954     return A.d;
00955 }
00956 
00958 SSP_FORCEINLINE __m128 ssp_comueq_ps_REF(__m128 a, __m128 b)
00959 {
00960     ssp_m128 A,B;
00961     A.f = a;
00962     B.f = b;             
00963     A.u32[0] = (A.f32[0]<B.f32[0]) || (A.f32[0]>B.f32[0]) ? 0 : 0xFFFFFFFF;
00964     A.u32[1] = (A.f32[1]<B.f32[1]) || (A.f32[1]>B.f32[1]) ? 0 : 0xFFFFFFFF;
00965     A.u32[2] = (A.f32[2]<B.f32[2]) || (A.f32[2]>B.f32[2]) ? 0 : 0xFFFFFFFF;
00966     A.u32[3] = (A.f32[3]<B.f32[3]) || (A.f32[3]>B.f32[3]) ? 0 : 0xFFFFFFFF;
00967     return A.f;
00968 }
00969 
00971 SSP_FORCEINLINE __m128d ssp_comueq_sd_REF(__m128d a, __m128d b)
00972 {
00973     ssp_m128 A,B;
00974     A.d = a;
00975     B.d = b;
00976         A.u64[0] = ((A.f64[0]<B.f64[0]) || (A.f64[0]>B.f64[0])) ? 0 : 0xFFFFFFFFFFFFFFFF;
00977     return A.d;
00978 }
00979 
00981 SSP_FORCEINLINE __m128 ssp_comueq_ss_REF(__m128 a, __m128 b)
00982 {
00983     ssp_m128 A,B;
00984     A.f = a;
00985     B.f = b;
00986         A.u32[0] = (A.f32[0]<B.f32[0]) || (A.f32[0]>B.f32[0]) ? 0 : 0xFFFFFFFF;
00987     return A.f;
00988 }
00989 
00990 
00991 //----------------------------------------
00992 // COMNGE (Condition 9)
00993 //----------------------------------------
00994 
00996 SSP_FORCEINLINE __m128d ssp_comnge_pd_REF(__m128d a, __m128d b)
00997 {
00998     ssp_m128 A,B;
00999     A.d = a;
01000     B.d = b;
01001         A.u64[0] = (A.f64[0]>=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF;
01002     A.u64[1] = (A.f64[1]>=B.f64[1]) ? 0 : 0xFFFFFFFFFFFFFFFF;
01003     return A.d;
01004 }
01005 
01007 SSP_FORCEINLINE __m128 ssp_comnge_ps_REF(__m128 a, __m128 b)
01008 {
01009     ssp_m128 A,B;
01010     A.f = a;
01011     B.f = b;
01012     A.u32[0] = (A.f32[0]>=B.f32[0]) ? 0 : 0xFFFFFFFF;
01013     A.u32[1] = (A.f32[1]>=B.f32[1]) ? 0 : 0xFFFFFFFF;
01014     A.u32[2] = (A.f32[2]>=B.f32[2]) ? 0 : 0xFFFFFFFF;
01015     A.u32[3] = (A.f32[3]>=B.f32[3]) ? 0 : 0xFFFFFFFF;
01016     return A.f;
01017 }
01018 
01020 SSP_FORCEINLINE __m128d ssp_comnge_sd_REF(__m128d a, __m128d b)
01021 {
01022     ssp_m128 A,B;
01023     A.d = a;
01024     B.d = b;
01025         A.u64[0] = (A.f64[0]>=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF;
01026     return A.d;
01027 }
01028 
01030 SSP_FORCEINLINE __m128 ssp_comnge_ss_REF(__m128 a, __m128 b)
01031 {
01032     ssp_m128 A,B;
01033     A.f = a;
01034     B.f = b;
01035     A.u32[0] = (A.f32[0]>=B.f32[0]) ? 0 : 0xFFFFFFFF;
01036     return A.f;
01037 }
01038 
01039 
01040 //----------------------------------------
01041 // COMNGT (Condition 10)
01042 //----------------------------------------
01043 
01045 SSP_FORCEINLINE __m128d ssp_comngt_pd_REF(__m128d a, __m128d b)
01046 {
01047     ssp_m128 A,B;
01048     A.d = a;
01049     B.d = b;
01050         A.u64[0] = (A.f64[0]>B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF;
01051     A.u64[1] = (A.f64[1]>B.f64[1]) ? 0 : 0xFFFFFFFFFFFFFFFF;
01052     return A.d;
01053 }
01054 
01056 SSP_FORCEINLINE __m128 ssp_comngt_ps_REF(__m128 a, __m128 b)
01057 {
01058     ssp_m128 A,B;
01059     A.f = a;
01060     B.f = b;
01061     A.u32[0] = (A.f32[0]>B.f32[0]) ? 0 : 0xFFFFFFFF;
01062     A.u32[1] = (A.f32[1]>B.f32[1]) ? 0 : 0xFFFFFFFF;
01063     A.u32[2] = (A.f32[2]>B.f32[2]) ? 0 : 0xFFFFFFFF;
01064     A.u32[3] = (A.f32[3]>B.f32[3]) ? 0 : 0xFFFFFFFF;
01065     return A.f;
01066 }
01067 
01069 SSP_FORCEINLINE __m128d ssp_comngt_sd_REF(__m128d a, __m128d b)
01070 {
01071     ssp_m128 A,B;
01072     A.d = a;
01073     B.d = b;
01074         A.u64[0] = (A.f64[0]>B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF;
01075     return A.d;
01076 }
01077 
01079 SSP_FORCEINLINE __m128 ssp_comngt_ss_REF(__m128 a, __m128 b)
01080 {
01081     ssp_m128 A,B;
01082     A.f = a;
01083     B.f = b;
01084         A.u32[0] = (A.f32[0]>B.f32[0]) ? 0 : 0xFFFFFFFF;
01085     return A.f;
01086 }
01087 
01088 
01089 //----------------------------------------
01090 // COMFALSE (Condition 11)
01091 //----------------------------------------
01092 
01094 SSP_FORCEINLINE __m128i ssp_comfalse_epi16_REF(__m128i a, __m128i b)
01095 {
01096     const static __m128i tmp = SSP_CONST_SET_32I( 0,0,0,0 );  
01097     return tmp;
01098 }
01099 
01101 SSP_FORCEINLINE __m128i ssp_comfalse_epi32_REF(__m128i a, __m128i b)
01102 {
01103     return ssp_comfalse_epi16_REF(a,b);
01104 }
01105 
01107 SSP_FORCEINLINE __m128i ssp_comfalse_epi64_REF(__m128i a, __m128i b)
01108 {
01109     return ssp_comfalse_epi16_REF(a,b);
01110 }
01111 
01113 SSP_FORCEINLINE __m128i ssp_comfalse_epi8_REF(__m128i a, __m128i b)
01114 {
01115     return ssp_comfalse_epi16_REF(a,b);
01116 }
01117 
01119 SSP_FORCEINLINE __m128i ssp_comfalse_epu16_REF(__m128i a, __m128i b)
01120 {
01121     return ssp_comfalse_epi16_REF(a,b);
01122 }
01123 
01125 SSP_FORCEINLINE __m128i ssp_comfalse_epu32_REF(__m128i a, __m128i b)
01126 {
01127     return ssp_comfalse_epi16_REF(a,b);
01128 }
01129 
01131 SSP_FORCEINLINE __m128i ssp_comfalse_epu64_REF(__m128i a, __m128i b)
01132 {
01133     return ssp_comfalse_epi16_REF(a,b);
01134 }
01135 
01137 SSP_FORCEINLINE __m128i ssp_comfalse_epu8_REF(__m128i a, __m128i b)
01138 {
01139     return ssp_comfalse_epi16_REF(a,b);
01140 }
01141 
01143 SSP_FORCEINLINE __m128d ssp_comfalse_pd_REF(__m128d a, __m128d b)
01144 {
01145     const static __m128d tmp = SSP_CONST_SET_64F( 0, 0 );      
01146     return tmp;
01147 }
01148 
01150 SSP_FORCEINLINE __m128 ssp_comfalse_ps_REF(__m128 a, __m128 b)
01151 {
01152    const static __m128 tmp = SSP_CONST_SET_32F( 0, 0, 0, 0 );      
01153    return tmp;
01154 }
01155 
01157 SSP_FORCEINLINE __m128d ssp_comfalse_sd_REF(__m128d a, __m128d b)
01158 {
01159     ssp_m128 A;
01160     A.d = a;
01161     A.u64[0] = 0;
01162     return A.d;
01163 }
01164 
01166 SSP_FORCEINLINE __m128 ssp_comfalse_ss_REF(__m128 a, __m128 b)
01167 {
01168     ssp_m128 A;
01169     A.f = a;
01170     A.u32[0] = 0;
01171     return A.f;
01172 }
01173 
01174 
01175 //----------------------------------------
01176 // COMONEQ (Condition 12)
01177 //----------------------------------------
01178 
01180 SSP_FORCEINLINE __m128d ssp_comoneq_pd_REF(__m128d a, __m128d b)
01181 {
01182     ssp_m128 A,B;
01183     A.d = a;
01184     B.d = b; 
01185     A.u64[0] = ((A.f64[0]<B.f64[0]) || (A.f64[0]>B.f64[0]))  ? 0xFFFFFFFFFFFFFFFF : 0;
01186     A.u64[1] = ((A.f64[1]<B.f64[1]) || (A.f64[1]>B.f64[1]))  ? 0xFFFFFFFFFFFFFFFF : 0;
01187     return A.d;   
01188 }
01189 
01191 SSP_FORCEINLINE __m128 ssp_comoneq_ps_REF(__m128 a, __m128 b)
01192 {
01193     ssp_m128 A,B;
01194     A.f = a;
01195     B.f = b; 
01196     A.u32[0] = (A.f32[0]<B.f32[0]) || (A.f32[0]>B.f32[0])  ? 0xFFFFFFFF : 0;
01197     A.u32[1] = (A.f32[1]<B.f32[1]) || (A.f32[1]>B.f32[1])  ? 0xFFFFFFFF : 0;
01198     A.u32[2] = (A.f32[2]<B.f32[2]) || (A.f32[2]>B.f32[2])  ? 0xFFFFFFFF : 0;
01199     A.u32[3] = (A.f32[3]<B.f32[3]) || (A.f32[3]>B.f32[3])  ? 0xFFFFFFFF : 0;
01200     return A.f;   
01201 }
01202 
01204 SSP_FORCEINLINE __m128d ssp_comoneq_sd_REF(__m128d a, __m128d b)
01205 {
01206     ssp_m128 A,B;
01207     A.d = a;
01208     B.d = b; 
01209     A.u64[0] = ((A.f64[0]<B.f64[0]) || (A.f64[0]>B.f64[0]))  ? 0xFFFFFFFFFFFFFFFF : 0; 
01210     return A.d;   
01211 }
01212 
01214 SSP_FORCEINLINE __m128 ssp_comoneq_ss_REF(__m128 a, __m128 b)
01215 {
01216     ssp_m128 A,B;
01217     A.f = a;
01218     B.f = b;  
01219         A.u32[0] = (A.f32[0]<B.f32[0]) || (A.f32[0]>B.f32[0])  ? 0xFFFFFFFF : 0;
01220     return A.f;   
01221 }
01222 
01223 
01224 //----------------------------------------
01225 // COMGE (Condition 13)
01226 //----------------------------------------
01227 
01229 SSP_FORCEINLINE __m128i ssp_comge_epi16_REF(__m128i a, __m128i b)
01230 {
01231     ssp_m128 A,B;
01232     A.i = a;
01233     B.i = b;
01234     A.u16[0] = (A.s16[0]>=B.s16[0]) ? 0xFFFF : 0;
01235     A.u16[1] = (A.s16[1]>=B.s16[1]) ? 0xFFFF : 0;
01236     A.u16[2] = (A.s16[2]>=B.s16[2]) ? 0xFFFF : 0;
01237     A.u16[3] = (A.s16[3]>=B.s16[3]) ? 0xFFFF : 0;
01238     A.u16[4] = (A.s16[4]>=B.s16[4]) ? 0xFFFF : 0;
01239     A.u16[5] = (A.s16[5]>=B.s16[5]) ? 0xFFFF : 0;
01240     A.u16[6] = (A.s16[6]>=B.s16[6]) ? 0xFFFF : 0;
01241     A.u16[7] = (A.s16[7]>=B.s16[7]) ? 0xFFFF : 0;
01242     return A.i;
01243 }
01244 
01246 SSP_FORCEINLINE __m128i ssp_comge_epi32_REF(__m128i a, __m128i b)
01247 {
01248     ssp_m128 A,B;
01249     A.i = a;
01250     B.i = b;
01251     A.u32[0] = (A.s32[0]>=B.s32[0]) ? 0xFFFFFFFF : 0;
01252     A.u32[1] = (A.s32[1]>=B.s32[1]) ? 0xFFFFFFFF : 0;
01253     A.u32[2] = (A.s32[2]>=B.s32[2]) ? 0xFFFFFFFF : 0;
01254     A.u32[3] = (A.s32[3]>=B.s32[3]) ? 0xFFFFFFFF : 0;
01255     return A.i;
01256 }
01257 
01259 SSP_FORCEINLINE __m128i ssp_comge_epi64_REF(__m128i a, __m128i b)
01260 {
01261     ssp_m128 A,B;
01262     A.i = a;
01263     B.i = b;
01264     A.u64[0] = (A.s64[0]>=B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01265     A.u64[1] = (A.s64[1]>=B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01266     return A.i;
01267 }
01268 
01270 SSP_FORCEINLINE __m128i ssp_comge_epi8_REF(__m128i a, __m128i b)
01271 {
01272     ssp_m128 A,B;
01273     A.i = a;
01274     B.i = b;
01275     A.u8[ 0] = (A.s8[ 0]>=B.s8[ 0]) ? 0xFF : 0;
01276     A.u8[ 1] = (A.s8[ 1]>=B.s8[ 1]) ? 0xFF : 0;
01277     A.u8[ 2] = (A.s8[ 2]>=B.s8[ 2]) ? 0xFF : 0;
01278     A.u8[ 3] = (A.s8[ 3]>=B.s8[ 3]) ? 0xFF : 0;
01279     A.u8[ 4] = (A.s8[ 4]>=B.s8[ 4]) ? 0xFF : 0;
01280     A.u8[ 5] = (A.s8[ 5]>=B.s8[ 5]) ? 0xFF : 0;
01281     A.u8[ 6] = (A.s8[ 6]>=B.s8[ 6]) ? 0xFF : 0;
01282     A.u8[ 7] = (A.s8[ 7]>=B.s8[ 7]) ? 0xFF : 0; 
01283         A.u8[ 8] = (A.s8[ 8]>=B.s8[ 8]) ? 0xFF : 0;
01284     A.u8[ 9] = (A.s8[ 9]>=B.s8[ 9]) ? 0xFF : 0;
01285     A.u8[10] = (A.s8[10]>=B.s8[10]) ? 0xFF : 0;
01286     A.u8[11] = (A.s8[11]>=B.s8[11]) ? 0xFF : 0;
01287     A.u8[12] = (A.s8[12]>=B.s8[12]) ? 0xFF : 0;
01288     A.u8[13] = (A.s8[13]>=B.s8[13]) ? 0xFF : 0;
01289     A.u8[14] = (A.s8[14]>=B.s8[14]) ? 0xFF : 0;
01290     A.u8[15] = (A.s8[15]>=B.s8[15]) ? 0xFF : 0;
01291     return A.i;
01292 }
01293 
01295 SSP_FORCEINLINE __m128i ssp_comge_epu16_REF(__m128i a, __m128i b)
01296 {
01297     ssp_m128 A,B;
01298     A.i = a;
01299     B.i = b;
01300     A.u16[0] = (A.u16[0]>=B.u16[0]) ? 0xFFFF : 0;
01301     A.u16[1] = (A.u16[1]>=B.u16[1]) ? 0xFFFF : 0;
01302     A.u16[2] = (A.u16[2]>=B.u16[2]) ? 0xFFFF : 0;
01303     A.u16[3] = (A.u16[3]>=B.u16[3]) ? 0xFFFF : 0;
01304     A.u16[4] = (A.u16[4]>=B.u16[4]) ? 0xFFFF : 0;
01305     A.u16[5] = (A.u16[5]>=B.u16[5]) ? 0xFFFF : 0;
01306     A.u16[6] = (A.u16[6]>=B.u16[6]) ? 0xFFFF : 0;
01307     A.u16[7] = (A.u16[7]>=B.u16[7]) ? 0xFFFF : 0;
01308     return A.i;
01309 }
01310 
01312 SSP_FORCEINLINE __m128i ssp_comge_epu32_REF(__m128i a, __m128i b)
01313 {
01314     ssp_m128 A,B;
01315     A.i = a;
01316     B.i = b;
01317     A.u32[0] = (A.u32[0]>=B.u32[0]) ? 0xFFFFFFFF : 0;
01318     A.u32[1] = (A.u32[1]>=B.u32[1]) ? 0xFFFFFFFF : 0;
01319     A.u32[2] = (A.u32[2]>=B.u32[2]) ? 0xFFFFFFFF : 0;
01320     A.u32[3] = (A.u32[3]>=B.u32[3]) ? 0xFFFFFFFF : 0;
01321     return A.i;
01322 }
01323 
01325 SSP_FORCEINLINE __m128i ssp_comge_epu64_REF(__m128i a, __m128i b)
01326 {
01327     ssp_m128 A,B;
01328     A.i = a;
01329     B.i = b;
01330     A.u64[0] = (A.u64[0]>=B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01331     A.u64[1] = (A.u64[1]>=B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01332     return A.i;
01333 }
01334 
01336 SSP_FORCEINLINE __m128i ssp_comge_epu8_REF(__m128i a, __m128i b)
01337 {
01338     ssp_m128 A,B;
01339     A.i = a;
01340     B.i = b;
01341     A.u8[ 0] = (A.u8[ 0]>=B.u8[ 0]) ? 0xFF : 0;
01342     A.u8[ 1] = (A.u8[ 1]>=B.u8[ 1]) ? 0xFF : 0;
01343     A.u8[ 2] = (A.u8[ 2]>=B.u8[ 2]) ? 0xFF : 0;
01344     A.u8[ 3] = (A.u8[ 3]>=B.u8[ 3]) ? 0xFF : 0;
01345     A.u8[ 4] = (A.u8[ 4]>=B.u8[ 4]) ? 0xFF : 0;
01346     A.u8[ 5] = (A.u8[ 5]>=B.u8[ 5]) ? 0xFF : 0;
01347     A.u8[ 6] = (A.u8[ 6]>=B.u8[ 6]) ? 0xFF : 0;
01348     A.u8[ 7] = (A.u8[ 7]>=B.u8[ 7]) ? 0xFF : 0; 
01349         A.u8[ 8] = (A.u8[ 8]>=B.u8[ 8]) ? 0xFF : 0;
01350     A.u8[ 9] = (A.u8[ 9]>=B.u8[ 9]) ? 0xFF : 0;
01351     A.u8[10] = (A.u8[10]>=B.u8[10]) ? 0xFF : 0;
01352     A.u8[11] = (A.u8[11]>=B.u8[11]) ? 0xFF : 0;
01353     A.u8[12] = (A.u8[12]>=B.u8[12]) ? 0xFF : 0;
01354     A.u8[13] = (A.u8[13]>=B.u8[13]) ? 0xFF : 0;
01355     A.u8[14] = (A.u8[14]>=B.u8[14]) ? 0xFF : 0;
01356     A.u8[15] = (A.u8[15]>=B.u8[15]) ? 0xFF : 0;
01357     return A.i;
01358 }
01359 
01361 SSP_FORCEINLINE __m128d ssp_comge_pd_REF(__m128d a, __m128d b)
01362 {
01363     ssp_m128 A,B;
01364     A.d = a;
01365     B.d = b;
01366     A.u64[0] = (A.f64[0]>=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01367     A.u64[1] = (A.f64[1]>=B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01368     return A.d;
01369 }
01370 
01372 SSP_FORCEINLINE __m128 ssp_comge_ps_REF(__m128 a, __m128 b)
01373 {
01374     ssp_m128 A,B;
01375     A.f = a;
01376     B.f = b;
01377     A.u32[0] = (A.f32[0]>=B.f32[0]) ? 0xFFFFFFFF : 0;
01378     A.u32[1] = (A.f32[1]>=B.f32[1]) ? 0xFFFFFFFF : 0;
01379     A.u32[2] = (A.f32[2]>=B.f32[2]) ? 0xFFFFFFFF : 0;
01380     A.u32[3] = (A.f32[3]>=B.f32[3]) ? 0xFFFFFFFF : 0;
01381     return A.f;
01382 }
01383 
01385 SSP_FORCEINLINE __m128d ssp_comge_sd_REF(__m128d a, __m128d b)
01386 {
01387     ssp_m128 A,B;
01388     A.d = a;
01389     B.d = b;
01390     A.u64[0] = (A.f64[0]>=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0 ;
01391     return A.d;
01392 }
01393 
01395 SSP_FORCEINLINE __m128 ssp_comge_ss_REF(__m128 a, __m128 b)
01396 {
01397     ssp_m128 A,B;
01398     A.f = a;
01399     B.f = b; 
01400     A.u32[0] = (A.f32[0]>=B.f32[0]) ? 0xFFFFFFFF : 0; 
01401     return A.f;
01402 }
01403 
01404 
01405 
01406 //----------------------------------------
01407 // COMGT (Condition 14)
01408 //----------------------------------------
01409 
01411 SSP_FORCEINLINE __m128i ssp_comgt_epi16_REF(__m128i a, __m128i b)
01412 {
01413     ssp_m128 A,B;
01414     A.i = a;
01415     B.i = b;
01416     A.u16[0] = (A.s16[0]>B.s16[0]) ? 0xFFFF : 0;
01417     A.u16[1] = (A.s16[1]>B.s16[1]) ? 0xFFFF : 0;
01418     A.u16[2] = (A.s16[2]>B.s16[2]) ? 0xFFFF : 0;
01419     A.u16[3] = (A.s16[3]>B.s16[3]) ? 0xFFFF : 0;
01420     A.u16[4] = (A.s16[4]>B.s16[4]) ? 0xFFFF : 0;
01421     A.u16[5] = (A.s16[5]>B.s16[5]) ? 0xFFFF : 0;
01422     A.u16[6] = (A.s16[6]>B.s16[6]) ? 0xFFFF : 0;
01423     A.u16[7] = (A.s16[7]>B.s16[7]) ? 0xFFFF : 0;
01424     return A.i;
01425 }
01426 
01428 SSP_FORCEINLINE __m128i ssp_comgt_epi32_REF(__m128i a, __m128i b)
01429 {
01430     ssp_m128 A,B;
01431     A.i = a;
01432     B.i = b;
01433     A.u32[0] = (A.s32[0]>B.s32[0]) ? 0xFFFFFFFF : 0;
01434     A.u32[1] = (A.s32[1]>B.s32[1]) ? 0xFFFFFFFF : 0;
01435     A.u32[2] = (A.s32[2]>B.s32[2]) ? 0xFFFFFFFF : 0;
01436     A.u32[3] = (A.s32[3]>B.s32[3]) ? 0xFFFFFFFF : 0;
01437     return A.i;
01438 }
01439 
01441 SSP_FORCEINLINE __m128i ssp_comgt_epi64_REF(__m128i a, __m128i b)
01442 {
01443     ssp_m128 A,B;
01444     A.i = a;
01445     B.i = b;
01446     A.u64[0] = (A.s64[0]>B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01447     A.u64[1] = (A.s64[1]>B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01448     return A.i;
01449 }
01450 
01452 SSP_FORCEINLINE __m128i ssp_comgt_epi8_REF(__m128i a, __m128i b)
01453 {
01454     ssp_m128 A,B;
01455     A.i = a;
01456     B.i = b;
01457     A.u8[ 0] = (A.s8[ 0]>B.s8[ 0]) ? 0xFF : 0;
01458     A.u8[ 1] = (A.s8[ 1]>B.s8[ 1]) ? 0xFF : 0;
01459     A.u8[ 2] = (A.s8[ 2]>B.s8[ 2]) ? 0xFF : 0;
01460     A.u8[ 3] = (A.s8[ 3]>B.s8[ 3]) ? 0xFF : 0;
01461     A.u8[ 4] = (A.s8[ 4]>B.s8[ 4]) ? 0xFF : 0;
01462     A.u8[ 5] = (A.s8[ 5]>B.s8[ 5]) ? 0xFF : 0;
01463     A.u8[ 6] = (A.s8[ 6]>B.s8[ 6]) ? 0xFF : 0;
01464     A.u8[ 7] = (A.s8[ 7]>B.s8[ 7]) ? 0xFF : 0; 
01465         A.u8[ 8] = (A.s8[ 8]>B.s8[ 8]) ? 0xFF : 0;
01466     A.u8[ 9] = (A.s8[ 9]>B.s8[ 9]) ? 0xFF : 0;
01467     A.u8[10] = (A.s8[10]>B.s8[10]) ? 0xFF : 0;
01468     A.u8[11] = (A.s8[11]>B.s8[11]) ? 0xFF : 0;
01469     A.u8[12] = (A.s8[12]>B.s8[12]) ? 0xFF : 0;
01470     A.u8[13] = (A.s8[13]>B.s8[13]) ? 0xFF : 0;
01471     A.u8[14] = (A.s8[14]>B.s8[14]) ? 0xFF : 0;
01472     A.u8[15] = (A.s8[15]>B.s8[15]) ? 0xFF : 0;
01473     return A.i;
01474 }
01475 
01477 SSP_FORCEINLINE __m128i ssp_comgt_epu16_REF(__m128i a, __m128i b)
01478 {
01479     ssp_m128 A,B;
01480     A.i = a;
01481     B.i = b;
01482     A.u16[0] = (A.u16[0]>B.u16[0]) ? 0xFFFF : 0;
01483     A.u16[1] = (A.u16[1]>B.u16[1]) ? 0xFFFF : 0;
01484     A.u16[2] = (A.u16[2]>B.u16[2]) ? 0xFFFF : 0;
01485     A.u16[3] = (A.u16[3]>B.u16[3]) ? 0xFFFF : 0;
01486     A.u16[4] = (A.u16[4]>B.u16[4]) ? 0xFFFF : 0;
01487     A.u16[5] = (A.u16[5]>B.u16[5]) ? 0xFFFF : 0;
01488     A.u16[6] = (A.u16[6]>B.u16[6]) ? 0xFFFF : 0;
01489     A.u16[7] = (A.u16[7]>B.u16[7]) ? 0xFFFF : 0;
01490     return A.i;
01491 }
01492 
01494 SSP_FORCEINLINE __m128i ssp_comgt_epu32_REF(__m128i a, __m128i b)
01495 {
01496     ssp_m128 A,B;
01497     A.i = a;
01498     B.i = b;
01499     A.u32[0] = (A.u32[0]>B.u32[0]) ? 0xFFFFFFFF : 0;
01500     A.u32[1] = (A.u32[1]>B.u32[1]) ? 0xFFFFFFFF : 0;
01501     A.u32[2] = (A.u32[2]>B.u32[2]) ? 0xFFFFFFFF : 0;
01502     A.u32[3] = (A.u32[3]>B.u32[3]) ? 0xFFFFFFFF : 0;
01503     return A.i;
01504 }
01505 
01507 SSP_FORCEINLINE __m128i ssp_comgt_epu64_REF(__m128i a, __m128i b)
01508 {
01509     ssp_m128 A,B;
01510     A.i = a;
01511     B.i = b;
01512     A.u64[0] = (A.u64[0]>B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01513     A.u64[1] = (A.u64[1]>B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01514     return A.i;
01515 }
01516 
01518 SSP_FORCEINLINE __m128i ssp_comgt_epu8_REF(__m128i a, __m128i b)
01519 {
01520     ssp_m128 A,B;
01521     A.i = a;
01522     B.i = b;
01523     A.u8[ 0] = (A.u8[ 0]>B.u8[ 0]) ? 0xFF : 0;
01524     A.u8[ 1] = (A.u8[ 1]>B.u8[ 1]) ? 0xFF : 0;
01525     A.u8[ 2] = (A.u8[ 2]>B.u8[ 2]) ? 0xFF : 0;
01526     A.u8[ 3] = (A.u8[ 3]>B.u8[ 3]) ? 0xFF : 0;
01527     A.u8[ 4] = (A.u8[ 4]>B.u8[ 4]) ? 0xFF : 0;
01528     A.u8[ 5] = (A.u8[ 5]>B.u8[ 5]) ? 0xFF : 0;
01529     A.u8[ 6] = (A.u8[ 6]>B.u8[ 6]) ? 0xFF : 0;
01530     A.u8[ 7] = (A.u8[ 7]>B.u8[ 7]) ? 0xFF : 0; 
01531         A.u8[ 8] = (A.u8[ 8]>B.u8[ 8]) ? 0xFF : 0;
01532     A.u8[ 9] = (A.u8[ 9]>B.u8[ 9]) ? 0xFF : 0;
01533     A.u8[10] = (A.u8[10]>B.u8[10]) ? 0xFF : 0;
01534     A.u8[11] = (A.u8[11]>B.u8[11]) ? 0xFF : 0;
01535     A.u8[12] = (A.u8[12]>B.u8[12]) ? 0xFF : 0;
01536     A.u8[13] = (A.u8[13]>B.u8[13]) ? 0xFF : 0;
01537     A.u8[14] = (A.u8[14]>B.u8[14]) ? 0xFF : 0;
01538     A.u8[15] = (A.u8[15]>B.u8[15]) ? 0xFF : 0;
01539     return A.i;
01540 }
01541 
01543 SSP_FORCEINLINE __m128d ssp_comgt_pd_REF(__m128d a, __m128d b)
01544 {
01545     ssp_m128 A,B;
01546     A.d = a;
01547     B.d = b;
01548     A.u64[0] = (A.f64[0]>B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01549     A.u64[1] = (A.f64[1]>B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01550  
01551     return A.d;
01552 }
01553 
01555 SSP_FORCEINLINE __m128 ssp_comgt_ps_REF(__m128 a, __m128 b)
01556 {
01557     ssp_m128 A,B;
01558     A.f = a;
01559     B.f = b;
01560     A.u32[0] = (A.f32[0]>B.f32[0]) ? 0xFFFFFFFF : 0;
01561     A.u32[1] = (A.f32[1]>B.f32[1]) ? 0xFFFFFFFF : 0;
01562     A.u32[2] = (A.f32[2]>B.f32[2]) ? 0xFFFFFFFF : 0;
01563     A.u32[3] = (A.f32[3]>B.f32[3]) ? 0xFFFFFFFF : 0;
01564     return A.f;
01565 }
01566 
01568 SSP_FORCEINLINE __m128d ssp_comgt_sd_REF(__m128d a, __m128d b)
01569 {
01570     ssp_m128 A,B;
01571     A.d = a;
01572     B.d = b;
01573     A.u64[0] = (A.f64[0]>B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01574     return A.d;
01575 }
01576 
01578 SSP_FORCEINLINE __m128 ssp_comgt_ss_REF(__m128 a, __m128 b)
01579 {
01580     ssp_m128 A,B;
01581     A.f = a;
01582     B.f = b;
01583     A.u32[0] = (A.f32[0]>B.f32[0]) ? 0xFFFFFFFF : 0;
01584     return A.f;
01585 }
01586 
01587 
01588 //----------------------------------------
01589 // COMTRUE (Condition 15)
01590 //----------------------------------------
01591 
01593 SSP_FORCEINLINE __m128i ssp_comtrue_epi16_REF(__m128i a, __m128i b)
01594 {
01595     const static __m128i tmp = SSP_CONST_SET_64I( 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF );  
01596     return tmp;
01597 }
01598 
01600 SSP_FORCEINLINE __m128i ssp_comtrue_epi32_REF(__m128i a, __m128i b)
01601 {
01602     return ssp_comtrue_epi16_REF(a,b);
01603 }
01604 
01606 SSP_FORCEINLINE __m128i ssp_comtrue_epi64_REF(__m128i a, __m128i b)
01607 {
01608     return ssp_comtrue_epi16_REF(a,b);
01609 }
01610 
01612 SSP_FORCEINLINE __m128i ssp_comtrue_epi8_REF(__m128i a, __m128i b)
01613 {
01614     return ssp_comtrue_epi16_REF(a,b);
01615 }
01616 
01618 SSP_FORCEINLINE __m128i ssp_comtrue_epu16_REF(__m128i a, __m128i b)
01619 {
01620     return ssp_comtrue_epi16_REF(a,b);
01621 }
01622 
01624 SSP_FORCEINLINE __m128i ssp_comtrue_epu32_REF(__m128i a, __m128i b)
01625 {
01626     return ssp_comtrue_epi16_REF(a,b);
01627 }
01628 
01630 SSP_FORCEINLINE __m128i ssp_comtrue_epu64_REF(__m128i a, __m128i b)
01631 {
01632     return ssp_comtrue_epi16_REF(a,b);
01633 }
01634 
01636 SSP_FORCEINLINE __m128i ssp_comtrue_epu8_REF(__m128i a, __m128i b)
01637 {
01638     return ssp_comtrue_epi16_REF(a,b);
01639 }
01640 
01642 SSP_FORCEINLINE __m128d ssp_comtrue_pd_REF(__m128d a, __m128d b)
01643 {   
01644     const static __m128i tmp = SSP_CONST_SET_64I( 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF );
01645     ssp_m128 A;  
01646     A.i = tmp;
01647     return A.d;
01648 }
01649 
01651 SSP_FORCEINLINE __m128 ssp_comtrue_ps_REF(__m128 a, __m128 b)
01652 {   
01653     const static __m128i tmp = SSP_CONST_SET_64I( 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF );
01654     ssp_m128 A;  
01655     A.i = tmp;
01656     return A.f;
01657 }
01658 
01660 SSP_FORCEINLINE __m128d ssp_comtrue_sd_REF(__m128d a, __m128d b)
01661 {   
01662     ssp_m128 A;    
01663     A.d      = a;
01664     A.u64[0] = 0xFFFFFFFFFFFFFFFF;
01665     return A.d;
01666 }
01667 
01669 SSP_FORCEINLINE __m128 ssp_comtrue_ss_REF(__m128 a, __m128 b)
01670 {   
01671     ssp_m128 A;
01672     A.f = a;
01673     A.u32[0] = 0xFFFFFFFF;
01674     return A.f;
01675 }
01676 
01681 #endif // __SSEPLUS_EMULATION_COMPS_REF_H__

Generated on Wed May 21 13:44:11 2008 for "SSEPlus" by  doxygen 1.5.4