00001 
00002 
00003 
00004 
00005 #ifndef __SSEPLUS_LOGICAL_SSE2_H__
00006 #define __SSEPLUS_LOGICAL_SSE2_H__
00007 
00008 #include "../native/SSEPlus_native_SSE2.h"
00009 
00017 SSP_FORCEINLINE __m128i ssp_logical_signinvert_16_SSE2( __m128i mask, __m128i a, __m128i b)
00018 {
00019     __m128i signMask;   
00020     signMask = _mm_xor_si128  ( a, b );              
00021     signMask = _mm_srai_epi16 ( signMask, 15 );      
00022     mask     = _mm_xor_si128  ( mask, signMask );    
00023     return mask;  
00024 }
00025 
00027 SSP_FORCEINLINE __m128i ssp_logical_signinvert_32_SSE2( __m128i mask, __m128i a, __m128i b)
00028 {
00029     __m128i signMask;   
00030     signMask = _mm_xor_si128  ( a, b );              
00031     signMask = _mm_srai_epi32 ( signMask, 31 );      
00032     mask     = _mm_xor_si128  ( mask, signMask );    
00033     return mask;  
00034 }
00035 
00036 
00037 SSP_FORCEINLINE __m128i ssp_logical_invert_si128_SSE2( __m128i a )
00038 {
00039     const static __m128i mask = SSP_CONST_SET_32I( 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF );
00040     a = _mm_xor_si128( a, mask );
00041     return a;
00042 }
00043 
00044 SSP_FORCEINLINE __m128d ssp_logical_invert_sd_SSE2( __m128d a )
00045 {
00046     const static __m128i mask = SSP_CONST_SET_32I( 0xFFFFFFFF, 0xFFFFFFFF, 0, 0 );
00047     ssp_m128 A;
00048     A.d    = a;
00049     A.i = _mm_xor_si128( A.i, mask );
00050     return A.d;
00051 }
00052 
00053 SSP_FORCEINLINE __m128 ssp_logical_invert_ss_SSE2( __m128 a )
00054 {
00055     const static __m128i mask = SSP_CONST_SET_32I( 0xFFFFFFFF, 0, 0, 0 );
00056     ssp_m128 A;
00057     A.f    = a;
00058     A.i = _mm_xor_si128( A.i, mask );
00059     return A.f;
00060 }
00061 
00062 
00063 SSP_FORCEINLINE __m128i ssp_logical_bitwise_select_SSE2( __m128i a, __m128i b, __m128i mask )   
00064 {
00065     a = _mm_and_si128   ( a,    mask );                                 
00066     b = _mm_andnot_si128( mask, b    );                                 
00067     a = _mm_or_si128    ( a,    b    );                                 
00068     return a; 
00069 }
00070 
00071 
00072 
00073 
00074 
00075 
00076 
00077 
00078 
00079 
00080 
00081 
00082 
00083 SSP_FORCEINLINE
00084 __m128i ssp_movmask_imm8_to_epi32_SSE2( int mask )
00085 {
00086     __m128i screen;
00087     const static __m128i mulShiftImm = SSP_CONST_SET_16I( 0x1000, 0x0000, 0x2000, 0x0000, 0x4000, 0x0000, 0x8000, 0x0000 ); 
00088     screen = _mm_set1_epi16 ( mask                );   
00089     screen = _mm_mullo_epi16( screen, mulShiftImm );   
00090     screen = _mm_srai_epi32 ( screen, 31          );   
00091     return screen;
00092 }
00093 
00094 
00098 SSP_FORCEINLINE __m128i ssp_slli_epi8_SSE2(__m128i a, const int b)
00099 {                                            
00100     __m128i t1 = _mm_srli_epi16( a, 8 );     
00101     __m128i t2 = _mm_slli_epi16( a, b + 8 ); 
00102     t1 = _mm_slli_epi16( t1, b + 8 );        
00103     t2 = _mm_srli_epi16( t1, 8 );            
00104     t1 = _mm_or_si128( t1, t2 );             
00105     return t1;
00106 }
00107 
00111 SSP_FORCEINLINE __m128i ssp_srli_epi8_SSE2(__m128i a, const int b)
00112 {                                            
00113     __m128i t1 = _mm_slli_epi16( a, 8 );     
00114     __m128i t2 = _mm_srli_epi16( a, b + 8 ); 
00115     t1 = _mm_srli_epi16( t1, b + 8 );        
00116     t2 = _mm_slli_epi16( t1, 8 );            
00117     t1 = _mm_or_si128( t1, t2 );             
00118     return t1;
00119 }
00120 
00125 #endif // __SSEPLUS_LOGICAL_SSE2_H__