00001
00002
00003
00004
00005 #ifndef __SSEPLUS_LOGICAL_SSE2_H__
00006 #define __SSEPLUS_LOGICAL_SSE2_H__
00007
00008 #include "../native/SSEPlus_native_SSE2.h"
00009
00017 SSP_FORCEINLINE __m128i ssp_logical_signinvert_16_SSE2( __m128i mask, __m128i a, __m128i b)
00018 {
00019 __m128i signMask;
00020 signMask = _mm_xor_si128 ( a, b );
00021 signMask = _mm_srai_epi16 ( signMask, 15 );
00022 mask = _mm_xor_si128 ( mask, signMask );
00023 return mask;
00024 }
00025
00027 SSP_FORCEINLINE __m128i ssp_logical_signinvert_32_SSE2( __m128i mask, __m128i a, __m128i b)
00028 {
00029 __m128i signMask;
00030 signMask = _mm_xor_si128 ( a, b );
00031 signMask = _mm_srai_epi32 ( signMask, 31 );
00032 mask = _mm_xor_si128 ( mask, signMask );
00033 return mask;
00034 }
00035
00036
00037 SSP_FORCEINLINE __m128i ssp_logical_invert_si128_SSE2( __m128i a )
00038 {
00039 const static __m128i mask = SSP_CONST_SET_32I( 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF );
00040 a = _mm_xor_si128( a, mask );
00041 return a;
00042 }
00043
00044 SSP_FORCEINLINE __m128d ssp_logical_invert_sd_SSE2( __m128d a )
00045 {
00046 const static __m128i mask = SSP_CONST_SET_32I( 0xFFFFFFFF, 0xFFFFFFFF, 0, 0 );
00047 ssp_m128 A;
00048 A.d = a;
00049 A.i = _mm_xor_si128( A.i, mask );
00050 return A.d;
00051 }
00052
00053 SSP_FORCEINLINE __m128 ssp_logical_invert_ss_SSE2( __m128 a )
00054 {
00055 const static __m128i mask = SSP_CONST_SET_32I( 0xFFFFFFFF, 0, 0, 0 );
00056 ssp_m128 A;
00057 A.f = a;
00058 A.i = _mm_xor_si128( A.i, mask );
00059 return A.f;
00060 }
00061
00062
00063 SSP_FORCEINLINE __m128i ssp_logical_bitwise_select_SSE2( __m128i a, __m128i b, __m128i mask )
00064 {
00065 a = _mm_and_si128 ( a, mask );
00066 b = _mm_andnot_si128( mask, b );
00067 a = _mm_or_si128 ( a, b );
00068 return a;
00069 }
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083 SSP_FORCEINLINE
00084 __m128i ssp_movmask_imm8_to_epi32_SSE2( int mask )
00085 {
00086 __m128i screen;
00087 const static __m128i mulShiftImm = SSP_CONST_SET_16I( 0x1000, 0x0000, 0x2000, 0x0000, 0x4000, 0x0000, 0x8000, 0x0000 );
00088 screen = _mm_set1_epi16 ( mask );
00089 screen = _mm_mullo_epi16( screen, mulShiftImm );
00090 screen = _mm_srai_epi32 ( screen, 31 );
00091 return screen;
00092 }
00093
00094
/**
 * Shifts each of the sixteen 8-bit elements of \a a left by \a b bits,
 * shifting in zeros (SSE2 has no native 8-bit shift).
 *
 * Each 16-bit lane is treated as a high byte H and a low byte L. Both
 * bytes are shifted separately through the top half of a 16-bit lane so
 * that bits leaving a byte are discarded rather than leaking into its
 * neighbour.
 *
 * Fix: the low-byte result was previously derived from the high-byte
 * temporary, so every low byte ended up as a copy of the shifted high
 * byte. It is now derived from the low-byte temporary.
 *
 * \param a  vector of sixteen 8-bit elements
 * \param b  shift count; 0..7 are the meaningful values. Counts of 8 or
 *           more produce zero because the underlying 16-bit shifts
 *           receive a count above 15. Negative counts are not supported.
 * \return   vector with every byte equal to (byte << b) & 0xFF
 */
SSP_FORCEINLINE __m128i ssp_slli_epi8_SSE2(__m128i a, const int b)
{
    __m128i hi = _mm_srli_epi16( a, 8 );        /* 00:H                      */
    __m128i lo = _mm_slli_epi16( a, b + 8 );    /* (L<<b):00, overflow lost  */

    hi = _mm_slli_epi16( hi, b + 8 );           /* (H<<b):00, overflow lost  */
    lo = _mm_srli_epi16( lo, 8 );               /* 00:(L<<b)                 */

    return _mm_or_si128( hi, lo );              /* (H<<b):(L<<b)             */
}
00107
/**
 * Shifts each of the sixteen 8-bit elements of \a a right by \a b bits,
 * shifting in zeros (SSE2 has no native 8-bit shift).
 *
 * Each 16-bit lane is treated as a high byte H and a low byte L. Both
 * bytes are shifted separately through the bottom half of a 16-bit lane
 * so that bits leaving a byte are discarded rather than leaking into
 * its neighbour.
 *
 * Fix: the high-byte result was previously derived from the low-byte
 * temporary, so every high byte ended up as a copy of the shifted low
 * byte. It is now derived from the high-byte temporary.
 *
 * \param a  vector of sixteen 8-bit elements
 * \param b  shift count; 0..7 are the meaningful values. Counts of 8 or
 *           more produce zero because the underlying 16-bit shifts
 *           receive a count above 15. Negative counts are not supported.
 * \return   vector with every byte equal to (byte >> b), zero-filled
 */
SSP_FORCEINLINE __m128i ssp_srli_epi8_SSE2(__m128i a, const int b)
{
    __m128i lo = _mm_slli_epi16( a, 8 );        /* L:00                      */
    __m128i hi = _mm_srli_epi16( a, b + 8 );    /* 00:(H>>b), L shifted out  */

    lo = _mm_srli_epi16( lo, b + 8 );           /* 00:(L>>b)                 */
    hi = _mm_slli_epi16( hi, 8 );               /* (H>>b):00                 */

    return _mm_or_si128( lo, hi );              /* (H>>b):(L>>b)             */
}
00120
00125 #endif // __SSEPLUS_LOGICAL_SSE2_H__