00001
00002
00003
00004
00005 #ifndef __SSEPLUS_ARITHMETIC_SSE2_H__
00006 #define __SSEPLUS_ARITHMETIC_SSE2_H__
00007
00008 #include "../native/SSEPlus_native_SSE2.h"
00009 #include "../emulation/SSEPlus_emulation_SSE2.h"
00010
00016 SSP_FORCEINLINE
00017 __m128 ssp_arithmetic_hadd4_dup_ps_SSE2( __m128 a )
00018 {
00019 __m128 t;
00020 t = _mm_shuffle_ps( a, a, _MM_SHUFFLE(2, 3, 0, 1) );
00021 a = _mm_add_ps( a, t );
00022
00023 t = _mm_shuffle_ps( a, a, _MM_SHUFFLE(1, 0, 3, 2) );
00024 a = _mm_add_ps( a, t );
00025 return a;
00026 }
00027
00028
00038 SSP_FORCEINLINE
00039 __m128i ssp_arithmetic_hadd4_epi16_SSE2( __m128i a, const unsigned int offset )
00040 {
00041 ssp_m128 A,B;
00042 A.i = a;
00043
00044 if( offset >= 2 ) B.i = _mm_slli_si128( A.i, 4 );
00045 else B.i = _mm_srli_si128( A.i, 4 );
00046
00047 A.i = _mm_add_epi16 ( A.i, B.i );
00048
00049 if( offset & 1 ) B.i = _mm_slli_si128( A.i, 2 );
00050 else B.i = _mm_srli_si128( A.i, 2 );
00051
00052 A.i = _mm_add_epi16 ( A.i, B.i );
00053 return A.i;
00054 }
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00090 SSP_FORCEINLINE
00091 __m128 ssp_round_ps_neg_zero_SSE2( __m128 a, int iRoundMode )
00092 {
00093 const static __m128i SIGN_BIT = SSP_CONST_SET_32I( 0x80000000, 0x80000000, 0x80000000,0x80000000 );
00094 ssp_m128 A, sign;
00095 A.f = a;
00096
00097 sign.i = _mm_and_si128 ( A.i, SIGN_BIT );
00098 A.f = ssp_round_ps_SSE2( A.f, iRoundMode );
00099 A.i = _mm_or_si128 ( A.i, sign.i );
00100
00101 return A.f;
00102 }
00103
00108 #endif // __SSEPLUS_ARITHMETIC_SSE2_H__