00001
00002
00003
00004
00005 #ifndef __SSEPLUS_NATIVE_SSE4A_H__
00006 #define __SSEPLUS_NATIVE_SSE4A_H__
00007
00008 #include "../SSEPlus_base.h"
00009 #include SSP_INCLUDE_FILE_SSE4a
00010
00017 SSP_FORCEINLINE __m128i ssp_extract_si64_SSE4A( __m128i a ,__m128i b )
00018 {
00019 return _mm_extract_si64( a, b );
00020 }
00022 SSP_FORCEINLINE __m128i ssp_extracti_si64_SSE4A( __m128i a, int len, int ndx )
00023 {
00024 return _mm_extracti_si64( a, len, ndx );
00025 }
00027 SSP_FORCEINLINE __m128i ssp_insert_si64_SSE4A( __m128i a, __m128i b )
00028 {
00029 return _mm_insert_si64( a, b );
00030 }
00032 SSP_FORCEINLINE __m128i ssp_inserti_si64_SSE4A( __m128i a, __m128i b, int len, int ndx )
00033 {
00034 return _mm_inserti_si64( a, b, len, ndx );
00035 }
00037 SSP_FORCEINLINE void ssp_stream_sd_SSE4A( double *dst ,__m128d src )
00038 {
00039 _mm_stream_sd( dst, src );
00040 }
00042 SSP_FORCEINLINE void ssp_stream_ss_SSE4A( float *dst, __m128 src )
00043 {
00044 _mm_stream_ss( dst, src );
00045 }
00046
00050 SSP_FORCEINLINE unsigned short ssp_lzcnt16_SSE4A( unsigned short val )
00051 {
00052 return __lzcnt16( val );
00053 }
00057 SSP_FORCEINLINE unsigned int ssp_lzcnt_SSE4A( unsigned int val )
00058 {
00059 return __lzcnt( val );
00060 }
00061
00062 #ifdef SYS64
00063
00064 SSP_FORCEINLINE ssp_u64 ssp_lzcnt64_SSE4A( ssp_u64 val )
00065 {
00066 return __lzcnt64( val );
00067 }
00068 #endif
00069
00071 SSP_FORCEINLINE unsigned short ssp_popcnt16_SSE4A( unsigned short val )
00072 {
00073 return __popcnt16( val );
00074 }
00076 SSP_FORCEINLINE unsigned int ssp_popcnt_SSE4A( unsigned int val )
00077 {
00078 return __popcnt( val );
00079 }
00080
00081 #ifdef SYS64
00082
00083 SSP_FORCEINLINE ssp_u64 ssp_popcnt64_SSE4A( ssp_u64 val )
00084 {
00085 return __popcnt64( val );
00086 }
00087 #endif
00088
00094 #endif // __SSEPLUS_NATIVE_SSE4A_H__