Native SSE4A Operations
SSP_FORCEINLINE __m128i | ssp_extract_si64_SSE4A (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_extracti_si64_SSE4A (__m128i a, int len, int ndx) |
SSP_FORCEINLINE __m128i | ssp_insert_si64_SSE4A (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_inserti_si64_SSE4A (__m128i a, __m128i b, int len, int ndx) |
SSP_FORCEINLINE void | ssp_stream_sd_SSE4A (double *dst, __m128d src) |
SSP_FORCEINLINE void | ssp_stream_ss_SSE4A (float *dst, __m128 src) |
SSP_FORCEINLINE unsigned short | ssp_lzcnt16_SSE4A (unsigned short val) |
SSP_FORCEINLINE unsigned int | ssp_lzcnt_SSE4A (unsigned int val) |
SSP_FORCEINLINE unsigned short | ssp_popcnt16_SSE4A (unsigned short val) |
SSP_FORCEINLINE unsigned int | ssp_popcnt_SSE4A (unsigned int val) |
SSP_FORCEINLINE __m128i ssp_extract_si64_SSE4A (__m128i a, __m128i b)
Native implementation of _mm_extract_si64 [SSE4a]. (Searches MSDN)
Definition at line 17 of file SSEPlus_native_SSE4a.h.
SSP_FORCEINLINE __m128i ssp_extracti_si64_SSE4A (__m128i a, int len, int ndx)
Native implementation of _mm_extracti_si64 [SSE4a]. (Searches MSDN)
Definition at line 22 of file SSEPlus_native_SSE4a.h.
SSP_FORCEINLINE __m128i ssp_insert_si64_SSE4A (__m128i a, __m128i b)
Native implementation of _mm_insert_si64 [SSE4a]. (Searches MSDN)
Definition at line 27 of file SSEPlus_native_SSE4a.h.
SSP_FORCEINLINE __m128i ssp_inserti_si64_SSE4A (__m128i a, __m128i b, int len, int ndx)
Native implementation of _mm_inserti_si64 [SSE4a]. (Searches MSDN)
Definition at line 32 of file SSEPlus_native_SSE4a.h.
SSP_FORCEINLINE unsigned short ssp_lzcnt16_SSE4A (unsigned short val)
Native implementation of __lzcnt16 [SSE4a]. (Searches MSDN)
NOTE: Support for the LZCNT instruction is indicated by ECX bit 5 (LZCNT) as returned by CPUID function 8000_0001h. If the LZCNT instruction is not available, the encoding is treated as the BSR instruction, which yields the bit index of the most significant set bit rather than the number of leading zero bits, so the result silently differs. Software MUST check the CPUID bit once per program or library initialization before using the LZCNT instruction, or inconsistent behavior may result.
Definition at line 50 of file SSEPlus_native_SSE4a.h.
SSP_FORCEINLINE unsigned int ssp_lzcnt_SSE4A (unsigned int val)
Native implementation of __lzcnt [SSE4a]. (Searches MSDN)
NOTE: Support for the LZCNT instruction is indicated by ECX bit 5 (LZCNT) as returned by CPUID function 8000_0001h. If the LZCNT instruction is not available, the encoding is treated as the BSR instruction, which yields the bit index of the most significant set bit rather than the number of leading zero bits, so the result silently differs. Software MUST check the CPUID bit once per program or library initialization before using the LZCNT instruction, or inconsistent behavior may result.
Definition at line 57 of file SSEPlus_native_SSE4a.h.
SSP_FORCEINLINE unsigned short ssp_popcnt16_SSE4A (unsigned short val)
Native implementation of __popcnt16 [SSE4a]. (Searches MSDN)
Definition at line 70 of file SSEPlus_native_SSE4a.h.
00070 {Native,__popcnt16} */
00071 SSP_FORCEINLINE unsigned short ssp_popcnt16_SSE4A( unsigned short val )
00072 {
00073     return __popcnt16( val );
00074 }
SSP_FORCEINLINE unsigned int ssp_popcnt_SSE4A (unsigned int val)
Native implementation of __popcnt [SSE4a]. (Searches MSDN)
Definition at line 75 of file SSEPlus_native_SSE4a.h.
00075 {Native,__popcnt} */
00076 SSP_FORCEINLINE unsigned int ssp_popcnt_SSE4A( unsigned int val )
00077 {
00078     return __popcnt( val );
00079 }
SSP_FORCEINLINE void ssp_stream_sd_SSE4A (double *dst, __m128d src)
Native implementation of _mm_stream_sd [SSE4a]. (Searches MSDN)
Definition at line 37 of file SSEPlus_native_SSE4a.h.
SSP_FORCEINLINE void ssp_stream_ss_SSE4A (float *dst, __m128 src)
Native implementation of _mm_stream_ss [SSE4a]. (Searches MSDN)
Definition at line 42 of file SSEPlus_native_SSE4a.h.