00001 // 00002 // Copyright (c) 2006-2008 Advanced Micro Devices, Inc. All Rights Reserved. 00003 // This software is subject to the Apache v2.0 License. 00004 // 00005 #ifndef __SSEPLUS_SSE3_H__ 00006 #define __SSEPLUS_SSE3_H__ 00007 00008 #include "SSEPlus_base.h" 00009 00010 //============================================ 00011 // SSE3 Native 00012 //============================================ 00013 __m128d ssp_addsub_pd_SSE3 ( __m128d a, __m128d b ); 00014 __m128 ssp_addsub_ps_SSE3 ( __m128 a, __m128 b ); 00015 __m128d ssp_hadd_pd_SSE3 ( __m128d a, __m128d b ); 00016 __m128 ssp_hadd_ps_SSE3 ( __m128 a, __m128 b ); 00017 __m128d ssp_hsub_pd_SSE3 ( __m128d a, __m128d b ); 00018 __m128 ssp_hsub_ps_SSE3 ( __m128 a, __m128 b ); 00019 __m128i ssp_lddqu_si128_SSE3 ( __m128i const *p ); 00020 __m128d ssp_loaddup_pd_SSE3 ( double const * dp ); 00021 __m128d ssp_movedup_pd_SSE3 ( __m128d a ); 00022 __m128 ssp_movehdup_ps_SSE3 ( __m128 a ); 00023 __m128 ssp_moveldup_ps_SSE3 ( __m128 a ); 00024 00028 //__m128i ssp_abs_epi16_SSE3 ( __m128i a ); 00029 //__m128i ssp_abs_epi32_SSE3 ( __m128i a ); 00030 //__m128i ssp_abs_epi8_SSE3 ( __m128i a ); 00031 //__m64 ssp_abs_pi16_SSE3 ( __m64 a ); 00032 //__m64 ssp_abs_pi32_SSE3 ( __m64 a ); 00033 //__m64 ssp_abs_pi8_SSE3 ( __m64 a ); 00034 //__m128i ssp_alignr_epi8_SSE3 ( __m128i a, __m128i b, int n ); 00035 //__m64 ssp_alignr_pi8_SSE3 ( __m64 a, __m64 b, int n ); 00036 //__m128i ssp_hadd_epi16_SSE3 ( __m128i a, __m128i b ); 00037 //__m128i ssp_hadd_epi32_SSE3 ( __m128i a, __m128i b ); 00038 //__m64 ssp_hadd_pi16_SSE3 ( __m64 a, __m64 b ); 00039 //__m64 ssp_hadd_pi32_SSE3 ( __m64 a, __m64 b ); 00040 //__m128i ssp_hadds_epi16_SSE3 ( __m128i a, __m128i b ); 00041 //__m64 ssp_hadds_pi16_SSE3 ( __m64 a, __m64 b ); 00042 //__m128i ssp_hsub_epi16_SSE3 ( __m128i a, __m128i b ); 00043 //__m128i ssp_hsub_epi32_SSE3 ( __m128i a, __m128i b ); 00044 //__m64 ssp_hsub_pi16_SSE3 ( __m64 a, __m64 b ); 00045 //__m64 ssp_hsub_pi32_SSE3 ( __m64 a, __m64 b ); 00046 //__m128i ssp_hsubs_epi16_SSE3 ( __m128i a, __m128i b ); 00047 //__m64 ssp_hsubs_pi16_SSE3 ( __m64 a, __m64 b ); 00048 //__m128i ssp_maddubs_epi16_SSE3 ( __m128i a, __m128i b ); 00049 //__m64 ssp_maddubs_pi16_SSE3 ( __m64 a, __m64 b ); 00050 //__m128i ssp_mulhrs_epi16_SSE3 ( __m128i a, __m128i b ); 00051 //__m64 ssp_mulhrs_pi16_SSE3 ( __m64 a, __m64 b ); 00052 //__m128i ssp_shuffle_epi8_SSE3 ( __m128i a, __m128i b ); 00053 //__m64 ssp_shuffle_pi8_SSE3 ( __m64 a, __m64 b ); 00054 //__m128i ssp_sign_epi16_SSE3 ( __m128i a, __m128i b ); 00055 //__m128i ssp_sign_epi32_SSE3 ( __m128i a, __m128i b ); 00056 //__m128i ssp_sign_epi8_SSE3 ( __m128i a, __m128i b ); 00057 //__m64 ssp_sign_pi16_SSE3 ( __m64 a, __m64 b ); 00058 //__m64 ssp_sign_pi32_SSE3 ( __m64 a, __m64 b ); 00059 //__m64 ssp_sign_pi8_SSE3 ( __m64 a, __m64 b ); 00060 // 00064 //__m128i ssp_extract_si64_SSE3 ( __m128i,__m128i ); 00065 //__m128i ssp_extracti_si64_SSE3 ( __m128i, int, int ); 00066 //__m128i ssp_insert_si64_SSE3 ( __m128i,__m128i ); 00067 //__m128i ssp_inserti_si64_SSE3 ( __m128i, __m128i, int, int ); 00068 //void ssp_stream_sd_SSE3 ( double*,__m128d ); 00069 //void ssp_stream_ss_SSE3 ( float*,__m128 ); 00070 // 00074 //__m128i ssp_blend_epi16_SSE3 ( __m128i v1, __m128i v2, const int mask ); 00075 //__m128d ssp_blend_pd_SSE3 ( __m128d v1, __m128d v2, const int mask ); 00076 //__m128 ssp_blend_ps_SSE3 ( __m128 v1, __m128 v2, const int mask ); 00077 //__m128i ssp_blendv_epi8_SSE3 ( __m128i v1, __m128i v2, __m128i mask ); 00078 //__m128d ssp_blendv_pd_SSE3 ( __m128d v1, __m128d v2, __m128d mask ); 00079 //__m128 ssp_blendv_ps_SSE3 ( __m128 v1, __m128 v2, __m128 mask ); 00080 //__m128d ssp_ceil_pd_SSE3 ( __m128d a ); 00081 //__m128 ssp_ceil_ps_SSE3 ( __m128 a ); 00082 //__m128d ssp_ceil_sd_SSE3 ( __m128d a, __m128d b ); 00083 //__m128 ssp_ceil_ss_SSE3 ( __m128 a, __m128 b ); 00084 //__m128i ssp_cmpeq_epi64_SSE3 ( __m128i val1, __m128i val2 ); 00085 //__m128i ssp_cvtepi16_epi32_SSE3 ( __m128i shortValues ); 00086 //__m128i ssp_cvtepi16_epi64_SSE3 ( __m128i shortValues ); 00087 //__m128i ssp_cvtepi32_epi64_SSE3 ( __m128i intValues ); 00088 //__m128i ssp_cvtepi8_epi16_SSE3 ( __m128i byteValues ); 00089 //__m128i ssp_cvtepi8_epi32_SSE3 ( __m128i byteValues ); 00090 //__m128i ssp_cvtepi8_epi64_SSE3 ( __m128i byteValues ); 00091 //__m128i ssp_cvtepu16_epi32_SSE3 ( __m128i shortValues ); 00092 //__m128i ssp_cvtepu16_epi64_SSE3 ( __m128i shortValues ); 00093 //__m128i ssp_cvtepu32_epi64_SSE3 ( __m128i intValues ); 00094 //__m128i ssp_cvtepu8_epi16_SSE3 ( __m128i byteValues ); 00095 //__m128i ssp_cvtepu8_epi32_SSE3 ( __m128i byteValues ); 00096 //__m128i ssp_cvtepu8_epi64_SSE3 ( __m128i shortValues ); 00097 //__m128d ssp_dp_pd_SSE3 ( __m128d val1, __m128d val2, const int mask ); 00098 //__m128 ssp_dp_ps_SSE3 ( __m128 val1, __m128 val2, const int mask ); 00099 //int ssp_extract_epi32_SSE3 ( __m128i src, const int ndx ); 00100 //__int64 ssp_extract_epi64_SSE3 ( __m128i src, const int ndx ); 00101 //int ssp_extract_epi8_SSE3 ( __m128i src, const int ndx ); 00102 //int ssp_extract_ps_SSE3 ( __m128 src, const int ndx ); 00103 //__m128d ssp_floor_pd_SSE3 ( __m128d a ); 00104 //__m128 ssp_floor_ps_SSE3 ( __m128 a ); 00105 //__m128d ssp_floor_sd_SSE3 ( __m128d a, __m128d b ); 00106 //__m128 ssp_floor_ss_SSE3 ( __m128 a, __m128 b ); 00107 //__m128i ssp_insert_epi32_SSE3 ( __m128i dst, int s, const int ndx ); 00108 //__m128i ssp_insert_epi64_SSE3 ( __m128i dst, __int64 s, const int ndx ); 00109 //__m128i ssp_insert_epi8_SSE3 ( __m128i dst, int s, const int ndx ); 00110 //__m128 ssp_insert_ps_SSE3 ( __m128 dst, __m128 src, const int ndx ); 00111 //__m128i ssp_max_epi32_SSE3 ( __m128i val1, __m128i val2 ); 00112 //__m128i ssp_max_epi8_SSE3 ( __m128i val1, __m128i val2 ); 00113 //__m128i ssp_max_epu16_SSE3 ( __m128i val1, __m128i val2 ); 00114 //__m128i ssp_max_epu32_SSE3 ( __m128i val1, __m128i val2 ); 00115 //__m128i ssp_min_epi32_SSE3 ( __m128i val1, __m128i val2 ); 00116 //__m128i ssp_min_epi8_SSE3 ( __m128i val1, __m128i val2 ); 00117 //__m128i ssp_min_epu16_SSE3 ( __m128i val1, __m128i val2 ); 00118 //__m128i ssp_min_epu32_SSE3 ( __m128i val1, __m128i val2 ); 00119 //__m128i ssp_minpos_epu16_SSE3 ( __m128i shortValues ); 00120 //__m128i ssp_mpsadbw_epu8_SSE3 ( __m128i s1, __m128i s2, const int msk ); 00121 //__m128i ssp_mul_epi32_SSE3 ( __m128i a, __m128i b ); 00122 //__m128i ssp_mullo_epi32_SSE3 ( __m128i a, __m128i b ); 00123 //__m128i ssp_packus_epi32_SSE3 ( __m128i val1, __m128i val2 ); 00124 //__m128d ssp_round_pd_SSE3 ( __m128d val, int iRoundMode ); 00125 //__m128 ssp_round_ps_SSE3 ( __m128 val, int iRoundMode ); 00126 //__m128d ssp_round_sd_SSE3 ( __m128d dst, __m128d val, int iRoundMode ); 00127 //__m128 ssp_round_ss_SSE3 ( __m128 dst, __m128 val, int iRoundMode ); 00128 //__m128i ssp_stream_load_si128_SSE3( __m128i* v1 ); 00129 //int ssp_testc_si128_SSE3 ( __m128i mask, __m128i val ); 00130 //int ssp_testnzc_si128_SSE3 ( __m128i mask, __m128i s2 ); 00131 //int ssp_testz_si128_SSE3 ( __m128i mask, __m128i val ); 00132 // 00136 //int ssp_cmpestra_SSE3 ( __m128i a, int la, __m128i b, int lb, const int mode ); 00137 //int ssp_cmpestrc_SSE3 ( __m128i a, int la, __m128i b, int lb, const int mode ); 00138 //int ssp_cmpestri_SSE3 ( __m128i a, int la, __m128i b, int lb, const int mode ); 00139 //__m128i ssp_cmpestrm_SSE3 ( __m128i a, int la, __m128i b, int lb, const int mode ); 00140 //int ssp_cmpestro_SSE3 ( __m128i a, int la, __m128i b, int lb, const int mode ); 00141 //int ssp_cmpestrs_SSE3 ( __m128i a, int la, __m128i b, int lb, const int mode ); 00142 //int ssp_cmpestrz_SSE3 ( __m128i a, int la, __m128i b, int lb, const int mode ); 00143 //__m128i ssp_cmpgt_epi64_SSE3 ( __m128i a, __m128i b ); 00144 //int ssp_cmpistra_SSE3 ( __m128i a, __m128i b, const int mode ); 00145 //int ssp_cmpistrc_SSE3 ( __m128i a, __m128i b, const int mode ); 00146 //int ssp_cmpistri_SSE3 ( __m128i a, __m128i b, const int mode ); 00147 //__m128i ssp_cmpistrm_SSE3 ( __m128i a, __m128i b, const int mode ); 00148 //int ssp_cmpistro_SSE3 ( __m128i a, __m128i b, const int mode ); 00149 //int ssp_cmpistrs_SSE3 ( __m128i a, __m128i b, const int mode ); 00150 //int ssp_cmpistrz_SSE3 ( __m128i a, __m128i b, const int mode ); 00151 //unsigned int ssp_crc32_u16_SSE3 ( unsigned int crc, unsigned short v ); 00152 //unsigned int ssp_crc32_u32_SSE3 ( unsigned int crc, unsigned int v ); 00153 //unsigned __int64 ssp_crc32_u64_SSE3 ( unsigned int crc, unsigned __int64 v ); 00154 //unsigned int ssp_crc32_u8_SSE3 ( unsigned int crc, unsigned char v ); 00155 //int ssp_popcnt_u32_SSE3 ( unsigned int a ); 00156 //int ssp_popcnt_u64_SSE3 ( unsigned __int64 a ); 00157 00158 #include "native/SSEPlus_native_SSE3.h" 00159 #include "emulation/SSEPlus_emulation_SSE3.h" 00160 #include "arithmetic/SSEPlus_arithmetic_SSE3.h" 00161 00162 #endif // __SSEPLUS_SSE3_H__