SSE[3,4A,...,5] implemented in reference | |
SSP_FORCEINLINE __m128i | ssp_comeq_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comeq_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comeq_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comeq_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comeq_epu16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comeq_epu32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comeq_epu64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comeq_epu8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128d | ssp_comeq_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comeq_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comeq_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comeq_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128i | ssp_comlt_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comlt_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comlt_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comlt_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comlt_epu16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comlt_epu32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comlt_epu64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comlt_epu8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128d | ssp_comlt_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comlt_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comlt_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comlt_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128i | ssp_comle_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comle_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comle_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comle_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comle_epu16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comle_epu32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comle_epu64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comle_epu8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128d | ssp_comle_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comle_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comle_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comle_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comunord_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comunord_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comunord_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comunord_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128i | ssp_comneq_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comneq_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comneq_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comneq_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comneq_epu16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comneq_epu32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comneq_epu64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comneq_epu8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128d | ssp_comneq_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comneq_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comneq_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comneq_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comnlt_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comnlt_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comnlt_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comnlt_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comnle_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comnle_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comnle_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comnle_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comord_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comord_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comord_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comord_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comueq_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comueq_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comueq_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comueq_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comnge_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comnge_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comnge_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comnge_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comngt_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comngt_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comngt_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comngt_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128i | ssp_comfalse_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comfalse_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comfalse_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comfalse_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comfalse_epu16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comfalse_epu32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comfalse_epu64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comfalse_epu8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128d | ssp_comfalse_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comfalse_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comfalse_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comfalse_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comoneq_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comoneq_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comoneq_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comoneq_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128i | ssp_comge_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comge_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comge_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comge_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comge_epu16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comge_epu32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comge_epu64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comge_epu8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128d | ssp_comge_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comge_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comge_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comge_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128i | ssp_comgt_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comgt_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comgt_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comgt_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comgt_epu16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comgt_epu32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comgt_epu64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comgt_epu8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128d | ssp_comgt_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comgt_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comgt_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comgt_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128i | ssp_comtrue_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comtrue_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comtrue_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comtrue_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comtrue_epu16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comtrue_epu32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comtrue_epu64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_comtrue_epu8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128d | ssp_comtrue_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comtrue_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_comtrue_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_comtrue_ss_REF (__m128 a, __m128 b) |
SSE[3,4A,...,5] implemented in reference | |
SSP_FORCEINLINE __m128d | ssp_frcz_pd_REF (__m128d a) |
SSP_FORCEINLINE __m128 | ssp_frcz_ps_REF (__m128 a) |
SSP_FORCEINLINE __m128d | ssp_frcz_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_frcz_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128i | ssp_haddd_epi16_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddd_epi8_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddd_epu16_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddd_epu8_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddq_epi16_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddq_epi32_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddq_epi8_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddq_epu16_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddq_epu32_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddq_epu8_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddw_epi8_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_haddw_epu8_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_hsubd_epi16_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_hsubq_epi32_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_hsubw_epi8_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_macc_epi16_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_macc_epi32_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128 | ssp_macc_ps_REF (__m128 a, __m128 b, __m128 c) |
SSP_FORCEINLINE __m128d | ssp_macc_pd_REF (__m128d a, __m128d b, __m128d c) |
SSP_FORCEINLINE __m128 | ssp_macc_ss_REF (__m128 a, __m128 b, __m128 c) |
SSP_FORCEINLINE __m128d | ssp_macc_sd_REF (__m128d a, __m128d b, __m128d c) |
SSP_FORCEINLINE __m128i | ssp_maccd_epi16_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_macchi_epi32_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_macclo_epi32_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_maccs_epi16_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_maccs_epi32_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_maccsd_epi16_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_maccshi_epi32_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_maccslo_epi32_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_maddd_epi16_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_maddsd_epi16_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128 | ssp_nmacc_ps_REF (__m128 a, __m128 b, __m128 c) |
SSP_FORCEINLINE __m128d | ssp_nmacc_pd_REF (__m128d a, __m128d b, __m128d c) |
SSP_FORCEINLINE __m128 | ssp_nmacc_ss_REF (__m128 a, __m128 b, __m128 c) |
SSP_FORCEINLINE __m128d | ssp_nmacc_sd_REF (__m128d a, __m128d b, __m128d c) |
SSP_FORCEINLINE __m128 | ssp_msub_ps_REF (__m128 a, __m128 b, __m128 c) |
SSP_FORCEINLINE __m128d | ssp_msub_pd_REF (__m128d a, __m128d b, __m128d c) |
SSP_FORCEINLINE __m128 | ssp_msub_ss_REF (__m128 a, __m128 b, __m128 c) |
SSP_FORCEINLINE __m128d | ssp_msub_sd_REF (__m128d a, __m128d b, __m128d c) |
SSP_FORCEINLINE __m128 | ssp_nmsub_ps_REF (__m128 a, __m128 b, __m128 c) |
SSP_FORCEINLINE __m128d | ssp_nmsub_pd_REF (__m128d a, __m128d b, __m128d c) |
SSP_FORCEINLINE __m128 | ssp_nmsub_ss_REF (__m128 a, __m128 b, __m128 c) |
SSP_FORCEINLINE __m128d | ssp_nmsub_sd_REF (__m128d a, __m128d b, __m128d c) |
SSP_FORCEINLINE __m128 | ssp_addsub_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_addsub_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128i | ssp_blend_epi16_REF (__m128i a, __m128i b, const int mask) |
SSP_FORCEINLINE __m128d | ssp_blend_pd_REF (__m128d a, __m128d b, const int mask) |
SSP_FORCEINLINE __m128 | ssp_blend_ps_REF (__m128 a, __m128 b, const int mask) |
SSP_FORCEINLINE __m128i | ssp_blendv_epi8_REF (__m128i a, __m128i b, __m128i mask) |
SSP_FORCEINLINE __m128d | ssp_blendv_pd_REF (__m128d a, __m128d b, __m128d mask) |
SSP_FORCEINLINE __m128 | ssp_blendv_ps_REF (__m128 a, __m128 b, __m128 mask) |
SSP_FORCEINLINE __m128i | ssp_cmpeq_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128d | ssp_dp_pd_REF (__m128d a, __m128d b, const int mask) |
SSP_FORCEINLINE __m128 | ssp_dp_ps_REF (__m128 a, __m128 b, const int mask) |
SSP_FORCEINLINE __m128i | ssp_maddubs_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m64 | ssp_maddubs_pi16_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE __m128i | ssp_mulhrs_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m64 | ssp_mulhrs_pi16_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE int | ssp_extract_epi8_REF (__m128i a, const int ndx) |
SSP_FORCEINLINE int | ssp_extract_epi32_REF (__m128i a, const int imm) |
SSP_FORCEINLINE ssp_s64 | ssp_extract_epi64_REF (__m128i a, const int ndx) |
SSP_FORCEINLINE int | ssp_extract_ps_REF (__m128 a, const int ndx) |
SSP_FORCEINLINE __m128i | ssp_extract_si64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_extracti_si64_REF (__m128i a, int len, int ndx) |
SSP_FORCEINLINE __m128i | ssp_hadd_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_hadd_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m64 | ssp_hadd_pi16_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE __m64 | ssp_hadd_pi32_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE __m128i | ssp_hadds_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m64 | ssp_hadds_pi16_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE __m128 | ssp_hadd_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_hadd_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128i | ssp_hsub_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_hsub_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m64 | ssp_hsub_pi16_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE __m64 | ssp_hsub_pi32_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE __m128i | ssp_hsubs_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m64 | ssp_hsubs_pi16_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE __m128 | ssp_hsub_ps_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_hsub_pd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128i | ssp_insert_epi8_REF (__m128i a, int b, const int ndx) |
SSP_FORCEINLINE __m128i | ssp_insert_epi32_REF (__m128i a, int b, const int ndx) |
SSP_FORCEINLINE __m128i | ssp_insert_epi64_REF (__m128i a, ssp_s64 b, const int ndx) |
SSP_FORCEINLINE __m128 | ssp_insert_ps_REF (__m128 a, __m128 b, const int sel) |
SSP_FORCEINLINE __m128i | ssp_insert_si64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_inserti_si64_REF (__m128i a, __m128i b, int len, int ndx) |
SSP_FORCEINLINE __m128d | ssp_loaddup_pd_REF (double const *dp) |
SSP_FORCEINLINE __m128i | ssp_lddqu_si128_REF (__m128i const *p) |
SSP_FORCEINLINE __m128i | ssp_stream_load_si128_REF (__m128i *p) |
SSP_FORCEINLINE __m128i | ssp_min_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_max_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_min_epu16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_max_epu16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_min_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_max_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_min_epu32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_max_epu32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_minpos_epu16_REF (__m128i shortValues) |
SSP_FORCEINLINE __m128i | ssp_minpos_epu16_REFb (__m128i shortValues) |
SSP_FORCEINLINE __m128 | ssp_movehdup_ps_REF (__m128 a) |
SSP_FORCEINLINE __m128 | ssp_moveldup_ps_REF (__m128 a) |
SSP_FORCEINLINE __m128d | ssp_movedup_pd_REF (__m128d a) |
SSP_FORCEINLINE __m128i | ssp_mul_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_mullo_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_mpsadbw_epu8_REF (__m128i a, __m128i b, const int msk) |
SSP_FORCEINLINE __m128i | ssp_packus_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128d | ssp_ceil_pd_REF (__m128d a) |
SSP_FORCEINLINE __m128 | ssp_ceil_ps_REF (__m128 a) |
SSP_FORCEINLINE __m128d | ssp_ceil_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_ceil_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_floor_pd_REF (__m128d a) |
SSP_FORCEINLINE __m128 | ssp_floor_ps_REF (__m128 a) |
SSP_FORCEINLINE __m128d | ssp_floor_sd_REF (__m128d a, __m128d b) |
SSP_FORCEINLINE __m128 | ssp_floor_ss_REF (__m128 a, __m128 b) |
SSP_FORCEINLINE __m128d | ssp_round_pd_REF (__m128d val, int iRoundMode) |
SSP_FORCEINLINE __m128 | ssp_round_ps_REF (__m128 val, int iRoundMode) |
SSP_FORCEINLINE __m128d | ssp_round_sd_REF (__m128d dst, __m128d val, int iRoundMode) |
SSP_FORCEINLINE __m128 | ssp_round_ss_REF (__m128 dst, __m128 val, int iRoundMode) |
SSP_FORCEINLINE int | ssp_testc_si128_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE int | ssp_testz_si128_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE int | ssp_testnzc_si128_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_cvtepi8_epi16_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepi8_epi32_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepi8_epi64_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepi16_epi32_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepi16_epi64_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepi32_epi64_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepu8_epi16_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepu8_epi32_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepu8_epi64_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepu16_epi32_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepu16_epi64_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_cvtepu32_epi64_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_abs_epi8_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_abs_epi16_REF (__m128i a) |
SSP_FORCEINLINE __m128i | ssp_abs_epi32_REF (__m128i a) |
SSP_FORCEINLINE __m64 | ssp_abs_pi8_REF (__m64 a) |
SSP_FORCEINLINE __m64 | ssp_abs_pi16_REF (__m64 a) |
SSP_FORCEINLINE __m64 | ssp_abs_pi32_REF (__m64 a) |
SSP_FORCEINLINE __m128i | ssp_alignr_epi8_REF (__m128i a, __m128i b, const int ralign) |
SSP_FORCEINLINE __m64 | ssp_alignr_pi8_REF (__m64 a, __m64 b, const int ralign) |
SSP_FORCEINLINE __m128i | ssp_shuffle_epi8_REF (__m128i a, __m128i mask) |
SSP_FORCEINLINE __m64 | ssp_shuffle_pi8_REF (__m64 a, __m64 mask) |
SSP_FORCEINLINE __m128i | ssp_sign_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_sign_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_sign_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m64 | ssp_sign_pi8_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE __m64 | ssp_sign_pi16_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE __m64 | ssp_sign_pi32_REF (__m64 a, __m64 b) |
SSP_FORCEINLINE void | ssp_stream_sd_REF (double *dst, __m128d src) |
SSP_FORCEINLINE void | ssp_stream_ss_REF (float *dst, __m128 src) |
SSP_FORCEINLINE unsigned short | ssp_lzcnt16_REF (unsigned short val) |
SSP_FORCEINLINE unsigned int | ssp_lzcnt_REF (unsigned int val) |
SSP_FORCEINLINE ssp_u64 | ssp_lzcnt64_REF (ssp_u64 val) |
SSP_FORCEINLINE unsigned short | ssp_popcnt16_REF (unsigned short val) |
SSP_FORCEINLINE unsigned int | ssp_popcnt_REF (unsigned int val) |
SSP_FORCEINLINE ssp_u64 | ssp_popcnt64_REF (ssp_u64 val) |
SSP_FORCEINLINE __m128i | ssp_perm_epi8_REF (__m128i a, __m128i b, __m128i c) |
SSP_FORCEINLINE __m128 | ssp_perm_ps_REF (__m128 a, __m128 b, __m128i c) |
SSP_FORCEINLINE __m128d | ssp_perm_pd_REF (__m128d a, __m128d b, __m128i c) |
SSP_FORCEINLINE __m128i | ssp_rot_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_rot_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_rot_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_rot_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_roti_epi8_REF (__m128i a, const int b) |
SSP_FORCEINLINE __m128i | ssp_roti_epi16_REF (__m128i a, const int b) |
SSP_FORCEINLINE __m128i | ssp_roti_epi32_REF (__m128i a, const int b) |
SSP_FORCEINLINE __m128i | ssp_roti_epi64_REF (__m128i a, const int b) |
SSP_FORCEINLINE __m128i | ssp_shl_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_sha_epi8_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_shl_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_sha_epi16_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_shl_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_sha_epi32_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_shl_epi64_REF (__m128i a, __m128i b) |
SSP_FORCEINLINE __m128i | ssp_sha_epi64_REF (__m128i a, __m128i b) |
#define | SSP_SATURATION(a, pos_limit, neg_limit) (a>pos_limit) ? pos_limit : ((a<neg_limit)?neg_limit:a) |
#define | SSP_SET_MIN(sd, s) sd=(sd<s)?sd:s; |
#define | SSP_SET_MAX(sd, s) sd=(sd>s)?sd:s; |
#define SSP_SATURATION(a, pos_limit, neg_limit) (a>pos_limit) ? pos_limit : ((a<neg_limit)?neg_limit:a)
Definition at line 435 of file SSEPlus_emulation_REF.h.
#define SSP_SET_MAX(sd, s) sd=(sd>s)?sd:s;
Definition at line 1592 of file SSEPlus_emulation_REF.h.
#define SSP_SET_MIN(sd, s) sd=(sd<s)?sd:s;
Definition at line 1591 of file SSEPlus_emulation_REF.h.
SSP_FORCEINLINE __m128i ssp_abs_epi16_REF (__m128i a)
Reference implementation of _mm_abs_epi16 [SSSE3]. (Searches MSDN)
Definition at line 2575 of file SSEPlus_emulation_REF.h.
02576 {
02577     ssp_m128 A;
02578     A.i = a;
02579
02580     A.s16[0] = (A.s16[0] < 0) ? -A.s16[0] : A.s16[0];
02581     A.s16[1] = (A.s16[1] < 0) ? -A.s16[1] : A.s16[1];
02582     A.s16[2] = (A.s16[2] < 0) ? -A.s16[2] : A.s16[2];
02583     A.s16[3] = (A.s16[3] < 0) ? -A.s16[3] : A.s16[3];
02584     A.s16[4] = (A.s16[4] < 0) ? -A.s16[4] : A.s16[4];
02585     A.s16[5] = (A.s16[5] < 0) ? -A.s16[5] : A.s16[5];
02586     A.s16[6] = (A.s16[6] < 0) ? -A.s16[6] : A.s16[6];
02587     A.s16[7] = (A.s16[7] < 0) ? -A.s16[7] : A.s16[7];
02588
02589     return A.i;
02590 }
SSP_FORCEINLINE __m128i ssp_abs_epi32_REF (__m128i a)
Reference implementation of _mm_abs_epi32 [SSSE3]. (Searches MSDN)
Definition at line 2593 of file SSEPlus_emulation_REF.h.
02594 {
02595     ssp_m128 A;
02596     A.i = a;
02597
02598     A.s32[0] = (A.s32[0] < 0) ? -A.s32[0] : A.s32[0];
02599     A.s32[1] = (A.s32[1] < 0) ? -A.s32[1] : A.s32[1];
02600     A.s32[2] = (A.s32[2] < 0) ? -A.s32[2] : A.s32[2];
02601     A.s32[3] = (A.s32[3] < 0) ? -A.s32[3] : A.s32[3];
02602
02603     return A.i;
02604 }
SSP_FORCEINLINE __m128i ssp_abs_epi8_REF (__m128i a)
Reference implementation of _mm_abs_epi8 [SSSE3]. (Searches MSDN)
Definition at line 2548 of file SSEPlus_emulation_REF.h.
02549 {
02550     ssp_m128 A;
02551     A.i = a;
02552
02553     A.s8[0] = (A.s8[0] < 0) ? -A.s8[0] : A.s8[0];
02554     A.s8[1] = (A.s8[1] < 0) ? -A.s8[1] : A.s8[1];
02555     A.s8[2] = (A.s8[2] < 0) ? -A.s8[2] : A.s8[2];
02556     A.s8[3] = (A.s8[3] < 0) ? -A.s8[3] : A.s8[3];
02557     A.s8[4] = (A.s8[4] < 0) ? -A.s8[4] : A.s8[4];
02558     A.s8[5] = (A.s8[5] < 0) ? -A.s8[5] : A.s8[5];
02559     A.s8[6] = (A.s8[6] < 0) ? -A.s8[6] : A.s8[6];
02560     A.s8[7] = (A.s8[7] < 0) ? -A.s8[7] : A.s8[7];
02561     A.s8[8] = (A.s8[8] < 0) ? -A.s8[8] : A.s8[8];
02562     A.s8[9] = (A.s8[9] < 0) ? -A.s8[9] : A.s8[9];
02563     A.s8[10] = (A.s8[10]< 0) ? -A.s8[10] : A.s8[10];
02564     A.s8[11] = (A.s8[11]< 0) ? -A.s8[11] : A.s8[11];
02565     A.s8[12] = (A.s8[12]< 0) ? -A.s8[12] : A.s8[12];
02566     A.s8[13] = (A.s8[13]< 0) ? -A.s8[13] : A.s8[13];
02567     A.s8[14] = (A.s8[14]< 0) ? -A.s8[14] : A.s8[14];
02568     A.s8[15] = (A.s8[15]< 0) ? -A.s8[15] : A.s8[15];
02569
02570     return A.i;
02571 }
SSP_FORCEINLINE __m64 ssp_abs_pi16_REF (__m64 a)
Reference implementation of _mm_abs_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 2630 of file SSEPlus_emulation_REF.h.
02631 {
02632     ssp_m64 A;
02633     A.m64 = a;
02634
02635     A.s16[0] = (A.s16[0] < 0) ? -A.s16[0] : A.s16[0];
02636     A.s16[1] = (A.s16[1] < 0) ? -A.s16[1] : A.s16[1];
02637     A.s16[2] = (A.s16[2] < 0) ? -A.s16[2] : A.s16[2];
02638     A.s16[3] = (A.s16[3] < 0) ? -A.s16[3] : A.s16[3];
02639
02640     return A.m64;
02641 }
SSP_FORCEINLINE __m64 ssp_abs_pi32_REF (__m64 a)
Reference implementation of _mm_abs_pi32 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 2647 of file SSEPlus_emulation_REF.h.
02648 {
02649     ssp_m64 A;
02650     A.m64 = a;
02651
02652     A.s32[0] = (A.s32[0] < 0) ? -A.s32[0] : A.s32[0];
02653     A.s32[1] = (A.s32[1] < 0) ? -A.s32[1] : A.s32[1];
02654
02655     return A.m64;
02656 }
SSP_FORCEINLINE __m64 ssp_abs_pi8_REF (__m64 a)
Reference implementation of _mm_abs_pi8 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 2609 of file SSEPlus_emulation_REF.h.
02610 {
02611     ssp_m64 A;
02612     A.m64 = a;
02613
02614     A.s8[0] = (A.s8[0] < 0) ? -A.s8[0] : A.s8[0];
02615     A.s8[1] = (A.s8[1] < 0) ? -A.s8[1] : A.s8[1];
02616     A.s8[2] = (A.s8[2] < 0) ? -A.s8[2] : A.s8[2];
02617     A.s8[3] = (A.s8[3] < 0) ? -A.s8[3] : A.s8[3];
02618     A.s8[4] = (A.s8[4] < 0) ? -A.s8[4] : A.s8[4];
02619     A.s8[5] = (A.s8[5] < 0) ? -A.s8[5] : A.s8[5];
02620     A.s8[6] = (A.s8[6] < 0) ? -A.s8[6] : A.s8[6];
02621     A.s8[7] = (A.s8[7] < 0) ? -A.s8[7] : A.s8[7];
02622
02623     return A.m64;
02624 }
SSP_FORCEINLINE __m128d ssp_addsub_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_addsub_ps_REF (__m128 a, __m128 b)
Reference implementation of _mm_addsub_ps [SSE3]. (Searches MSDN)
Definition at line 775 of file SSEPlus_emulation_REF.h.
00776 {
00777     ssp_m128 A, B;
00778     A.f = a;
00779     B.f = b;
00780
00781     A.f32[0] -= B.f32[0];
00782     A.f32[1] += B.f32[1];
00783     A.f32[2] -= B.f32[2];
00784     A.f32[3] += B.f32[3];
00785     return A.f;
00786 }
SSP_FORCEINLINE __m128i ssp_alignr_epi8_REF (__m128i a, __m128i b, const int ralign)
Reference implementation of _mm_alignr_epi8 [SSSE3]. (Searches MSDN)
Definition at line 2661 of file SSEPlus_emulation_REF.h.
02662 {
02663     ssp_m128 C[3];
02664     ssp_s8 * tmp;
02665     int i, j;
02666
02667     if (ralign <0) return b; //only shift to right, no negative
02668     C[2].i = _mm_setzero_si128();
02669     if (ralign > 32) return C[2].i;
02670     C[1].i = a;
02671     C[0].i = b;
02672     tmp = & (C[0].s8[0]);
02673
02674     for (i=ralign+15, j=15; i >=ralign; i--, j--) {
02675         C[2].s8[j] = tmp[i];
02676     }
02677
02678     return C[2].i;
02679 }
SSP_FORCEINLINE __m64 ssp_alignr_pi8_REF (__m64 a, __m64 b, const int ralign)
Reference implementation of _mm_alignr_pi8 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 2684 of file SSEPlus_emulation_REF.h.
02685 {
02686     ssp_m64 C[3];
02687     ssp_s8 * tmp;
02688     int i, j;
02689
02690     if (ralign <0) return b; //only shift to right, no negative
02691     C[2].u32[0] = 0;
02692     C[2].u32[1] = 0;
02693     if (ralign > 16) return C[2].m64;
02694     C[1].m64 = a;
02695     C[0].m64 = b;
02696     tmp = & (C[0].s8[0]);
02697
02698     for (i=ralign+7, j=7; i >=ralign; i--, j--) {
02699         C[2].s8[j] = tmp[i];
02700     }
02701
02702     return C[2].m64;
02703 }
SSP_FORCEINLINE __m128i ssp_blend_epi16_REF (__m128i a, __m128i b, const int mask)
Reference implementation of _mm_blend_epi16 [SSE4.1]. (Searches MSDN)
Definition at line 805 of file SSEPlus_emulation_REF.h.
00806 {
00807     ssp_m128 A, B;
00808     A.i = a;
00809     B.i = b;
00810
00811     A.s16[0] = (mask & 0x01) ? B.s16[0] : A.s16[0];
00812     A.s16[1] = (mask & 0x02) ? B.s16[1] : A.s16[1];
00813     A.s16[2] = (mask & 0x04) ? B.s16[2] : A.s16[2];
00814     A.s16[3] = (mask & 0x08) ? B.s16[3] : A.s16[3];
00815     A.s16[4] = (mask & 0x10) ? B.s16[4] : A.s16[4];
00816     A.s16[5] = (mask & 0x20) ? B.s16[5] : A.s16[5];
00817     A.s16[6] = (mask & 0x40) ? B.s16[6] : A.s16[6];
00818     A.s16[7] = (mask & 0x80) ? B.s16[7] : A.s16[7];
00819     return A.i;
00820 }
SSP_FORCEINLINE __m128d ssp_blend_pd_REF (__m128d a, __m128d b, const int mask)
Reference implementation of _mm_blend_pd [SSE4.1]. (Searches MSDN)
Definition at line 823 of file SSEPlus_emulation_REF.h.
00824 {
00825     ssp_m128 A, B;
00826     A.d = a;
00827     B.d = b;
00828
00829     A.f64[0] = (mask & 0x1) ? B.f64[0] : A.f64[0];
00830     A.f64[1] = (mask & 0x2) ? B.f64[1] : A.f64[1];
00831     return A.d;
00832 }
SSP_FORCEINLINE __m128 ssp_blend_ps_REF (__m128 a, __m128 b, const int mask)
Reference implementation of _mm_blend_ps [SSE4.1]. (Searches MSDN)
Definition at line 835 of file SSEPlus_emulation_REF.h.
00836 {
00837     ssp_m128 A, B;
00838     A.f = a;
00839     B.f = b;
00840
00841     A.f32[0] = (mask & 0x1) ? B.f32[0] : A.f32[0];
00842     A.f32[1] = (mask & 0x2) ? B.f32[1] : A.f32[1];
00843     A.f32[2] = (mask & 0x4) ? B.f32[2] : A.f32[2];
00844     A.f32[3] = (mask & 0x8) ? B.f32[3] : A.f32[3];
00845     return A.f;
00846 }
/**
 * Reference implementation of _mm_blendv_epi8 [SSE4.1].
 * For each of the sixteen bytes, the byte from b is taken when bit 7 of the
 * matching mask byte is set; otherwise the byte from a is kept.
 * Defined at line 849 of SSEPlus_emulation_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_blendv_epi8_REF(__m128i a, __m128i b, __m128i mask)
{
    ssp_m128 dst, src, sel;
    int lane;

    dst.i = a;
    src.i = b;
    sel.i = mask;
    for (lane = 0; lane < 16; ++lane) {
        if (sel.s8[lane] & 0x80) {
            dst.s8[lane] = src.s8[lane];
        }
    }
    return dst.i;
}
/**
 * Reference implementation of _mm_blendv_pd [SSE4.1].
 * For each 64-bit lane, the value from b is taken when the top bit of the
 * matching mask lane is set; otherwise the value from a is kept.
 * Defined at line 876 of SSEPlus_emulation_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_blendv_pd_REF(__m128d a, __m128d b, __m128d mask)
{
    ssp_m128 dst, src, sel;
    int lane;

    dst.d = a;
    src.d = b;
    sel.d = mask;
    for (lane = 0; lane < 2; ++lane) {
        if (sel.u64[lane] & 0x8000000000000000ll) {
            dst.f64[lane] = src.f64[lane];
        }
    }
    return dst.d;
}
/**
 * Reference implementation of _mm_blendv_ps [SSE4.1].
 * For each 32-bit lane, the value from b is taken when the top bit of the
 * matching mask lane is set; otherwise the value from a is kept.
 * Defined at line 889 of SSEPlus_emulation_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_blendv_ps_REF(__m128 a, __m128 b, __m128 mask)
{
    ssp_m128 dst, src, sel;
    int lane;

    dst.f = a;
    src.f = b;
    sel.f = mask;
    for (lane = 0; lane < 4; ++lane) {
        if (sel.u32[lane] & 0x80000000) {
            dst.f32[lane] = src.f32[lane];
        }
    }
    return dst.f;
}
/**
 * Reference implementation of _mm_ceil_pd [SSE4.1].
 * Applies ceil() to both 64-bit float lanes of a.
 * Defined at line 1999 of SSEPlus_emulation_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_ceil_pd_REF(__m128d a)
{
    ssp_m128 val;
    int lane;

    val.d = a;
    for (lane = 0; lane < 2; ++lane) {
        val.f64[lane] = ceil(val.f64[lane]);
    }
    return val.d;
}
/**
 * Reference implementation of _mm_ceil_ps [SSE4.1].
 * Applies ceil() to each of the four 32-bit float lanes of a and narrows
 * the result back to ssp_f32.
 * Defined at line 2010 of SSEPlus_emulation_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_ceil_ps_REF(__m128 a)
{
    ssp_m128 val;
    int lane;

    val.f = a;
    for (lane = 0; lane < 4; ++lane) {
        val.f32[lane] = (ssp_f32)ceil(val.f32[lane]);
    }
    return val.f;
}
/**
 * Reference implementation of _mm_ceil_sd [SSE4.1].
 * Lane 0 of the result is ceil() of lane 0 of b; lane 1 is copied from a.
 * Defined at line 2023 of SSEPlus_emulation_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_ceil_sd_REF(__m128d a, __m128d b)
{
    ssp_m128 dst, src;

    dst.d = a;
    src.d = b;
    dst.f64[0] = ceil(src.f64[0]);   /* upper lane stays as supplied by a */
    return dst.d;
}
/**
 * Reference implementation of _mm_ceil_ss [SSE4.1].
 * Lane 0 of the result is ceil() of lane 0 of b (narrowed to ssp_f32);
 * lanes 1..3 are copied from a.
 * Defined at line 2034 of SSEPlus_emulation_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_ceil_ss_REF(__m128 a, __m128 b)
{
    ssp_m128 dst, src;

    dst.f = a;
    src.f = b;
    dst.f32[0] = (ssp_f32)ceil(src.f32[0]);   /* upper lanes stay as supplied by a */
    return dst.f;
}
/**
 * Reference implementation of _mm_cmpeq_epi64 [SSE4.1].
 * Each 64-bit lane of the result has all bits set where a == b and all
 * bits clear otherwise.
 * Defined at line 908 of SSEPlus_emulation_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_cmpeq_epi64_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.s64[lane] = (lhs.s64[lane] == rhs.s64[lane]) ? 0xFFFFFFFFFFFFFFFFll : 0x0ll;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comeq_epi16 / pcomw [SSE5].
 * Each 16-bit lane becomes 0xFFFF where the signed lanes of a and b are
 * equal, otherwise 0.
 * Defined at line 20 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comeq_epi16_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 8; ++lane) {
        lhs.u16[lane] = (lhs.s16[lane] == rhs.s16[lane]) ? 0xFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comeq_epi32 / pcomd [SSE5].
 * Each 32-bit lane becomes 0xFFFFFFFF where the signed lanes of a and b are
 * equal, otherwise 0.
 * Defined at line 37 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comeq_epi32_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 4; ++lane) {
        lhs.u32[lane] = (lhs.s32[lane] == rhs.s32[lane]) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comeq_epi64 / pcomq [SSE5].
 * Each 64-bit lane becomes all ones where the signed lanes of a and b are
 * equal, otherwise 0.
 * Defined at line 50 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comeq_epi64_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.u64[lane] = (lhs.s64[lane] == rhs.s64[lane]) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comeq_epi8 / pcomb [SSE5].
 * Each byte becomes 0xFF where the signed bytes of a and b are equal,
 * otherwise 0.
 * Defined at line 61 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comeq_epi8_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 16; ++lane) {
        lhs.u8[lane] = (lhs.s8[lane] == rhs.s8[lane]) ? 0xFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comeq_epu16 / pcomuw [SSE5].
 * Each 16-bit lane becomes 0xFFFF where the unsigned lanes of a and b are
 * equal, otherwise 0.
 * Defined at line 86 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comeq_epu16_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 8; ++lane) {
        lhs.u16[lane] = (lhs.u16[lane] == rhs.u16[lane]) ? 0xFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comeq_epu32 / pcomud [SSE5].
 * Each 32-bit lane becomes 0xFFFFFFFF where the unsigned lanes of a and b
 * are equal, otherwise 0.
 * Defined at line 103 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comeq_epu32_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 4; ++lane) {
        lhs.u32[lane] = (lhs.u32[lane] == rhs.u32[lane]) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comeq_epu64 / pcomuq [SSE5].
 * Each 64-bit lane becomes all ones where the unsigned lanes of a and b are
 * equal, otherwise 0.
 * Defined at line 116 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comeq_epu64_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.u64[lane] = (lhs.u64[lane] == rhs.u64[lane]) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comeq_epu8 / pcomub [SSE5].
 * Each byte becomes 0xFF where the unsigned bytes of a and b are equal,
 * otherwise 0.
 * Defined at line 127 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comeq_epu8_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 16; ++lane) {
        lhs.u8[lane] = (lhs.u8[lane] == rhs.u8[lane]) ? 0xFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comeq_pd / compd [SSE5].
 * Each 64-bit lane becomes all ones where the double lanes of a and b
 * compare equal with ==, otherwise 0.
 * Defined at line 152 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comeq_pd_REF(__m128d a, __m128d b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.u64[lane] = (lhs.f64[lane] == rhs.f64[lane]) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}
/**
 * Reference implementation of _mm_comeq_ps / comps [SSE5].
 * Each 32-bit lane becomes 0xFFFFFFFF where the float lanes of a and b
 * compare equal with ==, otherwise 0.
 * Defined at line 163 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comeq_ps_REF(__m128 a, __m128 b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for (lane = 0; lane < 4; ++lane) {
        lhs.u32[lane] = (lhs.f32[lane] == rhs.f32[lane]) ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}
SSP_FORCEINLINE __m128d ssp_comeq_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
SSP_FORCEINLINE __m128 ssp_comeq_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
SSP_FORCEINLINE __m128i ssp_comfalse_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comfalse_epi16/ pcomw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1094 of file SSEPlus_emulation_comps_REF.h.
/**
 * Reference implementation of _mm_comfalse_epi32 / pcomd [SSE5].
 * Forwards both operands to ssp_comfalse_epi16_REF and returns its result.
 * Defined at line 1101 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comfalse_epi32_REF(__m128i a, __m128i b)
{
    __m128i result = ssp_comfalse_epi16_REF(a, b);
    return result;
}
/**
 * Reference implementation of _mm_comfalse_epi64 / pcomq [SSE5].
 * Forwards both operands to ssp_comfalse_epi16_REF and returns its result.
 * Defined at line 1107 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comfalse_epi64_REF(__m128i a, __m128i b)
{
    __m128i result = ssp_comfalse_epi16_REF(a, b);
    return result;
}
/**
 * Reference implementation of _mm_comfalse_epi8 / pcomb [SSE5].
 * Forwards both operands to ssp_comfalse_epi16_REF and returns its result.
 * Defined at line 1113 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comfalse_epi8_REF(__m128i a, __m128i b)
{
    __m128i result = ssp_comfalse_epi16_REF(a, b);
    return result;
}
/**
 * Reference implementation of _mm_comfalse_epu16 / pcomuw [SSE5].
 * Forwards both operands to ssp_comfalse_epi16_REF and returns its result.
 * Defined at line 1119 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comfalse_epu16_REF(__m128i a, __m128i b)
{
    __m128i result = ssp_comfalse_epi16_REF(a, b);
    return result;
}
/**
 * Reference implementation of _mm_comfalse_epu32 / pcomud [SSE5].
 * Forwards both operands to ssp_comfalse_epi16_REF and returns its result.
 * Defined at line 1125 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comfalse_epu32_REF(__m128i a, __m128i b)
{
    __m128i result = ssp_comfalse_epi16_REF(a, b);
    return result;
}
/**
 * Reference implementation of _mm_comfalse_epu64 / pcomuq [SSE5].
 * Forwards both operands to ssp_comfalse_epi16_REF and returns its result.
 * Defined at line 1131 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comfalse_epu64_REF(__m128i a, __m128i b)
{
    __m128i result = ssp_comfalse_epi16_REF(a, b);
    return result;
}
/**
 * Reference implementation of _mm_comfalse_epu8 / pcomub [SSE5].
 * Forwards both operands to ssp_comfalse_epi16_REF and returns its result.
 * Defined at line 1137 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comfalse_epu8_REF(__m128i a, __m128i b)
{
    __m128i result = ssp_comfalse_epi16_REF(a, b);
    return result;
}
SSP_FORCEINLINE __m128d ssp_comfalse_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comfalse_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1143 of file SSEPlus_emulation_comps_REF.h.
SSP_FORCEINLINE __m128 ssp_comfalse_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comfalse_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1150 of file SSEPlus_emulation_comps_REF.h.
SSP_FORCEINLINE __m128d ssp_comfalse_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
SSP_FORCEINLINE __m128 ssp_comfalse_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
/**
 * Reference implementation of _mm_comge_epi16 / pcomw [SSE5].
 * Each 16-bit lane becomes 0xFFFF where the signed lane of a is >= that of
 * b, otherwise 0.
 * Defined at line 1229 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comge_epi16_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 8; ++lane) {
        lhs.u16[lane] = (lhs.s16[lane] >= rhs.s16[lane]) ? 0xFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comge_epi32 / pcomd [SSE5].
 * Each 32-bit lane becomes 0xFFFFFFFF where the signed lane of a is >= that
 * of b, otherwise 0.
 * Defined at line 1246 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comge_epi32_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 4; ++lane) {
        lhs.u32[lane] = (lhs.s32[lane] >= rhs.s32[lane]) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comge_epi64 / pcomq [SSE5].
 * Each 64-bit lane becomes all ones where the signed lane of a is >= that
 * of b, otherwise 0.
 * Defined at line 1259 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comge_epi64_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.u64[lane] = (lhs.s64[lane] >= rhs.s64[lane]) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comge_epi8 / pcomb [SSE5].
 * Each byte becomes 0xFF where the signed byte of a is >= that of b,
 * otherwise 0.
 * Defined at line 1270 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comge_epi8_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 16; ++lane) {
        lhs.u8[lane] = (lhs.s8[lane] >= rhs.s8[lane]) ? 0xFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comge_epu16 / pcomuw [SSE5].
 * Each 16-bit lane becomes 0xFFFF where the unsigned lane of a is >= that
 * of b, otherwise 0.
 * Defined at line 1295 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comge_epu16_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 8; ++lane) {
        lhs.u16[lane] = (lhs.u16[lane] >= rhs.u16[lane]) ? 0xFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comge_epu32 / pcomud [SSE5].
 * Each 32-bit lane becomes 0xFFFFFFFF where the unsigned lane of a is >=
 * that of b, otherwise 0.
 * Defined at line 1312 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comge_epu32_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 4; ++lane) {
        lhs.u32[lane] = (lhs.u32[lane] >= rhs.u32[lane]) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comge_epu64 / pcomuq [SSE5].
 * Each 64-bit lane becomes all ones where the unsigned lane of a is >= that
 * of b, otherwise 0.
 * Defined at line 1325 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comge_epu64_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.u64[lane] = (lhs.u64[lane] >= rhs.u64[lane]) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comge_epu8 / pcomub [SSE5].
 * Each byte becomes 0xFF where the unsigned byte of a is >= that of b,
 * otherwise 0.
 * Defined at line 1336 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comge_epu8_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 16; ++lane) {
        lhs.u8[lane] = (lhs.u8[lane] >= rhs.u8[lane]) ? 0xFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comge_pd / compd [SSE5].
 * Each 64-bit lane becomes all ones where the double lane of a is >= that
 * of b, otherwise 0.
 * Defined at line 1361 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comge_pd_REF(__m128d a, __m128d b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.u64[lane] = (lhs.f64[lane] >= rhs.f64[lane]) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}
/**
 * Reference implementation of _mm_comge_ps / comps [SSE5].
 * Each 32-bit lane becomes 0xFFFFFFFF where the float lane of a is >= that
 * of b, otherwise 0.
 * Defined at line 1372 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comge_ps_REF(__m128 a, __m128 b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for (lane = 0; lane < 4; ++lane) {
        lhs.u32[lane] = (lhs.f32[lane] >= rhs.f32[lane]) ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}
SSP_FORCEINLINE __m128d ssp_comge_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
SSP_FORCEINLINE __m128 ssp_comge_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
/**
 * Reference implementation of _mm_comgt_epi16 / pcomw [SSE5].
 * Each 16-bit lane becomes 0xFFFF where the signed lane of a is > that of
 * b, otherwise 0.
 * Defined at line 1411 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comgt_epi16_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 8; ++lane) {
        lhs.u16[lane] = (lhs.s16[lane] > rhs.s16[lane]) ? 0xFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comgt_epi32 / pcomd [SSE5].
 * Each 32-bit lane becomes 0xFFFFFFFF where the signed lane of a is > that
 * of b, otherwise 0.
 * Defined at line 1428 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comgt_epi32_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 4; ++lane) {
        lhs.u32[lane] = (lhs.s32[lane] > rhs.s32[lane]) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comgt_epi64 / pcomq [SSE5].
 * Each 64-bit lane becomes all ones where the signed lane of a is > that of
 * b, otherwise 0.
 * Defined at line 1441 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comgt_epi64_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.u64[lane] = (lhs.s64[lane] > rhs.s64[lane]) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comgt_epi8 / pcomb [SSE5].
 * Each byte becomes 0xFF where the signed byte of a is > that of b,
 * otherwise 0.
 * Defined at line 1452 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comgt_epi8_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 16; ++lane) {
        lhs.u8[lane] = (lhs.s8[lane] > rhs.s8[lane]) ? 0xFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comgt_epu16 / pcomuw [SSE5].
 * Each 16-bit lane becomes 0xFFFF where the unsigned lane of a is > that of
 * b, otherwise 0.
 * Defined at line 1477 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comgt_epu16_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 8; ++lane) {
        lhs.u16[lane] = (lhs.u16[lane] > rhs.u16[lane]) ? 0xFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comgt_epu32 / pcomud [SSE5].
 * Each 32-bit lane becomes 0xFFFFFFFF where the unsigned lane of a is >
 * that of b, otherwise 0.
 * Defined at line 1494 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comgt_epu32_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 4; ++lane) {
        lhs.u32[lane] = (lhs.u32[lane] > rhs.u32[lane]) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comgt_epu64 / pcomuq [SSE5].
 * Each 64-bit lane becomes all ones where the unsigned lane of a is > that
 * of b, otherwise 0.
 * Defined at line 1507 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comgt_epu64_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.u64[lane] = (lhs.u64[lane] > rhs.u64[lane]) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comgt_epu8 / pcomub [SSE5].
 * Each byte becomes 0xFF where the unsigned byte of a is > that of b,
 * otherwise 0.
 * Defined at line 1518 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comgt_epu8_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 16; ++lane) {
        lhs.u8[lane] = (lhs.u8[lane] > rhs.u8[lane]) ? 0xFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comgt_pd / compd [SSE5].
 * Each 64-bit lane becomes all ones where the double lane of a is > that of
 * b, otherwise 0.
 * Defined at line 1543 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comgt_pd_REF(__m128d a, __m128d b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.u64[lane] = (lhs.f64[lane] > rhs.f64[lane]) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}
/**
 * Reference implementation of _mm_comgt_ps / comps [SSE5].
 * Each 32-bit lane becomes 0xFFFFFFFF where the float lane of a is > that
 * of b, otherwise 0.
 * Defined at line 1555 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comgt_ps_REF(__m128 a, __m128 b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for (lane = 0; lane < 4; ++lane) {
        lhs.u32[lane] = (lhs.f32[lane] > rhs.f32[lane]) ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}
SSP_FORCEINLINE __m128d ssp_comgt_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
SSP_FORCEINLINE __m128 ssp_comgt_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
/**
 * Reference implementation of _mm_comle_epi16 / pcomw [SSE5].
 * Each 16-bit lane becomes 0xFFFF where the signed lane of a is <= that of
 * b, otherwise 0.
 * Defined at line 380 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epi16_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 8; ++lane) {
        lhs.u16[lane] = (lhs.s16[lane] <= rhs.s16[lane]) ? 0xFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comle_epi32 / pcomd [SSE5].
 * Each 32-bit lane becomes 0xFFFFFFFF where the signed lane of a is <= that
 * of b, otherwise 0.
 * Defined at line 397 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epi32_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 4; ++lane) {
        lhs.u32[lane] = (lhs.s32[lane] <= rhs.s32[lane]) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comle_epi64 / pcomq [SSE5].
 * Each 64-bit lane becomes all ones where the signed lane of a is <= that
 * of b, otherwise 0.
 * Defined at line 410 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epi64_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 2; ++lane) {
        lhs.u64[lane] = (lhs.s64[lane] <= rhs.s64[lane]) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comle_epi8 / pcomb [SSE5].
 * Each byte becomes 0xFF where the signed byte of a is <= that of b,
 * otherwise 0.
 * Defined at line 421 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epi8_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 16; ++lane) {
        lhs.u8[lane] = (lhs.s8[lane] <= rhs.s8[lane]) ? 0xFF : 0;
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_comle_epu16 / pcomuw [SSE5].
 * Each 16-bit lane becomes 0xFFFF where the unsigned lane of a is <= that
 * of b, otherwise 0.
 * Defined at line 446 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epu16_REF(__m128i a, __m128i b)
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for (lane = 0; lane < 8; ++lane) {
        lhs.u16[lane] = (lhs.u16[lane] <= rhs.u16[lane]) ? 0xFFFF : 0;
    }
    return lhs.i;
}
SSP_FORCEINLINE __m128i ssp_comle_epu32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comle_epu32/ pcomud [SSE5]. (SSE5 .pdf documentation here)
Definition at line 463 of file SSEPlus_emulation_comps_REF.h.
00464 { 00465 ssp_m128 A,B; 00466 A.i = a; 00467 B.i = b; 00468 A.u32[0] = (A.u32[0]<=B.u32[0]) ? 0xFFFFFFFF : 0; 00469 A.u32[1] = (A.u32[1]<=B.u32[1]) ? 0xFFFFFFFF : 0; 00470 A.u32[2] = (A.u32[2]<=B.u32[2]) ? 0xFFFFFFFF : 0; 00471 A.u32[3] = (A.u32[3]<=B.u32[3]) ? 0xFFFFFFFF : 0; 00472 return A.i; 00473 }
SSP_FORCEINLINE __m128i ssp_comle_epu64_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comle_epu64/ pcomuq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 476 of file SSEPlus_emulation_comps_REF.h.
00477 { 00478 ssp_m128 A,B; 00479 A.i = a; 00480 B.i = b; 00481 A.u64[0] = (A.u64[0]<=B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0; 00482 A.u64[1] = (A.u64[1]<=B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0; 00483 return A.i; 00484 }
SSP_FORCEINLINE __m128i ssp_comle_epu8_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comle_epu8/ pcomub [SSE5]. (SSE5 .pdf documentation here)
Definition at line 487 of file SSEPlus_emulation_comps_REF.h.
00488 { 00489 ssp_m128 A,B; 00490 A.i = a; 00491 B.i = b; 00492 A.u8[ 0] = (A.u8[ 0]<=B.u8[ 0]) ? 0xFF : 0; 00493 A.u8[ 1] = (A.u8[ 1]<=B.u8[ 1]) ? 0xFF : 0; 00494 A.u8[ 2] = (A.u8[ 2]<=B.u8[ 2]) ? 0xFF : 0; 00495 A.u8[ 3] = (A.u8[ 3]<=B.u8[ 3]) ? 0xFF : 0; 00496 A.u8[ 4] = (A.u8[ 4]<=B.u8[ 4]) ? 0xFF : 0; 00497 A.u8[ 5] = (A.u8[ 5]<=B.u8[ 5]) ? 0xFF : 0; 00498 A.u8[ 6] = (A.u8[ 6]<=B.u8[ 6]) ? 0xFF : 0; 00499 A.u8[ 7] = (A.u8[ 7]<=B.u8[ 7]) ? 0xFF : 0; 00500 A.u8[ 8] = (A.u8[ 8]<=B.u8[ 8]) ? 0xFF : 0; 00501 A.u8[ 9] = (A.u8[ 9]<=B.u8[ 9]) ? 0xFF : 0; 00502 A.u8[10] = (A.u8[10]<=B.u8[10]) ? 0xFF : 0; 00503 A.u8[11] = (A.u8[11]<=B.u8[11]) ? 0xFF : 0; 00504 A.u8[12] = (A.u8[12]<=B.u8[12]) ? 0xFF : 0; 00505 A.u8[13] = (A.u8[13]<=B.u8[13]) ? 0xFF : 0; 00506 A.u8[14] = (A.u8[14]<=B.u8[14]) ? 0xFF : 0; 00507 A.u8[15] = (A.u8[15]<=B.u8[15]) ? 0xFF : 0; 00508 return A.i; 00509 }
SSP_FORCEINLINE __m128d ssp_comle_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comle_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 512 of file SSEPlus_emulation_comps_REF.h.
00513 { 00514 ssp_m128 A,B; 00515 A.d = a; 00516 B.d = b; 00517 00518 A.u64[0] = (A.f64[0]<=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0; 00519 A.u64[1] = (A.f64[1]<=B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0; 00520 return A.d; 00521 }
SSP_FORCEINLINE __m128 ssp_comle_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comle_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 524 of file SSEPlus_emulation_comps_REF.h.
00525 { 00526 ssp_m128 A,B; 00527 A.f = a; 00528 B.f = b; 00529 A.u32[0] = (A.f32[0]<=B.f32[0]) ? 0xFFFFFFFF : 0; 00530 A.u32[1] = (A.f32[1]<=B.f32[1]) ? 0xFFFFFFFF : 0; 00531 A.u32[2] = (A.f32[2]<=B.f32[2]) ? 0xFFFFFFFF : 0; 00532 A.u32[3] = (A.f32[3]<=B.f32[3]) ? 0xFFFFFFFF : 0; 00533 return A.f; 00534 }
SSP_FORCEINLINE __m128d ssp_comle_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comle_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 537 of file SSEPlus_emulation_comps_REF.h.
00538 { 00539 ssp_m128 A,B; 00540 A.d = a; 00541 B.d = b; 00542 00543 A.u64[0] = (A.f64[0]<=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0; 00544 return A.d; 00545 }
SSP_FORCEINLINE __m128 ssp_comle_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
SSP_FORCEINLINE __m128i ssp_comlt_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comlt_epi16/ pcomw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 200 of file SSEPlus_emulation_comps_REF.h.
00201 { 00202 ssp_m128 A,B; 00203 A.i = a; 00204 B.i = b; 00205 A.u16[0] = (A.s16[0]<B.s16[0]) ? 0xFFFF : 0; 00206 A.u16[1] = (A.s16[1]<B.s16[1]) ? 0xFFFF : 0; 00207 A.u16[2] = (A.s16[2]<B.s16[2]) ? 0xFFFF : 0; 00208 A.u16[3] = (A.s16[3]<B.s16[3]) ? 0xFFFF : 0; 00209 A.u16[4] = (A.s16[4]<B.s16[4]) ? 0xFFFF : 0; 00210 A.u16[5] = (A.s16[5]<B.s16[5]) ? 0xFFFF : 0; 00211 A.u16[6] = (A.s16[6]<B.s16[6]) ? 0xFFFF : 0; 00212 A.u16[7] = (A.s16[7]<B.s16[7]) ? 0xFFFF : 0; 00213 return A.i; 00214 }
SSP_FORCEINLINE __m128i ssp_comlt_epi32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comlt_epi32/ pcomd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 217 of file SSEPlus_emulation_comps_REF.h.
00218 { 00219 ssp_m128 A,B; 00220 A.i = a; 00221 B.i = b; 00222 A.u32[0] = (A.s32[0]<B.s32[0]) ? 0xFFFFFFFF : 0; 00223 A.u32[1] = (A.s32[1]<B.s32[1]) ? 0xFFFFFFFF : 0; 00224 A.u32[2] = (A.s32[2]<B.s32[2]) ? 0xFFFFFFFF : 0; 00225 A.u32[3] = (A.s32[3]<B.s32[3]) ? 0xFFFFFFFF : 0; 00226 return A.i; 00227 }
SSP_FORCEINLINE __m128i ssp_comlt_epi64_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comlt_epi64/ pcomq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 230 of file SSEPlus_emulation_comps_REF.h.
00231 { 00232 ssp_m128 A,B; 00233 A.i = a; 00234 B.i = b; 00235 A.u64[0] = (A.s64[0]<B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0; 00236 A.u64[1] = (A.s64[1]<B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0; 00237 return A.i; 00238 }
SSP_FORCEINLINE __m128i ssp_comlt_epi8_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comlt_epi8/ pcomb [SSE5]. (SSE5 .pdf documentation here)
Definition at line 241 of file SSEPlus_emulation_comps_REF.h.
00242 { 00243 ssp_m128 A,B; 00244 A.i = a; 00245 B.i = b; 00246 A.u8[ 0] = (A.s8[ 0]<B.s8[ 0]) ? 0xFF : 0; 00247 A.u8[ 1] = (A.s8[ 1]<B.s8[ 1]) ? 0xFF : 0; 00248 A.u8[ 2] = (A.s8[ 2]<B.s8[ 2]) ? 0xFF : 0; 00249 A.u8[ 3] = (A.s8[ 3]<B.s8[ 3]) ? 0xFF : 0; 00250 A.u8[ 4] = (A.s8[ 4]<B.s8[ 4]) ? 0xFF : 0; 00251 A.u8[ 5] = (A.s8[ 5]<B.s8[ 5]) ? 0xFF : 0; 00252 A.u8[ 6] = (A.s8[ 6]<B.s8[ 6]) ? 0xFF : 0; 00253 A.u8[ 7] = (A.s8[ 7]<B.s8[ 7]) ? 0xFF : 0; 00254 A.u8[ 8] = (A.s8[ 8]<B.s8[ 8]) ? 0xFF : 0; 00255 A.u8[ 9] = (A.s8[ 9]<B.s8[ 9]) ? 0xFF : 0; 00256 A.u8[10] = (A.s8[10]<B.s8[10]) ? 0xFF : 0; 00257 A.u8[11] = (A.s8[11]<B.s8[11]) ? 0xFF : 0; 00258 A.u8[12] = (A.s8[12]<B.s8[12]) ? 0xFF : 0; 00259 A.u8[13] = (A.s8[13]<B.s8[13]) ? 0xFF : 0; 00260 A.u8[14] = (A.s8[14]<B.s8[14]) ? 0xFF : 0; 00261 A.u8[15] = (A.s8[15]<B.s8[15]) ? 0xFF : 0; 00262 return A.i; 00263 }
SSP_FORCEINLINE __m128i ssp_comlt_epu16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comlt_epu16/ pcomuw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 266 of file SSEPlus_emulation_comps_REF.h.
00267 { 00268 ssp_m128 A,B; 00269 A.i = a; 00270 B.i = b; 00271 A.u16[0] = (A.u16[0]<B.u16[0]) ? 0xFFFF : 0; 00272 A.u16[1] = (A.u16[1]<B.u16[1]) ? 0xFFFF : 0; 00273 A.u16[2] = (A.u16[2]<B.u16[2]) ? 0xFFFF : 0; 00274 A.u16[3] = (A.u16[3]<B.u16[3]) ? 0xFFFF : 0; 00275 A.u16[4] = (A.u16[4]<B.u16[4]) ? 0xFFFF : 0; 00276 A.u16[5] = (A.u16[5]<B.u16[5]) ? 0xFFFF : 0; 00277 A.u16[6] = (A.u16[6]<B.u16[6]) ? 0xFFFF : 0; 00278 A.u16[7] = (A.u16[7]<B.u16[7]) ? 0xFFFF : 0; 00279 return A.i; 00280 }
SSP_FORCEINLINE __m128i ssp_comlt_epu32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comlt_epu32/ pcomud [SSE5]. (SSE5 .pdf documentation here)
Definition at line 283 of file SSEPlus_emulation_comps_REF.h.
00284 { 00285 ssp_m128 A,B; 00286 A.i = a; 00287 B.i = b; 00288 A.u32[0] = (A.u32[0]<B.u32[0]) ? 0xFFFFFFFF : 0; 00289 A.u32[1] = (A.u32[1]<B.u32[1]) ? 0xFFFFFFFF : 0; 00290 A.u32[2] = (A.u32[2]<B.u32[2]) ? 0xFFFFFFFF : 0; 00291 A.u32[3] = (A.u32[3]<B.u32[3]) ? 0xFFFFFFFF : 0; 00292 return A.i; 00293 }
SSP_FORCEINLINE __m128i ssp_comlt_epu64_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comlt_epu64/ pcomuq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 296 of file SSEPlus_emulation_comps_REF.h.
00297 { 00298 ssp_m128 A,B; 00299 A.i = a; 00300 B.i = b; 00301 A.u64[0] = (A.u64[0]<B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0; 00302 A.u64[1] = (A.u64[1]<B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0; 00303 return A.i; 00304 }
SSP_FORCEINLINE __m128i ssp_comlt_epu8_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comlt_epu8/ pcomub [SSE5]. (SSE5 .pdf documentation here)
Definition at line 307 of file SSEPlus_emulation_comps_REF.h.
00308 { 00309 ssp_m128 A,B; 00310 A.i = a; 00311 B.i = b; 00312 A.u8[ 0] = (A.u8[ 0]<B.u8[0]) ? 0xFF : 0; 00313 A.u8[ 1] = (A.u8[ 1]<B.u8[1]) ? 0xFF : 0; 00314 A.u8[ 2] = (A.u8[ 2]<B.u8[2]) ? 0xFF : 0; 00315 A.u8[ 3] = (A.u8[ 3]<B.u8[3]) ? 0xFF : 0; 00316 A.u8[ 4] = (A.u8[ 4]<B.u8[4]) ? 0xFF : 0; 00317 A.u8[ 5] = (A.u8[ 5]<B.u8[5]) ? 0xFF : 0; 00318 A.u8[ 6] = (A.u8[ 6]<B.u8[6]) ? 0xFF : 0; 00319 A.u8[ 7] = (A.u8[ 7]<B.u8[7]) ? 0xFF : 0; 00320 A.u8[ 8] = (A.u8[ 8]<B.u8[8]) ? 0xFF : 0; 00321 A.u8[ 9] = (A.u8[ 9]<B.u8[9]) ? 0xFF : 0; 00322 A.u8[10] = (A.u8[10]<B.u8[10]) ? 0xFF : 0; 00323 A.u8[11] = (A.u8[11]<B.u8[11]) ? 0xFF : 0; 00324 A.u8[12] = (A.u8[12]<B.u8[12]) ? 0xFF : 0; 00325 A.u8[13] = (A.u8[13]<B.u8[13]) ? 0xFF : 0; 00326 A.u8[14] = (A.u8[14]<B.u8[14]) ? 0xFF : 0; 00327 A.u8[15] = (A.u8[15]<B.u8[15]) ? 0xFF : 0; 00328 return A.i; 00329 }
SSP_FORCEINLINE __m128d ssp_comlt_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comlt_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 332 of file SSEPlus_emulation_comps_REF.h.
00333 { 00334 ssp_m128 A,B; 00335 A.d = a; 00336 B.d = b; 00337 A.u64[0] = (A.f64[0]<B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0; 00338 A.u64[1] = (A.f64[1]<B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0; 00339 return A.d; 00340 }
SSP_FORCEINLINE __m128 ssp_comlt_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comlt_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 343 of file SSEPlus_emulation_comps_REF.h.
00344 { 00345 ssp_m128 A,B; 00346 A.f = a; 00347 B.f = b; 00348 A.u32[0] = (A.f32[0]<B.f32[0]) ? 0xFFFFFFFF : 0; 00349 A.u32[1] = (A.f32[1]<B.f32[1]) ? 0xFFFFFFFF : 0; 00350 A.u32[2] = (A.f32[2]<B.f32[2]) ? 0xFFFFFFFF : 0; 00351 A.u32[3] = (A.f32[3]<B.f32[3]) ? 0xFFFFFFFF : 0; 00352 return A.f; 00353 }
SSP_FORCEINLINE __m128d ssp_comlt_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
SSP_FORCEINLINE __m128 ssp_comlt_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
SSP_FORCEINLINE __m128i ssp_comneq_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comneq_epi16/ pcomw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 611 of file SSEPlus_emulation_comps_REF.h.
00612 { 00613 ssp_m128 A,B; 00614 A.i = a; 00615 B.i = b; 00616 A.u16[0] = (A.s16[0]!=B.s16[0]) ? 0xFFFF : 0; 00617 A.u16[1] = (A.s16[1]!=B.s16[1]) ? 0xFFFF : 0; 00618 A.u16[2] = (A.s16[2]!=B.s16[2]) ? 0xFFFF : 0; 00619 A.u16[3] = (A.s16[3]!=B.s16[3]) ? 0xFFFF : 0; 00620 A.u16[4] = (A.s16[4]!=B.s16[4]) ? 0xFFFF : 0; 00621 A.u16[5] = (A.s16[5]!=B.s16[5]) ? 0xFFFF : 0; 00622 A.u16[6] = (A.s16[6]!=B.s16[6]) ? 0xFFFF : 0; 00623 A.u16[7] = (A.s16[7]!=B.s16[7]) ? 0xFFFF : 0; 00624 return A.i; 00625 }
SSP_FORCEINLINE __m128i ssp_comneq_epi32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comneq_epi32/ pcomd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 628 of file SSEPlus_emulation_comps_REF.h.
00629 { 00630 ssp_m128 A,B; 00631 A.i = a; 00632 B.i = b; 00633 A.u32[0] = (A.s32[0]!=B.s32[0]) ? 0xFFFFFFFF : 0; 00634 A.u32[1] = (A.s32[1]!=B.s32[1]) ? 0xFFFFFFFF : 0; 00635 A.u32[2] = (A.s32[2]!=B.s32[2]) ? 0xFFFFFFFF : 0; 00636 A.u32[3] = (A.s32[3]!=B.s32[3]) ? 0xFFFFFFFF : 0; 00637 return A.i; 00638 }
SSP_FORCEINLINE __m128i ssp_comneq_epi64_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comneq_epi64/ pcomq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 641 of file SSEPlus_emulation_comps_REF.h.
00642 { 00643 ssp_m128 A,B; 00644 A.i = a; 00645 B.i = b; 00646 A.u64[0] = (A.s64[0]!=B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0; 00647 A.u64[1] = (A.s64[1]!=B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0; 00648 return A.i; 00649 }
SSP_FORCEINLINE __m128i ssp_comneq_epi8_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comneq_epi8/ pcomb [SSE5]. (SSE5 .pdf documentation here)
Definition at line 652 of file SSEPlus_emulation_comps_REF.h.
00653 { 00654 ssp_m128 A,B; 00655 A.i = a; 00656 B.i = b; 00657 A.u8[ 0] = (A.s8[ 0]!=B.s8[ 0]) ? 0xFF : 0; 00658 A.u8[ 1] = (A.s8[ 1]!=B.s8[ 1]) ? 0xFF : 0; 00659 A.u8[ 2] = (A.s8[ 2]!=B.s8[ 2]) ? 0xFF : 0; 00660 A.u8[ 3] = (A.s8[ 3]!=B.s8[ 3]) ? 0xFF : 0; 00661 A.u8[ 4] = (A.s8[ 4]!=B.s8[ 4]) ? 0xFF : 0; 00662 A.u8[ 5] = (A.s8[ 5]!=B.s8[ 5]) ? 0xFF : 0; 00663 A.u8[ 6] = (A.s8[ 6]!=B.s8[ 6]) ? 0xFF : 0; 00664 A.u8[ 7] = (A.s8[ 7]!=B.s8[ 7]) ? 0xFF : 0; 00665 A.u8[ 8] = (A.s8[ 8]!=B.s8[ 8]) ? 0xFF : 0; 00666 A.u8[ 9] = (A.s8[ 9]!=B.s8[ 9]) ? 0xFF : 0; 00667 A.u8[10] = (A.s8[10]!=B.s8[10]) ? 0xFF : 0; 00668 A.u8[11] = (A.s8[11]!=B.s8[11]) ? 0xFF : 0; 00669 A.u8[12] = (A.s8[12]!=B.s8[12]) ? 0xFF : 0; 00670 A.u8[13] = (A.s8[13]!=B.s8[13]) ? 0xFF : 0; 00671 A.u8[14] = (A.s8[14]!=B.s8[14]) ? 0xFF : 0; 00672 A.u8[15] = (A.s8[15]!=B.s8[15]) ? 0xFF : 0; 00673 return A.i; 00674 }
SSP_FORCEINLINE __m128i ssp_comneq_epu16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comneq_epu16/ pcomuw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 677 of file SSEPlus_emulation_comps_REF.h.
00678 { 00679 ssp_m128 A,B; 00680 A.i = a; 00681 B.i = b; 00682 A.u16[0] = (A.u16[0]!=B.u16[0]) ? 0xFFFF : 0; 00683 A.u16[1] = (A.u16[1]!=B.u16[1]) ? 0xFFFF : 0; 00684 A.u16[2] = (A.u16[2]!=B.u16[2]) ? 0xFFFF : 0; 00685 A.u16[3] = (A.u16[3]!=B.u16[3]) ? 0xFFFF : 0; 00686 A.u16[4] = (A.u16[4]!=B.u16[4]) ? 0xFFFF : 0; 00687 A.u16[5] = (A.u16[5]!=B.u16[5]) ? 0xFFFF : 0; 00688 A.u16[6] = (A.u16[6]!=B.u16[6]) ? 0xFFFF : 0; 00689 A.u16[7] = (A.u16[7]!=B.u16[7]) ? 0xFFFF : 0; 00690 return A.i; 00691 }
SSP_FORCEINLINE __m128i ssp_comneq_epu32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comneq_epu32/ pcomud [SSE5]. (SSE5 .pdf documentation here)
Definition at line 694 of file SSEPlus_emulation_comps_REF.h.
00695 { 00696 ssp_m128 A,B; 00697 A.i = a; 00698 B.i = b; 00699 A.u32[0] = (A.u32[0]!=B.u32[0]) ? 0xFFFFFFFF : 0; 00700 A.u32[1] = (A.u32[1]!=B.u32[1]) ? 0xFFFFFFFF : 0; 00701 A.u32[2] = (A.u32[2]!=B.u32[2]) ? 0xFFFFFFFF : 0; 00702 A.u32[3] = (A.u32[3]!=B.u32[3]) ? 0xFFFFFFFF : 0; 00703 return A.i; 00704 }
SSP_FORCEINLINE __m128i ssp_comneq_epu64_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comneq_epu64/ pcomuq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 707 of file SSEPlus_emulation_comps_REF.h.
00708 { 00709 ssp_m128 A,B; 00710 A.i = a; 00711 B.i = b; 00712 A.u64[0] = (A.u64[0]!=B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0; 00713 A.u64[1] = (A.u64[1]!=B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0; 00714 return A.i; 00715 }
SSP_FORCEINLINE __m128i ssp_comneq_epu8_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comneq_epu8/ pcomub [SSE5]. (SSE5 .pdf documentation here)
Definition at line 718 of file SSEPlus_emulation_comps_REF.h.
00719 { 00720 ssp_m128 A,B; 00721 A.i = a; 00722 B.i = b; 00723 A.u8[ 0] = (A.u8[ 0]!=B.u8[ 0]) ? 0xFF : 0; 00724 A.u8[ 1] = (A.u8[ 1]!=B.u8[ 1]) ? 0xFF : 0; 00725 A.u8[ 2] = (A.u8[ 2]!=B.u8[ 2]) ? 0xFF : 0; 00726 A.u8[ 3] = (A.u8[ 3]!=B.u8[ 3]) ? 0xFF : 0; 00727 A.u8[ 4] = (A.u8[ 4]!=B.u8[ 4]) ? 0xFF : 0; 00728 A.u8[ 5] = (A.u8[ 5]!=B.u8[ 5]) ? 0xFF : 0; 00729 A.u8[ 6] = (A.u8[ 6]!=B.u8[ 6]) ? 0xFF : 0; 00730 A.u8[ 7] = (A.u8[ 7]!=B.u8[ 7]) ? 0xFF : 0; 00731 A.u8[ 8] = (A.u8[ 8]!=B.u8[ 8]) ? 0xFF : 0; 00732 A.u8[ 9] = (A.u8[ 9]!=B.u8[ 9]) ? 0xFF : 0; 00733 A.u8[10] = (A.u8[10]!=B.u8[10]) ? 0xFF : 0; 00734 A.u8[11] = (A.u8[11]!=B.u8[11]) ? 0xFF : 0; 00735 A.u8[12] = (A.u8[12]!=B.u8[12]) ? 0xFF : 0; 00736 A.u8[13] = (A.u8[13]!=B.u8[13]) ? 0xFF : 0; 00737 A.u8[14] = (A.u8[14]!=B.u8[14]) ? 0xFF : 0; 00738 A.u8[15] = (A.u8[15]!=B.u8[15]) ? 0xFF : 0; 00739 return A.i; 00740 }
SSP_FORCEINLINE __m128d ssp_comneq_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comneq_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 743 of file SSEPlus_emulation_comps_REF.h.
00744 { 00745 ssp_m128 A,B; 00746 A.d = a; 00747 B.d = b; 00748 00749 A.u64[0] = (A.f64[0]!=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0; 00750 A.u64[1] = (A.f64[1]!=B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0; 00751 return A.d; 00752 }
SSP_FORCEINLINE __m128 ssp_comneq_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comneq_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 755 of file SSEPlus_emulation_comps_REF.h.
00756 { 00757 ssp_m128 A,B; 00758 A.f = a; 00759 B.f = b; 00760 A.u32[0] = (A.f32[0]!=B.f32[0]) ? 0xFFFFFFFF : 0; 00761 A.u32[1] = (A.f32[1]!=B.f32[1]) ? 0xFFFFFFFF : 0; 00762 A.u32[2] = (A.f32[2]!=B.f32[2]) ? 0xFFFFFFFF : 0; 00763 A.u32[3] = (A.f32[3]!=B.f32[3]) ? 0xFFFFFFFF : 0; 00764 return A.f; 00765 }
SSP_FORCEINLINE __m128d ssp_comneq_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comneq_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 768 of file SSEPlus_emulation_comps_REF.h.
00769 { 00770 ssp_m128 A,B; 00771 A.d = a; 00772 B.d = b; 00773 00774 A.u64[0] = (A.f64[0]!=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0; 00775 return A.d; 00776 }
SSP_FORCEINLINE __m128 ssp_comneq_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
SSP_FORCEINLINE __m128d ssp_comnge_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comnge_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 996 of file SSEPlus_emulation_comps_REF.h.
00997 { 00998 ssp_m128 A,B; 00999 A.d = a; 01000 B.d = b; 01001 A.u64[0] = (A.f64[0]>=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF; 01002 A.u64[1] = (A.f64[1]>=B.f64[1]) ? 0 : 0xFFFFFFFFFFFFFFFF; 01003 return A.d; 01004 }
SSP_FORCEINLINE __m128 ssp_comnge_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comnge_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1007 of file SSEPlus_emulation_comps_REF.h.
01008 { 01009 ssp_m128 A,B; 01010 A.f = a; 01011 B.f = b; 01012 A.u32[0] = (A.f32[0]>=B.f32[0]) ? 0 : 0xFFFFFFFF; 01013 A.u32[1] = (A.f32[1]>=B.f32[1]) ? 0 : 0xFFFFFFFF; 01014 A.u32[2] = (A.f32[2]>=B.f32[2]) ? 0 : 0xFFFFFFFF; 01015 A.u32[3] = (A.f32[3]>=B.f32[3]) ? 0 : 0xFFFFFFFF; 01016 return A.f; 01017 }
SSP_FORCEINLINE __m128d ssp_comnge_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
SSP_FORCEINLINE __m128 ssp_comnge_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
SSP_FORCEINLINE __m128d ssp_comngt_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comngt_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1045 of file SSEPlus_emulation_comps_REF.h.
01046 { 01047 ssp_m128 A,B; 01048 A.d = a; 01049 B.d = b; 01050 A.u64[0] = (A.f64[0]>B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF; 01051 A.u64[1] = (A.f64[1]>B.f64[1]) ? 0 : 0xFFFFFFFFFFFFFFFF; 01052 return A.d; 01053 }
SSP_FORCEINLINE __m128 ssp_comngt_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comngt_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1056 of file SSEPlus_emulation_comps_REF.h.
01057 { 01058 ssp_m128 A,B; 01059 A.f = a; 01060 B.f = b; 01061 A.u32[0] = (A.f32[0]>B.f32[0]) ? 0 : 0xFFFFFFFF; 01062 A.u32[1] = (A.f32[1]>B.f32[1]) ? 0 : 0xFFFFFFFF; 01063 A.u32[2] = (A.f32[2]>B.f32[2]) ? 0 : 0xFFFFFFFF; 01064 A.u32[3] = (A.f32[3]>B.f32[3]) ? 0 : 0xFFFFFFFF; 01065 return A.f; 01066 }
SSP_FORCEINLINE __m128d ssp_comngt_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
SSP_FORCEINLINE __m128 ssp_comngt_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
SSP_FORCEINLINE __m128d ssp_comnle_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comnle_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 847 of file SSEPlus_emulation_comps_REF.h.
00848 { 00849 ssp_m128 A,B; 00850 A.d = a; 00851 B.d = b; 00852 A.u64[0] = (A.f64[0]<=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF; 00853 A.u64[1] = (A.f64[1]<=B.f64[1]) ? 0 : 0xFFFFFFFFFFFFFFFF; 00854 00855 return A.d; 00856 }
SSP_FORCEINLINE __m128 ssp_comnle_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comnle_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 859 of file SSEPlus_emulation_comps_REF.h.
00860 { 00861 ssp_m128 A,B; 00862 A.f = a; 00863 B.f = b; 00864 A.u32[0] = (A.f32[0]<=B.f32[0]) ? 0 : 0xFFFFFFFF; 00865 A.u32[1] = (A.f32[1]<=B.f32[1]) ? 0 : 0xFFFFFFFF; 00866 A.u32[2] = (A.f32[2]<=B.f32[2]) ? 0 : 0xFFFFFFFF; 00867 A.u32[3] = (A.f32[3]<=B.f32[3]) ? 0 : 0xFFFFFFFF; 00868 return A.f; 00869 }
SSP_FORCEINLINE __m128d ssp_comnle_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comnle_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 872 of file SSEPlus_emulation_comps_REF.h.
00873 { 00874 ssp_m128 A,B; 00875 A.d = a; 00876 B.d = b; 00877 A.u64[0] = (A.f64[0]<=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF; 00878 00879 return A.d; 00880 }
SSP_FORCEINLINE __m128 ssp_comnle_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
SSP_FORCEINLINE __m128d ssp_comnlt_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comnlt_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 794 of file SSEPlus_emulation_comps_REF.h.
00795 { 00796 ssp_m128 A,B; 00797 A.d = a; 00798 B.d = b; 00799 00800 A.u64[0] = (A.f64[0]<B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF; 00801 A.u64[1] = (A.f64[1]<B.f64[1]) ? 0 : 0xFFFFFFFFFFFFFFFF; 00802 00803 return A.d; 00804 }
SSP_FORCEINLINE __m128 ssp_comnlt_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comnlt_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 807 of file SSEPlus_emulation_comps_REF.h.
00808 { 00809 ssp_m128 A,B; 00810 A.f = a; 00811 B.f = b; 00812 A.u32[0] = (A.f32[0]<B.f32[0]) ? 0 : 0xFFFFFFFF; 00813 A.u32[1] = (A.f32[1]<B.f32[1]) ? 0 : 0xFFFFFFFF; 00814 A.u32[2] = (A.f32[2]<B.f32[2]) ? 0 : 0xFFFFFFFF; 00815 A.u32[3] = (A.f32[3]<B.f32[3]) ? 0 : 0xFFFFFFFF; 00816 return A.f; 00817 }
SSP_FORCEINLINE __m128d ssp_comnlt_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comnlt_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 820 of file SSEPlus_emulation_comps_REF.h.
00821 { 00822 ssp_m128 A,B; 00823 A.d = a; 00824 B.d = b; 00825 00826 A.u64[0] = (A.f64[0]<B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF; 00827 00828 return A.d; 00829 }
SSP_FORCEINLINE __m128 ssp_comnlt_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
SSP_FORCEINLINE __m128d ssp_comoneq_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comoneq_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1180 of file SSEPlus_emulation_comps_REF.h.
01181 { 01182 ssp_m128 A,B; 01183 A.d = a; 01184 B.d = b; 01185 A.u64[0] = ((A.f64[0]<B.f64[0]) || (A.f64[0]>B.f64[0])) ? 0xFFFFFFFFFFFFFFFF : 0; 01186 A.u64[1] = ((A.f64[1]<B.f64[1]) || (A.f64[1]>B.f64[1])) ? 0xFFFFFFFFFFFFFFFF : 0; 01187 return A.d; 01188 }
SSP_FORCEINLINE __m128 ssp_comoneq_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comoneq_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1191 of file SSEPlus_emulation_comps_REF.h.
01192 { 01193 ssp_m128 A,B; 01194 A.f = a; 01195 B.f = b; 01196 A.u32[0] = (A.f32[0]<B.f32[0]) || (A.f32[0]>B.f32[0]) ? 0xFFFFFFFF : 0; 01197 A.u32[1] = (A.f32[1]<B.f32[1]) || (A.f32[1]>B.f32[1]) ? 0xFFFFFFFF : 0; 01198 A.u32[2] = (A.f32[2]<B.f32[2]) || (A.f32[2]>B.f32[2]) ? 0xFFFFFFFF : 0; 01199 A.u32[3] = (A.f32[3]<B.f32[3]) || (A.f32[3]>B.f32[3]) ? 0xFFFFFFFF : 0; 01200 return A.f; 01201 }
SSP_FORCEINLINE __m128d ssp_comoneq_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comoneq_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1204 of file SSEPlus_emulation_comps_REF.h.
01205 { 01206 ssp_m128 A,B; 01207 A.d = a; 01208 B.d = b; 01209 A.u64[0] = ((A.f64[0]<B.f64[0]) || (A.f64[0]>B.f64[0])) ? 0xFFFFFFFFFFFFFFFF : 0; 01210 return A.d; 01211 }
SSP_FORCEINLINE __m128 ssp_comoneq_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comoneq_ss/ comss [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1214 of file SSEPlus_emulation_comps_REF.h.
01215 { 01216 ssp_m128 A,B; 01217 A.f = a; 01218 B.f = b; 01219 A.u32[0] = (A.f32[0]<B.f32[0]) || (A.f32[0]>B.f32[0]) ? 0xFFFFFFFF : 0; 01220 return A.f; 01221 }
SSP_FORCEINLINE __m128d ssp_comord_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comord_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 898 of file SSEPlus_emulation_comps_REF.h.
00899 { 00900 ssp_m128 A,B; 00901 A.d = a; 00902 B.d = b; // NAN(A) || NAN(B) 00903 A.u64[0] = (A.f64[0]!=A.f64[0]) || (B.f64[0]!=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF; 00904 A.u64[1] = (A.f64[1]!=A.f64[1]) || (B.f64[1]!=B.f64[1]) ? 0 : 0xFFFFFFFFFFFFFFFF; 00905 return A.d; 00906 }
SSP_FORCEINLINE __m128 ssp_comord_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comord_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 909 of file SSEPlus_emulation_comps_REF.h.
00910 { 00911 ssp_m128 A,B; 00912 A.f = a; 00913 B.f = b; // NAN(A) || NAN(B) 00914 A.u32[0] = (A.f32[0]!=A.f32[0]) || (B.f32[0]!=B.f32[0]) ? 0 : 0xFFFFFFFF; 00915 A.u32[1] = (A.f32[1]!=A.f32[1]) || (B.f32[1]!=B.f32[1]) ? 0 : 0xFFFFFFFF; 00916 A.u32[2] = (A.f32[2]!=A.f32[2]) || (B.f32[2]!=B.f32[2]) ? 0 : 0xFFFFFFFF; 00917 A.u32[3] = (A.f32[3]!=A.f32[3]) || (B.f32[3]!=B.f32[3]) ? 0 : 0xFFFFFFFF; 00918 return A.f; 00919 }
SSP_FORCEINLINE __m128d ssp_comord_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comord_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 922 of file SSEPlus_emulation_comps_REF.h.
00923 { 00924 ssp_m128 A,B; 00925 A.d = a; 00926 B.d = b; // NAN(A) || NAN(B) 00927 A.u64[0] = (A.f64[0]!=A.f64[0]) || (B.f64[0]!=B.f64[0]) ? 0 : 0xFFFFFFFFFFFFFFFF; 00928 return A.d; 00929 }
SSP_FORCEINLINE __m128 ssp_comord_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comord_ss/ comss [SSE5]. (SSE5 .pdf documentation here)
Definition at line 932 of file SSEPlus_emulation_comps_REF.h.
00933 { 00934 ssp_m128 A,B; 00935 A.f = a; 00936 B.f = b; // NAN(A) || NAN(B) 00937 A.u32[0] = (A.f32[0]!=A.f32[0]) || (B.f32[0]!=B.f32[0]) ? 0 : 0xFFFFFFFF; 00938 return A.f; 00939 }
SSP_FORCEINLINE __m128i ssp_comtrue_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comtrue_epi16/ pcomw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1593 of file SSEPlus_emulation_comps_REF.h.
01594 { 01595 const static __m128i tmp = SSP_CONST_SET_64I( 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF ); 01596 return tmp; 01597 }
SSP_FORCEINLINE __m128i ssp_comtrue_epi32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comtrue_epi32/ pcomd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1600 of file SSEPlus_emulation_comps_REF.h.
01601 { 01602 return ssp_comtrue_epi16_REF(a,b); 01603 }
SSP_FORCEINLINE __m128i ssp_comtrue_epi64_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comtrue_epi64/ pcomq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1606 of file SSEPlus_emulation_comps_REF.h.
01607 { 01608 return ssp_comtrue_epi16_REF(a,b); 01609 }
SSP_FORCEINLINE __m128i ssp_comtrue_epi8_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comtrue_epi8/ pcomb [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1612 of file SSEPlus_emulation_comps_REF.h.
01613 { 01614 return ssp_comtrue_epi16_REF(a,b); 01615 }
SSP_FORCEINLINE __m128i ssp_comtrue_epu16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comtrue_epu16/ pcomuw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1618 of file SSEPlus_emulation_comps_REF.h.
01619 { 01620 return ssp_comtrue_epi16_REF(a,b); 01621 }
SSP_FORCEINLINE __m128i ssp_comtrue_epu32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comtrue_epu32/ pcomud [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1624 of file SSEPlus_emulation_comps_REF.h.
01625 { 01626 return ssp_comtrue_epi16_REF(a,b); 01627 }
SSP_FORCEINLINE __m128i ssp_comtrue_epu64_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comtrue_epu64/ pcomuq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1630 of file SSEPlus_emulation_comps_REF.h.
01631 { 01632 return ssp_comtrue_epi16_REF(a,b); 01633 }
SSP_FORCEINLINE __m128i ssp_comtrue_epu8_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_comtrue_epu8/ pcomub [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1636 of file SSEPlus_emulation_comps_REF.h.
01637 { 01638 return ssp_comtrue_epi16_REF(a,b); 01639 }
SSP_FORCEINLINE __m128d ssp_comtrue_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comtrue_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1642 of file SSEPlus_emulation_comps_REF.h.
01643 { 01644 const static __m128i tmp = SSP_CONST_SET_64I( 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF ); 01645 ssp_m128 A; 01646 A.i = tmp; 01647 return A.d; 01648 }
SSP_FORCEINLINE __m128 ssp_comtrue_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comtrue_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 1651 of file SSEPlus_emulation_comps_REF.h.
01652 { 01653 const static __m128i tmp = SSP_CONST_SET_64I( 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF ); 01654 ssp_m128 A; 01655 A.i = tmp; 01656 return A.f; 01657 }
SSP_FORCEINLINE __m128d ssp_comtrue_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
SSP_FORCEINLINE __m128 ssp_comtrue_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
SSP_FORCEINLINE __m128d ssp_comueq_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comueq_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 947 of file SSEPlus_emulation_comps_REF.h.
00948 { 00949 ssp_m128 A,B; 00950 A.d = a; 00951 B.d = b; 00952 A.u64[0] = ((A.f64[0]<B.f64[0]) || (A.f64[0]>B.f64[0])) ? 0 : 0xFFFFFFFFFFFFFFFF; 00953 A.u64[1] = ((A.f64[1]<B.f64[1]) || (A.f64[1]>B.f64[1])) ? 0 : 0xFFFFFFFFFFFFFFFF; 00954 return A.d; 00955 }
SSP_FORCEINLINE __m128 ssp_comueq_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comueq_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 958 of file SSEPlus_emulation_comps_REF.h.
00959 { 00960 ssp_m128 A,B; 00961 A.f = a; 00962 B.f = b; 00963 A.u32[0] = (A.f32[0]<B.f32[0]) || (A.f32[0]>B.f32[0]) ? 0 : 0xFFFFFFFF; 00964 A.u32[1] = (A.f32[1]<B.f32[1]) || (A.f32[1]>B.f32[1]) ? 0 : 0xFFFFFFFF; 00965 A.u32[2] = (A.f32[2]<B.f32[2]) || (A.f32[2]>B.f32[2]) ? 0 : 0xFFFFFFFF; 00966 A.u32[3] = (A.f32[3]<B.f32[3]) || (A.f32[3]>B.f32[3]) ? 0 : 0xFFFFFFFF; 00967 return A.f; 00968 }
SSP_FORCEINLINE __m128d ssp_comueq_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comueq_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 971 of file SSEPlus_emulation_comps_REF.h.
00972 { 00973 ssp_m128 A,B; 00974 A.d = a; 00975 B.d = b; 00976 A.u64[0] = ((A.f64[0]<B.f64[0]) || (A.f64[0]>B.f64[0])) ? 0 : 0xFFFFFFFFFFFFFFFF; 00977 return A.d; 00978 }
SSP_FORCEINLINE __m128 ssp_comueq_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comueq_ss/ comss [SSE5]. (SSE5 .pdf documentation here)
Definition at line 981 of file SSEPlus_emulation_comps_REF.h.
00982 { 00983 ssp_m128 A,B; 00984 A.f = a; 00985 B.f = b; 00986 A.u32[0] = (A.f32[0]<B.f32[0]) || (A.f32[0]>B.f32[0]) ? 0 : 0xFFFFFFFF; 00987 return A.f; 00988 }
SSP_FORCEINLINE __m128d ssp_comunord_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comunord_pd/ compd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 562 of file SSEPlus_emulation_comps_REF.h.
00563 { 00564 ssp_m128 A,B; 00565 A.d = a; 00566 B.d = b; // NAN(A) || NAN(B) 00567 A.u64[0] = ((A.f64[0]!=A.f64[0]) || (B.f64[0]!=B.f64[0])) ? 0xFFFFFFFFFFFFFFFF : 0; 00568 A.u64[1] = ((A.f64[1]!=A.f64[1]) || (B.f64[1]!=B.f64[1])) ? 0xFFFFFFFFFFFFFFFF : 0; 00569 return A.d; 00570 }
SSP_FORCEINLINE __m128 ssp_comunord_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comunord_ps/ comps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 573 of file SSEPlus_emulation_comps_REF.h.
00574 { 00575 ssp_m128 A,B; 00576 A.f = a; 00577 B.f = b; // NAN(A) || NAN(B) 00578 A.u32[0] = (A.f32[0]!=A.f32[0]) || (B.f32[0]!=B.f32[0]) ? 0xFFFFFFFF : 0; 00579 A.u32[1] = (A.f32[1]!=A.f32[1]) || (B.f32[1]!=B.f32[1]) ? 0xFFFFFFFF : 0; 00580 A.u32[2] = (A.f32[2]!=A.f32[2]) || (B.f32[2]!=B.f32[2]) ? 0xFFFFFFFF : 0; 00581 A.u32[3] = (A.f32[3]!=A.f32[3]) || (B.f32[3]!=B.f32[3]) ? 0xFFFFFFFF : 0; 00582 return A.f; 00583 }
SSP_FORCEINLINE __m128d ssp_comunord_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_comunord_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 586 of file SSEPlus_emulation_comps_REF.h.
00587 { 00588 ssp_m128 A,B; 00589 A.d = a; 00590 B.d = b; // NAN(A) || NAN(B) 00591 A.u64[0] = ((A.f64[0]!=A.f64[0]) || (B.f64[0]!=B.f64[0])) ? 0xFFFFFFFFFFFFFFFF : 0; 00592 return A.d; 00593 }
SSP_FORCEINLINE __m128 ssp_comunord_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_comunord_ss/ comss [SSE5]. (SSE5 .pdf documentation here)
Definition at line 596 of file SSEPlus_emulation_comps_REF.h.
00597 { 00598 ssp_m128 A,B; 00599 A.f = a; 00600 B.f = b; // NAN(A) || NAN(B) 00601 A.u32[0] = (A.f32[0]!=A.f32[0]) || (B.f32[0]!=B.f32[0]) ? 0xFFFFFFFF : 0; 00602 return A.f; 00603 }
SSP_FORCEINLINE __m128i ssp_cvtepi16_epi32_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepi16_epi32 [SSE4.1]. (Searches MSDN)
Definition at line 2435 of file SSEPlus_emulation_REF.h.
02436 { 02437 ssp_m128 A; 02438 A.i = a; 02439 02440 A.s32[3] = A.s16[3]; 02441 A.s32[2] = A.s16[2]; 02442 A.s32[1] = A.s16[1]; 02443 A.s32[0] = A.s16[0]; 02444 return A.i; 02445 }
SSP_FORCEINLINE __m128i ssp_cvtepi16_epi64_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepi16_epi64 [SSE4.1]. (Searches MSDN)
Definition at line 2448 of file SSEPlus_emulation_REF.h.
02449 { 02450 ssp_m128 A; 02451 A.i = a; 02452 02453 A.s64[1] = A.s16[1]; 02454 A.s64[0] = A.s16[0]; 02455 return A.i; 02456 }
SSP_FORCEINLINE __m128i ssp_cvtepi32_epi64_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepi32_epi64 [SSE4.1]. (Searches MSDN)
Definition at line 2459 of file SSEPlus_emulation_REF.h.
02460 { 02461 ssp_m128 A; 02462 A.i = a; 02463 02464 A.s64[1] = A.s32[1]; 02465 A.s64[0] = A.s32[0]; 02466 return A.i; 02467 }
SSP_FORCEINLINE __m128i ssp_cvtepi8_epi16_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepi8_epi16 [SSE4.1]. (Searches MSDN)
Definition at line 2394 of file SSEPlus_emulation_REF.h.
02395 { 02396 ssp_m128 A; 02397 A.i = a; 02398 02399 A.s16[7] = A.s8[7]; 02400 A.s16[6] = A.s8[6]; 02401 A.s16[5] = A.s8[5]; 02402 A.s16[4] = A.s8[4]; 02403 A.s16[3] = A.s8[3]; 02404 A.s16[2] = A.s8[2]; 02405 A.s16[1] = A.s8[1]; 02406 A.s16[0] = A.s8[0]; 02407 return A.i; 02408 }
SSP_FORCEINLINE __m128i ssp_cvtepi8_epi32_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepi8_epi32 [SSE4.1]. (Searches MSDN)
Definition at line 2411 of file SSEPlus_emulation_REF.h.
02412 { 02413 ssp_m128 A; 02414 A.i = a; 02415 02416 A.s32[3] = A.s8[3]; 02417 A.s32[2] = A.s8[2]; 02418 A.s32[1] = A.s8[1]; 02419 A.s32[0] = A.s8[0]; 02420 return A.i; 02421 }
SSP_FORCEINLINE __m128i ssp_cvtepi8_epi64_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepi8_epi64 [SSE4.1]. (Searches MSDN)
Definition at line 2424 of file SSEPlus_emulation_REF.h.
02425 { 02426 ssp_m128 A; 02427 A.i = a; 02428 02429 A.s64[1] = A.s8[1]; 02430 A.s64[0] = A.s8[0]; 02431 return A.i; 02432 }
SSP_FORCEINLINE __m128i ssp_cvtepu16_epi32_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepu16_epi32 [SSE4.1]. (Searches MSDN)
Definition at line 2511 of file SSEPlus_emulation_REF.h.
02512 { 02513 ssp_m128 A; 02514 A.i = a; 02515 02516 A.s32[3] = A.u16[3]; 02517 A.s32[2] = A.u16[2]; 02518 A.s32[1] = A.u16[1]; 02519 A.s32[0] = A.u16[0]; 02520 return A.i; 02521 }
SSP_FORCEINLINE __m128i ssp_cvtepu16_epi64_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepu16_epi64 [SSE4.1]. (Searches MSDN)
Definition at line 2524 of file SSEPlus_emulation_REF.h.
02525 { 02526 ssp_m128 A; 02527 A.i = a; 02528 02529 A.s64[1] = A.u16[1]; 02530 A.s64[0] = A.u16[0]; 02531 return A.i; 02532 }
SSP_FORCEINLINE __m128i ssp_cvtepu32_epi64_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepu32_epi64 [SSE4.1]. (Searches MSDN)
Definition at line 2535 of file SSEPlus_emulation_REF.h.
02536 { 02537 ssp_m128 A; 02538 A.i = a; 02539 02540 A.s64[1] = A.u32[1]; 02541 A.s64[0] = A.u32[0]; 02542 return A.i; 02543 }
SSP_FORCEINLINE __m128i ssp_cvtepu8_epi16_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepu8_epi16 [SSE4.1]. (Searches MSDN)
Definition at line 2470 of file SSEPlus_emulation_REF.h.
02471 { 02472 ssp_m128 A; 02473 A.i = a; 02474 02475 A.s16[7] = A.u8[7]; 02476 A.s16[6] = A.u8[6]; 02477 A.s16[5] = A.u8[5]; 02478 A.s16[4] = A.u8[4]; 02479 A.s16[3] = A.u8[3]; 02480 A.s16[2] = A.u8[2]; 02481 A.s16[1] = A.u8[1]; 02482 A.s16[0] = A.u8[0]; 02483 return A.i; 02484 }
SSP_FORCEINLINE __m128i ssp_cvtepu8_epi32_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepu8_epi32 [SSE4.1]. (Searches MSDN)
Definition at line 2487 of file SSEPlus_emulation_REF.h.
02488 { 02489 ssp_m128 A; 02490 A.i = a; 02491 02492 A.s32[3] = A.u8[3]; 02493 A.s32[2] = A.u8[2]; 02494 A.s32[1] = A.u8[1]; 02495 A.s32[0] = A.u8[0]; 02496 return A.i; 02497 }
SSP_FORCEINLINE __m128i ssp_cvtepu8_epi64_REF | ( | __m128i | a | ) |
Reference implementation of _mm_cvtepu8_epi64 [SSE4.1]. (Searches MSDN)
Definition at line 2500 of file SSEPlus_emulation_REF.h.
02501 { 02502 ssp_m128 A; 02503 A.i = a; 02504 02505 A.s64[1] = A.u8[1]; 02506 A.s64[0] = A.u8[0]; 02507 return A.i; 02508 }
SSP_FORCEINLINE __m128d ssp_dp_pd_REF | ( | __m128d | a, | |
__m128d | b, | |||
const int | mask | |||
) |
Reference implementation of _mm_dp_pd [SSE4.1]. (Searches MSDN)
Definition at line 930 of file SSEPlus_emulation_REF.h.
00931 { 00932 ssp_f64 tmp[3]; 00933 ssp_m128 A, B; 00934 A.d = a; 00935 B.d = b; 00936 00937 tmp[0] = (mask & 0x10) ? (A.f64[0] * B.f64[0]) : 0.0; 00938 tmp[1] = (mask & 0x20) ? (A.f64[1] * B.f64[1]) : 0.0; 00939 00940 tmp[2] = tmp[0] + tmp[1]; 00941 00942 A.f64[0] = (mask & 0x1) ? tmp[2] : 0.0; 00943 A.f64[1] = (mask & 0x2) ? tmp[2] : 0.0; 00944 return A.d; 00945 }
SSP_FORCEINLINE __m128 ssp_dp_ps_REF | ( | __m128 | a, | |
__m128 | b, | |||
const int | mask | |||
) |
Reference implementation of _mm_dp_ps [SSE4.1]. (Searches MSDN)
Definition at line 948 of file SSEPlus_emulation_REF.h.
00949 { 00950 ssp_f32 tmp[5]; 00951 ssp_m128 A, B; 00952 A.f = a; 00953 B.f = b; 00954 00955 tmp[0] = (mask & 0x10) ? (A.f32[0] * B.f32[0]) : 0.0f; 00956 tmp[1] = (mask & 0x20) ? (A.f32[1] * B.f32[1]) : 0.0f; 00957 tmp[2] = (mask & 0x40) ? (A.f32[2] * B.f32[2]) : 0.0f; 00958 tmp[3] = (mask & 0x80) ? (A.f32[3] * B.f32[3]) : 0.0f; 00959 00960 tmp[4] = tmp[0] + tmp[1] + tmp[2] + tmp[3]; 00961 00962 A.f32[0] = (mask & 0x1) ? tmp[4] : 0.0f; 00963 A.f32[1] = (mask & 0x2) ? tmp[4] : 0.0f; 00964 A.f32[2] = (mask & 0x4) ? tmp[4] : 0.0f; 00965 A.f32[3] = (mask & 0x8) ? tmp[4] : 0.0f; 00966 return A.f; 00967 }
SSP_FORCEINLINE int ssp_extract_epi32_REF | ( | __m128i | a, | |
const int | imm | |||
) |
Reference implementation of _mm_extract_epi32 [SSE4.1]. (Searches MSDN)
Definition at line 1086 of file SSEPlus_emulation_REF.h.
SSP_FORCEINLINE ssp_s64 ssp_extract_epi64_REF | ( | __m128i | a, | |
const int | ndx | |||
) |
Reference implementation of _mm_extract_epi64 [SSE4.1]. (Searches MSDN)
Definition at line 1094 of file SSEPlus_emulation_REF.h.
SSP_FORCEINLINE int ssp_extract_epi8_REF | ( | __m128i | a, | |
const int | ndx | |||
) |
Reference implementation of _mm_extract_epi8 [SSE4.1]. (Searches MSDN)
Definition at line 1078 of file SSEPlus_emulation_REF.h.
SSP_FORCEINLINE int ssp_extract_ps_REF | ( | __m128 | a, | |
const int | ndx | |||
) |
Reference implementation of _mm_extract_ps [SSE4.1]. (Searches MSDN)
Definition at line 1102 of file SSEPlus_emulation_REF.h.
SSP_FORCEINLINE __m128i ssp_extract_si64_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_extract_si64 [SSE4a]. (Searches MSDN)
NOTE: The upper 64 bits of the destination register are undefined.
Definition at line 1113 of file SSEPlus_emulation_REF.h.
01114 { 01115 ssp_u32 len, ndx; 01116 ssp_s64 mask; 01117 ssp_m128 A, B; 01118 A.i = a; 01119 B.i = b; 01120 ndx = (ssp_u32)((B.u64[0] & 0x3F00) >> 8); // Mask ndx field. 01121 len = (ssp_u32)((B.u64[0] & 0x003F)); // Mask len field. 01122 01123 len = (len) ? len : 64; 01124 if( (ndx+len) > 64 ) // If the sum of ndx and length is greater than 64, the results are undefined. 01125 return a; // If index = 0 and length = 0/64, extract all lower bits. 01126 mask = ~(-1 << len); 01127 A.u64[0] = A.u64[0] >> ndx; 01128 A.u64[0] = A.u64[0] & mask; 01129 return A.i; 01130 }
SSP_FORCEINLINE __m128i ssp_extracti_si64_REF | ( | __m128i | a, | |
int | len, | |||
int | ndx | |||
) |
Reference implementation of _mm_extracti_si64 [SSE4a]. (Searches MSDN)
NOTE: The upper 64 bits of the destination register are undefined.
Definition at line 1134 of file SSEPlus_emulation_REF.h.
01135 { 01136 ssp_s64 mask; 01137 ssp_m128 A; 01138 A.i = a; 01139 ndx = ndx & 0x3F; // ndx % 64 01140 len = len & 0x3F; // len % 64 01141 01142 len = (len) ? len : 64; 01143 if( (ndx+len) > 64 ) // If the sum of ndx and length is greater than 64, the results are undefined. 01144 return a; // If index = 0 and length = 0/64, extract all lower bits. 01145 mask = ~(-1 << len); 01146 A.u64[0] = A.u64[0] >> ndx; 01147 A.u64[0] = A.u64[0] & mask; 01148 return A.i; 01149 }
SSP_FORCEINLINE __m128d ssp_floor_pd_REF | ( | __m128d | a | ) |
Reference implementation of _mm_floor_pd [SSE4.1]. (Searches MSDN)
Definition at line 2045 of file SSEPlus_emulation_REF.h.
02046 { 02047 ssp_m128 A; 02048 A.d = a; 02049 02050 A.f64[0] = floor( A.f64[0] ); 02051 A.f64[1] = floor( A.f64[1] ); 02052 return A.d; 02053 }
SSP_FORCEINLINE __m128 ssp_floor_ps_REF | ( | __m128 | a | ) |
Reference implementation of _mm_floor_ps [SSE4.1]. (Searches MSDN)
Definition at line 2056 of file SSEPlus_emulation_REF.h.
02057 { 02058 ssp_m128 A; 02059 A.f = a; 02060 02061 A.f32[0] = (float)floor( A.f32[0] ); 02062 A.f32[1] = (float)floor( A.f32[1] ); 02063 A.f32[2] = (float)floor( A.f32[2] ); 02064 A.f32[3] = (float)floor( A.f32[3] ); 02065 return A.f; 02066 }
SSP_FORCEINLINE __m128d ssp_floor_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_floor_sd [SSE4.1]. (Searches MSDN)
Definition at line 2069 of file SSEPlus_emulation_REF.h.
02070 { 02071 ssp_m128 A,B; 02072 A.d = a; 02073 B.d = b; 02074 02075 A.f64[0] = floor( B.f64[0] ); 02076 return A.d; 02077 }
SSP_FORCEINLINE __m128 ssp_floor_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_floor_ss [SSE4.1]. (Searches MSDN)
Definition at line 2080 of file SSEPlus_emulation_REF.h.
02081 { 02082 ssp_m128 A,B; 02083 A.f = a; 02084 B.f = b; 02085 02086 A.f32[0] = (float)floor( B.f32[0] ); 02087 return A.f; 02088 }
SSP_FORCEINLINE __m128d ssp_frcz_pd_REF | ( | __m128d | a | ) |
Reference implementation of _mm_frcz_pd/ frczpd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 24 of file SSEPlus_emulation_REF.h.
00025 { 00026 ssp_m128 A; 00027 long long temp; 00028 00029 A.d = a; 00030 00031 temp = (long long) A.f64[0]; 00032 A.f64[0] -= temp; 00033 temp = (long long) A.f64[1]; 00034 A.f64[1] -= temp; 00035 00036 return A.d; 00037 }
SSP_FORCEINLINE __m128 ssp_frcz_ps_REF | ( | __m128 | a | ) |
Reference implementation of _mm_frcz_ps/ frczps [SSE5]. (SSE5 .pdf documentation here)
Definition at line 40 of file SSEPlus_emulation_REF.h.
00041 { 00042 ssp_m128 A; 00043 int temp; 00044 A.f = a; 00045 00046 temp = (int) A.f32[0]; 00047 A.f32[0] -= temp; 00048 temp = (int) A.f32[1]; 00049 A.f32[1] -= temp; 00050 temp = (int) A.f32[2]; 00051 A.f32[2] -= temp; 00052 temp = (int) A.f32[3]; 00053 A.f32[3] -= temp; 00054 00055 return A.f; 00056 }
SSP_FORCEINLINE __m128d ssp_frcz_sd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
Reference implementation of _mm_frcz_sd/ frczsd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 59 of file SSEPlus_emulation_REF.h.
00060 { 00061 ssp_m128 A, B; 00062 long long temp; 00063 00064 A.d = a; 00065 B.d = b; 00066 00067 temp = (long long) A.f64[0]; 00068 B.f64[0] = A.f64[0] - temp; 00069 00070 return B.d; 00071 }
SSP_FORCEINLINE __m128 ssp_frcz_ss_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_frcz_ss/ frczss [SSE5]. (SSE5 .pdf documentation here)
Definition at line 74 of file SSEPlus_emulation_REF.h.
00075 { 00076 ssp_m128 A, B; 00077 int temp; 00078 00079 A.f = a; 00080 B.f = b; 00081 00082 temp = (int) A.f32[0]; 00083 B.f32[0] = A.f32[0] - temp; 00084 00085 return B.f; 00086 }
SSP_FORCEINLINE __m128i ssp_hadd_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_hadd_epi16 [SSSE3]. (Searches MSDN)
Definition at line 1157 of file SSEPlus_emulation_REF.h.
01158 { 01159 ssp_m128 A, B; 01160 A.i = a; 01161 B.i = b; 01162 01163 A.s16[0] = A.s16[0] + A.s16[1]; 01164 A.s16[1] = A.s16[2] + A.s16[3]; 01165 A.s16[2] = A.s16[4] + A.s16[5]; 01166 A.s16[3] = A.s16[6] + A.s16[7]; 01167 A.s16[4] = B.s16[0] + B.s16[1]; 01168 A.s16[5] = B.s16[2] + B.s16[3]; 01169 A.s16[6] = B.s16[4] + B.s16[5]; 01170 A.s16[7] = B.s16[6] + B.s16[7]; 01171 return A.i; 01172 }
SSP_FORCEINLINE __m128i ssp_hadd_epi32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_hadd_epi32 [SSSE3]. (Searches MSDN)
Definition at line 1175 of file SSEPlus_emulation_REF.h.
01176 { 01177 ssp_m128 A, B; 01178 A.i = a; 01179 B.i = b; 01180 01181 A.s32[0] = A.s32[0] + A.s32[1]; 01182 A.s32[1] = A.s32[2] + A.s32[3]; 01183 A.s32[2] = B.s32[0] + B.s32[1]; 01184 A.s32[3] = B.s32[2] + B.s32[3]; 01185 01186 return A.i; 01187 }
SSP_FORCEINLINE __m128d ssp_hadd_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
SSP_FORCEINLINE __m64 ssp_hadd_pi16_REF | ( | __m64 | a, | |
__m64 | b | |||
) |
Reference implementation of _mm_hadd_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 1192 of file SSEPlus_emulation_REF.h.
01193 { 01194 ssp_m64 A, B; 01195 A.m64 = a; 01196 B.m64 = b; 01197 01198 A.s16[0] = A.s16[0] + A.s16[1]; 01199 A.s16[1] = A.s16[2] + A.s16[3]; 01200 A.s16[2] = B.s16[0] + B.s16[1]; 01201 A.s16[3] = B.s16[2] + B.s16[3]; 01202 01203 return A.m64; 01204 }
SSP_FORCEINLINE __m64 ssp_hadd_pi32_REF | ( | __m64 | a, | |
__m64 | b | |||
) |
Reference implementation of _mm_hadd_pi32 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 1209 of file SSEPlus_emulation_REF.h.
01210 { 01211 ssp_m64 A, B; 01212 A.m64 = a; 01213 B.m64 = b; 01214 01215 A.s32[0] = A.s32[0] + A.s32[1]; 01216 A.s32[1] = B.s32[0] + B.s32[1]; 01217 01218 return A.m64; 01219 }
SSP_FORCEINLINE __m128 ssp_hadd_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_hadd_ps [SSE3]. (Searches MSDN)
Definition at line 1272 of file SSEPlus_emulation_REF.h.
01273 { 01274 ssp_m128 A, B; 01275 A.f = a; 01276 B.f = b; 01277 01278 A.f32[0] = A.f32[0] + A.f32[1]; 01279 A.f32[1] = A.f32[2] + A.f32[3]; 01280 A.f32[2] = B.f32[0] + B.f32[1]; 01281 A.f32[3] = B.f32[2] + B.f32[3]; 01282 return A.f; 01283 }
SSP_FORCEINLINE __m128i ssp_haddd_epi16_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddd_epi16/ phaddwd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 92 of file SSEPlus_emulation_REF.h.
00093 { 00094 ssp_m128 A, B; 00095 A.i = a; 00096 00097 B.s32[0] = A.s16[0] + A.s16[1]; 00098 B.s32[1] = A.s16[2] + A.s16[3]; 00099 B.s32[2] = A.s16[4] + A.s16[5]; 00100 B.s32[3] = A.s16[6] + A.s16[7]; 00101 00102 return B.i; 00103 }
SSP_FORCEINLINE __m128i ssp_haddd_epi8_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddd_epi8/ phaddbd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 106 of file SSEPlus_emulation_REF.h.
00107 { 00108 ssp_m128 A, B; 00109 A.i = a; 00110 00111 B.s32[0] = A.s8[ 0] + A.s8[ 1] + A.s8[ 2] + A.s8[ 3]; 00112 B.s32[1] = A.s8[ 4] + A.s8[ 5] + A.s8[ 6] + A.s8[ 7]; 00113 B.s32[2] = A.s8[ 8] + A.s8[ 9] + A.s8[10] + A.s8[11]; 00114 B.s32[3] = A.s8[12] + A.s8[13] + A.s8[14] + A.s8[15]; 00115 00116 return B.i; 00117 }
SSP_FORCEINLINE __m128i ssp_haddd_epu16_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddd_epu16/ phadduwd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 120 of file SSEPlus_emulation_REF.h.
00121 { 00122 ssp_m128 A, B; 00123 A.i = a; 00124 00125 B.u32[0] = A.u16[0] + A.u16[1]; 00126 B.u32[1] = A.u16[2] + A.u16[3]; 00127 B.u32[2] = A.u16[4] + A.u16[5]; 00128 B.u32[3] = A.u16[6] + A.u16[7]; 00129 00130 return B.i; 00131 }
SSP_FORCEINLINE __m128i ssp_haddd_epu8_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddd_epu8/ phaddubd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 134 of file SSEPlus_emulation_REF.h.
00135 { 00136 ssp_m128 A, B; 00137 A.i = a; 00138 00139 B.u32[0] = A.u8[ 0] + A.u8[ 1] + A.u8[ 2] + A.u8[ 3]; 00140 B.u32[1] = A.u8[ 4] + A.u8[ 5] + A.u8[ 6] + A.u8[ 7]; 00141 B.u32[2] = A.u8[ 8] + A.u8[ 9] + A.u8[10] + A.u8[11]; 00142 B.u32[3] = A.u8[12] + A.u8[13] + A.u8[14] + A.u8[15]; 00143 00144 return B.i; 00145 }
SSP_FORCEINLINE __m128i ssp_haddq_epi16_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddq_epi16/ phaddwq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 148 of file SSEPlus_emulation_REF.h.
00149 { 00150 ssp_m128 A, B; 00151 A.i = a; 00152 00153 B.s64[0] = A.s16[0] + A.s16[1] + A.s16[2] + A.s16[3]; 00154 B.s64[1] = A.s16[4] + A.s16[5] + A.s16[6] + A.s16[7]; 00155 00156 return B.i; 00157 }
SSP_FORCEINLINE __m128i ssp_haddq_epi32_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddq_epi32/ phadddq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 160 of file SSEPlus_emulation_REF.h.
00161 { 00162 ssp_m128 A, B; 00163 A.i = a; 00164 00165 B.s64[0] = A.s32[0] + (long long)A.s32[1]; 00166 B.s64[1] = A.s32[2] + (long long)A.s32[3]; 00167 00168 return B.i; 00169 }
SSP_FORCEINLINE __m128i ssp_haddq_epi8_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddq_epi8/ phaddbq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 172 of file SSEPlus_emulation_REF.h.
00173 { 00174 ssp_m128 A, B; 00175 A.i = a; 00176 00177 B.s64[0] = A.s8[0] + A.s8[1] + A.s8[2] + A.s8[3] + A.s8[4] + A.s8[5] + A.s8[6] + A.s8[7]; 00178 B.s64[1] = A.s8[8] + A.s8[9] + A.s8[10] + A.s8[11] + A.s8[12] + A.s8[13] + A.s8[14] + A.s8[15]; 00179 00180 return B.i; 00181 }
SSP_FORCEINLINE __m128i ssp_haddq_epu16_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddq_epu16/ phadduwq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 184 of file SSEPlus_emulation_REF.h.
00185 { 00186 ssp_m128 A, B; 00187 A.i = a; 00188 00189 B.u64[0] = A.u16[0] + A.u16[1] + A.u16[2] + A.u16[3]; 00190 B.u64[1] = A.u16[4] + A.u16[5] + A.u16[6] + A.u16[7]; 00191 00192 return B.i; 00193 }
SSP_FORCEINLINE __m128i ssp_haddq_epu32_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddq_epu32/ phaddudq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 196 of file SSEPlus_emulation_REF.h.
00197 { 00198 ssp_m128 A, B; 00199 A.i = a; 00200 00201 B.u64[0] = A.u32[0] + (long long)A.u32[1]; 00202 B.u64[1] = A.u32[2] + (long long)A.u32[3]; 00203 00204 return B.i; 00205 }
SSP_FORCEINLINE __m128i ssp_haddq_epu8_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddq_epu8/ phaddubq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 208 of file SSEPlus_emulation_REF.h.
00209 { 00210 ssp_m128 A, B; 00211 A.i = a; 00212 00213 B.u64[0] = A.u8[0] + A.u8[1] + A.u8[2] + A.u8[3] + A.u8[4] + A.u8[5] + A.u8[6] + A.u8[7]; 00214 B.u64[1] = A.u8[8] + A.u8[9] + A.u8[10] + A.u8[11] + A.u8[12] + A.u8[13] + A.u8[14] + A.u8[15]; 00215 00216 return B.i; 00217 }
SSP_FORCEINLINE __m128i ssp_hadds_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_hadds_epi16 [SSSE3]. (Searches MSDN)
Definition at line 1222 of file SSEPlus_emulation_REF.h.
01223 { 01224 ssp_m128 A, B; 01225 int answer[8]; 01226 A.i = a; 01227 B.i = b; 01228 01229 answer[0] = A.s16[0] + A.s16[1]; 01230 A.s16[0] = (ssp_s16) (SSP_SATURATION(answer[0], 32767, -32768)); 01231 answer[1] = A.s16[2] + A.s16[3]; 01232 A.s16[1] = (ssp_s16) (SSP_SATURATION(answer[1], 32767, -32768)); 01233 answer[2] = A.s16[4] + A.s16[5]; 01234 A.s16[2] = (ssp_s16) (SSP_SATURATION(answer[2], 32767, -32768)); 01235 answer[3] = A.s16[6] + A.s16[7]; 01236 A.s16[3] = (ssp_s16) (SSP_SATURATION(answer[3], 32767, -32768)); 01237 answer[4] = B.s16[0] + B.s16[1]; 01238 A.s16[4] = (ssp_s16) (SSP_SATURATION(answer[4], 32767, -32768)); 01239 answer[5] = B.s16[2] + B.s16[3]; 01240 A.s16[5] = (ssp_s16) (SSP_SATURATION(answer[5], 32767, -32768)); 01241 answer[6] = B.s16[4] + B.s16[5]; 01242 A.s16[6] = (ssp_s16) (SSP_SATURATION(answer[6], 32767, -32768)); 01243 answer[7] = B.s16[6] + B.s16[7]; 01244 A.s16[7] = (ssp_s16) (SSP_SATURATION(answer[7], 32767, -32768)); 01245 01246 return A.i; 01247 }
SSP_FORCEINLINE __m64 ssp_hadds_pi16_REF | ( | __m64 | a, | |
__m64 | b | |||
) |
Reference implementation of _mm_hadds_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 1252 of file SSEPlus_emulation_REF.h.
01253 { 01254 ssp_m64 A, B; 01255 int answer[4]; 01256 A.m64 = a; 01257 B.m64 = b; 01258 01259 answer[0] = A.s16[0] + A.s16[1]; 01260 A.s16[0] = (ssp_s16) (SSP_SATURATION(answer[0], 32767, -32768)); 01261 answer[1] = A.s16[2] + A.s16[3]; 01262 A.s16[1] = (ssp_s16) (SSP_SATURATION(answer[1], 32767, -32768)); 01263 answer[2] = B.s16[0] + B.s16[1]; 01264 A.s16[2] = (ssp_s16) (SSP_SATURATION(answer[2], 32767, -32768)); 01265 answer[3] = B.s16[2] + B.s16[3]; 01266 A.s16[3] = (ssp_s16) (SSP_SATURATION(answer[3], 32767, -32768)); 01267 01268 return A.m64; 01269 }
SSP_FORCEINLINE __m128i ssp_haddw_epi8_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddw_epi8/ phaddbw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 220 of file SSEPlus_emulation_REF.h.
00221 { 00222 ssp_m128 A, B; 00223 A.i = a; 00224 00225 B.s16[0] = A.s8[0] + A.s8[1]; 00226 B.s16[1] = A.s8[2] + A.s8[3]; 00227 B.s16[2] = A.s8[4] + A.s8[5]; 00228 B.s16[3] = A.s8[6] + A.s8[7]; 00229 B.s16[4] = A.s8[8] + A.s8[9]; 00230 B.s16[5] = A.s8[10] + A.s8[11]; 00231 B.s16[6] = A.s8[12] + A.s8[13]; 00232 B.s16[7] = A.s8[14] + A.s8[15]; 00233 00234 return B.i; 00235 }
SSP_FORCEINLINE __m128i ssp_haddw_epu8_REF | ( | __m128i | a | ) |
Reference implementation of _mm_haddw_epu8/ phaddubw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 238 of file SSEPlus_emulation_REF.h.
00239 { 00240 ssp_m128 A, B; 00241 A.i = a; 00242 00243 B.u16[0] = A.u8[0] + A.u8[1]; 00244 B.u16[1] = A.u8[2] + A.u8[3]; 00245 B.u16[2] = A.u8[4] + A.u8[5]; 00246 B.u16[3] = A.u8[6] + A.u8[7]; 00247 B.u16[4] = A.u8[8] + A.u8[9]; 00248 B.u16[5] = A.u8[10] + A.u8[11]; 00249 B.u16[6] = A.u8[12] + A.u8[13]; 00250 B.u16[7] = A.u8[14] + A.u8[15]; 00251 00252 return B.i; 00253 }
SSP_FORCEINLINE __m128i ssp_hsub_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_hsub_epi16 [SSSE3]. (Searches MSDN)
Definition at line 1302 of file SSEPlus_emulation_REF.h.
01303 { 01304 ssp_m128 A, B; 01305 A.i = a; 01306 B.i = b; 01307 01308 A.s16[0] = A.s16[0] - A.s16[1]; 01309 A.s16[1] = A.s16[2] - A.s16[3]; 01310 A.s16[2] = A.s16[4] - A.s16[5]; 01311 A.s16[3] = A.s16[6] - A.s16[7]; 01312 A.s16[4] = B.s16[0] - B.s16[1]; 01313 A.s16[5] = B.s16[2] - B.s16[3]; 01314 A.s16[6] = B.s16[4] - B.s16[5]; 01315 A.s16[7] = B.s16[6] - B.s16[7]; 01316 01317 return A.i; 01318 }
SSP_FORCEINLINE __m128i ssp_hsub_epi32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_hsub_epi32 [SSSE3]. (Searches MSDN)
Definition at line 1321 of file SSEPlus_emulation_REF.h.
01322 { 01323 ssp_m128 A, B; 01324 A.i = a; 01325 B.i = b; 01326 01327 A.s32[0] = A.s32[0] - A.s32[1]; 01328 A.s32[1] = A.s32[2] - A.s32[3]; 01329 A.s32[2] = B.s32[0] - B.s32[1]; 01330 A.s32[3] = B.s32[2] - B.s32[3]; 01331 01332 return A.i; 01333 }
SSP_FORCEINLINE __m128d ssp_hsub_pd_REF | ( | __m128d | a, | |
__m128d | b | |||
) |
SSP_FORCEINLINE __m64 ssp_hsub_pi16_REF | ( | __m64 | a, | |
__m64 | b | |||
) |
Reference implementation of _mm_hsub_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 1338 of file SSEPlus_emulation_REF.h.
01339 { 01340 ssp_m64 A, B; 01341 A.m64 = a; 01342 B.m64 = b; 01343 01344 A.s16[0] = A.s16[0] - A.s16[1]; 01345 A.s16[1] = A.s16[2] - A.s16[3]; 01346 A.s16[2] = B.s16[0] - B.s16[1]; 01347 A.s16[3] = B.s16[2] - B.s16[3]; 01348 01349 return A.m64; 01350 }
SSP_FORCEINLINE __m64 ssp_hsub_pi32_REF | ( | __m64 | a, | |
__m64 | b | |||
) |
Reference implementation of _mm_hsub_pi32 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 1355 of file SSEPlus_emulation_REF.h.
01356 { 01357 ssp_m64 A, B; 01358 A.m64 = a; 01359 B.m64 = b; 01360 01361 A.s32[0] = A.s32[0] - A.s32[1]; 01362 A.s32[1] = B.s32[0] - B.s32[1]; 01363 01364 return A.m64; 01365 }
SSP_FORCEINLINE __m128 ssp_hsub_ps_REF | ( | __m128 | a, | |
__m128 | b | |||
) |
Reference implementation of _mm_hsub_ps [SSE3]. (Searches MSDN)
Definition at line 1418 of file SSEPlus_emulation_REF.h.
01419 { 01420 ssp_m128 A, B; 01421 A.f = a; 01422 B.f = b; 01423 01424 A.f32[0] = A.f32[0] - A.f32[1]; 01425 A.f32[1] = A.f32[2] - A.f32[3]; 01426 A.f32[2] = B.f32[0] - B.f32[1]; 01427 A.f32[3] = B.f32[2] - B.f32[3]; 01428 return A.f; 01429 }
SSP_FORCEINLINE __m128i ssp_hsubd_epi16_REF | ( | __m128i | a | ) |
Reference implementation of _mm_hsubd_epi16/ phsubwd [SSE5]. (SSE5 .pdf documentation here)
Definition at line 256 of file SSEPlus_emulation_REF.h.
00257 { 00258 ssp_m128 A, B; 00259 A.i = a; 00260 00261 B.s32[0] = A.s16[1] - A.s16[0]; 00262 B.s32[1] = A.s16[3] - A.s16[2]; 00263 B.s32[2] = A.s16[5] - A.s16[4]; 00264 B.s32[3] = A.s16[7] - A.s16[6]; 00265 00266 return B.i; 00267 }
SSP_FORCEINLINE __m128i ssp_hsubq_epi32_REF | ( | __m128i | a | ) |
Reference implementation of _mm_hsubq_epi32/ phsubdq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 270 of file SSEPlus_emulation_REF.h.
00271 { 00272 ssp_m128 A, B; 00273 A.i = a; 00274 00275 B.s64[0] = (long long)A.s32[1] - A.s32[0]; 00276 B.s64[1] = (long long)A.s32[3] - A.s32[2]; 00277 00278 return B.i; 00279 }
SSP_FORCEINLINE __m128i ssp_hsubs_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_hsubs_epi16 [SSSE3]. (Searches MSDN)
Definition at line 1368 of file SSEPlus_emulation_REF.h.
01369 { 01370 ssp_m128 A, B; 01371 int answer[8]; 01372 A.i = a; 01373 B.i = b; 01374 01375 answer[0] = A.s16[0] - A.s16[1]; 01376 A.s16[0] = (ssp_s16) (SSP_SATURATION(answer[0], 32767, -32768)); 01377 answer[1] = A.s16[2] - A.s16[3]; 01378 A.s16[1] = (ssp_s16) (SSP_SATURATION(answer[1], 32767, -32768)); 01379 answer[2] = A.s16[4] - A.s16[5]; 01380 A.s16[2] = (ssp_s16) (SSP_SATURATION(answer[2], 32767, -32768)); 01381 answer[3] = A.s16[6] - A.s16[7]; 01382 A.s16[3] = (ssp_s16) (SSP_SATURATION(answer[3], 32767, -32768)); 01383 answer[4] = B.s16[0] - B.s16[1]; 01384 A.s16[4] = (ssp_s16) (SSP_SATURATION(answer[4], 32767, -32768)); 01385 answer[5] = B.s16[2] - B.s16[3]; 01386 A.s16[5] = (ssp_s16) (SSP_SATURATION(answer[5], 32767, -32768)); 01387 answer[6] = B.s16[4] - B.s16[5]; 01388 A.s16[6] = (ssp_s16) (SSP_SATURATION(answer[6], 32767, -32768)); 01389 answer[7] = B.s16[6] - B.s16[7]; 01390 A.s16[7] = (ssp_s16) (SSP_SATURATION(answer[7], 32767, -32768)); 01391 01392 return A.i; 01393 }
SSP_FORCEINLINE __m64 ssp_hsubs_pi16_REF | ( | __m64 | a, | |
__m64 | b | |||
) |
Reference implementation of _mm_hsubs_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 1398 of file SSEPlus_emulation_REF.h.
01399 { 01400 ssp_m64 A, B; 01401 int answer[4]; 01402 A.m64 = a; 01403 B.m64 = b; 01404 01405 answer[0] = A.s16[0] - A.s16[1]; 01406 A.s16[0] = (ssp_s16) (SSP_SATURATION(answer[0], 32767, -32768)); 01407 answer[1] = A.s16[2] - A.s16[3]; 01408 A.s16[1] = (ssp_s16) (SSP_SATURATION(answer[1], 32767, -32768)); 01409 answer[2] = B.s16[0] - B.s16[1]; 01410 A.s16[2] = (ssp_s16) (SSP_SATURATION(answer[2], 32767, -32768)); 01411 answer[3] = B.s16[2] - B.s16[3]; 01412 A.s16[3] = (ssp_s16) (SSP_SATURATION(answer[3], 32767, -32768)); 01413 01414 return A.m64; 01415 }
SSP_FORCEINLINE __m128i ssp_hsubw_epi8_REF | ( | __m128i | a | ) |
Reference implementation of _mm_hsubw_epi8/ phsubbw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 282 of file SSEPlus_emulation_REF.h.
00283 { 00284 ssp_m128 A, B; 00285 A.i = a; 00286 00287 B.s16[0] = A.s8[1] - A.s8[0]; 00288 B.s16[1] = A.s8[3] - A.s8[2]; 00289 B.s16[2] = A.s8[5] - A.s8[4]; 00290 B.s16[3] = A.s8[7] - A.s8[6]; 00291 B.s16[4] = A.s8[9] - A.s8[8]; 00292 B.s16[5] = A.s8[11] - A.s8[10]; 00293 B.s16[6] = A.s8[13] - A.s8[12]; 00294 B.s16[7] = A.s8[15] - A.s8[14]; 00295 00296 return B.i; 00297 }
SSP_FORCEINLINE __m128i ssp_insert_epi32_REF | ( | __m128i | a, | |
int | b, | |||
const int | ndx | |||
) |
Reference implementation of _mm_insert_epi32 [SSE4.1]. (Searches MSDN)
Definition at line 1457 of file SSEPlus_emulation_REF.h.
01458 { 01459 ssp_m128 A; 01460 A.i = a; 01461 01462 A.s32[ndx & 0x3] = b; 01463 return A.i; 01464 }
SSP_FORCEINLINE __m128i ssp_insert_epi64_REF | ( | __m128i | a, | |
ssp_s64 | b, | |||
const int | ndx | |||
) |
Reference implementation of _mm_insert_epi64 [SSE4.1]. (Searches MSDN)
Definition at line 1467 of file SSEPlus_emulation_REF.h.
01468 { 01469 ssp_m128 A; 01470 A.i = a; 01471 01472 A.s64[ndx & 0x1] = b; 01473 return A.i; 01474 }
SSP_FORCEINLINE __m128i ssp_insert_epi8_REF | ( | __m128i | a, | |
int | b, | |||
const int | ndx | |||
) |
Reference implementation of _mm_insert_epi8 [SSE4.1]. (Searches MSDN)
Definition at line 1447 of file SSEPlus_emulation_REF.h.
01448 { 01449 ssp_m128 A; 01450 A.i = a; 01451 01452 A.s8[ndx & 0xF] = (ssp_s8)b; 01453 return A.i; 01454 }
/**
 * Reference implementation of _mm_insert_ps [SSE4.1]. (See MSDN.)
 * Definition at line 1477 of file SSEPlus_emulation_REF.h.
 *
 * sel[7:6] picks the source float in b, sel[5:4] picks the destination lane
 * in a, and each set bit of sel[3:0] zeroes the matching result lane (the
 * zero mask is applied after the insertion).
 */
SSP_FORCEINLINE __m128 ssp_insert_ps_REF( __m128 a, __m128 b, const int sel )
{
    ssp_m128 dst, src;
    const int srcLane  = (sel & 0xC0) >> 6;   // sel[7:6]
    const int dstLane  = (sel & 0x30) >> 4;   // sel[5:4]
    const int zeroMask =  sel & 0x0F;         // sel[3:0]
    int lane;

    dst.f = a;
    src.f = b;

    dst.f32[dstLane] = src.f32[srcLane];

    for( lane = 0; lane < 4; ++lane )
    {
        if( zeroMask & (1 << lane) )
        {
            dst.f32[lane] = 0;
        }
    }
    return dst.f;
}
/**
 * Reference implementation of _mm_insert_si64 [SSE4a]. (See MSDN.)
 * Definition at line 1500 of file SSEPlus_emulation_REF.h.
 *
 * Inserts the low len bits of b's low quadword into a's low quadword starting
 * at bit ndx. The control fields are read from b's upper quadword:
 * len = bits 5:0, ndx = bits 13:8. A length of zero with index zero means
 * "all 64 bits". If ndx + len exceeds 64, or len is zero with a non-zero
 * index, a is returned unchanged.
 */
SSP_FORCEINLINE __m128i ssp_insert_si64_REF( __m128i a, __m128i b )
{
    ssp_u32 ndx, len;
    ssp_u64 mask;
    ssp_m128 A, B;

    B.i = b;
    len = (ssp_u32)(  B.u64[1]       & 0x3F );   // field length, bits 5:0
    ndx = (ssp_u32)( (B.u64[1] >> 8) & 0x3F );   // bit index,    bits 13:8

    if( ( (ndx + len) > 64 ) ||
        ( (len == 0) && (ndx > 0) ) )
        return a;

    A.i = a;
    if( len == 0 )                               // ndx is 0 here: length 0 is interpreted as 64
    {
        A.u64[0] = B.u64[0];
        return A.i;
    }

    // 1 <= len <= 63 here. Build the mask in 64-bit unsigned arithmetic; the
    // former `~(-1 << len)` shifted a plain int, which is undefined for
    // len >= 32 and gave a wrong mask for wide fields.
    mask     = ( ((ssp_u64)1) << len ) - 1;
    A.u64[0] = ( A.u64[0] & ~(mask << ndx) ) | ( (B.u64[0] & mask) << ndx );
    return A.i;
}
/**
 * Reference implementation of _mm_inserti_si64 [SSE4a]. (See MSDN.)
 * Definition at line 1531 of file SSEPlus_emulation_REF.h.
 *
 * Inserts the low len bits of b's low quadword into a's low quadword starting
 * at bit ndx. Both len and ndx are reduced modulo 64. A length of zero with
 * index zero means "all 64 bits". If ndx + len exceeds 64, or len is zero
 * with a non-zero index, a is returned unchanged.
 */
SSP_FORCEINLINE __m128i ssp_inserti_si64_REF( __m128i a, __m128i b, int len, int ndx )
{
    ssp_u64 mask;
    ssp_m128 A, B;

    A.i = a;
    ndx = ndx & 0x3F;                            // ndx % 64
    len = len & 0x3F;                            // len % 64

    if( ( (ndx + len) > 64 ) ||
        ( (len == 0) && (ndx > 0) ) )
        return a;

    B.i = b;
    if( len == 0 )                               // ndx is 0 here: length 0 is interpreted as 64
    {
        A.u64[0] = B.u64[0];
        return A.i;
    }

    // 1 <= len <= 63 here. Build the mask in 64-bit unsigned arithmetic; the
    // former `~(-1 << len)` shifted a plain int, which is undefined for
    // len >= 32 and gave a wrong mask for wide fields.
    mask     = ( ((ssp_u64)1) << len ) - 1;
    A.u64[0] = ( A.u64[0] & ~(mask << ndx) ) | ( (B.u64[0] & mask) << ndx );
    return A.i;
}
SSP_FORCEINLINE __m128i ssp_lddqu_si128_REF | ( | __m128i const * | p | ) |
Reference implementation of _mm_lddqu_si128 [SSE3]. (Searches MSDN)
Definition at line 1575 of file SSEPlus_emulation_REF.h.
SSP_FORCEINLINE __m128d ssp_loaddup_pd_REF | ( | double const * | dp | ) |
Reference implementation of _mm_loaddup_pd [SSE3]. (Searches MSDN)
Definition at line 1566 of file SSEPlus_emulation_REF.h.
/**
 * Reference implementation of __lzcnt16 [SSE4a]. (See MSDN.)
 * Definition at line 2893 of file SSEPlus_emulation_REF.h.
 *
 * Counts the leading zero bits of a 16-bit value; returns 16 for zero.
 * Implemented as a branch-per-level binary search: each step tests whether
 * the top half of the remaining window is empty and, if so, shifts it away.
 */
SSP_FORCEINLINE unsigned short ssp_lzcnt16_REF( unsigned short val )
{
    unsigned int   bits  = val;
    unsigned short zeros = 0;

    if( bits == 0 )
        return 16;
    if( bits > 0x7FFF )                 // top bit (or anything above it) set
        return 0;

    if( bits <= 0x00FF ) { zeros += 8; bits <<= 8; }
    if( bits <= 0x0FFF ) { zeros += 4; bits <<= 4; }
    if( bits <= 0x3FFF ) { zeros += 2; bits <<= 2; }
    if( bits <= 0x7FFF ) { zeros += 1; }

    return zeros;
}
Reference implementation of __lzcnt64 [SSE4a]. (Searches MSDN)
Definition at line 2984 of file SSEPlus_emulation_REF.h.
02985 { 02986 ssp_u64 cnt; 02987 cnt = ssp_lzcnt_REF( (ssp_u32)(val>>32) ); 02988 if( cnt == 32 ) 02989 cnt += ssp_lzcnt_REF( (ssp_u32)(val & 0x00000000FFFFFFFF) ); 02990 return cnt; 02991 }
SSP_FORCEINLINE unsigned int ssp_lzcnt_REF | ( | unsigned int | val | ) |
Reference implementation of __lzcnt [SSE4a]. (Searches MSDN)
Definition at line 2975 of file SSEPlus_emulation_REF.h.
02976 { 02977 ssp_u32 cnt; 02978 cnt = ssp_lzcnt16_REF( (ssp_u16)(val>>16) ); 02979 if( cnt == 16 ) 02980 cnt += ssp_lzcnt16_REF( (ssp_u16)(val & 0x0000FFFF) ); 02981 return cnt; 02982 }
/**
 * Reference implementation of _mm_macc_epi16 / pmacsww [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 304 of file SSEPlus_emulation_REF.h.
 *
 * Per 16-bit lane: a * b + c, stored back into a 16-bit lane (no saturation).
 */
SSP_FORCEINLINE __m128i ssp_macc_epi16_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 acc, mul, add;
    int lane;

    acc.i = a;
    mul.i = b;
    add.i = c;

    for( lane = 0; lane < 8; ++lane )
    {
        acc.s16[lane] = acc.s16[lane] * mul.s16[lane] + add.s16[lane];
    }
    return acc.i;
}
/**
 * Reference implementation of _mm_macc_epi32 / pmacsdd [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 324 of file SSEPlus_emulation_REF.h.
 *
 * Per 32-bit lane: the low 32 bits of a * b + c (no saturation).
 */
SSP_FORCEINLINE __m128i ssp_macc_epi32_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 A, B, C;
    int lane;

    A.i = a;
    B.i = b;
    C.i = c;

    // The low 32 bits of a multiply-add do not depend on signedness, so the
    // unsigned view of each lane is used: unsigned arithmetic wraps by
    // definition, whereas the signed form overflowed (undefined behaviour)
    // for large operands.
    for( lane = 0; lane < 4; ++lane )
    {
        A.u32[lane] = A.u32[lane] * B.u32[lane] + C.u32[lane];
    }
    return A.i;
}
/**
 * Reference implementation of _mm_macc_pd / fmaddpd [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 355 of file SSEPlus_emulation_REF.h.
 *
 * Per double lane: a * b + c.
 */
SSP_FORCEINLINE __m128d ssp_macc_pd_REF( __m128d a, __m128d b, __m128d c )
{
    ssp_m128 acc, mul, add;
    int lane;

    acc.d = a;
    mul.d = b;
    add.d = c;

    for( lane = 0; lane < 2; ++lane )
    {
        acc.f64[lane] = acc.f64[lane] * mul.f64[lane] + add.f64[lane];
    }
    return acc.d;
}
/**
 * Reference implementation of _mm_macc_ps / fmaddps [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 340 of file SSEPlus_emulation_REF.h.
 *
 * Per float lane: a * b + c.
 */
SSP_FORCEINLINE __m128 ssp_macc_ps_REF( __m128 a, __m128 b, __m128 c )
{
    ssp_m128 acc, mul, add;
    int lane;

    acc.f = a;
    mul.f = b;
    add.f = c;

    for( lane = 0; lane < 4; ++lane )
    {
        acc.f32[lane] = acc.f32[lane] * mul.f32[lane] + add.f32[lane];
    }
    return acc.f;
}
/**
 * Reference implementation of _mm_macc_sd / fmaddsd [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 380 of file SSEPlus_emulation_REF.h.
 *
 * Lane 0: a * b + c. The upper double lane is passed through from a.
 *
 * NOTE(review): the original listing carries a "confirm" marker on the
 * definition line - presumably the behaviour was still awaiting verification.
 */
SSP_FORCEINLINE __m128d ssp_macc_sd_REF( __m128d a, __m128d b, __m128d c )
{
    ssp_m128 acc, mul, add;

    acc.d = a;
    mul.d = b;
    add.d = c;

    acc.f64[0] = acc.f64[0] * mul.f64[0] + add.f64[0];
    return acc.d;
}
/**
 * Reference implementation of _mm_macc_ss / fmaddss [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 368 of file SSEPlus_emulation_REF.h.
 *
 * Lane 0: a * b + c. The three upper float lanes are passed through from a.
 *
 * NOTE(review): the original listing carries a "confirm" marker on the
 * definition line - presumably the behaviour was still awaiting verification.
 */
SSP_FORCEINLINE __m128 ssp_macc_ss_REF( __m128 a, __m128 b, __m128 c )
{
    ssp_m128 acc, mul, add;

    acc.f = a;
    mul.f = b;
    add.f = c;

    acc.f32[0] = acc.f32[0] * mul.f32[0] + add.f32[0];
    return acc.f;
}
/**
 * Reference implementation of _mm_maccd_epi16 / pmacswd [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 392 of file SSEPlus_emulation_REF.h.
 *
 * For each 32-bit result lane k: a.s16[2k] * b.s16[2k] + c.s32[k].
 *
 * NOTE(review): this listing multiplies the even-indexed words; confirm
 * against the instruction reference which word of each dword pmacswd uses.
 */
SSP_FORCEINLINE __m128i ssp_maccd_epi16_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 A, B, C, out;
    int k;

    A.i = a;
    B.i = b;
    C.i = c;

    for( k = 0; k < 4; ++k )
    {
        out.s32[k] = A.s16[2*k] * B.s16[2*k] + C.s32[k];
    }
    return out.i;
}
/**
 * Reference implementation of _mm_macchi_epi32 / pmacsdqh [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 408 of file SSEPlus_emulation_REF.h.
 *
 * For each 64-bit result lane k: the full 64-bit signed product of the
 * odd-indexed dwords a.s32[2k+1] * b.s32[2k+1], plus c's 64-bit lane k
 * (wrapping, no saturation).
 */
SSP_FORCEINLINE __m128i ssp_macchi_epi32_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 A, B, C, D;

    A.i = a;
    B.i = b;
    C.i = c;

    // Widen one operand BEFORE multiplying: the product of two 32-bit ints was
    // previously formed in 32 bits and lost its upper half. The accumulate is
    // done on the unsigned view so that wrap-around is well defined.
    D.u64[0] = (ssp_u64)( (ssp_s64)A.s32[1] * B.s32[1] ) + C.u64[0];
    D.u64[1] = (ssp_u64)( (ssp_s64)A.s32[3] * B.s32[3] ) + C.u64[1];

    return D.i;
}
/**
 * Reference implementation of _mm_macclo_epi32 / pmacsdql [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 422 of file SSEPlus_emulation_REF.h.
 *
 * For each 64-bit result lane k: the full 64-bit signed product of the
 * even-indexed dwords a.s32[2k] * b.s32[2k], plus c's 64-bit lane k
 * (wrapping, no saturation).
 */
SSP_FORCEINLINE __m128i ssp_macclo_epi32_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 A, B, C, D;

    A.i = a;
    B.i = b;
    C.i = c;

    // Widen one operand BEFORE multiplying: the product of two 32-bit ints was
    // previously formed in 32 bits and lost its upper half. The accumulate is
    // done on the unsigned view so that wrap-around is well defined.
    D.u64[0] = (ssp_u64)( (ssp_s64)A.s32[0] * B.s32[0] ) + C.u64[0];
    D.u64[1] = (ssp_u64)( (ssp_s64)A.s32[2] * B.s32[2] ) + C.u64[1];

    return D.i;
}
/**
 * Reference implementation of _mm_maccs_epi16 / pmacssww [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 438 of file SSEPlus_emulation_REF.h.
 *
 * Per 16-bit lane: a * b + c, passed through SSP_SATURATION with the bounds
 * 32767 and -32768.
 */
SSP_FORCEINLINE __m128i ssp_maccs_epi16_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 acc, mul, add;
    int wide;
    int lane;

    acc.i = a;
    mul.i = b;
    add.i = c;

    for( lane = 0; lane < 8; ++lane )
    {
        wide = acc.s16[lane] * mul.s16[lane] + add.s16[lane];
        acc.s16[lane] = SSP_SATURATION(wide, 32767, -32768);
    }
    return acc.i;
}
/**
 * Reference implementation of _mm_maccs_epi32 / pmacssdd [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 467 of file SSEPlus_emulation_REF.h.
 *
 * Per 32-bit lane: a * b + c evaluated in 64 bits, then passed through
 * SSP_SATURATION with the signed 32-bit bounds.
 */
SSP_FORCEINLINE __m128i ssp_maccs_epi32_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 acc, mul, add;
    long long wide;
    int lane;

    acc.i = a;
    mul.i = b;
    add.i = c;

    for( lane = 0; lane < 4; ++lane )
    {
        wide = (long long)acc.s32[lane] * mul.s32[lane] + add.s32[lane];
        acc.s32[lane] = (ssp_s32)(SSP_SATURATION(wide, 2147483647LL, -2147483648LL));
    }
    return acc.i;
}
/**
 * Reference implementation of _mm_maccsd_epi16 / pmacsswd [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 488 of file SSEPlus_emulation_REF.h.
 *
 * For each 32-bit result lane k: a.s16[2k] * b.s16[2k] + c.s32[k], evaluated
 * in 64 bits and passed through SSP_SATURATION with the signed 32-bit bounds.
 */
SSP_FORCEINLINE __m128i ssp_maccsd_epi16_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 A, B, C, out;
    long long wide;
    int k;

    A.i = a;
    B.i = b;
    C.i = c;

    // Original remark: it should be possible to compare the data to detect
    // overflow/underflow instead of widening.
    for( k = 0; k < 4; ++k )
    {
        wide = A.s16[2*k] * B.s16[2*k] + (long long)C.s32[k];
        out.s32[k] = (ssp_s32)(SSP_SATURATION(wide, 2147483647LL, -2147483648LL));
    }
    return out.i;
}
/**
 * Reference implementation of _mm_maccshi_epi32 / pmacssdqh [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 510 of file SSEPlus_emulation_REF.h.
 *
 * For each 64-bit result lane k: the 64-bit signed product of the odd-indexed
 * dwords a.s32[2k+1] * b.s32[2k+1], added to c's 64-bit lane k and saturated
 * to the signed 64-bit range.
 */
SSP_FORCEINLINE __m128i ssp_maccshi_epi32_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 A, B, C, D;
    int k;

    A.i = a;
    B.i = b;
    C.i = c;

    for( k = 0; k < 2; ++k )
    {
        const long long acc  = C.s64[k];
        const long long prod = (long long)A.s32[2*k + 1] * B.s32[2*k + 1];
        // The add is performed on unsigned values so that wrap-around is well
        // defined; the earlier signed add relied on overflow behaviour that
        // the compiler is free to assume never happens.
        long long sum = (long long)( (unsigned long long)prod + (unsigned long long)acc );

        if( (prod < 0) && (acc < 0) && (sum >= 0) )        // wrapped past the minimum
            sum = -0x7FFFFFFFFFFFFFFFLL - 1;
        else if( (prod >= 0) && (acc >= 0) && (sum < 0) )  // wrapped past the maximum
            sum = 0x7FFFFFFFFFFFFFFFLL;

        D.s64[k] = sum;
    }
    return D.i;
}
/**
 * Reference implementation of _mm_maccslo_epi32 / pmacssdql [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 536 of file SSEPlus_emulation_REF.h.
 *
 * For each 64-bit result lane k: the 64-bit signed product of the even-indexed
 * dwords a.s32[2k] * b.s32[2k], added to c's 64-bit lane k and saturated to
 * the signed 64-bit range.
 */
SSP_FORCEINLINE __m128i ssp_maccslo_epi32_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 A, B, C, D;
    int k;

    A.i = a;
    B.i = b;
    C.i = c;

    for( k = 0; k < 2; ++k )
    {
        const long long acc  = C.s64[k];
        const long long prod = (long long)A.s32[2*k] * B.s32[2*k];
        // The add is performed on unsigned values so that wrap-around is well
        // defined; the earlier signed add relied on overflow behaviour that
        // the compiler is free to assume never happens.
        long long sum = (long long)( (unsigned long long)prod + (unsigned long long)acc );

        if( (prod < 0) && (acc < 0) && (sum >= 0) )        // wrapped past the minimum
            sum = -0x7FFFFFFFFFFFFFFFLL - 1;
        else if( (prod >= 0) && (acc >= 0) && (sum < 0) )  // wrapped past the maximum
            sum = 0x7FFFFFFFFFFFFFFFLL;

        D.s64[k] = sum;
    }
    return D.i;
}
/**
 * Reference implementation of _mm_maddd_epi16 / pmadcswd [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 562 of file SSEPlus_emulation_REF.h.
 *
 * For each 32-bit result lane k:
 * a.s16[2k] * b.s16[2k] + a.s16[2k+1] * b.s16[2k+1] + c.s32[k].
 */
SSP_FORCEINLINE __m128i ssp_maddd_epi16_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 A, B, C, out;
    int k;

    A.i = a;
    B.i = b;
    C.i = c;

    for( k = 0; k < 4; ++k )
    {
        out.s32[k] = A.s16[2*k] * B.s16[2*k] + A.s16[2*k + 1] * B.s16[2*k + 1] + C.s32[k];
    }
    return out.i;
}
/**
 * Reference implementation of _mm_maddsd_epi16 / pmadcsswd [SSE5].
 * (See the AMD SSE5 .pdf documentation.)
 * Definition at line 578 of file SSEPlus_emulation_REF.h.
 *
 * For each 32-bit result lane k:
 * a.s16[2k] * b.s16[2k] + a.s16[2k+1] * b.s16[2k+1] + c.s32[k], evaluated in
 * 64 bits and passed through SSP_SATURATION with the signed 32-bit bounds.
 */
SSP_FORCEINLINE __m128i ssp_maddsd_epi16_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 A, B, C, D;
    long long temp;
    int k;

    A.i = a;
    B.i = b;
    C.i = c;

    for( k = 0; k < 4; ++k )
    {
        // Each product is widened before the sum: two products of 2^30 each
        // (all four inputs equal to -32768) would overflow a 32-bit int if
        // added first, as the previous form did.
        temp = (long long)A.s16[2*k]     * B.s16[2*k]
             + (long long)A.s16[2*k + 1] * B.s16[2*k + 1]
             + (long long)C.s32[k];
        D.s32[k] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));
    }
    return D.i;
}
/**
 * Reference implementation of _mm_maddubs_epi16 [SSSE3]. (See MSDN.)
 * Definition at line 970 of file SSEPlus_emulation_REF.h.
 *
 * Bytes of a are read as unsigned, bytes of b as signed. Each adjacent pair
 * of byte products is summed and passed through SSP_SATURATION with the
 * signed 16-bit bounds to form one 16-bit result lane.
 */
SSP_FORCEINLINE __m128i ssp_maddubs_epi16_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B, out;
    int pairSum;
    int lane;

    A.i = a;
    B.i = b;

    for( lane = 0; lane < 8; ++lane )
    {
        pairSum = A.u8[2*lane] * B.s8[2*lane] + A.u8[2*lane + 1] * B.s8[2*lane + 1];
        out.s16[lane] = (ssp_s16)(SSP_SATURATION(pairSum, 32767, -32768));
    }
    return out.i;
}
/**
 * Reference implementation of _mm_maddubs_pi16 [SSSE3]. (See MSDN.)
 * NOTE: The user must call _mm_empty() after a call to this function.
 * Definition at line 1009 of file SSEPlus_emulation_REF.h.
 *
 * Bytes of a are read as unsigned, bytes of b as signed. Each adjacent pair
 * of byte products is summed and passed through SSP_SATURATION with the
 * signed 16-bit bounds to form one 16-bit result lane.
 */
SSP_FORCEINLINE __m64 ssp_maddubs_pi16_REF( __m64 a, __m64 b )
{
    ssp_m64 A, B, out;
    int pairSum;
    int lane;

    A.m64 = a;
    B.m64 = b;

    for( lane = 0; lane < 4; ++lane )
    {
        pairSum = A.u8[2*lane] * B.s8[2*lane] + A.u8[2*lane + 1] * B.s8[2*lane + 1];
        out.s16[lane] = (ssp_s16)(SSP_SATURATION(pairSum, 32767, -32768));
    }
    return out.m64;
}
/**
 * Reference implementation of _mm_max_epi32 [SSE4.1]. (See MSDN.)
 * Definition at line 1700 of file SSEPlus_emulation_REF.h.
 *
 * Applies SSP_SET_MAX to each pair of signed 32-bit lanes of a and b.
 */
SSP_FORCEINLINE __m128i ssp_max_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;

    for( lane = 0; lane < 4; ++lane )
    {
        SSP_SET_MAX( lhs.s32[lane], rhs.s32[lane] );
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_max_epi8 [SSE4.1]. (See MSDN.)
 * Definition at line 1622 of file SSEPlus_emulation_REF.h.
 *
 * Applies SSP_SET_MAX to each pair of signed 8-bit lanes of a and b.
 */
SSP_FORCEINLINE __m128i ssp_max_epi8_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;

    for( lane = 0; lane < 16; ++lane )
    {
        SSP_SET_MAX( lhs.s8[lane], rhs.s8[lane] );
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_max_epu16 [SSE4.1]. (See MSDN.)
 * Definition at line 1667 of file SSEPlus_emulation_REF.h.
 *
 * Applies SSP_SET_MAX to each pair of unsigned 16-bit lanes of a and b.
 */
SSP_FORCEINLINE __m128i ssp_max_epu16_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;

    for( lane = 0; lane < 8; ++lane )
    {
        SSP_SET_MAX( lhs.u16[lane], rhs.u16[lane] );
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_max_epu32 [SSE4.1]. (See MSDN.)
 * Definition at line 1728 of file SSEPlus_emulation_REF.h.
 *
 * Applies SSP_SET_MAX to each pair of unsigned 32-bit lanes of a and b.
 */
SSP_FORCEINLINE __m128i ssp_max_epu32_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;

    for( lane = 0; lane < 4; ++lane )
    {
        SSP_SET_MAX( lhs.u32[lane], rhs.u32[lane] );
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_min_epi32 [SSE4.1]. (See MSDN.)
 * Definition at line 1686 of file SSEPlus_emulation_REF.h.
 *
 * Applies SSP_SET_MIN to each pair of signed 32-bit lanes of a and b.
 */
SSP_FORCEINLINE __m128i ssp_min_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;

    for( lane = 0; lane < 4; ++lane )
    {
        SSP_SET_MIN( lhs.s32[lane], rhs.s32[lane] );
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_min_epi8 [SSE4.1]. (See MSDN.)
 * Definition at line 1596 of file SSEPlus_emulation_REF.h.
 *
 * Applies SSP_SET_MIN to each pair of signed 8-bit lanes of a and b.
 */
SSP_FORCEINLINE __m128i ssp_min_epi8_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;

    for( lane = 0; lane < 16; ++lane )
    {
        SSP_SET_MIN( lhs.s8[lane], rhs.s8[lane] );
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_min_epu16 [SSE4.1]. (See MSDN.)
 * Definition at line 1649 of file SSEPlus_emulation_REF.h.
 *
 * Applies SSP_SET_MIN to each pair of unsigned 16-bit lanes of a and b.
 */
SSP_FORCEINLINE __m128i ssp_min_epu16_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;

    for( lane = 0; lane < 8; ++lane )
    {
        SSP_SET_MIN( lhs.u16[lane], rhs.u16[lane] );
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_min_epu32 [SSE4.1]. (See MSDN.)
 * Definition at line 1714 of file SSEPlus_emulation_REF.h.
 *
 * Applies SSP_SET_MIN to each pair of unsigned 32-bit lanes of a and b.
 */
SSP_FORCEINLINE __m128i ssp_min_epu32_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;

    for( lane = 0; lane < 4; ++lane )
    {
        SSP_SET_MIN( lhs.u32[lane], rhs.u32[lane] );
    }
    return lhs.i;
}
/**
 * Reference implementation of _mm_minpos_epu16 [SSE4.1]. (See MSDN.)
 * Definition at line 1745 of file SSEPlus_emulation_REF.h.
 *
 * Finds the smallest unsigned 16-bit lane (the first one wins on ties).
 * Result lane 0 holds the minimum value, lane 1 its index, and every other
 * bit of the result is zero. The search runs in place: lane 0 carries the
 * running minimum and lane 1 the running index.
 */
SSP_FORCEINLINE __m128i ssp_minpos_epu16_REF( __m128i shortValues )
{
    ssp_m128 v;
    int lane;

    v.i = shortValues;

    // Lane 1 must be compared before it is reused to hold the index.
    if( v.u16[1] < v.u16[0] )
    {
        v.u16[0] = v.u16[1];
        v.u16[1] = 1;
    }
    else
    {
        v.u16[1] = 0;
    }

    for( lane = 2; lane < 8; ++lane )
    {
        if( v.u16[lane] < v.u16[0] )
        {
            v.u16[0] = v.u16[lane];
            v.u16[1] = (ssp_u16)lane;
        }
    }

    // Clear everything above the (value, index) pair.
    v.u32[1] = 0;
    v.u64[1] = 0;

    return v.i;
}
/**
 * Reference implementation of _mm_minpos_epu16 [SSE4.1]. (See MSDN.)
 * Definition at line 1782 of file SSEPlus_emulation_REF.h.
 *
 * Alternative form: scans the eight unsigned 16-bit lanes for the smallest
 * value (the first one wins on ties), then returns a vector with the minimum
 * in lane 0, its index in lane 1 and zeros in lanes 2..7.
 */
SSP_FORCEINLINE __m128i ssp_minpos_epu16_REFb( __m128i shortValues )
{
    ssp_m128 in, out;
    ssp_u16 best;
    ssp_u16 bestAt = 0;
    ssp_u32 lane;

    in.i = shortValues;
    best = in.u16[0];

    for( lane = 1; lane < 8; ++lane )
    {
        if( in.u16[lane] < best )
        {
            best   = in.u16[lane];
            bestAt = (ssp_u16)lane;
        }
    }

    out.u64[0] = 0;
    out.u64[1] = 0;
    out.u16[0] = best;
    out.u16[1] = bestAt;
    return out.i;
}
SSP_FORCEINLINE __m128d ssp_movedup_pd_REF | ( | __m128d | a | ) |
Reference implementation of _mm_movedup_pd [SSE3]. (Searches MSDN)
Definition at line 1834 of file SSEPlus_emulation_REF.h.
/**
 * Reference implementation of _mm_movehdup_ps [SSE3]. (See MSDN.)
 * Definition at line 1812 of file SSEPlus_emulation_REF.h.
 *
 * Duplicates the odd float lanes downwards: result = { a1, a1, a3, a3 }.
 */
SSP_FORCEINLINE __m128 ssp_movehdup_ps_REF( __m128 a )
{
    ssp_m128 v;
    int pair;

    v.f = a;
    for( pair = 0; pair < 4; pair += 2 )
    {
        v.f32[pair] = v.f32[pair + 1];
    }
    return v.f;
}
/**
 * Reference implementation of _mm_moveldup_ps [SSE3]. (See MSDN.)
 * Definition at line 1823 of file SSEPlus_emulation_REF.h.
 *
 * Duplicates the even float lanes upwards: result = { a0, a0, a2, a2 }.
 */
SSP_FORCEINLINE __m128 ssp_moveldup_ps_REF( __m128 a )
{
    ssp_m128 v;
    int pair;

    v.f = a;
    for( pair = 0; pair < 4; pair += 2 )
    {
        v.f32[pair + 1] = v.f32[pair];
    }
    return v.f;
}
/**
 * Reference implementation of _mm_mpsadbw_epu8 [SSE4.1]. (See MSDN.)
 * Definition at line 1879 of file SSEPlus_emulation_REF.h.
 *
 * msk[1:0] selects one 4-byte group of b (byte offset 0, 4, 8 or 12) and
 * msk[2] selects the starting byte of a (0 or 4). For each of the eight
 * result lanes i, the sum of absolute differences between the 4-byte window
 * of a starting at (offset + i) and the selected group of b is stored as an
 * unsigned 16-bit value.
 */
SSP_FORCEINLINE __m128i ssp_mpsadbw_epu8_REF( __m128i a, __m128i b, const int msk )
{
    ssp_m128 A, B, out;
    const int bBase = (msk & 0x3) << 2;   // 32-bit group index -> byte offset
    const int aBase = (msk & 0x4);        // bit 2 already equals the byte offset (0 or 4)
    int window, k;

    A.i = a;
    B.i = b;

    for( window = 0; window < 8; ++window )
    {
        int sad = 0;
        for( k = 0; k < 4; ++k )
        {
            const int x = A.u8[aBase + window + k];
            const int y = B.u8[bBase + k];
            sad += (x > y) ? (x - y) : (y - x);   // absolute difference
        }
        out.u16[window] = (ssp_u16)sad;
    }
    return out.i;
}
/**
 * Reference implementation of _mm_msub_pd / fmsubpd [SSE5].
 *
 * Per double-precision lane: result = a * b - c.
 *
 * \param a multiplicand.
 * \param b multiplier.
 * \param c value subtracted from the product.
 * \return two packed double-precision results.
 */
SSP_FORCEINLINE __m128d ssp_msub_pd_REF( __m128d a, __m128d b, __m128d c )
{
    ssp_m128 X, Y, Z;
    int      lane;

    X.d = a;
    Y.d = b;
    Z.d = c;

    for( lane = 0; lane < 2; ++lane )
        X.f64[lane] = X.f64[lane] * Y.f64[lane] - Z.f64[lane];

    return X.d;
}
/**
 * Reference implementation of _mm_msub_ps / fmsubps [SSE5].
 *
 * Per single-precision lane: result = a * b - c.
 *
 * \param a multiplicand.
 * \param b multiplier.
 * \param c value subtracted from the product.
 * \return four packed single-precision results.
 */
SSP_FORCEINLINE __m128 ssp_msub_ps_REF( __m128 a, __m128 b, __m128 c )
{
    ssp_m128 X, Y, Z;
    int      lane;

    X.f = a;
    Y.f = b;
    Z.f = c;

    for( lane = 0; lane < 4; ++lane )
        X.f32[lane] = X.f32[lane] * Y.f32[lane] - Z.f32[lane];

    return X.f;
}
/**
 * Reference implementation of _mm_msub_sd / fmsubsd [SSE5].
 *
 * Scalar form: only the low double-precision lane is computed as
 * a * b - c; the upper lane of the result is copied from a.
 *
 * NOTE(review): the original listing is tagged "confirm" -- verify the
 * upper-lane behavior against the SSE5 specification.
 *
 * \param a multiplicand (also supplies the upper lane of the result).
 * \param b multiplier.
 * \param c value subtracted from the product.
 * \return a with lane 0 replaced by a0 * b0 - c0.
 */
SSP_FORCEINLINE __m128d ssp_msub_sd_REF( __m128d a, __m128d b, __m128d c )
{
    ssp_m128 X, Y, Z;

    X.d = a;
    Y.d = b;
    Z.d = c;

    X.f64[0] = X.f64[0] * Y.f64[0] - Z.f64[0];

    return X.d;
}
/**
 * Reference implementation of _mm_msub_ss / fmsubss [SSE5].
 *
 * Scalar form: only the low single-precision lane is computed as
 * a * b - c; lanes 1..3 of the result are copied from a.
 *
 * NOTE(review): the original listing is tagged "confirm" -- verify the
 * upper-lane behavior against the SSE5 specification.
 *
 * \param a multiplicand (also supplies lanes 1..3 of the result).
 * \param b multiplier.
 * \param c value subtracted from the product.
 * \return a with lane 0 replaced by a0 * b0 - c0.
 */
SSP_FORCEINLINE __m128 ssp_msub_ss_REF( __m128 a, __m128 b, __m128 c )
{
    ssp_m128 X, Y, Z;

    X.f = a;
    Y.f = b;
    Z.f = c;

    X.f32[0] = X.f32[0] * Y.f32[0] - Z.f32[0];

    return X.f;
}
SSP_FORCEINLINE __m128i ssp_mul_epi32_REF( __m128i a, __m128i b )
/**
 * Reference implementation of _mm_mulhrs_epi16 [SSSE3].
 *
 * Per signed 16-bit lane: multiply to a 32-bit product, add the rounding
 * constant 0x4000 and keep bits 30:15, i.e. (a * b + 0x4000) >> 15.
 *
 * \param a eight packed signed 16-bit values.
 * \param b eight packed signed 16-bit values.
 * \return eight rounded, scaled 16-bit products.
 */
SSP_FORCEINLINE __m128i ssp_mulhrs_epi16_REF( __m128i a, __m128i b )
{
    ssp_m128 X, Y;
    int      lane;

    X.i = a;
    Y.i = b;

    for( lane = 0; lane < 8; ++lane )
    {
        const int product = X.s16[lane] * Y.s16[lane];
        X.s16[lane] = (ssp_s16)( (product + 0x4000) >> 15 );
    }

    return X.i;
}
/**
 * Reference implementation of _mm_mulhrs_pi16 [SSSE3].
 *
 * Per signed 16-bit lane: (a * b + 0x4000) >> 15, on a 64-bit MMX vector.
 *
 * NOTE: The user must call _mm_empty() after a call to this function.
 *
 * \param a four packed signed 16-bit values.
 * \param b four packed signed 16-bit values.
 * \return four rounded, scaled 16-bit products.
 */
SSP_FORCEINLINE __m64 ssp_mulhrs_pi16_REF( __m64 a, __m64 b )
{
    ssp_m64 X, Y;
    int     lane;

    X.m64 = a;
    Y.m64 = b;

    for( lane = 0; lane < 4; ++lane )
    {
        const int product = X.s16[lane] * Y.s16[lane];
        X.s16[lane] = (ssp_s16)( (product + 0x4000) >> 15 );
    }

    return X.m64;
}
/**
 * Reference implementation of _mm_mullo_epi32 [SSE4.1].
 *
 * Per 32-bit lane: multiply and keep the low 32 bits of the product.
 *
 * The low 32 bits of a product are the same for signed and unsigned
 * operands, so the multiply is done on the unsigned view of each lane.
 * Unsigned arithmetic wraps modulo 2^32, which avoids the undefined
 * behavior of overflowing a signed int multiply and removes the need to
 * pick the low half out of a 64-bit temporary (which depended on
 * little-endian lane layout).
 *
 * \param a four packed 32-bit integers.
 * \param b four packed 32-bit integers.
 * \return four packed 32-bit integers holding the low halves of a[i]*b[i].
 */
SSP_FORCEINLINE __m128i ssp_mullo_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int      n;

    A.i = a;
    B.i = b;

    for( n = 0; n < 4; n++ )
        A.u32[n] = A.u32[n] * B.u32[n];

    return A.i;
}
/**
 * Reference implementation of _mm_nmacc_pd / fnmaddpd [SSE5].
 *
 * Per double-precision lane: result = -(a * b) + c.
 *
 * \param a multiplicand.
 * \param b multiplier.
 * \param c value added to the negated product.
 * \return two packed double-precision results.
 */
SSP_FORCEINLINE __m128d ssp_nmacc_pd_REF( __m128d a, __m128d b, __m128d c )
{
    ssp_m128 X, Y, Z;
    int      lane;

    X.d = a;
    Y.d = b;
    Z.d = c;

    for( lane = 0; lane < 2; ++lane )
        X.f64[lane] = -(X.f64[lane] * Y.f64[lane]) + Z.f64[lane];

    return X.d;
}
/**
 * Reference implementation of _mm_nmacc_ps / fnmaddps [SSE5].
 *
 * Per single-precision lane: result = -(a * b) + c.
 *
 * \param a multiplicand.
 * \param b multiplier.
 * \param c value added to the negated product.
 * \return four packed single-precision results.
 */
SSP_FORCEINLINE __m128 ssp_nmacc_ps_REF( __m128 a, __m128 b, __m128 c )
{
    ssp_m128 X, Y, Z;
    int      lane;

    X.f = a;
    Y.f = b;
    Z.f = c;

    for( lane = 0; lane < 4; ++lane )
        X.f32[lane] = -(X.f32[lane] * Y.f32[lane]) + Z.f32[lane];

    return X.f;
}
/**
 * Reference implementation of _mm_nmacc_sd / fnmaddsd [SSE5].
 *
 * Scalar form: lane 0 = -(a0 * b0) + c0; the upper lane is copied from a.
 *
 * \param a multiplicand (also supplies the upper lane of the result).
 * \param b multiplier.
 * \param c value added to the negated product.
 * \return a with lane 0 replaced by the computed value.
 */
SSP_FORCEINLINE __m128d ssp_nmacc_sd_REF( __m128d a, __m128d b, __m128d c )
{
    ssp_m128 X, Y, Z;

    X.d = a;
    Y.d = b;
    Z.d = c;

    X.f64[0] = -(X.f64[0] * Y.f64[0]) + Z.f64[0];

    return X.d;
}
/**
 * Reference implementation of _mm_nmacc_ss / fnmaddss [SSE5].
 *
 * Scalar form: lane 0 = -(a0 * b0) + c0; lanes 1..3 are copied from a.
 *
 * \param a multiplicand (also supplies lanes 1..3 of the result).
 * \param b multiplier.
 * \param c value added to the negated product.
 * \return a with lane 0 replaced by the computed value.
 */
SSP_FORCEINLINE __m128 ssp_nmacc_ss_REF( __m128 a, __m128 b, __m128 c )
{
    ssp_m128 X, Y, Z;

    X.f = a;
    Y.f = b;
    Z.f = c;

    X.f32[0] = -(X.f32[0] * Y.f32[0]) + Z.f32[0];

    return X.f;
}
/**
 * Reference implementation of _mm_nmsub_pd / fnmsubpd [SSE5].
 *
 * Per double-precision lane: result = -(a * b) - c.
 *
 * \param a multiplicand.
 * \param b multiplier.
 * \param c value subtracted from the negated product.
 * \return two packed double-precision results.
 */
SSP_FORCEINLINE __m128d ssp_nmsub_pd_REF( __m128d a, __m128d b, __m128d c )
{
    ssp_m128 X, Y, Z;
    int      lane;

    X.d = a;
    Y.d = b;
    Z.d = c;

    for( lane = 0; lane < 2; ++lane )
        X.f64[lane] = -(X.f64[lane] * Y.f64[lane]) - Z.f64[lane];

    return X.d;
}
/**
 * Reference implementation of _mm_nmsub_ps / fnmsubps [SSE5].
 *
 * Per single-precision lane: result = -(a * b) - c.
 *
 * \param a multiplicand.
 * \param b multiplier.
 * \param c value subtracted from the negated product.
 * \return four packed single-precision results.
 */
SSP_FORCEINLINE __m128 ssp_nmsub_ps_REF( __m128 a, __m128 b, __m128 c )
{
    ssp_m128 X, Y, Z;
    int      lane;

    X.f = a;
    Y.f = b;
    Z.f = c;

    for( lane = 0; lane < 4; ++lane )
        X.f32[lane] = -(X.f32[lane] * Y.f32[lane]) - Z.f32[lane];

    return X.f;
}
/**
 * Reference implementation of _mm_nmsub_sd / fnmsubsd [SSE5].
 *
 * Scalar form: lane 0 = -(a0 * b0) - c0; the upper lane is copied from a.
 *
 * \param a multiplicand (also supplies the upper lane of the result).
 * \param b multiplier.
 * \param c value subtracted from the negated product.
 * \return a with lane 0 replaced by the computed value.
 */
SSP_FORCEINLINE __m128d ssp_nmsub_sd_REF( __m128d a, __m128d b, __m128d c )
{
    ssp_m128 X, Y, Z;

    X.d = a;
    Y.d = b;
    Z.d = c;

    X.f64[0] = -(X.f64[0] * Y.f64[0]) - Z.f64[0];

    return X.d;
}
/**
 * Reference implementation of _mm_nmsub_ss / fnmsubss [SSE5].
 *
 * Scalar form: lane 0 = -(a0 * b0) - c0; lanes 1..3 are copied from a.
 *
 * \param a multiplicand (also supplies lanes 1..3 of the result).
 * \param b multiplier.
 * \param c value subtracted from the negated product.
 * \return a with lane 0 replaced by the computed value.
 */
SSP_FORCEINLINE __m128 ssp_nmsub_ss_REF( __m128 a, __m128 b, __m128 c )
{
    ssp_m128 X, Y, Z;

    X.f = a;
    Y.f = b;
    Z.f = c;

    X.f32[0] = -(X.f32[0] * Y.f32[0]) - Z.f32[0];

    return X.f;
}
/**
 * Reference implementation of _mm_packus_epi32 [SSE4.1].
 *
 * Narrows eight signed 32-bit values to unsigned 16-bit with saturation:
 * negative inputs become 0, inputs above 0xFFFF become 0xFFFF.
 * Result lanes 0..3 come from a, lanes 4..7 from b.
 *
 * \param a four packed signed 32-bit values (low half of the result).
 * \param b four packed signed 32-bit values (high half of the result).
 * \return eight packed unsigned 16-bit saturated values.
 */
SSP_FORCEINLINE __m128i ssp_packus_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int      k;

    A.i = a;
    B.i = b;

    /* The result is built in place in A. Writing 16-bit lane k only
       touches 32-bit lanes that have already been consumed, so the
       ascending order below is required. */
    for( k = 0; k < 4; ++k )
    {
        const ssp_s32 v = A.s32[k];
        if( v < 0 )
            A.u16[k] = 0;
        else if( v > 0xFFFF )
            A.u16[k] = 0xFFFF;
        else
            A.s16[k] = (ssp_u16)v;
    }

    for( k = 0; k < 4; ++k )
    {
        const ssp_s32 v = B.s32[k];
        if( v < 0 )
            A.u16[4 + k] = 0;
        else if( v > 0xFFFF )
            A.u16[4 + k] = 0xFFFF;
        else
            A.s16[4 + k] = (ssp_u16)v;
    }

    return A.i;
}
/**
 * Reference implementation of _mm_perm_epi8 / pperm [SSE5].
 *
 * For each of the 16 result bytes, the matching control byte in c selects
 * a source byte and a logical operation:
 *   - bits 3:0  byte index within the selected source,
 *   - bit  4    source select (0 = a, 1 = b),
 *   - bits 7:5  operation (see the switch below).
 *
 * \param a first byte source.
 * \param b second byte source.
 * \param c sixteen control bytes.
 * \return sixteen permuted / transformed bytes.
 */
SSP_FORCEINLINE __m128i ssp_perm_epi8_REF( __m128i a, __m128i b, __m128i c )
{
    ssp_m128 A, B, C, R;
    int      n;

    A.i = a;
    B.i = b;
    C.i = c;

    for( n = 0; n < 16; n++ )
    {
        const ssp_u8    ctl  = C.u8[n];
        const int       op   = ctl >> 5;
        const int       idx  = ctl & 0xF;
        const ssp_m128 *from = ( ctl & 0x10 ) ? &B : &A;
        ssp_u8          v    = from->u8[idx];

        switch( op )
        {
        case 0:  /* source byte unchanged */
            R.u8[n] = v;
            break;
        case 1:  /* inverted source byte */
            R.u8[n] = ~v;
            break;
        case 2:  /* bit-reversed source byte */
        case 3:  /* bit-reversed, then inverted, source byte */
            v = ( (v & 0x0F) << 4 ) | ( (v & 0xF0) >> 4 );   /* swap nibbles */
            v = ( (v & 0x33) << 2 ) | ( (v & 0xCC) >> 2 );   /* swap pairs   */
            v = ( (v & 0x55) << 1 ) | ( (v & 0xAA) >> 1 );   /* swap bits    */
            if( op == 3 )
                v = ~v;
            R.u8[n] = v;
            break;
        case 4:  /* constant 0x00 */
            R.u8[n] = 0x00;
            break;
        case 5:  /* constant 0xFF */
            R.u8[n] = 0xFF;
            break;
        case 6:  /* sign bit of the source byte replicated to all bits */
            R.s8[n] = from->s8[idx] >> 7;
            break;
        case 7:  /* inverted sign bit of the source byte replicated to all bits */
            R.s8[n] = from->s8[idx] >> 7;
            R.u8[n] = ~R.u8[n];
            break;
        }
    }

    return R.i;
}
/**
 * Reference implementation of _mm_perm_pd / permpd [SSE5].
 *
 * For each of the two result lanes, the lowest byte of the matching
 * 64-bit element of c is the control byte:
 *   - bit  0    lane index within the selected source,
 *   - bit  1    source select (0 = a, 1 = b),
 *   - bits 7:5  operation (see the switch below).
 *
 * \param a first double-precision source.
 * \param b second double-precision source.
 * \param c control vector (bytes 0 and 8 are used).
 * \return two double-precision results.
 */
SSP_FORCEINLINE __m128d ssp_perm_pd_REF( __m128d a, __m128d b, __m128i c )
{
    ssp_m128 A, B, C, R;
    int      n;

    A.d = a;
    B.d = b;
    C.i = c;

    for( n = 0; n < 2; n++ )
    {
        const unsigned char cb   = C.u8[n*8];
        const ssp_m128     *from = ( cb & 0x02 ) ? &B : &A;
        const ssp_f64       src  = from->f64[cb & 0x01];

        switch( (cb >> 5) & 0x7 )
        {
        case 0:  /* double-precision source operand */
            R.f64[n] = src;
            break;
        case 1:  /* absolute value of the source operand */
            R.f64[n] = ( src < 0.0 ) ? (-src) : src;
            break;
        case 2:  /* negated source operand */
            R.f64[n] = -src;
            break;
        case 3:  /* negative of the absolute value of the source operand */
            R.f64[n] = ( src < 0.0 ) ? src : (-src);
            break;
        case 4:  /* +0.0 */
            R.f64[n] = 0.0;
            break;
        case 5:  /* -1.0 */
            R.f64[n] = -1.0;
            break;
        case 6:  /* +1.0 */
            R.f64[n] = 1.0;
            break;
        case 7:  /* pi, stored as a raw bit pattern (the original note says:
                    for mxcsr.rc 00, 01 or 11 use 0x400921FB54442D18,
                    for 10 use 0x400921FB54442D19) */
            R.u64[n] = 0x400921FB54442D18;
            break;
        }
    }

    return R.d;
}
/**
 * Reference implementation of _mm_perm_ps / permps [SSE5].
 *
 * For each of the four result lanes, the lowest byte of the matching
 * 32-bit element of c is the control byte:
 *   - bits 1:0  lane index within the selected source,
 *   - bit  2    source select (0 = a, 1 = b),
 *   - bits 7:5  operation (see the switch below).
 *
 * \param a first single-precision source.
 * \param b second single-precision source.
 * \param c control vector (bytes 0, 4, 8 and 12 are used).
 * \return four single-precision results.
 */
SSP_FORCEINLINE __m128 ssp_perm_ps_REF( __m128 a, __m128 b, __m128i c )
{
    ssp_m128 A, B, C, R;
    int      n;

    A.f = a;
    B.f = b;
    C.i = c;

    for( n = 0; n < 4; n++ )
    {
        const unsigned char cb   = C.u8[n*4];
        const ssp_m128     *from = ( cb & 0x04 ) ? &B : &A;
        const ssp_f32       src  = from->f32[cb & 0x03];

        switch( (cb >> 5) & 0x7 )
        {
        case 0:  /* single-precision source operand */
            R.f32[n] = src;
            break;
        case 1:  /* absolute value of the source operand */
            R.f32[n] = ( src < 0.0f ) ? (-src) : src;
            break;
        case 2:  /* negated source operand */
            R.f32[n] = -src;
            break;
        case 3:  /* negative of the absolute value of the source operand */
            R.f32[n] = ( src < 0.0f ) ? src : (-src);
            break;
        case 4:  /* +0.0 */
            R.f32[n] = 0.0f;
            break;
        case 5:  /* -1.0 */
            R.f32[n] = -1.0f;
            break;
        case 6:  /* +1.0 */
            R.f32[n] = 1.0f;
            break;
        case 7:  /* pi, stored as a raw bit pattern (the original note says:
                    for mxcsr.rc 00 or 10 use 0x40490FDB,
                    for 01 or 11 use 0x40490FDA) */
            R.u32[n] = 0x40490FDB;
            break;
        }
    }

    return R.f;
}
SSP_FORCEINLINE unsigned short ssp_popcnt16_REF | ( | unsigned short | val | ) |
Native implementation of __popcnt16 [SSE4a]. (Searches MSDN)
Definition at line 2997 of file SSEPlus_emulation_REF.h.
02998 { 02999 int i; 03000 ssp_u16 cnt=0; 03001 for( i=0; i<15, val; ++i, val = val>>1 ) 03002 cnt += val & 0x1; 03003 return cnt; 03004 }
Native implementation of __popcnt64 [SSE4a]. (Searches MSDN)
Definition at line 3015 of file SSEPlus_emulation_REF.h.
03016 { 03017 int i; 03018 ssp_u64 cnt = 0; 03019 for( i=0; i<63, val; ++i, val = val>>1 ) 03020 cnt += val & 0x1; 03021 return cnt; 03022 }
SSP_FORCEINLINE unsigned int ssp_popcnt_REF | ( | unsigned int | val | ) |
Native implementation of __popcnt [SSE4a]. (Searches MSDN)
Definition at line 3006 of file SSEPlus_emulation_REF.h.
03007 { 03008 int i; 03009 ssp_u32 cnt = 0; 03010 for( i=0; i<31, val; ++i, val = val>>1 ) 03011 cnt += val & 0x1; 03012 return cnt; 03013 }
/**
 * Reference implementation of _mm_rot_epi16 / protw [SSE5].
 *
 * Rotates each unsigned 16-bit lane of a by the signed count in the
 * matching lane of b: non-negative counts rotate left, negative counts
 * rotate right. Counts are reduced modulo 16.
 *
 * Fix: the left-rotate path previously reduced the count modulo 8, so
 * counts 8..15 rotated by the wrong amount. It now uses modulo 16, matching
 * the right-rotate path and the other rot/roti element widths.
 *
 * \param a eight packed 16-bit values to rotate.
 * \param b eight packed signed 16-bit rotate counts.
 * \return eight rotated 16-bit values.
 */
SSP_FORCEINLINE __m128i ssp_rot_epi16_REF( __m128i a, __m128i b )
{
    int n;
    ssp_m128 A,B;
    A.i = a;
    B.i = b;

    for( n = 0; n < 8; n++ )
    {
        if( B.s16[n] < 0 )
        {
            /* rotate right: bits shifted out at the bottom re-enter at the top */
            unsigned int count       = (-B.s16[n]) % 16;
            unsigned int carry_count = (16 - count) % 16;
            ssp_u16 carry            = A.u16[n] << carry_count;
            A.u16[n] = A.u16[n] >> count;
            A.u16[n] = A.u16[n] | carry;
        }
        else
        {
            /* rotate left: bits shifted out at the top re-enter at the bottom */
            unsigned int count       = B.s16[n] % 16;
            unsigned int carry_count = (16 - count) % 16;
            ssp_u16 carry            = A.u16[n] >> carry_count;
            A.u16[n] = A.u16[n] << count;
            A.u16[n] = A.u16[n] | carry;
        }
    }
    return A.i;
}
/**
 * Reference implementation of _mm_rot_epi32 / protd [SSE5].
 *
 * Rotates each unsigned 32-bit lane of a by the signed count in the
 * matching lane of b: non-negative counts rotate left, negative counts
 * rotate right. Counts are reduced modulo 32.
 *
 * \param a four packed 32-bit values to rotate.
 * \param b four packed signed 32-bit rotate counts.
 * \return four rotated 32-bit values.
 */
SSP_FORCEINLINE __m128i ssp_rot_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int      lane;

    A.i = a;
    B.i = b;

    for( lane = 0; lane < 4; lane++ )
    {
        unsigned int left, right;

        /* A rotate is a left shift OR-ed with the complementary right shift. */
        if( B.s32[lane] < 0 )
        {
            right = (-B.s32[lane]) % 32;
            left  = (32 - right) % 32;
        }
        else
        {
            left  = B.s32[lane] % 32;
            right = (32 - left) % 32;
        }

        A.u32[lane] = ( A.u32[lane] << left ) | ( A.u32[lane] >> right );
    }

    return A.i;
}
/**
 * Reference implementation of _mm_rot_epi64 / protq [SSE5].
 *
 * Rotates each unsigned 64-bit lane of a by the signed count in the
 * matching lane of b: non-negative counts rotate left, negative counts
 * rotate right. Counts are reduced modulo 64.
 *
 * \param a two packed 64-bit values to rotate.
 * \param b two packed signed 64-bit rotate counts.
 * \return two rotated 64-bit values.
 */
SSP_FORCEINLINE __m128i ssp_rot_epi64_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int      lane;

    A.i = a;
    B.i = b;

    for( lane = 0; lane < 2; lane++ )
    {
        unsigned int left, right;

        /* A rotate is a left shift OR-ed with the complementary right shift. */
        if( B.s64[lane] < 0 )
        {
            right = (unsigned int)((-B.s64[lane]) % 64);
            left  = (64 - right) % 64;
        }
        else
        {
            left  = (unsigned int)(B.s64[lane] % 64);
            right = (64 - left) % 64;
        }

        A.u64[lane] = ( A.u64[lane] << left ) | ( A.u64[lane] >> right );
    }

    return A.i;
}
/**
 * Reference implementation of _mm_rot_epi8 / protb [SSE5].
 *
 * Rotates each unsigned 8-bit lane of a by the signed count in the
 * matching lane of b: non-negative counts rotate left, negative counts
 * rotate right. Counts are reduced modulo 8.
 *
 * \param a sixteen packed 8-bit values to rotate.
 * \param b sixteen packed signed 8-bit rotate counts.
 * \return sixteen rotated 8-bit values.
 */
SSP_FORCEINLINE __m128i ssp_rot_epi8_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int      lane;

    A.i = a;
    B.i = b;

    for( lane = 0; lane < 16; lane++ )
    {
        unsigned int left, right;

        /* A rotate is a left shift OR-ed with the complementary right shift. */
        if( B.s8[lane] < 0 )
        {
            right = (-B.s8[lane]) % 8;
            left  = (8 - right) % 8;
        }
        else
        {
            left  = B.s8[lane] % 8;
            right = (8 - left) % 8;
        }

        /* The cast drops the bits the promoted left shift pushed past bit 7. */
        A.u8[lane] = (ssp_u8)( ( A.u8[lane] << left ) | ( A.u8[lane] >> right ) );
    }

    return A.i;
}
/**
 * Reference implementation of _mm_roti_epi16 / protw [SSE5].
 *
 * Rotates every unsigned 16-bit lane of a by the same signed count b:
 * non-negative rotates left, negative rotates right, modulo 16.
 *
 * \param a eight packed 16-bit values to rotate.
 * \param b signed rotate count applied to every lane.
 * \return eight rotated 16-bit values.
 */
SSP_FORCEINLINE __m128i ssp_roti_epi16_REF( __m128i a, const int b )
{
    ssp_m128     A;
    unsigned int left, right;
    int          lane;

    A.i = a;

    /* Express either direction as a left shift plus its complementary
       right shift, so a single loop handles both. */
    if( b < 0 )
    {
        right = (-b) % 16;
        left  = (16 - right) % 16;
    }
    else
    {
        left  = b % 16;
        right = (16 - left) % 16;
    }

    for( lane = 0; lane < 8; lane++ )
        A.u16[lane] = (ssp_u16)( ( A.u16[lane] << left ) | ( A.u16[lane] >> right ) );

    return A.i;
}
/**
 * Reference implementation of _mm_roti_epi32 / protd [SSE5].
 *
 * Rotates every unsigned 32-bit lane of a by the same signed count b:
 * non-negative rotates left, negative rotates right, modulo 32.
 *
 * \param a four packed 32-bit values to rotate.
 * \param b signed rotate count applied to every lane.
 * \return four rotated 32-bit values.
 */
SSP_FORCEINLINE __m128i ssp_roti_epi32_REF( __m128i a, const int b )
{
    ssp_m128     A;
    unsigned int left, right;
    int          lane;

    A.i = a;

    /* Express either direction as a left shift plus its complementary
       right shift, so a single loop handles both. */
    if( b < 0 )
    {
        right = (-b) % 32;
        left  = (32 - right) % 32;
    }
    else
    {
        left  = b % 32;
        right = (32 - left) % 32;
    }

    for( lane = 0; lane < 4; lane++ )
        A.u32[lane] = ( A.u32[lane] << left ) | ( A.u32[lane] >> right );

    return A.i;
}
/**
 * Reference implementation of _mm_roti_epi64 / protq [SSE5].
 *
 * Rotates both unsigned 64-bit lanes of a by the same signed count b:
 * non-negative rotates left, negative rotates right, modulo 64.
 *
 * \param a two packed 64-bit values to rotate.
 * \param b signed rotate count applied to every lane.
 * \return two rotated 64-bit values.
 */
SSP_FORCEINLINE __m128i ssp_roti_epi64_REF( __m128i a, const int b )
{
    ssp_m128     A;
    unsigned int left, right;
    int          lane;

    A.i = a;

    /* Express either direction as a left shift plus its complementary
       right shift, so a single loop handles both. */
    if( b < 0 )
    {
        right = (-b) % 64;
        left  = (64 - right) % 64;
    }
    else
    {
        left  = b % 64;
        right = (64 - left) % 64;
    }

    for( lane = 0; lane < 2; lane++ )
        A.u64[lane] = ( A.u64[lane] << left ) | ( A.u64[lane] >> right );

    return A.i;
}
/**
 * Reference implementation of _mm_roti_epi8 / protb [SSE5].
 *
 * Rotates every unsigned 8-bit lane of a by the same signed count b:
 * non-negative rotates left, negative rotates right, modulo 8.
 *
 * \param a sixteen packed 8-bit values to rotate.
 * \param b signed rotate count applied to every lane.
 * \return sixteen rotated 8-bit values.
 */
SSP_FORCEINLINE __m128i ssp_roti_epi8_REF( __m128i a, const int b )
{
    ssp_m128     A;
    unsigned int left, right;
    int          lane;

    A.i = a;

    /* Express either direction as a left shift plus its complementary
       right shift, so a single loop handles both. */
    if( b < 0 )
    {
        right = (-b) % 8;
        left  = (8 - right) % 8;
    }
    else
    {
        left  = b % 8;
        right = (8 - left) % 8;
    }

    for( lane = 0; lane < 16; lane++ )
        A.u8[lane] = (ssp_u8)( ( A.u8[lane] << left ) | ( A.u8[lane] >> right ) );

    return A.i;
}
/**
 * Reference implementation of _mm_round_pd / roundpd [SSE4.1 and SSE5].
 *
 * Rounds both double-precision lanes according to the low two bits of
 * iRoundMode. A lane is only modified when
 * ssp_number_isValidNumber_F64_REF() accepts its bit pattern; in the
 * round-to-nearest case a rejected lane is instead passed through
 * ssp_number_changeSNanToQNaN_F64_REF().
 *
 * NOTE(review): truncation and round-to-nearest go through a cast to
 * ssp_s64, so magnitudes outside the 64-bit integer range are presumably
 * not handled -- confirm against callers.
 *
 * \param val        two packed double-precision values.
 * \param iRoundMode rounding selector; only (iRoundMode & 0x3) is examined.
 * \return two rounded double-precision values.
 */
SSP_FORCEINLINE __m128d ssp_round_pd_REF( __m128d val, int iRoundMode )
{
    ssp_m128 Val;
    int      lane;

    Val.d = val;

    for( lane = 0; lane < 2; ++lane )
    {
        ssp_f64 *elem = &Val.f64[lane];
        ssp_s64 *bits = (ssp_s64*)elem;   /* raw view for the classification helpers */

        switch( iRoundMode & 0x3 )
        {
        case SSP_FROUND_CUR_DIRECTION:
            break;

        case SSP_FROUND_TO_ZERO:
            if( ssp_number_isValidNumber_F64_REF( bits ) )
                *elem = (ssp_f64)( (ssp_s64)(*elem) );
            break;

        case SSP_FROUND_TO_POS_INF:
            if( ssp_number_isValidNumber_F64_REF( bits ) )
                *elem = ceil( *elem );
            break;

        case SSP_FROUND_TO_NEG_INF:
            if( ssp_number_isValidNumber_F64_REF( bits ) )
                *elem = floor( *elem );
            break;

        default: /* SSP_FROUND_TO_NEAREST_INT */
            if( ssp_number_isValidNumber_F64_REF( bits ) )
                *elem = (ssp_f64)( (*elem > 0) ? (ssp_s64)(*elem + 0.5) : (ssp_s64)(*elem - 0.5) );
            else
                *elem = ssp_number_changeSNanToQNaN_F64_REF( bits );
            break;
        }
    }

    return Val.d;
}
/**
 * Reference implementation of _mm_round_ps / roundps [SSE4.1 and SSE5].
 *
 * Rounds all four single-precision lanes according to the low two bits of
 * iRoundMode. A lane is only modified when
 * ssp_number_isValidNumber_F32_REF() accepts its bit pattern; in the
 * round-to-nearest case a rejected lane is instead passed through
 * ssp_number_changeSNanToQNaN_F32_REF(). Afterwards every lane that
 * compares equal to zero is stored as +0.0f, so this function never
 * returns -0.0f.
 *
 * NOTE(review): truncation and round-to-nearest go through a cast to
 * ssp_s32, so magnitudes outside the 32-bit integer range are presumably
 * not handled -- confirm against callers.
 *
 * \param val        four packed single-precision values.
 * \param iRoundMode rounding selector; only (iRoundMode & 0x3) is examined.
 * \return four rounded single-precision values.
 */
SSP_FORCEINLINE __m128 ssp_round_ps_REF( __m128 val, int iRoundMode )
{
    ssp_m128 Val;
    int      lane;

    Val.f = val;

    for( lane = 0; lane < 4; ++lane )
    {
        ssp_f32 *elem = &Val.f32[lane];
        ssp_s32 *bits = (ssp_s32*)elem;   /* raw view for the classification helpers */

        switch( iRoundMode & 0x3 )
        {
        case SSP_FROUND_CUR_DIRECTION:
            break;

        case SSP_FROUND_TO_ZERO:
            /* Positive and negative inputs take the same conversion path. */
            if( ssp_number_isValidNumber_F32_REF( bits ) )
                *elem = (ssp_f32)( (ssp_s32)(*elem) );
            break;

        case SSP_FROUND_TO_POS_INF:
            if( ssp_number_isValidNumber_F32_REF( bits ) )
                *elem = (ssp_f32)ceil( *elem );
            break;

        case SSP_FROUND_TO_NEG_INF:
            if( ssp_number_isValidNumber_F32_REF( bits ) )
                *elem = (ssp_f32)floor( *elem );
            break;

        default: /* SSP_FROUND_TO_NEAREST_INT */
            if( ssp_number_isValidNumber_F32_REF( bits ) )
                *elem = (ssp_f32)( (*elem > 0) ? (ssp_s32)(*elem + 0.5) : (ssp_s32)(*elem - 0.5) );
            else
                *elem = ssp_number_changeSNanToQNaN_F32_REF( bits );
            break;
        }
    }

    /* -0.0f == x is true for both zeros; normalise them to +0.0f. */
    for( lane = 0; lane < 4; ++lane )
    {
        if( -0.0f == Val.f32[lane] )
            Val.f32[lane] = +0.0f;
    }

    return Val.f;
}
/**
 * Reference implementation of _mm_round_sd / roundsd [SSE4.1 and SSE5].
 *
 * Rounds the low double-precision lane of val according to the low two
 * bits of iRoundMode and stores it into lane 0 of dst; the upper lane of
 * the result is copied from dst. Lane 0 of dst is only overwritten when
 * ssp_number_isValidNumber_F64_REF() accepts the input, except in the
 * round-to-nearest case where a rejected input is passed through
 * ssp_number_changeSNanToQNaN_F64_REF().
 *
 * \param dst        supplies the upper lane (and lane 0 when nothing is written).
 * \param val        lane 0 is the value to round.
 * \param iRoundMode rounding selector; only (iRoundMode & 0x3) is examined.
 * \return dst with lane 0 replaced by the rounded value where applicable.
 */
SSP_FORCEINLINE __m128d ssp_round_sd_REF( __m128d dst, __m128d val, int iRoundMode )
{
    ssp_m128  Out, In;
    ssp_s64  *bits;

    Out.d = dst;
    In.d  = val;
    bits  = (ssp_s64*)(&In.f64[0]);   /* raw view for the classification helpers */

    switch( iRoundMode & 0x3 )
    {
    case SSP_FROUND_CUR_DIRECTION:
        break;

    case SSP_FROUND_TO_ZERO:
        if( ssp_number_isValidNumber_F64_REF( bits ) )
            Out.f64[0] = (ssp_f64)( (ssp_s64)In.f64[0] );
        break;

    case SSP_FROUND_TO_POS_INF:
        if( ssp_number_isValidNumber_F64_REF( bits ) )
            Out.f64[0] = ceil( In.f64[0] );
        break;

    case SSP_FROUND_TO_NEG_INF:
        if( ssp_number_isValidNumber_F64_REF( bits ) )
            Out.f64[0] = floor( In.f64[0] );
        break;

    default: /* SSP_FROUND_TO_NEAREST_INT */
        if( ssp_number_isValidNumber_F64_REF( bits ) )
            Out.f64[0] = (ssp_f64)( (In.f64[0]>0) ? (ssp_s64)(In.f64[0]+0.5) : (ssp_s64)(In.f64[0]-0.5) );
        else
            Out.f64[0] = ssp_number_changeSNanToQNaN_F64_REF( bits );
        break;
    }

    return Out.d;
}
/**
 * Reference implementation of _mm_round_ss / roundss [SSE4.1 and SSE5].
 *
 * Rounds the low single-precision lane of val according to the low two
 * bits of iRoundMode and stores it into lane 0 of dst; lanes 1..3 of the
 * result are copied from dst. Lane 0 of dst is only overwritten when
 * ssp_number_isValidNumber_F32_REF() accepts the input, except in the
 * round-to-nearest case where a rejected input is passed through
 * ssp_number_changeSNanToQNaN_F32_REF().
 *
 * Fix: in the truncate case the sign was re-applied with the test
 * "Val.f32[0] <= -0". The literal -0 is the integer 0, so the test was
 * also true for +0.0 and turned it into -0.0. The sign bit is now copied
 * directly from the input, which still yields -0.0 for inputs in (-1, 0]
 * with the sign bit set and leaves +0.0 as +0.0.
 *
 * \param dst        supplies lanes 1..3 (and lane 0 when nothing is written).
 * \param val        lane 0 is the value to round.
 * \param iRoundMode rounding selector; only (iRoundMode & 0x3) is examined.
 * \return dst with lane 0 replaced by the rounded value where applicable.
 */
SSP_FORCEINLINE __m128 ssp_round_ss_REF( __m128 dst, __m128 val, int iRoundMode )
{
    ssp_s32 *valPtr;
    ssp_m128 Dst, Val;
    Dst.f = dst;
    Val.f = val;

    switch( iRoundMode & 0x3 )
    {
    case SSP_FROUND_CUR_DIRECTION:
        break;
    case SSP_FROUND_TO_ZERO:
        valPtr = (ssp_s32*)(&Val.f32[0]);
        if( ssp_number_isValidNumber_F32_REF( valPtr ) )
        {
            Dst.f32[0] = (ssp_f32)( (ssp_s32)Val.f32[0] );
            /* The integer round trip loses the sign of results that
               truncate to zero; restore it from the input's sign bit. */
            Dst.u32[0] = Dst.u32[0] | ( Val.u32[0] & 0x80000000u );
        }
        break;
    case SSP_FROUND_TO_POS_INF:
        valPtr = (ssp_s32*)(&Val.f32[0]);
        if( ssp_number_isValidNumber_F32_REF( valPtr ) )
            Dst.f32[0] = (ssp_f32)ceil( Val.f32[0] );
        break;
    case SSP_FROUND_TO_NEG_INF:
        valPtr = (ssp_s32*)(&Val.f32[0]);
        if( ssp_number_isValidNumber_F32_REF( valPtr ) )
            Dst.f32[0] = (ssp_f32)floor( Val.f32[0] );
        break;
    default: // SSP_FROUND_TO_NEAREST_INT
        valPtr = (ssp_s32*)(&Val.f32[0]);
        if( ssp_number_isValidNumber_F32_REF( valPtr ) )
            Dst.f32[0] = (ssp_f32)( (Val.f32[0]>0) ? (ssp_s32)(Val.f32[0]+0.5) : (ssp_s32)(Val.f32[0]-0.5) );
        else
            Dst.f32[0] = ssp_number_changeSNanToQNaN_F32_REF( valPtr );
    }
    return Dst.f;
}
SSP_FORCEINLINE __m128i ssp_sha_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of ssp_sha_epi16/pshaw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 3520 of file SSEPlus_emulation_REF.h.
03521 { 03522 int n; 03523 ssp_m128 A,B; 03524 A.i = a; 03525 B.i = b; 03526 03527 for( n = 0; n < 8; n++ ) 03528 { 03529 if( B.s8[n*2] < 0 ) 03530 { 03531 unsigned int count = (-B.s8[n*2]) % 16; 03532 A.s16[n] = A.s16[n] >> count; 03533 } 03534 else 03535 { 03536 unsigned int count = B.s8[n*2] % 16; 03537 A.s16[n] = A.s16[n] << count; 03538 } 03539 } 03540 03541 return A.i; 03542 }
SSP_FORCEINLINE __m128i ssp_sha_epi32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of ssp_sha_epi32/pshad [SSE5]. (SSE5 .pdf documentation here)
Definition at line 3569 of file SSEPlus_emulation_REF.h.
03570 { 03571 int n; 03572 ssp_m128 A,B; 03573 A.i = a; 03574 B.i = b; 03575 03576 for( n = 0; n < 4; n++ ) 03577 { 03578 if( B.s8[n*4] < 0 ) 03579 { 03580 unsigned int count = (-B.s8[n*4]) % 32; 03581 A.s32[n] = A.s32[n] >> count; 03582 } 03583 else 03584 { 03585 unsigned int count = B.s8[n*4] % 32; 03586 A.s32[n] = A.s32[n] << count; 03587 } 03588 } 03589 03590 return A.i; 03591 }
SSP_FORCEINLINE __m128i ssp_sha_epi64_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of ssp_sha_epi64/pshaq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 3618 of file SSEPlus_emulation_REF.h.
03619 { 03620 int n; 03621 ssp_m128 A,B; 03622 A.i = a; 03623 B.i = b; 03624 03625 for( n = 0; n < 2; n++ ) 03626 { 03627 if( B.s8[n*8] < 0 ) 03628 { 03629 unsigned int count = (-B.s8[n*8]) % 64; 03630 A.s64[n] = A.s64[n] >> count; 03631 } 03632 else 03633 { 03634 unsigned int count = B.s8[n*8] % 64; 03635 A.s64[n] = A.s64[n] << count; 03636 } 03637 } 03638 03639 return A.i; 03640 }
SSP_FORCEINLINE __m128i ssp_sha_epi8_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of ssp_sha_epi8/pshab [SSE5]. (SSE5 .pdf documentation here)
Definition at line 3471 of file SSEPlus_emulation_REF.h.
03472 { 03473 int n; 03474 ssp_m128 A,B; 03475 A.i = a; 03476 B.i = b; 03477 03478 for( n = 0; n < 16; n++ ) 03479 { 03480 if( B.s8[n] < 0 ) 03481 { 03482 unsigned int count = (-B.s8[n]) % 8; 03483 A.s8[n] = A.s8[n] >> count; 03484 } 03485 else 03486 { 03487 unsigned int count = B.s8[n] % 8; 03488 A.s8[n] = A.s8[n] << count; 03489 } 03490 } 03491 03492 return A.i; 03493 }
SSP_FORCEINLINE __m128i ssp_shl_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of ssp_shl_epi16/pshlw [SSE5]. (SSE5 .pdf documentation here)
Definition at line 3496 of file SSEPlus_emulation_REF.h.
03497 { 03498 int n; 03499 ssp_m128 A,B; 03500 A.i = a; 03501 B.i = b; 03502 03503 for( n = 0; n < 8; n++ ) 03504 { 03505 if( B.s8[n*2] < 0 ) 03506 { 03507 unsigned int count = (-B.s8[n*2]) % 16; 03508 A.u16[n] = A.u16[n] >> count; 03509 } 03510 else 03511 { 03512 unsigned int count = B.s8[n*2] % 16; 03513 A.u16[n] = A.u16[n] << count; 03514 } 03515 } 03516 return A.i; 03517 }
SSP_FORCEINLINE __m128i ssp_shl_epi32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of ssp_shl_epi32/pshld [SSE5]. (SSE5 .pdf documentation here)
Definition at line 3545 of file SSEPlus_emulation_REF.h.
03546 { 03547 int n; 03548 ssp_m128 A,B; 03549 A.i = a; 03550 B.i = b; 03551 03552 for( n = 0; n < 4; n++ ) 03553 { 03554 if( B.s8[n*4] < 0 ) 03555 { 03556 unsigned int count = (-B.s8[n*4]) % 32; 03557 A.u32[n] = A.u32[n] >> count; 03558 } 03559 else 03560 { 03561 unsigned int count = B.s8[n*4] % 32; 03562 A.u32[n] = A.u32[n] << count; 03563 } 03564 } 03565 return A.i; 03566 }
SSP_FORCEINLINE __m128i ssp_shl_epi64_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of ssp_shl_epi64/pshlq [SSE5]. (SSE5 .pdf documentation here)
Definition at line 3594 of file SSEPlus_emulation_REF.h.
03595 { 03596 int n; 03597 ssp_m128 A,B; 03598 A.i = a; 03599 B.i = b; 03600 03601 for( n = 0; n < 2; n++ ) 03602 { 03603 if( B.s8[n*8] < 0 ) 03604 { 03605 unsigned int count = (-B.s8[n*8]) % 64; 03606 A.u64[n] = A.u64[n] >> count; 03607 } 03608 else 03609 { 03610 unsigned int count = B.s8[n*8] % 64; 03611 A.u64[n] = A.u64[n] << count; 03612 } 03613 } 03614 return A.i; 03615 }
SSP_FORCEINLINE __m128i ssp_shl_epi8_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of ssp_shl_epi8/pshlb [SSE5]. (SSE5 .pdf documentation here)
Definition at line 3447 of file SSEPlus_emulation_REF.h.
03448 { 03449 int n; 03450 ssp_m128 A,B; 03451 A.i = a; 03452 B.i = b; 03453 03454 for( n = 0; n < 16; n++ ) 03455 { 03456 if( B.s8[n] < 0 ) 03457 { 03458 unsigned int count = (-B.s8[n]) % 8; 03459 A.u8[n] = A.u8[n] >> count; 03460 } 03461 else 03462 { 03463 unsigned int count = B.s8[n] % 8; 03464 A.u8[n] = A.u8[n] << count; 03465 } 03466 } 03467 return A.i; 03468 }
SSP_FORCEINLINE __m128i ssp_shuffle_epi8_REF | ( | __m128i | a, | |
__m128i | mask | |||
) |
Reference implementation of _mm_shuffle_epi8 [SSSE3]. (Searches MSDN)
Definition at line 2707 of file SSEPlus_emulation_REF.h.
02708 { 02709 ssp_m128 A, MSK, B; 02710 A.i = a; 02711 MSK.i = mask; 02712 02713 B.s8[0] = (MSK.s8[0] & 0x80) ? 0 : A.s8[(MSK.s8[0] & 0xf)]; 02714 B.s8[1] = (MSK.s8[1] & 0x80) ? 0 : A.s8[(MSK.s8[1] & 0xf)]; 02715 B.s8[2] = (MSK.s8[2] & 0x80) ? 0 : A.s8[(MSK.s8[2] & 0xf)]; 02716 B.s8[3] = (MSK.s8[3] & 0x80) ? 0 : A.s8[(MSK.s8[3] & 0xf)]; 02717 B.s8[4] = (MSK.s8[4] & 0x80) ? 0 : A.s8[(MSK.s8[4] & 0xf)]; 02718 B.s8[5] = (MSK.s8[5] & 0x80) ? 0 : A.s8[(MSK.s8[5] & 0xf)]; 02719 B.s8[6] = (MSK.s8[6] & 0x80) ? 0 : A.s8[(MSK.s8[6] & 0xf)]; 02720 B.s8[7] = (MSK.s8[7] & 0x80) ? 0 : A.s8[(MSK.s8[7] & 0xf)]; 02721 B.s8[8] = (MSK.s8[8] & 0x80) ? 0 : A.s8[(MSK.s8[8] & 0xf)]; 02722 B.s8[9] = (MSK.s8[9] & 0x80) ? 0 : A.s8[(MSK.s8[9] & 0xf)]; 02723 B.s8[10] = (MSK.s8[10] & 0x80) ? 0 : A.s8[(MSK.s8[10] & 0xf)]; 02724 B.s8[11] = (MSK.s8[11] & 0x80) ? 0 : A.s8[(MSK.s8[11] & 0xf)]; 02725 B.s8[12] = (MSK.s8[12] & 0x80) ? 0 : A.s8[(MSK.s8[12] & 0xf)]; 02726 B.s8[13] = (MSK.s8[13] & 0x80) ? 0 : A.s8[(MSK.s8[13] & 0xf)]; 02727 B.s8[14] = (MSK.s8[14] & 0x80) ? 0 : A.s8[(MSK.s8[14] & 0xf)]; 02728 B.s8[15] = (MSK.s8[15] & 0x80) ? 0 : A.s8[(MSK.s8[15] & 0xf)]; 02729 02730 return B.i; 02731 }
SSP_FORCEINLINE __m64 ssp_shuffle_pi8_REF | ( | __m64 | a, | |
__m64 | mask | |||
) |
Reference implementation of _mm_shuffle_pi8 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 2736 of file SSEPlus_emulation_REF.h.
02737 { 02738 ssp_m64 A, MSK, B; 02739 A.m64 = a; 02740 MSK.m64 = mask; 02741 02742 B.s8[0] = (MSK.s8[0] & 0x80) ? 0 : A.s8[(MSK.s8[0] & 0xf)]; 02743 B.s8[1] = (MSK.s8[1] & 0x80) ? 0 : A.s8[(MSK.s8[1] & 0xf)]; 02744 B.s8[2] = (MSK.s8[2] & 0x80) ? 0 : A.s8[(MSK.s8[2] & 0xf)]; 02745 B.s8[3] = (MSK.s8[3] & 0x80) ? 0 : A.s8[(MSK.s8[3] & 0xf)]; 02746 B.s8[4] = (MSK.s8[4] & 0x80) ? 0 : A.s8[(MSK.s8[4] & 0xf)]; 02747 B.s8[5] = (MSK.s8[5] & 0x80) ? 0 : A.s8[(MSK.s8[5] & 0xf)]; 02748 B.s8[6] = (MSK.s8[6] & 0x80) ? 0 : A.s8[(MSK.s8[6] & 0xf)]; 02749 B.s8[7] = (MSK.s8[7] & 0x80) ? 0 : A.s8[(MSK.s8[7] & 0xf)]; 02750 02751 return B.m64; 02752 }
SSP_FORCEINLINE __m128i ssp_sign_epi16_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_sign_epi16 [SSSE3]. (Searches MSDN)
Definition at line 2785 of file SSEPlus_emulation_REF.h.
02786 { 02787 ssp_m128 A, B; 02788 A.i = a; 02789 B.i = b; 02790 02791 A.s16[0] = (B.s16[0]<0) ? (-A.s16[0]) :((B.s16[0]==0) ? 0: A.s16[0]); 02792 A.s16[1] = (B.s16[1]<0) ? (-A.s16[1]) :((B.s16[1]==0) ? 0: A.s16[1]); 02793 A.s16[2] = (B.s16[2]<0) ? (-A.s16[2]) :((B.s16[2]==0) ? 0: A.s16[2]); 02794 A.s16[3] = (B.s16[3]<0) ? (-A.s16[3]) :((B.s16[3]==0) ? 0: A.s16[3]); 02795 A.s16[4] = (B.s16[4]<0) ? (-A.s16[4]) :((B.s16[4]==0) ? 0: A.s16[4]); 02796 A.s16[5] = (B.s16[5]<0) ? (-A.s16[5]) :((B.s16[5]==0) ? 0: A.s16[5]); 02797 A.s16[6] = (B.s16[6]<0) ? (-A.s16[6]) :((B.s16[6]==0) ? 0: A.s16[6]); 02798 A.s16[7] = (B.s16[7]<0) ? (-A.s16[7]) :((B.s16[7]==0) ? 0: A.s16[7]); 02799 02800 return A.i; 02801 }
SSP_FORCEINLINE __m128i ssp_sign_epi32_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_sign_epi32 [SSSE3]. (Searches MSDN)
Definition at line 2805 of file SSEPlus_emulation_REF.h.
02806 { 02807 ssp_m128 A, B; 02808 A.i = a; 02809 B.i = b; 02810 02811 A.s32[0] = (B.s32[0]<0) ? (-A.s32[0]) :((B.s32[0]==0) ? 0: A.s32[0]); 02812 A.s32[1] = (B.s32[1]<0) ? (-A.s32[1]) :((B.s32[1]==0) ? 0: A.s32[1]); 02813 A.s32[2] = (B.s32[2]<0) ? (-A.s32[2]) :((B.s32[2]==0) ? 0: A.s32[2]); 02814 A.s32[3] = (B.s32[3]<0) ? (-A.s32[3]) :((B.s32[3]==0) ? 0: A.s32[3]); 02815 02816 return A.i; 02817 }
SSP_FORCEINLINE __m128i ssp_sign_epi8_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_sign_epi8 [SSSE3]. (Searches MSDN)
Definition at line 2757 of file SSEPlus_emulation_REF.h.
02758 { 02759 ssp_m128 A, B; 02760 A.i = a; 02761 B.i = b; 02762 02763 A.s8[0] = (B.s8[0]<0) ? (-A.s8[0]) :((B.s8[0]==0) ? 0: A.s8[0]); 02764 A.s8[1] = (B.s8[1]<0) ? (-A.s8[1]) :((B.s8[1]==0) ? 0: A.s8[1]); 02765 A.s8[2] = (B.s8[2]<0) ? (-A.s8[2]) :((B.s8[2]==0) ? 0: A.s8[2]); 02766 A.s8[3] = (B.s8[3]<0) ? (-A.s8[3]) :((B.s8[3]==0) ? 0: A.s8[3]); 02767 A.s8[4] = (B.s8[4]<0) ? (-A.s8[4]) :((B.s8[4]==0) ? 0: A.s8[4]); 02768 A.s8[5] = (B.s8[5]<0) ? (-A.s8[5]) :((B.s8[5]==0) ? 0: A.s8[5]); 02769 A.s8[6] = (B.s8[6]<0) ? (-A.s8[6]) :((B.s8[6]==0) ? 0: A.s8[6]); 02770 A.s8[7] = (B.s8[7]<0) ? (-A.s8[7]) :((B.s8[7]==0) ? 0: A.s8[7]); 02771 A.s8[8] = (B.s8[8]<0) ? (-A.s8[8]) :((B.s8[8]==0) ? 0: A.s8[8]); 02772 A.s8[9] = (B.s8[9]<0) ? (-A.s8[9]) :((B.s8[9]==0) ? 0: A.s8[9]); 02773 A.s8[10] = (B.s8[10]<0) ? (-A.s8[10]) :((B.s8[10]==0)? 0: A.s8[10]); 02774 A.s8[11] = (B.s8[11]<0) ? (-A.s8[11]) :((B.s8[11]==0)? 0: A.s8[11]); 02775 A.s8[12] = (B.s8[12]<0) ? (-A.s8[12]) :((B.s8[12]==0)? 0: A.s8[12]); 02776 A.s8[13] = (B.s8[13]<0) ? (-A.s8[13]) :((B.s8[13]==0)? 0: A.s8[13]); 02777 A.s8[14] = (B.s8[14]<0) ? (-A.s8[14]) :((B.s8[14]==0)? 0: A.s8[14]); 02778 A.s8[15] = (B.s8[15]<0) ? (-A.s8[15]) :((B.s8[15]==0)? 0: A.s8[15]); 02779 02780 return A.i; 02781 }
SSP_FORCEINLINE __m64 ssp_sign_pi16_REF | ( | __m64 | a, | |
__m64 | b | |||
) |
Reference implementation of _mm_sign_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 2843 of file SSEPlus_emulation_REF.h.
02844 { 02845 ssp_m64 A, B; 02846 A.m64 = a; 02847 B.m64 = b; 02848 02849 A.s16[0] = (B.s16[0]<0) ? (-A.s16[0]) :((B.s16[0]==0) ? 0: A.s16[0]); 02850 A.s16[1] = (B.s16[1]<0) ? (-A.s16[1]) :((B.s16[1]==0) ? 0: A.s16[1]); 02851 A.s16[2] = (B.s16[2]<0) ? (-A.s16[2]) :((B.s16[2]==0) ? 0: A.s16[2]); 02852 A.s16[3] = (B.s16[3]<0) ? (-A.s16[3]) :((B.s16[3]==0) ? 0: A.s16[3]); 02853 02854 return A.m64; 02855 }
SSP_FORCEINLINE __m64 ssp_sign_pi32_REF | ( | __m64 | a, | |
__m64 | b | |||
) |
Reference implementation of _mm_sign_pi32 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 2861 of file SSEPlus_emulation_REF.h.
02862 { 02863 ssp_m64 A, B; 02864 A.m64 = a; 02865 B.m64 = b; 02866 02867 A.s32[0] = (B.s32[0]<0) ? (-A.s32[0]) :((B.s32[0]==0) ? 0: A.s32[0]); 02868 A.s32[1] = (B.s32[1]<0) ? (-A.s32[1]) :((B.s32[1]==0) ? 0: A.s32[1]); 02869 02870 return A.m64; 02871 }
SSP_FORCEINLINE __m64 ssp_sign_pi8_REF | ( | __m64 | a, | |
__m64 | b | |||
) |
Reference implementation of _mm_sign_pi8 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.
Definition at line 2821 of file SSEPlus_emulation_REF.h.
02822 { 02823 ssp_m64 A, B; 02824 A.m64 = a; 02825 B.m64 = b; 02826 02827 A.s8[0] = (B.s8[0]<0) ? (-A.s8[0]) :((B.s8[0]==0) ? 0: A.s8[0]); 02828 A.s8[1] = (B.s8[1]<0) ? (-A.s8[1]) :((B.s8[1]==0) ? 0: A.s8[1]); 02829 A.s8[2] = (B.s8[2]<0) ? (-A.s8[2]) :((B.s8[2]==0) ? 0: A.s8[2]); 02830 A.s8[3] = (B.s8[3]<0) ? (-A.s8[3]) :((B.s8[3]==0) ? 0: A.s8[3]); 02831 A.s8[4] = (B.s8[4]<0) ? (-A.s8[4]) :((B.s8[4]==0) ? 0: A.s8[4]); 02832 A.s8[5] = (B.s8[5]<0) ? (-A.s8[5]) :((B.s8[5]==0) ? 0: A.s8[5]); 02833 A.s8[6] = (B.s8[6]<0) ? (-A.s8[6]) :((B.s8[6]==0) ? 0: A.s8[6]); 02834 A.s8[7] = (B.s8[7]<0) ? (-A.s8[7]) :((B.s8[7]==0) ? 0: A.s8[7]); 02835 02836 return A.m64; 02837 }
SSP_FORCEINLINE __m128i ssp_stream_load_si128_REF | ( | __m128i * | p | ) |
Reference implementation of _mm_stream_load_si128 [SSE4.1]. (Searches MSDN)
Definition at line 1581 of file SSEPlus_emulation_REF.h.
SSP_FORCEINLINE void ssp_stream_sd_REF | ( | double * | dst, | |
__m128d | src | |||
) |
Reference implementation of _mm_stream_sd [SSE4a]. (Searches MSDN)
Definition at line 2874 of file SSEPlus_emulation_REF.h.
SSP_FORCEINLINE void ssp_stream_ss_REF | ( | float * | dst, | |
__m128 | src | |||
) |
Reference implementation of _mm_stream_ss [SSE4a]. (Searches MSDN)
Definition at line 2882 of file SSEPlus_emulation_REF.h.
SSP_FORCEINLINE int ssp_testc_si128_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_testc_si128/ptest [SSE4.1 and SSE5]. (SSE5 .pdf documentation here)
Definition at line 2354 of file SSEPlus_emulation_REF.h.
02355 { 02356 ssp_m128 A,B; 02357 A.i = a; 02358 B.i = b; 02359 02360 return ( (A.s64[0] & B.s64[0]) == A.s64[0] ) && 02361 ( (A.s64[1] & B.s64[1]) == A.s64[1] ) ; 02362 }
SSP_FORCEINLINE int ssp_testnzc_si128_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_testnzc_si128/ptest [SSE4.1 and SSE5]. (SSE5 .pdf documentation here)
Definition at line 2376 of file SSEPlus_emulation_REF.h.
02377 { 02378 int zf, cf; 02379 ssp_m128 A,B; 02380 A.i = a; 02381 B.i = b; 02382 02383 zf = ssp_testz_si128_REF( A.i, B.i); 02384 02385 cf = ( (~A.s64[0] & B.s64[0]) == 0 ) && 02386 ( (~A.s64[1] & B.s64[1]) == 0 ) ; 02387 return ((int)!zf & (int)!cf); 02388 }
SSP_FORCEINLINE int ssp_testz_si128_REF | ( | __m128i | a, | |
__m128i | b | |||
) |
Reference implementation of _mm_testz_si128/ptest [SSE4.1 and SSE5]. (SSE5 .pdf documentation here)
Definition at line 2365 of file SSEPlus_emulation_REF.h.
02366 { 02367 ssp_m128 A,B; 02368 A.i = a; 02369 B.i = b; 02370 02371 return ( (A.s64[0] & B.s64[0]) == 0 ) && 02372 ( (A.s64[1] & B.s64[1]) == 0 ) ; 02373 }