Reference
[Emulated]


SSE[3,4A,...,5] implemented in reference

SSP_FORCEINLINE __m128i ssp_comeq_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comeq_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comeq_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comeq_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comeq_epu16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comeq_epu32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comeq_epu64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comeq_epu8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_comeq_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comeq_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comeq_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comeq_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128i ssp_comlt_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comlt_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comlt_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comlt_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comlt_epu16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comlt_epu32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comlt_epu64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comlt_epu8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_comlt_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comlt_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comlt_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comlt_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128i ssp_comle_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comle_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comle_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comle_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comle_epu16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comle_epu32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comle_epu64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comle_epu8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_comle_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comle_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comle_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comle_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comunord_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comunord_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comunord_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comunord_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128i ssp_comneq_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comneq_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comneq_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comneq_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comneq_epu16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comneq_epu32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comneq_epu64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comneq_epu8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_comneq_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comneq_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comneq_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comneq_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comnlt_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comnlt_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comnlt_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comnlt_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comnle_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comnle_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comnle_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comnle_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comord_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comord_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comord_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comord_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comueq_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comueq_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comueq_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comueq_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comnge_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comnge_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comnge_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comnge_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comngt_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comngt_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comngt_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comngt_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128i ssp_comfalse_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comfalse_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comfalse_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comfalse_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comfalse_epu16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comfalse_epu32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comfalse_epu64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comfalse_epu8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_comfalse_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comfalse_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comfalse_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comfalse_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comoneq_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comoneq_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comoneq_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comoneq_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128i ssp_comge_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comge_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comge_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comge_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comge_epu16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comge_epu32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comge_epu64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comge_epu8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_comge_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comge_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comge_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comge_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128i ssp_comgt_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comgt_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comgt_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comgt_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comgt_epu16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comgt_epu32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comgt_epu64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comgt_epu8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_comgt_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comgt_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comgt_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comgt_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128i ssp_comtrue_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comtrue_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comtrue_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comtrue_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comtrue_epu16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comtrue_epu32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comtrue_epu64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_comtrue_epu8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_comtrue_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comtrue_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_comtrue_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_comtrue_ss_REF (__m128 a, __m128 b)

SSE[3,4A,...,5] implemented in reference

SSP_FORCEINLINE __m128d ssp_frcz_pd_REF (__m128d a)
SSP_FORCEINLINE __m128 ssp_frcz_ps_REF (__m128 a)
SSP_FORCEINLINE __m128d ssp_frcz_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_frcz_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128i ssp_haddd_epi16_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddd_epi8_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddd_epu16_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddd_epu8_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddq_epi16_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddq_epi32_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddq_epi8_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddq_epu16_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddq_epu32_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddq_epu8_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddw_epi8_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_haddw_epu8_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_hsubd_epi16_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_hsubq_epi32_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_hsubw_epi8_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_macc_epi16_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128i ssp_macc_epi32_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128 ssp_macc_ps_REF (__m128 a, __m128 b, __m128 c)
SSP_FORCEINLINE __m128d ssp_macc_pd_REF (__m128d a, __m128d b, __m128d c)
SSP_FORCEINLINE __m128 ssp_macc_ss_REF (__m128 a, __m128 b, __m128 c)
SSP_FORCEINLINE __m128d ssp_macc_sd_REF (__m128d a, __m128d b, __m128d c)
SSP_FORCEINLINE __m128i ssp_maccd_epi16_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128i ssp_macchi_epi32_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128i ssp_macclo_epi32_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128i ssp_maccs_epi16_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128i ssp_maccs_epi32_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128i ssp_maccsd_epi16_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128i ssp_maccshi_epi32_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128i ssp_maccslo_epi32_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128i ssp_maddd_epi16_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128i ssp_maddsd_epi16_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128 ssp_nmacc_ps_REF (__m128 a, __m128 b, __m128 c)
SSP_FORCEINLINE __m128d ssp_nmacc_pd_REF (__m128d a, __m128d b, __m128d c)
SSP_FORCEINLINE __m128 ssp_nmacc_ss_REF (__m128 a, __m128 b, __m128 c)
SSP_FORCEINLINE __m128d ssp_nmacc_sd_REF (__m128d a, __m128d b, __m128d c)
SSP_FORCEINLINE __m128 ssp_msub_ps_REF (__m128 a, __m128 b, __m128 c)
SSP_FORCEINLINE __m128d ssp_msub_pd_REF (__m128d a, __m128d b, __m128d c)
SSP_FORCEINLINE __m128 ssp_msub_ss_REF (__m128 a, __m128 b, __m128 c)
SSP_FORCEINLINE __m128d ssp_msub_sd_REF (__m128d a, __m128d b, __m128d c)
SSP_FORCEINLINE __m128 ssp_nmsub_ps_REF (__m128 a, __m128 b, __m128 c)
SSP_FORCEINLINE __m128d ssp_nmsub_pd_REF (__m128d a, __m128d b, __m128d c)
SSP_FORCEINLINE __m128 ssp_nmsub_ss_REF (__m128 a, __m128 b, __m128 c)
SSP_FORCEINLINE __m128d ssp_nmsub_sd_REF (__m128d a, __m128d b, __m128d c)
SSP_FORCEINLINE __m128 ssp_addsub_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_addsub_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_blend_epi16_REF (__m128i a, __m128i b, const int mask)
SSP_FORCEINLINE __m128d ssp_blend_pd_REF (__m128d a, __m128d b, const int mask)
SSP_FORCEINLINE __m128 ssp_blend_ps_REF (__m128 a, __m128 b, const int mask)
SSP_FORCEINLINE __m128i ssp_blendv_epi8_REF (__m128i a, __m128i b, __m128i mask)
SSP_FORCEINLINE __m128d ssp_blendv_pd_REF (__m128d a, __m128d b, __m128d mask)
SSP_FORCEINLINE __m128 ssp_blendv_ps_REF (__m128 a, __m128 b, __m128 mask)
SSP_FORCEINLINE __m128i ssp_cmpeq_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_dp_pd_REF (__m128d a, __m128d b, const int mask)
SSP_FORCEINLINE __m128 ssp_dp_ps_REF (__m128 a, __m128 b, const int mask)
SSP_FORCEINLINE __m128i ssp_maddubs_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m64 ssp_maddubs_pi16_REF (__m64 a, __m64 b)
SSP_FORCEINLINE __m128i ssp_mulhrs_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m64 ssp_mulhrs_pi16_REF (__m64 a, __m64 b)
SSP_FORCEINLINE int ssp_extract_epi8_REF (__m128i a, const int ndx)
SSP_FORCEINLINE int ssp_extract_epi32_REF (__m128i a, const int imm)
SSP_FORCEINLINE ssp_s64 ssp_extract_epi64_REF (__m128i a, const int ndx)
SSP_FORCEINLINE int ssp_extract_ps_REF (__m128 a, const int ndx)
SSP_FORCEINLINE __m128i ssp_extract_si64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_extracti_si64_REF (__m128i a, int len, int ndx)
SSP_FORCEINLINE __m128i ssp_hadd_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_hadd_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m64 ssp_hadd_pi16_REF (__m64 a, __m64 b)
SSP_FORCEINLINE __m64 ssp_hadd_pi32_REF (__m64 a, __m64 b)
SSP_FORCEINLINE __m128i ssp_hadds_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m64 ssp_hadds_pi16_REF (__m64 a, __m64 b)
SSP_FORCEINLINE __m128 ssp_hadd_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_hadd_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_hsub_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_hsub_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m64 ssp_hsub_pi16_REF (__m64 a, __m64 b)
SSP_FORCEINLINE __m64 ssp_hsub_pi32_REF (__m64 a, __m64 b)
SSP_FORCEINLINE __m128i ssp_hsubs_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m64 ssp_hsubs_pi16_REF (__m64 a, __m64 b)
SSP_FORCEINLINE __m128 ssp_hsub_ps_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_hsub_pd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_insert_epi8_REF (__m128i a, int b, const int ndx)
SSP_FORCEINLINE __m128i ssp_insert_epi32_REF (__m128i a, int b, const int ndx)
SSP_FORCEINLINE __m128i ssp_insert_epi64_REF (__m128i a, ssp_s64 b, const int ndx)
SSP_FORCEINLINE __m128 ssp_insert_ps_REF (__m128 a, __m128 b, const int sel)
SSP_FORCEINLINE __m128i ssp_insert_si64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_inserti_si64_REF (__m128i a, __m128i b, int len, int ndx)
SSP_FORCEINLINE __m128d ssp_loaddup_pd_REF (double const *dp)
SSP_FORCEINLINE __m128i ssp_lddqu_si128_REF (__m128i const *p)
SSP_FORCEINLINE __m128i ssp_stream_load_si128_REF (__m128i *p)
SSP_FORCEINLINE __m128i ssp_min_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_max_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_min_epu16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_max_epu16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_min_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_max_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_min_epu32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_max_epu32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_minpos_epu16_REF (__m128i shortValues)
SSP_FORCEINLINE __m128i ssp_minpos_epu16_REFb (__m128i shortValues)
SSP_FORCEINLINE __m128 ssp_movehdup_ps_REF (__m128 a)
SSP_FORCEINLINE __m128 ssp_moveldup_ps_REF (__m128 a)
SSP_FORCEINLINE __m128d ssp_movedup_pd_REF (__m128d a)
SSP_FORCEINLINE __m128i ssp_mul_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_mullo_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_mpsadbw_epu8_REF (__m128i a, __m128i b, const int msk)
SSP_FORCEINLINE __m128i ssp_packus_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_ceil_pd_REF (__m128d a)
SSP_FORCEINLINE __m128 ssp_ceil_ps_REF (__m128 a)
SSP_FORCEINLINE __m128d ssp_ceil_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_ceil_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_floor_pd_REF (__m128d a)
SSP_FORCEINLINE __m128 ssp_floor_ps_REF (__m128 a)
SSP_FORCEINLINE __m128d ssp_floor_sd_REF (__m128d a, __m128d b)
SSP_FORCEINLINE __m128 ssp_floor_ss_REF (__m128 a, __m128 b)
SSP_FORCEINLINE __m128d ssp_round_pd_REF (__m128d val, int iRoundMode)
SSP_FORCEINLINE __m128 ssp_round_ps_REF (__m128 val, int iRoundMode)
SSP_FORCEINLINE __m128d ssp_round_sd_REF (__m128d dst, __m128d val, int iRoundMode)
SSP_FORCEINLINE __m128 ssp_round_ss_REF (__m128 dst, __m128 val, int iRoundMode)
SSP_FORCEINLINE int ssp_testc_si128_REF (__m128i a, __m128i b)
SSP_FORCEINLINE int ssp_testz_si128_REF (__m128i a, __m128i b)
SSP_FORCEINLINE int ssp_testnzc_si128_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cvtepi8_epi16_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepi8_epi32_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepi8_epi64_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepi16_epi32_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepi16_epi64_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepi32_epi64_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepu8_epi16_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepu8_epi32_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepu8_epi64_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepu16_epi32_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepu16_epi64_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtepu32_epi64_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_abs_epi8_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_abs_epi16_REF (__m128i a)
SSP_FORCEINLINE __m128i ssp_abs_epi32_REF (__m128i a)
SSP_FORCEINLINE __m64 ssp_abs_pi8_REF (__m64 a)
SSP_FORCEINLINE __m64 ssp_abs_pi16_REF (__m64 a)
SSP_FORCEINLINE __m64 ssp_abs_pi32_REF (__m64 a)
SSP_FORCEINLINE __m128i ssp_alignr_epi8_REF (__m128i a, __m128i b, const int ralign)
SSP_FORCEINLINE __m64 ssp_alignr_pi8_REF (__m64 a, __m64 b, const int ralign)
SSP_FORCEINLINE __m128i ssp_shuffle_epi8_REF (__m128i a, __m128i mask)
SSP_FORCEINLINE __m64 ssp_shuffle_pi8_REF (__m64 a, __m64 mask)
SSP_FORCEINLINE __m128i ssp_sign_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sign_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sign_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m64 ssp_sign_pi8_REF (__m64 a, __m64 b)
SSP_FORCEINLINE __m64 ssp_sign_pi16_REF (__m64 a, __m64 b)
SSP_FORCEINLINE __m64 ssp_sign_pi32_REF (__m64 a, __m64 b)
SSP_FORCEINLINE void ssp_stream_sd_REF (double *dst, __m128d src)
SSP_FORCEINLINE void ssp_stream_ss_REF (float *dst, __m128 src)
SSP_FORCEINLINE unsigned short ssp_lzcnt16_REF (unsigned short val)
SSP_FORCEINLINE unsigned int ssp_lzcnt_REF (unsigned int val)
SSP_FORCEINLINE ssp_u64 ssp_lzcnt64_REF (ssp_u64 val)
SSP_FORCEINLINE unsigned short ssp_popcnt16_REF (unsigned short val)
SSP_FORCEINLINE unsigned int ssp_popcnt_REF (unsigned int val)
SSP_FORCEINLINE ssp_u64 ssp_popcnt64_REF (ssp_u64 val)
SSP_FORCEINLINE __m128i ssp_perm_epi8_REF (__m128i a, __m128i b, __m128i c)
SSP_FORCEINLINE __m128 ssp_perm_ps_REF (__m128 a, __m128 b, __m128i c)
SSP_FORCEINLINE __m128d ssp_perm_pd_REF (__m128d a, __m128d b, __m128i c)
SSP_FORCEINLINE __m128i ssp_rot_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_rot_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_rot_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_rot_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_roti_epi8_REF (__m128i a, const int b)
SSP_FORCEINLINE __m128i ssp_roti_epi16_REF (__m128i a, const int b)
SSP_FORCEINLINE __m128i ssp_roti_epi32_REF (__m128i a, const int b)
SSP_FORCEINLINE __m128i ssp_roti_epi64_REF (__m128i a, const int b)
SSP_FORCEINLINE __m128i ssp_shl_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sha_epi8_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_shl_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sha_epi16_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_shl_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sha_epi32_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_shl_epi64_REF (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sha_epi64_REF (__m128i a, __m128i b)
#define SSP_SATURATION(a, pos_limit, neg_limit)   (a>pos_limit) ? pos_limit : ((a<neg_limit)?neg_limit:a)
#define SSP_SET_MIN(sd, s)   sd=(sd<s)?sd:s;
#define SSP_SET_MAX(sd, s)   sd=(sd>s)?sd:s;

Define Documentation

#define SSP_SATURATION ( a,
pos_limit,
neg_limit   )     (a>pos_limit) ? pos_limit : ((a<neg_limit)?neg_limit:a)

Definition at line 435 of file SSEPlus_emulation_REF.h.

#define SSP_SET_MAX ( sd,
s   )     sd=(sd>s)?sd:s;

Definition at line 1592 of file SSEPlus_emulation_REF.h.

#define SSP_SET_MIN ( sd,
s   )     sd=(sd<s)?sd:s;

Definition at line 1591 of file SSEPlus_emulation_REF.h.


Function Documentation

SSP_FORCEINLINE __m128i ssp_abs_epi16_REF ( __m128i  a  ) 

Reference implementation of _mm_abs_epi16 [SSSE3]. (Searches MSDN)

Definition at line 2575 of file SSEPlus_emulation_REF.h.

02576 {
02577     ssp_m128 A;
02578     A.i = a;
02579 
02580         A.s16[0]  = (A.s16[0] < 0) ? -A.s16[0]  : A.s16[0];
02581         A.s16[1]  = (A.s16[1] < 0) ? -A.s16[1]  : A.s16[1];
02582         A.s16[2]  = (A.s16[2] < 0) ? -A.s16[2]  : A.s16[2];
02583         A.s16[3]  = (A.s16[3] < 0) ? -A.s16[3]  : A.s16[3];
02584         A.s16[4]  = (A.s16[4] < 0) ? -A.s16[4]  : A.s16[4];
02585         A.s16[5]  = (A.s16[5] < 0) ? -A.s16[5]  : A.s16[5];
02586         A.s16[6]  = (A.s16[6] < 0) ? -A.s16[6]  : A.s16[6];
02587         A.s16[7]  = (A.s16[7] < 0) ? -A.s16[7]  : A.s16[7];
02588 
02589         return A.i;
02590 }

SSP_FORCEINLINE __m128i ssp_abs_epi32_REF ( __m128i  a  ) 

Reference implementation of _mm_abs_epi32 [SSSE3]. (Searches MSDN)

Definition at line 2593 of file SSEPlus_emulation_REF.h.

02594 {
02595     ssp_m128 A;
02596     A.i = a;
02597 
02598         A.s32[0]  = (A.s32[0] < 0) ? -A.s32[0]  : A.s32[0];
02599         A.s32[1]  = (A.s32[1] < 0) ? -A.s32[1]  : A.s32[1];
02600         A.s32[2]  = (A.s32[2] < 0) ? -A.s32[2]  : A.s32[2];
02601         A.s32[3]  = (A.s32[3] < 0) ? -A.s32[3]  : A.s32[3];
02602 
02603         return A.i;
02604 }

SSP_FORCEINLINE __m128i ssp_abs_epi8_REF ( __m128i  a  ) 

Reference implementation of _mm_abs_epi8 [SSSE3]. (Searches MSDN)

Definition at line 2548 of file SSEPlus_emulation_REF.h.

02549 {
02550     ssp_m128 A;
02551     A.i = a;
02552 
02553         A.s8[0]  = (A.s8[0] < 0) ? -A.s8[0]  : A.s8[0];
02554         A.s8[1]  = (A.s8[1] < 0) ? -A.s8[1]  : A.s8[1];
02555         A.s8[2]  = (A.s8[2] < 0) ? -A.s8[2]  : A.s8[2];
02556         A.s8[3]  = (A.s8[3] < 0) ? -A.s8[3]  : A.s8[3];
02557         A.s8[4]  = (A.s8[4] < 0) ? -A.s8[4]  : A.s8[4];
02558         A.s8[5]  = (A.s8[5] < 0) ? -A.s8[5]  : A.s8[5];
02559         A.s8[6]  = (A.s8[6] < 0) ? -A.s8[6]  : A.s8[6];
02560         A.s8[7]  = (A.s8[7] < 0) ? -A.s8[7]  : A.s8[7];
02561         A.s8[8]  = (A.s8[8] < 0) ? -A.s8[8]  : A.s8[8];
02562         A.s8[9]  = (A.s8[9] < 0) ? -A.s8[9]  : A.s8[9];
02563         A.s8[10] = (A.s8[10]< 0) ? -A.s8[10] : A.s8[10];
02564         A.s8[11] = (A.s8[11]< 0) ? -A.s8[11] : A.s8[11];
02565         A.s8[12] = (A.s8[12]< 0) ? -A.s8[12] : A.s8[12];
02566         A.s8[13] = (A.s8[13]< 0) ? -A.s8[13] : A.s8[13];
02567         A.s8[14] = (A.s8[14]< 0) ? -A.s8[14] : A.s8[14];
02568         A.s8[15] = (A.s8[15]< 0) ? -A.s8[15] : A.s8[15];
02569 
02570         return A.i;
02571 }

SSP_FORCEINLINE __m64 ssp_abs_pi16_REF ( __m64  a  ) 

Reference implementation of _mm_abs_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.

Definition at line 2630 of file SSEPlus_emulation_REF.h.

02631 {
02632     ssp_m64 A;
02633     A.m64 = a;
02634 
02635         A.s16[0]  = (A.s16[0] < 0) ? -A.s16[0]  : A.s16[0];
02636         A.s16[1]  = (A.s16[1] < 0) ? -A.s16[1]  : A.s16[1];
02637         A.s16[2]  = (A.s16[2] < 0) ? -A.s16[2]  : A.s16[2];
02638         A.s16[3]  = (A.s16[3] < 0) ? -A.s16[3]  : A.s16[3];
02639 
02640         return A.m64;
02641 }

SSP_FORCEINLINE __m64 ssp_abs_pi32_REF ( __m64  a  ) 

Reference implementation of _mm_abs_pi32 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.

Definition at line 2647 of file SSEPlus_emulation_REF.h.

02648 {
02649     ssp_m64 A;
02650     A.m64 = a;
02651 
02652         A.s32[0]  = (A.s32[0] < 0) ? -A.s32[0]  : A.s32[0];
02653         A.s32[1]  = (A.s32[1] < 0) ? -A.s32[1]  : A.s32[1];
02654 
02655         return A.m64;
02656 }

SSP_FORCEINLINE __m64 ssp_abs_pi8_REF ( __m64  a  ) 

Reference implementation of _mm_abs_pi8 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.

Definition at line 2609 of file SSEPlus_emulation_REF.h.

02610 {
02611     ssp_m64 A;
02612     A.m64 = a;
02613 
02614         A.s8[0]  = (A.s8[0] < 0) ? -A.s8[0]  : A.s8[0];
02615         A.s8[1]  = (A.s8[1] < 0) ? -A.s8[1]  : A.s8[1];
02616         A.s8[2]  = (A.s8[2] < 0) ? -A.s8[2]  : A.s8[2];
02617         A.s8[3]  = (A.s8[3] < 0) ? -A.s8[3]  : A.s8[3];
02618         A.s8[4]  = (A.s8[4] < 0) ? -A.s8[4]  : A.s8[4];
02619         A.s8[5]  = (A.s8[5] < 0) ? -A.s8[5]  : A.s8[5];
02620         A.s8[6]  = (A.s8[6] < 0) ? -A.s8[6]  : A.s8[6];
02621         A.s8[7]  = (A.s8[7] < 0) ? -A.s8[7]  : A.s8[7];
02622 
02623         return A.m64;
02624 }

SSP_FORCEINLINE __m128d ssp_addsub_pd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_addsub_pd [SSE3]. (Searches MSDN)

Definition at line 789 of file SSEPlus_emulation_REF.h.

00790 {
00791     ssp_m128 A, B;
00792     A.d = a;
00793     B.d = b;
00794 
00795     A.f64[0] -= B.f64[0];
00796     A.f64[1] += B.f64[1];
00797     return A.d;
00798 }

SSP_FORCEINLINE __m128 ssp_addsub_ps_REF ( __m128  a,
__m128  b 
)

Reference implementation of _mm_addsub_ps [SSE3]. (Searches MSDN)

Definition at line 775 of file SSEPlus_emulation_REF.h.

00776 {
00777     ssp_m128 A, B;
00778     A.f = a;
00779     B.f = b;
00780 
00781     A.f32[0] -= B.f32[0];
00782     A.f32[1] += B.f32[1];
00783     A.f32[2] -= B.f32[2];
00784     A.f32[3] += B.f32[3];
00785     return A.f;
00786 }

SSP_FORCEINLINE __m128i ssp_alignr_epi8_REF ( __m128i  a,
__m128i  b,
const int  ralign 
)

Reference implementation of _mm_alignr_epi8 [SSSE3]. (Searches MSDN)

Definition at line 2661 of file SSEPlus_emulation_REF.h.

02662 {
02663     ssp_m128 C[3];
02664         ssp_s8 * tmp;
02665         int i, j;
02666 
02667         if (ralign <0) return b; //only shift to right, no negative
02668         C[2].i = _mm_setzero_si128();
02669         if (ralign > 32) return C[2].i;
02670     C[1].i = a;
02671         C[0].i = b;
02672         tmp = & (C[0].s8[0]);
02673 
02674         for (i=ralign+15, j=15; i >=ralign; i--, j--) {
02675                 C[2].s8[j] = tmp[i];
02676         }
02677 
02678         return C[2].i;
02679 }

SSP_FORCEINLINE __m64 ssp_alignr_pi8_REF ( __m64  a,
__m64  b,
const int  ralign 
)

Reference implementation of _mm_alignr_pi8 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.

Definition at line 2684 of file SSEPlus_emulation_REF.h.

02685 {
02686     ssp_m64 C[3];
02687         ssp_s8 * tmp;
02688         int i, j;
02689 
02690         if (ralign <0) return b; //only shift to right, no negative
02691         C[2].u32[0] = 0;
02692         C[2].u32[1] = 0;
02693         if (ralign > 16) return C[2].m64;
02694     C[1].m64 = a;
02695         C[0].m64 = b;
02696         tmp = & (C[0].s8[0]);
02697 
02698         for (i=ralign+7, j=7; i >=ralign; i--, j--) {
02699                 C[2].s8[j] = tmp[i];
02700         }
02701 
02702         return C[2].m64;
02703 }

SSP_FORCEINLINE __m128i ssp_blend_epi16_REF ( __m128i  a,
__m128i  b,
const int  mask 
)

Reference implementation of _mm_blend_epi16 [SSE4.1]. (Searches MSDN)

Definition at line 805 of file SSEPlus_emulation_REF.h.

00806 {
00807     ssp_m128 A, B;
00808     A.i = a;
00809     B.i = b;
00810 
00811     A.s16[0] = (mask & 0x01) ? B.s16[0] : A.s16[0];
00812     A.s16[1] = (mask & 0x02) ? B.s16[1] : A.s16[1];
00813     A.s16[2] = (mask & 0x04) ? B.s16[2] : A.s16[2];
00814     A.s16[3] = (mask & 0x08) ? B.s16[3] : A.s16[3];
00815     A.s16[4] = (mask & 0x10) ? B.s16[4] : A.s16[4];
00816     A.s16[5] = (mask & 0x20) ? B.s16[5] : A.s16[5];
00817     A.s16[6] = (mask & 0x40) ? B.s16[6] : A.s16[6];
00818     A.s16[7] = (mask & 0x80) ? B.s16[7] : A.s16[7];
00819     return A.i;
00820 }

SSP_FORCEINLINE __m128d ssp_blend_pd_REF ( __m128d  a,
__m128d  b,
const int  mask 
)

Reference implementation of _mm_blend_pd [SSE4.1]. (Searches MSDN)

Definition at line 823 of file SSEPlus_emulation_REF.h.

00824 {
00825     ssp_m128 A, B;
00826     A.d = a;
00827     B.d = b;
00828 
00829     A.f64[0] = (mask & 0x1) ? B.f64[0] : A.f64[0];
00830     A.f64[1] = (mask & 0x2) ? B.f64[1] : A.f64[1];
00831     return A.d;
00832 }

SSP_FORCEINLINE __m128 ssp_blend_ps_REF ( __m128  a,
__m128  b,
const int  mask 
)

Reference implementation of _mm_blend_ps [SSE4.1]. (Searches MSDN)

Definition at line 835 of file SSEPlus_emulation_REF.h.

00836 {
00837     ssp_m128 A, B;
00838     A.f = a;
00839     B.f = b;
00840 
00841     A.f32[0] = (mask & 0x1) ? B.f32[0] : A.f32[0];
00842     A.f32[1] = (mask & 0x2) ? B.f32[1] : A.f32[1];
00843     A.f32[2] = (mask & 0x4) ? B.f32[2] : A.f32[2];
00844     A.f32[3] = (mask & 0x8) ? B.f32[3] : A.f32[3];
00845     return A.f;
00846 }

SSP_FORCEINLINE __m128i ssp_blendv_epi8_REF ( __m128i  a,
__m128i  b,
__m128i  mask 
)

Reference implementation of _mm_blendv_epi8 [SSE4.1]. (Searches MSDN)

Definition at line 849 of file SSEPlus_emulation_REF.h.

00850 {
00851     ssp_m128 A, B, Mask;
00852     A.i = a;
00853     B.i = b;
00854     Mask.i = mask;
00855 
00856     A.s8[0]  = (Mask.s8[0]  & 0x80) ? B.s8[0]  : A.s8[0];
00857     A.s8[1]  = (Mask.s8[1]  & 0x80) ? B.s8[1]  : A.s8[1];
00858     A.s8[2]  = (Mask.s8[2]  & 0x80) ? B.s8[2]  : A.s8[2];
00859     A.s8[3]  = (Mask.s8[3]  & 0x80) ? B.s8[3]  : A.s8[3];
00860     A.s8[4]  = (Mask.s8[4]  & 0x80) ? B.s8[4]  : A.s8[4];
00861     A.s8[5]  = (Mask.s8[5]  & 0x80) ? B.s8[5]  : A.s8[5];
00862     A.s8[6]  = (Mask.s8[6]  & 0x80) ? B.s8[6]  : A.s8[6];
00863     A.s8[7]  = (Mask.s8[7]  & 0x80) ? B.s8[7]  : A.s8[7];
00864     A.s8[8]  = (Mask.s8[8]  & 0x80) ? B.s8[8]  : A.s8[8];
00865     A.s8[9]  = (Mask.s8[9]  & 0x80) ? B.s8[9]  : A.s8[9];
00866     A.s8[10] = (Mask.s8[10] & 0x80) ? B.s8[10] : A.s8[10];
00867     A.s8[11] = (Mask.s8[11] & 0x80) ? B.s8[11] : A.s8[11];
00868     A.s8[12] = (Mask.s8[12] & 0x80) ? B.s8[12] : A.s8[12];
00869     A.s8[13] = (Mask.s8[13] & 0x80) ? B.s8[13] : A.s8[13];
00870     A.s8[14] = (Mask.s8[14] & 0x80) ? B.s8[14] : A.s8[14];
00871     A.s8[15] = (Mask.s8[15] & 0x80) ? B.s8[15] : A.s8[15];
00872     return A.i;
00873 }

SSP_FORCEINLINE __m128d ssp_blendv_pd_REF ( __m128d  a,
__m128d  b,
__m128d  mask 
)

Reference implementation of _mm_blendv_pd [SSE4.1]. (Searches MSDN)

Definition at line 876 of file SSEPlus_emulation_REF.h.

00877 {
00878     ssp_m128 A, B, Mask;
00879     A.d = a;
00880     B.d = b;
00881     Mask.d = mask;
00882 
00883     A.f64[0] = (Mask.u64[0] & 0x8000000000000000ll) ? B.f64[0] : A.f64[0];
00884     A.f64[1] = (Mask.u64[1] & 0x8000000000000000ll) ? B.f64[1] : A.f64[1];
00885     return A.d;
00886 }

SSP_FORCEINLINE __m128 ssp_blendv_ps_REF ( __m128  a,
__m128  b,
__m128  mask 
)

Reference implementation of _mm_blendv_ps [SSE4.1]. (Searches MSDN)

Definition at line 889 of file SSEPlus_emulation_REF.h.

00890 {
00891     ssp_m128 A, B, Mask;
00892     A.f = a;
00893     B.f = b;
00894     Mask.f = mask;
00895 
00896     A.f32[0] = (Mask.u32[0] & 0x80000000) ? B.f32[0] : A.f32[0];
00897     A.f32[1] = (Mask.u32[1] & 0x80000000) ? B.f32[1] : A.f32[1];
00898     A.f32[2] = (Mask.u32[2] & 0x80000000) ? B.f32[2] : A.f32[2];
00899     A.f32[3] = (Mask.u32[3] & 0x80000000) ? B.f32[3] : A.f32[3];
00900     return A.f;
00901 }

SSP_FORCEINLINE __m128d ssp_ceil_pd_REF ( __m128d  a  ) 

Reference implementation of _mm_ceil_pd [SSE4.1]. (Searches MSDN)

Definition at line 1999 of file SSEPlus_emulation_REF.h.

02000 {
02001     ssp_m128 A;
02002     A.d = a;    
02003 
02004     A.f64[0] = ceil( A.f64[0] );
02005     A.f64[1] = ceil( A.f64[1] );
02006     return A.d;
02007 }

SSP_FORCEINLINE __m128 ssp_ceil_ps_REF ( __m128  a  ) 

Reference implementation of _mm_ceil_ps [SSE4.1]. (Searches MSDN)

Definition at line 2010 of file SSEPlus_emulation_REF.h.

02011 {
02012     ssp_m128 A;
02013     A.f = a;
02014 
02015     A.f32[0] = (ssp_f32)ceil( A.f32[0] );
02016     A.f32[1] = (ssp_f32)ceil( A.f32[1] );
02017     A.f32[2] = (ssp_f32)ceil( A.f32[2] );
02018     A.f32[3] = (ssp_f32)ceil( A.f32[3] );
02019     return A.f;
02020 }

SSP_FORCEINLINE __m128d ssp_ceil_sd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_ceil_sd [SSE4.1]. (Searches MSDN)

Definition at line 2023 of file SSEPlus_emulation_REF.h.

02024 {
02025     ssp_m128 A,B;
02026     A.d = a;
02027     B.d = b;
02028 
02029     A.f64[0] = ceil( B.f64[0] );
02030     return A.d;
02031 }

SSP_FORCEINLINE __m128 ssp_ceil_ss_REF ( __m128  a,
__m128  b 
)

Reference implementation of _mm_ceil_ss [SSE4.1]. (Searches MSDN)

Definition at line 2034 of file SSEPlus_emulation_REF.h.

02035 {
02036     ssp_m128 A,B;
02037     A.f = a;
02038     B.f = b;
02039 
02040     A.f32[0] = (ssp_f32)ceil( B.f32[0] );
02041     return A.f;
02042 }

SSP_FORCEINLINE __m128i ssp_cmpeq_epi64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_cmpeq_epi64 [SSE4.1]. (Searches MSDN)

Definition at line 908 of file SSEPlus_emulation_REF.h.

00909 {
00910     ssp_m128 A, B;
00911     A.i = a;
00912     B.i = b;
00913 
00914     if( A.s64[0] == B.s64[0] )
00915         A.s64[0] = 0xFFFFFFFFFFFFFFFFll;
00916     else
00917         A.s64[0] = 0x0ll;
00918 
00919     if( A.s64[1] == B.s64[1] )
00920         A.s64[1] = 0xFFFFFFFFFFFFFFFFll;
00921     else
00922         A.s64[1] = 0x0ll;
00923     return A.i;
00924 }

SSP_FORCEINLINE __m128i ssp_comeq_epi16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comeq_epi16/ pcomw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 20 of file SSEPlus_emulation_comps_REF.h.

00021 {
00022     ssp_m128 A,B;
00023     A.i = a;
00024     B.i = b;
00025     A.u16[0] = (A.s16[0]==B.s16[0]) ? 0xFFFF : 0;
00026     A.u16[1] = (A.s16[1]==B.s16[1]) ? 0xFFFF : 0;
00027     A.u16[2] = (A.s16[2]==B.s16[2]) ? 0xFFFF : 0;
00028     A.u16[3] = (A.s16[3]==B.s16[3]) ? 0xFFFF : 0;
00029     A.u16[4] = (A.s16[4]==B.s16[4]) ? 0xFFFF : 0;
00030     A.u16[5] = (A.s16[5]==B.s16[5]) ? 0xFFFF : 0;
00031     A.u16[6] = (A.s16[6]==B.s16[6]) ? 0xFFFF : 0;
00032     A.u16[7] = (A.s16[7]==B.s16[7]) ? 0xFFFF : 0;
00033     return A.i;
00034 }

SSP_FORCEINLINE __m128i ssp_comeq_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comeq_epi32/ pcomd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 37 of file SSEPlus_emulation_comps_REF.h.

00038 {
00039     ssp_m128 A,B;
00040     A.i = a;
00041     B.i = b;
00042     A.u32[0] = (A.s32[0]==B.s32[0]) ? 0xFFFFFFFF : 0;
00043     A.u32[1] = (A.s32[1]==B.s32[1]) ? 0xFFFFFFFF : 0;
00044     A.u32[2] = (A.s32[2]==B.s32[2]) ? 0xFFFFFFFF : 0;
00045     A.u32[3] = (A.s32[3]==B.s32[3]) ? 0xFFFFFFFF : 0;
00046     return A.i;
00047 }

SSP_FORCEINLINE __m128i ssp_comeq_epi64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comeq_epi64/ pcomq [SSE5]. (SSE5 .pdf documentation here)

Definition at line 50 of file SSEPlus_emulation_comps_REF.h.

00051 {
00052     ssp_m128 A,B;
00053     A.i = a;
00054     B.i = b;
00055     A.u64[0] = (A.s64[0]==B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00056     A.u64[1] = (A.s64[1]==B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00057     return A.i;
00058 }

SSP_FORCEINLINE __m128i ssp_comeq_epi8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comeq_epi8/ pcomb [SSE5]. (SSE5 .pdf documentation here)

Definition at line 61 of file SSEPlus_emulation_comps_REF.h.

00062 {
00063     ssp_m128 A,B;
00064     A.i = a;
00065     B.i = b;
00066     A.u8[ 0] = (A.s8[ 0]==B.s8[ 0]) ? 0xFF : 0;
00067     A.u8[ 1] = (A.s8[ 1]==B.s8[ 1]) ? 0xFF : 0;
00068     A.u8[ 2] = (A.s8[ 2]==B.s8[ 2]) ? 0xFF : 0;
00069     A.u8[ 3] = (A.s8[ 3]==B.s8[ 3]) ? 0xFF : 0;
00070     A.u8[ 4] = (A.s8[ 4]==B.s8[ 4]) ? 0xFF : 0;
00071     A.u8[ 5] = (A.s8[ 5]==B.s8[ 5]) ? 0xFF : 0;
00072     A.u8[ 6] = (A.s8[ 6]==B.s8[ 6]) ? 0xFF : 0;
00073     A.u8[ 7] = (A.s8[ 7]==B.s8[ 7]) ? 0xFF : 0; 
00074         A.u8[ 8] = (A.s8[ 8]==B.s8[ 8]) ? 0xFF : 0;
00075     A.u8[ 9] = (A.s8[ 9]==B.s8[ 9]) ? 0xFF : 0;
00076     A.u8[10]= (A.s8[10]==B.s8[10]) ? 0xFF : 0;
00077     A.u8[11] = (A.s8[11]==B.s8[11]) ? 0xFF : 0;
00078     A.u8[12] = (A.s8[12]==B.s8[12]) ? 0xFF : 0;
00079     A.u8[13] = (A.s8[13]==B.s8[13]) ? 0xFF : 0;
00080     A.u8[14] = (A.s8[14]==B.s8[14]) ? 0xFF : 0;
00081     A.u8[15] = (A.s8[15]==B.s8[15]) ? 0xFF : 0;
00082     return A.i;
00083 }

SSP_FORCEINLINE __m128i ssp_comeq_epu16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comeq_epu16/ pcomuw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 86 of file SSEPlus_emulation_comps_REF.h.

00087 {
00088     ssp_m128 A,B;
00089     A.i = a;
00090     B.i = b;
00091     A.u16[0] = (A.u16[0]==B.u16[0]) ? 0xFFFF : 0;
00092     A.u16[1] = (A.u16[1]==B.u16[1]) ? 0xFFFF : 0;
00093     A.u16[2] = (A.u16[2]==B.u16[2]) ? 0xFFFF : 0;
00094     A.u16[3] = (A.u16[3]==B.u16[3]) ? 0xFFFF : 0;
00095     A.u16[4] = (A.u16[4]==B.u16[4]) ? 0xFFFF : 0;
00096     A.u16[5] = (A.u16[5]==B.u16[5]) ? 0xFFFF : 0;
00097     A.u16[6] = (A.u16[6]==B.u16[6]) ? 0xFFFF : 0;
00098     A.u16[7] = (A.u16[7]==B.u16[7]) ? 0xFFFF : 0;
00099     return A.i;
00100 }

SSP_FORCEINLINE __m128i ssp_comeq_epu32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comeq_epu32/ pcomud [SSE5]. (SSE5 .pdf documentation here)

Definition at line 103 of file SSEPlus_emulation_comps_REF.h.

00104 {
00105     ssp_m128 A,B;
00106     A.i = a;
00107     B.i = b;
00108     A.u32[0] = (A.u32[0]==B.u32[0]) ? 0xFFFFFFFF : 0;
00109     A.u32[1] = (A.u32[1]==B.u32[1]) ? 0xFFFFFFFF : 0;
00110     A.u32[2] = (A.u32[2]==B.u32[2]) ? 0xFFFFFFFF : 0;
00111     A.u32[3] = (A.u32[3]==B.u32[3]) ? 0xFFFFFFFF : 0;
00112     return A.i;
00113 }

SSP_FORCEINLINE __m128i ssp_comeq_epu64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comeq_epu64/ pcomuq [SSE5]. (SSE5 .pdf documentation here)

Definition at line 116 of file SSEPlus_emulation_comps_REF.h.

00117 {
00118     ssp_m128 A,B;
00119     A.i = a;
00120     B.i = b;
00121     A.u64[0] = (A.u64[0]==B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00122     A.u64[1] = (A.u64[1]==B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00123     return A.i;
00124 }

SSP_FORCEINLINE __m128i ssp_comeq_epu8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comeq_epu8/ pcomub [SSE5]. (SSE5 .pdf documentation here)

Definition at line 127 of file SSEPlus_emulation_comps_REF.h.

00128 {
00129     ssp_m128 A,B;
00130     A.i = a;
00131     B.i = b;
00132     A.u8[ 0] = (A.u8[ 0]==B.u8[ 0]) ? 0xFF : 0;
00133     A.u8[ 1] = (A.u8[ 1]==B.u8[ 1]) ? 0xFF : 0;
00134     A.u8[ 2] = (A.u8[ 2]==B.u8[ 2]) ? 0xFF : 0;
00135     A.u8[ 3] = (A.u8[ 3]==B.u8[ 3]) ? 0xFF : 0;
00136     A.u8[ 4] = (A.u8[ 4]==B.u8[ 4]) ? 0xFF : 0;
00137     A.u8[ 5] = (A.u8[ 5]==B.u8[ 5]) ? 0xFF : 0;
00138     A.u8[ 6] = (A.u8[ 6]==B.u8[ 6]) ? 0xFF : 0;
00139     A.u8[ 7] = (A.u8[ 7]==B.u8[ 7]) ? 0xFF : 0; 
00140         A.u8[ 8] = (A.u8[ 8]==B.u8[ 8]) ? 0xFF : 0;
00141     A.u8[ 9] = (A.u8[ 9]==B.u8[ 9]) ? 0xFF : 0;
00142     A.u8[10] = (A.u8[10]==B.u8[10]) ? 0xFF : 0;
00143     A.u8[11] = (A.u8[11]==B.u8[11]) ? 0xFF : 0;
00144     A.u8[12] = (A.u8[12]==B.u8[12]) ? 0xFF : 0;
00145     A.u8[13] = (A.u8[13]==B.u8[13]) ? 0xFF : 0;
00146     A.u8[14] = (A.u8[14]==B.u8[14]) ? 0xFF : 0;
00147     A.u8[15] = (A.u8[15]==B.u8[15]) ? 0xFF : 0;
00148     return A.i;
00149 }

SSP_FORCEINLINE __m128d ssp_comeq_pd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_comeq_pd/ compd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 152 of file SSEPlus_emulation_comps_REF.h.

00153 {
00154     ssp_m128 A,B;
00155     A.d = a;
00156     B.d = b;
00157     A.u64[0] = (A.f64[0]==B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00158     A.u64[1] = (A.f64[1]==B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
00159     return A.d;
00160 }

SSP_FORCEINLINE __m128 ssp_comeq_ps_REF ( __m128  a,
__m128  b 
)

Reference implementation of _mm_comeq_ps/ comps [SSE5]. (SSE5 .pdf documentation here)

Definition at line 163 of file SSEPlus_emulation_comps_REF.h.

00164 {
00165     ssp_m128 A,B;
00166     A.f = a;
00167     B.f = b;
00168     A.u32[0] = (A.f32[0]==B.f32[0]) ? 0xFFFFFFFF : 0;
00169     A.u32[1] = (A.f32[1]==B.f32[1]) ? 0xFFFFFFFF : 0;
00170     A.u32[2] = (A.f32[2]==B.f32[2]) ? 0xFFFFFFFF : 0;
00171     A.u32[3] = (A.f32[3]==B.f32[3]) ? 0xFFFFFFFF : 0;
00172     return A.f;
00173 }

SSP_FORCEINLINE __m128d ssp_comeq_sd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_comeq_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 176 of file SSEPlus_emulation_comps_REF.h.

00177 {
00178     ssp_m128 A,B;
00179     A.d = a;
00180     B.d = b;
00181     A.u64[0] = (A.f64[0]==B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
00182     return A.d;
00183 }

SSP_FORCEINLINE __m128 ssp_comeq_ss_REF ( __m128  a,
__m128  b 
)

Reference implementation of _mm_comeq_ss/ comss [SSE5]. (SSE5 .pdf documentation here)

Definition at line 186 of file SSEPlus_emulation_comps_REF.h.

00187 {
00188     ssp_m128 A,B;
00189     A.f = a;
00190     B.f = b;
00191     A.u32[0] = (A.f32[0]==B.f32[0]) ? 0xFFFFFFFF : 0;
00192     return A.f;
00193 }

SSP_FORCEINLINE __m128i ssp_comfalse_epi16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comfalse_epi16/ pcomw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1094 of file SSEPlus_emulation_comps_REF.h.

01095 {
01096     const static __m128i tmp = SSP_CONST_SET_32I( 0,0,0,0 );  
01097     return tmp;
01098 }

SSP_FORCEINLINE __m128i ssp_comfalse_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comfalse_epi32/ pcomd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1101 of file SSEPlus_emulation_comps_REF.h.

01102 {
01103     return ssp_comfalse_epi16_REF(a,b);
01104 }

SSP_FORCEINLINE __m128i ssp_comfalse_epi64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comfalse_epi64/ pcomq [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1107 of file SSEPlus_emulation_comps_REF.h.

01108 {
01109     return ssp_comfalse_epi16_REF(a,b);
01110 }

SSP_FORCEINLINE __m128i ssp_comfalse_epi8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comfalse_epi8/ pcomb [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1113 of file SSEPlus_emulation_comps_REF.h.

01114 {
01115     return ssp_comfalse_epi16_REF(a,b);
01116 }

SSP_FORCEINLINE __m128i ssp_comfalse_epu16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comfalse_epu16/ pcomuw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1119 of file SSEPlus_emulation_comps_REF.h.

01120 {
01121     return ssp_comfalse_epi16_REF(a,b);
01122 }

SSP_FORCEINLINE __m128i ssp_comfalse_epu32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comfalse_epu32/ pcomud [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1125 of file SSEPlus_emulation_comps_REF.h.

01126 {
01127     return ssp_comfalse_epi16_REF(a,b);
01128 }

SSP_FORCEINLINE __m128i ssp_comfalse_epu64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comfalse_epu64/ pcomuq [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1131 of file SSEPlus_emulation_comps_REF.h.

01132 {
01133     return ssp_comfalse_epi16_REF(a,b);
01134 }

SSP_FORCEINLINE __m128i ssp_comfalse_epu8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comfalse_epu8/ pcomub [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1137 of file SSEPlus_emulation_comps_REF.h.

01138 {
01139     return ssp_comfalse_epi16_REF(a,b);
01140 }

SSP_FORCEINLINE __m128d ssp_comfalse_pd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_comfalse_pd/ compd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1143 of file SSEPlus_emulation_comps_REF.h.

01144 {
01145     const static __m128d tmp = SSP_CONST_SET_64F( 0, 0 );      
01146     return tmp;
01147 }

SSP_FORCEINLINE __m128 ssp_comfalse_ps_REF ( __m128  a,
__m128  b 
)

Reference implementation of _mm_comfalse_ps/ comps [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1150 of file SSEPlus_emulation_comps_REF.h.

01151 {
01152    const static __m128 tmp = SSP_CONST_SET_32F( 0, 0, 0, 0 );      
01153    return tmp;
01154 }

SSP_FORCEINLINE __m128d ssp_comfalse_sd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_comfalse_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1157 of file SSEPlus_emulation_comps_REF.h.

01158 {
01159     ssp_m128 A;
01160     A.d = a;
01161     A.u64[0] = 0;
01162     return A.d;
01163 }

SSP_FORCEINLINE __m128 ssp_comfalse_ss_REF ( __m128  a,
__m128  b 
)

Reference implementation of _mm_comfalse_ss/ comss [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1166 of file SSEPlus_emulation_comps_REF.h.

01167 {
01168     ssp_m128 A;
01169     A.f = a;
01170     A.u32[0] = 0;
01171     return A.f;
01172 }

SSP_FORCEINLINE __m128i ssp_comge_epi16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comge_epi16/ pcomw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1229 of file SSEPlus_emulation_comps_REF.h.

01230 {
01231     ssp_m128 A,B;
01232     A.i = a;
01233     B.i = b;
01234     A.u16[0] = (A.s16[0]>=B.s16[0]) ? 0xFFFF : 0;
01235     A.u16[1] = (A.s16[1]>=B.s16[1]) ? 0xFFFF : 0;
01236     A.u16[2] = (A.s16[2]>=B.s16[2]) ? 0xFFFF : 0;
01237     A.u16[3] = (A.s16[3]>=B.s16[3]) ? 0xFFFF : 0;
01238     A.u16[4] = (A.s16[4]>=B.s16[4]) ? 0xFFFF : 0;
01239     A.u16[5] = (A.s16[5]>=B.s16[5]) ? 0xFFFF : 0;
01240     A.u16[6] = (A.s16[6]>=B.s16[6]) ? 0xFFFF : 0;
01241     A.u16[7] = (A.s16[7]>=B.s16[7]) ? 0xFFFF : 0;
01242     return A.i;
01243 }

SSP_FORCEINLINE __m128i ssp_comge_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comge_epi32/ pcomd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1246 of file SSEPlus_emulation_comps_REF.h.

01247 {
01248     ssp_m128 A,B;
01249     A.i = a;
01250     B.i = b;
01251     A.u32[0] = (A.s32[0]>=B.s32[0]) ? 0xFFFFFFFF : 0;
01252     A.u32[1] = (A.s32[1]>=B.s32[1]) ? 0xFFFFFFFF : 0;
01253     A.u32[2] = (A.s32[2]>=B.s32[2]) ? 0xFFFFFFFF : 0;
01254     A.u32[3] = (A.s32[3]>=B.s32[3]) ? 0xFFFFFFFF : 0;
01255     return A.i;
01256 }

SSP_FORCEINLINE __m128i ssp_comge_epi64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comge_epi64/ pcomq [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1259 of file SSEPlus_emulation_comps_REF.h.

01260 {
01261     ssp_m128 A,B;
01262     A.i = a;
01263     B.i = b;
01264     A.u64[0] = (A.s64[0]>=B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01265     A.u64[1] = (A.s64[1]>=B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01266     return A.i;
01267 }

SSP_FORCEINLINE __m128i ssp_comge_epi8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comge_epi8/ pcomb [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1270 of file SSEPlus_emulation_comps_REF.h.

01271 {
01272     ssp_m128 A,B;
01273     A.i = a;
01274     B.i = b;
01275     A.u8[ 0] = (A.s8[ 0]>=B.s8[ 0]) ? 0xFF : 0;
01276     A.u8[ 1] = (A.s8[ 1]>=B.s8[ 1]) ? 0xFF : 0;
01277     A.u8[ 2] = (A.s8[ 2]>=B.s8[ 2]) ? 0xFF : 0;
01278     A.u8[ 3] = (A.s8[ 3]>=B.s8[ 3]) ? 0xFF : 0;
01279     A.u8[ 4] = (A.s8[ 4]>=B.s8[ 4]) ? 0xFF : 0;
01280     A.u8[ 5] = (A.s8[ 5]>=B.s8[ 5]) ? 0xFF : 0;
01281     A.u8[ 6] = (A.s8[ 6]>=B.s8[ 6]) ? 0xFF : 0;
01282     A.u8[ 7] = (A.s8[ 7]>=B.s8[ 7]) ? 0xFF : 0; 
01283         A.u8[ 8] = (A.s8[ 8]>=B.s8[ 8]) ? 0xFF : 0;
01284     A.u8[ 9] = (A.s8[ 9]>=B.s8[ 9]) ? 0xFF : 0;
01285     A.u8[10] = (A.s8[10]>=B.s8[10]) ? 0xFF : 0;
01286     A.u8[11] = (A.s8[11]>=B.s8[11]) ? 0xFF : 0;
01287     A.u8[12] = (A.s8[12]>=B.s8[12]) ? 0xFF : 0;
01288     A.u8[13] = (A.s8[13]>=B.s8[13]) ? 0xFF : 0;
01289     A.u8[14] = (A.s8[14]>=B.s8[14]) ? 0xFF : 0;
01290     A.u8[15] = (A.s8[15]>=B.s8[15]) ? 0xFF : 0;
01291     return A.i;
01292 }

SSP_FORCEINLINE __m128i ssp_comge_epu16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comge_epu16/ pcomuw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1295 of file SSEPlus_emulation_comps_REF.h.

01296 {
01297     ssp_m128 A,B;
01298     A.i = a;
01299     B.i = b;
01300     A.u16[0] = (A.u16[0]>=B.u16[0]) ? 0xFFFF : 0;
01301     A.u16[1] = (A.u16[1]>=B.u16[1]) ? 0xFFFF : 0;
01302     A.u16[2] = (A.u16[2]>=B.u16[2]) ? 0xFFFF : 0;
01303     A.u16[3] = (A.u16[3]>=B.u16[3]) ? 0xFFFF : 0;
01304     A.u16[4] = (A.u16[4]>=B.u16[4]) ? 0xFFFF : 0;
01305     A.u16[5] = (A.u16[5]>=B.u16[5]) ? 0xFFFF : 0;
01306     A.u16[6] = (A.u16[6]>=B.u16[6]) ? 0xFFFF : 0;
01307     A.u16[7] = (A.u16[7]>=B.u16[7]) ? 0xFFFF : 0;
01308     return A.i;
01309 }

SSP_FORCEINLINE __m128i ssp_comge_epu32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comge_epu32/ pcomud [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1312 of file SSEPlus_emulation_comps_REF.h.

01313 {
01314     ssp_m128 A,B;
01315     A.i = a;
01316     B.i = b;
01317     A.u32[0] = (A.u32[0]>=B.u32[0]) ? 0xFFFFFFFF : 0;
01318     A.u32[1] = (A.u32[1]>=B.u32[1]) ? 0xFFFFFFFF : 0;
01319     A.u32[2] = (A.u32[2]>=B.u32[2]) ? 0xFFFFFFFF : 0;
01320     A.u32[3] = (A.u32[3]>=B.u32[3]) ? 0xFFFFFFFF : 0;
01321     return A.i;
01322 }

SSP_FORCEINLINE __m128i ssp_comge_epu64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comge_epu64/ pcomuq [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1325 of file SSEPlus_emulation_comps_REF.h.

01326 {
01327     ssp_m128 A,B;
01328     A.i = a;
01329     B.i = b;
01330     A.u64[0] = (A.u64[0]>=B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01331     A.u64[1] = (A.u64[1]>=B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01332     return A.i;
01333 }

SSP_FORCEINLINE __m128i ssp_comge_epu8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comge_epu8/ pcomub [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1336 of file SSEPlus_emulation_comps_REF.h.

01337 {
01338     ssp_m128 A,B;
01339     A.i = a;
01340     B.i = b;
01341     A.u8[ 0] = (A.u8[ 0]>=B.u8[ 0]) ? 0xFF : 0;
01342     A.u8[ 1] = (A.u8[ 1]>=B.u8[ 1]) ? 0xFF : 0;
01343     A.u8[ 2] = (A.u8[ 2]>=B.u8[ 2]) ? 0xFF : 0;
01344     A.u8[ 3] = (A.u8[ 3]>=B.u8[ 3]) ? 0xFF : 0;
01345     A.u8[ 4] = (A.u8[ 4]>=B.u8[ 4]) ? 0xFF : 0;
01346     A.u8[ 5] = (A.u8[ 5]>=B.u8[ 5]) ? 0xFF : 0;
01347     A.u8[ 6] = (A.u8[ 6]>=B.u8[ 6]) ? 0xFF : 0;
01348     A.u8[ 7] = (A.u8[ 7]>=B.u8[ 7]) ? 0xFF : 0; 
01349         A.u8[ 8] = (A.u8[ 8]>=B.u8[ 8]) ? 0xFF : 0;
01350     A.u8[ 9] = (A.u8[ 9]>=B.u8[ 9]) ? 0xFF : 0;
01351     A.u8[10] = (A.u8[10]>=B.u8[10]) ? 0xFF : 0;
01352     A.u8[11] = (A.u8[11]>=B.u8[11]) ? 0xFF : 0;
01353     A.u8[12] = (A.u8[12]>=B.u8[12]) ? 0xFF : 0;
01354     A.u8[13] = (A.u8[13]>=B.u8[13]) ? 0xFF : 0;
01355     A.u8[14] = (A.u8[14]>=B.u8[14]) ? 0xFF : 0;
01356     A.u8[15] = (A.u8[15]>=B.u8[15]) ? 0xFF : 0;
01357     return A.i;
01358 }

SSP_FORCEINLINE __m128d ssp_comge_pd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_comge_pd/ compd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1361 of file SSEPlus_emulation_comps_REF.h.

01362 {
01363     ssp_m128 A,B;
01364     A.d = a;
01365     B.d = b;
01366     A.u64[0] = (A.f64[0]>=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01367     A.u64[1] = (A.f64[1]>=B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01368     return A.d;
01369 }

SSP_FORCEINLINE __m128 ssp_comge_ps_REF ( __m128  a,
__m128  b 
)

Reference implementation of _mm_comge_ps/ comps [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1372 of file SSEPlus_emulation_comps_REF.h.

01373 {
01374     ssp_m128 A,B;
01375     A.f = a;
01376     B.f = b;
01377     A.u32[0] = (A.f32[0]>=B.f32[0]) ? 0xFFFFFFFF : 0;
01378     A.u32[1] = (A.f32[1]>=B.f32[1]) ? 0xFFFFFFFF : 0;
01379     A.u32[2] = (A.f32[2]>=B.f32[2]) ? 0xFFFFFFFF : 0;
01380     A.u32[3] = (A.f32[3]>=B.f32[3]) ? 0xFFFFFFFF : 0;
01381     return A.f;
01382 }

SSP_FORCEINLINE __m128d ssp_comge_sd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_comge_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1385 of file SSEPlus_emulation_comps_REF.h.

01386 {
01387     ssp_m128 A,B;
01388     A.d = a;
01389     B.d = b;
01390     A.u64[0] = (A.f64[0]>=B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0 ;
01391     return A.d;
01392 }

SSP_FORCEINLINE __m128 ssp_comge_ss_REF ( __m128  a,
__m128  b 
)

Reference implementation of _mm_comge_ss/ comss [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1395 of file SSEPlus_emulation_comps_REF.h.

01396 {
01397     ssp_m128 A,B;
01398     A.f = a;
01399     B.f = b; 
01400     A.u32[0] = (A.f32[0]>=B.f32[0]) ? 0xFFFFFFFF : 0; 
01401     return A.f;
01402 }

SSP_FORCEINLINE __m128i ssp_comgt_epi16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comgt_epi16/ pcomw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1411 of file SSEPlus_emulation_comps_REF.h.

01412 {
01413     ssp_m128 A,B;
01414     A.i = a;
01415     B.i = b;
01416     A.u16[0] = (A.s16[0]>B.s16[0]) ? 0xFFFF : 0;
01417     A.u16[1] = (A.s16[1]>B.s16[1]) ? 0xFFFF : 0;
01418     A.u16[2] = (A.s16[2]>B.s16[2]) ? 0xFFFF : 0;
01419     A.u16[3] = (A.s16[3]>B.s16[3]) ? 0xFFFF : 0;
01420     A.u16[4] = (A.s16[4]>B.s16[4]) ? 0xFFFF : 0;
01421     A.u16[5] = (A.s16[5]>B.s16[5]) ? 0xFFFF : 0;
01422     A.u16[6] = (A.s16[6]>B.s16[6]) ? 0xFFFF : 0;
01423     A.u16[7] = (A.s16[7]>B.s16[7]) ? 0xFFFF : 0;
01424     return A.i;
01425 }

SSP_FORCEINLINE __m128i ssp_comgt_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comgt_epi32/ pcomd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1428 of file SSEPlus_emulation_comps_REF.h.

01429 {
01430     ssp_m128 A,B;
01431     A.i = a;
01432     B.i = b;
01433     A.u32[0] = (A.s32[0]>B.s32[0]) ? 0xFFFFFFFF : 0;
01434     A.u32[1] = (A.s32[1]>B.s32[1]) ? 0xFFFFFFFF : 0;
01435     A.u32[2] = (A.s32[2]>B.s32[2]) ? 0xFFFFFFFF : 0;
01436     A.u32[3] = (A.s32[3]>B.s32[3]) ? 0xFFFFFFFF : 0;
01437     return A.i;
01438 }

SSP_FORCEINLINE __m128i ssp_comgt_epi64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comgt_epi64/ pcomq [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1441 of file SSEPlus_emulation_comps_REF.h.

01442 {
01443     ssp_m128 A,B;
01444     A.i = a;
01445     B.i = b;
01446     A.u64[0] = (A.s64[0]>B.s64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01447     A.u64[1] = (A.s64[1]>B.s64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01448     return A.i;
01449 }

SSP_FORCEINLINE __m128i ssp_comgt_epi8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comgt_epi8/ pcomb [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1452 of file SSEPlus_emulation_comps_REF.h.

01453 {
01454     ssp_m128 A,B;
01455     A.i = a;
01456     B.i = b;
01457     A.u8[ 0] = (A.s8[ 0]>B.s8[ 0]) ? 0xFF : 0;
01458     A.u8[ 1] = (A.s8[ 1]>B.s8[ 1]) ? 0xFF : 0;
01459     A.u8[ 2] = (A.s8[ 2]>B.s8[ 2]) ? 0xFF : 0;
01460     A.u8[ 3] = (A.s8[ 3]>B.s8[ 3]) ? 0xFF : 0;
01461     A.u8[ 4] = (A.s8[ 4]>B.s8[ 4]) ? 0xFF : 0;
01462     A.u8[ 5] = (A.s8[ 5]>B.s8[ 5]) ? 0xFF : 0;
01463     A.u8[ 6] = (A.s8[ 6]>B.s8[ 6]) ? 0xFF : 0;
01464     A.u8[ 7] = (A.s8[ 7]>B.s8[ 7]) ? 0xFF : 0; 
01465         A.u8[ 8] = (A.s8[ 8]>B.s8[ 8]) ? 0xFF : 0;
01466     A.u8[ 9] = (A.s8[ 9]>B.s8[ 9]) ? 0xFF : 0;
01467     A.u8[10] = (A.s8[10]>B.s8[10]) ? 0xFF : 0;
01468     A.u8[11] = (A.s8[11]>B.s8[11]) ? 0xFF : 0;
01469     A.u8[12] = (A.s8[12]>B.s8[12]) ? 0xFF : 0;
01470     A.u8[13] = (A.s8[13]>B.s8[13]) ? 0xFF : 0;
01471     A.u8[14] = (A.s8[14]>B.s8[14]) ? 0xFF : 0;
01472     A.u8[15] = (A.s8[15]>B.s8[15]) ? 0xFF : 0;
01473     return A.i;
01474 }

SSP_FORCEINLINE __m128i ssp_comgt_epu16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comgt_epu16/ pcomuw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1477 of file SSEPlus_emulation_comps_REF.h.

01478 {
01479     ssp_m128 A,B;
01480     A.i = a;
01481     B.i = b;
01482     A.u16[0] = (A.u16[0]>B.u16[0]) ? 0xFFFF : 0;
01483     A.u16[1] = (A.u16[1]>B.u16[1]) ? 0xFFFF : 0;
01484     A.u16[2] = (A.u16[2]>B.u16[2]) ? 0xFFFF : 0;
01485     A.u16[3] = (A.u16[3]>B.u16[3]) ? 0xFFFF : 0;
01486     A.u16[4] = (A.u16[4]>B.u16[4]) ? 0xFFFF : 0;
01487     A.u16[5] = (A.u16[5]>B.u16[5]) ? 0xFFFF : 0;
01488     A.u16[6] = (A.u16[6]>B.u16[6]) ? 0xFFFF : 0;
01489     A.u16[7] = (A.u16[7]>B.u16[7]) ? 0xFFFF : 0;
01490     return A.i;
01491 }

SSP_FORCEINLINE __m128i ssp_comgt_epu32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comgt_epu32/ pcomud [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1494 of file SSEPlus_emulation_comps_REF.h.

01495 {
01496     ssp_m128 A,B;
01497     A.i = a;
01498     B.i = b;
01499     A.u32[0] = (A.u32[0]>B.u32[0]) ? 0xFFFFFFFF : 0;
01500     A.u32[1] = (A.u32[1]>B.u32[1]) ? 0xFFFFFFFF : 0;
01501     A.u32[2] = (A.u32[2]>B.u32[2]) ? 0xFFFFFFFF : 0;
01502     A.u32[3] = (A.u32[3]>B.u32[3]) ? 0xFFFFFFFF : 0;
01503     return A.i;
01504 }

SSP_FORCEINLINE __m128i ssp_comgt_epu64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comgt_epu64/ pcomuq [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1507 of file SSEPlus_emulation_comps_REF.h.

01508 {
01509     ssp_m128 A,B;
01510     A.i = a;
01511     B.i = b;
01512     A.u64[0] = (A.u64[0]>B.u64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01513     A.u64[1] = (A.u64[1]>B.u64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01514     return A.i;
01515 }

SSP_FORCEINLINE __m128i ssp_comgt_epu8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_comgt_epu8/ pcomub [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1518 of file SSEPlus_emulation_comps_REF.h.

01519 {
01520     ssp_m128 A,B;
01521     A.i = a;
01522     B.i = b;
01523     A.u8[ 0] = (A.u8[ 0]>B.u8[ 0]) ? 0xFF : 0;
01524     A.u8[ 1] = (A.u8[ 1]>B.u8[ 1]) ? 0xFF : 0;
01525     A.u8[ 2] = (A.u8[ 2]>B.u8[ 2]) ? 0xFF : 0;
01526     A.u8[ 3] = (A.u8[ 3]>B.u8[ 3]) ? 0xFF : 0;
01527     A.u8[ 4] = (A.u8[ 4]>B.u8[ 4]) ? 0xFF : 0;
01528     A.u8[ 5] = (A.u8[ 5]>B.u8[ 5]) ? 0xFF : 0;
01529     A.u8[ 6] = (A.u8[ 6]>B.u8[ 6]) ? 0xFF : 0;
01530     A.u8[ 7] = (A.u8[ 7]>B.u8[ 7]) ? 0xFF : 0; 
01531         A.u8[ 8] = (A.u8[ 8]>B.u8[ 8]) ? 0xFF : 0;
01532     A.u8[ 9] = (A.u8[ 9]>B.u8[ 9]) ? 0xFF : 0;
01533     A.u8[10] = (A.u8[10]>B.u8[10]) ? 0xFF : 0;
01534     A.u8[11] = (A.u8[11]>B.u8[11]) ? 0xFF : 0;
01535     A.u8[12] = (A.u8[12]>B.u8[12]) ? 0xFF : 0;
01536     A.u8[13] = (A.u8[13]>B.u8[13]) ? 0xFF : 0;
01537     A.u8[14] = (A.u8[14]>B.u8[14]) ? 0xFF : 0;
01538     A.u8[15] = (A.u8[15]>B.u8[15]) ? 0xFF : 0;
01539     return A.i;
01540 }

SSP_FORCEINLINE __m128d ssp_comgt_pd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_comgt_pd/ compd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1543 of file SSEPlus_emulation_comps_REF.h.

01544 {
01545     ssp_m128 A,B;
01546     A.d = a;
01547     B.d = b;
01548     A.u64[0] = (A.f64[0]>B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01549     A.u64[1] = (A.f64[1]>B.f64[1]) ? 0xFFFFFFFFFFFFFFFF : 0;
01550  
01551     return A.d;
01552 }

SSP_FORCEINLINE __m128 ssp_comgt_ps_REF ( __m128  a,
__m128  b 
)

Reference implementation of _mm_comgt_ps/ comps [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1555 of file SSEPlus_emulation_comps_REF.h.

01556 {
01557     ssp_m128 A,B;
01558     A.f = a;
01559     B.f = b;
01560     A.u32[0] = (A.f32[0]>B.f32[0]) ? 0xFFFFFFFF : 0;
01561     A.u32[1] = (A.f32[1]>B.f32[1]) ? 0xFFFFFFFF : 0;
01562     A.u32[2] = (A.f32[2]>B.f32[2]) ? 0xFFFFFFFF : 0;
01563     A.u32[3] = (A.f32[3]>B.f32[3]) ? 0xFFFFFFFF : 0;
01564     return A.f;
01565 }

SSP_FORCEINLINE __m128d ssp_comgt_sd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_comgt_sd/ comsd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 1568 of file SSEPlus_emulation_comps_REF.h.

01569 {
01570     ssp_m128 A,B;
01571     A.d = a;
01572     B.d = b;
01573     A.u64[0] = (A.f64[0]>B.f64[0]) ? 0xFFFFFFFFFFFFFFFF : 0;
01574     return A.d;
01575 }

/**
 * Reference implementation of _mm_comgt_ss / comss [SSE5].
 * Only lane 0 is compared: u32[0] of the result is all-ones when
 * a's f32[0] > b's f32[0] and 0 otherwise. Nothing else in the copy
 * of a is written before it is returned.
 * Originally defined at line 1578 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comgt_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;

    lhs.f = a;
    rhs.f = b;
    if( lhs.f32[0] > rhs.f32[0] )
    {
        lhs.u32[0] = 0xFFFFFFFF;
    }
    else
    {
        lhs.u32[0] = 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comle_epi16 / pcomw [SSE5].
 * For each of the 8 s16 lanes, the matching u16 lane of the result is
 * 0xFFFF when a's lane <= b's lane and 0 otherwise.
 * Originally defined at line 380 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epi16_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 8; ++lane )
    {
        lhs.u16[lane] = ( lhs.s16[lane] <= rhs.s16[lane] ) ? 0xFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comle_epi32 / pcomd [SSE5].
 * For each of the 4 s32 lanes, the matching u32 lane of the result is
 * 0xFFFFFFFF when a's lane <= b's lane and 0 otherwise.
 * Originally defined at line 397 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 4; ++lane )
    {
        lhs.u32[lane] = ( lhs.s32[lane] <= rhs.s32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comle_epi64 / pcomq [SSE5].
 * For each of the 2 s64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane <= b's lane and 0 otherwise.
 * Originally defined at line 410 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epi64_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 2; ++lane )
    {
        lhs.u64[lane] = ( lhs.s64[lane] <= rhs.s64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comle_epi8 / pcomb [SSE5].
 * For each of the 16 s8 lanes, the matching u8 lane of the result is
 * 0xFF when a's lane <= b's lane and 0 otherwise.
 * Originally defined at line 421 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epi8_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 16; ++lane )
    {
        lhs.u8[lane] = ( lhs.s8[lane] <= rhs.s8[lane] ) ? 0xFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comle_epu16 / pcomuw [SSE5].
 * For each of the 8 u16 lanes, the result lane is 0xFFFF when
 * a's lane <= b's lane and 0 otherwise.
 * Originally defined at line 446 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epu16_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 8; ++lane )
    {
        lhs.u16[lane] = ( lhs.u16[lane] <= rhs.u16[lane] ) ? 0xFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comle_epu32 / pcomud [SSE5].
 * For each of the 4 u32 lanes, the result lane is 0xFFFFFFFF when
 * a's lane <= b's lane and 0 otherwise.
 * Originally defined at line 463 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epu32_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 4; ++lane )
    {
        lhs.u32[lane] = ( lhs.u32[lane] <= rhs.u32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comle_epu64 / pcomuq [SSE5].
 * For each of the 2 u64 lanes, the result lane is all-ones when
 * a's lane <= b's lane and 0 otherwise.
 * Originally defined at line 476 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epu64_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 2; ++lane )
    {
        lhs.u64[lane] = ( lhs.u64[lane] <= rhs.u64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comle_epu8 / pcomub [SSE5].
 * For each of the 16 u8 lanes, the result lane is 0xFF when
 * a's lane <= b's lane and 0 otherwise.
 * Originally defined at line 487 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comle_epu8_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 16; ++lane )
    {
        lhs.u8[lane] = ( lhs.u8[lane] <= rhs.u8[lane] ) ? 0xFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comle_pd / compd [SSE5].
 * For each of the 2 f64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane <= b's lane and 0 otherwise.
 * Originally defined at line 512 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comle_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for( lane = 0; lane < 2; ++lane )
    {
        lhs.u64[lane] = ( lhs.f64[lane] <= rhs.f64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comle_ps / comps [SSE5].
 * For each of the 4 f32 lanes, the matching u32 lane of the result is
 * all-ones when a's lane <= b's lane and 0 otherwise.
 * Originally defined at line 524 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comle_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for( lane = 0; lane < 4; ++lane )
    {
        lhs.u32[lane] = ( lhs.f32[lane] <= rhs.f32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comle_sd / comsd [SSE5].
 * Only lane 0 is compared: u64[0] of the result is all-ones when
 * a's f64[0] <= b's f64[0] and 0 otherwise. Nothing else in the copy
 * of a is written before it is returned.
 * Originally defined at line 537 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comle_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;

    lhs.d = a;
    rhs.d = b;
    if( lhs.f64[0] <= rhs.f64[0] )
    {
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;
    }
    else
    {
        lhs.u64[0] = 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comle_ss / comss [SSE5].
 * Only lane 0 is compared: u32[0] of the result is all-ones when
 * a's f32[0] <= b's f32[0] and 0 otherwise. Nothing else in the copy
 * of a is written before it is returned.
 * Originally defined at line 548 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comle_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;

    lhs.f = a;
    rhs.f = b;
    if( lhs.f32[0] <= rhs.f32[0] )
    {
        lhs.u32[0] = 0xFFFFFFFF;
    }
    else
    {
        lhs.u32[0] = 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comlt_epi16 / pcomw [SSE5].
 * For each of the 8 s16 lanes, the matching u16 lane of the result is
 * 0xFFFF when a's lane < b's lane and 0 otherwise.
 * Originally defined at line 200 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comlt_epi16_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 8; ++lane )
    {
        lhs.u16[lane] = ( lhs.s16[lane] < rhs.s16[lane] ) ? 0xFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comlt_epi32 / pcomd [SSE5].
 * For each of the 4 s32 lanes, the matching u32 lane of the result is
 * 0xFFFFFFFF when a's lane < b's lane and 0 otherwise.
 * Originally defined at line 217 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comlt_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 4; ++lane )
    {
        lhs.u32[lane] = ( lhs.s32[lane] < rhs.s32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comlt_epi64 / pcomq [SSE5].
 * For each of the 2 s64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane < b's lane and 0 otherwise.
 * Originally defined at line 230 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comlt_epi64_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 2; ++lane )
    {
        lhs.u64[lane] = ( lhs.s64[lane] < rhs.s64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comlt_epi8 / pcomb [SSE5].
 * For each of the 16 s8 lanes, the matching u8 lane of the result is
 * 0xFF when a's lane < b's lane and 0 otherwise.
 * Originally defined at line 241 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comlt_epi8_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 16; ++lane )
    {
        lhs.u8[lane] = ( lhs.s8[lane] < rhs.s8[lane] ) ? 0xFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comlt_epu16 / pcomuw [SSE5].
 * For each of the 8 u16 lanes, the result lane is 0xFFFF when
 * a's lane < b's lane and 0 otherwise.
 * Originally defined at line 266 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comlt_epu16_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 8; ++lane )
    {
        lhs.u16[lane] = ( lhs.u16[lane] < rhs.u16[lane] ) ? 0xFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comlt_epu32 / pcomud [SSE5].
 * For each of the 4 u32 lanes, the result lane is 0xFFFFFFFF when
 * a's lane < b's lane and 0 otherwise.
 * Originally defined at line 283 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comlt_epu32_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 4; ++lane )
    {
        lhs.u32[lane] = ( lhs.u32[lane] < rhs.u32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comlt_epu64 / pcomuq [SSE5].
 * For each of the 2 u64 lanes, the result lane is all-ones when
 * a's lane < b's lane and 0 otherwise.
 * Originally defined at line 296 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comlt_epu64_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 2; ++lane )
    {
        lhs.u64[lane] = ( lhs.u64[lane] < rhs.u64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comlt_epu8 / pcomub [SSE5].
 * For each of the 16 u8 lanes, the result lane is 0xFF when
 * a's lane < b's lane and 0 otherwise.
 * Originally defined at line 307 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comlt_epu8_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 16; ++lane )
    {
        lhs.u8[lane] = ( lhs.u8[lane] < rhs.u8[lane] ) ? 0xFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comlt_pd / compd [SSE5].
 * For each of the 2 f64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane < b's lane and 0 otherwise.
 * Originally defined at line 332 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comlt_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for( lane = 0; lane < 2; ++lane )
    {
        lhs.u64[lane] = ( lhs.f64[lane] < rhs.f64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comlt_ps / comps [SSE5].
 * For each of the 4 f32 lanes, the matching u32 lane of the result is
 * all-ones when a's lane < b's lane and 0 otherwise.
 * Originally defined at line 343 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comlt_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for( lane = 0; lane < 4; ++lane )
    {
        lhs.u32[lane] = ( lhs.f32[lane] < rhs.f32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comlt_sd / comsd [SSE5].
 * Only lane 0 is compared: u64[0] of the result is all-ones when
 * a's f64[0] < b's f64[0] and 0 otherwise. Nothing else in the copy
 * of a is written before it is returned.
 * Originally defined at line 356 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comlt_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;

    lhs.d = a;
    rhs.d = b;
    if( lhs.f64[0] < rhs.f64[0] )
    {
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;
    }
    else
    {
        lhs.u64[0] = 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comlt_ss / comss [SSE5].
 * Only lane 0 is compared: u32[0] of the result is all-ones when
 * a's f32[0] < b's f32[0] and 0 otherwise. Nothing else in the copy
 * of a is written before it is returned.
 * Originally defined at line 366 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comlt_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;

    lhs.f = a;
    rhs.f = b;
    if( lhs.f32[0] < rhs.f32[0] )
    {
        lhs.u32[0] = 0xFFFFFFFF;
    }
    else
    {
        lhs.u32[0] = 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comneq_epi16 / pcomw [SSE5].
 * For each of the 8 s16 lanes, the matching u16 lane of the result is
 * 0xFFFF when a's lane != b's lane and 0 otherwise.
 * Originally defined at line 611 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comneq_epi16_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 8; ++lane )
    {
        lhs.u16[lane] = ( lhs.s16[lane] != rhs.s16[lane] ) ? 0xFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comneq_epi32 / pcomd [SSE5].
 * For each of the 4 s32 lanes, the matching u32 lane of the result is
 * 0xFFFFFFFF when a's lane != b's lane and 0 otherwise.
 * Originally defined at line 628 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comneq_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 4; ++lane )
    {
        lhs.u32[lane] = ( lhs.s32[lane] != rhs.s32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comneq_epi64 / pcomq [SSE5].
 * For each of the 2 s64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane != b's lane and 0 otherwise.
 * Originally defined at line 641 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comneq_epi64_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 2; ++lane )
    {
        lhs.u64[lane] = ( lhs.s64[lane] != rhs.s64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comneq_epi8 / pcomb [SSE5].
 * For each of the 16 s8 lanes, the matching u8 lane of the result is
 * 0xFF when a's lane != b's lane and 0 otherwise.
 * Originally defined at line 652 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comneq_epi8_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 16; ++lane )
    {
        lhs.u8[lane] = ( lhs.s8[lane] != rhs.s8[lane] ) ? 0xFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comneq_epu16 / pcomuw [SSE5].
 * For each of the 8 u16 lanes, the result lane is 0xFFFF when
 * a's lane != b's lane and 0 otherwise.
 * Originally defined at line 677 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comneq_epu16_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 8; ++lane )
    {
        lhs.u16[lane] = ( lhs.u16[lane] != rhs.u16[lane] ) ? 0xFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comneq_epu32 / pcomud [SSE5].
 * For each of the 4 u32 lanes, the result lane is 0xFFFFFFFF when
 * a's lane != b's lane and 0 otherwise.
 * Originally defined at line 694 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comneq_epu32_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 4; ++lane )
    {
        lhs.u32[lane] = ( lhs.u32[lane] != rhs.u32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comneq_epu64 / pcomuq [SSE5].
 * For each of the 2 u64 lanes, the result lane is all-ones when
 * a's lane != b's lane and 0 otherwise.
 * Originally defined at line 707 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comneq_epu64_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 2; ++lane )
    {
        lhs.u64[lane] = ( lhs.u64[lane] != rhs.u64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comneq_epu8 / pcomub [SSE5].
 * For each of the 16 u8 lanes, the result lane is 0xFF when
 * a's lane != b's lane and 0 otherwise.
 * Originally defined at line 718 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128i ssp_comneq_epu8_REF( __m128i a, __m128i b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.i = a;
    rhs.i = b;
    for( lane = 0; lane < 16; ++lane )
    {
        lhs.u8[lane] = ( lhs.u8[lane] != rhs.u8[lane] ) ? 0xFF : 0;
    }
    return lhs.i;
}

/**
 * Reference implementation of _mm_comneq_pd / compd [SSE5].
 * For each of the 2 f64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane != b's lane and 0 otherwise. With IEEE-754
 * comparisons, != also holds when either operand is NaN.
 * Originally defined at line 743 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comneq_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for( lane = 0; lane < 2; ++lane )
    {
        lhs.u64[lane] = ( lhs.f64[lane] != rhs.f64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comneq_ps / comps [SSE5].
 * For each of the 4 f32 lanes, the matching u32 lane of the result is
 * all-ones when a's lane != b's lane and 0 otherwise. With IEEE-754
 * comparisons, != also holds when either operand is NaN.
 * Originally defined at line 755 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comneq_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for( lane = 0; lane < 4; ++lane )
    {
        lhs.u32[lane] = ( lhs.f32[lane] != rhs.f32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comneq_sd / comsd [SSE5].
 * Only lane 0 is compared: u64[0] of the result is all-ones when
 * a's f64[0] != b's f64[0] and 0 otherwise. Nothing else in the copy
 * of a is written before it is returned.
 * Originally defined at line 768 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comneq_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;

    lhs.d = a;
    rhs.d = b;
    if( lhs.f64[0] != rhs.f64[0] )
    {
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;
    }
    else
    {
        lhs.u64[0] = 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comneq_ss / comss [SSE5].
 * Only lane 0 is compared: u32[0] of the result is all-ones when
 * a's f32[0] != b's f32[0] and 0 otherwise. Nothing else in the copy
 * of a is written before it is returned.
 * Originally defined at line 779 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comneq_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;

    lhs.f = a;
    rhs.f = b;
    if( lhs.f32[0] != rhs.f32[0] )
    {
        lhs.u32[0] = 0xFFFFFFFF;
    }
    else
    {
        lhs.u32[0] = 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comnge_pd / compd [SSE5].
 * For each of the 2 f64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane >= b's lane does NOT hold, and 0 when it does.
 * Originally defined at line 996 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comnge_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for( lane = 0; lane < 2; ++lane )
    {
        /* Kept as a negated >= (not rewritten as <): a >= that is false
           because an operand is NaN must still produce all-ones. */
        lhs.u64[lane] = !( lhs.f64[lane] >= rhs.f64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comnge_ps / comps [SSE5].
 * For each of the 4 f32 lanes, the matching u32 lane of the result is
 * all-ones when a's lane >= b's lane does NOT hold, and 0 when it does.
 * Originally defined at line 1007 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comnge_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for( lane = 0; lane < 4; ++lane )
    {
        /* Kept as a negated >= (not rewritten as <): a >= that is false
           because an operand is NaN must still produce all-ones. */
        lhs.u32[lane] = !( lhs.f32[lane] >= rhs.f32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comnge_sd / comsd [SSE5].
 * Only lane 0 is compared: u64[0] of the result is 0 when
 * a's f64[0] >= b's f64[0] holds and all-ones when it does not.
 * Nothing else in the copy of a is written before it is returned.
 * Originally defined at line 1020 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comnge_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;

    lhs.d = a;
    rhs.d = b;
    /* The else branch is also taken when >= is false due to a NaN operand. */
    if( lhs.f64[0] >= rhs.f64[0] )
    {
        lhs.u64[0] = 0;
    }
    else
    {
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comnge_ss / comss [SSE5].
 * Only lane 0 is compared: u32[0] of the result is 0 when
 * a's f32[0] >= b's f32[0] holds and all-ones when it does not.
 * Nothing else in the copy of a is written before it is returned.
 * Originally defined at line 1030 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comnge_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;

    lhs.f = a;
    rhs.f = b;
    /* The else branch is also taken when >= is false due to a NaN operand. */
    if( lhs.f32[0] >= rhs.f32[0] )
    {
        lhs.u32[0] = 0;
    }
    else
    {
        lhs.u32[0] = 0xFFFFFFFF;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comngt_pd / compd [SSE5].
 * For each of the 2 f64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane > b's lane does NOT hold, and 0 when it does.
 * Originally defined at line 1045 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comngt_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for( lane = 0; lane < 2; ++lane )
    {
        /* Kept as a negated > (not rewritten as <=): a > that is false
           because an operand is NaN must still produce all-ones. */
        lhs.u64[lane] = !( lhs.f64[lane] > rhs.f64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comngt_ps / comps [SSE5].
 * For each of the 4 f32 lanes, the matching u32 lane of the result is
 * all-ones when a's lane > b's lane does NOT hold, and 0 when it does.
 * Originally defined at line 1056 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comngt_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for( lane = 0; lane < 4; ++lane )
    {
        /* Kept as a negated > (not rewritten as <=): a > that is false
           because an operand is NaN must still produce all-ones. */
        lhs.u32[lane] = !( lhs.f32[lane] > rhs.f32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comngt_sd / comsd [SSE5].
 * Only lane 0 is compared: u64[0] of the result is 0 when
 * a's f64[0] > b's f64[0] holds and all-ones when it does not.
 * Nothing else in the copy of a is written before it is returned.
 * Originally defined at line 1069 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comngt_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;

    lhs.d = a;
    rhs.d = b;
    /* The else branch is also taken when > is false due to a NaN operand. */
    if( lhs.f64[0] > rhs.f64[0] )
    {
        lhs.u64[0] = 0;
    }
    else
    {
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comngt_ss / comss [SSE5].
 * (The generated description named the packed mnemonic "comps"; every
 * other scalar-single entry in this file names "comss".)
 * Only lane 0 is compared: u32[0] of the result is 0 when
 * a's f32[0] > b's f32[0] holds and all-ones when it does not.
 * Nothing else in the copy of a is written before it is returned.
 * Originally defined at line 1079 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comngt_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;

    lhs.f = a;
    rhs.f = b;
    /* The else branch is also taken when > is false due to a NaN operand. */
    if( lhs.f32[0] > rhs.f32[0] )
    {
        lhs.u32[0] = 0;
    }
    else
    {
        lhs.u32[0] = 0xFFFFFFFF;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comnle_pd / compd [SSE5].
 * For each of the 2 f64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane <= b's lane does NOT hold, and 0 when it does.
 * Originally defined at line 847 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comnle_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for( lane = 0; lane < 2; ++lane )
    {
        /* Kept as a negated <= (not rewritten as >): a <= that is false
           because an operand is NaN must still produce all-ones. */
        lhs.u64[lane] = !( lhs.f64[lane] <= rhs.f64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comnle_ps / comps [SSE5].
 * For each of the 4 f32 lanes, the matching u32 lane of the result is
 * all-ones when a's lane <= b's lane does NOT hold, and 0 when it does.
 * Originally defined at line 859 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comnle_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for( lane = 0; lane < 4; ++lane )
    {
        /* Kept as a negated <= (not rewritten as >): a <= that is false
           because an operand is NaN must still produce all-ones. */
        lhs.u32[lane] = !( lhs.f32[lane] <= rhs.f32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comnle_sd / comsd [SSE5].
 * Only lane 0 is compared: u64[0] of the result is 0 when
 * a's f64[0] <= b's f64[0] holds and all-ones when it does not.
 * Nothing else in the copy of a is written before it is returned.
 * Originally defined at line 872 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comnle_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;

    lhs.d = a;
    rhs.d = b;
    /* The else branch is also taken when <= is false due to a NaN operand. */
    if( lhs.f64[0] <= rhs.f64[0] )
    {
        lhs.u64[0] = 0;
    }
    else
    {
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comnle_ss / comss [SSE5].
 * Only lane 0 is compared: u32[0] of the result is 0 when
 * a's f32[0] <= b's f32[0] holds and all-ones when it does not.
 * Nothing else in the copy of a is written before it is returned.
 * Originally defined at line 883 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comnle_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;

    lhs.f = a;
    rhs.f = b;
    /* The else branch is also taken when <= is false due to a NaN operand. */
    if( lhs.f32[0] <= rhs.f32[0] )
    {
        lhs.u32[0] = 0;
    }
    else
    {
        lhs.u32[0] = 0xFFFFFFFF;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comnlt_pd / compd [SSE5].
 * For each of the 2 f64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane < b's lane does NOT hold, and 0 when it does.
 * Originally defined at line 794 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comnlt_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for( lane = 0; lane < 2; ++lane )
    {
        /* Kept as a negated < (not rewritten as >=): a < that is false
           because an operand is NaN must still produce all-ones. */
        lhs.u64[lane] = !( lhs.f64[lane] < rhs.f64[lane] ) ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comnlt_ps / comps [SSE5].
 * For each of the 4 f32 lanes, the matching u32 lane of the result is
 * all-ones when a's lane < b's lane does NOT hold, and 0 when it does.
 * Originally defined at line 807 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comnlt_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for( lane = 0; lane < 4; ++lane )
    {
        /* Kept as a negated < (not rewritten as >=): a < that is false
           because an operand is NaN must still produce all-ones. */
        lhs.u32[lane] = !( lhs.f32[lane] < rhs.f32[lane] ) ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comnlt_sd / comsd [SSE5].
 * Only lane 0 is compared: u64[0] of the result is 0 when
 * a's f64[0] < b's f64[0] holds and all-ones when it does not.
 * Nothing else in the copy of a is written before it is returned.
 * Originally defined at line 820 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comnlt_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;

    lhs.d = a;
    rhs.d = b;
    /* The else branch is also taken when < is false due to a NaN operand. */
    if( lhs.f64[0] < rhs.f64[0] )
    {
        lhs.u64[0] = 0;
    }
    else
    {
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comnlt_ss / comss [SSE5].
 * Only lane 0 is compared: u32[0] of the result is 0 when
 * a's f32[0] < b's f32[0] holds and all-ones when it does not.
 * Nothing else in the copy of a is written before it is returned.
 * Originally defined at line 832 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comnlt_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;

    lhs.f = a;
    rhs.f = b;
    /* The else branch is also taken when < is false due to a NaN operand. */
    if( lhs.f32[0] < rhs.f32[0] )
    {
        lhs.u32[0] = 0;
    }
    else
    {
        lhs.u32[0] = 0xFFFFFFFF;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comoneq_pd / compd [SSE5].
 * For each of the 2 f64 lanes, the matching u64 lane of the result is
 * all-ones when a's lane is strictly less than or strictly greater than
 * b's lane, and 0 otherwise (so equal lanes, and lanes where neither
 * relation holds, give 0).
 * Originally defined at line 1180 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comoneq_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.d = a;
    rhs.d = b;
    for( lane = 0; lane < 2; ++lane )
    {
        lhs.u64[lane] = ( ( lhs.f64[lane] < rhs.f64[lane] ) || ( lhs.f64[lane] > rhs.f64[lane] ) )
                        ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comoneq_ps / comps [SSE5].
 * For each of the 4 f32 lanes, the matching u32 lane of the result is
 * all-ones when a's lane is strictly less than or strictly greater than
 * b's lane, and 0 otherwise (so equal lanes, and lanes where neither
 * relation holds, give 0).
 * Originally defined at line 1191 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128 ssp_comoneq_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;

    lhs.f = a;
    rhs.f = b;
    for( lane = 0; lane < 4; ++lane )
    {
        /* Condition parenthesised explicitly; || already binds tighter than ?: */
        lhs.u32[lane] = ( ( lhs.f32[lane] < rhs.f32[lane] ) || ( lhs.f32[lane] > rhs.f32[lane] ) )
                        ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comoneq_sd / comsd [SSE5].
 * Only lane 0 is compared: u64[0] of the result is all-ones when
 * a's f64[0] is strictly less than or strictly greater than b's f64[0],
 * and 0 otherwise. Nothing else in the copy of a is written before it
 * is returned.
 * Originally defined at line 1204 of SSEPlus_emulation_comps_REF.h.
 */
SSP_FORCEINLINE __m128d ssp_comoneq_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;

    lhs.d = a;
    rhs.d = b;
    if( ( lhs.f64[0] < rhs.f64[0] ) || ( lhs.f64[0] > rhs.f64[0] ) )
    {
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;
    }
    else
    {
        lhs.u64[0] = 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comoneq_ss / comss [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1214.
 *
 * Scalar "ordered and not equal" on lane 0: all ones when a[0] < b[0] or
 * a[0] > b[0], zero otherwise. The upper lanes of a are passed through.
 */
SSP_FORCEINLINE __m128 ssp_comoneq_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    lhs.f = a;
    rhs.f = b;

    if( (lhs.f32[0] < rhs.f32[0]) || (lhs.f32[0] > rhs.f32[0]) )
        lhs.u32[0] = 0xFFFFFFFF;
    else
        lhs.u32[0] = 0;

    return lhs.f;
}

/**
 * Reference implementation of _mm_comord_pd / compd [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 898.
 *
 * Packed "ordered": each 64-bit lane becomes all ones when neither
 * operand is NaN (detected via x == x) and zero otherwise.
 */
SSP_FORCEINLINE __m128d ssp_comord_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;
    lhs.d = a;
    rhs.d = b;

    for( lane = 0; lane < 2; ++lane )
    {
        /* A value compares equal to itself exactly when it is not NaN. */
        const int ordered = (lhs.f64[lane] == lhs.f64[lane]) && (rhs.f64[lane] == rhs.f64[lane]);
        lhs.u64[lane] = ordered ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comord_ps / comps [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 909.
 *
 * Packed "ordered": each 32-bit lane becomes all ones when neither
 * operand is NaN (detected via x == x) and zero otherwise.
 */
SSP_FORCEINLINE __m128 ssp_comord_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;
    lhs.f = a;
    rhs.f = b;

    for( lane = 0; lane < 4; ++lane )
    {
        /* A value compares equal to itself exactly when it is not NaN. */
        const int ordered = (lhs.f32[lane] == lhs.f32[lane]) && (rhs.f32[lane] == rhs.f32[lane]);
        lhs.u32[lane] = ordered ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comord_sd / comsd [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 922.
 *
 * Scalar "ordered" on lane 0: zero if a[0] or b[0] is NaN, all ones
 * otherwise. The high lane of a is passed through.
 */
SSP_FORCEINLINE __m128d ssp_comord_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    lhs.d = a;
    rhs.d = b;

    /* x != x holds only for NaN. */
    if( (lhs.f64[0] != lhs.f64[0]) || (rhs.f64[0] != rhs.f64[0]) )
        lhs.u64[0] = 0;
    else
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;

    return lhs.d;
}

/**
 * Reference implementation of _mm_comord_ss / comss [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 932.
 *
 * Scalar "ordered" on lane 0: zero if a[0] or b[0] is NaN, all ones
 * otherwise. The upper lanes of a are passed through.
 */
SSP_FORCEINLINE __m128 ssp_comord_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    lhs.f = a;
    rhs.f = b;

    /* x != x holds only for NaN. */
    if( (lhs.f32[0] != lhs.f32[0]) || (rhs.f32[0] != rhs.f32[0]) )
        lhs.u32[0] = 0;
    else
        lhs.u32[0] = 0xFFFFFFFF;

    return lhs.f;
}

/**
 * Reference implementation of _mm_comtrue_epi16 / pcomw [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1593.
 *
 * "Always true" comparison: both arguments are ignored and a 128-bit
 * value with every bit set is returned.
 */
SSP_FORCEINLINE __m128i ssp_comtrue_epi16_REF( __m128i a, __m128i b )
{
    static const __m128i all_ones = SSP_CONST_SET_64I( 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF );
    return all_ones;
}

/**
 * Reference implementation of _mm_comtrue_epi32 / pcomd [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1600.
 *
 * Forwards to ssp_comtrue_epi16_REF and returns its result unchanged.
 */
SSP_FORCEINLINE __m128i ssp_comtrue_epi32_REF( __m128i a, __m128i b )
{
    const __m128i result = ssp_comtrue_epi16_REF( a, b );
    return result;
}

/**
 * Reference implementation of _mm_comtrue_epi64 / pcomq [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1606.
 *
 * Forwards to ssp_comtrue_epi16_REF and returns its result unchanged.
 */
SSP_FORCEINLINE __m128i ssp_comtrue_epi64_REF( __m128i a, __m128i b )
{
    const __m128i result = ssp_comtrue_epi16_REF( a, b );
    return result;
}

/**
 * Reference implementation of _mm_comtrue_epi8 / pcomb [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1612.
 *
 * Forwards to ssp_comtrue_epi16_REF and returns its result unchanged.
 */
SSP_FORCEINLINE __m128i ssp_comtrue_epi8_REF( __m128i a, __m128i b )
{
    const __m128i result = ssp_comtrue_epi16_REF( a, b );
    return result;
}

/**
 * Reference implementation of _mm_comtrue_epu16 / pcomw [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1618.
 *
 * Forwards to ssp_comtrue_epi16_REF and returns its result unchanged.
 */
SSP_FORCEINLINE __m128i ssp_comtrue_epu16_REF( __m128i a, __m128i b )
{
    const __m128i result = ssp_comtrue_epi16_REF( a, b );
    return result;
}

/**
 * Reference implementation of _mm_comtrue_epu32 / pcomd [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1624.
 *
 * Forwards to ssp_comtrue_epi16_REF and returns its result unchanged.
 */
SSP_FORCEINLINE __m128i ssp_comtrue_epu32_REF( __m128i a, __m128i b )
{
    const __m128i result = ssp_comtrue_epi16_REF( a, b );
    return result;
}

/**
 * Reference implementation of _mm_comtrue_epu64 / pcomq [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1630.
 *
 * Forwards to ssp_comtrue_epi16_REF and returns its result unchanged.
 */
SSP_FORCEINLINE __m128i ssp_comtrue_epu64_REF( __m128i a, __m128i b )
{
    const __m128i result = ssp_comtrue_epi16_REF( a, b );
    return result;
}

/**
 * Reference implementation of _mm_comtrue_epu8 / pcomb [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1636.
 *
 * Forwards to ssp_comtrue_epi16_REF and returns its result unchanged.
 */
SSP_FORCEINLINE __m128i ssp_comtrue_epu8_REF( __m128i a, __m128i b )
{
    const __m128i result = ssp_comtrue_epi16_REF( a, b );
    return result;
}

/**
 * Reference implementation of _mm_comtrue_pd / compd [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1642.
 *
 * "Always true" comparison: both arguments are ignored; the result is a
 * 128-bit all-ones pattern reinterpreted as __m128d through the union.
 */
SSP_FORCEINLINE __m128d ssp_comtrue_pd_REF( __m128d a, __m128d b )
{
    static const __m128i all_ones = SSP_CONST_SET_64I( 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF );
    ssp_m128 out;

    out.i = all_ones;
    return out.d;
}

/**
 * Reference implementation of _mm_comtrue_ps / comps [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1651.
 *
 * "Always true" comparison: both arguments are ignored; the result is a
 * 128-bit all-ones pattern reinterpreted as __m128 through the union.
 */
SSP_FORCEINLINE __m128 ssp_comtrue_ps_REF( __m128 a, __m128 b )
{
    static const __m128i all_ones = SSP_CONST_SET_64I( 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF );
    ssp_m128 out;

    out.i = all_ones;
    return out.f;
}

/**
 * Reference implementation of _mm_comtrue_sd / comsd [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1660.
 *
 * Scalar "always true": the low 64 bits of a are replaced with all ones
 * and the high lane of a is passed through. b is not read.
 */
SSP_FORCEINLINE __m128d ssp_comtrue_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 out;

    out.d = a;                          /* keep the upper lane of a */
    out.u64[0] = 0xFFFFFFFFFFFFFFFF;    /* force the low lane to "true" */
    return out.d;
}

/**
 * Reference implementation of _mm_comtrue_ss / comss [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 1669.
 *
 * Scalar "always true": the low 32 bits of a are replaced with all ones
 * and the upper lanes of a are passed through. b is not read.
 */
SSP_FORCEINLINE __m128 ssp_comtrue_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 out;

    out.f = a;                  /* keep the upper lanes of a */
    out.u32[0] = 0xFFFFFFFF;    /* force the low lane to "true" */
    return out.f;
}

/**
 * Reference implementation of _mm_comueq_pd / compd [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 947.
 *
 * Packed "unordered or equal": each 64-bit lane becomes zero when a is
 * strictly less than or strictly greater than b, and all ones otherwise
 * (equal operands, or a NaN in either operand).
 */
SSP_FORCEINLINE __m128d ssp_comueq_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;
    lhs.d = a;
    rhs.d = b;

    for( lane = 0; lane < 2; ++lane )
    {
        if( (lhs.f64[lane] < rhs.f64[lane]) || (lhs.f64[lane] > rhs.f64[lane]) )
            lhs.u64[lane] = 0;
        else
            lhs.u64[lane] = 0xFFFFFFFFFFFFFFFF;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comueq_ps / comps [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 958.
 *
 * Packed "unordered or equal": each 32-bit lane becomes zero when a is
 * strictly less than or strictly greater than b, and all ones otherwise
 * (equal operands, or a NaN in either operand).
 */
SSP_FORCEINLINE __m128 ssp_comueq_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;
    lhs.f = a;
    rhs.f = b;

    for( lane = 0; lane < 4; ++lane )
    {
        if( (lhs.f32[lane] < rhs.f32[lane]) || (lhs.f32[lane] > rhs.f32[lane]) )
            lhs.u32[lane] = 0;
        else
            lhs.u32[lane] = 0xFFFFFFFF;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comueq_sd / comsd [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 971.
 *
 * Scalar "unordered or equal" on lane 0: zero when a[0] < b[0] or
 * a[0] > b[0], all ones otherwise. The high lane of a is passed through.
 */
SSP_FORCEINLINE __m128d ssp_comueq_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    lhs.d = a;
    rhs.d = b;

    if( (lhs.f64[0] < rhs.f64[0]) || (lhs.f64[0] > rhs.f64[0]) )
        lhs.u64[0] = 0;
    else
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;

    return lhs.d;
}

/**
 * Reference implementation of _mm_comueq_ss / comss [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 981.
 *
 * Scalar "unordered or equal" on lane 0: zero when a[0] < b[0] or
 * a[0] > b[0], all ones otherwise. The upper lanes of a are passed through.
 */
SSP_FORCEINLINE __m128 ssp_comueq_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    lhs.f = a;
    rhs.f = b;

    if( (lhs.f32[0] < rhs.f32[0]) || (lhs.f32[0] > rhs.f32[0]) )
        lhs.u32[0] = 0;
    else
        lhs.u32[0] = 0xFFFFFFFF;

    return lhs.f;
}

/**
 * Reference implementation of _mm_comunord_pd / compd [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 562.
 *
 * Packed "unordered": each 64-bit lane becomes all ones when a or b is
 * NaN in that lane (detected via x != x) and zero otherwise.
 */
SSP_FORCEINLINE __m128d ssp_comunord_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    int lane;
    lhs.d = a;
    rhs.d = b;

    for( lane = 0; lane < 2; ++lane )
    {
        const int has_nan = (lhs.f64[lane] != lhs.f64[lane]) || (rhs.f64[lane] != rhs.f64[lane]);
        lhs.u64[lane] = has_nan ? 0xFFFFFFFFFFFFFFFF : 0;
    }
    return lhs.d;
}

/**
 * Reference implementation of _mm_comunord_ps / comps [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 573.
 *
 * Packed "unordered": each 32-bit lane becomes all ones when a or b is
 * NaN in that lane (detected via x != x) and zero otherwise.
 */
SSP_FORCEINLINE __m128 ssp_comunord_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    int lane;
    lhs.f = a;
    rhs.f = b;

    for( lane = 0; lane < 4; ++lane )
    {
        const int has_nan = (lhs.f32[lane] != lhs.f32[lane]) || (rhs.f32[lane] != rhs.f32[lane]);
        lhs.u32[lane] = has_nan ? 0xFFFFFFFF : 0;
    }
    return lhs.f;
}

/**
 * Reference implementation of _mm_comunord_sd / comsd [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 586.
 *
 * Scalar "unordered" on lane 0: all ones if a[0] or b[0] is NaN, zero
 * otherwise. The high lane of a is passed through.
 */
SSP_FORCEINLINE __m128d ssp_comunord_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 lhs, rhs;
    lhs.d = a;
    rhs.d = b;

    /* x != x holds only for NaN. */
    if( (lhs.f64[0] != lhs.f64[0]) || (rhs.f64[0] != rhs.f64[0]) )
        lhs.u64[0] = 0xFFFFFFFFFFFFFFFF;
    else
        lhs.u64[0] = 0;

    return lhs.d;
}

/**
 * Reference implementation of _mm_comunord_ss / comss [SSE5].
 * Source: SSEPlus_emulation_comps_REF.h, line 596.
 *
 * Scalar "unordered" on lane 0: all ones if a[0] or b[0] is NaN, zero
 * otherwise. The upper lanes of a are passed through.
 */
SSP_FORCEINLINE __m128 ssp_comunord_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 lhs, rhs;
    lhs.f = a;
    rhs.f = b;

    /* x != x holds only for NaN. */
    if( (lhs.f32[0] != lhs.f32[0]) || (rhs.f32[0] != rhs.f32[0]) )
        lhs.u32[0] = 0xFFFFFFFF;
    else
        lhs.u32[0] = 0;

    return lhs.f;
}

/**
 * Reference implementation of _mm_cvtepi16_epi32 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2435.
 *
 * Sign-extends the four low signed 16-bit elements of a to 32 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepi16_epi32_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Widen in place from the top lane down so that no source element is
       overwritten before it has been read. */
    for( lane = 3; lane >= 0; --lane )
        v.s32[lane] = v.s16[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepi16_epi64 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2448.
 *
 * Sign-extends the two low signed 16-bit elements of a to 64 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepi16_epi64_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Top lane first: the in-place write must not clobber unread input. */
    for( lane = 1; lane >= 0; --lane )
        v.s64[lane] = v.s16[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepi32_epi64 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2459.
 *
 * Sign-extends the two low signed 32-bit elements of a to 64 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepi32_epi64_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Top lane first: the in-place write must not clobber unread input. */
    for( lane = 1; lane >= 0; --lane )
        v.s64[lane] = v.s32[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepi8_epi16 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2394.
 *
 * Sign-extends the eight low signed 8-bit elements of a to 16 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepi8_epi16_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Widen in place from the top lane down so that no source byte is
       overwritten before it has been read. */
    for( lane = 7; lane >= 0; --lane )
        v.s16[lane] = v.s8[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepi8_epi32 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2411.
 *
 * Sign-extends the four low signed 8-bit elements of a to 32 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepi8_epi32_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Top lane first: the in-place write must not clobber unread input. */
    for( lane = 3; lane >= 0; --lane )
        v.s32[lane] = v.s8[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepi8_epi64 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2424.
 *
 * Sign-extends the two low signed 8-bit elements of a to 64 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepi8_epi64_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Top lane first: the in-place write must not clobber unread input. */
    for( lane = 1; lane >= 0; --lane )
        v.s64[lane] = v.s8[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepu16_epi32 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2511.
 *
 * Zero-extends the four low unsigned 16-bit elements of a to 32 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepu16_epi32_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Top lane first: the in-place write must not clobber unread input. */
    for( lane = 3; lane >= 0; --lane )
        v.s32[lane] = v.u16[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepu16_epi64 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2524.
 *
 * Zero-extends the two low unsigned 16-bit elements of a to 64 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepu16_epi64_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Top lane first: the in-place write must not clobber unread input. */
    for( lane = 1; lane >= 0; --lane )
        v.s64[lane] = v.u16[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepu32_epi64 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2535.
 *
 * Zero-extends the two low unsigned 32-bit elements of a to 64 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepu32_epi64_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Top lane first: the in-place write must not clobber unread input. */
    for( lane = 1; lane >= 0; --lane )
        v.s64[lane] = v.u32[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepu8_epi16 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2470.
 *
 * Zero-extends the eight low unsigned 8-bit elements of a to 16 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepu8_epi16_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Widen in place from the top lane down so that no source byte is
       overwritten before it has been read. */
    for( lane = 7; lane >= 0; --lane )
        v.s16[lane] = v.u8[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepu8_epi32 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2487.
 *
 * Zero-extends the four low unsigned 8-bit elements of a to 32 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepu8_epi32_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Top lane first: the in-place write must not clobber unread input. */
    for( lane = 3; lane >= 0; --lane )
        v.s32[lane] = v.u8[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_cvtepu8_epi64 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2500.
 *
 * Zero-extends the two low unsigned 8-bit elements of a to 64 bits.
 */
SSP_FORCEINLINE __m128i ssp_cvtepu8_epi64_REF( __m128i a )
{
    ssp_m128 v;
    int lane;
    v.i = a;

    /* Top lane first: the in-place write must not clobber unread input. */
    for( lane = 1; lane >= 0; --lane )
        v.s64[lane] = v.u8[lane];

    return v.i;
}

/**
 * Reference implementation of _mm_dp_pd [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 930.
 *
 * Conditional dot product of two double pairs.
 *   mask bits 4..5 : include the product of lane 0 / lane 1 in the sum
 *                    (an excluded lane contributes 0.0).
 *   mask bits 0..1 : write the sum to result lane 0 / lane 1
 *                    (an unselected lane is set to 0.0).
 */
SSP_FORCEINLINE __m128d ssp_dp_pd_REF( __m128d a, __m128d b, const int mask )
{
    ssp_f64 prod[2];
    ssp_f64 sum;
    ssp_m128 x, y;
    int lane;
    x.d = a;
    y.d = b;

    for( lane = 0; lane < 2; ++lane )
        prod[lane] = (mask & (0x10 << lane)) ? (x.f64[lane] * y.f64[lane]) : 0.0;

    sum = prod[0] + prod[1];

    for( lane = 0; lane < 2; ++lane )
        x.f64[lane] = (mask & (0x1 << lane)) ? sum : 0.0;

    return x.d;
}

/**
 * Reference implementation of _mm_dp_ps [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 948.
 *
 * Conditional dot product of two float quadruples.
 *   mask bits 4..7 : include the product of lane 0..3 in the sum
 *                    (an excluded lane contributes 0.0f).
 *   mask bits 0..3 : write the sum to result lane 0..3
 *                    (an unselected lane is set to 0.0f).
 */
SSP_FORCEINLINE __m128 ssp_dp_ps_REF( __m128 a, __m128 b, const int mask )
{
    ssp_f32 prod[4];
    ssp_f32 sum;
    ssp_m128 x, y;
    int lane;
    x.f = a;
    y.f = b;

    for( lane = 0; lane < 4; ++lane )
        prod[lane] = (mask & (0x10 << lane)) ? (x.f32[lane] * y.f32[lane]) : 0.0f;

    /* Summed left to right in a single expression, lane 0 first. */
    sum = prod[0] + prod[1] + prod[2] + prod[3];

    for( lane = 0; lane < 4; ++lane )
        x.f32[lane] = (mask & (0x1 << lane)) ? sum : 0.0f;

    return x.f;
}

/**
 * Reference implementation of _mm_extract_epi32 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 1086.
 *
 * Returns the 32-bit element of a selected by the low two bits of imm,
 * read as unsigned and cast to int.
 */
SSP_FORCEINLINE int ssp_extract_epi32_REF( __m128i a, const int imm )
{
    ssp_m128 v;
    const int lane = imm & 0x3;

    v.i = a;
    return (int)v.u32[lane];
}

/**
 * Reference implementation of _mm_extract_epi64 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 1094.
 *
 * Returns the signed 64-bit element of a selected by the low bit of ndx.
 */
SSP_FORCEINLINE ssp_s64 ssp_extract_epi64_REF( __m128i a, const int ndx )
{
    ssp_m128 v;
    const int lane = ndx & 0x1;

    v.i = a;
    return v.s64[lane];
}

/**
 * Reference implementation of _mm_extract_epi8 [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 1078.
 *
 * Returns the byte of a selected by the low four bits of ndx,
 * zero-extended to int.
 */
SSP_FORCEINLINE int ssp_extract_epi8_REF( __m128i a, const int ndx )
{
    ssp_m128 v;
    const int lane = ndx & 0xF;

    v.i = a;
    return (int)v.u8[lane];
}

/**
 * Reference implementation of _mm_extract_ps [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 1102.
 *
 * Returns the raw 32-bit pattern (as a signed int, not a converted
 * value) of the float lane of a selected by the low two bits of ndx.
 */
SSP_FORCEINLINE int ssp_extract_ps_REF( __m128 a, const int ndx )
{
    ssp_m128 v;
    const int lane = ndx & 0x3;

    v.f = a;
    return v.s32[lane];
}

/**
 * Reference implementation of _mm_extract_si64 [SSE4a].
 * Source: SSEPlus_emulation_REF.h, line 1113.
 * NOTE: The upper 64 bits of the destination register are undefined
 *       (this implementation passes the upper half of a through).
 *
 * Extracts a bit field from the low 64 bits of a and returns it
 * right-aligned in the low 64 bits of the result.
 *
 * @param a  Source value; only a.u64[0] is used as the field source.
 * @param b  Control word in b.u64[0]: bits 0..5 give the field length
 *           (0 is treated as 64), bits 8..13 give the starting bit index.
 * @return   a with its low 64 bits replaced by the extracted field. If
 *           index + length exceeds 64 the hardware result is undefined;
 *           here a is returned unchanged.
 */
SSP_FORCEINLINE __m128i ssp_extract_si64_REF( __m128i a, __m128i b )
{
    ssp_u32 len, ndx;
    ssp_m128 A, B;
    A.i = a;
    B.i = b;
    ndx = (ssp_u32)((B.u64[0] & 0x3F00) >> 8);    /* Mask ndx field. */
    len = (ssp_u32)((B.u64[0] & 0x003F));         /* Mask len field. */

    if( len == 0 )                     /* A length of 0 means a full 64-bit field. */
        len = 64;
    if( (ndx+len) > 64 )               /* Undefined by the ISA: hand back the input. */
        return a;

    A.u64[0] = A.u64[0] >> ndx;

    /* Build the field mask in 64-bit unsigned arithmetic. The previous
       '~(-1 << len)' shifted a 32-bit int, which is undefined for
       len >= 32 and produced a wrong mask. A 64-bit field (which implies
       ndx == 0) needs no masking at all; shifting by 64 would itself be
       undefined. */
    if( len < 64 )
        A.u64[0] = A.u64[0] & ~(~(unsigned long long)0 << len);

    return A.i;
}

/**
 * Reference implementation of _mm_extracti_si64 [SSE4a].
 * Source: SSEPlus_emulation_REF.h, line 1134.
 * NOTE: The upper 64 bits of the destination register are undefined
 *       (this implementation passes the upper half of a through).
 *
 * Extracts a bit field from the low 64 bits of a and returns it
 * right-aligned in the low 64 bits of the result.
 *
 * @param a    Source value; only a.u64[0] is used as the field source.
 * @param len  Field length; only the low six bits are used and 0 is
 *             treated as 64.
 * @param ndx  Starting bit index; only the low six bits are used.
 * @return     a with its low 64 bits replaced by the extracted field. If
 *             ndx + len exceeds 64 the hardware result is undefined;
 *             here a is returned unchanged.
 */
SSP_FORCEINLINE __m128i ssp_extracti_si64_REF( __m128i a, int len, int ndx )
{
    ssp_m128 A;
    A.i = a;
    ndx = ndx & 0x3F; /* ndx % 64 */
    len = len & 0x3F; /* len % 64 */

    if( len == 0 )                     /* A length of 0 means a full 64-bit field. */
        len = 64;
    if( (ndx+len) > 64 )               /* Undefined by the ISA: hand back the input. */
        return a;

    A.u64[0] = A.u64[0] >> ndx;

    /* Build the field mask in 64-bit unsigned arithmetic. The previous
       '~(-1 << len)' shifted a 32-bit int, which is undefined for
       len >= 32 and produced a wrong mask. A 64-bit field (which implies
       ndx == 0) needs no masking at all; shifting by 64 would itself be
       undefined. */
    if( len < 64 )
        A.u64[0] = A.u64[0] & ~(~(unsigned long long)0 << len);

    return A.i;
}

/**
 * Reference implementation of _mm_floor_pd [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2045.
 *
 * Applies floor() to both double lanes of a.
 */
SSP_FORCEINLINE __m128d ssp_floor_pd_REF( __m128d a )
{
    ssp_m128 v;
    int lane;
    v.d = a;

    for( lane = 0; lane < 2; ++lane )
        v.f64[lane] = floor( v.f64[lane] );

    return v.d;
}

/**
 * Reference implementation of _mm_floor_ps [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2056.
 *
 * Applies floor() to all four float lanes of a; each lane is promoted to
 * double for the call and the result cast back to float.
 */
SSP_FORCEINLINE __m128 ssp_floor_ps_REF( __m128 a )
{
    ssp_m128 v;
    int lane;
    v.f = a;

    for( lane = 0; lane < 4; ++lane )
        v.f32[lane] = (float)floor( v.f32[lane] );

    return v.f;
}

/**
 * Reference implementation of _mm_floor_sd [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2069.
 *
 * Result lane 0 is floor(b[0]); result lane 1 is copied from a.
 */
SSP_FORCEINLINE __m128d ssp_floor_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 dst, src;
    dst.d = a;      /* supplies the untouched upper lane */
    src.d = b;      /* supplies the value to round       */

    dst.f64[0] = floor( src.f64[0] );
    return dst.d;
}

/**
 * Reference implementation of _mm_floor_ss [SSE4.1].
 * Source: SSEPlus_emulation_REF.h, line 2080.
 *
 * Result lane 0 is floor(b[0]) cast back to float; result lanes 1..3 are
 * copied from a.
 */
SSP_FORCEINLINE __m128 ssp_floor_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 dst, src;
    dst.f = a;      /* supplies the untouched upper lanes */
    src.f = b;      /* supplies the value to round        */

    dst.f32[0] = (float)floor( src.f32[0] );
    return dst.f;
}

/**
 * Reference implementation of _mm_frcz_pd / frczpd [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 24.
 *
 * Replaces each double lane with its fractional part, computed as the
 * value minus its truncation to long long.
 * NOTE(review): the conversion to long long is undefined in C for NaN,
 * infinities and magnitudes beyond the long long range; confirm callers
 * never pass such values.
 */
SSP_FORCEINLINE __m128d ssp_frcz_pd_REF( __m128d a )
{
    ssp_m128 v;
    long long whole;
    int lane;

    v.d = a;

    for( lane = 0; lane < 2; ++lane )
    {
        whole = (long long) v.f64[lane];
        v.f64[lane] -= whole;
    }

    return v.d;
}

/**
 * Reference implementation of _mm_frcz_ps / frczps [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 40.
 *
 * Replaces each float lane with its fractional part, computed as the
 * value minus its truncation to int.
 * NOTE(review): the conversion to int is undefined in C for NaN,
 * infinities and magnitudes beyond the int range; confirm callers never
 * pass such values.
 */
SSP_FORCEINLINE __m128 ssp_frcz_ps_REF( __m128 a )
{
    ssp_m128 v;
    int whole;
    int lane;

    v.f = a;

    for( lane = 0; lane < 4; ++lane )
    {
        whole = (int) v.f32[lane];
        v.f32[lane] -= whole;
    }

    return v.f;
}

/**
 * Reference implementation of _mm_frcz_sd / frczsd [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 59.
 *
 * Result lane 0 is the fractional part of a[0] (value minus its
 * truncation to long long); result lane 1 is copied from b.
 * NOTE(review): the conversion to long long is undefined in C for NaN,
 * infinities and out-of-range magnitudes.
 */
SSP_FORCEINLINE __m128d ssp_frcz_sd_REF( __m128d a, __m128d b )
{
    ssp_m128 src, dst;
    long long whole;

    src.d = a;
    dst.d = b;      /* upper lane of the result comes from b */

    whole = (long long) src.f64[0];
    dst.f64[0] = src.f64[0] - whole;

    return dst.d;
}

/**
 * Reference implementation of _mm_frcz_ss / frczss [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 74.
 *
 * Result lane 0 is the fractional part of a[0] (value minus its
 * truncation to int); result lanes 1..3 are copied from b.
 * NOTE(review): the conversion to int is undefined in C for NaN,
 * infinities and out-of-range magnitudes.
 */
SSP_FORCEINLINE __m128 ssp_frcz_ss_REF( __m128 a, __m128 b )
{
    ssp_m128 src, dst;
    int whole;

    src.f = a;
    dst.f = b;      /* upper lanes of the result come from b */

    whole = (int) src.f32[0];
    dst.f32[0] = src.f32[0] - whole;

    return dst.f;
}

/**
 * Reference implementation of _mm_hadd_epi16 [SSSE3].
 * Source: SSEPlus_emulation_REF.h, line 1157.
 *
 * Horizontal add of adjacent signed 16-bit pairs: result lanes 0..3 hold
 * the pair sums of a, lanes 4..7 the pair sums of b. Sums are stored back
 * into 16-bit lanes without saturation.
 */
SSP_FORCEINLINE __m128i ssp_hadd_epi16_REF( __m128i a, __m128i b )
{
    ssp_m128 x, y;
    int pair;
    x.i = a;
    y.i = b;

    /* Ascending order: lane 'pair' is only overwritten once the inputs it
       held (lanes 2*pair and 2*pair+1 of an earlier step) are consumed. */
    for( pair = 0; pair < 4; ++pair )
        x.s16[pair] = x.s16[2*pair] + x.s16[2*pair + 1];

    for( pair = 0; pair < 4; ++pair )
        x.s16[4 + pair] = y.s16[2*pair] + y.s16[2*pair + 1];

    return x.i;
}

/**
 * Reference implementation of _mm_hadd_epi32 [SSSE3].
 * Source: SSEPlus_emulation_REF.h, line 1175.
 *
 * Horizontal add of adjacent signed 32-bit pairs: result lanes 0..1 hold
 * the pair sums of a, lanes 2..3 the pair sums of b.
 */
SSP_FORCEINLINE __m128i ssp_hadd_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 x, y;
    int pair;
    x.i = a;
    y.i = b;

    /* Ascending order keeps unread inputs of a intact. */
    for( pair = 0; pair < 2; ++pair )
        x.s32[pair] = x.s32[2*pair] + x.s32[2*pair + 1];

    for( pair = 0; pair < 2; ++pair )
        x.s32[2 + pair] = y.s32[2*pair] + y.s32[2*pair + 1];

    return x.i;
}

/**
 * Reference implementation of _mm_hadd_pd.
 * Source: SSEPlus_emulation_REF.h, line 1286.
 * NOTE(review): the generated page tags this intrinsic [SSSE3]; Intel's
 * intrinsics guide lists HADDPD under SSE3 -- confirm the tag.
 *
 * Result lane 0 is a[0] + a[1]; result lane 1 is b[0] + b[1].
 */
SSP_FORCEINLINE __m128d ssp_hadd_pd_REF( __m128d a, __m128d b )
{
    ssp_m128 x, y;
    ssp_f64 sum_a, sum_b;
    x.d = a;
    y.d = b;

    sum_a = x.f64[0] + x.f64[1];
    sum_b = y.f64[0] + y.f64[1];

    x.f64[0] = sum_a;
    x.f64[1] = sum_b;
    return x.d;
}

/**
 * Reference implementation of _mm_hadd_pi16 [SSSE3].
 * Source: SSEPlus_emulation_REF.h, line 1192.
 * NOTE: The user must call _mm_empty() after a call to this function.
 *
 * Horizontal add of adjacent signed 16-bit pairs in 64-bit operands:
 * result lanes 0..1 hold the pair sums of a, lanes 2..3 those of b.
 */
SSP_FORCEINLINE __m64 ssp_hadd_pi16_REF( __m64 a, __m64 b )
{
    ssp_m64 x, y;
    int pair;
    x.m64 = a;
    y.m64 = b;

    /* Ascending order keeps unread inputs of a intact. */
    for( pair = 0; pair < 2; ++pair )
        x.s16[pair] = x.s16[2*pair] + x.s16[2*pair + 1];

    for( pair = 0; pair < 2; ++pair )
        x.s16[2 + pair] = y.s16[2*pair] + y.s16[2*pair + 1];

    return x.m64;
}

/**
 * Reference implementation of _mm_hadd_pi32 [SSSE3].
 * Source: SSEPlus_emulation_REF.h, line 1209.
 * NOTE: The user must call _mm_empty() after a call to this function.
 *
 * Result lane 0 is a[0] + a[1]; result lane 1 is b[0] + b[1]
 * (signed 32-bit lanes of the 64-bit operands).
 */
SSP_FORCEINLINE __m64 ssp_hadd_pi32_REF( __m64 a, __m64 b )
{
    ssp_m64 x, y;
    x.m64 = a;
    y.m64 = b;

    x.s32[0] += x.s32[1];               /* pair sum of a */
    x.s32[1]  = y.s32[0] + y.s32[1];    /* pair sum of b */

    return x.m64;
}

/**
 * Reference implementation of _mm_hadd_ps.
 * Source: SSEPlus_emulation_REF.h, line 1272.
 * NOTE(review): the generated page tags this intrinsic [SSSE3]; Intel's
 * intrinsics guide lists HADDPS under SSE3 -- confirm the tag.
 *
 * Horizontal add of adjacent float pairs: result lanes 0..1 hold the
 * pair sums of a, lanes 2..3 the pair sums of b.
 */
SSP_FORCEINLINE __m128 ssp_hadd_ps_REF( __m128 a, __m128 b )
{
    ssp_m128 x, y;
    int pair;
    x.f = a;
    y.f = b;

    /* Ascending order keeps unread inputs of a intact. */
    for( pair = 0; pair < 2; ++pair )
        x.f32[pair] = x.f32[2*pair] + x.f32[2*pair + 1];

    for( pair = 0; pair < 2; ++pair )
        x.f32[2 + pair] = y.f32[2*pair] + y.f32[2*pair + 1];

    return x.f;
}

/**
 * Reference implementation of _mm_haddd_epi16 / phaddwd [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 92.
 *
 * Adds each adjacent pair of signed 16-bit elements of a and stores the
 * sum in the corresponding signed 32-bit result lane.
 */
SSP_FORCEINLINE __m128i ssp_haddd_epi16_REF( __m128i a )
{
    ssp_m128 src, dst;
    int lane;
    src.i = a;

    for( lane = 0; lane < 4; ++lane )
        dst.s32[lane] = src.s16[2*lane] + src.s16[2*lane + 1];

    return dst.i;
}

/**
 * Reference implementation of _mm_haddd_epi8 / phaddbd [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 106.
 *
 * Adds each group of four adjacent signed 8-bit elements of a and stores
 * the sum in the corresponding signed 32-bit result lane.
 */
SSP_FORCEINLINE __m128i ssp_haddd_epi8_REF( __m128i a )
{
    ssp_m128 src, dst;
    int lane, k;
    src.i = a;

    for( lane = 0; lane < 4; ++lane )
    {
        int total = 0;
        for( k = 0; k < 4; ++k )
            total += src.s8[4*lane + k];
        dst.s32[lane] = total;
    }

    return dst.i;
}

/**
 * Reference implementation of _mm_haddd_epu16 / phadduwd [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 120.
 *
 * Adds each adjacent pair of unsigned 16-bit elements of a and stores the
 * sum in the corresponding unsigned 32-bit result lane.
 */
SSP_FORCEINLINE __m128i ssp_haddd_epu16_REF( __m128i a )
{
    ssp_m128 src, dst;
    int lane;
    src.i = a;

    for( lane = 0; lane < 4; ++lane )
        dst.u32[lane] = src.u16[2*lane] + src.u16[2*lane + 1];

    return dst.i;
}

/**
 * Reference implementation of _mm_haddd_epu8 / phaddubd [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 134.
 *
 * Adds each group of four adjacent unsigned 8-bit elements of a and
 * stores the sum in the corresponding unsigned 32-bit result lane.
 */
SSP_FORCEINLINE __m128i ssp_haddd_epu8_REF( __m128i a )
{
    ssp_m128 src, dst;
    int lane, k;
    src.i = a;

    for( lane = 0; lane < 4; ++lane )
    {
        int total = 0;
        for( k = 0; k < 4; ++k )
            total += src.u8[4*lane + k];
        dst.u32[lane] = total;
    }

    return dst.i;
}

/**
 * Reference implementation of _mm_haddq_epi16 / phaddwq [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 148.
 *
 * Adds each group of four adjacent signed 16-bit elements of a and stores
 * the sum in the corresponding signed 64-bit result lane.
 */
SSP_FORCEINLINE __m128i ssp_haddq_epi16_REF( __m128i a )
{
    ssp_m128 src, dst;
    int lane, k;
    src.i = a;

    for( lane = 0; lane < 2; ++lane )
    {
        int total = 0;
        for( k = 0; k < 4; ++k )
            total += src.s16[4*lane + k];
        dst.s64[lane] = total;
    }

    return dst.i;
}

/**
 * Reference implementation of _mm_haddq_epi32 / phadddq [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 160.
 *
 * Adds each adjacent pair of signed 32-bit elements of a in 64-bit
 * arithmetic and stores the sum in the corresponding signed 64-bit lane.
 */
SSP_FORCEINLINE __m128i ssp_haddq_epi32_REF( __m128i a )
{
    ssp_m128 src, dst;
    int lane;
    src.i = a;

    for( lane = 0; lane < 2; ++lane )
    {
        /* Widen before adding so the sum cannot overflow 32 bits. */
        const long long even = src.s32[2*lane];
        const long long odd  = src.s32[2*lane + 1];
        dst.s64[lane] = even + odd;
    }

    return dst.i;
}

/**
 * Reference implementation of _mm_haddq_epi8 / phaddbq [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 172.
 *
 * Adds each group of eight adjacent signed 8-bit elements of a and stores
 * the sum in the corresponding signed 64-bit result lane.
 */
SSP_FORCEINLINE __m128i ssp_haddq_epi8_REF( __m128i a )
{
    ssp_m128 src, dst;
    int lane, k;
    src.i = a;

    for( lane = 0; lane < 2; ++lane )
    {
        int total = 0;
        for( k = 0; k < 8; ++k )
            total += src.s8[8*lane + k];
        dst.s64[lane] = total;
    }

    return dst.i;
}

/**
 * Reference implementation of _mm_haddq_epu16 / phadduwq [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 184.
 *
 * Adds each group of four adjacent unsigned 16-bit elements of a and
 * stores the sum in the corresponding unsigned 64-bit result lane.
 */
SSP_FORCEINLINE __m128i ssp_haddq_epu16_REF( __m128i a )
{
    ssp_m128 src, dst;
    int lane, k;
    src.i = a;

    for( lane = 0; lane < 2; ++lane )
    {
        int total = 0;
        for( k = 0; k < 4; ++k )
            total += src.u16[4*lane + k];
        dst.u64[lane] = total;
    }

    return dst.i;
}

/**
 * Reference implementation of _mm_haddq_epu32 / phaddudq [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 196.
 *
 * Adds each adjacent pair of unsigned 32-bit elements of a in 64-bit
 * arithmetic and stores the sum in the corresponding unsigned 64-bit lane.
 */
SSP_FORCEINLINE __m128i ssp_haddq_epu32_REF( __m128i a )
{
    ssp_m128 src, dst;
    int lane;
    src.i = a;

    for( lane = 0; lane < 2; ++lane )
    {
        /* Widen before adding so the carry out of bit 31 is kept. */
        const long long even = src.u32[2*lane];
        const long long odd  = src.u32[2*lane + 1];
        dst.u64[lane] = even + odd;
    }

    return dst.i;
}

/**
 * Reference implementation of _mm_haddq_epu8 / phaddubq [SSE5].
 * Source: SSEPlus_emulation_REF.h, line 208.
 *
 * Adds each group of eight adjacent unsigned 8-bit elements of a and
 * stores the sum in the corresponding unsigned 64-bit result lane.
 */
SSP_FORCEINLINE __m128i ssp_haddq_epu8_REF( __m128i a )
{
    ssp_m128 src, dst;
    int lane, k;
    src.i = a;

    for( lane = 0; lane < 2; ++lane )
    {
        int total = 0;
        for( k = 0; k < 8; ++k )
            total += src.u8[8*lane + k];
        dst.u64[lane] = total;
    }

    return dst.i;
}

SSP_FORCEINLINE __m128i ssp_hadds_epi16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_hadds_epi16 [SSSE3]. (Searches MSDN)

Definition at line 1222 of file SSEPlus_emulation_REF.h.

01223 {
01224     ssp_m128 A, B;
01225         int answer[8];
01226     A.i = a;
01227     B.i = b;
01228 
01229         answer[0] = A.s16[0] + A.s16[1];
01230     A.s16[0]  = (ssp_s16) (SSP_SATURATION(answer[0], 32767, -32768));
01231         answer[1] = A.s16[2] + A.s16[3];
01232     A.s16[1]  = (ssp_s16) (SSP_SATURATION(answer[1], 32767, -32768));
01233         answer[2] = A.s16[4] + A.s16[5];
01234     A.s16[2]  = (ssp_s16) (SSP_SATURATION(answer[2], 32767, -32768));
01235         answer[3] = A.s16[6] + A.s16[7];
01236     A.s16[3]  = (ssp_s16) (SSP_SATURATION(answer[3], 32767, -32768));
01237         answer[4] = B.s16[0] + B.s16[1];
01238     A.s16[4]  = (ssp_s16) (SSP_SATURATION(answer[4], 32767, -32768));
01239         answer[5] = B.s16[2] + B.s16[3];
01240     A.s16[5]  = (ssp_s16) (SSP_SATURATION(answer[5], 32767, -32768));
01241         answer[6] = B.s16[4] + B.s16[5];
01242     A.s16[6]  = (ssp_s16) (SSP_SATURATION(answer[6], 32767, -32768));
01243         answer[7] = B.s16[6] + B.s16[7];
01244     A.s16[7]  = (ssp_s16) (SSP_SATURATION(answer[7], 32767, -32768));
01245 
01246         return A.i;
01247 }

SSP_FORCEINLINE __m64 ssp_hadds_pi16_REF ( __m64  a,
__m64  b 
)

Reference implementation of _mm_hadds_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.

Definition at line 1252 of file SSEPlus_emulation_REF.h.

01253 {
01254     ssp_m64 A, B;
01255         int answer[4];
01256     A.m64 = a;
01257     B.m64 = b;
01258 
01259         answer[0] = A.s16[0] + A.s16[1];
01260     A.s16[0]  = (ssp_s16) (SSP_SATURATION(answer[0], 32767, -32768));
01261         answer[1] = A.s16[2] + A.s16[3];
01262     A.s16[1]  = (ssp_s16) (SSP_SATURATION(answer[1], 32767, -32768));
01263         answer[2] = B.s16[0] + B.s16[1];
01264     A.s16[2]  = (ssp_s16) (SSP_SATURATION(answer[2], 32767, -32768));
01265         answer[3] = B.s16[2] + B.s16[3];
01266     A.s16[3]  = (ssp_s16) (SSP_SATURATION(answer[3], 32767, -32768));
01267 
01268         return A.m64;
01269 }

SSP_FORCEINLINE __m128i ssp_haddw_epi8_REF ( __m128i  a  ) 

Reference implementation of _mm_haddw_epi8/ phaddbw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 220 of file SSEPlus_emulation_REF.h.

00221 {
00222         ssp_m128 A, B;
00223         A.i = a;
00224 
00225         B.s16[0] = A.s8[0] + A.s8[1];
00226         B.s16[1] = A.s8[2] + A.s8[3];
00227         B.s16[2] = A.s8[4] + A.s8[5];
00228         B.s16[3] = A.s8[6] + A.s8[7];
00229         B.s16[4] = A.s8[8] + A.s8[9];
00230         B.s16[5] = A.s8[10] + A.s8[11];
00231         B.s16[6] = A.s8[12] + A.s8[13];
00232         B.s16[7] = A.s8[14] + A.s8[15];
00233 
00234         return B.i;
00235 }

SSP_FORCEINLINE __m128i ssp_haddw_epu8_REF ( __m128i  a  ) 

Reference implementation of _mm_haddw_epu8/ phaddubw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 238 of file SSEPlus_emulation_REF.h.

00239 {
00240         ssp_m128 A, B;
00241         A.i = a;
00242 
00243         B.u16[0] = A.u8[0] + A.u8[1];
00244         B.u16[1] = A.u8[2] + A.u8[3];
00245         B.u16[2] = A.u8[4] + A.u8[5];
00246         B.u16[3] = A.u8[6] + A.u8[7];
00247         B.u16[4] = A.u8[8] + A.u8[9];
00248         B.u16[5] = A.u8[10] + A.u8[11];
00249         B.u16[6] = A.u8[12] + A.u8[13];
00250         B.u16[7] = A.u8[14] + A.u8[15];
00251 
00252         return B.i;
00253 }

SSP_FORCEINLINE __m128i ssp_hsub_epi16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_hsub_epi16 [SSSE3]. (Searches MSDN)

Definition at line 1302 of file SSEPlus_emulation_REF.h.

01303 {
01304     ssp_m128 A, B;
01305     A.i = a;
01306     B.i = b;
01307 
01308     A.s16[0] = A.s16[0] - A.s16[1];
01309     A.s16[1] = A.s16[2] - A.s16[3];
01310     A.s16[2] = A.s16[4] - A.s16[5];
01311     A.s16[3] = A.s16[6] - A.s16[7];
01312     A.s16[4] = B.s16[0] - B.s16[1];
01313     A.s16[5] = B.s16[2] - B.s16[3];
01314     A.s16[6] = B.s16[4] - B.s16[5];
01315     A.s16[7] = B.s16[6] - B.s16[7];
01316 
01317         return A.i;
01318 }

SSP_FORCEINLINE __m128i ssp_hsub_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_hsub_epi32 [SSSE3]. (Searches MSDN)

Definition at line 1321 of file SSEPlus_emulation_REF.h.

01322 {
01323     ssp_m128 A, B;
01324     A.i = a;
01325     B.i = b;
01326 
01327     A.s32[0] = A.s32[0] - A.s32[1];
01328     A.s32[1] = A.s32[2] - A.s32[3];
01329     A.s32[2] = B.s32[0] - B.s32[1];
01330     A.s32[3] = B.s32[2] - B.s32[3];
01331 
01332     return A.i;
01333 }

SSP_FORCEINLINE __m128d ssp_hsub_pd_REF ( __m128d  a,
__m128d  b 
)

Reference implementation of _mm_hsub_pd [SSE3]. (Searches MSDN)

Definition at line 1432 of file SSEPlus_emulation_REF.h.

01433 {
01434     ssp_m128 A, B;
01435     A.d = a;
01436     B.d = b;
01437 
01438     A.f64[0] = A.f64[0] - A.f64[1];
01439     A.f64[1] = B.f64[0] - B.f64[1];
01440     return A.d;
01441 }

SSP_FORCEINLINE __m64 ssp_hsub_pi16_REF ( __m64  a,
__m64  b 
)

Reference implementation of _mm_hsub_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.

Definition at line 1338 of file SSEPlus_emulation_REF.h.

01339 {
01340     ssp_m64 A, B;
01341     A.m64 = a;
01342     B.m64 = b;
01343 
01344     A.s16[0] = A.s16[0] - A.s16[1];
01345     A.s16[1] = A.s16[2] - A.s16[3];
01346     A.s16[2] = B.s16[0] - B.s16[1];
01347     A.s16[3] = B.s16[2] - B.s16[3];
01348 
01349         return A.m64;
01350 }

SSP_FORCEINLINE __m64 ssp_hsub_pi32_REF ( __m64  a,
__m64  b 
)

Reference implementation of _mm_hsub_pi32 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.

Definition at line 1355 of file SSEPlus_emulation_REF.h.

01356 {
01357     ssp_m64 A, B;
01358     A.m64 = a;
01359     B.m64 = b;
01360 
01361     A.s32[0] = A.s32[0] - A.s32[1];
01362     A.s32[1] = B.s32[0] - B.s32[1];
01363 
01364     return A.m64;
01365 }

SSP_FORCEINLINE __m128 ssp_hsub_ps_REF ( __m128  a,
__m128  b 
)

Reference implementation of _mm_hsub_ps [SSE3]. (Searches MSDN)

Definition at line 1418 of file SSEPlus_emulation_REF.h.

01419 {
01420     ssp_m128 A, B;
01421     A.f = a;
01422     B.f = b;
01423 
01424     A.f32[0] = A.f32[0] - A.f32[1];
01425     A.f32[1] = A.f32[2] - A.f32[3];
01426     A.f32[2] = B.f32[0] - B.f32[1];
01427     A.f32[3] = B.f32[2] - B.f32[3];
01428     return A.f;
01429 }

SSP_FORCEINLINE __m128i ssp_hsubd_epi16_REF ( __m128i  a  ) 

Reference implementation of _mm_hsubd_epi16/ phsubwd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 256 of file SSEPlus_emulation_REF.h.

00257 {
00258         ssp_m128 A, B;
00259         A.i = a;
00260 
00261         B.s32[0] = A.s16[1] - A.s16[0];
00262         B.s32[1] = A.s16[3] - A.s16[2];
00263         B.s32[2] = A.s16[5] - A.s16[4];
00264         B.s32[3] = A.s16[7] - A.s16[6];
00265 
00266         return B.i;
00267 }

SSP_FORCEINLINE __m128i ssp_hsubq_epi32_REF ( __m128i  a  ) 

Reference implementation of _mm_hsubq_epi32/ phsubdq [SSE5]. (SSE5 .pdf documentation here)

Definition at line 270 of file SSEPlus_emulation_REF.h.

00271 {
00272         ssp_m128 A, B;
00273         A.i = a;
00274 
00275         B.s64[0] = (long long)A.s32[1] - A.s32[0];
00276         B.s64[1] = (long long)A.s32[3] - A.s32[2];
00277 
00278         return B.i;
00279 }

SSP_FORCEINLINE __m128i ssp_hsubs_epi16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_hsubs_epi16 [SSSE3]. (Searches MSDN)

Definition at line 1368 of file SSEPlus_emulation_REF.h.

01369 {
01370     ssp_m128 A, B;
01371         int answer[8];
01372     A.i = a;
01373     B.i = b;
01374 
01375         answer[0] = A.s16[0] - A.s16[1];
01376     A.s16[0]  = (ssp_s16) (SSP_SATURATION(answer[0], 32767, -32768));
01377         answer[1] = A.s16[2] - A.s16[3];
01378     A.s16[1]  = (ssp_s16) (SSP_SATURATION(answer[1], 32767, -32768));
01379         answer[2] = A.s16[4] - A.s16[5];
01380     A.s16[2]  = (ssp_s16) (SSP_SATURATION(answer[2], 32767, -32768));
01381         answer[3] = A.s16[6] - A.s16[7];
01382     A.s16[3]  = (ssp_s16) (SSP_SATURATION(answer[3], 32767, -32768));
01383         answer[4] = B.s16[0] - B.s16[1];
01384     A.s16[4]  = (ssp_s16) (SSP_SATURATION(answer[4], 32767, -32768));
01385         answer[5] = B.s16[2] - B.s16[3];
01386     A.s16[5]  = (ssp_s16) (SSP_SATURATION(answer[5], 32767, -32768));
01387         answer[6] = B.s16[4] - B.s16[5];
01388     A.s16[6]  = (ssp_s16) (SSP_SATURATION(answer[6], 32767, -32768));
01389         answer[7] = B.s16[6] - B.s16[7];
01390     A.s16[7]  = (ssp_s16) (SSP_SATURATION(answer[7], 32767, -32768));
01391 
01392         return A.i;
01393 }

SSP_FORCEINLINE __m64 ssp_hsubs_pi16_REF ( __m64  a,
__m64  b 
)

Reference implementation of _mm_hsubs_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.

Definition at line 1398 of file SSEPlus_emulation_REF.h.

01399 {
01400     ssp_m64 A, B;
01401         int answer[4];
01402     A.m64 = a;
01403     B.m64 = b;
01404 
01405         answer[0] = A.s16[0] - A.s16[1];
01406     A.s16[0]  = (ssp_s16) (SSP_SATURATION(answer[0], 32767, -32768));
01407         answer[1] = A.s16[2] - A.s16[3];
01408     A.s16[1]  = (ssp_s16) (SSP_SATURATION(answer[1], 32767, -32768));
01409         answer[2] = B.s16[0] - B.s16[1];
01410     A.s16[2]  = (ssp_s16) (SSP_SATURATION(answer[2], 32767, -32768));
01411         answer[3] = B.s16[2] - B.s16[3];
01412     A.s16[3]  = (ssp_s16) (SSP_SATURATION(answer[3], 32767, -32768));
01413 
01414         return A.m64;
01415 }

SSP_FORCEINLINE __m128i ssp_hsubw_epi8_REF ( __m128i  a  ) 

Reference implementation of _mm_hsubw_epi8/ phsubbw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 282 of file SSEPlus_emulation_REF.h.

00283 {
00284         ssp_m128 A, B;
00285         A.i = a;
00286 
00287         B.s16[0] = A.s8[1] - A.s8[0];
00288         B.s16[1] = A.s8[3] - A.s8[2];
00289         B.s16[2] = A.s8[5] - A.s8[4];
00290         B.s16[3] = A.s8[7] - A.s8[6];
00291         B.s16[4] = A.s8[9] - A.s8[8];
00292         B.s16[5] = A.s8[11] - A.s8[10];
00293         B.s16[6] = A.s8[13] - A.s8[12];
00294         B.s16[7] = A.s8[15] - A.s8[14];
00295 
00296         return B.i;
00297 }

SSP_FORCEINLINE __m128i ssp_insert_epi32_REF ( __m128i  a,
int  b,
const int  ndx 
)

Reference implementation of _mm_insert_epi32 [SSE4.1]. (Searches MSDN)

Definition at line 1457 of file SSEPlus_emulation_REF.h.

01458 {
01459     ssp_m128 A;
01460     A.i = a;
01461 
01462     A.s32[ndx & 0x3] = b;
01463     return A.i;
01464 }

SSP_FORCEINLINE __m128i ssp_insert_epi64_REF ( __m128i  a,
ssp_s64  b,
const int  ndx 
)

Reference implementation of _mm_insert_epi64 [SSE4.1]. (Searches MSDN)

Definition at line 1467 of file SSEPlus_emulation_REF.h.

01468 {
01469     ssp_m128 A;
01470     A.i = a;
01471 
01472     A.s64[ndx & 0x1] = b;
01473     return A.i;
01474 }

SSP_FORCEINLINE __m128i ssp_insert_epi8_REF ( __m128i  a,
int  b,
const int  ndx 
)

Reference implementation of _mm_insert_epi8 [SSE4.1]. (Searches MSDN)

Definition at line 1447 of file SSEPlus_emulation_REF.h.

01448 {
01449     ssp_m128 A;
01450     A.i = a;
01451 
01452     A.s8[ndx & 0xF] = (ssp_s8)b;
01453     return A.i;
01454 }

SSP_FORCEINLINE __m128 ssp_insert_ps_REF ( __m128  a,
__m128  b,
const int  sel 
)

Reference implementation of _mm_insert_ps [SSE4.1]. (Searches MSDN)

Definition at line 1477 of file SSEPlus_emulation_REF.h.

01478 {
01479     ssp_f32 tmp;
01480     int count_d,zmask;
01481 
01482     ssp_m128 A,B;
01483     A.f = a;
01484     B.f = b;
01485 
01486     tmp     = B.f32[(sel & 0xC0)>>6];   // 0xC0 = sel[7:6]
01487     count_d = (sel & 0x30)>>4;          // 0x30 = sel[5:4]
01488     zmask   = sel & 0x0F;               // 0x0F = sel[3:0]
01489 
01490     A.f32[count_d] = tmp;
01491 
01492     A.f32[0] = (zmask & 0x1) ? 0 : A.f32[0];
01493     A.f32[1] = (zmask & 0x2) ? 0 : A.f32[1];
01494     A.f32[2] = (zmask & 0x4) ? 0 : A.f32[2];
01495     A.f32[3] = (zmask & 0x8) ? 0 : A.f32[3];
01496     return A.f;
01497 }

SSP_FORCEINLINE __m128i ssp_insert_si64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_insert_si64 [SSE4a]. (Searches MSDN)

Definition at line 1500 of file SSEPlus_emulation_REF.h.

01501 {   // Inserts the low `len` bits of b[63:0] into a[63:0] at bit offset `ndx`; a[127:64] is returned unchanged.
01502     ssp_u32  ndx, len;
01503     ssp_u64  mask;                                // Unsigned so that the shifts below are well defined.
01504     ssp_m128 A, B;
01505     B.i = b;
01506     ndx = (ssp_u32)((B.u64[1] & 0x3F00) >> 8);    // Bit index field:    bits 13:8 of the upper qword of b.
01507     len = (ssp_u32)((B.u64[1] & 0x003F));         // Field length field: bits 5:0 of the upper qword of b.
01508 
01509     if( ( (ndx + len) > 64 ) ||                   // Field would run past bit 63, or
01510         ( (len == 0) && (ndx > 0) ) )             // a 64-bit field (len == 0) at a non-zero offset:
01511         return a;                                 // invalid combination, a is returned untouched.
01512 
01513     A.i = a;
01514     if( (len == 0 ) && (ndx == 0) )               // A value of zero for field length is interpreted as 64:
01515     {                                             // the whole low qword of b replaces the low qword of a.
01516         A.u64[0] = B.u64[0];
01517         return A.i;
01518     }
01519 
01520     // len is 1..63 here; both len == 0 cases returned above, so the 64-bit shifts below stay in range.
01521     mask = ~(~(ssp_u64)0 << len);                 // Low `len` bits set (64-bit shift; `-1 << len` was a 32-bit int shift).
01522     B.u64[0]  = B.u64[0] & mask;
01523     B.u64[0]  = B.u64[0] << ndx;
01524     mask      = ~(mask << ndx);                   // Clear the destination field in a.
01525     A.u64[0]  = A.u64[0] & mask;
01526     A.u64[0] |= B.u64[0];
01527     return A.i;
01528 }

SSP_FORCEINLINE __m128i ssp_inserti_si64_REF ( __m128i  a,
__m128i  b,
int  len,
int  ndx 
)

Reference implementation of _mm_inserti_si64 [SSE4a]. (Searches MSDN)

Definition at line 1531 of file SSEPlus_emulation_REF.h.

01532 {   // Inserts the low `len` bits of b[63:0] into a[63:0] at bit offset `ndx`; a[127:64] is returned unchanged.
01533     ssp_u64 mask;     // Unsigned so that the shifts below are well defined.
01534     ssp_m128 A, B;
01535     A.i = a;
01536     ndx = ndx & 0x3F; // ndx % 64
01537     len = len & 0x3F; // len % 64
01538 
01539     if( ( (ndx + len) > 64 ) ||                   // Field would run past bit 63, or
01540         ( (len == 0) && (ndx > 0) ) )             // a 64-bit field (len == 0) at a non-zero offset:
01541         return a;                                 // invalid combination, a is returned untouched.
01542 
01543     B.i = b;
01544     if( (len == 0 ) && (ndx == 0) )               // A value of zero for field length is interpreted as 64:
01545     {                                             // the whole low qword of b replaces the low qword of a.
01546         A.u64[0] = B.u64[0];
01547         return A.i;
01548     }
01549 
01550     // len is 1..63 here; both len == 0 cases returned above, so the 64-bit shifts below stay in range.
01551     mask = ~(~(ssp_u64)0 << len);                 // Low `len` bits set (64-bit shift; `-1 << len` was a 32-bit int shift).
01552     B.u64[0]  = B.u64[0] & mask;
01553     B.u64[0]  = B.u64[0] << ndx;
01554     mask      = ~(mask << ndx);                   // Clear the destination field in a.
01555     A.u64[0]  = A.u64[0] & mask;
01556     A.u64[0] |= B.u64[0];
01557     return A.i;
01558 }

SSP_FORCEINLINE __m128i ssp_lddqu_si128_REF ( __m128i const *  p  ) 

Reference implementation of _mm_lddqu_si128 [SSE3]. (Searches MSDN)

Definition at line 1575 of file SSEPlus_emulation_REF.h.

01576 {
01577     return *p;
01578 }

SSP_FORCEINLINE __m128d ssp_loaddup_pd_REF ( double const *  dp  ) 

Reference implementation of _mm_loaddup_pd [SSE3]. (Searches MSDN)

Definition at line 1566 of file SSEPlus_emulation_REF.h.

01567 {
01568     ssp_m128 a;
01569     a.f64[0] = *dp;
01570     a.f64[1] = *dp;
01571     return a.d;
01572 }

SSP_FORCEINLINE unsigned short ssp_lzcnt16_REF ( unsigned short  val  ) 

Reference implementation of __lzcnt16 [SSE4a]. (Searches MSDN)

Definition at line 2893 of file SSEPlus_emulation_REF.h.

02894 {
02895     // Counts the leading zero bits of the 16-bit val; returns 16 when val is 0.
02896     if( !val )
02897         return 16;
02898     // Binary Search Tree of possible output values
02899     else if( val > 0x00FF )
02900     {
02901         if( val > 0x0FFF )
02902         {
02903             if( val > 0x3FFF )
02904             {
02905                 if( val > 0x7FFF )
02906                     return 0;
02907                 else
02908                     return 1;
02909             }
02910             else // val <= 0x3FFF
02911             {
02912                 if( val > 0x1FFF )
02913                     return 2;
02914                 else
02915                     return 3;
02916             }
02917         }
02918         else // val <= 0x0FFF
02919         {
02920             if( val > 0x03FF )
02921             {
02922                 if( val > 0x07FF )
02923                     return 4;
02924                 else
02925                     return 5;
02926             }
02927             else // val <= 0x03FF
02928             {
02929                 if( val > 0x01FF )
02930                     return 6;
02931                 else
02932                     return 7;
02933             }
02934         }
02935     }
02936     else // val <= 0x00FF
02937     {
02938         if( val > 0x000F )
02939         {
02940             if( val > 0x003F  )
02941             {
02942                 if( val > 0x007F  )
02943                     return 8;
02944                 else
02945                     return 9;
02946             }
02947             else // val <= 0x003F
02948             {
02949                 if( val > 0x001F)
02950                     return 10;
02951                 else
02952                     return 11;
02953             }
02954         }
02955         else // val <= 0x000F
02956         {
02957             if( val > 0x0003  )
02958             {
02959                 if( val > 0x0007  )
02960                     return 12;
02961                 else
02962                     return 13;
02963             }
02964             else // val <= 0x0003
02965             {
02966                 if( val > 0x0001)
02967                     return 14;
02968                 else
02969                     return 15;
02970             }
02971         }
02972     }
02973 }

SSP_FORCEINLINE ssp_u64 ssp_lzcnt64_REF ( ssp_u64  val  ) 

Reference implementation of __lzcnt64 [SSE4a]. (Searches MSDN)

Definition at line 2984 of file SSEPlus_emulation_REF.h.

02985 {
02986     ssp_u64 cnt;
02987     cnt = ssp_lzcnt_REF( (ssp_u32)(val>>32) );
02988     if( cnt == 32 )
02989         cnt += ssp_lzcnt_REF( (ssp_u32)(val & 0x00000000FFFFFFFF) );
02990     return cnt;
02991 }

SSP_FORCEINLINE unsigned int ssp_lzcnt_REF ( unsigned int  val  ) 

Reference implementation of __lzcnt [SSE4a]. (Searches MSDN)

Definition at line 2975 of file SSEPlus_emulation_REF.h.

02976 {
02977     ssp_u32 cnt;
02978     cnt = ssp_lzcnt16_REF( (ssp_u16)(val>>16) );
02979     if( cnt == 16 )
02980         cnt += ssp_lzcnt16_REF( (ssp_u16)(val & 0x0000FFFF) );
02981     return cnt;
02982 }

SSP_FORCEINLINE __m128i ssp_macc_epi16_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_macc_epi16/ pmacsww [SSE5]. (SSE5 .pdf documentation here)

Definition at line 304 of file SSEPlus_emulation_REF.h.

00305 {
00306     ssp_m128 A,B,C;
00307     A.i = a;
00308     B.i = b;
00309     C.i = c;
00310 
00311     A.s16[0] = A.s16[0] * B.s16[0] + C.s16[0];
00312     A.s16[1] = A.s16[1] * B.s16[1] + C.s16[1];
00313     A.s16[2] = A.s16[2] * B.s16[2] + C.s16[2];
00314     A.s16[3] = A.s16[3] * B.s16[3] + C.s16[3];
00315     A.s16[4] = A.s16[4] * B.s16[4] + C.s16[4];
00316     A.s16[5] = A.s16[5] * B.s16[5] + C.s16[5];
00317     A.s16[6] = A.s16[6] * B.s16[6] + C.s16[6];
00318     A.s16[7] = A.s16[7] * B.s16[7] + C.s16[7];
00319 
00320     return A.i;
00321 }

SSP_FORCEINLINE __m128i ssp_macc_epi32_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_macc_epi32/ pmacsdd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 324 of file SSEPlus_emulation_REF.h.

00325 {
00326     ssp_m128 A,B,C;
00327     A.i = a;
00328     B.i = b;
00329     C.i = c;
00330 
00331     A.s32[0] = A.s32[0] * B.s32[0] + C.s32[0];
00332     A.s32[1] = A.s32[1] * B.s32[1] + C.s32[1];
00333     A.s32[2] = A.s32[2] * B.s32[2] + C.s32[2];
00334     A.s32[3] = A.s32[3] * B.s32[3] + C.s32[3];
00335 
00336     return A.i;
00337 }

SSP_FORCEINLINE __m128d ssp_macc_pd_REF ( __m128d  a,
__m128d  b,
__m128d  c 
)

Reference implementation of _mm_macc_pd/fmaddpd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 355 of file SSEPlus_emulation_REF.h.

00356 {
00357     ssp_m128 A,B,C;
00358     A.d = a;
00359     B.d = b;
00360     C.d = c;
00361 
00362     A.f64[0] = A.f64[0] * B.f64[0] + C.f64[0];
00363     A.f64[1] = A.f64[1] * B.f64[1] + C.f64[1]; 
00364     return A.d;
00365 }

SSP_FORCEINLINE __m128 ssp_macc_ps_REF ( __m128  a,
__m128  b,
__m128  c 
)

Reference implementation of _mm_macc_ps/fmaddps [SSE5]. (SSE5 .pdf documentation here)

Definition at line 340 of file SSEPlus_emulation_REF.h.

00341 {
00342     ssp_m128 A,B,C;
00343     A.f = a;
00344     B.f = b;
00345     C.f = c;
00346 
00347     A.f32[0] = A.f32[0] * B.f32[0] + C.f32[0];
00348     A.f32[1] = A.f32[1] * B.f32[1] + C.f32[1];
00349     A.f32[2] = A.f32[2] * B.f32[2] + C.f32[2];
00350     A.f32[3] = A.f32[3] * B.f32[3] + C.f32[3];
00351     return A.f;
00352 }

SSP_FORCEINLINE __m128d ssp_macc_sd_REF ( __m128d  a,
__m128d  b,
__m128d  c 
)

Reference implementation of _mm_macc_sd/fmaddsd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 380 of file SSEPlus_emulation_REF.h.

00380                                                                                                                                           : confirm
00381 {
00382     ssp_m128 A,B,C;
00383     A.d = a;
00384     B.d = b;
00385     C.d = c;
00386 
00387     A.f64[0] = A.f64[0] * B.f64[0] + C.f64[0];   
00388     return A.d;
00389 }

SSP_FORCEINLINE __m128 ssp_macc_ss_REF ( __m128  a,
__m128  b,
__m128  c 
)

Reference implementation of _mm_macc_ss/fmaddss [SSE5]. (SSE5 .pdf documentation here)

Definition at line 368 of file SSEPlus_emulation_REF.h.

00368                                                                                                                                       : confirm
00369 {
00370     ssp_m128 A,B,C;
00371     A.f = a;
00372     B.f = b;
00373     C.f = c;
00374 
00375     A.f32[0] = A.f32[0] * B.f32[0] + C.f32[0];   
00376     return A.f;
00377 }

SSP_FORCEINLINE __m128i ssp_maccd_epi16_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_maccd_epi16/ pmacswd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 392 of file SSEPlus_emulation_REF.h.

00393 {
00394     ssp_m128 A, B, C, D;
00395     A.i = a;
00396     B.i = b;
00397     C.i = c;
00398 
00399     D.s32[0] = A.s16[0] * B.s16[0] + C.s32[0];
00400     D.s32[1] = A.s16[2] * B.s16[2] + C.s32[1];
00401     D.s32[2] = A.s16[4] * B.s16[4] + C.s32[2];
00402     D.s32[3] = A.s16[6] * B.s16[6] + C.s32[3];
00403 
00404     return D.i;
00405 }

SSP_FORCEINLINE __m128i ssp_macchi_epi32_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_macchi_epi32/ pmacsdqh [SSE5]. (SSE5 .pdf documentation here)

Definition at line 408 of file SSEPlus_emulation_REF.h.

00409 {
00410     ssp_m128 A, B, C, D;
00411     A.i = a;
00412     B.i = b;
00413     C.i = c;
00414     // Multiply the odd (high) 32-bit lanes of a and b as 64-bit values, then add the 64-bit lanes of c.
00415     D.s64[0] = (long long)A.s32[1] * B.s32[1] + C.s64[0];   // Widen first: an int*int product would overflow 32 bits.
00416     D.s64[1] = (long long)A.s32[3] * B.s32[3] + C.s64[1];
00417 
00418     return D.i;
00419 }

SSP_FORCEINLINE __m128i ssp_macclo_epi32_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_macclo_epi32/ pmacsdql [SSE5]. (SSE5 .pdf documentation here)

Definition at line 422 of file SSEPlus_emulation_REF.h.

00423 {
00424     ssp_m128 A, B, C, D;
00425     A.i = a;
00426     B.i = b;
00427     C.i = c;
00428     // Multiply the even (low) 32-bit lanes of a and b as 64-bit values, then add the 64-bit lanes of c.
00429     D.s64[0] = (long long)A.s32[0] * B.s32[0] + C.s64[0];   // Widen first: an int*int product would overflow 32 bits.
00430     D.s64[1] = (long long)A.s32[2] * B.s32[2] + C.s64[1];
00431 
00432     return D.i;
00433 }

SSP_FORCEINLINE __m128i ssp_maccs_epi16_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_maccs_epi16/ pmacssww [SSE5]. (SSE5 .pdf documentation here)

Definition at line 438 of file SSEPlus_emulation_REF.h.

00439 {
00440     ssp_m128 A, B, C;
00441         int temp;
00442     A.i = a;
00443     B.i = b;
00444     C.i = c;
00445 
00446         temp = A.s16[0] * B.s16[0] + C.s16[0];
00447         A.s16[0] = SSP_SATURATION(temp, 32767, -32768);
00448         temp = A.s16[1] * B.s16[1] + C.s16[1];
00449     A.s16[1] = SSP_SATURATION(temp, 32767, -32768);
00450         temp = A.s16[2] * B.s16[2] + C.s16[2];
00451     A.s16[2] = SSP_SATURATION(temp, 32767, -32768);
00452         temp = A.s16[3] * B.s16[3] + C.s16[3];
00453     A.s16[3] = SSP_SATURATION(temp, 32767, -32768);
00454         temp = A.s16[4] * B.s16[4] + C.s16[4];
00455     A.s16[4] = SSP_SATURATION(temp, 32767, -32768);
00456         temp = A.s16[5] * B.s16[5] + C.s16[5];
00457     A.s16[5] = SSP_SATURATION(temp, 32767, -32768);
00458         temp = A.s16[6] * B.s16[6] + C.s16[6];
00459     A.s16[6] = SSP_SATURATION(temp, 32767, -32768);
00460         temp = A.s16[7] * B.s16[7] + C.s16[7];
00461     A.s16[7] = SSP_SATURATION(temp, 32767, -32768);
00462 
00463     return A.i;
00464 }

SSP_FORCEINLINE __m128i ssp_maccs_epi32_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_maccs_epi32/ pmacssdd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 467 of file SSEPlus_emulation_REF.h.

00468 {
00469     ssp_m128 A, B, C;
00470         long long temp;
00471     A.i = a;
00472     B.i = b;
00473     C.i = c;
00474 
00475         temp = (long long)A.s32[0] * B.s32[0] + C.s32[0];
00476         A.s32[0] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));
00477         temp = (long long)A.s32[1] * B.s32[1] + C.s32[1];
00478     A.s32[1] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));
00479         temp = (long long)A.s32[2] * B.s32[2] + C.s32[2];
00480     A.s32[2] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));
00481         temp = (long long)A.s32[3] * B.s32[3] + C.s32[3];
00482     A.s32[3] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));
00483 
00484     return A.i;
00485 }

SSP_FORCEINLINE __m128i ssp_maccsd_epi16_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_maccsd_epi16/ pmacsswd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 488 of file SSEPlus_emulation_REF.h.

00489 {
00490     ssp_m128 A, B, C, D;
00491         long long temp;
00492     A.i = a;
00493     B.i = b;
00494     C.i = c;
00495 
00496         //should be able to compare data to see whether overflow/underflow
00497         temp = A.s16[0] * B.s16[0] + (long long)C.s32[0];
00498     D.s32[0] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));
00499         temp = A.s16[2] * B.s16[2] + (long long)C.s32[1];
00500     D.s32[1] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));
00501         temp = A.s16[4] * B.s16[4] + (long long)C.s32[2];
00502     D.s32[2] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));
00503         temp = A.s16[6] * B.s16[6] + (long long)C.s32[3];
00504     D.s32[3] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));
00505 
00506     return D.i;
00507 }

SSP_FORCEINLINE __m128i ssp_maccshi_epi32_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_maccshi_epi32/ pmacssdqh [SSE5]. (SSE5 .pdf documentation here)

Definition at line 510 of file SSEPlus_emulation_REF.h.

00511 {
00512     ssp_m128 A, B, C, D;
00513         long long temp;
00514         unsigned long long signT, signC;
00515     A.i = a;
00516     B.i = b;
00517     C.i = c;
00518 
00519         temp = (long long)A.s32[1] * B.s32[1];
00520         signT = temp & 0x8000000000000000LL;
00521         signC = C.s64[0] & 0x8000000000000000LL;
00522         temp += C.s64[0];
00523         D.s64[0] = (signT==signC) ? ((signT >0) ? ((temp > C.s64[0]) ? 0x8000000000000000LL : temp) 
00524                 : ((temp < C.s64[0])? 0x7FFFFFFFFFFFFFFFLL : temp)) : temp;
00525         temp = (long long)A.s32[3] * B.s32[3];
00526         signT = temp & 0x8000000000000000LL;
00527         signC = C.s64[1] & 0x8000000000000000LL;
00528         temp += C.s64[1];
00529         D.s64[1] = (signT==signC) ? ((signT >0) ? ((temp > C.s64[1]) ? 0x8000000000000000LL : temp) 
00530                 : ((temp < C.s64[1])? 0x7FFFFFFFFFFFFFFFLL : temp)) : temp;
00531 
00532     return D.i;
00533 }

SSP_FORCEINLINE __m128i ssp_maccslo_epi32_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_maccslo_epi32/ pmacssdql [SSE5]. (SSE5 .pdf documentation here)

Definition at line 536 of file SSEPlus_emulation_REF.h.

00537 {
00538     ssp_m128 A, B, C, D;
00539         long long temp;
00540         unsigned long long signT, signC;
00541     A.i = a;
00542     B.i = b;
00543     C.i = c;
00544 
00545         temp = (long long)A.s32[0] * B.s32[0];
00546         signT = temp & 0x8000000000000000LL;
00547         signC = C.s64[0] & 0x8000000000000000LL;
00548         temp += C.s64[0];
00549         D.s64[0] = (signT==signC) ? ((signT >0) ? ((temp > C.s64[0]) ? 0x8000000000000000LL : temp) 
00550                 : ((temp < C.s64[0])? 0x7FFFFFFFFFFFFFFFLL : temp)) : temp;
00551         temp = (long long)A.s32[2] * B.s32[2];
00552         signT = temp & 0x8000000000000000LL;
00553         signC = C.s64[1] & 0x8000000000000000LL;
00554         temp += C.s64[1];
00555         D.s64[1] = (signT==signC) ? ((signT >0) ? ((temp > C.s64[1]) ? 0x8000000000000000LL : temp) 
00556                 : ((temp < C.s64[1])? 0x7FFFFFFFFFFFFFFFLL : temp)) : temp;
00557 
00558     return D.i;
00559 }

SSP_FORCEINLINE __m128i ssp_maddd_epi16_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_maddd_epi16/ pmadcswd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 562 of file SSEPlus_emulation_REF.h.

00563 {
00564     ssp_m128 A, B, C, D;
00565     A.i = a;
00566     B.i = b;
00567     C.i = c;
00568 
00569     D.s32[0] = A.s16[0] * B.s16[0] + A.s16[1] * B.s16[1] + C.s32[0];
00570     D.s32[1] = A.s16[2] * B.s16[2] + A.s16[3] * B.s16[3] + C.s32[1];
00571     D.s32[2] = A.s16[4] * B.s16[4] + A.s16[5] * B.s16[5] + C.s32[2];
00572     D.s32[3] = A.s16[6] * B.s16[6] + A.s16[7] * B.s16[7] + C.s32[3];
00573 
00574     return D.i;
00575 }

SSP_FORCEINLINE __m128i ssp_maddsd_epi16_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_maddsd_epi16/ pmadcsswd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 578 of file SSEPlus_emulation_REF.h.

00579 {
00580     ssp_m128 A, B, C, D;
00581         long long temp;
00582 
00583     A.i = a;
00584     B.i = b;
00585     C.i = c;
00586 
00587         temp = A.s16[0] * B.s16[0] + A.s16[1] * B.s16[1] + (long long)C.s32[0];
00588     D.s32[0] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));
00589         temp = A.s16[2] * B.s16[2] + A.s16[3] * B.s16[3] + (long long)C.s32[1];
00590     D.s32[1] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));;
00591         temp = A.s16[4] * B.s16[4] + A.s16[5] * B.s16[5] + (long long)C.s32[2];
00592     D.s32[2] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));;
00593         temp = A.s16[6] * B.s16[6] + A.s16[7] * B.s16[7] + (long long)C.s32[3];
00594     D.s32[3] = (ssp_s32)(SSP_SATURATION(temp, 2147483647LL, -2147483648LL));;
00595 
00596     return D.i;
00597 }

SSP_FORCEINLINE __m128i ssp_maddubs_epi16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_maddubs_epi16 [SSSE3]. (Searches MSDN)

Definition at line 970 of file SSEPlus_emulation_REF.h.

00971 {
00972     ssp_m128 A, B, C;
00973         int tmp[8];
00974     A.i = a;
00975     B.i = b;
00976 
00977         // a is 8 bit unsigned integer, b is signed integer
00978         tmp[0] = A.u8[0] * B.s8[0] +  A.u8[1] * B.s8[1];
00979         C.s16[0] = (ssp_s16)(SSP_SATURATION(tmp[0], 32767, -32768));
00980 
00981         tmp[1] = A.u8[2] * B.s8[2] +  A.u8[3] * B.s8[3];
00982         C.s16[1] = (ssp_s16)(SSP_SATURATION(tmp[1], 32767, -32768));
00983 
00984         tmp[2] = A.u8[4] * B.s8[4] +  A.u8[5] * B.s8[5];
00985         C.s16[2] = (ssp_s16)(SSP_SATURATION(tmp[2], 32767, -32768));
00986 
00987         tmp[3] = A.u8[6] * B.s8[6] +  A.u8[7] * B.s8[7];
00988         C.s16[3] = (ssp_s16)(SSP_SATURATION(tmp[3], 32767, -32768));
00989 
00990         tmp[4] = A.u8[8] * B.s8[8] +  A.u8[9] * B.s8[9];
00991         C.s16[4] = (ssp_s16)(SSP_SATURATION(tmp[4], 32767, -32768));
00992 
00993         tmp[5] = A.u8[10] * B.s8[10] +  A.u8[11] * B.s8[11];
00994         C.s16[5] = (ssp_s16)(SSP_SATURATION(tmp[5], 32767, -32768));
00995 
00996         tmp[6] = A.u8[12] * B.s8[12] +  A.u8[13] * B.s8[13];
00997         C.s16[6] = (ssp_s16)(SSP_SATURATION(tmp[6], 32767, -32768));
00998 
00999         tmp[7] = A.u8[14] * B.s8[14] +  A.u8[15] * B.s8[15];
01000         C.s16[7] = (ssp_s16)(SSP_SATURATION(tmp[7], 32767, -32768));
01001 
01002         return C.i;
01003 }

SSP_FORCEINLINE __m64 ssp_maddubs_pi16_REF ( __m64  a,
__m64  b 
)

Reference implementation of _mm_maddubs_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.

Definition at line 1009 of file SSEPlus_emulation_REF.h.

01010 {
01011     ssp_m64 A, B, C;
01012         int tmp[4];
01013     A.m64 = a;
01014     B.m64 = b;
01015 
01016         // a is 8 bit unsigned integer, b is signed integer
01017         tmp[0] = A.u8[0] * B.s8[0] +  A.u8[1] * B.s8[1];
01018         C.s16[0] = (ssp_s16)(SSP_SATURATION(tmp[0], 32767, -32768));
01019 
01020         tmp[1] = A.u8[2] * B.s8[2] +  A.u8[3] * B.s8[3];
01021         C.s16[1] = (ssp_s16)(SSP_SATURATION(tmp[1], 32767, -32768));
01022 
01023         tmp[2] = A.u8[4] * B.s8[4] +  A.u8[5] * B.s8[5];
01024         C.s16[2] = (ssp_s16)(SSP_SATURATION(tmp[2], 32767, -32768));
01025 
01026         tmp[3] = A.u8[6] * B.s8[6] +  A.u8[7] * B.s8[7];
01027         C.s16[3] = (ssp_s16)(SSP_SATURATION(tmp[3], 32767, -32768));
01028 
01029         return C.m64;
01030 }

SSP_FORCEINLINE __m128i ssp_max_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_max_epi32 [SSE4.1]. (Searches MSDN)

Definition at line 1700 of file SSEPlus_emulation_REF.h.

01701 {
01702     ssp_m128 A,B;
01703     A.i = a;
01704     B.i = b;
01705 
01706     SSP_SET_MAX( A.s32[ 0], B.s32[ 0] );
01707     SSP_SET_MAX( A.s32[ 1], B.s32[ 1] );
01708     SSP_SET_MAX( A.s32[ 2], B.s32[ 2] );
01709     SSP_SET_MAX( A.s32[ 3], B.s32[ 3] );
01710     return A.i;
01711 }

SSP_FORCEINLINE __m128i ssp_max_epi8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_max_epi8 [SSE4.1]. (Searches MSDN)

Definition at line 1622 of file SSEPlus_emulation_REF.h.

01623 {
01624     ssp_m128 A,B;
01625     A.i = a;
01626     B.i = b;
01627 
01628     SSP_SET_MAX( A.s8[ 0], B.s8[ 0] );
01629     SSP_SET_MAX( A.s8[ 1], B.s8[ 1] );
01630     SSP_SET_MAX( A.s8[ 2], B.s8[ 2] );
01631     SSP_SET_MAX( A.s8[ 3], B.s8[ 3] );
01632     SSP_SET_MAX( A.s8[ 4], B.s8[ 4] );
01633     SSP_SET_MAX( A.s8[ 5], B.s8[ 5] );
01634     SSP_SET_MAX( A.s8[ 6], B.s8[ 6] );
01635     SSP_SET_MAX( A.s8[ 7], B.s8[ 7] );
01636     SSP_SET_MAX( A.s8[ 8], B.s8[ 8] );
01637     SSP_SET_MAX( A.s8[ 9], B.s8[ 9] );
01638     SSP_SET_MAX( A.s8[10], B.s8[10] );
01639     SSP_SET_MAX( A.s8[11], B.s8[11] );
01640     SSP_SET_MAX( A.s8[12], B.s8[12] );
01641     SSP_SET_MAX( A.s8[13], B.s8[13] );
01642     SSP_SET_MAX( A.s8[14], B.s8[14] );
01643     SSP_SET_MAX( A.s8[15], B.s8[15] );
01644     return A.i;
01645 }

SSP_FORCEINLINE __m128i ssp_max_epu16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_max_epu16 [SSE4.1]. (Searches MSDN)

Definition at line 1667 of file SSEPlus_emulation_REF.h.

01668 {
01669     ssp_m128 A,B;
01670     A.i = a;
01671     B.i = b;
01672 
01673     SSP_SET_MAX( A.u16[ 0], B.u16[ 0] );
01674     SSP_SET_MAX( A.u16[ 1], B.u16[ 1] );
01675     SSP_SET_MAX( A.u16[ 2], B.u16[ 2] );
01676     SSP_SET_MAX( A.u16[ 3], B.u16[ 3] );
01677     SSP_SET_MAX( A.u16[ 4], B.u16[ 4] );
01678     SSP_SET_MAX( A.u16[ 5], B.u16[ 5] );
01679     SSP_SET_MAX( A.u16[ 6], B.u16[ 6] );
01680     SSP_SET_MAX( A.u16[ 7], B.u16[ 7] );
01681     return A.i;
01682 }

SSP_FORCEINLINE __m128i ssp_max_epu32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_max_epu32 [SSE4.1]. (Searches MSDN)

Definition at line 1728 of file SSEPlus_emulation_REF.h.

01729 {
01730     ssp_m128 A,B;
01731     A.i = a;
01732     B.i = b;
01733 
01734     SSP_SET_MAX( A.u32[ 0], B.u32[ 0] );
01735     SSP_SET_MAX( A.u32[ 1], B.u32[ 1] );
01736     SSP_SET_MAX( A.u32[ 2], B.u32[ 2] );
01737     SSP_SET_MAX( A.u32[ 3], B.u32[ 3] );
01738     return A.i;
01739 }

SSP_FORCEINLINE __m128i ssp_min_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_min_epi32 [SSE4.1]. (Searches MSDN)

Definition at line 1686 of file SSEPlus_emulation_REF.h.

01687 {
01688     ssp_m128 A,B;
01689     A.i = a;
01690     B.i = b;
01691 
01692     SSP_SET_MIN( A.s32[ 0], B.s32[ 0] );
01693     SSP_SET_MIN( A.s32[ 1], B.s32[ 1] );
01694     SSP_SET_MIN( A.s32[ 2], B.s32[ 2] );
01695     SSP_SET_MIN( A.s32[ 3], B.s32[ 3] );
01696     return A.i;
01697 }

SSP_FORCEINLINE __m128i ssp_min_epi8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_min_epi8 [SSE4.1]. (Searches MSDN)

Definition at line 1596 of file SSEPlus_emulation_REF.h.

01597 {
01598     ssp_m128 A,B;
01599     A.i = a;
01600     B.i = b;
01601 
01602     SSP_SET_MIN( A.s8[ 0], B.s8[ 0] );
01603     SSP_SET_MIN( A.s8[ 1], B.s8[ 1] );
01604     SSP_SET_MIN( A.s8[ 2], B.s8[ 2] );
01605     SSP_SET_MIN( A.s8[ 3], B.s8[ 3] );
01606     SSP_SET_MIN( A.s8[ 4], B.s8[ 4] );
01607     SSP_SET_MIN( A.s8[ 5], B.s8[ 5] );
01608     SSP_SET_MIN( A.s8[ 6], B.s8[ 6] );
01609     SSP_SET_MIN( A.s8[ 7], B.s8[ 7] );
01610     SSP_SET_MIN( A.s8[ 8], B.s8[ 8] );
01611     SSP_SET_MIN( A.s8[ 9], B.s8[ 9] );
01612     SSP_SET_MIN( A.s8[10], B.s8[10] );
01613     SSP_SET_MIN( A.s8[11], B.s8[11] );
01614     SSP_SET_MIN( A.s8[12], B.s8[12] );
01615     SSP_SET_MIN( A.s8[13], B.s8[13] );
01616     SSP_SET_MIN( A.s8[14], B.s8[14] );
01617     SSP_SET_MIN( A.s8[15], B.s8[15] );
01618     return A.i;
01619 }

SSP_FORCEINLINE __m128i ssp_min_epu16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_min_epu16 [SSE4.1]. (Searches MSDN)

Definition at line 1649 of file SSEPlus_emulation_REF.h.

01650 {
01651     ssp_m128 A,B;
01652     A.i = a;
01653     B.i = b;
01654 
01655     SSP_SET_MIN( A.u16[ 0], B.u16[ 0] );
01656     SSP_SET_MIN( A.u16[ 1], B.u16[ 1] );
01657     SSP_SET_MIN( A.u16[ 2], B.u16[ 2] );
01658     SSP_SET_MIN( A.u16[ 3], B.u16[ 3] );
01659     SSP_SET_MIN( A.u16[ 4], B.u16[ 4] );
01660     SSP_SET_MIN( A.u16[ 5], B.u16[ 5] );
01661     SSP_SET_MIN( A.u16[ 6], B.u16[ 6] );
01662     SSP_SET_MIN( A.u16[ 7], B.u16[ 7] );
01663     return A.i;
01664 }

SSP_FORCEINLINE __m128i ssp_min_epu32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_min_epu32 [SSE4.1]. (Searches MSDN)

Definition at line 1714 of file SSEPlus_emulation_REF.h.

01715 {
01716     ssp_m128 A,B;
01717     A.i = a;
01718     B.i = b;
01719 
01720     SSP_SET_MIN( A.u32[ 0], B.u32[ 0] );
01721     SSP_SET_MIN( A.u32[ 1], B.u32[ 1] );
01722     SSP_SET_MIN( A.u32[ 2], B.u32[ 2] );
01723     SSP_SET_MIN( A.u32[ 3], B.u32[ 3] );
01724     return A.i;
01725 }

SSP_FORCEINLINE __m128i ssp_minpos_epu16_REF ( __m128i  shortValues  ) 

Reference implementation of _mm_minpos_epu16 [SSE4.1]. (Searches MSDN)

Definition at line 1745 of file SSEPlus_emulation_REF.h.

01746 {
01747     ssp_m128 ShortValues;
01748     ShortValues.i = shortValues;
01749 
01750     if( ShortValues.u16[1] < ShortValues.u16[0] )
01751     {
01752         ShortValues.u16[0] = ShortValues.u16[1];
01753         ShortValues.u16[1] = 1;
01754     }
01755     else
01756         ShortValues.u16[1] = 0;
01757 
01758 
01759 #define FN( I )                                     \
01760     if( ShortValues.u16[I] < ShortValues.u16[0] )   \
01761     {                                               \
01762         ShortValues.u16[0] = ShortValues.u16[I];    \
01763         ShortValues.u16[1] = I;                     \
01764     }
01765 
01766     FN( 2 );
01767     FN( 3 );
01768     FN( 4 );
01769     FN( 5 );
01770     FN( 6 );
01771     FN( 7 );
01772 
01773     ShortValues.u32[1] = 0;
01774     ShortValues.u64[1] = 0;
01775 
01776 #undef FN
01777 
01778     return ShortValues.i;
01779 }

SSP_FORCEINLINE __m128i ssp_minpos_epu16_REFb ( __m128i  shortValues  ) 

Reference implementation of _mm_minpos_epu16 [SSE4.1]. (Searches MSDN)

Definition at line 1782 of file SSEPlus_emulation_REF.h.

01783 {
01784     ssp_m128 ShortValues;
01785     ssp_u32 i;
01786     ssp_u16 pos = 0;
01787     ssp_u16 minVal;
01788     ShortValues.i = shortValues;
01789     minVal = ShortValues.u16[0];
01790 
01791     for( i=1; i<8; ++i )
01792     {
01793         if( ShortValues.u16[i] < minVal )
01794         {
01795             minVal = ShortValues.u16[i];
01796             pos    = i;
01797         }
01798 
01799         ShortValues.u16[i] = 0;
01800     }
01801 
01802     ShortValues.u16[0] = minVal;
01803     ShortValues.u16[1] = pos;
01804     return ShortValues.i;
01805 }

SSP_FORCEINLINE __m128d ssp_movedup_pd_REF ( __m128d  a  ) 

Reference implementation of _mm_movedup_pd [SSE3]. (Searches MSDN)

Definition at line 1834 of file SSEPlus_emulation_REF.h.

01835 {
01836     ssp_m128 A;
01837     A.d = a;
01838 
01839     A.f64[1] = A.f64[0];
01840     return A.d;
01841 }

SSP_FORCEINLINE __m128 ssp_movehdup_ps_REF ( __m128  a  ) 

Reference implementation of _mm_movehdup_ps [SSE3]. (Searches MSDN)

Definition at line 1812 of file SSEPlus_emulation_REF.h.

01813 {
01814     ssp_m128 A;
01815     A.f = a;
01816 
01817     A.f32[0] = A.f32[1];
01818     A.f32[2] = A.f32[3];
01819     return A.f;
01820 }

SSP_FORCEINLINE __m128 ssp_moveldup_ps_REF ( __m128  a  ) 

Reference implementation of _mm_moveldup_ps [SSE3]. (Searches MSDN)

Definition at line 1823 of file SSEPlus_emulation_REF.h.

01824 {
01825     ssp_m128 A;
01826     A.f = a;
01827 
01828     A.f32[1] = A.f32[0];
01829     A.f32[3] = A.f32[2];
01830     return A.f;
01831 }

SSP_FORCEINLINE __m128i ssp_mpsadbw_epu8_REF ( __m128i  a,
__m128i  b,
const int  msk 
)

Reference implementation of _mm_mpsadbw_epu8 [SSE4.1]. (Searches MSDN)

Definition at line 1879 of file SSEPlus_emulation_REF.h.

01880 {
01881         ssp_u8 Abyte[11], Bbyte[4], tmp[4];
01882         ssp_u8 Boffset, Aoffset;
01883         int i;
01884 
01885     ssp_m128 A,B;
01886     A.i = a;
01887     B.i = b;
01888 
01889         Boffset = (msk & 0x3) << 2; // *32/8,   for byte size count
01890         Aoffset = (msk & 0x4);      // *32/8/4, for byte size count and shift msk to bit 2
01891 
01892         for (i=0; i<11; i++)
01893         {
01894                 Abyte[i] = A.u8[i+Aoffset];
01895         }
01896         
01897         Bbyte[0] = B.u8[Boffset  ];
01898         Bbyte[1] = B.u8[Boffset+1];
01899         Bbyte[2] = B.u8[Boffset+2];
01900         Bbyte[3] = B.u8[Boffset+3];
01901 
01902         for (i=0; i<8; i++)
01903         {
01904                 tmp[0] = (Abyte[i  ] > Bbyte[0]) ? (Abyte[i  ] - Bbyte[0]) :  (Bbyte[0] - Abyte[i  ]);        //abs diff
01905                 tmp[1] = (Abyte[i+1] > Bbyte[1]) ? (Abyte[i+1] - Bbyte[1]) :  (Bbyte[1] - Abyte[i+1]);
01906                 tmp[2] = (Abyte[i+2] > Bbyte[2]) ? (Abyte[i+2] - Bbyte[2]) :  (Bbyte[2] - Abyte[i+2]);
01907                 tmp[3] = (Abyte[i+3] > Bbyte[3]) ? (Abyte[i+3] - Bbyte[3]) :  (Bbyte[3] - Abyte[i+3]);
01908 
01909                 A.u16[i] = tmp[0] + tmp[1] + tmp[2] + tmp[3];
01910         }
01911 
01912         return A.i;
01913 }

SSP_FORCEINLINE __m128d ssp_msub_pd_REF ( __m128d  a,
__m128d  b,
__m128d  c 
)

Reference implementation of _mm_msub_pd/fmsubpd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 676 of file SSEPlus_emulation_REF.h.

00677 {
00678     ssp_m128 A,B,C;
00679     A.d = a;
00680     B.d = b;
00681     C.d = c;
00682 
00683     A.f64[0] = A.f64[0] * B.f64[0] - C.f64[0];
00684     A.f64[1] = A.f64[1] * B.f64[1] - C.f64[1]; 
00685     return A.d;
00686 }

SSP_FORCEINLINE __m128 ssp_msub_ps_REF ( __m128  a,
__m128  b,
__m128  c 
)

Reference implementation of _mm_msub_ps/fmsubps [SSE5]. (SSE5 .pdf documentation here)

Definition at line 661 of file SSEPlus_emulation_REF.h.

00662 {
00663     ssp_m128 A,B,C;
00664     A.f = a;
00665     B.f = b;
00666     C.f = c;
00667 
00668     A.f32[0] = A.f32[0] * B.f32[0] - C.f32[0];
00669     A.f32[1] = A.f32[1] * B.f32[1] - C.f32[1];
00670     A.f32[2] = A.f32[2] * B.f32[2] - C.f32[2];
00671     A.f32[3] = A.f32[3] * B.f32[3] - C.f32[3];
00672     return A.f;
00673 }

SSP_FORCEINLINE __m128d ssp_msub_sd_REF ( __m128d  a,
__m128d  b,
__m128d  c 
)

Reference implementation of _mm_msub_sd/fmsubsd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 701 of file SSEPlus_emulation_REF.h.

00701                                                                                                                                           : confirm
00702 {
00703     ssp_m128 A,B,C;
00704     A.d = a;
00705     B.d = b;
00706     C.d = c;
00707 
00708     A.f64[0] = A.f64[0] * B.f64[0] - C.f64[0];   
00709     return A.d;
00710 }

SSP_FORCEINLINE __m128 ssp_msub_ss_REF ( __m128  a,
__m128  b,
__m128  c 
)

Reference implementation of _mm_msub_ss/fmsubss [SSE5]. (SSE5 .pdf documentation here)

Definition at line 689 of file SSEPlus_emulation_REF.h.

00689                                                                                                                                       : confirm
00690 {
00691     ssp_m128 A,B,C;
00692     A.f = a;
00693     B.f = b;
00694     C.f = c;
00695 
00696     A.f32[0] = A.f32[0] * B.f32[0] - C.f32[0];   
00697     return A.f;
00698 }

SSP_FORCEINLINE __m128i ssp_mul_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_mul_epi32 [SSE4.1]. (Searches MSDN)

Definition at line 1847 of file SSEPlus_emulation_REF.h.

01848 {   // Signed multiply of 32-bit elements 0 and 2 of a and b, each giving a full 64-bit product.
01849     ssp_m128 A,B;
01850     A.i = a;
01851     B.i = b;
01852 
01853     A.s64[0] = (long long)A.s32[0] * B.s32[0];   // widen one operand first: an int*int product overflows before it is stored
01854     A.s64[1] = (long long)A.s32[2] * B.s32[2];   // s32[2] lies inside s64[1], which has not been written yet
01855     return A.i;
01856 }

SSP_FORCEINLINE __m128i ssp_mulhrs_epi16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_mulhrs_epi16 [SSSE3]. (Searches MSDN)

Definition at line 1034 of file SSEPlus_emulation_REF.h.

01035 {
01036     ssp_m128 A,B;
01037     A.i = a;
01038     B.i = b;
01039 
01040         A.s16[0] = (ssp_s16) ((A.s16[0] * B.s16[0] + 0x4000) >> 15);
01041         A.s16[1] = (ssp_s16) ((A.s16[1] * B.s16[1] + 0x4000) >> 15);
01042         A.s16[2] = (ssp_s16) ((A.s16[2] * B.s16[2] + 0x4000) >> 15);
01043         A.s16[3] = (ssp_s16) ((A.s16[3] * B.s16[3] + 0x4000) >> 15);
01044         A.s16[4] = (ssp_s16) ((A.s16[4] * B.s16[4] + 0x4000) >> 15);
01045         A.s16[5] = (ssp_s16) ((A.s16[5] * B.s16[5] + 0x4000) >> 15);
01046         A.s16[6] = (ssp_s16) ((A.s16[6] * B.s16[6] + 0x4000) >> 15);
01047         A.s16[7] = (ssp_s16) ((A.s16[7] * B.s16[7] + 0x4000) >> 15);
01048 
01049     return A.i;
01050 }

SSP_FORCEINLINE __m64 ssp_mulhrs_pi16_REF ( __m64  a,
__m64  b 
)

Reference implementation of _mm_mulhrs_pi16 [SSSE3]. (Searches MSDN)
NOTE: The user must call _mm_empty() after a call to this function.

Definition at line 1056 of file SSEPlus_emulation_REF.h.

01057 {
01058     ssp_m64 A,B;
01059     A.m64 = a;
01060     B.m64 = b;
01061 
01062         A.s16[0] = (ssp_s16) ((A.s16[0] * B.s16[0] + 0x4000) >> 15);
01063         A.s16[1] = (ssp_s16) ((A.s16[1] * B.s16[1] + 0x4000) >> 15);
01064         A.s16[2] = (ssp_s16) ((A.s16[2] * B.s16[2] + 0x4000) >> 15);
01065         A.s16[3] = (ssp_s16) ((A.s16[3] * B.s16[3] + 0x4000) >> 15);
01066 
01067     return A.m64;
01068 }

SSP_FORCEINLINE __m128i ssp_mullo_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_mullo_epi32 [SSE4.1]. (Searches MSDN)

Definition at line 1859 of file SSEPlus_emulation_REF.h.

01860 {
01861     ssp_m128 t[2];
01862     ssp_m128 A,B;
01863     A.i = a;
01864     B.i = b;
01865 
01866     t[0].s64[0] = A.s32[0] * B.s32[0];
01867     t[0].s64[1] = A.s32[1] * B.s32[1];
01868     t[1].s64[0] = A.s32[2] * B.s32[2];
01869     t[1].s64[1] = A.s32[3] * B.s32[3];    
01870 
01871     A.s32[0] = t[0].s32[0];
01872     A.s32[1] = t[0].s32[2];
01873     A.s32[2] = t[1].s32[0];
01874     A.s32[3] = t[1].s32[2];
01875     return A.i;
01876 }

SSP_FORCEINLINE __m128d ssp_nmacc_pd_REF ( __m128d  a,
__m128d  b,
__m128d  c 
)

Reference implementation of _mm_nmacc_pd/fnmaddpd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 619 of file SSEPlus_emulation_REF.h.

00620 {
00621     ssp_m128 A,B,C;
00622     A.d = a;
00623     B.d = b;
00624     C.d = c;
00625 
00626     A.f64[0] = -(A.f64[0] * B.f64[0]) + C.f64[0];
00627     A.f64[1] = -(A.f64[1] * B.f64[1]) + C.f64[1]; 
00628     return A.d;
00629 }

SSP_FORCEINLINE __m128 ssp_nmacc_ps_REF ( __m128  a,
__m128  b,
__m128  c 
)

Reference implementation of _mm_nmacc_ps/fnmaddps [SSE5]. (SSE5 .pdf documentation here)

Definition at line 604 of file SSEPlus_emulation_REF.h.

00605 {
00606     ssp_m128 A,B,C;
00607     A.f = a;
00608     B.f = b;
00609     C.f = c;
00610 
00611     A.f32[0] = -(A.f32[0] * B.f32[0]) + C.f32[0];
00612     A.f32[1] = -(A.f32[1] * B.f32[1]) + C.f32[1];
00613     A.f32[2] = -(A.f32[2] * B.f32[2]) + C.f32[2];
00614     A.f32[3] = -(A.f32[3] * B.f32[3]) + C.f32[3];
00615     return A.f;
00616 }

SSP_FORCEINLINE __m128d ssp_nmacc_sd_REF ( __m128d  a,
__m128d  b,
__m128d  c 
)

Reference implementation of _mm_nmacc_sd/fnmaddsd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 644 of file SSEPlus_emulation_REF.h.

00645 {
00646     ssp_m128 A,B,C;
00647     A.d = a;
00648     B.d = b;
00649     C.d = c;
00650 
00651     A.f64[0] = -(A.f64[0] * B.f64[0]) + C.f64[0];   
00652     return A.d;
00653 }

SSP_FORCEINLINE __m128 ssp_nmacc_ss_REF ( __m128  a,
__m128  b,
__m128  c 
)

Reference implementation of _mm_nmacc_ss/fnmaddss [SSE5]. (SSE5 .pdf documentation here)

Definition at line 632 of file SSEPlus_emulation_REF.h.

00633 {
00634     ssp_m128 A,B,C;
00635     A.f = a;
00636     B.f = b;
00637     C.f = c;
00638 
00639     A.f32[0] = -(A.f32[0] * B.f32[0]) + C.f32[0];   
00640     return A.f;
00641 }

SSP_FORCEINLINE __m128d ssp_nmsub_pd_REF ( __m128d  a,
__m128d  b,
__m128d  c 
)

Reference implementation of _mm_nmsub_pd/fnmsubpd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 732 of file SSEPlus_emulation_REF.h.

00733 {
00734     ssp_m128 A,B,C;
00735     A.d = a;
00736     B.d = b;
00737     C.d = c;
00738 
00739     A.f64[0] = -(A.f64[0] * B.f64[0]) - C.f64[0];
00740     A.f64[1] = -(A.f64[1] * B.f64[1]) - C.f64[1]; 
00741     return A.d;
00742 }

SSP_FORCEINLINE __m128 ssp_nmsub_ps_REF ( __m128  a,
__m128  b,
__m128  c 
)

Reference implementation of _mm_nmsub_ps/fnmsubps [SSE5]. (SSE5 .pdf documentation here)

Definition at line 717 of file SSEPlus_emulation_REF.h.

00718 {
00719     ssp_m128 A,B,C;
00720     A.f = a;
00721     B.f = b;
00722     C.f = c;
00723 
00724     A.f32[0] = -(A.f32[0] * B.f32[0]) - C.f32[0];
00725     A.f32[1] = -(A.f32[1] * B.f32[1]) - C.f32[1];
00726     A.f32[2] = -(A.f32[2] * B.f32[2]) - C.f32[2];
00727     A.f32[3] = -(A.f32[3] * B.f32[3]) - C.f32[3];
00728     return A.f;
00729 }

SSP_FORCEINLINE __m128d ssp_nmsub_sd_REF ( __m128d  a,
__m128d  b,
__m128d  c 
)

Reference implementation of _mm_nmsub_sd/fnmsubsd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 757 of file SSEPlus_emulation_REF.h.

00758 {
00759     ssp_m128 A,B,C;
00760     A.d = a;
00761     B.d = b;
00762     C.d = c;
00763 
00764     A.f64[0] = -(A.f64[0] * B.f64[0]) - C.f64[0];   
00765     return A.d;
00766 }

SSP_FORCEINLINE __m128 ssp_nmsub_ss_REF ( __m128  a,
__m128  b,
__m128  c 
)

Reference implementation of _mm_nmsub_ss/fnmsubss [SSE5]. (SSE5 .pdf documentation here)

Definition at line 745 of file SSEPlus_emulation_REF.h.

00746 {
00747     ssp_m128 A,B,C;
00748     A.f = a;
00749     B.f = b;
00750     C.f = c;
00751 
00752     A.f32[0] = -(A.f32[0] * B.f32[0]) - C.f32[0];   
00753     return A.f;
00754 }

SSP_FORCEINLINE __m128i ssp_packus_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_packus_epi32 [SSE4.1]. (Searches MSDN)

Definition at line 1919 of file SSEPlus_emulation_REF.h.

01920 {   // Packs the signed 32-bit elements of a (results 0..3) and b (results 4..7) into unsigned 16-bit elements with saturation to [0, 0xFFFF].
01921     ssp_m128 A,B;
01922     A.i = a;
01923     B.i = b;
01924 
01925     if( A.s32[0] < 0 )   // each A.s32[k] is read before the u16 slots overlapping it are overwritten
01926         A.u16[0] = 0;
01927     else
01928         if( A.s32[0] > 0xFFFF )
01929             A.u16[0] = 0xFFFF;
01930         else
01931             A.u16[0] = (ssp_u16)A.s32[0];   // store through the unsigned view, matching the saturating branches
01932 
01933     if( A.s32[1] < 0 )
01934         A.u16[1] = 0;
01935     else
01936         if( A.s32[1] > 0xFFFF )
01937             A.u16[1] = 0xFFFF;
01938         else
01939             A.u16[1] = (ssp_u16)A.s32[1];
01940 
01941     if( A.s32[2] < 0 )
01942         A.u16[2] = 0;
01943     else
01944         if( A.s32[2] > 0xFFFF )
01945             A.u16[2] = 0xFFFF;
01946         else
01947             A.u16[2] = (ssp_u16)A.s32[2];
01948 
01949 
01950     if( A.s32[3] < 0 )
01951         A.u16[3] = 0;
01952     else
01953         if( A.s32[3] > 0xFFFF )
01954             A.u16[3] = 0xFFFF;
01955         else
01956             A.u16[3] = (ssp_u16)A.s32[3];
01957 
01958     if( B.s32[0] < 0 )
01959         A.u16[4] = 0;
01960     else
01961         if( B.s32[0] > 0xFFFF )
01962             A.u16[4] = 0xFFFF;
01963         else
01964             A.u16[4] = (ssp_u16)B.s32[0];
01965 
01966     if( B.s32[1] < 0 )
01967         A.u16[5] = 0;
01968     else
01969         if( B.s32[1] > 0xFFFF )
01970             A.u16[5] = 0xFFFF;
01971         else
01972             A.u16[5] = (ssp_u16)B.s32[1];
01973 
01974     if( B.s32[2] < 0 )
01975         A.u16[6] = 0;
01976     else
01977         if( B.s32[2] > 0xFFFF )
01978             A.u16[6] = 0xFFFF;
01979         else
01980             A.u16[6] = (ssp_u16)B.s32[2];
01981 
01982 
01983     if( B.s32[3] < 0 )
01984         A.u16[7] = 0;
01985     else
01986         if( B.s32[3] > 0xFFFF )
01987             A.u16[7] = 0xFFFF;
01988         else
01989             A.u16[7] = (ssp_u16)B.s32[3];
01990 
01991     return A.i;
01992 }

SSP_FORCEINLINE __m128i ssp_perm_epi8_REF ( __m128i  a,
__m128i  b,
__m128i  c 
)

Reference implementation of _mm_perm_epi8/ pperm [SSE5]. (SSE5 .pdf documentation here)

Definition at line 3029 of file SSEPlus_emulation_REF.h.

03030 {   // Builds each result byte from a byte of a or b chosen and transformed by the matching control byte of c.
03031     int n;
03032     ssp_m128 A,B,C,R;
03033     A.i = a;
03034     B.i = b;
03035     C.i = c;
03036 
03037     for( n = 0; n < 16; n++ )
03038     {
03039         int op = C.u8[n] >> 5;   // top three bits of the control byte select the operation
03040         switch( op )
03041         {
03042         case 0: // source byte (no logical operation); bit 0x10 picks B over A, low four bits pick the byte
03043             R.u8[n] = ( C.u8[n] & 0x10 ) ? ( B.u8[C.u8[n] & 0xF] ) : ( A.u8[C.u8[n] & 0xF] );
03044             break;
03045         case 1: // invert source byte
03046             {
03047                 ssp_u8 src = ( C.u8[n] & 0x10 ) ? ( B.u8[C.u8[n] & 0xF] ) : ( A.u8[C.u8[n] & 0xF] );
03048                 R.u8[n] = ~src;
03049             }
03050             break;
03051         case 2: // bit reverse of source byte
03052             {
03053                 ssp_u8 src = ( C.u8[n] & 0x10 ) ? ( B.u8[C.u8[n] & 0xF] ) : ( A.u8[C.u8[n] & 0xF] );
03054                 R.u8[n] = ( (src & 0x0F) << 4 ) | ( (src & 0xF0) >> 4 );   // swap nibbles
03055                 R.u8[n] = ( (R.u8[n] & 0x33) << 2 ) | ( (R.u8[n] & 0xCC) >> 2 );   // swap bit pairs
03056                 R.u8[n] = ( (R.u8[n] & 0x55) << 1 ) | ( (R.u8[n] & 0xAA) >> 1 );   // swap adjacent bits
03057             }
03058             break;
03059         case 3: // bit reverse of inverted source byte (computed here as reverse, then invert)
03060             {
03061                 ssp_u8 src = ( C.u8[n] & 0x10 ) ? ( B.u8[C.u8[n] & 0xF] ) : ( A.u8[C.u8[n] & 0xF] );
03062                 R.u8[n] = ( (src & 0x0F) << 4 ) | ( (src & 0xF0) >> 4 );
03063                 R.u8[n] = ( (R.u8[n] & 0x33) << 2 ) | ( (R.u8[n] & 0xCC) >> 2 );
03064                 R.u8[n] = ( (R.u8[n] & 0x55) << 1 ) | ( (R.u8[n] & 0xAA) >> 1 );
03065                 R.u8[n] = ~R.u8[n];
03066             }
03067             break;
03068         case 4: // 0x00
03069             R.u8[n] = 0x00;
03070             break;
03071         case 5: // 0xFF
03072             R.u8[n] = 0xFF;
03073             break;
03074         case 6: // most significant bit of source byte replicated in all bit positions
03075             {
03076                 ssp_s8 src = ( C.u8[n] & 0x10 ) ? ( B.s8[C.u8[n] & 0xF] ) : ( A.s8[C.u8[n] & 0xF] );
03077                 R.s8[n] = src >> 7;   // NOTE(review): relies on >> of a negative value being an arithmetic shift - confirm for the target compilers
03078             }
03079             break;
03080         case 7: // invert most significant bit of source byte and replicate in all bit positions
03081             {
03082                 ssp_s8 src = ( C.u8[n] & 0x10 ) ? ( B.s8[C.u8[n] & 0xF] ) : ( A.s8[C.u8[n] & 0xF] );
03083                 R.s8[n] = src >> 7;
03084                 R.u8[n] = ~R.u8[n];
03085             }
03086             break;
03087         }
03088     }
03089     return R.i;
03090 }

SSP_FORCEINLINE __m128d ssp_perm_pd_REF ( __m128d  a,
__m128d  b,
__m128i  c 
)

Reference implementation of _mm_perm_pd/ permpd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 3144 of file SSEPlus_emulation_REF.h.

03145 {   // Builds each double-precision result element from an element of a or b, or a constant, as selected by c.
03146     int n;
03147     ssp_m128 A,B,C,R;
03148     A.d = a;
03149     B.d = b;
03150     C.i = c;
03151 
03152     for( n = 0; n < 2; n++ )
03153     {
03154         unsigned char cb = C.u8[n*8];   // control byte: lowest byte of the n-th 64-bit element of c
03155         int op = (cb >> 5) & 0x7;   // top three bits select the operation
03156         switch( op )
03157         {
03158         case 0: // double-precision source operand; bit 0x02 picks B over A, bit 0x01 picks the element
03159             R.f64[n] = ( cb & 0x02 ) ? ( B.f64[cb & 0x01] ) : ( A.f64[cb & 0x01] );
03160             break;
03161         case 1: // absolute value of double-precision source operand
03162             {
03163                 ssp_f64 src = ( cb & 0x02 ) ? ( B.f64[cb & 0x01] ) : ( A.f64[cb & 0x01] );
03164                 R.f64[n] = ( src < 0.0 ) ? (-src) : src;
03165             }
03166             break;
03167         case 2: // negative value of double-precision source operand
03168             {
03169                 ssp_f64 src = ( cb & 0x02 ) ? ( B.f64[cb & 0x01] ) : ( A.f64[cb & 0x01] );
03170                 R.f64[n] = -src;
03171             }
03172             break;
03173         case 3: // negative of absolute value of double-precision source operand
03174             {
03175                 ssp_f64 src = ( cb & 0x02 ) ? ( B.f64[cb & 0x01] ) : ( A.f64[cb & 0x01] );
03176                 R.f64[n] = ( src < 0.0 ) ? src : (-src);
03177             }
03178             break;
03179         case 4: // +0.0
03180             R.f64[n] = 0.0;
03181             break;
03182         case 5: // -1.0
03183             R.f64[n] = -1.0;
03184             break;
03185         case 6: // +1.0
03186             R.f64[n] = 1.0;
03187             break;
03188         case 7: // pi, stored as its IEEE-754 double bit pattern
03189             R.u64[n] = 0x400921FB54442D18; //(for mxcsr.rc 00, 01 or 11 use 0x400921FB54442D18, for 10 use 0x400921FB54442D19)
03190             break;
03191         }
03192     }
03193     return R.d;
03194 }

SSP_FORCEINLINE __m128 ssp_perm_ps_REF ( __m128  a,
__m128  b,
__m128i  c 
)

Reference implementation of _mm_perm_ps/ permps [SSE5]. (SSE5 .pdf documentation here)

Definition at line 3092 of file SSEPlus_emulation_REF.h.

03093 {   // Builds each single-precision result element from an element of a or b, or a constant, as selected by c.
03094     int n;
03095     ssp_m128 A,B,C,R;
03096     A.f = a;
03097     B.f = b;
03098     C.i = c;
03099 
03100     for( n = 0; n < 4; n++ )
03101     {
03102         unsigned char cb = C.u8[n*4];   // control byte: lowest byte of the n-th 32-bit element of c
03103         int op = (cb >> 5) & 0x7;   // top three bits select the operation
03104         switch( op )
03105         {
03106         case 0: // single-precision source operand; bit 0x04 picks B over A, low two bits pick the element
03107             R.f32[n] = ( cb & 0x04 ) ? ( B.f32[cb & 0x03] ) : ( A.f32[cb & 0x03] );
03108             break;
03109         case 1: // absolute value of single-precision source operand
03110             {
03111                 ssp_f32 src = ( cb & 0x04 ) ? ( B.f32[cb & 0x03] ) : ( A.f32[cb & 0x03] );
03112                 R.f32[n] = ( src < 0.0f ) ? (-src) : src;
03113             }
03114             break;
03115         case 2: // negative value of single-precision source operand
03116             {
03117                 ssp_f32 src = ( cb & 0x04 ) ? ( B.f32[cb & 0x03] ) : ( A.f32[cb & 0x03] );
03118                 R.f32[n] = -src;
03119             }
03120             break;
03121         case 3: // negative of absolute value of single-precision source operand
03122             {
03123                 ssp_f32 src = ( cb & 0x04 ) ? ( B.f32[cb & 0x03] ) : ( A.f32[cb & 0x03] );
03124                 R.f32[n] = ( src < 0.0f ) ? src : (-src);
03125             }
03126             break;
03127         case 4: // +0.0
03128             R.f32[n] = 0.0f;
03129             break;
03130         case 5: // -1.0
03131             R.f32[n] = -1.0f;
03132             break;
03133         case 6: // +1.0
03134             R.f32[n] = 1.0f;
03135             break;
03136         case 7: // pi, stored as its IEEE-754 single bit pattern
03137             R.u32[n] = 0x40490FDB; //(for mxcsr.rc 00 or 10 use 0x40490FDB, for 01 or 11 use 0x40490FDA)
03138             break;
03139         }
03140     }
03141     return R.f;
03142 }

SSP_FORCEINLINE unsigned short ssp_popcnt16_REF ( unsigned short  val  ) 

Reference implementation of __popcnt16 [SSE4a]. (Searches MSDN)

Definition at line 2997 of file SSEPlus_emulation_REF.h.

02998 {   // Returns the number of set bits in val, shifting out one bit per iteration.
03000     ssp_u16 cnt=0;
03001     for( ; val; val = val>>1 )   // the former "i<15, val" test was a comma expression, so val alone ever ended the loop
03002         cnt += val & 0x1;
03003     return cnt;
03004 }

SSP_FORCEINLINE ssp_u64 ssp_popcnt64_REF ( ssp_u64  val  ) 

Reference implementation of __popcnt64 [SSE4a]. (Searches MSDN)

Definition at line 3015 of file SSEPlus_emulation_REF.h.

03016 {   // Returns the number of set bits in val, shifting out one bit per iteration.
03018     ssp_u64 cnt = 0;
03019     for( ; val; val = val>>1 )   // the former "i<63, val" test was a comma expression, so val alone ever ended the loop
03020         cnt += val & 0x1;
03021     return cnt;
03022 }

SSP_FORCEINLINE unsigned int ssp_popcnt_REF ( unsigned int  val  ) 

Reference implementation of __popcnt [SSE4a]. (Searches MSDN)

Definition at line 3006 of file SSEPlus_emulation_REF.h.

03007 {   // Returns the number of set bits in val, shifting out one bit per iteration.
03009     ssp_u32 cnt = 0;
03010     for( ; val; val = val>>1 )   // the former "i<31, val" test was a comma expression, so val alone ever ended the loop
03011         cnt += val & 0x1;
03012     return cnt;
03013 }

SSP_FORCEINLINE __m128i ssp_rot_epi16_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_rot_epi16/ protw [SSE5]. (SSE5 .pdf documentation here)

Definition at line 3230 of file SSEPlus_emulation_REF.h.

03231 {   // Rotates each 16-bit element of a by the signed count in the matching element of b: left when the count is non-negative, right when it is negative.
03232     int n;
03233     ssp_m128 A,B;
03234     A.i = a;
03235     B.i = b;
03236 
03237     for( n = 0; n < 8; n++ )
03238     {
03239       if( B.s16[n] < 0 )
03240       {
03241         unsigned int count = (-B.s16[n]) % 16;
03242         unsigned int carry_count = (16 - count) % 16;   // outer % 16 keeps a zero count from producing a 16-bit shift
03243         ssp_u16 carry = A.u16[n] << carry_count;   // bits shifted out on the right re-enter at the top
03244         A.u16[n] = A.u16[n] >> count;
03245         A.u16[n] = A.u16[n] | carry;
03246       }
03247       else
03248       {
03249         unsigned int count = B.s16[n] % 16;   // reduce by the element width; % 8 mis-rotated counts 8..15
03250         unsigned int carry_count = (16 - count) % 16;
03251         ssp_u16 carry = A.u16[n] >> carry_count;   // bits shifted out on the left re-enter at the bottom
03252         A.u16[n] = A.u16[n] << count;
03253         A.u16[n] = A.u16[n] | carry;
03254       }
03255     }
03256     return A.i;
03257 }

SSP_FORCEINLINE __m128i ssp_rot_epi32_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_rot_epi32/ protd [SSE5]. (SSE5 .pdf documentation here)

Definition at line 3259 of file SSEPlus_emulation_REF.h.

03260 {
03261     int n;
03262     ssp_m128 A,B;
03263     A.i = a;
03264     B.i = b;
03265 
03266     for( n = 0; n < 4; n++ )
03267     {
03268       if( B.s32[n] < 0 )
03269       {
03270         unsigned int count = (-B.s32[n]) % 32;
03271         unsigned int carry_count = (32 - count) % 32;
03272         ssp_u32 carry = A.u32[n] << carry_count;
03273         A.u32[n] = A.u32[n] >> count;
03274         A.u32[n] = A.u32[n] | carry;
03275       }
03276       else
03277       {
03278         unsigned int count = B.s32[n] % 32;
03279         unsigned int carry_count = (32 - count) % 32;
03280         ssp_u32 carry = A.u32[n] >> carry_count;
03281         A.u32[n] = A.u32[n] << count;
03282         A.u32[n] = A.u32[n] | carry;
03283       }
03284     }
03285     return A.i;
03286 }

SSP_FORCEINLINE __m128i ssp_rot_epi64_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_rot_epi64/ protq [SSE5]. (SSE5 .pdf documentation here)

Definition at line 3288 of file SSEPlus_emulation_REF.h.

03289 {
03290     int n;
03291     ssp_m128 A,B;
03292     A.i = a;
03293     B.i = b;
03294 
03295     for( n = 0; n < 2; n++ )
03296     {
03297       if( B.s64[n] < 0 )
03298       {
03299         unsigned int count = (unsigned int)((-B.s64[n]) % 64);
03300         unsigned int carry_count = (64 - count) % 64;
03301         ssp_u64 carry = A.u64[n] << carry_count;
03302         A.u64[n] = A.u64[n] >> count;
03303         A.u64[n] = A.u64[n] | carry;
03304       }
03305       else
03306       {
03307         unsigned int count = (unsigned int)(B.s64[n] % 64);
03308         unsigned int carry_count = (64 - count) % 64;
03309         ssp_u64 carry = A.u64[n] >> carry_count;
03310         A.u64[n] = A.u64[n] << count;
03311         A.u64[n] = A.u64[n] | carry;
03312       }
03313     }
03314     return A.i;
03315 }

SSP_FORCEINLINE __m128i ssp_rot_epi8_REF ( __m128i  a,
__m128i  b 
)

Reference implementation of _mm_rot_epi8/ protb [SSE5]. (SSE5 .pdf documentation here)

Definition at line 3201 of file SSEPlus_emulation_REF.h.

03202 {
03203     int n;
03204     ssp_m128 A,B;
03205     A.i = a;
03206     B.i = b;
03207 
03208     for( n = 0; n < 16; n++ )
03209     {
03210       if( B.s8[n] < 0 )
03211       {
03212         unsigned int count = (-B.s8[n]) % 8;
03213         unsigned int carry_count = (8 - count) % 8;
03214         ssp_u8 carry = A.u8[n] << carry_count;
03215         A.u8[n] = A.u8[n] >> count;
03216         A.u8[n] = A.u8[n] | carry;
03217       }
03218       else
03219       {
03220         unsigned int count = B.s8[n] % 8;
03221         unsigned int carry_count = (8 - count) % 8;
03222         ssp_u8 carry = A.u8[n] >> carry_count;
03223         A.u8[n] = A.u8[n] << count;
03224         A.u8[n] = A.u8[n] | carry;
03225       }
03226     }
03227     return A.i;
03228 }

/**
 * Reference implementation of _mm_roti_epi16 / protw [SSE5].
 * Originally at line 3348 of SSEPlus_emulation_REF.h.
 *
 * Rotates each of the eight 16-bit lanes of a by the same signed amount b.
 * A non-negative b rotates left, a negative b rotates right; only the
 * magnitude modulo 16 is used.
 *
 * The magnitude of b is formed in unsigned arithmetic so that b == INT_MIN
 * is well defined (plain -b would overflow).
 *
 * @param a  Eight 16-bit lanes to rotate.
 * @param b  Signed rotate amount applied to every lane.
 * @return   The rotated lanes.
 */
SSP_FORCEINLINE __m128i ssp_roti_epi16_REF( __m128i a, const int b )
{
    const unsigned int magnitude = ( b < 0 ) ? ( 0u - (unsigned int)b ) : (unsigned int)b;
    const unsigned int count     = magnitude % 16u;
    /* A right rotate by count is a left rotate by (16 - count) mod 16. */
    const unsigned int left      = ( b < 0 ) ? ( ( 16u - count ) % 16u ) : count;
    const unsigned int right     = ( 16u - left ) % 16u;
    ssp_m128 A;
    int n;

    A.i = a;

    for( n = 0; n < 8; n++ )
    {
        A.u16[n] = (ssp_u16)( ( A.u16[n] << left ) | ( A.u16[n] >> right ) );
    }
    return A.i;
}

/**
 * Reference implementation of _mm_roti_epi32 / protd [SSE5].
 * Originally at line 3379 of SSEPlus_emulation_REF.h.
 *
 * Rotates each of the four 32-bit lanes of a by the same signed amount b.
 * A non-negative b rotates left, a negative b rotates right; only the
 * magnitude modulo 32 is used.
 *
 * The magnitude of b is formed in unsigned arithmetic so that b == INT_MIN
 * is well defined (plain -b would overflow).
 *
 * @param a  Four 32-bit lanes to rotate.
 * @param b  Signed rotate amount applied to every lane.
 * @return   The rotated lanes.
 */
SSP_FORCEINLINE __m128i ssp_roti_epi32_REF( __m128i a, const int b )
{
    const unsigned int magnitude = ( b < 0 ) ? ( 0u - (unsigned int)b ) : (unsigned int)b;
    const unsigned int count     = magnitude % 32u;
    /* A right rotate by count is a left rotate by (32 - count) mod 32. */
    const unsigned int left      = ( b < 0 ) ? ( ( 32u - count ) % 32u ) : count;
    const unsigned int right     = ( 32u - left ) % 32u;
    ssp_m128 A;
    int n;

    A.i = a;

    for( n = 0; n < 4; n++ )
    {
        A.u32[n] = ( A.u32[n] << left ) | ( A.u32[n] >> right );
    }
    return A.i;
}

/**
 * Reference implementation of _mm_roti_epi64 / protq [SSE5].
 * Originally at line 3410 of SSEPlus_emulation_REF.h.
 *
 * Rotates each of the two 64-bit lanes of a by the same signed amount b.
 * A non-negative b rotates left, a negative b rotates right; only the
 * magnitude modulo 64 is used.
 *
 * The magnitude of b is formed in unsigned arithmetic so that b == INT_MIN
 * is well defined (plain -b would overflow).
 *
 * @param a  Two 64-bit lanes to rotate.
 * @param b  Signed rotate amount applied to every lane.
 * @return   The rotated lanes.
 */
SSP_FORCEINLINE __m128i ssp_roti_epi64_REF( __m128i a, const int b )
{
    const unsigned int magnitude = ( b < 0 ) ? ( 0u - (unsigned int)b ) : (unsigned int)b;
    const unsigned int count     = magnitude % 64u;
    /* A right rotate by count is a left rotate by (64 - count) mod 64. */
    const unsigned int left      = ( b < 0 ) ? ( ( 64u - count ) % 64u ) : count;
    const unsigned int right     = ( 64u - left ) % 64u;
    ssp_m128 A;
    int n;

    A.i = a;

    for( n = 0; n < 2; n++ )
    {
        A.u64[n] = ( A.u64[n] << left ) | ( A.u64[n] >> right );
    }
    return A.i;
}

/**
 * Reference implementation of _mm_roti_epi8 / protb [SSE5].
 * Originally at line 3317 of SSEPlus_emulation_REF.h.
 *
 * Rotates each of the sixteen 8-bit lanes of a by the same signed amount b.
 * A non-negative b rotates left, a negative b rotates right; only the
 * magnitude modulo 8 is used.
 *
 * The magnitude of b is formed in unsigned arithmetic so that b == INT_MIN
 * is well defined (plain -b would overflow).
 *
 * @param a  Sixteen 8-bit lanes to rotate.
 * @param b  Signed rotate amount applied to every lane.
 * @return   The rotated lanes.
 */
SSP_FORCEINLINE __m128i ssp_roti_epi8_REF( __m128i a, const int b )
{
    const unsigned int magnitude = ( b < 0 ) ? ( 0u - (unsigned int)b ) : (unsigned int)b;
    const unsigned int count     = magnitude % 8u;
    /* A right rotate by count is a left rotate by (8 - count) mod 8. */
    const unsigned int left      = ( b < 0 ) ? ( ( 8u - count ) % 8u ) : count;
    const unsigned int right     = ( 8u - left ) % 8u;
    ssp_m128 A;
    int n;

    A.i = a;

    for( n = 0; n < 16; n++ )
    {
        A.u8[n] = (ssp_u8)( ( A.u8[n] << left ) | ( A.u8[n] >> right ) );
    }
    return A.i;
}

/**
 * Reference implementation of _mm_round_pd / roundpd [SSE4.1 and SSE5].
 * Originally at line 2091 of SSEPlus_emulation_REF.h.
 *
 * Rounds both doubles of val according to (iRoundMode & 0x3):
 *  - SSP_FROUND_CUR_DIRECTION: the lane is left untouched.
 *  - SSP_FROUND_TO_ZERO:       truncation through a cast to ssp_s64.
 *  - SSP_FROUND_TO_POS_INF:    ceil().
 *  - SSP_FROUND_TO_NEG_INF:    floor().
 *  - anything else (nearest):  add +0.5 / -0.5 by sign, then truncate through
 *                              ssp_s64; lanes rejected by the validity helper
 *                              go through ssp_number_changeSNanToQNaN_F64_REF.
 * In the three directed modes a lane rejected by
 * ssp_number_isValidNumber_F64_REF is passed through unchanged.
 *
 * @param val         Two doubles to round.
 * @param iRoundMode  Rounding selector; only the low two bits are examined.
 * @return            The rounded doubles.
 */
SSP_FORCEINLINE __m128d ssp_round_pd_REF( __m128d val, int iRoundMode )
{
    ssp_m128 Val;
    int lane;

    Val.d = val;

    for( lane = 0; lane < 2; ++lane )
    {
        /* Integer view of the lane, as expected by the number helpers. */
        ssp_s64 *bits = (ssp_s64*)( &Val.f64[lane] );

        switch( iRoundMode & 0x3 )
        {
        case SSP_FROUND_CUR_DIRECTION:
            break;

        case SSP_FROUND_TO_ZERO:
            if( ssp_number_isValidNumber_F64_REF( bits ) )
                Val.f64[lane] = (ssp_f64)( (ssp_s64)Val.f64[lane] );
            break;

        case SSP_FROUND_TO_POS_INF:
            if( ssp_number_isValidNumber_F64_REF( bits ) )
                Val.f64[lane] = ceil( Val.f64[lane] );
            break;

        case SSP_FROUND_TO_NEG_INF:
            if( ssp_number_isValidNumber_F64_REF( bits ) )
                Val.f64[lane] = floor( Val.f64[lane] );
            break;

        default: /* SSP_FROUND_TO_NEAREST_INT */
            if( ssp_number_isValidNumber_F64_REF( bits ) )
            {
                ssp_s64 nearest;

                if( Val.f64[lane] > 0 )
                    nearest = (ssp_s64)( Val.f64[lane] + 0.5 );
                else
                    nearest = (ssp_s64)( Val.f64[lane] - 0.5 );

                Val.f64[lane] = (ssp_f64)nearest;
            }
            else
            {
                Val.f64[lane] = ssp_number_changeSNanToQNaN_F64_REF( bits );
            }
            break;
        }
    }

    return Val.d;
}

/**
 * Reference implementation of _mm_round_ps / roundps [SSE4.1 and SSE5].
 * Originally at line 2145 of SSEPlus_emulation_REF.h.
 *
 * Rounds all four floats of val according to (iRoundMode & 0x3):
 *  - SSP_FROUND_CUR_DIRECTION: the lane is left untouched.
 *  - SSP_FROUND_TO_ZERO:       truncation through a cast to ssp_s32.
 *  - SSP_FROUND_TO_POS_INF:    ceil().
 *  - SSP_FROUND_TO_NEG_INF:    floor().
 *  - anything else (nearest):  add +0.5 / -0.5 by sign, then truncate through
 *                              ssp_s32; lanes rejected by the validity helper
 *                              go through ssp_number_changeSNanToQNaN_F32_REF.
 * In the three directed modes a lane rejected by
 * ssp_number_isValidNumber_F32_REF is passed through unchanged.
 * Afterwards, in every mode, any lane that compares equal to zero is
 * overwritten with +0.0f (so a negative zero never survives).
 *
 * @param val         Four floats to round.
 * @param iRoundMode  Rounding selector; only the low two bits are examined.
 * @return            The rounded floats.
 */
SSP_FORCEINLINE __m128 ssp_round_ps_REF( __m128 val, int iRoundMode )
{
    ssp_m128 Val;
    int lane;

    Val.f = val;

    for( lane = 0; lane < 4; ++lane )
    {
        /* Integer view of the lane, as expected by the number helpers. */
        ssp_s32 *bits = (ssp_s32*)( &Val.f32[lane] );

        switch( iRoundMode & 0x3 )
        {
        case SSP_FROUND_CUR_DIRECTION:
            break;

        case SSP_FROUND_TO_ZERO:
            /* Positive and negative inputs are truncated the same way. */
            if( ssp_number_isValidNumber_F32_REF( bits ) )
                Val.f32[lane] = (ssp_f32)( (ssp_s32)Val.f32[lane] );
            break;

        case SSP_FROUND_TO_POS_INF:
            if( ssp_number_isValidNumber_F32_REF( bits ) )
                Val.f32[lane] = (ssp_f32)ceil( Val.f32[lane] );
            break;

        case SSP_FROUND_TO_NEG_INF:
            if( ssp_number_isValidNumber_F32_REF( bits ) )
                Val.f32[lane] = (ssp_f32)floor( Val.f32[lane] );
            break;

        default: /* SSP_FROUND_TO_NEAREST_INT */
            if( ssp_number_isValidNumber_F32_REF( bits ) )
            {
                ssp_s32 nearest;

                if( Val.f32[lane] > 0 )
                    nearest = (ssp_s32)( Val.f32[lane] + 0.5 );
                else
                    nearest = (ssp_s32)( Val.f32[lane] - 0.5 );

                Val.f32[lane] = (ssp_f32)nearest;
            }
            else
            {
                Val.f32[lane] = ssp_number_changeSNanToQNaN_F32_REF( bits );
            }
            break;
        }
    }

    /* -0.0f compares equal to +0.0f, so this rewrites every zero as +0.0f. */
    for( lane = 0; lane < 4; ++lane )
    {
        if( -0.0f == Val.f32[lane] )
            Val.f32[lane] = +0.0f;
    }

    return Val.f;
}

/**
 * Reference implementation of _mm_round_sd / roundsd [SSE4.1 and SSE5].
 * Originally at line 2273 of SSEPlus_emulation_REF.h.
 *
 * Rounds element 0 of val according to (iRoundMode & 0x3) and stores it in
 * element 0 of dst; element 1 of dst is returned unchanged.
 *  - SSP_FROUND_CUR_DIRECTION: dst is returned as is.
 *  - SSP_FROUND_TO_ZERO:       truncation through a cast to ssp_s64.
 *  - SSP_FROUND_TO_POS_INF:    ceil().
 *  - SSP_FROUND_TO_NEG_INF:    floor().
 *  - anything else (nearest):  add +0.5 / -0.5 by sign, then truncate; inputs
 *                              rejected by the validity helper go through
 *                              ssp_number_changeSNanToQNaN_F64_REF.
 * In the three directed modes an input rejected by
 * ssp_number_isValidNumber_F64_REF leaves dst untouched.
 *
 * @param dst         Supplies the upper element (and the result when nothing is written).
 * @param val         Element 0 is the value to round.
 * @param iRoundMode  Rounding selector; only the low two bits are examined.
 * @return            dst with element 0 replaced by the rounded value.
 */
SSP_FORCEINLINE __m128d ssp_round_sd_REF( __m128d dst, __m128d val, int iRoundMode )
{
    const int mode = iRoundMode & 0x3;
    ssp_m128 Dst, Val;
    ssp_s64 *bits;

    Dst.d = dst;
    Val.d = val;

    if( mode == SSP_FROUND_CUR_DIRECTION )
        return Dst.d;

    /* Integer view of the input, as expected by the number helpers. */
    bits = (ssp_s64*)( &Val.f64[0] );

    if( mode == SSP_FROUND_TO_ZERO )
    {
        if( ssp_number_isValidNumber_F64_REF( bits ) )
            Dst.f64[0] = (ssp_f64)( (ssp_s64)Val.f64[0] );
    }
    else if( mode == SSP_FROUND_TO_POS_INF )
    {
        if( ssp_number_isValidNumber_F64_REF( bits ) )
            Dst.f64[0] = ceil( Val.f64[0] );
    }
    else if( mode == SSP_FROUND_TO_NEG_INF )
    {
        if( ssp_number_isValidNumber_F64_REF( bits ) )
            Dst.f64[0] = floor( Val.f64[0] );
    }
    else /* SSP_FROUND_TO_NEAREST_INT */
    {
        if( ssp_number_isValidNumber_F64_REF( bits ) )
        {
            ssp_s64 nearest;

            if( Val.f64[0] > 0 )
                nearest = (ssp_s64)( Val.f64[0] + 0.5 );
            else
                nearest = (ssp_s64)( Val.f64[0] - 0.5 );

            Dst.f64[0] = (ssp_f64)nearest;
        }
        else
        {
            Dst.f64[0] = ssp_number_changeSNanToQNaN_F64_REF( bits );
        }
    }

    return Dst.d;
}

/**
 * Reference implementation of _mm_round_ss / roundss [SSE4.1 and SSE5].
 * Originally at line 2310 of SSEPlus_emulation_REF.h.
 *
 * Rounds element 0 of val according to (iRoundMode & 0x3) and stores it in
 * element 0 of dst; the other three elements of dst are returned unchanged.
 *  - SSP_FROUND_CUR_DIRECTION: dst is returned as is.
 *  - SSP_FROUND_TO_ZERO:       truncation through a cast to ssp_s32; when the
 *                              input is <= 0 the result's sign bit is forced on.
 *  - SSP_FROUND_TO_POS_INF:    ceil().
 *  - SSP_FROUND_TO_NEG_INF:    floor().
 *  - anything else (nearest):  add +0.5 / -0.5 by sign, then truncate; inputs
 *                              rejected by the validity helper go through
 *                              ssp_number_changeSNanToQNaN_F32_REF.
 * In the three directed modes an input rejected by
 * ssp_number_isValidNumber_F32_REF leaves dst untouched.
 *
 * @param dst         Supplies the upper elements (and the result when nothing is written).
 * @param val         Element 0 is the value to round.
 * @param iRoundMode  Rounding selector; only the low two bits are examined.
 * @return            dst with element 0 replaced by the rounded value.
 */
SSP_FORCEINLINE __m128 ssp_round_ss_REF( __m128 dst, __m128 val, int iRoundMode )
{
    const int mode = iRoundMode & 0x3;
    ssp_m128 Dst, Val;
    ssp_s32 *bits;

    Dst.f = dst;
    Val.f = val;

    if( mode == SSP_FROUND_CUR_DIRECTION )
        return Dst.f;

    /* Integer view of the input, as expected by the number helpers. */
    bits = (ssp_s32*)( &Val.f32[0] );

    if( mode == SSP_FROUND_TO_ZERO )
    {
        if( ssp_number_isValidNumber_F32_REF( bits ) )
        {
            Dst.f32[0] = (ssp_f32)( (ssp_s32)Val.f32[0] );

            /* The comparison is against integer zero, so it also holds for +0.0f. */
            if( Val.f32[0] <= -0 )
                Dst.s32[0] |= 0x80000000;
        }
    }
    else if( mode == SSP_FROUND_TO_POS_INF )
    {
        if( ssp_number_isValidNumber_F32_REF( bits ) )
            Dst.f32[0] = (ssp_f32)ceil( Val.f32[0] );
    }
    else if( mode == SSP_FROUND_TO_NEG_INF )
    {
        if( ssp_number_isValidNumber_F32_REF( bits ) )
            Dst.f32[0] = (ssp_f32)floor( Val.f32[0] );
    }
    else /* SSP_FROUND_TO_NEAREST_INT */
    {
        if( ssp_number_isValidNumber_F32_REF( bits ) )
        {
            ssp_s32 nearest;

            if( Val.f32[0] > 0 )
                nearest = (ssp_s32)( Val.f32[0] + 0.5 );
            else
                nearest = (ssp_s32)( Val.f32[0] - 0.5 );

            Dst.f32[0] = (ssp_f32)nearest;
        }
        else
        {
            Dst.f32[0] = ssp_number_changeSNanToQNaN_F32_REF( bits );
        }
    }

    return Dst.f;
}

/**
 * Reference implementation of _mm_sha_epi16 / pshaw [SSE5].
 * Originally at line 3520 of SSEPlus_emulation_REF.h.
 *
 * Shifts each of the eight 16-bit lanes of a by a signed per-lane amount.
 * The amount for lane n is the signed byte b.s8[2*n]: a non-negative amount
 * shifts left, a negative amount shifts right on the signed value. Only the
 * magnitude modulo 16 is used.
 *
 * The left shift is done on the unsigned view of the lane, because
 * left-shifting a negative signed value is undefined behaviour in C; the
 * resulting bit pattern is the one expected from a two's-complement shift.
 * The right shift of a negative value stays implementation-defined (it is
 * arithmetic on mainstream compilers).
 *
 * @param a  Eight signed 16-bit lanes to shift.
 * @param b  Per-lane shift amounts (byte 2*n is used for lane n).
 * @return   The shifted lanes.
 */
SSP_FORCEINLINE __m128i ssp_sha_epi16_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int n;

    A.i = a;
    B.i = b;

    for( n = 0; n < 8; n++ )
    {
        const int amount = B.s8[n*2];

        if( amount < 0 )
        {
            const unsigned int count = (unsigned int)( ( -amount ) % 16 );
            A.s16[n] = A.s16[n] >> count;
        }
        else
        {
            const unsigned int count = (unsigned int)( amount % 16 );
            A.u16[n] = A.u16[n] << count;
        }
    }

    return A.i;
}

/**
 * Reference implementation of _mm_sha_epi32 / pshad [SSE5].
 * Originally at line 3569 of SSEPlus_emulation_REF.h.
 *
 * Shifts each of the four 32-bit lanes of a by a signed per-lane amount.
 * The amount for lane n is the signed byte b.s8[4*n]: a non-negative amount
 * shifts left, a negative amount shifts right on the signed value. Only the
 * magnitude modulo 32 is used.
 *
 * The left shift is done on the unsigned view of the lane, because
 * left-shifting a negative signed value (or overflowing a positive one) is
 * undefined behaviour in C; the resulting bit pattern is the one expected
 * from a two's-complement shift. The right shift of a negative value stays
 * implementation-defined (it is arithmetic on mainstream compilers).
 *
 * @param a  Four signed 32-bit lanes to shift.
 * @param b  Per-lane shift amounts (byte 4*n is used for lane n).
 * @return   The shifted lanes.
 */
SSP_FORCEINLINE __m128i ssp_sha_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int n;

    A.i = a;
    B.i = b;

    for( n = 0; n < 4; n++ )
    {
        const int amount = B.s8[n*4];

        if( amount < 0 )
        {
            const unsigned int count = (unsigned int)( ( -amount ) % 32 );
            A.s32[n] = A.s32[n] >> count;
        }
        else
        {
            const unsigned int count = (unsigned int)( amount % 32 );
            A.u32[n] = A.u32[n] << count;
        }
    }

    return A.i;
}

/**
 * Reference implementation of _mm_sha_epi64 / pshaq [SSE5].
 * Originally at line 3618 of SSEPlus_emulation_REF.h.
 *
 * Shifts each of the two 64-bit lanes of a by a signed per-lane amount.
 * The amount for lane n is the signed byte b.s8[8*n]: a non-negative amount
 * shifts left, a negative amount shifts right on the signed value. Only the
 * magnitude modulo 64 is used.
 *
 * The left shift is done on the unsigned view of the lane, because
 * left-shifting a negative signed value (or overflowing a positive one) is
 * undefined behaviour in C; the resulting bit pattern is the one expected
 * from a two's-complement shift. The right shift of a negative value stays
 * implementation-defined (it is arithmetic on mainstream compilers).
 *
 * @param a  Two signed 64-bit lanes to shift.
 * @param b  Per-lane shift amounts (byte 8*n is used for lane n).
 * @return   The shifted lanes.
 */
SSP_FORCEINLINE __m128i ssp_sha_epi64_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int n;

    A.i = a;
    B.i = b;

    for( n = 0; n < 2; n++ )
    {
        const int amount = B.s8[n*8];

        if( amount < 0 )
        {
            const unsigned int count = (unsigned int)( ( -amount ) % 64 );
            A.s64[n] = A.s64[n] >> count;
        }
        else
        {
            const unsigned int count = (unsigned int)( amount % 64 );
            A.u64[n] = A.u64[n] << count;
        }
    }

    return A.i;
}

/**
 * Reference implementation of _mm_sha_epi8 / pshab [SSE5].
 * Originally at line 3471 of SSEPlus_emulation_REF.h.
 *
 * Shifts each of the sixteen 8-bit lanes of a by the signed amount in the
 * matching byte of b: a non-negative amount shifts left, a negative amount
 * shifts right on the signed value. Only the magnitude modulo 8 is used.
 *
 * The left shift is done on the unsigned view of the lane, because
 * left-shifting a negative signed value is undefined behaviour in C; the
 * resulting bit pattern is the one expected from a two's-complement shift.
 * The right shift of a negative value stays implementation-defined (it is
 * arithmetic on mainstream compilers).
 *
 * @param a  Sixteen signed 8-bit lanes to shift.
 * @param b  Sixteen signed 8-bit per-lane shift amounts.
 * @return   The shifted lanes.
 */
SSP_FORCEINLINE __m128i ssp_sha_epi8_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int n;

    A.i = a;
    B.i = b;

    for( n = 0; n < 16; n++ )
    {
        const int amount = B.s8[n];

        if( amount < 0 )
        {
            const unsigned int count = (unsigned int)( ( -amount ) % 8 );
            A.s8[n] = A.s8[n] >> count;
        }
        else
        {
            const unsigned int count = (unsigned int)( amount % 8 );
            A.u8[n] = A.u8[n] << count;
        }
    }

    return A.i;
}

/**
 * Reference implementation of _mm_shl_epi16 / pshlw [SSE5].
 * Originally at line 3496 of SSEPlus_emulation_REF.h.
 *
 * Logically shifts each of the eight 16-bit lanes of a by a signed per-lane
 * amount. The amount for lane n is the signed byte b.s8[2*n]: non-negative
 * shifts left, negative shifts right (zero fill). Only the magnitude modulo
 * 16 is used.
 *
 * @param a  Eight unsigned 16-bit lanes to shift.
 * @param b  Per-lane shift amounts (byte 2*n is used for lane n).
 * @return   The shifted lanes.
 */
SSP_FORCEINLINE __m128i ssp_shl_epi16_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int lane;

    A.i = a;
    B.i = b;

    for( lane = 0; lane < 8; ++lane )
    {
        const int amount = B.s8[lane * 2];

        if( amount < 0 )
            A.u16[lane] = A.u16[lane] >> (unsigned int)( ( -amount ) % 16 );
        else
            A.u16[lane] = A.u16[lane] << (unsigned int)( amount % 16 );
    }

    return A.i;
}

/**
 * Reference implementation of _mm_shl_epi32 / pshld [SSE5].
 * Originally at line 3545 of SSEPlus_emulation_REF.h.
 *
 * Logically shifts each of the four 32-bit lanes of a by a signed per-lane
 * amount. The amount for lane n is the signed byte b.s8[4*n]: non-negative
 * shifts left, negative shifts right (zero fill). Only the magnitude modulo
 * 32 is used.
 *
 * @param a  Four unsigned 32-bit lanes to shift.
 * @param b  Per-lane shift amounts (byte 4*n is used for lane n).
 * @return   The shifted lanes.
 */
SSP_FORCEINLINE __m128i ssp_shl_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int lane;

    A.i = a;
    B.i = b;

    for( lane = 0; lane < 4; ++lane )
    {
        const int amount = B.s8[lane * 4];

        if( amount < 0 )
            A.u32[lane] = A.u32[lane] >> (unsigned int)( ( -amount ) % 32 );
        else
            A.u32[lane] = A.u32[lane] << (unsigned int)( amount % 32 );
    }

    return A.i;
}

/**
 * Reference implementation of _mm_shl_epi64 / pshlq [SSE5].
 * Originally at line 3594 of SSEPlus_emulation_REF.h.
 *
 * Logically shifts each of the two 64-bit lanes of a by a signed per-lane
 * amount. The amount for lane n is the signed byte b.s8[8*n]: non-negative
 * shifts left, negative shifts right (zero fill). Only the magnitude modulo
 * 64 is used.
 *
 * @param a  Two unsigned 64-bit lanes to shift.
 * @param b  Per-lane shift amounts (byte 8*n is used for lane n).
 * @return   The shifted lanes.
 */
SSP_FORCEINLINE __m128i ssp_shl_epi64_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int lane;

    A.i = a;
    B.i = b;

    for( lane = 0; lane < 2; ++lane )
    {
        const int amount = B.s8[lane * 8];

        if( amount < 0 )
            A.u64[lane] = A.u64[lane] >> (unsigned int)( ( -amount ) % 64 );
        else
            A.u64[lane] = A.u64[lane] << (unsigned int)( amount % 64 );
    }

    return A.i;
}

/**
 * Reference implementation of _mm_shl_epi8 / pshlb [SSE5].
 * Originally at line 3447 of SSEPlus_emulation_REF.h.
 *
 * Logically shifts each of the sixteen 8-bit lanes of a by the signed amount
 * in the matching byte of b: non-negative shifts left, negative shifts right
 * (zero fill). Only the magnitude modulo 8 is used.
 *
 * @param a  Sixteen unsigned 8-bit lanes to shift.
 * @param b  Sixteen signed 8-bit per-lane shift amounts.
 * @return   The shifted lanes.
 */
SSP_FORCEINLINE __m128i ssp_shl_epi8_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int lane;

    A.i = a;
    B.i = b;

    for( lane = 0; lane < 16; ++lane )
    {
        const int amount = B.s8[lane];

        if( amount < 0 )
            A.u8[lane] = A.u8[lane] >> (unsigned int)( ( -amount ) % 8 );
        else
            A.u8[lane] = A.u8[lane] << (unsigned int)( amount % 8 );
    }

    return A.i;
}

/**
 * Reference implementation of _mm_shuffle_epi8 [SSSE3].
 * Originally at line 2707 of SSEPlus_emulation_REF.h.
 *
 * Builds a 16-byte result by selecting bytes of a under control of mask.
 * For each output byte n: if bit 7 of mask byte n is set the result is 0,
 * otherwise it is the byte of a indexed by the low four bits of mask byte n.
 *
 * @param a     Sixteen source bytes.
 * @param mask  Sixteen control bytes (bit 7 = zero the lane, bits 3:0 = source index).
 * @return      The shuffled bytes.
 */
SSP_FORCEINLINE __m128i ssp_shuffle_epi8_REF( __m128i a, __m128i mask )
{
    ssp_m128 A, MSK, B;
    int lane;

    A.i   = a;
    MSK.i = mask;

    for( lane = 0; lane < 16; ++lane )
    {
        if( MSK.s8[lane] & 0x80 )
            B.s8[lane] = 0;
        else
            B.s8[lane] = A.s8[ MSK.s8[lane] & 0xf ];
    }

    return B.i;
}

/**
 * Reference implementation of _mm_shuffle_pi8 [SSSE3].
 * Originally at line 2736 of SSEPlus_emulation_REF.h.
 * NOTE: The user must call _mm_empty() after a call to this function.
 *
 * Builds an 8-byte result by selecting bytes of a under control of mask.
 * For each output byte n: if bit 7 of mask byte n is set the result is 0,
 * otherwise it is the byte of a indexed by the low three bits of mask byte n.
 *
 * The index is masked with 0x7 because the 64-bit source has only eight
 * bytes; a 0xf mask would allow indices 8..15 and read past the end of
 * the source.
 *
 * @param a     Eight source bytes.
 * @param mask  Eight control bytes (bit 7 = zero the lane, bits 2:0 = source index).
 * @return      The shuffled bytes.
 */
SSP_FORCEINLINE __m64 ssp_shuffle_pi8_REF( __m64 a, __m64 mask )
{
    ssp_m64 A, MSK, B;
    int n;

    A.m64   = a;
    MSK.m64 = mask;

    for( n = 0; n < 8; n++ )
    {
        B.s8[n] = ( MSK.s8[n] & 0x80 ) ? 0 : A.s8[ MSK.s8[n] & 0x7 ];
    }

    return B.m64;
}

/**
 * Reference implementation of _mm_sign_epi16 [SSSE3].
 * Originally at line 2785 of SSEPlus_emulation_REF.h.
 *
 * For each of the eight 16-bit lanes: negates the lane of a when the matching
 * lane of b is negative, zeroes it when that lane of b is zero, and leaves it
 * unchanged when that lane of b is positive.
 *
 * @param a  Eight signed 16-bit values.
 * @param b  Eight signed 16-bit values whose signs steer the result.
 * @return   The sign-adjusted lanes of a.
 */
SSP_FORCEINLINE __m128i ssp_sign_epi16_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int lane;

    A.i = a;
    B.i = b;

    for( lane = 0; lane < 8; ++lane )
    {
        if( B.s16[lane] < 0 )
            A.s16[lane] = -A.s16[lane];
        else if( B.s16[lane] == 0 )
            A.s16[lane] = 0;
        /* positive control value: lane is kept as is */
    }

    return A.i;
}

/**
 * Reference implementation of _mm_sign_epi32 [SSSE3].
 * Originally at line 2805 of SSEPlus_emulation_REF.h.
 *
 * For each of the four 32-bit lanes: negates the lane of a when the matching
 * lane of b is negative, zeroes it when that lane of b is zero, and leaves it
 * unchanged when that lane of b is positive.
 *
 * The negation is done on the unsigned view of the lane so that the most
 * negative value does not trigger signed-overflow undefined behaviour; its
 * bit pattern is left unchanged, as a two's-complement negate would do.
 *
 * @param a  Four signed 32-bit values.
 * @param b  Four signed 32-bit values whose signs steer the result.
 * @return   The sign-adjusted lanes of a.
 */
SSP_FORCEINLINE __m128i ssp_sign_epi32_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int n;

    A.i = a;
    B.i = b;

    for( n = 0; n < 4; n++ )
    {
        if( B.s32[n] < 0 )
            A.u32[n] = 0u - A.u32[n];
        else if( B.s32[n] == 0 )
            A.s32[n] = 0;
    }

    return A.i;
}

/**
 * Reference implementation of _mm_sign_epi8 [SSSE3].
 * Originally at line 2757 of SSEPlus_emulation_REF.h.
 *
 * For each of the sixteen 8-bit lanes: negates the lane of a when the
 * matching lane of b is negative, zeroes it when that lane of b is zero, and
 * leaves it unchanged when that lane of b is positive.
 *
 * @param a  Sixteen signed 8-bit values.
 * @param b  Sixteen signed 8-bit values whose signs steer the result.
 * @return   The sign-adjusted lanes of a.
 */
SSP_FORCEINLINE __m128i ssp_sign_epi8_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int lane;

    A.i = a;
    B.i = b;

    for( lane = 0; lane < 16; ++lane )
    {
        if( B.s8[lane] < 0 )
            A.s8[lane] = -A.s8[lane];
        else if( B.s8[lane] == 0 )
            A.s8[lane] = 0;
        /* positive control value: lane is kept as is */
    }

    return A.i;
}

/**
 * Reference implementation of _mm_sign_pi16 [SSSE3].
 * Originally at line 2843 of SSEPlus_emulation_REF.h.
 * NOTE: The user must call _mm_empty() after a call to this function.
 *
 * For each of the four 16-bit lanes: negates the lane of a when the matching
 * lane of b is negative, zeroes it when that lane of b is zero, and leaves it
 * unchanged when that lane of b is positive.
 *
 * @param a  Four signed 16-bit values.
 * @param b  Four signed 16-bit values whose signs steer the result.
 * @return   The sign-adjusted lanes of a.
 */
SSP_FORCEINLINE __m64 ssp_sign_pi16_REF( __m64 a, __m64 b )
{
    ssp_m64 A, B;
    int lane;

    A.m64 = a;
    B.m64 = b;

    for( lane = 0; lane < 4; ++lane )
    {
        if( B.s16[lane] < 0 )
            A.s16[lane] = -A.s16[lane];
        else if( B.s16[lane] == 0 )
            A.s16[lane] = 0;
        /* positive control value: lane is kept as is */
    }

    return A.m64;
}

/**
 * Reference implementation of _mm_sign_pi32 [SSSE3].
 * Originally at line 2861 of SSEPlus_emulation_REF.h.
 * NOTE: The user must call _mm_empty() after a call to this function.
 *
 * For each of the two 32-bit lanes: negates the lane of a when the matching
 * lane of b is negative, zeroes it when that lane of b is zero, and leaves it
 * unchanged when that lane of b is positive.
 *
 * The negation is carried out in unsigned arithmetic and converted back, so
 * the most negative value does not trigger signed-overflow undefined
 * behaviour (the conversion back is implementation-defined and keeps the bit
 * pattern on two's-complement targets).
 *
 * @param a  Two signed 32-bit values.
 * @param b  Two signed 32-bit values whose signs steer the result.
 * @return   The sign-adjusted lanes of a.
 */
SSP_FORCEINLINE __m64 ssp_sign_pi32_REF( __m64 a, __m64 b )
{
    ssp_m64 A, B;
    int n;

    A.m64 = a;
    B.m64 = b;

    for( n = 0; n < 2; n++ )
    {
        if( B.s32[n] < 0 )
            A.s32[n] = 0u - (unsigned int)A.s32[n];
        else if( B.s32[n] == 0 )
            A.s32[n] = 0;
    }

    return A.m64;
}

/**
 * Reference implementation of _mm_sign_pi8 [SSSE3].
 * Originally at line 2821 of SSEPlus_emulation_REF.h.
 * NOTE(review): the sibling __m64 routines state that the user must call
 * _mm_empty() afterwards; presumably the same applies here - confirm.
 *
 * For each of the eight 8-bit lanes: negates the lane of a when the matching
 * lane of b is negative, zeroes it when that lane of b is zero, and leaves it
 * unchanged when that lane of b is positive.
 *
 * @param a  Eight signed 8-bit values.
 * @param b  Eight signed 8-bit values whose signs steer the result.
 * @return   The sign-adjusted lanes of a.
 */
SSP_FORCEINLINE __m64 ssp_sign_pi8_REF( __m64 a, __m64 b )
{
    ssp_m64 A, B;
    int lane;

    A.m64 = a;
    B.m64 = b;

    for( lane = 0; lane < 8; ++lane )
    {
        if( B.s8[lane] < 0 )
            A.s8[lane] = -A.s8[lane];
        else if( B.s8[lane] == 0 )
            A.s8[lane] = 0;
        /* positive control value: lane is kept as is */
    }

    return A.m64;
}

/**
 * Reference implementation of _mm_stream_load_si128 [SSE4.1].
 * Originally at line 1581 of SSEPlus_emulation_REF.h.
 *
 * Performs a plain load of the 128-bit value that p points to.
 *
 * @param p  Address of the value to load.
 * @return   The value read from *p.
 */
SSP_FORCEINLINE __m128i ssp_stream_load_si128_REF( __m128i *p )
{
    __m128i loaded = *p;
    return loaded;
}

/**
 * Reference implementation of _mm_stream_sd [SSE4a].
 * Originally at line 2874 of SSEPlus_emulation_REF.h.
 *
 * Stores element 0 of src to *dst with an ordinary assignment.
 *
 * @param dst  Destination for the double.
 * @param src  Vector whose element 0 is stored.
 */
SSP_FORCEINLINE void ssp_stream_sd_REF( double *dst, __m128d src )
{
    ssp_m128 source;

    source.d = src;
    dst[0] = source.f64[0];
}

/**
 * Reference implementation of _mm_stream_ss [SSE4a].
 * Originally at line 2882 of SSEPlus_emulation_REF.h.
 *
 * Stores element 0 of src to *dst with an ordinary assignment.
 *
 * @param dst  Destination for the float.
 * @param src  Vector whose element 0 is stored.
 */
SSP_FORCEINLINE void ssp_stream_ss_REF( float *dst, __m128 src )
{
    ssp_m128 source;

    source.f = src;
    dst[0] = source.f32[0];
}

/**
 * Reference implementation of _mm_testc_si128 / ptest [SSE4.1 and SSE5].
 * Originally at line 2354 of SSEPlus_emulation_REF.h.
 *
 * Returns the PTEST carry flag: 1 when (~a & b) is zero across all 128 bits,
 * i.e. every bit set in b is also set in a; otherwise 0. This is the same
 * carry-flag expression that ssp_testnzc_si128_REF evaluates.
 *
 * @param a  First operand (complemented before the AND).
 * @param b  Second operand.
 * @return   1 if (~a & b) == 0, else 0.
 */
SSP_FORCEINLINE int ssp_testc_si128_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;

    A.i = a;
    B.i = b;

    return ( ( ~A.s64[0] & B.s64[0] ) == 0 ) &&
           ( ( ~A.s64[1] & B.s64[1] ) == 0 );
}

/**
 * Reference implementation of _mm_testnzc_si128 / ptest [SSE4.1 and SSE5].
 * Originally at line 2376 of SSEPlus_emulation_REF.h.
 *
 * Computes a zero flag via ssp_testz_si128_REF(a, b) and a carry flag that is
 * 1 when (~a & b) is zero across all 128 bits. Returns 1 only when both
 * flags are 0.
 *
 * @param a  First operand.
 * @param b  Second operand.
 * @return   1 if neither the zero flag nor the carry flag is set, else 0.
 */
SSP_FORCEINLINE int ssp_testnzc_si128_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;
    int zeroFlag;
    int carryFlag;

    A.i = a;
    B.i = b;

    zeroFlag = ssp_testz_si128_REF( A.i, B.i );

    carryFlag = 1;
    if( ( ~A.s64[0] & B.s64[0] ) != 0 )
        carryFlag = 0;
    else if( ( ~A.s64[1] & B.s64[1] ) != 0 )
        carryFlag = 0;

    return !( zeroFlag || carryFlag );
}

/**
 * Reference implementation of _mm_testz_si128 / ptest [SSE4.1 and SSE5].
 * Originally at line 2365 of SSEPlus_emulation_REF.h.
 *
 * Returns the PTEST zero flag: 1 when (a & b) is zero across all 128 bits,
 * otherwise 0.
 *
 * @param a  First operand.
 * @param b  Second operand.
 * @return   1 if (a & b) == 0, else 0.
 */
SSP_FORCEINLINE int ssp_testz_si128_REF( __m128i a, __m128i b )
{
    ssp_m128 A, B;

    A.i = a;
    B.i = b;

    /* Any common bit in the low half already decides the answer. */
    if( ( A.s64[0] & B.s64[0] ) != 0 )
        return 0;

    return ( A.s64[1] & B.s64[1] ) == 0;
}


Generated on Wed May 21 13:44:14 2008 for "SSEPlus" by  doxygen 1.5.4