SSE2 Optimized


Arithmetic Operations

SSP_FORCEINLINE __m128 ssp_arithmetic_hadd4_dup_ps_SSE2 (__m128 a)
SSP_FORCEINLINE __m128i ssp_arithmetic_hadd4_epi16_SSE2 (__m128i a, const unsigned int offset)
SSP_FORCEINLINE __m128 ssp_round_ps_neg_zero_SSE2 (__m128 a, int iRoundMode)

Convert Operations

SSP_FORCEINLINE void ssp_convert_odd_even_epi16_SSE2 (__m128i *a, __m128i *b)
SSP_FORCEINLINE void ssp_convert_odd_even_ps_SSE2 (__m128 *a, __m128 *b)
SSP_FORCEINLINE void ssp_convert_odd_even_epi32_SSE2 (__m128i *a, __m128i *b)
SSP_FORCEINLINE void ssp_convert_3c_3p_epi8_SSE2 (__m128i *rgb1, __m128i *rgb2, __m128i *rgb3)
SSP_FORCEINLINE void ssp_convert_reverse_transpose_SSE2 (__m128i *a, __m128i *b, __m128i *c)
SSP_FORCEINLINE void ssp_convert_3p_3c_epi8_SSE2 (__m128i *r, __m128i *g, __m128i *b)
SSP_FORCEINLINE void ssp_convert_3c_3p_epi16_SSE2 (__m128i *rgb1, __m128i *rgb2, __m128i *rgb3)
SSP_FORCEINLINE void ssp_convert_3p_3c_epi16_SSE2 (__m128i *r, __m128i *g, __m128i *b)
SSP_FORCEINLINE void ssp_convert_3c_3p_epi32_SSE2 (__m128i *rgb1, __m128i *rgb2, __m128i *rgb3)
SSP_FORCEINLINE void ssp_convert_3p_3c_epi32_SSE2 (__m128i *r, __m128i *g, __m128i *b)
SSP_FORCEINLINE void ssp_convert_4c_4p_epi8_SSE2 (__m128i *rgba1, __m128i *rgba2, __m128i *rgba3, __m128i *rgba4)
SSP_FORCEINLINE void ssp_convert_4p_4c_epi8_SSE2 (__m128i *r, __m128i *g, __m128i *b, __m128i *a)
SSP_FORCEINLINE void ssp_convert_4c_4p_epi16_SSE2 (__m128i *rgba1, __m128i *rgba2, __m128i *rgba3, __m128i *rgba4)
SSP_FORCEINLINE void ssp_convert_4p_4c_epi16_SSE2 (__m128i *r, __m128i *g, __m128i *b, __m128i *a)
SSP_FORCEINLINE void ssp_convert_4c_4p_epi32_SSE2 (__m128i *rgba1, __m128i *rgba2, __m128i *rgba3, __m128i *rgba4)
SSP_FORCEINLINE void ssp_convert_4p_4c_epi32_SSE2 (__m128i *r, __m128i *g, __m128i *b, __m128i *a)

Logical Operations

SSP_FORCEINLINE __m128i ssp_logical_cmplt_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_logical_cmpgt_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_logical_cmplt_epu32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_logical_cmpgt_epu32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_logical_bitwise_select_SSE2 (__m128i a, __m128i b, __m128i mask)
SSP_FORCEINLINE __m128i ssp_movmask_imm8_to_epi32_SSE2 (int mask)
SSP_FORCEINLINE __m128i ssp_logical_cmplte_epi8 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_logical_cmpgte_epi8 (__m128i a, __m128i b)

Native Instructions

SSP_FORCEINLINE __m128i ssp_add_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_add_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_add_epi64_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_add_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_add_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_add_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m64 ssp_add_si64_SSE2 (__m64 a, __m64 b)
SSP_FORCEINLINE __m128i ssp_adds_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_adds_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_adds_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_adds_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_and_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_and_si128_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_andnot_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_andnot_si128_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_avg_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_avg_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE void ssp_clflush_SSE2 (void const *p)
SSP_FORCEINLINE __m128i ssp_cmpeq_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmpeq_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmpeq_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_cmpeq_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpeq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpge_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpge_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_cmpgt_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmpgt_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmpgt_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_cmpgt_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpgt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmple_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmple_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_cmplt_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmplt_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmplt_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_cmplt_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmplt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpneq_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpneq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnge_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnge_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpngt_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpngt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnle_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnle_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnlt_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnlt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpord_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpord_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpunord_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpunord_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comieq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comige_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comigt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comile_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comilt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comineq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cvtepi32_pd_SSE2 (__m128i a)
SSP_FORCEINLINE __m128 ssp_cvtepi32_ps_SSE2 (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtpd_epi32_SSE2 (__m128d a)
SSP_FORCEINLINE __m64 ssp_cvtpd_pi32_SSE2 (__m128d a)
SSP_FORCEINLINE __m128 ssp_cvtpd_ps_SSE2 (__m128d a)
SSP_FORCEINLINE __m128d ssp_cvtpi32_pd_SSE2 (__m64 a)
SSP_FORCEINLINE __m128i ssp_cvtps_epi32_SSE2 (__m128 a)
SSP_FORCEINLINE __m128d ssp_cvtps_pd_SSE2 (__m128 a)
SSP_FORCEINLINE int ssp_cvtsd_si32_SSE2 (__m128d a)
SSP_FORCEINLINE __m128 ssp_cvtsd_ss_SSE2 (__m128 a, __m128d b)
SSP_FORCEINLINE int ssp_cvtsi128_si32_SSE2 (__m128i a)
SSP_FORCEINLINE __m128d ssp_cvtsi32_sd_SSE2 (__m128d a, int b)
SSP_FORCEINLINE __m128i ssp_cvtsi32_si128_SSE2 (int a)
SSP_FORCEINLINE __m128d ssp_cvtss_sd_SSE2 (__m128d a, __m128 b)
SSP_FORCEINLINE __m128i ssp_cvttpd_epi32_SSE2 (__m128d a)
SSP_FORCEINLINE __m64 ssp_cvttpd_pi32_SSE2 (__m128d a)
SSP_FORCEINLINE __m128i ssp_cvttps_epi32_SSE2 (__m128 a)
SSP_FORCEINLINE int ssp_cvttsd_si32_SSE2 (__m128d a)
SSP_FORCEINLINE __m128d ssp_div_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_div_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_extract_epi16_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE __m128i ssp_insert_epi16_SSE2 (__m128i a, int b, int imm)
SSP_FORCEINLINE void ssp_lfence_SSE2 (void)
SSP_FORCEINLINE __m128d ssp_load_pd_SSE2 (double const *dp)
SSP_FORCEINLINE __m128d ssp_load_sd_SSE2 (double const *dp)
SSP_FORCEINLINE __m128i ssp_load_si128_SSE2 (__m128i const *p)
SSP_FORCEINLINE __m128d ssp_load1_pd_SSE2 (double const *dp)
SSP_FORCEINLINE __m128d ssp_loadh_pd_SSE2 (__m128d a, double const *dp)
SSP_FORCEINLINE __m128i ssp_loadl_epi64_SSE2 (__m128i const *p)
SSP_FORCEINLINE __m128d ssp_loadl_pd_SSE2 (__m128d a, double const *dp)
SSP_FORCEINLINE __m128d ssp_loadr_pd_SSE2 (double const *dp)
SSP_FORCEINLINE __m128d ssp_loadu_pd_SSE2 (double const *dp)
SSP_FORCEINLINE __m128i ssp_loadu_si128_SSE2 (__m128i const *p)
SSP_FORCEINLINE __m128i ssp_madd_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE void ssp_maskmoveu_si128_SSE2 (__m128i a, __m128i b, char *c)
SSP_FORCEINLINE __m128i ssp_max_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_max_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_max_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_max_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE void ssp_mfence_SSE2 (void)
SSP_FORCEINLINE __m128i ssp_min_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_min_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_min_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_min_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_move_epi64_SSE2 (__m128i a)
SSP_FORCEINLINE __m128d ssp_move_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_movemask_epi8_SSE2 (__m128i a)
SSP_FORCEINLINE int ssp_movemask_pd_SSE2 (__m128d a)
SSP_FORCEINLINE __m64 ssp_movepi64_pi64_SSE2 (__m128i a)
SSP_FORCEINLINE __m128i ssp_movpi64_epi64_SSE2 (__m64 a)
SSP_FORCEINLINE __m128i ssp_mul_epu32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_mul_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_mul_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m64 ssp_mul_su32_SSE2 (__m64 a, __m64 b)
SSP_FORCEINLINE __m128i ssp_mulhi_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_mulhi_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_mullo_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_or_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_or_si128_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_packs_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_packs_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_packus_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE void ssp_pause_SSE2 (void)
SSP_FORCEINLINE __m128i ssp_sad_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_set_epi16_SSE2 (short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
SSP_FORCEINLINE __m128i ssp_set_epi32_SSE2 (int i3, int i2, int i1, int i0)
SSP_FORCEINLINE __m128i ssp_set_epi64_SSE2 (__m64 a1, __m64 a0)
SSP_FORCEINLINE __m128i ssp_set_epi8_SSE2 (char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
SSP_FORCEINLINE __m128d ssp_set_pd_SSE2 (double a1, double a0)
SSP_FORCEINLINE __m128d ssp_set_sd_SSE2 (double w)
SSP_FORCEINLINE __m128i ssp_set1_epi16_SSE2 (short w)
SSP_FORCEINLINE __m128i ssp_set1_epi32_SSE2 (int i)
SSP_FORCEINLINE __m128i ssp_set1_epi64_SSE2 (__m64 a)
SSP_FORCEINLINE __m128i ssp_set1_epi8_SSE2 (char b)
SSP_FORCEINLINE __m128d ssp_set1_pd_SSE2 (double a)
SSP_FORCEINLINE __m128i ssp_setr_epi16_SSE2 (short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
SSP_FORCEINLINE __m128i ssp_setr_epi32_SSE2 (int i0, int i1, int i2, int i3)
SSP_FORCEINLINE __m128i ssp_setr_epi64_SSE2 (__m64 a0, __m64 a1)
SSP_FORCEINLINE __m128i ssp_setr_epi8_SSE2 (char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
SSP_FORCEINLINE __m128d ssp_setr_pd_SSE2 (double a0, double a1)
SSP_FORCEINLINE __m128d ssp_setzero_pd_SSE2 (void)
SSP_FORCEINLINE __m128i ssp_setzero_si128_SSE2 (void)
SSP_FORCEINLINE __m128i ssp_shuffle_epi32_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE __m128d ssp_shuffle_pd_SSE2 (__m128d a, __m128d b, int imm)
SSP_FORCEINLINE __m128i ssp_shufflehi_epi16_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE __m128i ssp_shufflelo_epi16_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE __m128i ssp_sll_epi16_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_sll_epi32_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_sll_epi64_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_slli_epi16_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_slli_epi32_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_slli_epi64_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_slli_si128_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE __m128d ssp_sqrt_pd_SSE2 (__m128d a)
SSP_FORCEINLINE __m128d ssp_sqrt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_sra_epi16_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_sra_epi32_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_srai_epi16_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_srai_epi32_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_srl_epi16_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_srl_epi32_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_srl_epi64_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_srli_epi16_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_srli_epi32_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_srli_epi64_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_srli_si128_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE void ssp_store_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_store_sd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_store_si128_SSE2 (__m128i *p, __m128i b)
SSP_FORCEINLINE void ssp_store1_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_storeh_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_storel_epi64_SSE2 (__m128i *p, __m128i b)
SSP_FORCEINLINE void ssp_storel_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_storer_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_storeu_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_storeu_si128_SSE2 (__m128i *p, __m128i b)
SSP_FORCEINLINE void ssp_stream_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_stream_si128_SSE2 (__m128i *p, __m128i a)
SSP_FORCEINLINE void ssp_stream_si32_SSE2 (int *p, int i)
SSP_FORCEINLINE __m128i ssp_sub_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sub_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sub_epi64_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sub_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_sub_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_sub_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m64 ssp_sub_si64_SSE2 (__m64 a, __m64 b)
SSP_FORCEINLINE __m128i ssp_subs_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_subs_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_subs_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_subs_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE int ssp_ucomieq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_ucomige_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_ucomigt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_ucomile_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_ucomilt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_ucomineq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_unpackhi_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpackhi_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpackhi_epi64_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpackhi_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_unpackhi_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_unpacklo_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpacklo_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpacklo_epi64_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpacklo_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_unpacklo_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_xor_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_xor_si128_SSE2 (__m128i a, __m128i b)

Function Documentation

SSP_FORCEINLINE __m128i ssp_add_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_add_epi16 [SSE2]. (Searches MSDN)

Definition at line 17 of file SSEPlus_native_SSE2.h.

00018 {
00019     return _mm_add_epi16( a, b );
00020 }

SSP_FORCEINLINE __m128i ssp_add_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_add_epi32 [SSE2]. (Searches MSDN)

Definition at line 22 of file SSEPlus_native_SSE2.h.

00023 {
00024     return _mm_add_epi32( a, b );
00025 }

SSP_FORCEINLINE __m128i ssp_add_epi64_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_add_epi64 [SSE2]. (Searches MSDN)

Definition at line 27 of file SSEPlus_native_SSE2.h.

00028 {
00029     return _mm_add_epi64( a, b );
00030 }

SSP_FORCEINLINE __m128i ssp_add_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_add_epi8 [SSE2]. (Searches MSDN)

Definition at line 32 of file SSEPlus_native_SSE2.h.

00033 {
00034     return _mm_add_epi8( a, b );
00035 }

SSP_FORCEINLINE __m128d ssp_add_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_add_pd [SSE2]. (Searches MSDN)

Definition at line 37 of file SSEPlus_native_SSE2.h.

00038 {
00039     return _mm_add_pd( a, b );
00040 }

SSP_FORCEINLINE __m128d ssp_add_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_add_sd [SSE2]. (Searches MSDN)

Definition at line 42 of file SSEPlus_native_SSE2.h.

00043 {
00044     return _mm_add_sd( a, b );
00045 }

SSP_FORCEINLINE __m64 ssp_add_si64_SSE2 ( __m64  a,
__m64  b 
)

SSE2 Native implementation of _mm_add_si64 [SSE2]. (Searches MSDN)

Definition at line 47 of file SSEPlus_native_SSE2.h.

00048 {
00049     return _mm_add_si64( a, b );
00050 }

SSP_FORCEINLINE __m128i ssp_adds_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_adds_epi16 [SSE2]. (Searches MSDN)

Definition at line 52 of file SSEPlus_native_SSE2.h.

00053 {
00054     return _mm_adds_epi16( a, b );
00055 }

SSP_FORCEINLINE __m128i ssp_adds_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_adds_epi8 [SSE2]. (Searches MSDN)

Definition at line 57 of file SSEPlus_native_SSE2.h.

00058 {
00059     return _mm_adds_epi8( a, b );
00060 }

SSP_FORCEINLINE __m128i ssp_adds_epu16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_adds_epu16 [SSE2]. (Searches MSDN)

Definition at line 62 of file SSEPlus_native_SSE2.h.

00063 {
00064     return _mm_adds_epu16( a, b );
00065 }

SSP_FORCEINLINE __m128i ssp_adds_epu8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_adds_epu8 [SSE2]. (Searches MSDN)

Definition at line 67 of file SSEPlus_native_SSE2.h.

00068 {
00069     return _mm_adds_epu8( a, b );
00070 }

SSP_FORCEINLINE __m128d ssp_and_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_and_pd [SSE2]. (Searches MSDN)

Definition at line 72 of file SSEPlus_native_SSE2.h.

00073 {
00074     return _mm_and_pd( a, b );
00075 }

SSP_FORCEINLINE __m128i ssp_and_si128_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_and_si128 [SSE2]. (Searches MSDN)

Definition at line 77 of file SSEPlus_native_SSE2.h.

00078 {
00079     return _mm_and_si128( a, b );
00080 }

SSP_FORCEINLINE __m128d ssp_andnot_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_andnot_pd [SSE2]. (Searches MSDN)

Definition at line 82 of file SSEPlus_native_SSE2.h.

00083 {
00084     return _mm_andnot_pd( a, b );
00085 }

SSP_FORCEINLINE __m128i ssp_andnot_si128_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_andnot_si128 [SSE2]. (Searches MSDN)

Definition at line 87 of file SSEPlus_native_SSE2.h.

00088 {
00089     return _mm_andnot_si128( a, b );
00090 }

SSP_FORCEINLINE __m128 ssp_arithmetic_hadd4_dup_ps_SSE2 ( __m128  a  ) 

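Horizontally adds the four single-precision elements of a and returns the total duplicated in all four elements of the result.

Definition at line 16 of file SSEPlus_arithmetic_SSE2.h.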

00017 {
00018     __m128 t;
00019     t = _mm_shuffle_ps( a, a, _MM_SHUFFLE(2, 3, 0, 1) );                //TODO shuflo, shuf hi
00020     a = _mm_add_ps( a, t );   
00021 
00022     t = _mm_shuffle_ps( a, a, _MM_SHUFFLE(1, 0, 3, 2) );                //TODO shuflo, shuf hi
00023     a = _mm_add_ps( a, t );
00024     return a;
00025 } 

SSP_FORCEINLINE __m128i ssp_arithmetic_hadd4_epi16_SSE2 ( __m128i  a,
const unsigned int  offset 
)

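Horizontally adds each group of four 16-bit elements of a and places each sum at position offset within its group (x marks a don't-care element).

in = a,b,c,d | e,f,g,h, offset = 0: out = x,x,x,a+b+c+d | x,x,x,e+f+g+h

in = a,b,c,d | e,f,g,h, offset = 3: out = a+b+c+d,x,x,x | e+f+g+h,x,x,x

offset indicates the desired position of the sum within each group of four (0,1,2,3)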

Definition at line 38 of file SSEPlus_arithmetic_SSE2.h.

00039 {
00040     ssp_m128 A,B;
00041     A.i = a;                                           //A = a, b, c, d | e, f, g, h
00042 
00043     if( offset >= 2 ) B.i = _mm_slli_si128( A.i, 4 );  //B = c, d, x, x | g, h, x, x
00044     else              B.i = _mm_srli_si128( A.i, 4 );  //B = x, x, a, b | x, x, e, f
00045 
00046     A.i = _mm_add_epi16 ( A.i, B.i );      
00047 
00048     if( offset & 1 )  B.i = _mm_slli_si128( A.i, 2 );  
00049     else              B.i = _mm_srli_si128( A.i, 2 ); 
00050   
00051     A.i = _mm_add_epi16 ( A.i, B.i );      
00052     return A.i;
00053 }  

SSP_FORCEINLINE __m128i ssp_avg_epu16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_avg_epu16 [SSE2]. (Searches MSDN)

Definition at line 92 of file SSEPlus_native_SSE2.h.

00093 {
00094     return _mm_avg_epu16( a, b );
00095 }

SSP_FORCEINLINE __m128i ssp_avg_epu8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_avg_epu8 [SSE2]. (Searches MSDN)

Definition at line 97 of file SSEPlus_native_SSE2.h.

00098 {
00099     return _mm_avg_epu8( a, b );
00100 }

SSP_FORCEINLINE void ssp_clflush_SSE2 ( void const *  p  ) 

SSE2 Native implementation of _mm_clflush [SSE2]. (Searches MSDN)

Definition at line 139 of file SSEPlus_native_SSE2.h.

00140 {
00141     _mm_clflush( p );
00142 }

SSP_FORCEINLINE __m128i ssp_cmpeq_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpeq_epi16 [SSE2]. (Searches MSDN)

Definition at line 144 of file SSEPlus_native_SSE2.h.

00145 {
00146     return _mm_cmpeq_epi16( a, b );
00147 }

SSP_FORCEINLINE __m128i ssp_cmpeq_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpeq_epi32 [SSE2]. (Searches MSDN)

Definition at line 149 of file SSEPlus_native_SSE2.h.

00150 {
00151     return _mm_cmpeq_epi32( a, b );
00152 }

SSP_FORCEINLINE __m128i ssp_cmpeq_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpeq_epi8 [SSE2]. (Searches MSDN)

Definition at line 154 of file SSEPlus_native_SSE2.h.

00155 {
00156     return _mm_cmpeq_epi8( a, b );
00157 }

SSP_FORCEINLINE __m128d ssp_cmpeq_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpeq_pd [SSE2]. (Searches MSDN)

Definition at line 159 of file SSEPlus_native_SSE2.h.

00160 {
00161     return _mm_cmpeq_pd( a, b );
00162 }

SSP_FORCEINLINE __m128d ssp_cmpeq_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpeq_sd [SSE2]. (Searches MSDN)

Definition at line 164 of file SSEPlus_native_SSE2.h.

00165 {
00166     return _mm_cmpeq_sd( a, b );
00167 }

SSP_FORCEINLINE __m128d ssp_cmpge_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpge_pd [SSE2]. (Searches MSDN)

Definition at line 169 of file SSEPlus_native_SSE2.h.

00170 {
00171     return _mm_cmpge_pd( a, b );
00172 }

SSP_FORCEINLINE __m128d ssp_cmpge_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpge_sd [SSE2]. (Searches MSDN)

Definition at line 174 of file SSEPlus_native_SSE2.h.

00175 {
00176     return _mm_cmpge_sd( a, b );
00177 }

SSP_FORCEINLINE __m128i ssp_cmpgt_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpgt_epi16 [SSE2]. (Searches MSDN)

Definition at line 179 of file SSEPlus_native_SSE2.h.

00180 {
00181     return _mm_cmpgt_epi16( a, b );
00182 }

SSP_FORCEINLINE __m128i ssp_cmpgt_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpgt_epi32 [SSE2]. (Searches MSDN)

Definition at line 184 of file SSEPlus_native_SSE2.h.

00185 {
00186     return _mm_cmpgt_epi32( a, b );
00187 }

SSP_FORCEINLINE __m128i ssp_cmpgt_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpgt_epi8 [SSE2]. (Searches MSDN)

Definition at line 189 of file SSEPlus_native_SSE2.h.

00190 {
00191     return _mm_cmpgt_epi8( a, b );
00192 }

SSP_FORCEINLINE __m128d ssp_cmpgt_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpgt_pd [SSE2]. (Searches MSDN)

Definition at line 194 of file SSEPlus_native_SSE2.h.

00195 {
00196     return _mm_cmpgt_pd( a, b );
00197 }

SSP_FORCEINLINE __m128d ssp_cmpgt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpgt_sd [SSE2]. (Searches MSDN)

Definition at line 199 of file SSEPlus_native_SSE2.h.

00200 {
00201     return _mm_cmpgt_sd( a, b );
00202 }

SSP_FORCEINLINE __m128d ssp_cmple_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmple_pd [SSE2]. (Searches MSDN)

Definition at line 204 of file SSEPlus_native_SSE2.h.

00205 {
00206     return _mm_cmple_pd( a, b );
00207 }

SSP_FORCEINLINE __m128d ssp_cmple_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmple_sd [SSE2]. (Searches MSDN)

Definition at line 209 of file SSEPlus_native_SSE2.h.

00210 {
00211     return _mm_cmple_sd( a, b );
00212 }

SSP_FORCEINLINE __m128i ssp_cmplt_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmplt_epi16 [SSE2]. (Searches MSDN)

Definition at line 214 of file SSEPlus_native_SSE2.h.

00215 {
00216     return _mm_cmplt_epi16( a, b );
00217 }

SSP_FORCEINLINE __m128i ssp_cmplt_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmplt_epi32 [SSE2]. (Searches MSDN)

Definition at line 219 of file SSEPlus_native_SSE2.h.

00220 {
00221     return _mm_cmplt_epi32( a, b );
00222 }

SSP_FORCEINLINE __m128i ssp_cmplt_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmplt_epi8 [SSE2]. (Searches MSDN)

Definition at line 224 of file SSEPlus_native_SSE2.h.

00225 {
00226     return _mm_cmplt_epi8( a, b );
00227 }

SSP_FORCEINLINE __m128d ssp_cmplt_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmplt_pd [SSE2]. (Searches MSDN)

Definition at line 229 of file SSEPlus_native_SSE2.h.

00230 {
00231     return _mm_cmplt_pd( a, b );
00232 }

SSP_FORCEINLINE __m128d ssp_cmplt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmplt_sd [SSE2]. (Searches MSDN)

Definition at line 234 of file SSEPlus_native_SSE2.h.

00235 {
00236     return _mm_cmplt_sd( a, b );
00237 }

SSP_FORCEINLINE __m128d ssp_cmpneq_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpneq_pd [SSE2]. (Searches MSDN)

Definition at line 239 of file SSEPlus_native_SSE2.h.

00240 {
00241     return _mm_cmpneq_pd( a, b );
00242 }

SSP_FORCEINLINE __m128d ssp_cmpneq_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpneq_sd [SSE2]. (Searches MSDN)

Definition at line 244 of file SSEPlus_native_SSE2.h.

00245 {
00246     return _mm_cmpneq_sd( a, b );
00247 }

SSP_FORCEINLINE __m128d ssp_cmpnge_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpnge_pd [SSE2]. (Searches MSDN)

Definition at line 249 of file SSEPlus_native_SSE2.h.

00250 {
00251     return _mm_cmpnge_pd( a, b );
00252 }

SSP_