SSE2 Optimized


Arithmetic Operations

SSP_FORCEINLINE __m128 ssp_arithmetic_hadd4_dup_ps_SSE2 (__m128 a)
SSP_FORCEINLINE __m128i ssp_arithmetic_hadd4_epi16_SSE2 (__m128i a, const unsigned int offset)
SSP_FORCEINLINE __m128 ssp_round_ps_neg_zero_SSE2 (__m128 a, int iRoundMode)

Convert Operations

SSP_FORCEINLINE void ssp_convert_odd_even_epi16_SSE2 (__m128i *a, __m128i *b)
SSP_FORCEINLINE void ssp_convert_odd_even_ps_SSE2 (__m128 *a, __m128 *b)
SSP_FORCEINLINE void ssp_convert_odd_even_epi32_SSE2 (__m128i *a, __m128i *b)
SSP_FORCEINLINE void ssp_convert_3c_3p_epi8_SSE2 (__m128i *rgb1, __m128i *rgb2, __m128i *rgb3)
SSP_FORCEINLINE void ssp_convert_reverse_transpose_SSE2 (__m128i *a, __m128i *b, __m128i *c)
SSP_FORCEINLINE void ssp_convert_3p_3c_epi8_SSE2 (__m128i *r, __m128i *g, __m128i *b)
SSP_FORCEINLINE void ssp_convert_3c_3p_epi16_SSE2 (__m128i *rgb1, __m128i *rgb2, __m128i *rgb3)
SSP_FORCEINLINE void ssp_convert_3p_3c_epi16_SSE2 (__m128i *r, __m128i *g, __m128i *b)
SSP_FORCEINLINE void ssp_convert_3c_3p_epi32_SSE2 (__m128i *rgb1, __m128i *rgb2, __m128i *rgb3)
SSP_FORCEINLINE void ssp_convert_3p_3c_epi32_SSE2 (__m128i *r, __m128i *g, __m128i *b)
SSP_FORCEINLINE void ssp_convert_4c_4p_epi8_SSE2 (__m128i *rgba1, __m128i *rgba2, __m128i *rgba3, __m128i *rgba4)
SSP_FORCEINLINE void ssp_convert_4p_4c_epi8_SSE2 (__m128i *r, __m128i *g, __m128i *b, __m128i *a)
SSP_FORCEINLINE void ssp_convert_4c_4p_epi16_SSE2 (__m128i *rgba1, __m128i *rgba2, __m128i *rgba3, __m128i *rgba4)
SSP_FORCEINLINE void ssp_convert_4p_4c_epi16_SSE2 (__m128i *r, __m128i *g, __m128i *b, __m128i *a)
SSP_FORCEINLINE void ssp_convert_4c_4p_epi32_SSE2 (__m128i *rgba1, __m128i *rgba2, __m128i *rgba3, __m128i *rgba4)
SSP_FORCEINLINE void ssp_convert_4p_4c_epi32_SSE2 (__m128i *r, __m128i *g, __m128i *b, __m128i *a)

Logical Operations

SSP_FORCEINLINE __m128i ssp_logical_cmplt_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_logical_cmpgt_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_logical_cmplt_epu32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_logical_cmpgt_epu32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_logical_bitwise_select_SSE2 (__m128i a, __m128i b, __m128i mask)
SSP_FORCEINLINE __m128i ssp_movmask_imm8_to_epi32_SSE2 (int mask)
SSP_FORCEINLINE __m128i ssp_logical_cmplte_epi8 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_logical_cmpgte_epi8 (__m128i a, __m128i b)

Native Instructions

SSP_FORCEINLINE __m128i ssp_add_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_add_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_add_epi64_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_add_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_add_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_add_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m64 ssp_add_si64_SSE2 (__m64 a, __m64 b)
SSP_FORCEINLINE __m128i ssp_adds_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_adds_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_adds_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_adds_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_and_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_and_si128_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_andnot_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_andnot_si128_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_avg_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_avg_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE void ssp_clflush_SSE2 (void const *p)
SSP_FORCEINLINE __m128i ssp_cmpeq_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmpeq_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmpeq_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_cmpeq_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpeq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpge_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpge_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_cmpgt_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmpgt_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmpgt_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_cmpgt_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpgt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmple_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmple_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_cmplt_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmplt_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_cmplt_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_cmplt_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmplt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpneq_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpneq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnge_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnge_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpngt_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpngt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnle_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnle_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnlt_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpnlt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpord_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpord_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpunord_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cmpunord_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comieq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comige_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comigt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comile_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comilt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_comineq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_cvtepi32_pd_SSE2 (__m128i a)
SSP_FORCEINLINE __m128 ssp_cvtepi32_ps_SSE2 (__m128i a)
SSP_FORCEINLINE __m128i ssp_cvtpd_epi32_SSE2 (__m128d a)
SSP_FORCEINLINE __m64 ssp_cvtpd_pi32_SSE2 (__m128d a)
SSP_FORCEINLINE __m128 ssp_cvtpd_ps_SSE2 (__m128d a)
SSP_FORCEINLINE __m128d ssp_cvtpi32_pd_SSE2 (__m64 a)
SSP_FORCEINLINE __m128i ssp_cvtps_epi32_SSE2 (__m128 a)
SSP_FORCEINLINE __m128d ssp_cvtps_pd_SSE2 (__m128 a)
SSP_FORCEINLINE int ssp_cvtsd_si32_SSE2 (__m128d a)
SSP_FORCEINLINE __m128 ssp_cvtsd_ss_SSE2 (__m128 a, __m128d b)
SSP_FORCEINLINE int ssp_cvtsi128_si32_SSE2 (__m128i a)
SSP_FORCEINLINE __m128d ssp_cvtsi32_sd_SSE2 (__m128d a, int b)
SSP_FORCEINLINE __m128i ssp_cvtsi32_si128_SSE2 (int a)
SSP_FORCEINLINE __m128d ssp_cvtss_sd_SSE2 (__m128d a, __m128 b)
SSP_FORCEINLINE __m128i ssp_cvttpd_epi32_SSE2 (__m128d a)
SSP_FORCEINLINE __m64 ssp_cvttpd_pi32_SSE2 (__m128d a)
SSP_FORCEINLINE __m128i ssp_cvttps_epi32_SSE2 (__m128 a)
SSP_FORCEINLINE int ssp_cvttsd_si32_SSE2 (__m128d a)
SSP_FORCEINLINE __m128d ssp_div_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_div_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_extract_epi16_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE __m128i ssp_insert_epi16_SSE2 (__m128i a, int b, int imm)
SSP_FORCEINLINE void ssp_lfence_SSE2 (void)
SSP_FORCEINLINE __m128d ssp_load_pd_SSE2 (double const *dp)
SSP_FORCEINLINE __m128d ssp_load_sd_SSE2 (double const *dp)
SSP_FORCEINLINE __m128i ssp_load_si128_SSE2 (__m128i const *p)
SSP_FORCEINLINE __m128d ssp_load1_pd_SSE2 (double const *dp)
SSP_FORCEINLINE __m128d ssp_loadh_pd_SSE2 (__m128d a, double const *dp)
SSP_FORCEINLINE __m128i ssp_loadl_epi64_SSE2 (__m128i const *p)
SSP_FORCEINLINE __m128d ssp_loadl_pd_SSE2 (__m128d a, double const *dp)
SSP_FORCEINLINE __m128d ssp_loadr_pd_SSE2 (double const *dp)
SSP_FORCEINLINE __m128d ssp_loadu_pd_SSE2 (double const *dp)
SSP_FORCEINLINE __m128i ssp_loadu_si128_SSE2 (__m128i const *p)
SSP_FORCEINLINE __m128i ssp_madd_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE void ssp_maskmoveu_si128_SSE2 (__m128i a, __m128i b, char *c)
SSP_FORCEINLINE __m128i ssp_max_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_max_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_max_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_max_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE void ssp_mfence_SSE2 (void)
SSP_FORCEINLINE __m128i ssp_min_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_min_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_min_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_min_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_move_epi64_SSE2 (__m128i a)
SSP_FORCEINLINE __m128d ssp_move_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_movemask_epi8_SSE2 (__m128i a)
SSP_FORCEINLINE int ssp_movemask_pd_SSE2 (__m128d a)
SSP_FORCEINLINE __m64 ssp_movepi64_pi64_SSE2 (__m128i a)
SSP_FORCEINLINE __m128i ssp_movpi64_epi64_SSE2 (__m64 a)
SSP_FORCEINLINE __m128i ssp_mul_epu32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_mul_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_mul_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m64 ssp_mul_su32_SSE2 (__m64 a, __m64 b)
SSP_FORCEINLINE __m128i ssp_mulhi_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_mulhi_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_mullo_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_or_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_or_si128_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_packs_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_packs_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_packus_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE void ssp_pause_SSE2 (void)
SSP_FORCEINLINE __m128i ssp_sad_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_set_epi16_SSE2 (short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
SSP_FORCEINLINE __m128i ssp_set_epi32_SSE2 (int i3, int i2, int i1, int i0)
SSP_FORCEINLINE __m128i ssp_set_epi64_SSE2 (__m64 a1, __m64 a0)
SSP_FORCEINLINE __m128i ssp_set_epi8_SSE2 (char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
SSP_FORCEINLINE __m128d ssp_set_pd_SSE2 (double a1, double a0)
SSP_FORCEINLINE __m128d ssp_set_sd_SSE2 (double w)
SSP_FORCEINLINE __m128i ssp_set1_epi16_SSE2 (short w)
SSP_FORCEINLINE __m128i ssp_set1_epi32_SSE2 (int i)
SSP_FORCEINLINE __m128i ssp_set1_epi64_SSE2 (__m64 a)
SSP_FORCEINLINE __m128i ssp_set1_epi8_SSE2 (char b)
SSP_FORCEINLINE __m128d ssp_set1_pd_SSE2 (double a)
SSP_FORCEINLINE __m128i ssp_setr_epi16_SSE2 (short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
SSP_FORCEINLINE __m128i ssp_setr_epi32_SSE2 (int i0, int i1, int i2, int i3)
SSP_FORCEINLINE __m128i ssp_setr_epi64_SSE2 (__m64 a0, __m64 a1)
SSP_FORCEINLINE __m128i ssp_setr_epi8_SSE2 (char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
SSP_FORCEINLINE __m128d ssp_setr_pd_SSE2 (double a0, double a1)
SSP_FORCEINLINE __m128d ssp_setzero_pd_SSE2 (void)
SSP_FORCEINLINE __m128i ssp_setzero_si128_SSE2 (void)
SSP_FORCEINLINE __m128i ssp_shuffle_epi32_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE __m128d ssp_shuffle_pd_SSE2 (__m128d a, __m128d b, int imm)
SSP_FORCEINLINE __m128i ssp_shufflehi_epi16_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE __m128i ssp_shufflelo_epi16_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE __m128i ssp_sll_epi16_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_sll_epi32_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_sll_epi64_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_slli_epi16_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_slli_epi32_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_slli_epi64_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_slli_si128_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE __m128d ssp_sqrt_pd_SSE2 (__m128d a)
SSP_FORCEINLINE __m128d ssp_sqrt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_sra_epi16_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_sra_epi32_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_srai_epi16_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_srai_epi32_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_srl_epi16_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_srl_epi32_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_srl_epi64_SSE2 (__m128i a, __m128i count)
SSP_FORCEINLINE __m128i ssp_srli_epi16_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_srli_epi32_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_srli_epi64_SSE2 (__m128i a, int count)
SSP_FORCEINLINE __m128i ssp_srli_si128_SSE2 (__m128i a, int imm)
SSP_FORCEINLINE void ssp_store_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_store_sd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_store_si128_SSE2 (__m128i *p, __m128i b)
SSP_FORCEINLINE void ssp_store1_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_storeh_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_storel_epi64_SSE2 (__m128i *p, __m128i b)
SSP_FORCEINLINE void ssp_storel_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_storer_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_storeu_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_storeu_si128_SSE2 (__m128i *p, __m128i b)
SSP_FORCEINLINE void ssp_stream_pd_SSE2 (double *dp, __m128d a)
SSP_FORCEINLINE void ssp_stream_si128_SSE2 (__m128i *p, __m128i a)
SSP_FORCEINLINE void ssp_stream_si32_SSE2 (int *p, int i)
SSP_FORCEINLINE __m128i ssp_sub_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sub_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sub_epi64_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_sub_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_sub_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_sub_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m64 ssp_sub_si64_SSE2 (__m64 a, __m64 b)
SSP_FORCEINLINE __m128i ssp_subs_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_subs_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_subs_epu16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_subs_epu8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE int ssp_ucomieq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_ucomige_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_ucomigt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_ucomile_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_ucomilt_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE int ssp_ucomineq_sd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_unpackhi_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpackhi_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpackhi_epi64_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpackhi_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_unpackhi_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_unpacklo_epi16_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpacklo_epi32_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpacklo_epi64_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128i ssp_unpacklo_epi8_SSE2 (__m128i a, __m128i b)
SSP_FORCEINLINE __m128d ssp_unpacklo_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128d ssp_xor_pd_SSE2 (__m128d a, __m128d b)
SSP_FORCEINLINE __m128i ssp_xor_si128_SSE2 (__m128i a, __m128i b)

Function Documentation

SSP_FORCEINLINE __m128i ssp_add_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_add_epi16 [SSE2]. (Searches MSDN)

Definition at line 17 of file SSEPlus_native_SSE2.h.

00018 {
00019     return _mm_add_epi16( a, b );
00020 }

SSP_FORCEINLINE __m128i ssp_add_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_add_epi32 [SSE2]. (Searches MSDN)

Definition at line 22 of file SSEPlus_native_SSE2.h.

00023 {
00024     return _mm_add_epi32( a, b );
00025 }

SSP_FORCEINLINE __m128i ssp_add_epi64_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_add_epi64 [SSE2]. (Searches MSDN)

Definition at line 27 of file SSEPlus_native_SSE2.h.

00028 {
00029     return _mm_add_epi64( a, b );
00030 }

SSP_FORCEINLINE __m128i ssp_add_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_add_epi8 [SSE2]. (Searches MSDN)

Definition at line 32 of file SSEPlus_native_SSE2.h.

00033 {
00034     return _mm_add_epi8( a, b );
00035 }

SSP_FORCEINLINE __m128d ssp_add_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_add_pd [SSE2]. (Searches MSDN)

Definition at line 37 of file SSEPlus_native_SSE2.h.

00038 {
00039     return _mm_add_pd( a, b );
00040 }

SSP_FORCEINLINE __m128d ssp_add_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_add_sd [SSE2]. (Searches MSDN)

Definition at line 42 of file SSEPlus_native_SSE2.h.

00043 {
00044     return _mm_add_sd( a, b );
00045 }

SSP_FORCEINLINE __m64 ssp_add_si64_SSE2 ( __m64  a,
__m64  b 
)

SSE2 Native implementation of _mm_add_si64 [SSE2]. (Searches MSDN)

Definition at line 47 of file SSEPlus_native_SSE2.h.

00048 {
00049     return _mm_add_si64( a, b );
00050 }

SSP_FORCEINLINE __m128i ssp_adds_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_adds_epi16 [SSE2]. (Searches MSDN)

Definition at line 52 of file SSEPlus_native_SSE2.h.

00053 {
00054     return _mm_adds_epi16( a, b );
00055 }

SSP_FORCEINLINE __m128i ssp_adds_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_adds_epi8 [SSE2]. (Searches MSDN)

Definition at line 57 of file SSEPlus_native_SSE2.h.

00058 {
00059     return _mm_adds_epi8( a, b );
00060 }

SSP_FORCEINLINE __m128i ssp_adds_epu16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_adds_epu16 [SSE2]. (Searches MSDN)

Definition at line 62 of file SSEPlus_native_SSE2.h.

00063 {
00064     return _mm_adds_epu16( a, b );
00065 }

SSP_FORCEINLINE __m128i ssp_adds_epu8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_adds_epu8 [SSE2]. (Searches MSDN)

Definition at line 67 of file SSEPlus_native_SSE2.h.

00068 {
00069     return _mm_adds_epu8( a, b );
00070 }

SSP_FORCEINLINE __m128d ssp_and_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_and_pd [SSE2]. (Searches MSDN)

Definition at line 72 of file SSEPlus_native_SSE2.h.

00073 {
00074     return _mm_and_pd( a, b );
00075 }

SSP_FORCEINLINE __m128i ssp_and_si128_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_and_si128 [SSE2]. (Searches MSDN)

Definition at line 77 of file SSEPlus_native_SSE2.h.

00078 {
00079     return _mm_and_si128( a, b );
00080 }

SSP_FORCEINLINE __m128d ssp_andnot_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_andnot_pd [SSE2]. (Searches MSDN)

Definition at line 82 of file SSEPlus_native_SSE2.h.

00083 {
00084     return _mm_andnot_pd( a, b );
00085 }

SSP_FORCEINLINE __m128i ssp_andnot_si128_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_andnot_si128 [SSE2]. (Searches MSDN)

Definition at line 87 of file SSEPlus_native_SSE2.h.

00088 {
00089     return _mm_andnot_si128( a, b );
00090 }

SSP_FORCEINLINE __m128 ssp_arithmetic_hadd4_dup_ps_SSE2 ( __m128  a  ) 

Definition at line 16 of file SSEPlus_arithmetic_SSE2.h.

00017 {
00018     __m128 t;
00019     t = _mm_shuffle_ps( a, a, _MM_SHUFFLE(2, 3, 0, 1) );                //TODO shuflo, shuf hi
00020     a = _mm_add_ps( a, t );   
00021 
00022     t = _mm_shuffle_ps( a, a, _MM_SHUFFLE(1, 0, 3, 2) );                //TODO shuflo, shuf hi
00023     a = _mm_add_ps( a, t );
00024     return a;
00025 } 

SSP_FORCEINLINE __m128i ssp_arithmetic_hadd4_epi16_SSE2 ( __m128i  a,
const unsigned int  offset 
)

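Computes the sum of each group of four 16-bit elements. offset (0, 1, 2 or 3) selects the position within each group where the sum is placed; the other positions (x) hold intermediate values and should be ignored.

in = a,b,c,d | e,f,g,h, offset = 0: out = x,x,x,a+b+c+d | x,x,x,e+f+g+h

in = a,b,c,d | e,f,g,h, offset = 3: out = a+b+c+d,x,x,x | e+f+g+h,x,x,x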

Definition at line 38 of file SSEPlus_arithmetic_SSE2.h.

00039 {
00040     ssp_m128 A,B;
00041     A.i = a;                                           //A = a, b, c, d | e, f, g, h
00042 
00043     if( offset >= 2 ) B.i = _mm_slli_si128( A.i, 4 );  //B = c, d, x, x | g, h, x, x
00044     else              B.i = _mm_srli_si128( A.i, 4 );  //B = x, x, a, b | x, x, e, f
00045 
00046     A.i = _mm_add_epi16 ( A.i, B.i );      
00047 
00048     if( offset & 1 )  B.i = _mm_slli_si128( A.i, 2 );  
00049     else              B.i = _mm_srli_si128( A.i, 2 ); 
00050   
00051     A.i = _mm_add_epi16 ( A.i, B.i );      
00052     return A.i;
00053 }  

SSP_FORCEINLINE __m128i ssp_avg_epu16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_avg_epu16 [SSE2]. (Searches MSDN)

Definition at line 92 of file SSEPlus_native_SSE2.h.

00093 {
00094     return _mm_avg_epu16( a, b );
00095 }

SSP_FORCEINLINE __m128i ssp_avg_epu8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_avg_epu8 [SSE2]. (Searches MSDN)

Definition at line 97 of file SSEPlus_native_SSE2.h.

00098 {
00099     return _mm_avg_epu8( a, b );
00100 }

SSP_FORCEINLINE void ssp_clflush_SSE2 ( void const *  p  ) 

SSE2 Native implementation of _mm_clflush [SSE2]. (Searches MSDN)

Definition at line 139 of file SSEPlus_native_SSE2.h.

00140 {
00141     _mm_clflush( p );
00142 }

SSP_FORCEINLINE __m128i ssp_cmpeq_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpeq_epi16 [SSE2]. (Searches MSDN)

Definition at line 144 of file SSEPlus_native_SSE2.h.

00145 {
00146     return _mm_cmpeq_epi16( a, b );
00147 }

SSP_FORCEINLINE __m128i ssp_cmpeq_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpeq_epi32 [SSE2]. (Searches MSDN)

Definition at line 149 of file SSEPlus_native_SSE2.h.

00150 {
00151     return _mm_cmpeq_epi32( a, b );
00152 }

SSP_FORCEINLINE __m128i ssp_cmpeq_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpeq_epi8 [SSE2]. (Searches MSDN)

Definition at line 154 of file SSEPlus_native_SSE2.h.

00155 {
00156     return _mm_cmpeq_epi8( a, b );
00157 }

SSP_FORCEINLINE __m128d ssp_cmpeq_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpeq_pd [SSE2]. (Searches MSDN)

Definition at line 159 of file SSEPlus_native_SSE2.h.

00160 {
00161     return _mm_cmpeq_pd( a, b );
00162 }

SSP_FORCEINLINE __m128d ssp_cmpeq_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpeq_sd [SSE2]. (Searches MSDN)

Definition at line 164 of file SSEPlus_native_SSE2.h.

00165 {
00166     return _mm_cmpeq_sd( a, b );
00167 }

SSP_FORCEINLINE __m128d ssp_cmpge_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpge_pd [SSE2]. (Searches MSDN)

Definition at line 169 of file SSEPlus_native_SSE2.h.

00170 {
00171     return _mm_cmpge_pd( a, b );
00172 }

SSP_FORCEINLINE __m128d ssp_cmpge_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpge_sd [SSE2]. (Searches MSDN)

Definition at line 174 of file SSEPlus_native_SSE2.h.

00175 {
00176     return _mm_cmpge_sd( a, b );
00177 }

SSP_FORCEINLINE __m128i ssp_cmpgt_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpgt_epi16 [SSE2]. (Searches MSDN)

Definition at line 179 of file SSEPlus_native_SSE2.h.

00180 {
00181     return _mm_cmpgt_epi16( a, b );
00182 }

SSP_FORCEINLINE __m128i ssp_cmpgt_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpgt_epi32 [SSE2]. (Searches MSDN)

Definition at line 184 of file SSEPlus_native_SSE2.h.

00185 {
00186     return _mm_cmpgt_epi32( a, b );
00187 }

SSP_FORCEINLINE __m128i ssp_cmpgt_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmpgt_epi8 [SSE2]. (Searches MSDN)

Definition at line 189 of file SSEPlus_native_SSE2.h.

00190 {
00191     return _mm_cmpgt_epi8( a, b );
00192 }

SSP_FORCEINLINE __m128d ssp_cmpgt_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpgt_pd [SSE2]. (Searches MSDN)

Definition at line 194 of file SSEPlus_native_SSE2.h.

00195 {
00196     return _mm_cmpgt_pd( a, b );
00197 }

SSP_FORCEINLINE __m128d ssp_cmpgt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpgt_sd [SSE2]. (Searches MSDN)

Definition at line 199 of file SSEPlus_native_SSE2.h.

00200 {
00201     return _mm_cmpgt_sd( a, b );
00202 }

SSP_FORCEINLINE __m128d ssp_cmple_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmple_pd [SSE2]. (Searches MSDN)

Definition at line 204 of file SSEPlus_native_SSE2.h.

00205 {
00206     return _mm_cmple_pd( a, b );
00207 }

SSP_FORCEINLINE __m128d ssp_cmple_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmple_sd [SSE2]. (Searches MSDN)

Definition at line 209 of file SSEPlus_native_SSE2.h.

00210 {
00211     return _mm_cmple_sd( a, b );
00212 }

SSP_FORCEINLINE __m128i ssp_cmplt_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmplt_epi16 [SSE2]. (Searches MSDN)

Definition at line 214 of file SSEPlus_native_SSE2.h.

00215 {
00216     return _mm_cmplt_epi16( a, b );
00217 }

SSP_FORCEINLINE __m128i ssp_cmplt_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmplt_epi32 [SSE2]. (Searches MSDN)

Definition at line 219 of file SSEPlus_native_SSE2.h.

00220 {
00221     return _mm_cmplt_epi32( a, b );
00222 }

SSP_FORCEINLINE __m128i ssp_cmplt_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_cmplt_epi8 [SSE2]. (Searches MSDN)

Definition at line 224 of file SSEPlus_native_SSE2.h.

00225 {
00226     return _mm_cmplt_epi8( a, b );
00227 }

SSP_FORCEINLINE __m128d ssp_cmplt_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmplt_pd [SSE2]. (Searches MSDN)

Definition at line 229 of file SSEPlus_native_SSE2.h.

00230 {
00231     return _mm_cmplt_pd( a, b );
00232 }

SSP_FORCEINLINE __m128d ssp_cmplt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmplt_sd [SSE2]. (Searches MSDN)

Definition at line 234 of file SSEPlus_native_SSE2.h.

00235 {
00236     return _mm_cmplt_sd( a, b );
00237 }

SSP_FORCEINLINE __m128d ssp_cmpneq_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpneq_pd [SSE2]. (Searches MSDN)

Definition at line 239 of file SSEPlus_native_SSE2.h.

00240 {
00241     return _mm_cmpneq_pd( a, b );
00242 }

SSP_FORCEINLINE __m128d ssp_cmpneq_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpneq_sd [SSE2]. (Searches MSDN)

Definition at line 244 of file SSEPlus_native_SSE2.h.

00245 {
00246     return _mm_cmpneq_sd( a, b );
00247 }

SSP_FORCEINLINE __m128d ssp_cmpnge_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpnge_pd [SSE2]. (Searches MSDN)

Definition at line 249 of file SSEPlus_native_SSE2.h.

00250 {
00251     return _mm_cmpnge_pd( a, b );
00252 }

SSP_FORCEINLINE __m128d ssp_cmpnge_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpnge_sd [SSE2]. (Searches MSDN)

Definition at line 254 of file SSEPlus_native_SSE2.h.

00255 {
00256     return _mm_cmpnge_sd( a, b );
00257 }

SSP_FORCEINLINE __m128d ssp_cmpngt_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpngt_pd [SSE2]. (Searches MSDN)

Definition at line 259 of file SSEPlus_native_SSE2.h.

00260 {
00261     return _mm_cmpngt_pd( a, b );
00262 }

SSP_FORCEINLINE __m128d ssp_cmpngt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpngt_sd [SSE2]. (Searches MSDN)

Definition at line 264 of file SSEPlus_native_SSE2.h.

00265 {
00266     return _mm_cmpngt_sd( a, b );
00267 }

SSP_FORCEINLINE __m128d ssp_cmpnle_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpnle_pd [SSE2]. (Searches MSDN)

Definition at line 269 of file SSEPlus_native_SSE2.h.

00270 {
00271     return _mm_cmpnle_pd( a, b );
00272 }

SSP_FORCEINLINE __m128d ssp_cmpnle_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpnle_sd [SSE2]. (Searches MSDN)

Definition at line 274 of file SSEPlus_native_SSE2.h.

00275 {
00276     return _mm_cmpnle_sd( a, b );
00277 }

SSP_FORCEINLINE __m128d ssp_cmpnlt_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpnlt_pd [SSE2]. (Searches MSDN)

Definition at line 279 of file SSEPlus_native_SSE2.h.

00280 {
00281     return _mm_cmpnlt_pd( a, b );
00282 }

SSP_FORCEINLINE __m128d ssp_cmpnlt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpnlt_sd [SSE2]. (Searches MSDN)

Definition at line 284 of file SSEPlus_native_SSE2.h.

00285 {
00286     return _mm_cmpnlt_sd( a, b );
00287 }

SSP_FORCEINLINE __m128d ssp_cmpord_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpord_pd [SSE2]. (Searches MSDN)

Definition at line 289 of file SSEPlus_native_SSE2.h.

00290 {
00291     return _mm_cmpord_pd( a, b );
00292 }

SSP_FORCEINLINE __m128d ssp_cmpord_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpord_sd [SSE2]. (Searches MSDN)

Definition at line 294 of file SSEPlus_native_SSE2.h.

00295 {
00296     return _mm_cmpord_sd( a, b );
00297 }

SSP_FORCEINLINE __m128d ssp_cmpunord_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpunord_pd [SSE2]. (Searches MSDN)

Definition at line 299 of file SSEPlus_native_SSE2.h.

00300 {
00301     return _mm_cmpunord_pd( a, b );
00302 }

SSP_FORCEINLINE __m128d ssp_cmpunord_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cmpunord_sd [SSE2]. (Searches MSDN)

Definition at line 304 of file SSEPlus_native_SSE2.h.

00305 {
00306     return _mm_cmpunord_sd( a, b );
00307 }

SSP_FORCEINLINE int ssp_comieq_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_comieq_sd [SSE2]. (Searches MSDN)

Definition at line 309 of file SSEPlus_native_SSE2.h.

00310 {
00311     return _mm_comieq_sd( a, b );
00312 }

SSP_FORCEINLINE int ssp_comige_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_comige_sd [SSE2]. (Searches MSDN)

Definition at line 314 of file SSEPlus_native_SSE2.h.

00315 {
00316     return _mm_comige_sd( a, b );
00317 }

SSP_FORCEINLINE int ssp_comigt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_comigt_sd [SSE2]. (Searches MSDN)

Definition at line 319 of file SSEPlus_native_SSE2.h.

00320 {
00321     return _mm_comigt_sd( a, b );
00322 }

SSP_FORCEINLINE int ssp_comile_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_comile_sd [SSE2]. (Searches MSDN)

Definition at line 324 of file SSEPlus_native_SSE2.h.

00325 {
00326     return _mm_comile_sd( a, b );
00327 }

SSP_FORCEINLINE int ssp_comilt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_comilt_sd [SSE2]. (Searches MSDN)

Definition at line 329 of file SSEPlus_native_SSE2.h.

00330 {
00331     return _mm_comilt_sd( a, b );
00332 }

SSP_FORCEINLINE int ssp_comineq_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_comineq_sd [SSE2]. (Searches MSDN)

Definition at line 334 of file SSEPlus_native_SSE2.h.

00335 {
00336     return _mm_comineq_sd( a, b );
00337 }

SSP_FORCEINLINE void ssp_convert_3c_3p_epi16_SSE2 ( __m128i *  rgb1,
__m128i *  rgb2,
__m128i *  rgb3 
)

Definition at line 190 of file SSEPlus_convert_SSE2.h.

00191 {
00192                 __m128i temp1, temp2;
00193 
00194                 *rgb2  = _mm_shuffle_epi32(*rgb2, _MM_SHUFFLE(1,0,3,2));//b3,g3,r3,b2,r5,b4,g4,r4
00195                 temp1 = _mm_unpacklo_epi16(*rgb1, *rgb2);                               //r5,r1,b4,b0,g4,g0,r4,r0
00196                 temp2 = _mm_unpackhi_epi16(*rgb2, *rgb3);                               //b7,b3,g7,g3,r7,r3,b6,b2
00197                 *rgb3  = _mm_slli_si128(*rgb3, 8);                                              //g6,r6,b5,g5, 0, 0, 0, 0
00198                 *rgb2  = _mm_unpackhi_epi16(*rgb1, *rgb3);                              //g6,g2,r6,r2,b5,b1,g5,g1
00199                 
00200                 *rgb3  = _mm_shuffle_epi32(*rgb2, _MM_SHUFFLE(1,0,3,2));//b5,b1,g5,g1,g6,g2,r6,r2
00201                 *rgb1  = _mm_unpacklo_epi16(temp1, *rgb3);                              //g6,g4,g2,g0,r6,r4,r2,r0
00202                 temp1 = _mm_srli_si128(temp1, 8);                                               // 0, 0, 0, 0,r5,r1,b4,b0
00203                 temp1 = _mm_unpacklo_epi16(temp1, temp2);                               //r7,r5,r3,r1,b6,b4,b2,b0
00204                 temp2 = _mm_unpackhi_epi16(*rgb3, temp2);                               //b7,b5,b3,b1,g7,g5,g3,g1
00205 
00206                 temp1 = _mm_shuffle_epi32(temp1, _MM_SHUFFLE(1,0,3,2)); //b6,b4,b2,b0,r7,r5,r3,r1
00207                 *rgb3  = _mm_unpackhi_epi16(temp1, temp2);                              //b7,b6,b5,b4,b3,b2,b1,b0                               
00208                 temp2 = _mm_slli_si128(temp2, 8);                                               //g7,g5,g3,g1, 0, 0, 0, 0
00209                 *rgb2  = _mm_unpackhi_epi16(*rgb1, temp2);                              //g7,g6,g5,g4,g3,g2,g1,g0                               
00210                 *rgb1  = _mm_unpacklo_epi16(*rgb1, temp1);                              //r7,r6,r5,r4,r3,r2,r1,r0                               
00211 }

SSP_FORCEINLINE void ssp_convert_3c_3p_epi32_SSE2 ( __m128i *  rgb1,
__m128i *  rgb2,
__m128i *  rgb3 
)

Definition at line 257 of file SSEPlus_convert_SSE2.h.

00258 {
00259                 __m128i temp1, temp2;
00260                 
00261                 *rgb2  = _mm_shuffle_epi32(*rgb2, _MM_SHUFFLE(1,0,3,2));//b1,g1,g2,r2
00262                 temp1 = _mm_unpacklo_epi32(*rgb1, *rgb2);                               //g2,g0,r2,r0
00263                 temp2 = _mm_unpackhi_epi32(*rgb2, *rgb3);                               //b3,b1,g3,g1
00264                 *rgb3  = _mm_slli_si128(*rgb3, 8);                                              //r3,b2, 0, 0
00265                 *rgb2  = _mm_unpackhi_epi32(*rgb1, *rgb3);                              //r3,r1,b2,b0
00266                 
00267                 *rgb3  = _mm_shuffle_epi32(*rgb2, _MM_SHUFFLE(1,0,3,2));//b2,b0,r3,r1
00268                 *rgb1  = _mm_unpacklo_epi32(temp1, *rgb3);                              //r3,r2,r1,r0
00269                 temp1 = _mm_srli_si128(temp1, 8);                                               // 0, 0,g2,g0
00270                 *rgb2  = _mm_unpacklo_epi32(temp1, temp2);                              //g3,g2,g1,g0
00271                 *rgb3  = _mm_unpackhi_epi32(*rgb3, temp2);                              //b3,b2,b1,b0
00272 }

SSP_FORCEINLINE void ssp_convert_3c_3p_epi8_SSE2 ( __m128i *  rgb1,
__m128i *  rgb2,
__m128i *  rgb3 
)

Definition at line 88 of file SSEPlus_convert_SSE2.h.

00089 {
00090     __m128i temp1, temp2;
00091                                                             // RGB1 =         r5 , b4  g4  r4 , b3  g3  r3 , b2  g2  r2 , b1  g1  r1 , b0  g0 r0
00092                                                             // RGB2 =     g10 r10, b9  g9  r9 , b8  g8  r8 , b7  g7  r7 , b6  g6  r6 , b5  g5   
00093                                                             // RGB3 = b15 g15 r15, b14 g14 r14, b13 g13 r13, b12 g12 r12, b11 g11 r11, b10 
00094 
00095 
00096     *rgb2 = _mm_shuffle_epi32(*rgb2, _MM_SHUFFLE(1,0,3,2));     // b7, g7, r7, b6, g6, r6, b5, g5,g10,r10, b9, g9, r9, b8, g8, r8
00097     temp1 = _mm_unpacklo_epi8(*rgb1, *rgb2);                            //g10, g2,r10, r2, b9, b1, g9, g1, r9, r1, b8, b0, g8, g0, r8, r0
00098     temp2 = _mm_unpackhi_epi8(*rgb2, *rgb3);                            //b15, b7,g15, g7,r15, r7,b14, b6,g14, g6,r14, r6,b13, b5,g13, g5
00099     *rgb3 = _mm_slli_si128   (*rgb3, 8    );                            //r13,b12,g12,r12,b11,g11,r11,b10,  0,  0,  0,  0,  0,  0,  0,  0
00100     *rgb2 = _mm_unpackhi_epi8(*rgb1, *rgb3);                            //r13, r5,b12, b4,g12, g4,r12, r4,b11, b3,g11, g3,r11, r3,b10, b2
00101 
00102     *rgb3 = _mm_shuffle_epi32(*rgb2, _MM_SHUFFLE(1,0,3,2));     //b11, b3,g11, g3,r11, r3,b10, b2,r13, r5,b12, b4,g12, g4,r12, r4
00103     *rgb1 = _mm_unpacklo_epi8(temp1, *rgb3);                            //r13, r9, r5, r1,b12, b8, b4, b0,g12, g8, g4, g0,r12, r8, r4, r0
00104     temp1 = _mm_srli_si128   (temp1, 8    );                            //  0,  0,  0,  0,  0,  0,  0,  0,g10, g2,r10, r2, b9, b1, g9, g1
00105     temp1 = _mm_unpacklo_epi8(temp1, temp2);                            //g14,g10, g6, g2,r14,r10, r6, r2,b13, b9, b5, b1,g13, g9, g5, g1
00106     temp2 = _mm_unpackhi_epi8(*rgb3, temp2);                            //b15,b11, b7, b3,g15,g11, g7, g3,r15,r11, r7, r3,b14,b10, b6, b2
00107 
00108     temp1 = _mm_shuffle_epi32(temp1, _MM_SHUFFLE(1,0,3,2)); //b13, b9, b5, b1,g13, g9, g5, g1,g14,g10, g6, g2,r14,r10, r6, r2
00109     *rgb3 = _mm_unpackhi_epi8(temp1, temp2);                            //b15,b13,b11, b9, b7, b5, b3, b1,g15,g13,g11, g9, g7, g5, g3, g1
00110     temp2 = _mm_slli_si128   (temp2, 8    );                            //r15,r11, r7, r3,b14,b10, b6, b2,  0,  0,  0,  0,  0,  0,  0,  0
00111     temp2 = _mm_unpackhi_epi8(*rgb1, temp2);                            //r15,r13,r11, r9, r7, r5, r3, r1,b14,b12,b10, b8, b6, b4, b2, b0
00112     temp1 = _mm_unpacklo_epi8(*rgb1, temp1);                            //g14,g12,g10, g8, g6, g4, g2, g0,r14,r12,r10, r8, r6, r4, r2, r0
00113 
00114     temp2 = _mm_shuffle_epi32(temp2, _MM_SHUFFLE(1,0,3,2)); //b14,b12,b10, b8, b6, b4, b2, b0,r15,r13,r11, r9, r7, r5, r3, r1
00115     *rgb1 = _mm_unpacklo_epi8(temp1, temp2);                            //r15,r14,r13,r12,r11,r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0       
00116     temp1 = _mm_srli_si128   (temp1, 8    );                            //  0,  0,  0,  0,  0,  0,  0,  0,g14,g12,g10, g8, g6, g4, g2, g0
00117     *rgb2 = _mm_unpacklo_epi8(temp1, *rgb3);                            //g15,g14,g13,g12,g11,g10, g9, g8, g7, g6, g5, g4, g3, g2, g1, g0       
00118     *rgb3 = _mm_unpackhi_epi8(temp2, *rgb3);                            //b15,b14,b13,b12,b11,b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0       
00119 }

SSP_FORCEINLINE void ssp_convert_3p_3c_epi16_SSE2 ( __m128i *  r,
__m128i *  g,
__m128i *  b 
)

Definition at line 214 of file SSEPlus_convert_SSE2.h.

00215 {
00216                 __m128i temp;
00217 
00218                 temp = _mm_srli_si128(*r, 8);                   // 0, 0, 0, 0,r7,r6,r5,r4
00219                 *r    = _mm_unpacklo_epi16(*r, temp);   //r7,r3,r6,r2,r5,r1,r4,r0
00220                 temp = _mm_srli_si128(*r, 8);                   // 0, 0, 0, 0,r7,r3,r6,r2
00221                 *r    = _mm_unpacklo_epi16(*r, temp);   //r7,r5,r3,r1,r6,r4,r2,r0
00222 
00223                 temp = _mm_srli_si128(*g, 8);                   // 0, 0, 0, 0,g7,g6,g5,g4
00224                 *g    = _mm_unpacklo_epi16(*g, temp);   //g7,g3,g6,g2,g5,g1,g4,g0
00225                 temp = _mm_srli_si128(*g, 8);                   // 0, 0, 0, 0,g7,g3,g6,g2
00226                 *g    = _mm_unpacklo_epi16(*g, temp);   //g7,g5,g3,g1,g6,g4,g2,g0
00227 
00228                 temp = _mm_srli_si128(*b, 8);                   // 0, 0, 0, 0,b7,b6,b5,b4
00229                 *b    = _mm_unpacklo_epi16(*b, temp);   //b7,b3,b6,b2,b5,b1,b4,b0
00230                 temp = _mm_srli_si128(*b, 8);                   // 0, 0, 0, 0,b7,b3,b6,b2
00231                 *b    = _mm_unpacklo_epi16(*b, temp);   //b7,b5,b3,b1,b6,b4,b2,b0
00232 
00233                 temp = _mm_unpacklo_epi16(*r, *g);              //g6,r6,g4,r4,g2,r2,g0,r0
00234                 *r    = _mm_srli_si128(*r , 8);                 // 0, 0, 0, 0,r7,r5,r3,r1
00235                 *r    = _mm_unpacklo_epi16(*b, *r);             //r7,b6,r5,b4,r3,b2,r1,b0
00236                 *g    = _mm_unpackhi_epi16(*g, *b);             //b7,g7,b5,g5,b3,g3,b1,g1
00237 
00238                 *b    = _mm_srli_si128(*r, 8);                  // 0, 0, 0, 0,r7,b6,r5,b4
00239                 *r    = _mm_unpacklo_epi32(*r, *b);             //r7,b6,r3,b2,r5,b4,r1,b0
00240                 *b    = _mm_srli_si128(*g, 8);                  // 0, 0, 0, 0,b7,g7,b5,g5
00241                 *g    = _mm_unpacklo_epi32(*g, *b);             //b7,g7,b3,g3,b5,g5,b1,g1
00242                 *b    = _mm_srli_si128(temp, 8);                // 0, 0, 0, 0,g6,r6,g4,r4
00243                 temp = _mm_unpacklo_epi32(temp, *b);    //g6,r6,g2,r2,g4,r4,g0,r0
00244 
00245                 *b    = _mm_unpacklo_epi32(temp, *g);   //b5,g5,g4,r4,b1,g1,g0,r0
00246                 temp = _mm_srli_si128(temp, 8);                 // 0, 0, 0, 0,g6,r6,g2,r2
00247                 temp = _mm_unpacklo_epi32(*r, temp);    //g6,r6,r5,b4,g2,r2,r1,b0
00248                 *g    = _mm_unpackhi_epi32(*r, *g);             //b7,g7,r7,b6,b3,g3,r3,b2
00249                 
00250                 *r    = _mm_unpacklo_epi32(*b, temp);   //g2,r2,b1,g1,r1,b0,g0,r0
00251                 temp = _mm_unpackhi_epi32(*b, temp);    //g6,r6,b5,g5,r5,b4,g4,r4
00252                 *b    = _mm_unpackhi_epi64(temp, *g);   //b7,g7,r7,b6,g6,r6,b5,g5
00253                 *g    = _mm_unpacklo_epi64(*g, temp);   //r5,b4,g4,r4,b3,g3,r3,b2
00254 }

SSP_FORCEINLINE void ssp_convert_3p_3c_epi32_SSE2 ( __m128i *  r,
__m128i *  g,
__m128i *  b 
)

Definition at line 275 of file SSEPlus_convert_SSE2.h.

00276 {
00277                 __m128i temp;
00278 
00279                 temp = _mm_srli_si128(*r, 8);                   // 0, 0,r3,r2
00280                 *r    = _mm_unpacklo_epi32(*r, temp);   //r3,r1,r2,r0
00281                 temp = _mm_srli_si128(*g, 8);                   // 0, 0,g3,g2
00282                 *g    = _mm_unpacklo_epi32(*g, temp);   //g3,g1,g2,g0
00283                 temp = _mm_srli_si128(*b, 8);                   // 0, 0,b3,b2
00284                 *b    = _mm_unpacklo_epi32(*b, temp);   //b3,b1,b2,b0
00285 
00286                 temp = _mm_unpacklo_epi32(*r, *g);              //g2,r2,g0,r0
00287                 *g    = _mm_unpackhi_epi32(*g, *b);             //b3,g3,b1,g1
00288                 *r    = _mm_srli_si128(*r, 8);                  // 0, 0,r3,r1
00289                 *b    = _mm_unpacklo_epi32(*b, *r);             //r3,b2,r1,b0
00290 
00291                 *r    = _mm_unpacklo_epi64(temp, *b);   //r1,b0,g0,r0
00292                 *b    = _mm_unpackhi_epi64(*b, *g);             //b3,g3,r3,b2
00293                 *g    = _mm_slli_si128(*g, 8);                  //b1,g1, 0, 0
00294                 *g    = _mm_unpackhi_epi64(*g, temp);   //g2,r2,b1,g1
00295 }

SSP_FORCEINLINE void ssp_convert_3p_3c_epi8_SSE2 ( __m128i *  r,
__m128i *  g,
__m128i *  b 
)

Definition at line 148 of file SSEPlus_convert_SSE2.h.

00149 {
00150     const static __m128i odd_8  = SSP_CONST_SET_8I(   0xFF,0,0xFF,0,0xFF,0,0xFF,0,0xFF,0,0xFF,0,0xFF,0,0xFF,0 );
00151     const static __m128i even_8 = SSP_CONST_SET_8I( 0,0xFF,0,0xFF,0,0xFF,0,0xFF,0,0xFF,0,0xFF,0,0xFF,0,0xFF   );
00152 
00153     const static __m128i odd_16  = SSP_CONST_SET_16I(   0xFFFF,0,0xFFFF,0,0xFFFF,0,0xFFFF,0 );
00154     const static __m128i even_16 = SSP_CONST_SET_16I( 0,0xFFFF,0,0xFFFF,0,0xFFFF,0,0xFFFF   );    
00155 
00156    ssp_m128 T, RG, GB, BR, RGBR, GBRG, BRGB;
00157     
00158      RG.i = _mm_and_si128 (     *r, even_8  );  // Mask out the odd r bits
00159       T.i = _mm_slli_epi16(     *g, 8       );  // Move the even g bits to the odd position
00160      RG.i = _mm_or_si128  (   RG.i, T.i     );  // G14 R14 ... G2 R2 G0 R0
00161 
00162      GB.i = _mm_srli_epi16(     *g, 8       );      
00163       T.i = _mm_and_si128 (     *b, odd_8   );
00164      GB.i = _mm_or_si128  (   GB.i, T.i     );
00165 
00166      BR.i = _mm_and_si128 (     *b, even_8  );
00167       T.i = _mm_and_si128 (     *r, odd_8   );
00168      BR.i = _mm_or_si128  (   BR.i, T.i     );
00169 
00170    RGBR.i = _mm_and_si128 (   RG.i, even_16 );
00171       T.i = _mm_slli_epi32(   BR.i, 16      );
00172    RGBR.i = _mm_or_si128  ( RGBR.i, T.i     );
00173 
00174    GBRG.i = _mm_and_si128 (   GB.i, even_16 );
00175       T.i = _mm_and_si128 (   RG.i, odd_16  );
00176    GBRG.i = _mm_or_si128  ( GBRG.i, T.i     );
00177 
00178    BRGB.i = _mm_srli_epi32(   BR.i, 16      );
00179       T.i = _mm_and_si128 (   GB.i, odd_16  );
00180    BRGB.i = _mm_or_si128  ( BRGB.i, T.i     );
00181 
00182    ssp_convert_reverse_transpose_SSE2( &RGBR.i, &GBRG.i, &BRGB.i );
00183 
00184    *r = RGBR.i;
00185    *g = GBRG.i;
00186    *b = BRGB.i; 
00187 }

SSP_FORCEINLINE void ssp_convert_4c_4p_epi16_SSE2 ( __m128i *  rgba1,
__m128i *  rgba2,
__m128i *  rgba3,
__m128i *  rgba4 
)

Definition at line 342 of file SSEPlus_convert_SSE2.h.

00343 {
00344                 __m128i temp1, temp2;
00345 
00346                 temp1  = _mm_unpacklo_epi16(*rgba1, *rgba3);            //a4,a0,b4,b0,g4,g0,r4,r0
00347                 *rgba1  = _mm_unpackhi_epi16(*rgba1, *rgba3);           //a5,a1,b5,b1,g5,g1,r5,r1
00348                 *rgba3  = _mm_unpacklo_epi16(*rgba2, *rgba4);           //a6,a2,b6,b2,g6,g2,r6,r2
00349                 *rgba2  = _mm_unpackhi_epi16(*rgba2, *rgba4);           //a7,a3,b7,b3,g7,g3,r7,r3
00350 
00351                 *rgba4  = _mm_unpackhi_epi16(*rgba1, *rgba2);           //a7,a5,a3,a1,b7,b5,b3,b1
00352                 *rgba1  = _mm_unpacklo_epi16(*rgba1, *rgba2);           //g7,g5,g3,g1,r7,r5,r3,r1
00353                 temp2  = _mm_unpacklo_epi16(temp1, *rgba3);                     //g6,g4,g2,g0,r6,r4,r2,r0
00354                 temp1  = _mm_unpackhi_epi16(temp1, *rgba3);                     //a6,a4,a2,a0,b6,b4,b2,b0
00355 
00356                 *rgba3  = _mm_unpacklo_epi16(temp1, *rgba4);            //b7,b6,b5,b4,b3,b2,b1,b0
00357                 *rgba4  = _mm_unpackhi_epi16(temp1, *rgba4);            //a7,a6,a5,a4,a3,a2,a1,a0
00358                 *rgba2  = _mm_unpackhi_epi16(temp2, *rgba1);            //g7,g6,g5,g4,g3,g2,g1,g0
00359                 *rgba1  = _mm_unpacklo_epi16(temp2, *rgba1);            //r7,r6,r5,r4,r3,r2,r1,r0
00360 }

SSP_FORCEINLINE void ssp_convert_4c_4p_epi32_SSE2 ( __m128i *  rgba1,
__m128i *  rgba2,
__m128i *  rgba3,
__m128i *  rgba4 
)

Definition at line 379 of file SSEPlus_convert_SSE2.h.

00380 {
00381                 __m128i temp1, temp2;
00382 
00383                 temp1  = _mm_unpacklo_epi32(*rgba1, *rgba3);            //g2,g0,r2,r0
00384                 *rgba1  = _mm_unpackhi_epi32(*rgba1, *rgba3);           //a2,a0,b2,b0
00385                 temp2  = _mm_unpacklo_epi32(*rgba2, *rgba4);            //g3,g1,r3,r1
00386                 *rgba2  = _mm_unpackhi_epi32(*rgba2, *rgba4);           //a3,a1,b3,b1
00387 
00388                 *rgba4  = _mm_unpackhi_epi32(*rgba1, *rgba2);           //a3,a2,a1,a0
00389                 *rgba3  = _mm_unpacklo_epi32(*rgba1, *rgba2);           //b3,b2,b1,b0
00390                 *rgba1  = _mm_unpacklo_epi32(temp1, temp2);                     //r3,r2,r1,r0
00391                 *rgba2  = _mm_unpackhi_epi32(temp1, temp2);                     //g3,g2,g1,g0
00392 }

SSP_FORCEINLINE void ssp_convert_4c_4p_epi8_SSE2 ( __m128i *  rgba1,
__m128i *  rgba2,
__m128i *  rgba3,
__m128i *  rgba4 
)

Definition at line 299 of file SSEPlus_convert_SSE2.h.

00300 {
00301                 __m128i temp1,temp2;
00302 
00303                 temp1 = _mm_unpacklo_epi8(*rgba1, *rgba3);                      // a9, a1, b9, b1, g9, g1, r9, r1, a8, a0, b8, b0, g8, g0, r8, r0
00304                 *rgba1 = _mm_unpackhi_epi8(*rgba1, *rgba3);                     //a11, a3,b11, b3,g11, g3,r11, r3,a10, a2,b10, b2,g10, g2,r10, r2
00305                 *rgba3 = _mm_unpacklo_epi8(*rgba2, *rgba4);                     //a13, a5,b13, b5,g13, g5,r13, r5,a12, a4,b12, b4,g12, g4,r12, r4
00306                 temp2 = _mm_unpackhi_epi8(*rgba2, *rgba4);                      //a15, a7,b15, b7,g15, g7,r15, r7,a14, a6,b14, b6,g14, g6,r14, r6
00307 
00308                 *rgba4 = _mm_unpackhi_epi8(*rgba1, temp2);                      //a15,a11, a7, a3,b15,b11, b7, b3,g15,g11, g7, g3,r15,r11, r7, r3
00309                 *rgba1 = _mm_unpacklo_epi8(*rgba1, temp2);                      //a14,a10, a6, a2,b14,b10, b6, b2,g14,g10, g6, g2,r14,r10, r6, r2
00310                 *rgba2 = _mm_unpacklo_epi8(temp1, *rgba3);                      //a12, a8, a4, a0,b12, b8, b4, b0,g12, g8, g4, g0,r12, r8, r4, r0
00311                 *rgba3 = _mm_unpackhi_epi8(temp1, *rgba3);                      //a13, a9, a5, a1,b13, b9, b5, b1,g13, g9, g5, g1,r13, r9, r5, r1
00312 
00313                 temp1 = _mm_unpacklo_epi8(*rgba3, *rgba4);                      //g15,g13,g11, g9, g7, g5, g3, g1,r15,r13,r11, r9, r7, r5, r3, r1
00314                 *rgba3 = _mm_unpackhi_epi8(*rgba3, *rgba4);                     //a15,a13,a11, a9, a7, a5, a3, a1,b15,b13,b11, b9, b7, b5, b3, b1
00315                 temp2 = _mm_unpackhi_epi8(*rgba2, *rgba1);                      //a14,a12,a10, a8, a6, a4, a2, a0,b14,b12,b10, b8, b6, b4, b2, b0
00316                 *rgba2 = _mm_unpacklo_epi8(*rgba2, *rgba1);                     //g14,g12,g10, g8, g6, g4, g2, g0,r14,r12,r10, r8, r6, r4, r2, r0
00317 
00318                 *rgba1 = _mm_unpacklo_epi8(*rgba2, temp1);                      //r15,r14,r13,r12,r11,r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0
00319                 *rgba2 = _mm_unpackhi_epi8(*rgba2, temp1);                      //g15,g14,g13,g12,g11,g10, g9, g8, g7, g6, g5, g4, g3, g2, g1, g0
00320                 *rgba4 = _mm_unpackhi_epi8(temp2, *rgba3);                      //a15,a14,a13,a12,a11,a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0
00321                 *rgba3 = _mm_unpacklo_epi8(temp2, *rgba3);                      //b15,b14,b13,b12,b11,b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0
00322 }

SSP_FORCEINLINE void ssp_convert_4p_4c_epi16_SSE2 ( __m128i *  r,
__m128i *  g,
__m128i *  b,
__m128i *  a 
)

Definition at line 363 of file SSEPlus_convert_SSE2.h.

00364 {
00365                 __m128i temp1, temp2;
00366 
00367                 temp1 = _mm_unpacklo_epi16(*r, *b);                     //b3,r3,b2,r2,b1,r1,b0,r0
00368                 *r     = _mm_unpackhi_epi16(*r, *b);        //b7,r7,b6,r6,b5,r5,b4,r4
00369                 temp2 = _mm_unpacklo_epi16(*g, *a);                     //a3,g3,a2,g2,a1,g1,a0,g0
00370                 *g     = _mm_unpackhi_epi16(*g, *a);            //a7,g7,a6,g6,a5,g5,a4,g4
00371 
00372                 *b     = _mm_unpacklo_epi16(*r, *g);            //a5,b5,g5,r5,a4,b4,g4,r4
00373                 *a     = _mm_unpackhi_epi16(*r, *g);            //a7,b7,g7,r7,a6,b6,g6,r6
00374                 *r     = _mm_unpacklo_epi16(temp1, temp2);      //a1,b1,g1,r1,a0,b0,g0,r0
00375                 *g     = _mm_unpackhi_epi16(temp1, temp2);      //a3,b3,g3,r3,a2,b2,g2,r2
00376 }

SSP_FORCEINLINE void ssp_convert_4p_4c_epi32_SSE2 ( __m128i *  r,
__m128i *  g,
__m128i *  b,
__m128i *  a 
)

Definition at line 395 of file SSEPlus_convert_SSE2.h.

00396 {
00397                 __m128i temp1, temp2;
00398 
00399                 temp1 = _mm_unpacklo_epi32(*r, *b);                     //b1,r1,b0,r0
00400                 *r     = _mm_unpackhi_epi32(*r, *b);        //b3,r3,b2,r2
00401                 temp2 = _mm_unpacklo_epi32(*g, *a);                     //a1,g1,a0,g0
00402                 *g     = _mm_unpackhi_epi32(*g, *a);            //a3,g3,a2,g2
00403 
00404                 *b     = _mm_unpacklo_epi32(*r, *g);            //a2,b2,g2,r2
00405                 *a     = _mm_unpackhi_epi32(*r, *g);            //a3,b3,g3,r3
00406                 *r     = _mm_unpacklo_epi32(temp1, temp2);      //a0,b0,g0,r0
00407                 *g     = _mm_unpackhi_epi32(temp1, temp2);      //a1,b1,g1,r1
00408 }

SSP_FORCEINLINE void ssp_convert_4p_4c_epi8_SSE2 ( __m128i *  r,
__m128i *  g,
__m128i *  b,
__m128i *  a 
)

Definition at line 326 of file SSEPlus_convert_SSE2.h.

00327 {
00328                 __m128i temp1, temp2;
00329 
00330                 temp1 = _mm_unpacklo_epi8(*r, *b);                      // b7, r7, b6, r6, b5, r5, b4, r4, b3, r3, b2, r2, b1, r1, b0, r0
00331                 *r     = _mm_unpackhi_epi8(*r, *b);         //b15,r15,b14,r14,b13,r13,b12,r12,b11,r11,b10,r10, b9, r9, b8, r8
00332                 temp2 = _mm_unpacklo_epi8(*g, *a);                      // a7, g7, a6, g6, a5, g5, a4, g4, a3, g3, a2, g2, a1, g1, a0, g0
00333                 *g     = _mm_unpackhi_epi8(*g, *a);                     //a15,g15,a14,g14,a13,g13,a12,g12,a11,g11,a10,g10, a9, g9, a8, g8
00334 
00335                 *b     = _mm_unpacklo_epi8(*r, *g);                     //a11,b11,g11,r11,a10,b10,g10,r10, a9, b9, g9, r9, a8, b8, g8, r8
00336                 *a     = _mm_unpackhi_epi8(*r, *g);                     //a15,b15,g15,r15,a14,b14,g14,r14,a13,b13,g13,r13,a12,b12,g12,r12
00337                 *r     = _mm_unpacklo_epi8(temp1, temp2);       // a3, b3, g3, r3, a2, b2, g2, r2, a1, b1, g1, r1, a0, b0, g0, r0
00338                 *g     = _mm_unpackhi_epi8(temp1, temp2);       // a7, b7, g7, r7, a6, b6, g6, r6, a5, b5, g5, r5, a4, b4, g4, r4
00339 }

SSP_FORCEINLINE void ssp_convert_odd_even_epi16_SSE2 ( __m128i *  a,
__m128i *  b 
)

SSE2 implementation of ssp_convert_odd_even_epi16_SSE2 [custom]. (Searches MSDN)

Definition at line 16 of file SSEPlus_convert_SSE2.h.

00017 {
00018     // IN
00019     // a = a7,a6,a5,a4,a3,a2,a1,a0
00020     // b = b7,b6,b5,b4,b3,b2,b1,b0
00021 
00022     // OUT
00023     // a = b6,b4,b2,b0,a6,a4,a2,a0  // even
00024     // b = b7,b5,b3,b1,a7,a5,a3,a1  // odd
00025 
00026     __m128i A = *a;
00027     __m128i B = *b;
00028     __m128i ta, tb, odd, even;
00029 
00030     ta   = _mm_srai_epi32 ( A, 16 );    // sign,a7,sign,a5,sign,a3,sign,a1
00031     tb   = _mm_srai_epi32 ( B, 16 );    // sign,b7,sign,b5,sign,b3,sign,b1
00032     odd  = _mm_packs_epi32( ta, tb );   //   b7,b5,  b3,b1,  a7,a5,  a3,a1
00033 
00034     A    = _mm_slli_si128 ( A, 2 );     //   a6,a5,  a4,a3,  a2,a1,  a0, 0
00035     B    = _mm_slli_si128 ( B, 2 );     //   b6,b5,  b4,b3,  b2,b1,  b0, 0
00036     A    = _mm_srai_epi32 ( A, 16 );    // sign,a6,sign,a4,sign,a2,sign,a0
00037     B    = _mm_srai_epi32 ( B, 16 );    // sign,b6,sign,b4,sign,b2,sign,b0                                        
00038     even = _mm_packs_epi32( A, B );     //   b6,b4,  b2,b0,  a6,a4,  a2,a0
00039 
00040     *a = even;
00041     *b = odd;
00042 }

SSP_FORCEINLINE void ssp_convert_odd_even_epi32_SSE2 ( __m128i *  a,
__m128i *  b 
)

SSE2 implementation of ssp_convert_odd_even_epi32_SSE2 [custom]. (Searches MSDN)

Definition at line 66 of file SSEPlus_convert_SSE2.h.

00067 {
00068     // IN
00069     // a = a3,a2,a1,a0
00070     // b = b3,b2,b1,b0
00071 
00072     // OUT
00073     // a = b3,b1,a3,a1  // odd
00074     // b = b2,b0,a2,a0  // even
00075     
00076     ssp_m128 A,B;
00077     A.i = *a;
00078     B.i = *b;  
00079 
00080     ssp_convert_odd_even_ps_SSE2( &A.f, &B.f );
00081 
00082     *a = A.i;
00083     *b = B.i;       
00084 }

SSP_FORCEINLINE void ssp_convert_odd_even_ps_SSE2 ( __m128 *  a,
__m128 *  b 
)

SSE2 implementation of ssp_convert_odd_even_ps_SSE2 [custom]. (Searches MSDN)

Definition at line 47 of file SSEPlus_convert_SSE2.h.

00048 {
00049     // IN
00050     // a = a3,a2,a1,a0
00051     // b = b3,b2,b1,b0
00052 
00053     // OUT
00054     // a = b3,b1,a3,a1  // odd
00055     // b = b2,b0,a2,a0  // even
00056     
00057     __m128 c, d;  
00058     c = _mm_shuffle_ps( *a, *b, _MM_SHUFFLE(3,1,3,1) );
00059     d = _mm_shuffle_ps( *a, *b, _MM_SHUFFLE(2,0,2,0) );
00060     *a = c;
00061     *b = d;     
00062 }

SSP_FORCEINLINE void ssp_convert_reverse_transpose_SSE2 ( __m128i *  a,
__m128i *  b,
__m128i *  c 
)

Definition at line 126 of file SSEPlus_convert_SSE2.h.

00127 {
00128     ssp_m128 A, B, C, T1, T2, T3;
00129     A.i = *a;   
00130     B.i = *b;   
00131     C.i = *c;  
00132 
00133     T1.f = _mm_shuffle_ps( C.f,  A.f,  _MM_SHUFFLE( 3,1,2,0) ); // 9  3  8  2
00134     T2.f = _mm_shuffle_ps( B.f,  A.f,  _MM_SHUFFLE( 2,0,2,0) ); // 6  0  7  1
00135     T3.f = _mm_shuffle_ps( C.f,  B.f,  _MM_SHUFFLE( 3,1,3,1) ); // 10 4  11 5
00136 
00137     A.f  = _mm_shuffle_ps( T2.f, T1.f, _MM_SHUFFLE( 2,0,0,2 ) ); //3  2  1  0  
00138     B.f  = _mm_shuffle_ps( T3.f, T2.f, _MM_SHUFFLE( 1,3,0,2 ) ); //7  6  5  4  
00139     C.f  = _mm_shuffle_ps( T1.f, T3.f, _MM_SHUFFLE( 1,3,3,1 ) ); //11 10 9  8   
00140 
00141     *a = A.i;
00142     *b = B.i;
00143     *c = C.i; 
00144 }

SSP_FORCEINLINE __m128d ssp_cvtepi32_pd_SSE2 ( __m128i  a  ) 

SSE2 Native implementation of _mm_cvtepi32_pd [SSE2]. (Searches MSDN)

Definition at line 339 of file SSEPlus_native_SSE2.h.

00340 {
00341     return _mm_cvtepi32_pd( a );
00342 }

SSP_FORCEINLINE __m128 ssp_cvtepi32_ps_SSE2 ( __m128i  a  ) 

SSE2 Native implementation of _mm_cvtepi32_ps [SSE2]. (Searches MSDN)

Definition at line 344 of file SSEPlus_native_SSE2.h.

00345 {
00346     return _mm_cvtepi32_ps( a );
00347 }

SSP_FORCEINLINE __m128i ssp_cvtpd_epi32_SSE2 ( __m128d  a  ) 

SSE2 Native implementation of _mm_cvtpd_epi32 [SSE2]. (Searches MSDN)

Definition at line 349 of file SSEPlus_native_SSE2.h.

00350 {
00351     return _mm_cvtpd_epi32( a );
00352 }

SSP_FORCEINLINE __m64 ssp_cvtpd_pi32_SSE2 ( __m128d  a  ) 

SSE2 Native implementation of _mm_cvtpd_pi32 [SSE2]. (Searches MSDN)

Definition at line 354 of file SSEPlus_native_SSE2.h.

00355 {
00356     return _mm_cvtpd_pi32( a );
00357 }

SSP_FORCEINLINE __m128 ssp_cvtpd_ps_SSE2 ( __m128d  a  ) 

SSE2 Native implementation of _mm_cvtpd_ps [SSE2]. (Searches MSDN)

Definition at line 359 of file SSEPlus_native_SSE2.h.

00360 {
00361     return _mm_cvtpd_ps( a );
00362 }

SSP_FORCEINLINE __m128d ssp_cvtpi32_pd_SSE2 ( __m64  a  ) 

SSE2 Native implementation of _mm_cvtpi32_pd [SSE2]. (Searches MSDN)

Definition at line 364 of file SSEPlus_native_SSE2.h.

00365 {
00366     return _mm_cvtpi32_pd( a );
00367 }

SSP_FORCEINLINE __m128i ssp_cvtps_epi32_SSE2 ( __m128  a  ) 

SSE2 Native implementation of _mm_cvtps_epi32 [SSE2]. (Searches MSDN)

Definition at line 369 of file SSEPlus_native_SSE2.h.

00370 {
00371     return _mm_cvtps_epi32( a );
00372 }

SSP_FORCEINLINE __m128d ssp_cvtps_pd_SSE2 ( __m128  a  ) 

SSE2 Native implementation of _mm_cvtps_pd [SSE2]. (Searches MSDN)

Definition at line 374 of file SSEPlus_native_SSE2.h.

00375 {
00376     return _mm_cvtps_pd( a );
00377 }

SSP_FORCEINLINE int ssp_cvtsd_si32_SSE2 ( __m128d  a  ) 

SSE2 Native implementation of _mm_cvtsd_si32 [SSE2]. (Searches MSDN)

Definition at line 386 of file SSEPlus_native_SSE2.h.

00387 {
00388     return _mm_cvtsd_si32( a );
00389 }

SSP_FORCEINLINE __m128 ssp_cvtsd_ss_SSE2 ( __m128  a,
__m128d  b 
)

SSE2 Native implementation of _mm_cvtsd_ss [SSE2]. (Searches MSDN)

Definition at line 399 of file SSEPlus_native_SSE2.h.

00400 {
00401     return _mm_cvtsd_ss( a, b );
00402 }

SSP_FORCEINLINE int ssp_cvtsi128_si32_SSE2 ( __m128i  a  ) 

SSE2 Native implementation of _mm_cvtsi128_si32 [SSE2]. (Searches MSDN)

Definition at line 404 of file SSEPlus_native_SSE2.h.

00405 {
00406     return _mm_cvtsi128_si32( a );
00407 }

SSP_FORCEINLINE __m128d ssp_cvtsi32_sd_SSE2 ( __m128d  a,
int  b 
)

SSE2 Native implementation of _mm_cvtsi32_sd [SSE2]. (Searches MSDN)

Definition at line 417 of file SSEPlus_native_SSE2.h.

00418 {
00419     return _mm_cvtsi32_sd( a, b );
00420 }

SSP_FORCEINLINE __m128i ssp_cvtsi32_si128_SSE2 ( int  a  ) 

SSE2 Native implementation of _mm_cvtsi32_si128 [SSE2]. (Searches MSDN)

Definition at line 422 of file SSEPlus_native_SSE2.h.

00423 {
00424     return _mm_cvtsi32_si128( a );
00425 }

SSP_FORCEINLINE __m128d ssp_cvtss_sd_SSE2 ( __m128d  a,
__m128  b 
)

SSE2 Native implementation of _mm_cvtss_sd [SSE2]. (Searches MSDN)

Definition at line 441 of file SSEPlus_native_SSE2.h.

00442 {
00443     return _mm_cvtss_sd( a, b );
00444 }

SSP_FORCEINLINE __m128i ssp_cvttpd_epi32_SSE2 ( __m128d  a  ) 

SSE2 Native implementation of _mm_cvttpd_epi32 [SSE2]. (Searches MSDN)

Definition at line 446 of file SSEPlus_native_SSE2.h.

00447 {
00448     return _mm_cvttpd_epi32( a );
00449 }

SSP_FORCEINLINE __m64 ssp_cvttpd_pi32_SSE2 ( __m128d  a  ) 

SSE2 Native implementation of _mm_cvttpd_pi32 [SSE2]. (Searches MSDN)

Definition at line 451 of file SSEPlus_native_SSE2.h.

00452 {
00453     return _mm_cvttpd_pi32( a );
00454 }

SSP_FORCEINLINE __m128i ssp_cvttps_epi32_SSE2 ( __m128  a  ) 

SSE2 Native implementation of _mm_cvttps_epi32 [SSE2]. (Searches MSDN)

Definition at line 456 of file SSEPlus_native_SSE2.h.

00457 {
00458     return _mm_cvttps_epi32( a );
00459 }

SSP_FORCEINLINE int ssp_cvttsd_si32_SSE2 ( __m128d  a  ) 

SSE2 Native implementation of _mm_cvttsd_si32 [SSE2]. (Searches MSDN)

Definition at line 461 of file SSEPlus_native_SSE2.h.

00462 {
00463     return _mm_cvttsd_si32( a );
00464 }

SSP_FORCEINLINE __m128d ssp_div_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_div_pd [SSE2]. (Searches MSDN)

Definition at line 474 of file SSEPlus_native_SSE2.h.

00475 {
00476     return _mm_div_pd( a, b );
00477 }

SSP_FORCEINLINE __m128d ssp_div_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_div_sd [SSE2]. (Searches MSDN)

Definition at line 479 of file SSEPlus_native_SSE2.h.

00480 {
00481     return _mm_div_sd( a, b );
00482 }

SSP_FORCEINLINE int ssp_extract_epi16_SSE2 ( __m128i  a,
int  imm 
)

SSE2 Native implementation of _mm_extract_epi16 [SSE2]. (Searches MSDN)

Definition at line 484 of file SSEPlus_native_SSE2.h.

00485 {
00486     switch( imm & 0x7 )
00487     {
00488         CASE_8( _mm_extract_epi16, a );
00489     }
00490 }

SSP_FORCEINLINE __m128i ssp_insert_epi16_SSE2 ( __m128i  a,
int  b,
int  imm 
)

SSE2 Native implementation of _mm_insert_epi16 [SSE2]. (Searches MSDN)

Definition at line 492 of file SSEPlus_native_SSE2.h.

00493 {
00494     switch( imm & 0x7 )
00495     {
00496         CASE_8( _mm_insert_epi16, a, b );
00497     }
00498 }

SSP_FORCEINLINE void ssp_lfence_SSE2 ( void   ) 

SSE2 Native implementation of _mm_lfence [SSE2]. (Searches MSDN)

Definition at line 500 of file SSEPlus_native_SSE2.h.

00501 {
00502     _mm_lfence();
00503 }

SSP_FORCEINLINE __m128d ssp_load1_pd_SSE2 ( double const *  dp  ) 

SSE2 Native implementation of _mm_load1_pd [SSE2]. (Searches MSDN)

Definition at line 520 of file SSEPlus_native_SSE2.h.

00521 {
00522     return _mm_load1_pd( dp );
00523 }

SSP_FORCEINLINE __m128d ssp_load_pd_SSE2 ( double const *  dp  ) 

SSE2 Native implementation of _mm_load_pd [SSE2]. (Searches MSDN)

Definition at line 505 of file SSEPlus_native_SSE2.h.

00506 {
00507     return _mm_load_pd( dp );
00508 }

SSP_FORCEINLINE __m128d ssp_load_sd_SSE2 ( double const *  dp  ) 

SSE2 Native implementation of _mm_load_sd [SSE2]. (Searches MSDN)

Definition at line 510 of file SSEPlus_native_SSE2.h.

00511 {
00512     return _mm_load_sd( dp );
00513 }

SSP_FORCEINLINE __m128i ssp_load_si128_SSE2 ( __m128i const *  p  ) 

SSE2 Native implementation of _mm_load_si128 [SSE2]. (Searches MSDN)

Definition at line 515 of file SSEPlus_native_SSE2.h.

00516 {
00517     return _mm_load_si128( p );
00518 }

SSP_FORCEINLINE __m128d ssp_loadh_pd_SSE2 ( __m128d  a,
double const *  dp 
)

SSE2 Native implementation of _mm_loadh_pd [SSE2]. (Searches MSDN)

Definition at line 525 of file SSEPlus_native_SSE2.h.

00526 {
00527     return _mm_loadh_pd( a, dp );
00528 }

SSP_FORCEINLINE __m128i ssp_loadl_epi64_SSE2 ( __m128i const *  p  ) 

SSE2 Native implementation of _mm_loadl_epi64 [SSE2]. (Searches MSDN)

Definition at line 530 of file SSEPlus_native_SSE2.h.

00531 {
00532     return _mm_loadl_epi64( p );
00533 }

SSP_FORCEINLINE __m128d ssp_loadl_pd_SSE2 ( __m128d  a,
double const *  dp 
)

SSE2 Native implementation of _mm_loadl_pd [SSE2]. (Searches MSDN)

Definition at line 535 of file SSEPlus_native_SSE2.h.

00536 {
00537     return _mm_loadl_pd( a, dp );
00538 }

SSP_FORCEINLINE __m128d ssp_loadr_pd_SSE2 ( double const *  dp  ) 

SSE2 Native implementation of _mm_loadr_pd [SSE2]. (Searches MSDN)

Definition at line 540 of file SSEPlus_native_SSE2.h.

00541 {
00542     return _mm_loadr_pd( dp );
00543 }

SSP_FORCEINLINE __m128d ssp_loadu_pd_SSE2 ( double const *  dp  ) 

SSE2 Native implementation of _mm_loadu_pd [SSE2]. (Searches MSDN)

Definition at line 545 of file SSEPlus_native_SSE2.h.

00546 {
00547     return _mm_loadu_pd( dp );
00548 }

SSP_FORCEINLINE __m128i ssp_loadu_si128_SSE2 ( __m128i const *  p  ) 

SSE2 Native implementation of _mm_loadu_si128 [SSE2]. (Searches MSDN)

Definition at line 550 of file SSEPlus_native_SSE2.h.

00551 {
00552     return _mm_loadu_si128( p );
00553 }

SSP_FORCEINLINE __m128i ssp_logical_bitwise_select_SSE2 ( __m128i  a,
__m128i  b,
__m128i  mask 
)

Definition at line 61 of file SSEPlus_logical_SSE2.h.

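00061 SSP_FORCEINLINE __m128i ssp_logical_bitwise_select_SSE2( __m128i a, __m128i b, __m128i mask )   // Bitwise (mask ? a : b)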
00062 {
00063     a = _mm_and_si128   ( a,    mask );                                 // clear a where mask = 0
00064     b = _mm_andnot_si128( mask, b    );                                 // clear b where mask = 1
00065     a = _mm_or_si128    ( a,    b    );                                 // a = a OR b                         
00066     return a; 
00067 }

SSP_FORCEINLINE __m128i ssp_logical_cmpgt_epu16_SSE2 ( __m128i  a,
__m128i  b 
)

Definition at line 26 of file SSEPlus_logical_SSE2.h.

00027 {
00028     __m128i signMask, mask;
00029 
00030     mask     = _mm_cmpgt_epi16( a, b );              // FFFF where a > b (signed)
00031     signMask = _mm_xor_si128  ( a, b );              // Signbit is 1 where signs differ 
00032     signMask = _mm_srai_epi16 ( signMask, 15 );      // fill all fields with sign bit     
00033     mask     = _mm_xor_si128  ( mask, signMask );    // Invert output where signs differed
00034     return mask;
00035 }

SSP_FORCEINLINE __m128i ssp_logical_cmpgt_epu32_SSE2 ( __m128i  a,
__m128i  b 
)

Definition at line 48 of file SSEPlus_logical_SSE2.h.

00049 {
00050     __m128i signMask, mask;
00051 
00052     mask     = _mm_cmpgt_epi32( a, b );              // FFFF where a > b (signed)
00053     signMask = _mm_xor_si128  ( a, b );              // Signbit is 1 where signs differ 
00054     signMask = _mm_srai_epi32 ( signMask, 31 );      // fill all fields with sign bit     
00055     mask     = _mm_xor_si128  ( mask, signMask );    // Invert output where signs differed
00056     return mask;
00057 }

SSP_FORCEINLINE __m128i ssp_logical_cmpgte_epi8 ( __m128i  a,
__m128i  b 
)

r_:= (a_>=b_) ? 0xff : 0x0

Definition at line 109 of file SSEPlus_logical_SSE2.h.

00110 {
00111     a = _mm_cmplt_epi8( a, b );
00112     a = _mm_xor_si128 ( a, _mm_set1_epi8( (char)0xFF) );
00113     return a;
00114 }

SSP_FORCEINLINE __m128i ssp_logical_cmplt_epu16_SSE2 ( __m128i  a,
__m128i  b 
)

Definition at line 15 of file SSEPlus_logical_SSE2.h.

00016 {
00017     __m128i signMask, mask;
00018 
00019     mask     = _mm_cmplt_epi16( a, b );              // FFFF where a < b (signed)
00020     signMask = _mm_xor_si128  ( a, b );              // Signbit is 1 where signs differ 
00021     signMask = _mm_srai_epi16 ( signMask, 15 );      // fill all fields with sign bit     
00022     mask     = _mm_xor_si128  ( mask, signMask );    // Invert output where signs differed
00023     return mask;
00024 }

SSP_FORCEINLINE __m128i ssp_logical_cmplt_epu32_SSE2 ( __m128i  a,
__m128i  b 
)

Definition at line 37 of file SSEPlus_logical_SSE2.h.

00038 {
00039     __m128i signMask, mask;
00040 
00041     mask     = _mm_cmplt_epi32( a, b );              // FFFF where a < b (signed)
00042     signMask = _mm_xor_si128  ( a, b );              // Signbit is 1 where signs differ 
00043     signMask = _mm_srai_epi32 ( signMask, 31 );      // fill all fields with sign bit     
00044     mask     = _mm_xor_si128  ( mask, signMask );    // Invert output where signs differed
00045     return mask;
00046 }

SSP_FORCEINLINE __m128i ssp_logical_cmplte_epi8 ( __m128i  a,
__m128i  b 
)

r_:= (a_<=b_) ? 0xff : 0x0

Definition at line 97 of file SSEPlus_logical_SSE2.h.

00098 {
00099     a = _mm_cmpgt_epi8( a, b );
00100     a = _mm_xor_si128 ( a, _mm_set1_epi8( (char)0xFF) );
00101     return a;
00102 }

SSP_FORCEINLINE __m128i ssp_madd_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_madd_epi16 [SSE2]. (Searches MSDN)

Definition at line 555 of file SSEPlus_native_SSE2.h.

00556 {
00557     return _mm_madd_epi16( a, b );
00558 }

SSP_FORCEINLINE void ssp_maskmoveu_si128_SSE2 ( __m128i  a,
__m128i  b,
char *  c 
)

SSE2 Native implementation of _mm_maskmoveu_si128 [SSE2]. (Searches MSDN)

Definition at line 560 of file SSEPlus_native_SSE2.h.

00561 {
00562     _mm_maskmoveu_si128( a, b, c );
00563 }

SSP_FORCEINLINE __m128i ssp_max_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_max_epi16 [SSE2]. (Searches MSDN)

Definition at line 565 of file SSEPlus_native_SSE2.h.

00566 {
00567     return _mm_max_epi16( a, b );
00568 }

SSP_FORCEINLINE __m128i ssp_max_epu8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_max_epu8 [SSE2]. (Searches MSDN)

Definition at line 570 of file SSEPlus_native_SSE2.h.

00571 {
00572     return _mm_max_epu8( a, b );
00573 }

SSP_FORCEINLINE __m128d ssp_max_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_max_pd [SSE2]. (Searches MSDN)

Definition at line 575 of file SSEPlus_native_SSE2.h.

00576 {
00577     return _mm_max_pd( a, b );
00578 }

SSP_FORCEINLINE __m128d ssp_max_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_max_sd [SSE2]. (Searches MSDN)

Definition at line 580 of file SSEPlus_native_SSE2.h.

00581 {
00582     return _mm_max_sd( a, b );
00583 }

SSP_FORCEINLINE void ssp_mfence_SSE2 ( void   ) 

SSE2 Native implementation of _mm_mfence [SSE2]. (Searches MSDN)

Definition at line 585 of file SSEPlus_native_SSE2.h.

00586 {
00587     _mm_mfence( );
00588 }

SSP_FORCEINLINE __m128i ssp_min_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_min_epi16 [SSE2]. (Searches MSDN)

Definition at line 590 of file SSEPlus_native_SSE2.h.

00591 {
00592     return _mm_min_epi16( a, b );
00593 }

SSP_FORCEINLINE __m128i ssp_min_epu8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_min_epu8 [SSE2]. (Searches MSDN)

Definition at line 595 of file SSEPlus_native_SSE2.h.

00596 {
00597     return _mm_min_epu8( a, b );
00598 }

SSP_FORCEINLINE __m128d ssp_min_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_min_pd [SSE2]. (Searches MSDN)

Definition at line 600 of file SSEPlus_native_SSE2.h.

00601 {
00602     return _mm_min_pd( a, b );
00603 }

SSP_FORCEINLINE __m128d ssp_min_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_min_sd [SSE2]. (Searches MSDN)

Definition at line 605 of file SSEPlus_native_SSE2.h.

00606 {
00607     return _mm_min_sd( a, b );
00608 }

SSP_FORCEINLINE __m128i ssp_move_epi64_SSE2 ( __m128i  a  ) 

SSE2 Native implementation of _mm_move_epi64 [SSE2]. (Searches MSDN)

Definition at line 610 of file SSEPlus_native_SSE2.h.

00611 {
00612     return _mm_move_epi64( a );
00613 }

SSP_FORCEINLINE __m128d ssp_move_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_move_sd [SSE2]. (Searches MSDN)

Definition at line 615 of file SSEPlus_native_SSE2.h.

00616 {
00617     return _mm_move_sd( a, b );
00618 }

SSP_FORCEINLINE int ssp_movemask_epi8_SSE2 ( __m128i  a  ) 

SSE2 Native implementation of _mm_movemask_epi8 [SSE2]. (Searches MSDN)

Definition at line 620 of file SSEPlus_native_SSE2.h.

00621 {
00622     return _mm_movemask_epi8( a );
00623 }

SSP_FORCEINLINE int ssp_movemask_pd_SSE2 ( __m128d  a  ) 

SSE2 Native implementation of _mm_movemask_pd [SSE2]. (Searches MSDN)

Definition at line 625 of file SSEPlus_native_SSE2.h.

00626 {
00627     return _mm_movemask_pd( a );
00628 }

SSP_FORCEINLINE __m64 ssp_movepi64_pi64_SSE2 ( __m128i  a  ) 

SSE2 Native implementation of _mm_movepi64_pi64 [SSE2]. (Searches MSDN)

Definition at line 630 of file SSEPlus_native_SSE2.h.

00631 {
00632     return _mm_movepi64_pi64( a );
00633 }

SSP_FORCEINLINE __m128i ssp_movmask_imm8_to_epi32_SSE2 ( int  mask  ) 

Definition at line 82 of file SSEPlus_logical_SSE2.h.

00083 {
00084     __m128i screen;
00085     const static __m128i mulShiftImm = SSP_CONST_SET_16I( 0x1000, 0x0000, 0x2000, 0x0000, 0x4000, 0x0000, 0x8000, 0x0000 ); // Shift mask multiply moves all bits to left, becomes MSB
00086     screen = _mm_set1_epi16 ( mask                );   // Load the mask into register
00087     screen = _mm_mullo_epi16( screen, mulShiftImm );   // Shift bits to MSB
00088     screen = _mm_srai_epi32 ( screen, 31          );   // Shift bits to obtain all F's or all 0's
00089     return screen;
00090 }

SSP_FORCEINLINE __m128i ssp_movpi64_epi64_SSE2 ( __m64  a  ) 

SSE2 Native implementation of _mm_movpi64_epi64 [SSE2]. (Searches MSDN)

Definition at line 635 of file SSEPlus_native_SSE2.h.

00636 {
00637     return _mm_movpi64_epi64( a );
00638 }

SSP_FORCEINLINE __m128i ssp_mul_epu32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_mul_epu32 [SSE2]. (Searches MSDN)

Definition at line 640 of file SSEPlus_native_SSE2.h.

00641 {
00642     return _mm_mul_epu32( a, b );
00643 }

SSP_FORCEINLINE __m128d ssp_mul_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_mul_pd [SSE2]. (Searches MSDN)

Definition at line 645 of file SSEPlus_native_SSE2.h.

00646 {
00647     return _mm_mul_pd( a, b );
00648 }

SSP_FORCEINLINE __m128d ssp_mul_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_mul_sd [SSE2]. (Searches MSDN)

Definition at line 650 of file SSEPlus_native_SSE2.h.

00651 {
00652     return _mm_mul_sd( a, b );
00653 }

SSP_FORCEINLINE __m64 ssp_mul_su32_SSE2 ( __m64  a,
__m64  b 
)

SSE2 Native implementation of _mm_mul_su32 [SSE2]. (Searches MSDN)

Definition at line 655 of file SSEPlus_native_SSE2.h.

00656 {
00657     return _mm_mul_su32( a, b );
00658 }

SSP_FORCEINLINE __m128i ssp_mulhi_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_mulhi_epi16 [SSE2]. (Searches MSDN)

Definition at line 660 of file SSEPlus_native_SSE2.h.

00661 {
00662     return _mm_mulhi_epi16( a, b );
00663 }

SSP_FORCEINLINE __m128i ssp_mulhi_epu16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_mulhi_epu16 [SSE2]. (Searches MSDN)

Definition at line 665 of file SSEPlus_native_SSE2.h.

00666 {
00667     return _mm_mulhi_epu16( a, b );
00668 }

SSP_FORCEINLINE __m128i ssp_mullo_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_mullo_epi16 [SSE2]. (Searches MSDN)

Definition at line 670 of file SSEPlus_native_SSE2.h.

00671 {
00672     return _mm_mullo_epi16( a, b );
00673 }

SSP_FORCEINLINE __m128d ssp_or_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_or_pd [SSE2]. (Searches MSDN)

Definition at line 675 of file SSEPlus_native_SSE2.h.

00676 {
00677     return _mm_or_pd( a, b );
00678 }

SSP_FORCEINLINE __m128i ssp_or_si128_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_or_si128 [SSE2]. (Searches MSDN)

Definition at line 680 of file SSEPlus_native_SSE2.h.

00681 {
00682     return _mm_or_si128( a, b );
00683 }

SSP_FORCEINLINE __m128i ssp_packs_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_packs_epi16 [SSE2]. (Searches MSDN)

Definition at line 685 of file SSEPlus_native_SSE2.h.

00686 {
00687     return _mm_packs_epi16( a, b );
00688 }

SSP_FORCEINLINE __m128i ssp_packs_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_packs_epi32 [SSE2]. (Searches MSDN)

Definition at line 690 of file SSEPlus_native_SSE2.h.

00691 {
00692     return _mm_packs_epi32( a, b );
00693 }

SSP_FORCEINLINE __m128i ssp_packus_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_packus_epi16 [SSE2]. (Searches MSDN)

Definition at line 695 of file SSEPlus_native_SSE2.h.

00696 {
00697     return _mm_packus_epi16( a, b );
00698 }

SSP_FORCEINLINE void ssp_pause_SSE2 ( void   ) 

SSE2 Native implementation of _mm_pause [SSE2]. (Searches MSDN)

Definition at line 700 of file SSEPlus_native_SSE2.h.

00701 {
00702     _mm_pause();
00703 }

SSP_FORCEINLINE __m128 ssp_round_ps_neg_zero_SSE2 ( __m128  a,
int  iRoundMode 
)

This function wraps ssp_round_ps_SSE2. It saves the sign bit of each input element and restores it after rounding, so a negative value that rounds to zero yields negative zero (-0.0).

Definition at line 90 of file SSEPlus_arithmetic_SSE2.h.

00091 {
00092     const static __m128i SIGN_BIT = SSP_CONST_SET_32I( 0x80000000, 0x80000000, 0x80000000,0x80000000 );
00093     ssp_m128 A, sign;
00094     A.f = a;
00095     
00096     sign.i = _mm_and_si128    ( A.i, SIGN_BIT );  // Store the sign bits
00097     A.f    = ssp_round_ps_SSE2( A.f, iRoundMode );   
00098     A.i    = _mm_or_si128     ( A.i, sign.i );    // Restore the sign bits (preserves -0)
00099    
00100     return A.f;
00101 }

SSP_FORCEINLINE __m128i ssp_sad_epu8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_sad_epu8 [SSE2]. (Searches MSDN)

Definition at line 705 of file SSEPlus_native_SSE2.h.

00706 {
00707     return _mm_sad_epu8( a, b );
00708 }

SSP_FORCEINLINE __m128i ssp_set1_epi16_SSE2 ( short  w  ) 

SSE2 Native implementation of _mm_set1_epi16 [SSE2]. (Searches MSDN)

Definition at line 740 of file SSEPlus_native_SSE2.h.

00741 {
00742     return _mm_set1_epi16( w );
00743 }

SSP_FORCEINLINE __m128i ssp_set1_epi32_SSE2 ( int  i  ) 

SSE2 Native implementation of _mm_set1_epi32 [SSE2]. (Searches MSDN)

Definition at line 745 of file SSEPlus_native_SSE2.h.

00746 {
00747     return _mm_set1_epi32( i );
00748 }

SSP_FORCEINLINE __m128i ssp_set1_epi64_SSE2 ( __m64  a  ) 

SSE2 Native implementation of _mm_set1_epi64 [SSE2]. (Searches MSDN)

Definition at line 750 of file SSEPlus_native_SSE2.h.

00751 {
00752     return _mm_set1_epi64( a );
00753 }

SSP_FORCEINLINE __m128i ssp_set1_epi8_SSE2 ( char  b  ) 

SSE2 Native implementation of _mm_set1_epi8 [SSE2]. (Searches MSDN)

Definition at line 755 of file SSEPlus_native_SSE2.h.

00756 {
00757     return _mm_set1_epi8( b );
00758 }

SSP_FORCEINLINE __m128d ssp_set1_pd_SSE2 ( double  a  ) 

SSE2 Native implementation of _mm_set1_pd [SSE2]. (Searches MSDN)

Definition at line 760 of file SSEPlus_native_SSE2.h.

00761 {
00762     return _mm_set1_pd( a );
00763 }

SSP_FORCEINLINE __m128i ssp_set_epi16_SSE2 ( short  w7,
short  w6,
short  w5,
short  w4,
short  w3,
short  w2,
short  w1,
short  w0 
)

SSE2 Native implementation of _mm_set_epi16 [SSE2]. (Searches MSDN)

Definition at line 710 of file SSEPlus_native_SSE2.h.

00711 {
00712     return _mm_set_epi16( w7, w6, w5, w4, w3, w2, w1, w0 );
00713 }

SSP_FORCEINLINE __m128i ssp_set_epi32_SSE2 ( int  i3,
int  i2,
int  i1,
int  i0 
)

SSE2 Native implementation of _mm_set_epi32 [SSE2]. (Searches MSDN)

Definition at line 715 of file SSEPlus_native_SSE2.h.

00716 {
00717     return _mm_set_epi32( i3, i2, i1, i0 );
00718 }

SSP_FORCEINLINE __m128i ssp_set_epi64_SSE2 ( __m64  a1,
__m64  a0 
)

SSE2 Native implementation of _mm_set_epi64 [SSE2]. (Searches MSDN)

Definition at line 720 of file SSEPlus_native_SSE2.h.

00721 {
00722     return _mm_set_epi64( a1, a0 );
00723 }

SSP_FORCEINLINE __m128i ssp_set_epi8_SSE2 ( char  b15,
char  b14,
char  b13,
char  b12,
char  b11,
char  b10,
char  b9,
char  b8,
char  b7,
char  b6,
char  b5,
char  b4,
char  b3,
char  b2,
char  b1,
char  b0 
)

SSE2 Native implementation of _mm_set_epi8 [SSE2]. (Searches MSDN)

Definition at line 725 of file SSEPlus_native_SSE2.h.

00726 {
00727     return _mm_set_epi8( b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0 );
00728 }

SSP_FORCEINLINE __m128d ssp_set_pd_SSE2 ( double  a1,
double  a0 
)

SSE2 Native implementation of _mm_set_pd [SSE2]. (Searches MSDN)

Definition at line 730 of file SSEPlus_native_SSE2.h.

00731 {
00732     return _mm_set_pd( a1, a0 );
00733 }

SSP_FORCEINLINE __m128d ssp_set_sd_SSE2 ( double  w  ) 

SSE2 Native implementation of _mm_set_sd [SSE2]. (Searches MSDN)

Definition at line 735 of file SSEPlus_native_SSE2.h.

00736 {
00737     return _mm_set_sd( w );
00738 }

SSP_FORCEINLINE __m128i ssp_setr_epi16_SSE2 ( short  w0,
short  w1,
short  w2,
short  w3,
short  w4,
short  w5,
short  w6,
short  w7 
)

SSE2 Native implementation of _mm_setr_epi16 [SSE2]. (Searches MSDN)

Definition at line 772 of file SSEPlus_native_SSE2.h.

00772 /** \IMP{SSE2 Native,_mm_setr_epi16,SSE2} */
00773 SSP_FORCEINLINE __m128i ssp_setr_epi16_SSE2( short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7 )
00774 {
00775     return _mm_setr_epi16( w0, w1, w2, w3, w4, w5, w6, w7 );
00776 }

SSP_FORCEINLINE __m128i ssp_setr_epi32_SSE2 ( int  i0,
int  i1,
int  i2,
int  i3 
)

SSE2 Native implementation of _mm_setr_epi32 [SSE2]. (Searches MSDN)

Definition at line 777 of file SSEPlus_native_SSE2.h.

00777 /** \IMP{SSE2 Native,_mm_setr_epi32,SSE2} */
00778 SSP_FORCEINLINE __m128i ssp_setr_epi32_SSE2( int i0, int i1, int i2, int i3)
00779 {
00780     return _mm_setr_epi32( i0, i1, i2, i3);
00781 }

SSP_FORCEINLINE __m128i ssp_setr_epi64_SSE2 ( __m64  a0,
__m64  a1 
)

SSE2 Native implementation of _mm_setr_epi64 [SSE2]. (Searches MSDN)

Definition at line 782 of file SSEPlus_native_SSE2.h.

00782 /** \IMP{SSE2 Native,_mm_setr_epi64,SSE2} */
00783 SSP_FORCEINLINE __m128i ssp_setr_epi64_SSE2( __m64 a0, __m64 a1)
00784 {
00785     return _mm_setr_epi64( a0, a1);
00786 }

SSP_FORCEINLINE __m128i ssp_setr_epi8_SSE2 ( char  b15,
char  b14,
char  b13,
char  b12,
char  b11,
char  b10,
char  b9,
char  b8,
char  b7,
char  b6,
char  b5,
char  b4,
char  b3,
char  b2,
char  b1,
char  b0 
)

SSE2 Native implementation of _mm_setr_epi8 [SSE2]. (Searches MSDN)

Definition at line 787 of file SSEPlus_native_SSE2.h.

00787 /** \IMP{SSE2 Native,_mm_setr_epi8,SSE2} */
00788 SSP_FORCEINLINE __m128i ssp_setr_epi8_SSE2( char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0 )
00789 {
00790     return _mm_setr_epi8( b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0 );
00791 }

SSP_FORCEINLINE __m128d ssp_setr_pd_SSE2 ( double  a0,
double  a1 
)

SSE2 Native implementation of _mm_setr_pd [SSE2]. (Searches MSDN)

Definition at line 792 of file SSEPlus_native_SSE2.h.

00792 /** \IMP{SSE2 Native,_mm_setr_pd,SSE2} */
00793 SSP_FORCEINLINE __m128d ssp_setr_pd_SSE2( double a0, double a1 )
00794 {
00795     return _mm_setr_pd( a0, a1);
00796 }

SSP_FORCEINLINE __m128d ssp_setzero_pd_SSE2 ( void   ) 

SSE2 Native implementation of _mm_setzero_pd [SSE2]. (Searches MSDN)

Definition at line 797 of file SSEPlus_native_SSE2.h.

00797 /** \IMP{SSE2 Native,_mm_setzero_pd,SSE2} */
00798 SSP_FORCEINLINE __m128d ssp_setzero_pd_SSE2( void )
00799 {
00800     return _mm_setzero_pd( );
00801 }

SSP_FORCEINLINE __m128i ssp_setzero_si128_SSE2 ( void   ) 

SSE2 Native implementation of _mm_setzero_si128 [SSE2]. (Searches MSDN)

Definition at line 802 of file SSEPlus_native_SSE2.h.

00802 /** \IMP{SSE2 Native,_mm_setzero_si128,SSE2} */
00803 SSP_FORCEINLINE __m128i ssp_setzero_si128_SSE2( void )
00804 {
00805     return _mm_setzero_si128( );
00806 }

SSP_FORCEINLINE __m128i ssp_shuffle_epi32_SSE2 ( __m128i  a,
int  imm 
)

SSE2 Native implementation of _mm_shuffle_epi32 [SSE2]. (Searches MSDN)

Definition at line 807 of file SSEPlus_native_SSE2.h.

00807 /** \IMP{SSE2 Native,_mm_shuffle_epi32,SSE2} */
00808 SSP_FORCEINLINE __m128i ssp_shuffle_epi32_SSE2( __m128i a, int imm )
00809 {
00810     switch( imm & 0xFF )
00811     {
00812         CASE_256( _mm_shuffle_epi32, a );
00813     }
00814 }

SSP_FORCEINLINE __m128d ssp_shuffle_pd_SSE2 ( __m128d  a,
__m128d  b,
int  imm 
)

SSE2 Native implementation of _mm_shuffle_pd [SSE2]. (Searches MSDN)

Definition at line 815 of file SSEPlus_native_SSE2.h.

00815 /** \IMP{SSE2 Native,_mm_shuffle_pd,SSE2} */
00816 SSP_FORCEINLINE __m128d ssp_shuffle_pd_SSE2( __m128d a, __m128d b, int imm )
00817 {
00818     switch( imm & 0xFF )
00819     {
00820         CASE_4( _mm_shuffle_pd, a, b );
00821     }
00822 }

SSP_FORCEINLINE __m128i ssp_shufflehi_epi16_SSE2 ( __m128i  a,
int  imm 
)

SSE2 Native implementation of _mm_shufflehi_epi16 [SSE2]. (Searches MSDN)

Definition at line 823 of file SSEPlus_native_SSE2.h.

00823 /** \IMP{SSE2 Native,_mm_shufflehi_epi16,SSE2} */
00824 SSP_FORCEINLINE __m128i ssp_shufflehi_epi16_SSE2( __m128i a, int imm )
00825 {
00826     switch( imm & 0xFF )
00827     {
00828         CASE_256( _mm_shufflehi_epi16, a );
00829     }
00830 }

SSP_FORCEINLINE __m128i ssp_shufflelo_epi16_SSE2 ( __m128i  a,
int  imm 
)

SSE2 Native implementation of _mm_shufflelo_epi16 [SSE2]. (Searches MSDN)

Definition at line 831 of file SSEPlus_native_SSE2.h.

00831 /** \IMP{SSE2 Native,_mm_shufflelo_epi16,SSE2} */
00832 SSP_FORCEINLINE __m128i ssp_shufflelo_epi16_SSE2( __m128i a, int imm )
00833 {
00834     switch( imm & 0xFF )
00835     {
00836         CASE_256( _mm_shufflelo_epi16, a );
00837     }
00838 }

SSP_FORCEINLINE __m128i ssp_sll_epi16_SSE2 ( __m128i  a,
__m128i  count 
)

SSE2 Native implementation of _mm_sll_epi16 [SSE2]. (Searches MSDN)

Definition at line 839 of file SSEPlus_native_SSE2.h.

00839 /** \IMP{SSE2 Native,_mm_sll_epi16,SSE2} */
00840 SSP_FORCEINLINE __m128i ssp_sll_epi16_SSE2( __m128i a, __m128i count )
00841 {
00842     return _mm_sll_epi16( a, count );
00843 }

SSP_FORCEINLINE __m128i ssp_sll_epi32_SSE2 ( __m128i  a,
__m128i  count 
)

SSE2 Native implementation of _mm_sll_epi32 [SSE2]. (Searches MSDN)

Definition at line 844 of file SSEPlus_native_SSE2.h.

00844 /** \IMP{SSE2 Native,_mm_sll_epi32,SSE2} */
00845 SSP_FORCEINLINE __m128i ssp_sll_epi32_SSE2( __m128i a, __m128i count )
00846 {
00847     return _mm_sll_epi32( a, count );
00848 }

SSP_FORCEINLINE __m128i ssp_sll_epi64_SSE2 ( __m128i  a,
__m128i  count 
)

SSE2 Native implementation of _mm_sll_epi64 [SSE2]. (Searches MSDN)

Definition at line 849 of file SSEPlus_native_SSE2.h.

00849 /** \IMP{SSE2 Native,_mm_sll_epi64,SSE2} */
00850 SSP_FORCEINLINE __m128i ssp_sll_epi64_SSE2( __m128i a, __m128i count )
00851 {
00852     return _mm_sll_epi64( a, count );
00853 }

SSP_FORCEINLINE __m128i ssp_slli_epi16_SSE2 ( __m128i  a,
int  count 
)

SSE2 Native implementation of _mm_slli_epi16 [SSE2]. (Searches MSDN)

Definition at line 854 of file SSEPlus_native_SSE2.h.

00854 /** \IMP{SSE2 Native,_mm_slli_epi16,SSE2} */
00855 SSP_FORCEINLINE __m128i ssp_slli_epi16_SSE2( __m128i a, int count )
00856 {
00857     return _mm_slli_epi16( a, count );
00858 }

SSP_FORCEINLINE __m128i ssp_slli_epi32_SSE2 ( __m128i  a,
int  count 
)

SSE2 Native implementation of _mm_slli_epi32 [SSE2]. (Searches MSDN)

Definition at line 859 of file SSEPlus_native_SSE2.h.

00859 /** \IMP{SSE2 Native,_mm_slli_epi32,SSE2} */
00860 SSP_FORCEINLINE __m128i ssp_slli_epi32_SSE2( __m128i a, int count )
00861 {
00862     return _mm_slli_epi32( a, count );
00863 }

SSP_FORCEINLINE __m128i ssp_slli_epi64_SSE2 ( __m128i  a,
int  count 
)

SSE2 Native implementation of _mm_slli_epi64 [SSE2]. (Searches MSDN)

Definition at line 864 of file SSEPlus_native_SSE2.h.

00864 /** \IMP{SSE2 Native,_mm_slli_epi64,SSE2} */
00865 SSP_FORCEINLINE __m128i ssp_slli_epi64_SSE2( __m128i a, int count )
00866 {
00867     return _mm_slli_epi64( a, count );
00868 }

SSP_FORCEINLINE __m128i ssp_slli_si128_SSE2 ( __m128i  a,
int  imm 
)

SSE2 Native implementation of _mm_slli_si128 [SSE2]. (Searches MSDN)

Definition at line 869 of file SSEPlus_native_SSE2.h.

00869 /** \IMP{SSE2 Native,_mm_slli_si128,SSE2} */
00870 SSP_FORCEINLINE __m128i ssp_slli_si128_SSE2( __m128i a, int imm )
00871 {
00872     switch( imm & 0x7F )
00873     {
00874         CASE_128( _mm_slli_si128, a );
00875     }
00876 }

SSP_FORCEINLINE __m128d ssp_sqrt_pd_SSE2 ( __m128d  a  ) 

SSE2 Native implementation of _mm_sqrt_pd [SSE2]. (Searches MSDN)

Definition at line 877 of file SSEPlus_native_SSE2.h.

00877 /** \IMP{SSE2 Native,_mm_sqrt_pd,SSE2} */
00878 SSP_FORCEINLINE __m128d ssp_sqrt_pd_SSE2( __m128d a )
00879 {
00880     return _mm_sqrt_pd( a );
00881 }

SSP_FORCEINLINE __m128d ssp_sqrt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_sqrt_sd [SSE2]. (Searches MSDN)

Definition at line 882 of file SSEPlus_native_SSE2.h.

00882 /** \IMP{SSE2 Native,_mm_sqrt_sd,SSE2} */
00883 SSP_FORCEINLINE __m128d ssp_sqrt_sd_SSE2( __m128d a, __m128d b )
00884 {
00885     return _mm_sqrt_sd( a, b );
00886 }

SSP_FORCEINLINE __m128i ssp_sra_epi16_SSE2 ( __m128i  a,
__m128i  count 
)

SSE2 Native implementation of _mm_sra_epi16 [SSE2]. (Searches MSDN)

Definition at line 887 of file SSEPlus_native_SSE2.h.

00887 /** \IMP{SSE2 Native,_mm_sra_epi16,SSE2} */
00888 SSP_FORCEINLINE __m128i ssp_sra_epi16_SSE2( __m128i a, __m128i count )
00889 {
00890     return _mm_sra_epi16( a, count );
00891 }

SSP_FORCEINLINE __m128i ssp_sra_epi32_SSE2 ( __m128i  a,
__m128i  count 
)

SSE2 Native implementation of _mm_sra_epi32 [SSE2]. (Searches MSDN)

Definition at line 892 of file SSEPlus_native_SSE2.h.

00892 /** \IMP{SSE2 Native,_mm_sra_epi32,SSE2} */
00893 SSP_FORCEINLINE __m128i ssp_sra_epi32_SSE2( __m128i a, __m128i count )
00894 {
00895     return _mm_sra_epi32( a, count );
00896 }

SSP_FORCEINLINE __m128i ssp_srai_epi16_SSE2 ( __m128i  a,
int  count 
)

SSE2 Native implementation of _mm_srai_epi16 [SSE2]. (Searches MSDN)

Definition at line 897 of file SSEPlus_native_SSE2.h.

00897 /** \IMP{SSE2 Native,_mm_srai_epi16,SSE2} */
00898 SSP_FORCEINLINE __m128i ssp_srai_epi16_SSE2( __m128i a, int count )
00899 {
00900     return _mm_srai_epi16( a, count );
00901 }

SSP_FORCEINLINE __m128i ssp_srai_epi32_SSE2 ( __m128i  a,
int  count 
)

SSE2 Native implementation of _mm_srai_epi32 [SSE2]. (Searches MSDN)

Definition at line 902 of file SSEPlus_native_SSE2.h.

00902 /** \IMP{SSE2 Native,_mm_srai_epi32,SSE2} */
00903 SSP_FORCEINLINE __m128i ssp_srai_epi32_SSE2( __m128i a, int count )
00904 {
00905     return _mm_srai_epi32( a, count );
00906 }

SSP_FORCEINLINE __m128i ssp_srl_epi16_SSE2 ( __m128i  a,
__m128i  count 
)

SSE2 Native implementation of _mm_srl_epi16 [SSE2]. (Searches MSDN)

Definition at line 907 of file SSEPlus_native_SSE2.h.

00907 /** \IMP{SSE2 Native,_mm_srl_epi16,SSE2} */
00908 SSP_FORCEINLINE __m128i ssp_srl_epi16_SSE2( __m128i a, __m128i count )
00909 {
00910     return _mm_srl_epi16( a, count );
00911 }

SSP_FORCEINLINE __m128i ssp_srl_epi32_SSE2 ( __m128i  a,
__m128i  count 
)

SSE2 Native implementation of _mm_srl_epi32 [SSE2]. (Searches MSDN)

Definition at line 912 of file SSEPlus_native_SSE2.h.

00912 /** \IMP{SSE2 Native,_mm_srl_epi32,SSE2} */
00913 SSP_FORCEINLINE __m128i ssp_srl_epi32_SSE2( __m128i a, __m128i count )
00914 {
00915     return _mm_srl_epi32( a, count );
00916 }

SSP_FORCEINLINE __m128i ssp_srl_epi64_SSE2 ( __m128i  a,
__m128i  count 
)

SSE2 Native implementation of _mm_srl_epi64 [SSE2]. (Searches MSDN)

Definition at line 917 of file SSEPlus_native_SSE2.h.

00917 /** \IMP{SSE2 Native,_mm_srl_epi64,SSE2} */
00918 SSP_FORCEINLINE __m128i ssp_srl_epi64_SSE2( __m128i a, __m128i count )
00919 {
00920     return _mm_srl_epi64( a, count );
00921 }

SSP_FORCEINLINE __m128i ssp_srli_epi16_SSE2 ( __m128i  a,
int  count 
)

SSE2 Native implementation of _mm_srli_epi16 [SSE2]. (Searches MSDN)

Definition at line 922 of file SSEPlus_native_SSE2.h.

00922 /** \IMP{SSE2 Native,_mm_srli_epi16,SSE2} */
00923 SSP_FORCEINLINE __m128i ssp_srli_epi16_SSE2( __m128i a, int count )
00924 {
00925     return _mm_srli_epi16( a, count );
00926 }

SSP_FORCEINLINE __m128i ssp_srli_epi32_SSE2 ( __m128i  a,
int  count 
)

SSE2 Native implementation of _mm_srli_epi32 [SSE2]. (Searches MSDN)

Definition at line 927 of file SSEPlus_native_SSE2.h.

00927 /** \IMP{SSE2 Native,_mm_srli_epi32,SSE2} */
00928 SSP_FORCEINLINE __m128i ssp_srli_epi32_SSE2( __m128i a, int count )
00929 {
00930     return _mm_srli_epi32( a, count );
00931 }

SSP_FORCEINLINE __m128i ssp_srli_epi64_SSE2 ( __m128i  a,
int  count 
)

SSE2 Native implementation of _mm_srli_epi64 [SSE2]. (Searches MSDN)

Definition at line 932 of file SSEPlus_native_SSE2.h.

00932 /** \IMP{SSE2 Native,_mm_srli_epi64,SSE2} */
00933 SSP_FORCEINLINE __m128i ssp_srli_epi64_SSE2( __m128i a, int count )
00934 {
00935     return _mm_srli_epi64( a, count );
00936 }

SSP_FORCEINLINE __m128i ssp_srli_si128_SSE2 ( __m128i  a,
int  imm 
)

SSE2 Native implementation of _mm_srli_si128 [SSE2]. (Searches MSDN)

Definition at line 937 of file SSEPlus_native_SSE2.h.

00937 /** \IMP{SSE2 Native,_mm_srli_si128,SSE2} */
00938 SSP_FORCEINLINE __m128i ssp_srli_si128_SSE2( __m128i a, int imm )
00939 {
00940     switch( imm & 0x7F )
00941     {
00942         CASE_128( _mm_srli_si128, a );
00943     }
00944 }

SSP_FORCEINLINE void ssp_store1_pd_SSE2 ( double *  dp,
__m128d  a 
)

SSE2 Native implementation of _mm_store1_pd [SSE2]. (Searches MSDN)

Definition at line 960 of file SSEPlus_native_SSE2.h.

00960 /** \IMP{SSE2 Native,_mm_store1_pd,SSE2} */
00961 SSP_FORCEINLINE void ssp_store1_pd_SSE2( double *dp, __m128d a )
00962 {
00963     _mm_store1_pd( dp, a );
00964 }

SSP_FORCEINLINE void ssp_store_pd_SSE2 ( double *  dp,
__m128d  a 
)

SSE2 Native implementation of _mm_store_pd [SSE2]. (Searches MSDN)

Definition at line 945 of file SSEPlus_native_SSE2.h.

00945 /** \IMP{SSE2 Native,_mm_store_pd,SSE2} */
00946 SSP_FORCEINLINE void ssp_store_pd_SSE2( double *dp, __m128d a )
00947 {
00948     _mm_store_pd( dp, a );
00949 }

SSP_FORCEINLINE void ssp_store_sd_SSE2 ( double *  dp,
__m128d  a 
)

SSE2 Native implementation of _mm_store_sd [SSE2]. (Searches MSDN)

Definition at line 950 of file SSEPlus_native_SSE2.h.

00950 /** \IMP{SSE2 Native,_mm_store_sd,SSE2} */
00951 SSP_FORCEINLINE void ssp_store_sd_SSE2( double *dp, __m128d a )
00952 {
00953     _mm_store_sd( dp, a );
00954 }

SSP_FORCEINLINE void ssp_store_si128_SSE2 ( __m128i *  p,
__m128i  b 
)

SSE2 Native implementation of _mm_store_si128 [SSE2]. (Searches MSDN)

Definition at line 955 of file SSEPlus_native_SSE2.h.

00955 /** \IMP{SSE2 Native,_mm_store_si128,SSE2} */
00956 SSP_FORCEINLINE void ssp_store_si128_SSE2( __m128i *p, __m128i b )
00957 {
00958     _mm_store_si128( p, b );
00959 }

SSP_FORCEINLINE void ssp_storeh_pd_SSE2 ( double *  dp,
__m128d  a 
)

SSE2 Native implementation of _mm_storeh_pd [SSE2]. (Searches MSDN)

Definition at line 965 of file SSEPlus_native_SSE2.h.

00965 /** \IMP{SSE2 Native,_mm_storeh_pd,SSE2} */
00966 SSP_FORCEINLINE void ssp_storeh_pd_SSE2( double *dp, __m128d a )
00967 {
00968     _mm_storeh_pd( dp, a );
00969 }

SSP_FORCEINLINE void ssp_storel_epi64_SSE2 ( __m128i *  p,
__m128i  b 
)

SSE2 Native implementation of _mm_storel_epi64 [SSE2]. (Searches MSDN)

Definition at line 970 of file SSEPlus_native_SSE2.h.

00970 /** \IMP{SSE2 Native,_mm_storel_epi64,SSE2} */
00971 SSP_FORCEINLINE void ssp_storel_epi64_SSE2( __m128i *p, __m128i b )
00972 {
00973     _mm_storel_epi64( p, b );
00974 }

SSP_FORCEINLINE void ssp_storel_pd_SSE2 ( double *  dp,
__m128d  a 
)

SSE2 Native implementation of _mm_storel_pd [SSE2]. (Searches MSDN)

Definition at line 975 of file SSEPlus_native_SSE2.h.

00975 /** \IMP{SSE2 Native,_mm_storel_pd,SSE2} */
00976 SSP_FORCEINLINE void ssp_storel_pd_SSE2( double *dp, __m128d a )
00977 {
00978     _mm_storel_pd( dp, a );
00979 }

SSP_FORCEINLINE void ssp_storer_pd_SSE2 ( double *  dp,
__m128d  a 
)

SSE2 Native implementation of _mm_storer_pd [SSE2]. (Searches MSDN)

Definition at line 980 of file SSEPlus_native_SSE2.h.

00980 /** \IMP{SSE2 Native,_mm_storer_pd,SSE2} */
00981 SSP_FORCEINLINE void ssp_storer_pd_SSE2( double *dp, __m128d a )
00982 {
00983     _mm_storer_pd( dp, a );
00984 }

SSP_FORCEINLINE void ssp_storeu_pd_SSE2 ( double *  dp,
__m128d  a 
)

SSE2 Native implementation of _mm_storeu_pd [SSE2]. (Searches MSDN)

Definition at line 985 of file SSEPlus_native_SSE2.h.

00985 /* {SSE2 Native,_mm_storeu_pd,SSE2} */
00986 SSP_FORCEINLINE void ssp_storeu_pd_SSE2( double *dp, __m128d a )
00987 {
00988     _mm_storeu_pd( dp, a );
00989 }

SSP_FORCEINLINE void ssp_storeu_si128_SSE2 ( __m128i *  p,
__m128i  b 
)

SSE2 Native implementation of _mm_storeu_si128 [SSE2]. (Searches MSDN)

Definition at line 990 of file SSEPlus_native_SSE2.h.

00990 /* {SSE2 Native,_mm_storeu_si128,SSE2} */
00991 SSP_FORCEINLINE void ssp_storeu_si128_SSE2( __m128i *p, __m128i b )
00992 {
00993     _mm_storeu_si128( p, b );
00994 }

SSP_FORCEINLINE void ssp_stream_pd_SSE2 ( double *  dp,
__m128d  a 
)

SSE2 Native implementation of _mm_stream_pd [SSE2]. (Searches MSDN)

Definition at line 995 of file SSEPlus_native_SSE2.h.

00995 /* {SSE2 Native,_mm_stream_pd,SSE2} */
00996 SSP_FORCEINLINE void ssp_stream_pd_SSE2( double *dp, __m128d a )
00997 {
00998     _mm_stream_pd( dp, a );
00999 }

SSP_FORCEINLINE void ssp_stream_si128_SSE2 ( __m128i *  p,
__m128i  a 
)

SSE2 Native implementation of _mm_stream_si128 [SSE2]. (Searches MSDN)

Definition at line 1000 of file SSEPlus_native_SSE2.h.

01000 /* {SSE2 Native,_mm_stream_si128,SSE2} */
01001 SSP_FORCEINLINE void ssp_stream_si128_SSE2( __m128i *p, __m128i a )
01002 {
01003     _mm_stream_si128( p, a );
01004 }

SSP_FORCEINLINE void ssp_stream_si32_SSE2 ( int *  p,
int  i 
)

SSE2 Native implementation of _mm_stream_si32 [SSE2]. (Searches MSDN)

Definition at line 1005 of file SSEPlus_native_SSE2.h.

01005 /* {SSE2 Native,_mm_stream_si32,SSE2} */
01006 SSP_FORCEINLINE void ssp_stream_si32_SSE2( int *p, int i )
01007 {
01008     _mm_stream_si32( p, i );
01009 }

SSP_FORCEINLINE __m128i ssp_sub_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_sub_epi16 [SSE2]. (Searches MSDN)

Definition at line 1010 of file SSEPlus_native_SSE2.h.

01010 /* {SSE2 Native,_mm_sub_epi16,SSE2} */
01011 SSP_FORCEINLINE __m128i ssp_sub_epi16_SSE2( __m128i a, __m128i b )
01012 {
01013     return _mm_sub_epi16( a, b );
01014 }

SSP_FORCEINLINE __m128i ssp_sub_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_sub_epi32 [SSE2]. (Searches MSDN)

Definition at line 1015 of file SSEPlus_native_SSE2.h.

01015 /* {SSE2 Native,_mm_sub_epi32,SSE2} */
01016 SSP_FORCEINLINE __m128i ssp_sub_epi32_SSE2( __m128i a, __m128i b )
01017 {
01018     return _mm_sub_epi32( a, b );
01019 }

SSP_FORCEINLINE __m128i ssp_sub_epi64_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_sub_epi64 [SSE2]. (Searches MSDN)

Definition at line 1020 of file SSEPlus_native_SSE2.h.

01020 /* {SSE2 Native,_mm_sub_epi64,SSE2} */
01021 SSP_FORCEINLINE __m128i ssp_sub_epi64_SSE2( __m128i a, __m128i b )
01022 {
01023     return _mm_sub_epi64( a, b );
01024 }

SSP_FORCEINLINE __m128i ssp_sub_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_sub_epi8 [SSE2]. (Searches MSDN)

Definition at line 1025 of file SSEPlus_native_SSE2.h.

01025 /* {SSE2 Native,_mm_sub_epi8,SSE2} */
01026 SSP_FORCEINLINE __m128i ssp_sub_epi8_SSE2( __m128i a, __m128i b )
01027 {
01028     return _mm_sub_epi8( a, b );
01029 }

SSP_FORCEINLINE __m128d ssp_sub_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_sub_pd [SSE2]. (Searches MSDN)

Definition at line 1030 of file SSEPlus_native_SSE2.h.

01030 /* {SSE2 Native,_mm_sub_pd,SSE2} */
01031 SSP_FORCEINLINE __m128d ssp_sub_pd_SSE2( __m128d a, __m128d b )
01032 {
01033     return _mm_sub_pd( a, b );
01034 }

SSP_FORCEINLINE __m128d ssp_sub_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_sub_sd [SSE2]. (Searches MSDN)

Definition at line 1035 of file SSEPlus_native_SSE2.h.

01035 /* {SSE2 Native,_mm_sub_sd,SSE2} */
01036 SSP_FORCEINLINE __m128d ssp_sub_sd_SSE2( __m128d a, __m128d b )
01037 {
01038     return _mm_sub_sd( a, b );
01039 }

SSP_FORCEINLINE __m64 ssp_sub_si64_SSE2 ( __m64  a,
__m64  b 
)

SSE2 Native implementation of _mm_sub_si64 [SSE2]. (Searches MSDN)

Definition at line 1040 of file SSEPlus_native_SSE2.h.

01040 /* {SSE2 Native,_mm_sub_si64,SSE2} */
01041 SSP_FORCEINLINE __m64 ssp_sub_si64_SSE2( __m64 a, __m64 b)
01042 {
01043     return _mm_sub_si64( a, b );
01044 }

SSP_FORCEINLINE __m128i ssp_subs_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_subs_epi16 [SSE2]. (Searches MSDN)

Definition at line 1045 of file SSEPlus_native_SSE2.h.

01045 /* {SSE2 Native,_mm_subs_epi16,SSE2} */
01046 SSP_FORCEINLINE __m128i ssp_subs_epi16_SSE2( __m128i a, __m128i b )
01047 {
01048     return _mm_subs_epi16( a, b );
01049 }

SSP_FORCEINLINE __m128i ssp_subs_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_subs_epi8 [SSE2]. (Searches MSDN)

Definition at line 1050 of file SSEPlus_native_SSE2.h.

01050 /* {SSE2 Native,_mm_subs_epi8,SSE2} */
01051 SSP_FORCEINLINE __m128i ssp_subs_epi8_SSE2( __m128i a, __m128i b )
01052 {
01053     return _mm_subs_epi8( a, b );
01054 }

SSP_FORCEINLINE __m128i ssp_subs_epu16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_subs_epu16 [SSE2]. (Searches MSDN)

Definition at line 1055 of file SSEPlus_native_SSE2.h.

01055 /* {SSE2 Native,_mm_subs_epu16,SSE2} */
01056 SSP_FORCEINLINE __m128i ssp_subs_epu16_SSE2( __m128i a, __m128i b )
01057 {
01058     return _mm_subs_epu16( a, b );
01059 }

SSP_FORCEINLINE __m128i ssp_subs_epu8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_subs_epu8 [SSE2]. (Searches MSDN)

Definition at line 1060 of file SSEPlus_native_SSE2.h.

01060 /* {SSE2 Native,_mm_subs_epu8,SSE2} */
01061 SSP_FORCEINLINE __m128i ssp_subs_epu8_SSE2( __m128i a, __m128i b )
01062 {
01063     return _mm_subs_epu8( a, b );
01064 }

SSP_FORCEINLINE int ssp_ucomieq_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_ucomieq_sd [SSE2]. (Searches MSDN)

Definition at line 1065 of file SSEPlus_native_SSE2.h.

01065 /* {SSE2 Native,_mm_ucomieq_sd,SSE2} */
01066 SSP_FORCEINLINE int ssp_ucomieq_sd_SSE2( __m128d a, __m128d b )
01067 {
01068     return _mm_ucomieq_sd( a, b );
01069 }

SSP_FORCEINLINE int ssp_ucomige_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_ucomige_sd [SSE2]. (Searches MSDN)

Definition at line 1070 of file SSEPlus_native_SSE2.h.

01070 /* {SSE2 Native,_mm_ucomige_sd,SSE2} */
01071 SSP_FORCEINLINE int ssp_ucomige_sd_SSE2( __m128d a, __m128d b )
01072 {
01073     return _mm_ucomige_sd( a, b );
01074 }

SSP_FORCEINLINE int ssp_ucomigt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_ucomigt_sd [SSE2]. (Searches MSDN)

Definition at line 1075 of file SSEPlus_native_SSE2.h.

01075 /* {SSE2 Native,_mm_ucomigt_sd,SSE2} */
01076 SSP_FORCEINLINE int ssp_ucomigt_sd_SSE2( __m128d a, __m128d b )
01077 {
01078     return _mm_ucomigt_sd( a, b );
01079 }

SSP_FORCEINLINE int ssp_ucomile_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_ucomile_sd [SSE2]. (Searches MSDN)

Definition at line 1080 of file SSEPlus_native_SSE2.h.

01080 /* {SSE2 Native,_mm_ucomile_sd,SSE2} */
01081 SSP_FORCEINLINE int ssp_ucomile_sd_SSE2( __m128d a, __m128d b )
01082 {
01083     return _mm_ucomile_sd( a, b );
01084 }

SSP_FORCEINLINE int ssp_ucomilt_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_ucomilt_sd [SSE2]. (Searches MSDN)

Definition at line 1085 of file SSEPlus_native_SSE2.h.

01085 /* {SSE2 Native,_mm_ucomilt_sd,SSE2} */
01086 SSP_FORCEINLINE int ssp_ucomilt_sd_SSE2( __m128d a, __m128d b )
01087 {
01088     return _mm_ucomilt_sd( a, b );
01089 }

SSP_FORCEINLINE int ssp_ucomineq_sd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_ucomineq_sd [SSE2]. (Searches MSDN)

Definition at line 1090 of file SSEPlus_native_SSE2.h.

01090 /* {SSE2 Native,_mm_ucomineq_sd,SSE2} */
01091 SSP_FORCEINLINE int ssp_ucomineq_sd_SSE2( __m128d a, __m128d b )
01092 {
01093     return _mm_ucomineq_sd( a, b );
01094 }

SSP_FORCEINLINE __m128i ssp_unpackhi_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_unpackhi_epi16 [SSE2]. (Searches MSDN)

Definition at line 1095 of file SSEPlus_native_SSE2.h.

01095 /* {SSE2 Native,_mm_unpackhi_epi16,SSE2} */
01096 SSP_FORCEINLINE __m128i ssp_unpackhi_epi16_SSE2( __m128i a, __m128i b )
01097 {
01098     return _mm_unpackhi_epi16( a, b );
01099 }

SSP_FORCEINLINE __m128i ssp_unpackhi_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_unpackhi_epi32 [SSE2]. (Searches MSDN)

Definition at line 1100 of file SSEPlus_native_SSE2.h.

01100 /* {SSE2 Native,_mm_unpackhi_epi32,SSE2} */
01101 SSP_FORCEINLINE __m128i ssp_unpackhi_epi32_SSE2( __m128i a, __m128i b )
01102 {
01103     return _mm_unpackhi_epi32( a, b );
01104 }

SSP_FORCEINLINE __m128i ssp_unpackhi_epi64_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_unpackhi_epi64 [SSE2]. (Searches MSDN)

Definition at line 1105 of file SSEPlus_native_SSE2.h.

01105 /* {SSE2 Native,_mm_unpackhi_epi64,SSE2} */
01106 SSP_FORCEINLINE __m128i ssp_unpackhi_epi64_SSE2( __m128i a, __m128i b )
01107 {
01108     return _mm_unpackhi_epi64( a, b );
01109 }

SSP_FORCEINLINE __m128i ssp_unpackhi_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_unpackhi_epi8 [SSE2]. (Searches MSDN)

Definition at line 1110 of file SSEPlus_native_SSE2.h.

01110 /* {SSE2 Native,_mm_unpackhi_epi8,SSE2} */
01111 SSP_FORCEINLINE __m128i ssp_unpackhi_epi8_SSE2( __m128i a, __m128i b )
01112 {
01113     return _mm_unpackhi_epi8( a, b );
01114 }

SSP_FORCEINLINE __m128d ssp_unpackhi_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_unpackhi_pd [SSE2]. (Searches MSDN)

Definition at line 1115 of file SSEPlus_native_SSE2.h.

01115 /* {SSE2 Native,_mm_unpackhi_pd,SSE2} */
01116 SSP_FORCEINLINE __m128d ssp_unpackhi_pd_SSE2( __m128d a, __m128d b )
01117 {
01118     return _mm_unpackhi_pd( a, b );
01119 }

SSP_FORCEINLINE __m128i ssp_unpacklo_epi16_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_unpacklo_epi16 [SSE2]. (Searches MSDN)

Definition at line 1120 of file SSEPlus_native_SSE2.h.

01120 /* {SSE2 Native,_mm_unpacklo_epi16,SSE2} */
01121 SSP_FORCEINLINE __m128i ssp_unpacklo_epi16_SSE2( __m128i a, __m128i b )
01122 {
01123     return _mm_unpacklo_epi16( a, b );
01124 }

SSP_FORCEINLINE __m128i ssp_unpacklo_epi32_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_unpacklo_epi32 [SSE2]. (Searches MSDN)

Definition at line 1125 of file SSEPlus_native_SSE2.h.

01125 /* {SSE2 Native,_mm_unpacklo_epi32,SSE2} */
01126 SSP_FORCEINLINE __m128i ssp_unpacklo_epi32_SSE2( __m128i a, __m128i b )
01127 {
01128     return _mm_unpacklo_epi32( a, b );
01129 }

SSP_FORCEINLINE __m128i ssp_unpacklo_epi64_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_unpacklo_epi64 [SSE2]. (Searches MSDN)

Definition at line 1130 of file SSEPlus_native_SSE2.h.

01130 /* {SSE2 Native,_mm_unpacklo_epi64,SSE2} */
01131 SSP_FORCEINLINE __m128i ssp_unpacklo_epi64_SSE2( __m128i a, __m128i b )
01132 {
01133     return _mm_unpacklo_epi64( a, b );
01134 }

SSP_FORCEINLINE __m128i ssp_unpacklo_epi8_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_unpacklo_epi8 [SSE2]. (Searches MSDN)

Definition at line 1135 of file SSEPlus_native_SSE2.h.

01135 /* {SSE2 Native,_mm_unpacklo_epi8,SSE2} */
01136 SSP_FORCEINLINE __m128i ssp_unpacklo_epi8_SSE2( __m128i a, __m128i b )
01137 {
01138     return _mm_unpacklo_epi8( a, b );
01139 }

SSP_FORCEINLINE __m128d ssp_unpacklo_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_unpacklo_pd [SSE2]. (Searches MSDN)

Definition at line 1140 of file SSEPlus_native_SSE2.h.

01140 /* {SSE2 Native,_mm_unpacklo_pd,SSE2} */
01141 SSP_FORCEINLINE __m128d ssp_unpacklo_pd_SSE2( __m128d a, __m128d b )
01142 {
01143     return _mm_unpacklo_pd( a, b );
01144 }

SSP_FORCEINLINE __m128d ssp_xor_pd_SSE2 ( __m128d  a,
__m128d  b 
)

SSE2 Native implementation of _mm_xor_pd [SSE2]. (Searches MSDN)

Definition at line 1145 of file SSEPlus_native_SSE2.h.

01145 /* {SSE2 Native,_mm_xor_pd,SSE2} */
01146 SSP_FORCEINLINE __m128d ssp_xor_pd_SSE2( __m128d a, __m128d b )
01147 {
01148     return _mm_xor_pd( a, b );
01149 }

SSP_FORCEINLINE __m128i ssp_xor_si128_SSE2 ( __m128i  a,
__m128i  b 
)

SSE2 Native implementation of _mm_xor_si128 [SSE2]. (Searches MSDN)

Definition at line 1150 of file SSEPlus_native_SSE2.h.

01150 /* {SSE2 Native,_mm_xor_si128,SSE2} */
01151 SSP_FORCEINLINE __m128i ssp_xor_si128_SSE2( __m128i a, __m128i b )
01152 {
01153     return _mm_xor_si128( a, b );
01154 }


Generated on Thu Apr 17 14:08:59 2008 for "SSEPlus" by  doxygen 1.5.4