/* 4279065.c */


/* APPLE LOCAL file 4279065 */
/* { dg-do compile { target "i?86*-*-darwin*" } } */
/* { dg-options "-O3" } */
#include <emmintrin.h>

/* Copy a 4-row block from REF_PART_PTR to CURRENT_PART_PTR, widening each
   source byte to a zero-extended 16-bit value on the way.

   For each of the four rows:
     - one scalar word is read from ref_part_ptr + row * ref_part_stride,
     - its low 32 bits (four bytes) are moved into an SSE register,
     - the bytes are interleaved with zero bytes, giving four 16-bit values,
     - the low 64 bits of the result are stored at
       current_part_ptr + row * current_part_stride.

   Both strides are byte offsets (they are added to unsigned char pointers).
   The destination therefore receives 8 bytes per row, the source supplies
   4 bytes per row.

   NOTE(review): this function is the payload of a compile-only compiler
   test; the exact form of the statements is presumably what reproduces the
   original code-generation problem, so the code should not be "cleaned up"
   without confirming the test still exercises the same path.  */
void S_Interpolate_4x4_IntPel_Mono_Add_Later(unsigned char *current_part_ptr, int current_part_stride, unsigned char *ref_part_ptr, int ref_part_stride){

  /* 16 bytes of zeros used as the source for the all-zero vector below.  */
  static const unsigned int c_0[4] = { 0, 0, 0, 0 };
  /* Scalar copies of the four source rows.  */
  unsigned long s_row0_0, s_row1_0, s_row2_0, s_row3_0;
  /* Vector working registers, one per row.  */
  __m128i v_row0_0, v_row1_0, v_row2_0, v_row3_0;

  /* All-zero vector; the unaligned load form makes no alignment assumption
     about c_0.  */
  __m128i v_Zero = _mm_loadu_si128((__m128i*)c_0);


  /* Load one word per row straight through a cast of the byte pointer.
     NOTE(review): the load width is sizeof(unsigned long); that is 4 bytes
     on an ILP32 target but would be 8 bytes on an LP64 target, where it
     reads more than the 4 bytes actually used below -- confirm the intended
     target before reusing this code elsewhere.  */
  s_row0_0  = *(unsigned long*)(ref_part_ptr+(0*ref_part_stride));
  s_row1_0  = *(unsigned long*)(ref_part_ptr+(1*ref_part_stride));
  s_row2_0  = *(unsigned long*)(ref_part_ptr+(2*ref_part_stride));
  s_row3_0  = *(unsigned long*)(ref_part_ptr+(3*ref_part_stride));

  /* Move each scalar into the low 32 bits of a vector; the intrinsic takes
     an int, so only the low 32 bits are kept and the remaining lanes are
     zeroed.  */
  v_row0_0  = _mm_cvtsi32_si128(s_row0_0);
  v_row1_0  = _mm_cvtsi32_si128(s_row1_0);
  v_row2_0  = _mm_cvtsi32_si128(s_row2_0);
  v_row3_0  = _mm_cvtsi32_si128(s_row3_0);

  /* Interleave the low bytes with zero bytes, i.e. zero-extend each byte
     to 16 bits.  */
  v_row0_0  = _mm_unpacklo_epi8(v_row0_0,  v_Zero);
  v_row1_0  = _mm_unpacklo_epi8(v_row1_0,  v_Zero);
  v_row2_0  = _mm_unpacklo_epi8(v_row2_0,  v_Zero);
  v_row3_0  = _mm_unpacklo_epi8(v_row3_0,  v_Zero);

  /* Store the low 64 bits (the four widened values) of each row to the
     destination.  */
  _mm_storel_epi64((__m128i*)(current_part_ptr+(0*current_part_stride)), v_row0_0);
  _mm_storel_epi64((__m128i*)(current_part_ptr+(1*current_part_stride)), v_row1_0);
  _mm_storel_epi64((__m128i*)(current_part_ptr+(2*current_part_stride)), v_row2_0);
  _mm_storel_epi64((__m128i*)(current_part_ptr+(3*current_part_stride)), v_row3_0);
}
/* { dg-final { scan-assembler-not "-24\\\(%ebp\\\)" } } */