4736174.c   [plain text]


/* APPLE LOCAL file 4736174 */
/* { dg-do compile { target i?86-*-* } } */
/* { dg-options "-O3 -msse2" } */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef __v2df __m128d;
__m128d _mm_cmpgt_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__m128d) __A,
         (__m128d) __builtin_ia32_cmpltsd ((__m128d) __B,
         (__m128d) __A));
}

__m128d _mm_and_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_andpd ((__m128d)__A, (__m128d)__B);
}

__m128d _mm_add_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_addsd ((__m128d)__A, (__m128d)__B);
}

__m128d _mm_sub_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_subsd ((__m128d)__A, (__m128d)__B);
}

__m128d _mm_mul_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_mulsd ((__m128d)__A, (__m128d)__B);
}

__m128d _mm_storer_pd (double *__P, __m128d __A)
{
  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, (((0) << 1) | (1))));
}

__m128d _mm_add_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_addpd ((__m128d)__A, (__m128d)__B);
}

__m128d _mm_sub_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_subpd ((__m128d)__A, (__m128d)__B);
}

void foo ()
{
  __m128d t1r, t1i, t2r, t2i, t3r, t3i, t5r, ttp4x, carry4x, bj4x;
  __m128d MM_bigA, MM_bc[4], MM_c;
  double * px, pd1, pd2, pd3;
  int i, j, k, l, ll, UPDATE, pad2;
  for (i = 0, j = 0; j < pad2; j += UPDATE, i++)
    {
      for (k = 0; k < UPDATE; k += 4, px += 8)
        {
          { 
            __m128d maskj;
            int imaskj; 
            maskj = _mm_cmpgt_sd (bj4x, MM_c);
            maskj = _mm_and_pd (maskj, ttp4x);
            __asm__ volatile ("addsd %2, %0 \n" "        subsd %2, %0" : "=&x" (carry4x) : "0" (t5r), "X" (MM_bigA));
            ttp4x = _mm_add_sd (ttp4x, maskj);
            t5r = _mm_sub_sd (t5r, carry4x);
            t5r = _mm_mul_sd (t5r, ttp4x);
            bj4x = _mm_add_sd (bj4x, MM_bc[imaskj]);
          }
          { 
            __m128d _ar, _ai, _br, _bi;
            t1r = _mm_sub_pd (t1r, t3i);
            t1i = _mm_add_pd (t1i, t3r);
            _mm_store_pd (pd1 + ll, t1r);
            _mm_store_pd ((pd1 + ll + 2), t1i);
            t3r = _mm_add_pd (t2r, _ai);
            t3i = _mm_sub_pd (t2i, _ar);
            t2r = _mm_sub_pd (t2r, _ai);
            t2i = _mm_add_pd (t2i, _ar);
            _mm_store_pd (pd3 + ll, t3r);
            _mm_store_pd ((pd3 + ll + 2), t3i);
            _mm_store_pd (pd2 + ll, t2r);
            _mm_store_pd ((pd2 + ll + 2), t2i);
          }
        }
    }
}