#include <altivec.h>
/*
 * transpose4x4 - transpose a 4x4 float matrix in place.
 *
 * matrix: four vectors, matrix[0..3], each holding one row of four floats.
 *         On return matrix[k] holds what was column k of the input.
 *
 * The function is 'static inline' rather than plain 'inline': in C99 and
 * later a plain 'inline' definition does not emit an external definition,
 * so any call the compiler chooses not to inline (e.g. at -O0) would fail
 * to link.
 */
static inline void
transpose4x4(vector float *matrix)
{
    vector float v0, v1, v2, v3;

    /*
     * Pass 1: interleave row 0 with row 2 and row 1 with row 3.
     * With rows a, b, c, d this gives:
     *   v0 = a0 c0 a1 c1    v1 = a2 c2 a3 c3
     *   v2 = b0 d0 b1 d1    v3 = b2 d2 b3 d3
     */
    v0 = vec_mergeh(matrix[0], matrix[2]);
    v1 = vec_mergel(matrix[0], matrix[2]);
    v2 = vec_mergeh(matrix[1], matrix[3]);
    v3 = vec_mergel(matrix[1], matrix[3]);

    /*
     * Pass 2: interleave the intermediate vectors to form the columns:
     *   matrix[0] = a0 b0 c0 d0, ..., matrix[3] = a3 b3 c3 d3
     */
    matrix[0] = vec_mergeh(v0, v2);
    matrix[1] = vec_mergel(v0, v2);
    matrix[2] = vec_mergeh(v1, v3);
    matrix[3] = vec_mergel(v1, v3);
}
/*
 * vec_ifft64 - one radix-4 butterfly pass followed by 4x4 transposes.
 *
 * x0: input, 32 vectors are read. x0[0..15] are treated as real parts and
 *     x0[16..31] as the matching imaginary parts.
 * x1: output, 32 vectors are written with the same split: x1[0..15] real,
 *     x1[16..31] imaginary.
 *
 * For each column i in 0..3 the complex inputs c0..c3 are taken from
 * vector indices i, i+4, i+8 and i+12, and combined as
 *     out0 = (c0 + c2) + (c1 + c3)
 *     out1 = (c0 - c2) + j*(c1 - c3)
 *     out2 = (c0 + c2) - (c1 + c3)
 *     out3 = (c0 - c2) - j*(c1 - c3)
 * The four real result vectors and the four imaginary result vectors are
 * each transposed as a 4x4 block and stored at x1[4*i .. 4*i+3] and
 * x1[4*i+16 .. 4*i+19] respectively. No twiddle factors are applied in
 * this function.
 */
void
vec_ifft64(vector float *x0, vector float *x1)
{
    vector float *src_re = x0;
    vector float *src_im = x0 + 16;
    vector float *dst_re = x1;
    vector float *dst_im = x1 + 16;
    int col;

    for (col = 0; col < 4; col++) {
        vector float re[4], im[4];
        vector float sum02_re, dif02_re, sum13_re, rot13_re;
        vector float sum02_im, dif02_im, sum13_im, rot13_im;
        int k;

        /* Fetch every input of this column before anything is stored. */
        vector float a_re = src_re[col];
        vector float b_re = src_re[col + 4];
        vector float c_re = src_re[col + 8];
        vector float d_re = src_re[col + 12];
        vector float a_im = src_im[col];
        vector float b_im = src_im[col + 4];
        vector float c_im = src_im[col + 8];
        vector float d_im = src_im[col + 12];

        /* Sums and differences of inputs 0/2 and 1/3. */
        sum02_re = vec_add(a_re, c_re);
        sum02_im = vec_add(a_im, c_im);
        dif02_re = vec_sub(a_re, c_re);
        dif02_im = vec_sub(a_im, c_im);
        sum13_re = vec_add(b_re, d_re);
        sum13_im = vec_add(b_im, d_im);

        /* j*(c1 - c3): real part is (c3i - c1i), imaginary is (c1r - c3r). */
        rot13_re = vec_sub(d_im, b_im);
        rot13_im = vec_sub(b_re, d_re);

        /* Second butterfly level. */
        re[0] = vec_add(sum02_re, sum13_re);
        im[0] = vec_add(sum02_im, sum13_im);
        re[1] = vec_add(dif02_re, rot13_re);
        im[1] = vec_add(dif02_im, rot13_im);
        re[2] = vec_sub(sum02_re, sum13_re);
        im[2] = vec_sub(sum02_im, sum13_im);
        re[3] = vec_sub(dif02_re, rot13_re);
        im[3] = vec_sub(dif02_im, rot13_im);

        transpose4x4(re);
        transpose4x4(im);

        /* Store the transposed blocks: real half first, then imaginary. */
        for (k = 0; k < 4; k++) {
            dst_re[4 * col + k] = re[k];
        }
        for (k = 0; k < 4; k++) {
            dst_im[4 * col + k] = im[k];
        }
    }
}