This problem gives an error when the __mm_load_ss function tries to retrieve the data from the input and stores it as x. The compiler remarks the line as
error: incompatible types in initialization.
Here is the code:
------------------------------------------
Code:
typedef float v4sf __attribute__((vector_size(16)));
typedef double v2df __attribute__((vector_size(32)));
static v4sf f3 = { 3.0f, 3.0f, 3.0f, 3.0f };
static v4sf f05 = { 0.5f, 0.5f, 0.5f, 0.5f };
#if defined __SSE2__
static v2df d3 = { 3.0, 3.0 };
static v2df d05 = { 0.5, 0.5 };
#endif
inline float cuRecSqrtSEE(float x)
{
#if defined __SSE__
v4sf xx = __mm_load_ss( & x ); <----- ERROR HERE
v4sf xr = __builtin_ia32_rsqrtss( xx );
v4sf xt;
xt = __builtin_ia32_mulss( xr, xr );
xt = __builtin_ia32_mulss( xt, xx );
xt = __builtin_ia32_subss( f3, xt );
xt = __builtin_ia32_mulss( xt, f05 );
xr = __builtin_ia32_mulss( xr, xt );
__builtin_ia32_storess( &x, xr );
return x;
#else
float r;
__builtin_ia32_storess( & r, __builtin_ia32_rsqrtss(__mm_load_ss( & x ) ) );
r *= ((3.0f - r * r * x) * 0.5f);
return r;
#endif
}
Any help anyone??
