This is a small HLSL-style C++11 (VS2012) library built on top of DirectXMath.

Custom traits combined with a lot of std::enable_if<> are employed to route the compiler to the optimal operators/functions.

The purpose is to go from:

// Baseline written directly against DirectXMath.
// Returns dot( normalize( view + light ), normal ) as a scalar.
//   normal, view, light : 16-byte-aligned 3-component vectors (XMFLOAT3A).
float blinn_phong_xm( const XMFLOAT3A &normal, const XMFLOAT3A &view, const XMFLOAT3A &light )
{
// Innermost to outermost: load view and light, add them, normalize the sum,
// take the 3D dot product with the loaded normal, then extract the X lane.
return XMVectorGetX( XMVector3Dot( XMVector3Normalize( XMVectorAdd( XMLoadFloat3A(&view ), XMLoadFloat3A( &light ) ) ), XMLoadFloat3A( &normal ) ) );
}



With the generated x86-SSE code (VS2012):

; Fetch the `light` pointer from the stack and load the g_XMMask3 constant.
mov	eax, DWORD PTR _light$[esp-4]
movaps	xmm4, XMMWORD PTR ?g_XMMask3@DirectX@@3UXMVECTORI32@1@B
; Aligned loads of the two addends: [eax] is `light`; [edx] is presumably `view` (its setup is not part of this excerpt).
movaps	xmm5, XMMWORD PTR [edx]
movaps	xmm0, XMMWORD PTR [eax]
; AND both inputs with g_XMMask3 (presumably clears the unused 4th lane), then xmm5 = their sum.
andps	xmm5, xmm4
andps	xmm0, xmm4
addps	xmm5, xmm0
; Preload g_XMInfinity into a register for the infinite-length check below.
movaps	xmm2, XMMWORD PTR ?g_XMInfinity@DirectX@@3UXMVECTORI32@1@B
; Squared length of the sum: square every lane, then fold lanes 1 and 2 into lane 0.
movaps	xmm3, xmm5
mulps	xmm3, xmm5
movaps	xmm0, xmm3
; Shuffle 99H yields lanes (1,2,1,2); the scalar add gives lane0 = x*x + y*y.
shufps	xmm0, xmm3, 153				; 00000099H
addss	xmm3, xmm0
; Shuffle 55H broadcasts lane 1 (now z*z); the scalar add gives lane0 = x*x + y*y + z*z.
shufps	xmm0, xmm0, 85				; 00000055H
addss	xmm3, xmm0
; xmm0 = 0.0 in all lanes, used for the zero-length check.
xorps	xmm0, xmm0
; Broadcast the squared length to all lanes and take its square root: xmm1 = length.
shufps	xmm3, xmm3, 0
sqrtps	xmm1, xmm3
; xmm2 = mask of lanes where squared length != infinity.
cmpneqps xmm2, xmm3
; xmm5 = sum / length (the normalized vector before special-case handling).
divps	xmm5, xmm1
; xmm1 = mask of lanes where length != 0.
cmpneqps xmm1, xmm0
; Load the third input, presumably `normal` (how ecx is set up is not part of this excerpt).
movaps	xmm0, XMMWORD PTR [ecx]
; Zero-length input: force the result to 0.
andps	xmm5, xmm1
; Infinite-length input: replace the result with g_XMQNaN via a mask select (and / andnot / or).
andps	xmm5, xmm2
andnps	xmm2, XMMWORD PTR ?g_XMQNaN@DirectX@@3UXMVECTORI32@1@B
orps	xmm5, xmm2
; Mask the third input with g_XMMask3 and multiply lane-wise with the normalized vector.
andps	xmm0, xmm4
mulps	xmm5, xmm0
; Horizontal add of lanes 0..2 (same shuffle pattern as above): lane 0 of xmm5 = dot product.
movaps	xmm1, xmm5
shufps	xmm1, xmm5, 153				; 00000099H
addss	xmm5, xmm1
shufps	xmm1, xmm1, 85				; 00000055H
addss	xmm5, xmm1
; Copy the result vector to xmm0; the dot product sits in lane 0.
movaps	xmm0, xmm5



To:

// HLSL-style version of blinn_phong_xm, written with this library's float3a type.
// Returns dot( normalize( view + light ), normal ) as a scalar.
float blinn_phong( const float3a &normal, const float3a &view, const float3a &light )
{
// h: the normalized sum of the view and light vectors.
auto h = normalize( view + light );
return dot( h, normal );
}



Generating:

; Fetch the `light` pointer from the stack and load the g_XMMask3 constant.
mov	eax, DWORD PTR _light$[esp-4]
movaps	xmm3, XMMWORD PTR ?g_XMMask3@DirectX@@3UXMVECTORI32@1@B
; Aligned loads of the two addends: [eax] is `light`; [edx] is presumably `view` (its setup is not part of this excerpt).
movaps	xmm4, XMMWORD PTR [edx]
movaps	xmm0, XMMWORD PTR [eax]
; AND both inputs with g_XMMask3 (presumably clears the unused 4th lane), then xmm4 = their sum.
andps	xmm4, xmm3
andps	xmm0, xmm3
addps	xmm4, xmm0
; Squared length of the sum: square every lane, then fold lanes 1 and 2 into lane 0.
movaps	xmm2, xmm4
mulps	xmm2, xmm4
movaps	xmm0, xmm2
; Shuffle 99H yields lanes (1,2,1,2); the scalar add gives lane0 = x*x + y*y.
shufps	xmm0, xmm2, 153				; 00000099H
addss	xmm2, xmm0
; Shuffle 55H broadcasts lane 1 (now z*z); the scalar add gives lane0 = x*x + y*y + z*z.
shufps	xmm0, xmm0, 85				; 00000055H
addss	xmm2, xmm0
; xmm0 = 0.0 in all lanes, used for the zero-length check.
xorps	xmm0, xmm0
; Broadcast the squared length to all lanes and take its square root: xmm1 = length.
shufps	xmm2, xmm2, 0
sqrtps	xmm1, xmm2
; xmm2 = mask of lanes where squared length != infinity (g_XMInfinity is read straight from memory here).
cmpneqps xmm2, XMMWORD PTR ?g_XMInfinity@DirectX@@3UXMVECTORI32@1@B
; xmm4 = sum / length (the normalized vector before special-case handling).
divps	xmm4, xmm1
; xmm1 = mask of lanes where length != 0.
cmpneqps xmm1, xmm0
; Load the third input, presumably `normal` (how ecx is set up is not part of this excerpt).
movaps	xmm0, XMMWORD PTR [ecx]
; Zero-length input: force the result to 0.
andps	xmm4, xmm1
; Infinite-length input: replace the result with g_XMQNaN via a mask select (and / andnot / or).
andps	xmm4, xmm2
andnps	xmm2, XMMWORD PTR ?g_XMQNaN@DirectX@@3UXMVECTORI32@1@B
orps	xmm4, xmm2
; Mask the third input with g_XMMask3 and multiply lane-wise with the normalized vector.
andps	xmm0, xmm3
mulps	xmm4, xmm0
; Horizontal add of lanes 0..2 (same shuffle pattern as above): lane 0 of xmm4 = dot product.
movaps	xmm1, xmm4
shufps	xmm1, xmm4, 153				; 00000099H
addss	xmm4, xmm1
shufps	xmm1, xmm1, 85				; 00000055H
addss	xmm4, xmm1
; Copy the result vector to xmm0; the dot product sits in lane 0.
movaps	xmm0, xmm4



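So the generated asm is essentially the same: apart from register allocation, the only difference is that the HLSL-style version compares against g_XMInfinity directly from memory instead of first loading it into a register (one instruction fewer).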

In fact, this can be read as a push for DirectXMath to evolve into this kind of C++11 HLSL-style library (plain DirectXMath is a pain to work with).

We (real-time 3D developers) need an efficient and straightforward HLSL/GLSL-like C++ library (Boost- or MIT-licensed).

Last edited Oct 3, 2013 at 2:13 PM by BJOne, version 28