This is a small HLSL-style C++11 (VS2012) library built on top of DirectXMath.
Custom traits and combinations of std::enable_if<> are used to select the optimal operators/functions.
The purpose is to go from:
// Plain DirectXMath version: loads view and light, adds them, normalizes the sum,
// takes the 3-component dot product of that result with normal, and returns the x lane
// of the dot-product vector as a float.
float blinn_phong_xm( const XMFLOAT3A &normal, const XMFLOAT3A &view, const XMFLOAT3A &light )
{
// Read inside-out: XMVectorAdd -> XMVector3Normalize -> XMVector3Dot -> XMVectorGetX.
return XMVectorGetX( XMVector3Dot( XMVector3Normalize( XMVectorAdd( XMLoadFloat3A(&view ), XMLoadFloat3A( &light ) ) ), XMLoadFloat3A( &normal ) ) );
}
With the generated x86-SSE code (VS2012):
; Compiler listing for blinn_phong_xm (fragment: no function label or ret is shown).
; Matching the loads against the C++ source: [edx] and [eax] are the two addends
; (view and light) and [ecx] is the operand of the final dot product (normal).
mov eax, DWORD PTR _light$[esp-4]
; xmm4 = g_XMMask3, ANDed into every loaded vector below
; (presumably keeps x,y,z and clears the unused 4th lane - confirm against DirectXMath).
movaps xmm4, XMMWORD PTR ?g_XMMask3@DirectX@@3UXMVECTORI32@1@B
movaps xmm5, XMMWORD PTR [edx]
movaps xmm0, XMMWORD PTR [eax]
andps xmm5, xmm4
andps xmm0, xmm4
; xmm5 = view + light
addps xmm5, xmm0
; xmm2 = g_XMInfinity, kept in a register for the infinite-length test further down.
movaps xmm2, XMMWORD PTR ?g_XMInfinity@DirectX@@3UXMVECTORI32@1@B
; xmm3 = per-lane squares of the sum.
movaps xmm3, xmm5
mulps xmm3, xmm5
; Horizontal add into lane 0: shuffle 0x99 yields lanes (y,z,y,z), shuffle 0x55 then
; broadcasts lane 1 (z), so after the two addss lane 0 of xmm3 = x*x + y*y + z*z.
movaps xmm0, xmm3
shufps xmm0, xmm3, 153 ; 00000099H
addss xmm3, xmm0
shufps xmm0, xmm0, 85 ; 00000055H
addss xmm3, xmm0
; xmm0 = all zeros, used below for the zero-length test.
xorps xmm0, xmm0
; Broadcast the squared length to every lane, then xmm1 = length.
shufps xmm3, xmm3, 0
sqrtps xmm1, xmm3
; xmm2 = mask of lanes where squared length != infinity.
cmpneqps xmm2, xmm3
; xmm5 = sum / length
divps xmm5, xmm1
; xmm1 = mask of lanes where length != 0 (the length itself is no longer needed).
cmpneqps xmm1, xmm0
; xmm0 = normal, loaded here and masked just before use.
movaps xmm0, XMMWORD PTR [ecx]
; Select the final normalized vector: zero when length == 0, g_XMQNaN when the
; squared length is infinite, otherwise the quotient computed above.
andps xmm5, xmm1
andps xmm5, xmm2
andnps xmm2, XMMWORD PTR ?g_XMQNaN@DirectX@@3UXMVECTORI32@1@B
orps xmm5, xmm2
; Per-lane products of the normalized sum and the masked normal.
andps xmm0, xmm4
mulps xmm5, xmm0
; Same horizontal add as above: lane 0 of xmm5 = dot product.
movaps xmm1, xmm5
shufps xmm1, xmm5, 153 ; 00000099H
addss xmm5, xmm1
shufps xmm1, xmm1, 85 ; 00000055H
addss xmm5, xmm1
; Result vector copied to xmm0; the listing ends here.
movaps xmm0, xmm5
To:
// HLSL-style version of the same computation, written with this library's float3a type:
// the dot product of normal with the normalized sum of view and light.
float blinn_phong( const float3a &normal, const float3a &view, const float3a &light )
{
// h = normalized (view + light); operator+ and normalize() are provided by the library.
auto h = normalize( view + light );
return dot( h, normal );
}
Generating:
; Compiler listing for the HLSL-style blinn_phong (fragment: no function label or ret is shown).
; Matching the loads against the C++ source: [edx] and [eax] are the two addends
; (view and light) and [ecx] is the operand of the final dot product (normal).
mov eax, DWORD PTR _light$[esp-4]
; xmm3 = g_XMMask3, ANDed into every loaded vector below
; (presumably keeps x,y,z and clears the unused 4th lane - confirm against DirectXMath).
movaps xmm3, XMMWORD PTR ?g_XMMask3@DirectX@@3UXMVECTORI32@1@B
movaps xmm4, XMMWORD PTR [edx]
movaps xmm0, XMMWORD PTR [eax]
andps xmm4, xmm3
andps xmm0, xmm3
; xmm4 = view + light
addps xmm4, xmm0
; xmm2 = per-lane squares of the sum.
movaps xmm2, xmm4
mulps xmm2, xmm4
; Horizontal add into lane 0: shuffle 0x99 yields lanes (y,z,y,z), shuffle 0x55 then
; broadcasts lane 1 (z), so after the two addss lane 0 of xmm2 = x*x + y*y + z*z.
movaps xmm0, xmm2
shufps xmm0, xmm2, 153 ; 00000099H
addss xmm2, xmm0
shufps xmm0, xmm0, 85 ; 00000055H
addss xmm2, xmm0
; xmm0 = all zeros, used below for the zero-length test.
xorps xmm0, xmm0
; Broadcast the squared length to every lane, then xmm1 = length.
shufps xmm2, xmm2, 0
sqrtps xmm1, xmm2
; xmm2 = mask of lanes where squared length != infinity; g_XMInfinity is compared
; straight from memory, overwriting the squared length that is no longer needed.
cmpneqps xmm2, XMMWORD PTR ?g_XMInfinity@DirectX@@3UXMVECTORI32@1@B
; xmm4 = sum / length
divps xmm4, xmm1
; xmm1 = mask of lanes where length != 0 (the length itself is no longer needed).
cmpneqps xmm1, xmm0
; xmm0 = normal, loaded here and masked just before use.
movaps xmm0, XMMWORD PTR [ecx]
; Select the final normalized vector: zero when length == 0, g_XMQNaN when the
; squared length is infinite, otherwise the quotient computed above.
andps xmm4, xmm1
andps xmm4, xmm2
andnps xmm2, XMMWORD PTR ?g_XMQNaN@DirectX@@3UXMVECTORI32@1@B
orps xmm4, xmm2
; Per-lane products of the normalized sum and the masked normal.
andps xmm0, xmm3
mulps xmm4, xmm0
; Same horizontal add as above: lane 0 of xmm4 = dot product.
movaps xmm1, xmm4
shufps xmm1, xmm4, 153 ; 00000099H
addss xmm4, xmm1
shufps xmm1, xmm1, 85 ; 00000055H
addss xmm4, xmm1
; Result vector copied to xmm0; the listing ends here.
movaps xmm0, xmm4
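So the generated asm is essentially the same: the two listings differ only in register allocation and in whether g_XMInfinity is first loaded into a register or compared directly from memory.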
In fact, this can be read as a push for DirectXMath to evolve into this kind of C++11 HLSL-style library (plain DirectXMath is a pain to work with).
We (real-time 3D developers) need an efficient and straightforward HLSL/GLSL-like C++ library under a Boost or MIT license.