Improved clipping and fix culling normal computation
This commit is contained in:
parent
f1a1c2199f
commit
debc5b219b
@ -25,7 +25,7 @@
|
||||
// * https://en.wikipedia.org/wiki/Hidden-surface_determination#Occlusion_culling
|
||||
// * https://en.wikipedia.org/wiki/Bounding_volume_hierarchy
|
||||
|
||||
static bool VertexIsInsideClipSpace(M3D_F4& V);
|
||||
static bool VertexClipTest(M3D_F4& V, sf::Vector2f& RTsize, float gb_factor);
|
||||
|
||||
Graphic3DRenderer::Graphic3DRenderer() {
|
||||
mRTSize = {1280.f, 324.f};
|
||||
@ -140,28 +140,27 @@ void Graphic3DRenderer::Draw(sf::RenderTexture& context) {
|
||||
break;
|
||||
|
||||
// Triangle clipping
|
||||
//TODO: implement complete Cohen-Sutherland algo or similar
|
||||
if (VertexIsInsideClipSpace(projVertices[indicePtr[i]]) &&
|
||||
VertexIsInsideClipSpace(projVertices[indicePtr[i+1]]) &&
|
||||
VertexIsInsideClipSpace(projVertices[indicePtr[i+2]]))
|
||||
//TODO: scissor/clip depending on how many vertices are outside/inside the clip space; implement the complete Cohen-Sutherland algorithm or similar
|
||||
if (VertexClipTest(projVertices[indicePtr[i]], mRTSize, 2.5f) &&
|
||||
VertexClipTest(projVertices[indicePtr[i+1]], mRTSize, 2.5f) &&
|
||||
VertexClipTest(projVertices[indicePtr[i+2]], mRTSize, 2.5f))
|
||||
{
|
||||
|
||||
M3D_VECTOR V1 = M3D_V4LoadF4(&projVertices[indicePtr[i]]);
|
||||
M3D_VECTOR V2 = M3D_V4LoadF4(&projVertices[indicePtr[i+1]]);
|
||||
M3D_VECTOR V3 = M3D_V4LoadF4(&projVertices[indicePtr[i+2]]);
|
||||
|
||||
// Do the perspective divide
|
||||
V1 = M3D_V4Divide(V1, M3D_V4SplatW(V1));
|
||||
V2 = M3D_V4Divide(V2, M3D_V4SplatW(V2));
|
||||
V3 = M3D_V4Divide(V3, M3D_V4SplatW(V3));
|
||||
|
||||
V1 = M3D_V3TransformNDCToViewport(V1, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f);
|
||||
V2 = M3D_V3TransformNDCToViewport(V2, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f);
|
||||
V3 = M3D_V3TransformNDCToViewport(V3, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f);
|
||||
|
||||
// Face culling
|
||||
M3D_VECTOR faceNormal = M3D_TNormal(V1,V2,V3);
|
||||
if (M3D_V4GetX(M3D_V3Dot(V1, faceNormal)) >= 0) {
|
||||
// Do the perspective divide
|
||||
V1 = M3D_V4Divide(V1, M3D_V4SplatW(V1));
|
||||
V2 = M3D_V4Divide(V2, M3D_V4SplatW(V2));
|
||||
V3 = M3D_V4Divide(V3, M3D_V4SplatW(V3));
|
||||
|
||||
V1 = M3D_V3TransformNDCToViewport(V1, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f);
|
||||
V2 = M3D_V3TransformNDCToViewport(V2, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f);
|
||||
V3 = M3D_V3TransformNDCToViewport(V3, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f);
|
||||
|
||||
if (M3D_V4GetX(M3D_TNormal(V1,V2,V3))*0.5f <= 0) {
|
||||
if (objInFrustrum == DISJOINT) {
|
||||
v_tri[0].color = sf::Color::Red;
|
||||
v_tri[1].color = sf::Color::Red;
|
||||
@ -205,9 +204,9 @@ void Graphic3DRenderer::UpdateInternalTestObjects() {
|
||||
mRenderList[3]->SetRotation(0.f, thetaAngle, 0.f);
|
||||
}
|
||||
|
||||
inline static bool VertexIsInsideClipSpace(M3D_F4& V) {
|
||||
return (V.x > -V.w && V.x < V.w &&
|
||||
V.y > -V.w && V.y < V.w &&
|
||||
V.z > 0 && V.z < V.w
|
||||
// Guard-band test on a single vertex.
//
// Returns true when V.x lies strictly inside +/-(RTsize.x * gb_factor * V.w)
// and V.y lies strictly inside +/-(RTsize.y * gb_factor * V.w). Each axis is
// bounded symmetrically by its own render-target extent. V.z is not tested.
//
// V         vertex to test (x, y and w are read; nothing is written).
// RTsize    render-target size; x bounds the X axis, y bounds the Y axis.
// gb_factor guard-band multiplier applied to both axes.
//
// NOTE(review): with positive RTsize and gb_factor, a vertex with V.w <= 0
// yields an empty interval and is always rejected - confirm this is the
// intended handling for vertices behind the eye.
inline static bool VertexClipTest(M3D_F4& V, sf::Vector2f& RTsize, float gb_factor) {
    // Guard bands are usually 2-3x the viewport size for the clipping test
    const float limitX = RTsize.x * gb_factor * V.w;
    const float limitY = RTsize.y * gb_factor * V.w;

    return (V.x > -limitX && V.x < limitX &&
            V.y > -limitY && V.y < limitY);
}
|
@ -419,6 +419,11 @@ M3D_VECTOR M3D_V3Cross(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V3LengthSq(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V3Length(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V2Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V2Cross(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V2LengthSq(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V2Length(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V2Normalize(M3D_VECTOR V) noexcept;
|
||||
|
||||
|
||||
#ifndef DISABLE_INTRINSICS
|
||||
|
@ -65,16 +65,16 @@ inline M3D_VECTOR M3D_V4LoadF3(const M3D_F3* src) noexcept {
|
||||
V.v4f[2] = src->z;
|
||||
V.v4f[3] = 0.f;
|
||||
return V;
|
||||
/*
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
__m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(src)));
|
||||
__m128 z = _mm_load_ss(&src->z);
|
||||
return _mm_insert_ps(xy, z, 0x20);
|
||||
*/
|
||||
#else
|
||||
/*
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
__m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(src)));
|
||||
__m128 z = _mm_load_ss(&src->z);
|
||||
return _mm_movelh_ps(xy, z);
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -97,16 +97,16 @@ inline void M3D_V4StoreF3(M3D_F3* dst, M3D_VECTOR V) noexcept {
|
||||
dst->x = V.v4f[0];
|
||||
dst->y = V.v4f[1];
|
||||
dst->z = V.v4f[2];
|
||||
/*
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
*reinterpret_cast<int*>(&dst->x) = _mm_extract_ps(V, 0);
|
||||
*reinterpret_cast<int*>(&dst->y) = _mm_extract_ps(V, 1);
|
||||
*reinterpret_cast<int*>(&dst->z) = _mm_extract_ps(V, 2);
|
||||
*/
|
||||
#else
|
||||
/*
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
_mm_store_sd(reinterpret_cast<double*>(dst), _mm_castps_pd(V));
|
||||
__m128 z = M3D_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
_mm_store_ss(&dst->z, z);
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -115,15 +115,15 @@ inline void M3D_V4StoreF3A(M3D_F3A* dst, M3D_VECTOR V) noexcept {
|
||||
dst->x = V.v4f[0];
|
||||
dst->y = V.v4f[1];
|
||||
dst->z = V.v4f[2];
|
||||
/*
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
_mm_store_sd(reinterpret_cast<double*>(dst), _mm_castps_pd(V));
|
||||
*reinterpret_cast<int*>(&dst->z) = _mm_extract_ps(V, 2);
|
||||
*/
|
||||
#else
|
||||
/*
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
_mm_store_sd(reinterpret_cast<double*>(dst), _mm_castps_pd(V));
|
||||
__m128 z = _mm_movehl_ps(V, V);
|
||||
_mm_store_ss(&dst->z, z);
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -434,11 +434,12 @@ inline M3D_VECTOR M3D_V4SetY(M3D_VECTOR V, float y) noexcept {
|
||||
V.v4f[3]
|
||||
}}};
|
||||
return U.v;
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
M3D_VECTOR vResult = _mm_set_ss(y);
|
||||
vResult = _mm_insert_ps(V, vResult, 0x10);
|
||||
return vResult;
|
||||
#else
|
||||
/*
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
// Swap y and x
|
||||
M3D_VECTOR vResult = M3D_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1));
|
||||
// Convert input to vector
|
||||
@ -448,6 +449,7 @@ inline M3D_VECTOR M3D_V4SetY(M3D_VECTOR V, float y) noexcept {
|
||||
// Swap y and x again
|
||||
vResult = M3D_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 2, 0, 1));
|
||||
return vResult;
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -460,11 +462,12 @@ inline M3D_VECTOR M3D_V4SetZ(M3D_VECTOR V, float z) noexcept {
|
||||
V.v4f[3]
|
||||
}}};
|
||||
return U.v;
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
M3D_VECTOR vResult = _mm_set_ss(z);
|
||||
vResult = _mm_insert_ps(V, vResult, 0x20);
|
||||
return vResult;
|
||||
#else
|
||||
/*
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
// Swap z and x
|
||||
M3D_VECTOR vResult = M3D_PERMUTE_PS(V, _MM_SHUFFLE(3, 0, 1, 2));
|
||||
// Convert input to vector
|
||||
@ -474,6 +477,7 @@ inline M3D_VECTOR M3D_V4SetZ(M3D_VECTOR V, float z) noexcept {
|
||||
// Swap z and x again
|
||||
vResult = M3D_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2));
|
||||
return vResult;
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -486,11 +490,12 @@ inline M3D_VECTOR M3D_V4SetW(M3D_VECTOR V, float w) noexcept {
|
||||
w
|
||||
}}};
|
||||
return U.v;
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
M3D_VECTOR vResult = _mm_set_ss(w);
|
||||
vResult = _mm_insert_ps(V, vResult, 0x30);
|
||||
return vResult;
|
||||
#else
|
||||
/*
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
// Swap w and x
|
||||
M3D_VECTOR vResult = M3D_PERMUTE_PS(V, _MM_SHUFFLE(0, 2, 1, 3));
|
||||
// Convert input to vector
|
||||
@ -500,6 +505,7 @@ inline M3D_VECTOR M3D_V4SetW(M3D_VECTOR V, float w) noexcept {
|
||||
// Swap w and x again
|
||||
vResult = M3D_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 2, 1, 3));
|
||||
return vResult;
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -693,9 +699,10 @@ inline M3D_VECTOR M3D_V4Round(M3D_VECTOR V) noexcept {
|
||||
M3D_Internal::round_to_nearest(V.v4f[3])
|
||||
} } };
|
||||
return Result.v;
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
return _mm_round_ps(V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||
#else
|
||||
/*
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
__m128 sign = _mm_and_ps(V, M3D_MNegativeZero);
|
||||
__m128 sMagic = _mm_or_ps(M3D_MNoFraction, sign);
|
||||
__m128 R1 = _mm_add_ps(V, sMagic);
|
||||
@ -706,6 +713,7 @@ inline M3D_VECTOR M3D_V4Round(M3D_VECTOR V) noexcept {
|
||||
R1 = _mm_and_ps(R1, mask);
|
||||
M3D_VECTOR vResult = _mm_xor_ps(R1, R2);
|
||||
return vResult;
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -827,8 +835,9 @@ inline M3D_VECTOR M3D_V4Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
||||
Result.f[2] =
|
||||
Result.f[3] = V1.v4f[0] * V2.v4f[0] + V1.v4f[1] * V2.v4f[1] + V1.v4f[2] * V2.v4f[2] + V1.v4f[3] * V2.v4f[3];
|
||||
return Result.v;
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
return _mm_dp_ps(V1, V2, 0xff);
|
||||
/*
|
||||
#elif defined(SSE3_INTRINSICS)
|
||||
M3D_VECTOR vTemp = _mm_mul_ps(V1, V2);
|
||||
vTemp = _mm_hadd_ps(vTemp, vTemp);
|
||||
@ -841,6 +850,7 @@ inline M3D_VECTOR M3D_V4Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
||||
vTemp = _mm_shuffle_ps(vTemp, vTemp2, _MM_SHUFFLE(0, 3, 0, 0)); // Copy W to the Z position
|
||||
vTemp = _mm_add_ps(vTemp, vTemp2); // Add Z and W together
|
||||
return M3D_PERMUTE_PS(vTemp, _MM_SHUFFLE(2, 2, 2, 2)); // Splat Z and return
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -856,16 +866,17 @@ inline M3D_VECTOR M3D_V4Length(M3D_VECTOR V) noexcept {
|
||||
Result = M3D_V4Sqrt(Result);
|
||||
|
||||
return Result;
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
M3D_VECTOR vTemp = _mm_dp_ps(V, V, 0xff);
|
||||
return _mm_sqrt_ps(vTemp);
|
||||
/*
|
||||
#elif defined(SSE3_INTRINSICS)
|
||||
M3D_VECTOR vLengthSq = _mm_mul_ps(V, V);
|
||||
vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq);
|
||||
vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq);
|
||||
vLengthSq = _mm_sqrt_ps(vLengthSq);
|
||||
return vLengthSq;
|
||||
#else
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
// Perform the dot product on x,y,z and w
|
||||
M3D_VECTOR vLengthSq = _mm_mul_ps(V, V);
|
||||
// vTemp has z and w
|
||||
@ -883,6 +894,7 @@ inline M3D_VECTOR M3D_V4Length(M3D_VECTOR V) noexcept {
|
||||
// Get the length
|
||||
vLengthSq = _mm_sqrt_ps(vLengthSq);
|
||||
return vLengthSq;
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1066,14 +1078,15 @@ inline M3D_VECTOR M3D_V3Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
||||
vResult.f[2] =
|
||||
vResult.f[3] = fValue;
|
||||
return vResult.v;
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
return _mm_dp_ps(V1, V2, 0x7f);
|
||||
/*
|
||||
#elif defined(SSE3_INTRINSICS)
|
||||
M3D_VECTOR vTemp = _mm_mul_ps(V1, V2);
|
||||
vTemp = _mm_and_ps(vTemp, g_XMMask3);
|
||||
vTemp = _mm_hadd_ps(vTemp, vTemp);
|
||||
return _mm_hadd_ps(vTemp, vTemp);
|
||||
#else
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
// Perform the dot product
|
||||
M3D_VECTOR vDot = _mm_mul_ps(V1, V2);
|
||||
// x=Dot.v4f[1], y=Dot.v4f[2]
|
||||
@ -1086,6 +1099,7 @@ inline M3D_VECTOR M3D_V3Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
||||
vDot = _mm_add_ss(vDot, vTemp);
|
||||
// Splat x
|
||||
return M3D_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1130,9 +1144,10 @@ inline M3D_VECTOR M3D_V3Length(M3D_VECTOR V) noexcept {
|
||||
Result = M3D_V4Sqrt(Result);
|
||||
|
||||
return Result;
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
M3D_VECTOR vTemp = _mm_dp_ps(V, V, 0x7f);
|
||||
return _mm_sqrt_ps(vTemp);
|
||||
/*
|
||||
#elif defined(SSE3_INTRINSICS)
|
||||
M3D_VECTOR vLengthSq = _mm_mul_ps(V, V);
|
||||
vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3);
|
||||
@ -1140,7 +1155,7 @@ inline M3D_VECTOR M3D_V3Length(M3D_VECTOR V) noexcept {
|
||||
vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq);
|
||||
vLengthSq = _mm_sqrt_ps(vLengthSq);
|
||||
return vLengthSq;
|
||||
#else
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
// Perform the dot product on x,y and z
|
||||
M3D_VECTOR vLengthSq = _mm_mul_ps(V, V);
|
||||
// vTemp has z and y
|
||||
@ -1156,6 +1171,7 @@ inline M3D_VECTOR M3D_V3Length(M3D_VECTOR V) noexcept {
|
||||
// Get the length
|
||||
vLengthSq = _mm_sqrt_ps(vLengthSq);
|
||||
return vLengthSq;
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1174,8 +1190,7 @@ inline M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept {
|
||||
vResult.v4f[2] = V.v4f[2] * fLength;
|
||||
vResult.v4f[3] = V.v4f[3] * fLength;
|
||||
return vResult;
|
||||
|
||||
#elif defined(SSE4_INTRINSICS)
|
||||
#else // SSE4_INTRINSICS
|
||||
M3D_VECTOR vLengthSq = _mm_dp_ps(V, V, 0x7f);
|
||||
// Prepare for the division
|
||||
M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq);
|
||||
@ -1185,16 +1200,17 @@ inline M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept {
|
||||
vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult);
|
||||
// Failsafe on zero (Or epsilon) length planes
|
||||
// If the length is infinity, set the elements to zero
|
||||
vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity);
|
||||
vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity);
|
||||
// Divide to perform the normalization
|
||||
vResult = _mm_div_ps(V, vResult);
|
||||
// Any that are infinity, set to zero
|
||||
vResult = _mm_and_ps(vResult, vZeroMask);
|
||||
// Select qnan or result based on infinite length
|
||||
M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN);
|
||||
M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN);
|
||||
M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq);
|
||||
vResult = _mm_or_ps(vTemp1, vTemp2);
|
||||
return vResult;
|
||||
/*
|
||||
#elif defined(SSE3_INTRINSICS)
|
||||
// Perform the dot product on x,y and z only
|
||||
M3D_VECTOR vLengthSq = _mm_mul_ps(V, V);
|
||||
@ -1209,17 +1225,17 @@ inline M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept {
|
||||
vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult);
|
||||
// Failsafe on zero (Or epsilon) length planes
|
||||
// If the length is infinity, set the elements to zero
|
||||
vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity);
|
||||
vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity);
|
||||
// Divide to perform the normalization
|
||||
vResult = _mm_div_ps(V, vResult);
|
||||
// Any that are infinity, set to zero
|
||||
vResult = _mm_and_ps(vResult, vZeroMask);
|
||||
// Select qnan or result based on infinite length
|
||||
M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN);
|
||||
M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN);
|
||||
M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq);
|
||||
vResult = _mm_or_ps(vTemp1, vTemp2);
|
||||
return vResult;
|
||||
#else
|
||||
#elif defined(SSE_INTRINSICS)
|
||||
// Perform the dot product on x,y and z only
|
||||
M3D_VECTOR vLengthSq = _mm_mul_ps(V, V);
|
||||
M3D_VECTOR vTemp = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 1, 2, 1));
|
||||
@ -1245,6 +1261,182 @@ inline M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept {
|
||||
M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq);
|
||||
vResult = _mm_or_ps(vTemp1, vTemp2);
|
||||
return vResult;
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
// 2D dot product: V1.x*V2.x + V1.y*V2.y.
// The scalar result is replicated into all four lanes of the returned vector.
inline M3D_VECTOR M3D_V2Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
#ifdef DISABLE_INTRINSICS
    const float fDot = V1.v4f[0] * V2.v4f[0] + V1.v4f[1] * V2.v4f[1];

    M3D_V4F32 vResult;
    vResult.f[0] = fDot;
    vResult.f[1] = fDot;
    vResult.f[2] = fDot;
    vResult.f[3] = fDot;
    return vResult.v;
#else // SSE4_INTRINSICS
    // Mask 0x3f: high nibble 0x3 multiplies only lanes x and y,
    // low nibble 0xf broadcasts the sum to every output lane.
    return _mm_dp_ps(V1, V2, 0x3f);
    /* Disabled SSE3 / SSE fallbacks, kept for reference:
    #elif defined(SSE3_INTRINSICS)
        M3D_VECTOR vDot = _mm_mul_ps(V1, V2);
        vDot = _mm_hadd_ps(vDot, vDot);
        vDot = _mm_moveldup_ps(vDot);
        return vDot;
    #elif defined(SSE_INTRINSICS)
        // Perform the dot product on x and y
        M3D_VECTOR vLengthSq = _mm_mul_ps(V1, V2);
        // vTemp has y splatted
        M3D_VECTOR vTemp = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1));
        // x+y
        vLengthSq = _mm_add_ss(vLengthSq, vTemp);
        vLengthSq = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
        return vLengthSq;
    */
#endif
}
|
||||
|
||||
// 2D cross product (scalar): V1.x*V2.y - V1.y*V2.x.
// The scalar result is replicated into all four lanes of the returned vector.
inline M3D_VECTOR M3D_V2Cross(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
#ifdef DISABLE_INTRINSICS
    const float fCross = (V1.v4f[0] * V2.v4f[1]) - (V1.v4f[1] * V2.v4f[0]);

    M3D_V4F32 vCross;
    vCross.f[0] = fCross;
    vCross.f[1] = fCross;
    vCross.f[2] = fCross;
    vCross.f[3] = fCross;
    return vCross.v;
#else
    // (V2.y, V2.x, V2.y, V2.x): x and y exchanged
    const M3D_VECTOR vSwapped = M3D_PERMUTE_PS(V2, _MM_SHUFFLE(0, 1, 0, 1));
    // lane 0 = V2.y*V1.x, lane 1 = V2.x*V1.y
    const M3D_VECTOR vProducts = _mm_mul_ps(vSwapped, V1);
    // Broadcast lane 1 so it can be subtracted from lane 0
    const M3D_VECTOR vLaneY = M3D_PERMUTE_PS(vProducts, _MM_SHUFFLE(1, 1, 1, 1));
    // lane 0 now holds the cross product
    const M3D_VECTOR vDiff = _mm_sub_ss(vProducts, vLaneY);
    // Replicate lane 0 everywhere
    return M3D_PERMUTE_PS(vDiff, _MM_SHUFFLE(0, 0, 0, 0));
#endif
}
|
||||
|
||||
// Squared 2D length of V, computed as the 2D dot product of V with itself
// (so the result has whatever lane layout M3D_V2Dot returns).
inline M3D_VECTOR M3D_V2LengthSq(M3D_VECTOR V) noexcept {
    const M3D_VECTOR vLengthSq = M3D_V2Dot(V, V);
    return vLengthSq;
}
|
||||
|
||||
// 2D length of V: square root of the squared 2D length, applied to every lane
// of the intermediate result.
inline M3D_VECTOR M3D_V2Length(M3D_VECTOR V) noexcept {
#ifdef DISABLE_INTRINSICS
    return M3D_V4Sqrt(M3D_V2LengthSq(V));
#else // SSE4_INTRINSICS
    // Mask 0x3f: dot over lanes x and y only, sum broadcast to all lanes.
    return _mm_sqrt_ps(_mm_dp_ps(V, V, 0x3f));
    /* Disabled SSE3 / SSE fallbacks, kept for reference:
    #elif defined(SSE3_INTRINSICS)
        M3D_VECTOR vLengthSq = _mm_mul_ps(V, V);
        M3D_VECTOR vTemp = _mm_hadd_ps(vLengthSq, vLengthSq);
        vLengthSq = _mm_sqrt_ss(vTemp);
        vLengthSq = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
        return vLengthSq;
    #elif defined(SSE_INTRINSICS)
        // Perform the dot product on x and y
        M3D_VECTOR vLengthSq = _mm_mul_ps(V, V);
        // vTemp has y splatted
        M3D_VECTOR vTemp = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1));
        // x+y
        vLengthSq = _mm_add_ss(vLengthSq, vTemp);
        vLengthSq = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
        vLengthSq = _mm_sqrt_ps(vLengthSq);
        return vLengthSq;
    */
#endif
}
|
||||
|
||||
// Scales V by the reciprocal of its 2D (x, y) length. All four lanes of V are
// scaled, not just x and y.
//
// Scalar path: when the length is not > 0 the length value itself is used as
// the scale factor (no division is performed).
// SSE path: lanes are forced to zero when the length compares equal to zero,
// and to QNaN when the squared length compares equal to infinity.
inline M3D_VECTOR M3D_V2Normalize(M3D_VECTOR V) noexcept {
#ifdef DISABLE_INTRINSICS
    float fScale = M3D_V2Length(V).v4f[0];

    // Prevent divide by zero
    if (fScale > 0) {
        fScale = 1.0f / fScale;
    }

    M3D_VECTOR vNormalized;
    vNormalized.v4f[0] = V.v4f[0] * fScale;
    vNormalized.v4f[1] = V.v4f[1] * fScale;
    vNormalized.v4f[2] = V.v4f[2] * fScale;
    vNormalized.v4f[3] = V.v4f[3] * fScale;
    return vNormalized;
#else // SSE4_INTRINSICS
    // Squared length over x and y, broadcast to every lane
    const M3D_VECTOR vLenSq = _mm_dp_ps(V, V, 0x3f);
    const M3D_VECTOR vLen = _mm_sqrt_ps(vLenSq);
    // All-ones where length != 0 (floating-point compare, so -0.0 counts as zero)
    const M3D_VECTOR vNonZero = _mm_cmpneq_ps(_mm_setzero_ps(), vLen);
    // All-ones where the squared length is not infinity
    const M3D_VECTOR vFinite = _mm_cmpneq_ps(vLenSq, M3D_MInfinity);
    // Divide, then clear the lanes produced by a zero-length divide
    const M3D_VECTOR vScaled = _mm_and_ps(_mm_div_ps(V, vLen), vNonZero);
    // Select QNaN where the squared length was infinite, the scaled value otherwise
    const M3D_VECTOR vNaNPart = _mm_andnot_ps(vFinite, M3D_MQNaN);
    const M3D_VECTOR vValuePart = _mm_and_ps(vScaled, vFinite);
    return _mm_or_ps(vNaNPart, vValuePart);
    /* Disabled SSE3 / SSE fallbacks, kept for reference:
    #elif defined(SSE3_INTRINSICS)
        // Perform the dot product on x and y only
        M3D_VECTOR vLengthSq = _mm_mul_ps(V, V);
        vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq);
        vLengthSq = _mm_moveldup_ps(vLengthSq);
        // Prepare for the division
        M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq);
        // Create zero with a single instruction
        M3D_VECTOR vZeroMask = _mm_setzero_ps();
        // Test for a divide by zero (Must be FP to detect -0.0)
        vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult);
        // Mask of lanes whose squared length is not infinity
        vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity);
        // Divide to perform the normalization
        vResult = _mm_div_ps(V, vResult);
        // Zero the lanes where the length was zero
        vResult = _mm_and_ps(vResult, vZeroMask);
        // Select qnan or result based on infinite length
        M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN);
        M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq);
        vResult = _mm_or_ps(vTemp1, vTemp2);
        return vResult;
    #elif defined(SSE_INTRINSICS)
        // Perform the dot product on x and y only
        M3D_VECTOR vLengthSq = _mm_mul_ps(V, V);
        M3D_VECTOR vTemp = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1));
        vLengthSq = _mm_add_ss(vLengthSq, vTemp);
        vLengthSq = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
        // Prepare for the division
        M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq);
        // Create zero with a single instruction
        M3D_VECTOR vZeroMask = _mm_setzero_ps();
        // Test for a divide by zero (Must be FP to detect -0.0)
        vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult);
        // Mask of lanes whose squared length is not infinity
        vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity);
        // Divide to perform the normalization
        vResult = _mm_div_ps(V, vResult);
        // Zero the lanes where the length was zero
        vResult = _mm_and_ps(vResult, vZeroMask);
        // Select qnan or result based on infinite length
        M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN);
        M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq);
        vResult = _mm_or_ps(vTemp1, vTemp2);
        return vResult;
    */
#endif
}
|
||||
|
||||
@ -1317,7 +1509,7 @@ inline M3D_VECTOR M3D_TNormal(M3D_VECTOR P1, M3D_VECTOR P2, M3D_VECTOR P3) noexc
|
||||
M3D_VECTOR L1 = M3D_V4Subtract(P2, P1);
|
||||
M3D_VECTOR L2 = M3D_V4Subtract(P3, P1);
|
||||
|
||||
return M3D_V3Normalize(M3D_V3Cross(L2, L1));
|
||||
return M3D_V2Normalize(M3D_V2Cross(L2, L1));
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user