Join perspective divide with projection transform
This commit is contained in:
parent
9ff5d34908
commit
294f806900
@ -39,10 +39,11 @@ void Graphic3DRenderer::Draw(sf::RenderTexture& context) {
|
||||
M3D_MATRIX MVPMat = modelMat * viewProjMat;
|
||||
M3D_MATRIX viewportMat = M3D_TransformMatrixViewport(1280.0f, 324.f, 0.0f, 0.0f);
|
||||
sf::Vertex v_tri[4];
|
||||
|
||||
auto cubeMesh = testObj.GetObjectMesh();
|
||||
M3D_F4 projVertices[cubeMesh.vertices.size()];
|
||||
M3D_V3Transform(projVertices, sizeof(M3D_F4), (M3D_F3*)cubeMesh.vertices.data(), sizeof(Vertex), cubeMesh.vertices.size(), MVPMat);
|
||||
|
||||
// Do the vertices projection and perspective divide
|
||||
M3D_F3 projVertices[cubeMesh.vertices.size()];
|
||||
M3D_V3TransformPersDiv(projVertices, sizeof(M3D_F3), (M3D_F3*)cubeMesh.vertices.data(), sizeof(Vertex), cubeMesh.vertices.size(), MVPMat);
|
||||
|
||||
auto indicePtr = (uint32_t*)cubeMesh.parts[0].indices.data();
|
||||
for (uint32_t i = 0; i < cubeMesh.parts[0].indices.size(); i += 3) {
|
||||
@ -50,17 +51,15 @@ void Graphic3DRenderer::Draw(sf::RenderTexture& context) {
|
||||
if (i+2 > cubeMesh.parts[0].indices.size())
|
||||
break;
|
||||
|
||||
// Simple clipping
|
||||
//TODO: implement complete Cohen-Sutherland algo or similar
|
||||
if ((projVertices[indicePtr[i]]).z > 0 &&
|
||||
(projVertices[indicePtr[i+1]]).z > 0 &&
|
||||
(projVertices[indicePtr[i+2]]).z > 0) {
|
||||
|
||||
M3D_VECTOR V1 = M3D_V4LoadF4(&projVertices[indicePtr[i]]);
|
||||
M3D_VECTOR V2 = M3D_V4LoadF4(&projVertices[indicePtr[i+1]]);
|
||||
M3D_VECTOR V3 = M3D_V4LoadF4(&projVertices[indicePtr[i+2]]);
|
||||
|
||||
V1 = M3D_V4Divide(V1, M3D_V4SplatW(V1));
|
||||
V2 = M3D_V4Divide(V2, M3D_V4SplatW(V2));
|
||||
V3 = M3D_V4Divide(V3, M3D_V4SplatW(V3));
|
||||
M3D_VECTOR V1 = M3D_V4LoadF3(&projVertices[indicePtr[i]]);
|
||||
M3D_VECTOR V2 = M3D_V4LoadF3(&projVertices[indicePtr[i+1]]);
|
||||
M3D_VECTOR V3 = M3D_V4LoadF3(&projVertices[indicePtr[i+2]]);
|
||||
|
||||
V1 = M3D_V3Transform(V1, viewportMat);
|
||||
V2 = M3D_V3Transform(V2, viewportMat);
|
||||
|
@ -42,10 +42,17 @@
|
||||
#endif
|
||||
|
||||
// Unpacks four tightly packed 3-float vectors, loaded as the three registers
//   l1 = x0 y0 z0 x1,  l2 = y1 z1 x2 y2,  l3 = z2 x3 y3 z3
// into one register per vector. l1 itself is used as vector 0 (its w lane holds x1);
// the macro declares V2, V3 and V4 in the invoking scope:
//   V2 = x1 y1 z1 z1,  V3 = x2 y2 z2 z2,  V4 = x3 y3 z3 0
// Only the x/y/z lanes are meaningful afterwards.
// The body uses the macro parameter l3 (not a caller variable named L3), so the
// macro works whatever the third argument is called.
#define M3D_UNPACK3INTO4(l1, l2, l3) \
    M3D_VECTOR V3 = _mm_shuffle_ps(l2, l3, _MM_SHUFFLE(0, 0, 3, 2));\
    M3D_VECTOR V2 = _mm_shuffle_ps(l2, l1, _MM_SHUFFLE(3, 3, 1, 0));\
    V2 = M3D_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 0, 2));\
    M3D_VECTOR V4 = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(l3), 32 / 8))
|
||||
|
||||
// Packs the x/y/z lanes of four vectors back into three registers (12 consecutive floats).
// Reads V1, V2, V3 and V4 from the invoking scope and overwrites V1, V2 and V3:
//   V1  <- x1 y1 z1 x2
//   v2x <- y2 z2 x3 y3   (v2x is the caller-supplied destination for the middle register)
//   V3  <- z3 x4 y4 z4
// where (xN, yN, zN) are the first three lanes of VN on entry.
// After the macro, V2 holds scratch data and must not be used as a result.
#define M3D_PACK4INTO3(v2x) \
|
||||
v2x = _mm_shuffle_ps(V2, V3, _MM_SHUFFLE(1, 0, 2, 1));\
|
||||
V2 = _mm_shuffle_ps(V2, V1, _MM_SHUFFLE(2, 2, 0, 0));\
|
||||
V1 = _mm_shuffle_ps(V1, V2, _MM_SHUFFLE(0, 2, 1, 0));\
|
||||
V3 = _mm_shuffle_ps(V3, V4, _MM_SHUFFLE(0, 0, 2, 2));\
|
||||
V3 = _mm_shuffle_ps(V3, V4, _MM_SHUFFLE(2, 1, 2, 0))
|
||||
#endif
|
||||
|
||||
//
|
||||
@ -339,7 +346,9 @@ M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept;
|
||||
// Vector/Matrix operation
|
||||
//
|
||||
M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept;
|
||||
M3D_F4* M3D_V3Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
|
||||
void M3D_V3Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
|
||||
// Transforms the 3D point V by M (the input w is ignored and treated as 1:
// x*row0 + y*row1 + z*row2 + row3), then divides the result by its own w component.
// No check is made for a resulting w of zero.
M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept;
|
||||
// Stream version of the above: processes VectorCount points, reading one M3D_F3 every
// InputStride bytes from pInputStream and writing one M3D_F3 every OutputStride bytes
// to pOutputStream.
void M3D_V3TransformPersDiv(M3D_F3* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
|
||||
|
||||
|
||||
//
|
||||
|
@ -1130,7 +1130,7 @@ inline M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_F4* M3D_V3Transform(
|
||||
inline void M3D_V3Transform(
|
||||
M3D_F4* pOutputStream,
|
||||
size_t OutputStride,
|
||||
const M3D_F3* pInputStream,
|
||||
@ -1162,8 +1162,6 @@ inline M3D_F4* M3D_V3Transform(
|
||||
pInputVector += InputStride;
|
||||
pOutputVector += OutputStride;
|
||||
}
|
||||
|
||||
return pOutputStream;
|
||||
#else
|
||||
size_t i = 0;
|
||||
size_t four = VectorCount >> 2;
|
||||
@ -1346,8 +1344,333 @@ inline M3D_F4* M3D_V3Transform(
|
||||
}
|
||||
|
||||
M3D_SFENCE();
|
||||
#endif
|
||||
}
|
||||
|
||||
return pOutputStream;
|
||||
// Projects a single 3D point: computes x*M.rows[0] + y*M.rows[1] + z*M.rows[2] + M.rows[3]
// (the w of V is not read) and returns that vector divided by its own w component.
inline M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept {
    // Broadcast each coordinate of the point across a full vector.
    const M3D_VECTOR splatZ = M3D_V4SplatZ(V);
    const M3D_VECTOR splatY = M3D_V4SplatY(V);
    const M3D_VECTOR splatX = M3D_V4SplatX(V);

    // Accumulate starting from the translation row, z first, then y, then x.
    M3D_VECTOR projected = M3D_V4MultiplyAdd(splatZ, M.rows[2], M.rows[3]);
    projected = M3D_V4MultiplyAdd(splatY, M.rows[1], projected);
    projected = M3D_V4MultiplyAdd(splatX, M.rows[0], projected);

    // Perspective divide: every lane is divided by the w lane.
    const M3D_VECTOR splatW = M3D_V4SplatW(projected);
    return M3D_V4Divide(projected, splatW);
}
|
||||
|
||||
// Transforms a stream of 3D points by M and applies the perspective divide.
//
// Every input point (x, y, z) is turned into x*row0 + y*row1 + z*row2 + row3 of M,
// the result is divided by its own w component and then written out as an M3D_F3.
// No check is made for a resulting w of zero.
//
//   pOutputStream  destination of the first projected point
//   OutputStride   distance in bytes between two consecutive outputs
//   pInputStream   first point to project
//   InputStride    distance in bytes between two consecutive inputs
//   VectorCount    number of points to process
//   M              transformation matrix
//
// With intrinsics enabled, tightly packed inputs (InputStride == sizeof(M3D_F3)) are
// processed four at a time; any other layout, and the remaining 0-3 points, go through
// the one-point-at-a-time tail loop.
inline void M3D_V3TransformPersDiv(
    M3D_F3* pOutputStream,
    size_t OutputStride,
    const M3D_F3* pInputStream,
    size_t InputStride,
    size_t VectorCount,
    M3D_MATRIX M
) noexcept {
    // Byte pointers so that the strides (expressed in bytes) can be applied directly.
    auto pInputVector = reinterpret_cast<const uint8_t*>(pInputStream);
    auto pOutputVector = reinterpret_cast<uint8_t*>(pOutputStream);

    const M3D_VECTOR row0 = M.rows[0];
    const M3D_VECTOR row1 = M.rows[1];
    const M3D_VECTOR row2 = M.rows[2];
    const M3D_VECTOR row3 = M.rows[3];

#ifdef DISABLE_INTRINSICS
    for (size_t i = 0; i < VectorCount; i++) {
        M3D_VECTOR V = M3D_V4LoadF3(reinterpret_cast<const M3D_F3*>(pInputVector));
        M3D_VECTOR Z = M3D_V4SplatZ(V);
        M3D_VECTOR Y = M3D_V4SplatY(V);
        M3D_VECTOR X = M3D_V4SplatX(V);

        M3D_VECTOR Result = M3D_V4MultiplyAdd(Z, row2, row3);
        Result = M3D_V4MultiplyAdd(Y, row1, Result);
        Result = M3D_V4MultiplyAdd(X, row0, Result);

        // Perspective divide
        M3D_VECTOR W = M3D_V4SplatW(Result);
        Result = M3D_V4Divide(Result, W);

        M3D_V4StoreF3(reinterpret_cast<M3D_F3*>(pOutputVector), Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
#else
    // Projects one point held in the x/y/z lanes of V (the w lane is ignored) and
    // performs the perspective divide. Shared by every path below so that the
    // arithmetic (and its evaluation order) exists in exactly one place.
    const auto projectPoint = [&](M3D_VECTOR V) noexcept -> M3D_VECTOR {
        const M3D_VECTOR Z = M3D_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2));
        const M3D_VECTOR Y = M3D_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1));
        const M3D_VECTOR X = M3D_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0));

        M3D_VECTOR vTemp = M3D_FMADD_PS(Z, row2, row3);
        const M3D_VECTOR vTemp2 = _mm_mul_ps(Y, row1);
        const M3D_VECTOR vTemp3 = _mm_mul_ps(X, row0);
        vTemp = _mm_add_ps(vTemp, vTemp2);
        vTemp = _mm_add_ps(vTemp, vTemp3);

        const M3D_VECTOR W = M3D_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3));
        return _mm_div_ps(vTemp, W);
    };

    size_t i = 0;
    const size_t four = VectorCount >> 2;

    // NOTE: the local names V1/L2/L3 and V2/V3/V4 are imposed by M3D_UNPACK3INTO4 and
    // M3D_PACK4INTO3, which declare and read variables of the enclosing scope.
    if (four > 0 && InputStride == sizeof(M3D_F3)) {
        if (OutputStride == sizeof(M3D_F3)) {
            if (!(reinterpret_cast<uintptr_t>(pOutputStream) & 0xF)) {
                // Packed input, aligned & packed output
                for (size_t j = 0; j < four; ++j) {
                    __m128 V1 = _mm_loadu_ps(reinterpret_cast<const float*>(pInputVector));
                    __m128 L2 = _mm_loadu_ps(reinterpret_cast<const float*>(pInputVector + 16));
                    __m128 L3 = _mm_loadu_ps(reinterpret_cast<const float*>(pInputVector + 32));
                    pInputVector += sizeof(M3D_F3) * 4;

                    // Unpack the 4 vectors (.w components are junk)
                    M3D_UNPACK3INTO4(V1, L2, L3);

                    V1 = projectPoint(V1);
                    V2 = projectPoint(V2);
                    V3 = projectPoint(V3);
                    V4 = projectPoint(V4);

                    // Pack and store the vectors
                    M3D_VECTOR vTemp;
                    M3D_PACK4INTO3(vTemp);
                    M3D_STREAM_PS(reinterpret_cast<float*>(pOutputVector), V1);
                    M3D_STREAM_PS(reinterpret_cast<float*>(pOutputVector + 16), vTemp);
                    M3D_STREAM_PS(reinterpret_cast<float*>(pOutputVector + 32), V3);
                    pOutputVector += sizeof(M3D_F3) * 4;
                    i += 4;
                }
            } else {
                // Packed input, unaligned & packed output
                for (size_t j = 0; j < four; ++j) {
                    __m128 V1 = _mm_loadu_ps(reinterpret_cast<const float*>(pInputVector));
                    __m128 L2 = _mm_loadu_ps(reinterpret_cast<const float*>(pInputVector + 16));
                    __m128 L3 = _mm_loadu_ps(reinterpret_cast<const float*>(pInputVector + 32));
                    pInputVector += sizeof(M3D_F3) * 4;

                    // Unpack the 4 vectors (.w components are junk)
                    M3D_UNPACK3INTO4(V1, L2, L3);

                    V1 = projectPoint(V1);
                    V2 = projectPoint(V2);
                    V3 = projectPoint(V3);
                    V4 = projectPoint(V4);

                    // Pack and store the vectors
                    M3D_VECTOR vTemp;
                    M3D_PACK4INTO3(vTemp);
                    _mm_storeu_ps(reinterpret_cast<float*>(pOutputVector), V1);
                    _mm_storeu_ps(reinterpret_cast<float*>(pOutputVector + 16), vTemp);
                    _mm_storeu_ps(reinterpret_cast<float*>(pOutputVector + 32), V3);
                    pOutputVector += sizeof(M3D_F3) * 4;
                    i += 4;
                }
            }
        } else {
            // Packed input, unpacked output
            for (size_t j = 0; j < four; ++j) {
                __m128 V1 = _mm_loadu_ps(reinterpret_cast<const float*>(pInputVector));
                __m128 L2 = _mm_loadu_ps(reinterpret_cast<const float*>(pInputVector + 16));
                __m128 L3 = _mm_loadu_ps(reinterpret_cast<const float*>(pInputVector + 32));
                pInputVector += sizeof(M3D_F3) * 4;

                // Unpack the 4 vectors (.w components are junk)
                M3D_UNPACK3INTO4(V1, L2, L3);

                // Each result is written individually, OutputStride bytes apart.
                M3D_V4StoreF3(reinterpret_cast<M3D_F3*>(pOutputVector), projectPoint(V1));
                pOutputVector += OutputStride;

                M3D_V4StoreF3(reinterpret_cast<M3D_F3*>(pOutputVector), projectPoint(V2));
                pOutputVector += OutputStride;

                M3D_V4StoreF3(reinterpret_cast<M3D_F3*>(pOutputVector), projectPoint(V3));
                pOutputVector += OutputStride;

                M3D_V4StoreF3(reinterpret_cast<M3D_F3*>(pOutputVector), projectPoint(V4));
                pOutputVector += OutputStride;

                i += 4;
            }
        }
    }

    // Tail: remaining points, or every point when the input is not tightly packed.
    for (; i < VectorCount; i++) {
        const M3D_VECTOR V = M3D_V4LoadF3(reinterpret_cast<const M3D_F3*>(pInputVector));
        pInputVector += InputStride;

        M3D_V4StoreF3(reinterpret_cast<M3D_F3*>(pOutputVector), projectPoint(V));
        pOutputVector += OutputStride;
    }

    M3D_SFENCE();
#endif
}
|
||||
|
||||
|
@ -5,14 +5,21 @@
|
||||
#include "../Utils/MeshHelper.hpp"
|
||||
|
||||
|
||||
class WorldObject {};
|
||||
class WorldObject {
|
||||
public:
|
||||
virtual const Mesh& GetObjectMesh() const = 0;
|
||||
|
||||
protected:
|
||||
WorldObject() = default;
|
||||
|
||||
};
|
||||
|
||||
template<class D>
|
||||
class WorldObjectAbstract : public WorldObject {
|
||||
class WorldObjectAbstract : virtual public WorldObject {
|
||||
public:
|
||||
virtual ~WorldObjectAbstract() = 0;
|
||||
|
||||
const Mesh& GetObjectMesh() const noexcept { return mMesh; }
|
||||
const Mesh& GetObjectMesh() const noexcept override { return mMesh; }
|
||||
|
||||
protected:
|
||||
inline static Mesh mMesh;
|
||||
|
Loading…
x
Reference in New Issue
Block a user