diff --git a/CMakeLists.txt b/CMakeLists.txt index f43caf0..438e867 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,7 @@ project(ProtoTank VERSION 0.1.0 DESCRIPTION "Arcade 80s-style game with tanks" L # Compilation option option(DISABLE_CPU_OPTI "Disable CPU optimizations" OFF) +option(ENABLE_PROFILER "Enable gprof integration" ON) if(NOT DISABLE_CPU_OPTI) if(NOT MSVC) @@ -97,8 +98,10 @@ if(MSVC) set_target_properties(${PROJECT_NAME} PROPERTIES IMPORT_PREFIX "lib") else() # GCC profiler options - list(APPEND COMPOPTS -pg -ggdb3 -no-pie) - list(APPEND LINKOPTS -pg -ggdb3 -no-pie) + if(ENABLE_PROFILER) + list(APPEND COMPOPTS -pg -ggdb3 -no-pie) + list(APPEND LINKOPTS -pg -ggdb3 -no-pie) + endif() # static linking of stdlib if(MINGW) diff --git a/Engine/Graphics/3DRenderer.cpp b/Engine/Graphics/3DRenderer.cpp index 74dbae6..d0cd1cd 100644 --- a/Engine/Graphics/3DRenderer.cpp +++ b/Engine/Graphics/3DRenderer.cpp @@ -9,11 +9,17 @@ //#define DISABLE_AABB_CLIPPING //#define DISABLE_TRIANGLE_CLIPPING -#define DISABLE_WIREFRAME_MODE +//#define DISABLE_WIREFRAME_MODE // Rendering pipeline: -// model matrix (Object SRT) -> view matrix (camera matrix inverted) -> proj matrix -> clipping -> perspective divide -> viewport transformation -> Rasterizer (draw pixels inside projected triangles on 2D screen) +// model matrix (Object SRT) -> view matrix (camera matrix inverted) -> proj matrix -> clipping -> perspective divide +// -> viewport transformation -> Rasterizer (draw pixels inside projected triangles on 2D screen) +// Revised rendering pipeline: +// AABB clipping -> model transform matrix (Object SRT) -> view matrix (camera matrix inverted) -> proj matrix +// -> faces culling -> triangles clipping -> perspective divide -> viewport transformation -> Rasterizer (draw pixels inside projected triangles on 2D screen) +// +// Virtual space transformations order: // object space -> world space -> camera space -> homogeneous clip space -> NDC space -> raster space // // Rasterizer inputs elements: @@ -31,7 +37,16 @@ // * https://en.wikipedia.org/wiki/Hidden-surface_determination#Occlusion_culling // * https://en.wikipedia.org/wiki/Bounding_volume_hierarchy -static bool VertexClipTest(M3D_F4& V, sf::Vector2f& RTsize, float gb_factor); +struct RenderItem final { + const WorldObject* pObj = nullptr; + const M3D_ContainmentType frustrumClipType = CONTAINS; + + RenderItem() = delete; + RenderItem(const WorldObject* pObj) : pObj(pObj) {} + RenderItem(const WorldObject* pObj, const M3D_ContainmentType cType) : pObj(pObj), frustrumClipType(cType) {} +}; + +static bool VertexClipTest(M3D_F4* V, sf::Vector2f& RTsize); Graphic3DRenderer::Graphic3DRenderer() { if (mMainCamera == nullptr) { @@ -42,24 +57,24 @@ Graphic3DRenderer::Graphic3DRenderer() { mMainCamera->UpdateCamView(); // Fill world object list to render - mRenderList.clear(); - mRenderList.push_back(std::make_shared()); - mRenderList.back()->SetPosition(0.f, 0.f, 15.f); - mRenderList.back()->SetScale(2.0f); - mRenderList.push_back(std::make_shared()); - mRenderList.back()->SetPosition(6.f, 2.f, 2.f); - mRenderList.back()->SetScale(2.0f); - mRenderList.push_back(std::make_shared()); - mRenderList.back()->SetPosition(-8.f, 5.f, 10.f); - mRenderList.back()->SetScale(2.0f); - mRenderList.push_back(std::make_shared()); - mRenderList.back()->SetPosition(0.f, 0.f, 0.f); - mRenderList.back()->SetScale(5.0f); + mWorldObjsList.clear(); + mWorldObjsList.push_back(std::make_shared()); + mWorldObjsList.back()->SetPosition(0.f, 0.f, 15.f); + mWorldObjsList.back()->SetScale(2.0f); + mWorldObjsList.push_back(std::make_shared()); + mWorldObjsList.back()->SetPosition(6.f, 2.f, 2.f); + mWorldObjsList.back()->SetScale(2.0f); + mWorldObjsList.push_back(std::make_shared()); + mWorldObjsList.back()->SetPosition(-8.f, 5.f, 10.f); + mWorldObjsList.back()->SetScale(2.0f); + mWorldObjsList.push_back(std::make_shared()); + mWorldObjsList.back()->SetPosition(0.f, 0.f, 0.f); + mWorldObjsList.back()->SetScale(5.0f); for (size_t i = 0; i < 40; i++) { - mRenderList.push_back(std::make_shared()); - mRenderList.back()->SetPosition(-100.f + (i * 5.f), 0.f, 8.f); - mRenderList.back()->SetScale(5.0f); + mWorldObjsList.push_back(std::make_shared()); + mWorldObjsList.back()->SetPosition(-100.f + (i * 5.f), 0.f, 8.f); + mWorldObjsList.back()->SetScale(5.0f); } } @@ -100,9 +115,6 @@ void Graphic3DRenderer::UpdateCamera(CAMERA_MOVE type, const float value) { } void Graphic3DRenderer::Draw(sf::RenderTexture& context) { - sf::BlendMode sBM = sf::BlendNone; - sf::RenderStates sRS(sBM); - #ifdef DEBUG drawnTriCount = 0; #endif @@ -110,149 +122,8 @@ void Graphic3DRenderer::Draw(sf::RenderTexture& context) { // Hardcoded debug movement, TODO: remove it UpdateInternalTestObjects(); - // Load main matrices - M3D_MATRIX viewMat = mMainCamera->GetView(); - M3D_MATRIX invViewMat = M3D_MInverse(viewMat); // aka. camMat - M3D_MATRIX projMat = mMainCamera->GetProj(); - M3D_MATRIX viewProjMat = viewMat * projMat; - - // Create the frustrum "box" - M3D_BoundingFrustum camFrustrum(projMat, false); - camFrustrum.Transform(camFrustrum, invViewMat); - - const float sgRatio = ComputeSGRatio(); - // -= Draw the sky =- - // To avoid unfilled pixels on screen, the "sky-plane" will be rendered - // all over the screen. - // It's will be useless to use and compute a specific rectangle from the - // size of the screen! - // The sky have an infinite z-depth (any objects will be rendered over). -#ifdef DISABLE_WIREFRAME_MODE - context.clear(SF_COLOR_4CHEX(0x00B5E2FF)); -#endif - - // -= Draw the ground =- - // A simple rectangle shape is used to draw the ground over the sky-plane. - // The ground is draw after the sky, and before any other object. - // Depending of the camera pitch, the ratio sky/ground on screen vary. - // Like the sky, the ground have an infinite z-depth (any objects will - // be rendered over). -#ifdef DISABLE_WIREFRAME_MODE - sf::RectangleShape gndRect; - if (mMainCamera->GetPos3f().y >= 0) { - gndRect.setSize(sf::Vector2f(mRTSize.x, mRTSize.y * sgRatio)); - gndRect.setPosition(sf::Vector2f(0, mRTSize.y * (1.f - sgRatio) - 1)); - } else { - gndRect.setSize(sf::Vector2f(mRTSize.x, mRTSize.y * (1.f - sgRatio))); - gndRect.setPosition(sf::Vector2f(0, 0)); - } - gndRect.setFillColor(SF_COLOR_4CHEX(0x009A17FF)); - //gndRect.setFillColor(SF_COLOR_4CHEX(0xD5C2A5FF)); - context.draw(gndRect, sRS); -#else - sf::Vertex gndLine[2]; - gndLine[0].position = sf::Vector2f(0, mRTSize.y * (1.f - sgRatio)); - gndLine[0].color = sf::Color::White; - gndLine[1].position = sf::Vector2f(mRTSize.x - 1, mRTSize.y * (1.f - sgRatio)); - gndLine[1].color = sf::Color::White; - context.draw(gndLine, 2, sf::Lines); -#endif - - // Process scene's objects - size_t prevVCount = 0; - std::vector projVertices; - for (auto& obj : mRenderList) { - M3D_BoundingBox projAABB = obj->GetAABB(); - auto oTMat = obj->GetTransform(); - - // Object outside frustrum clipping - projAABB.Transform(projAABB, oTMat); - M3D_ContainmentType objInFrustrum = camFrustrum.Contains(projAABB); -#ifndef DISABLE_AABB_CLIPPING - if (objInFrustrum != DISJOINT) -#endif - { - size_t vCount = obj->GetObjectVerticesCount(); - auto& oMesh = obj->GetObjectMesh(); - if (vCount > prevVCount) - projVertices.resize(vCount); - - // Vertices homogeneous clip space transformation - M3D_V3Transform( - projVertices.data(), sizeof(M3D_F4), - reinterpret_cast(oMesh.vertices.data()), sizeof(Vertex), - vCount, - oTMat * viewProjMat - ); - - // Draw the object indice triangles if visible or partially clipped - sf::Vertex v_tri[4]; - for (auto& objPt : oMesh.parts) { - auto indicePtr = static_cast(objPt.indices.data()); - - for (uint32_t i = 0; i < objPt.GetIndicesCount(); i += 3) { - // Misscontructed indices tree failsafe - if (i+2 > objPt.GetIndicesCount()) - break; - - // Triangle clipping -#ifndef DISABLE_TRIANGLE_CLIPPING - //TODO: scissor/clipping depending of how many vertices are outside/inside the clipspace, implement complete Cohen-Sutherland algo or Cyrus–Beck one - if (VertexClipTest(projVertices.at(indicePtr[i]), mRTSize, 2.5f) && - VertexClipTest(projVertices.at(indicePtr[i+1]), mRTSize, 2.5f) && - VertexClipTest(projVertices.at(indicePtr[i+2]), mRTSize, 2.5f)) -#endif - { - - M3D_VECTOR V1 = M3D_V4LoadF4(&projVertices.at(indicePtr[i])); - M3D_VECTOR V2 = M3D_V4LoadF4(&projVertices.at(indicePtr[i+1])); - M3D_VECTOR V3 = M3D_V4LoadF4(&projVertices.at(indicePtr[i+2])); - - // Do the perspective divide - V1 = M3D_V4Divide(V1, M3D_V4SplatW(V1)); - V2 = M3D_V4Divide(V2, M3D_V4SplatW(V2)); - V3 = M3D_V4Divide(V3, M3D_V4SplatW(V3)); - - V1 = M3D_V3TransformNDCToViewport(V1, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f); - V2 = M3D_V3TransformNDCToViewport(V2, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f); - V3 = M3D_V3TransformNDCToViewport(V3, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f); - - // Face culling - if (M3D_V4GetX(M3D_TNormal(V1,V2,V3))*0.5f <= 0) { - if (objInFrustrum == DISJOINT) { - v_tri[0].color = sf::Color::Red; - v_tri[1].color = sf::Color::Red; - v_tri[2].color = sf::Color::Red; - } else if (objInFrustrum == INTERSECTS) { - v_tri[0].color = sf::Color::Yellow; - v_tri[1].color = sf::Color::Yellow; - v_tri[2].color = sf::Color::Yellow; - } else { - v_tri[0].color = oMesh.vertices[indicePtr[i]].color; - v_tri[1].color = oMesh.vertices[indicePtr[i+1]].color; - v_tri[2].color = oMesh.vertices[indicePtr[i+2]].color; - } - - v_tri[0].position = sf::Vector2f(M3D_V4GetX(V1), M3D_V4GetY(V1)); - v_tri[1].position = sf::Vector2f(M3D_V4GetX(V2), M3D_V4GetY(V2)); - v_tri[2].position = sf::Vector2f(M3D_V4GetX(V3), M3D_V4GetY(V3)); - v_tri[3] = v_tri[0]; -#ifdef DISABLE_WIREFRAME_MODE - context.draw(v_tri, 4, sf::Triangles, sRS); -#else - context.draw(v_tri, 4, sf::LineStrip, sRS); -#endif -#ifdef DEBUG - drawnTriCount++; -#endif - } - } - } - } - } - - prevVCount = prevVCount; - } + DrawBackground(context); + DrawSceneObjects(context); } void Graphic3DRenderer::UpdateInternalTestObjects() { @@ -262,10 +133,10 @@ void Graphic3DRenderer::UpdateInternalTestObjects() { thetaAngle2 = thetaAngle2 >= 6.283185f ? -6.283185f : thetaAngle2 + 0.005f; static float thetaAngle3 = -4.78f; thetaAngle3 = thetaAngle3 >= 6.283185f ? -6.283185f : thetaAngle3 + 0.008f; - mRenderList[0]->SetRotation(thetaAngle, 0.f, thetaAngle * 0.5f); - mRenderList[1]->SetRotation(thetaAngle2, 0.f, thetaAngle2 * 0.5f); - mRenderList[2]->SetRotation(thetaAngle3, 0.f, thetaAngle3 * 0.5f); - mRenderList[3]->SetRotation(0.f, thetaAngle, 0.f); + mWorldObjsList[0]->SetRotation(thetaAngle, 0.f, thetaAngle * 0.5f); + mWorldObjsList[1]->SetRotation(thetaAngle2, 0.f, thetaAngle2 * 0.5f); + mWorldObjsList[2]->SetRotation(thetaAngle3, 0.f, thetaAngle3 * 0.5f); + mWorldObjsList[3]->SetRotation(0.f, thetaAngle, 0.f); } // Compute the screen ratio between the ground and the sky (aka. Line of Horizon) @@ -314,9 +185,180 @@ float Graphic3DRenderer::ComputeSGRatio() { return sgRatio; } -inline static bool VertexClipTest(M3D_F4& V, sf::Vector2f& RTsize, float gb_factor) { +void Graphic3DRenderer::DrawBackground(sf::RenderTexture& context) { + sf::BlendMode sBM = sf::BlendNone; + sf::RenderStates sRS(sBM); + + const float sgRatio = ComputeSGRatio(); + + // -= Draw the sky =- + // To avoid unfilled pixels on screen, the "sky-plane" will be rendered + // all over the screen. + // It's will be useless to use and compute a specific rectangle from the + // size of the screen! + // The sky have an infinite z-depth (any objects will be rendered over). +#ifdef DISABLE_WIREFRAME_MODE + context.clear(SF_COLOR_4CHEX(0x00B5E2FF)); +#endif + + // -= Draw the ground =- + // A simple rectangle shape is used to draw the ground over the sky-plane. + // The ground is draw after the sky, and before any other object. + // Depending of the camera pitch, the ratio sky/ground on screen vary. + // Like the sky, the ground have an infinite z-depth (any objects will + // be rendered over). +#ifdef DISABLE_WIREFRAME_MODE + sf::RectangleShape gndRect; + if (mMainCamera->GetPos3f().y >= 0) { + gndRect.setSize(sf::Vector2f(mRTSize.x, mRTSize.y * sgRatio)); + gndRect.setPosition(sf::Vector2f(0, mRTSize.y * (1.f - sgRatio) - 1)); + } else { + gndRect.setSize(sf::Vector2f(mRTSize.x, mRTSize.y * (1.f - sgRatio))); + gndRect.setPosition(sf::Vector2f(0, 0)); + } + gndRect.setFillColor(SF_COLOR_4CHEX(0x009A17FF)); + //gndRect.setFillColor(SF_COLOR_4CHEX(0xD5C2A5FF)); + context.draw(gndRect, sRS); +#else + sf::Vertex gndLine[2]; + gndLine[0].position = sf::Vector2f(0, mRTSize.y * (1.f - sgRatio)); + gndLine[0].color = sf::Color::White; + gndLine[1].position = sf::Vector2f(mRTSize.x - 1, mRTSize.y * (1.f - sgRatio)); + gndLine[1].color = sf::Color::White; + context.draw(gndLine, 2, sf::Lines, sRS); +#endif +} + +void Graphic3DRenderer::DrawSceneObjects(sf::RenderTexture& context) { + sf::BlendMode sBM = sf::BlendNone; + sf::RenderStates sRS(sBM); + + // Get global (camera and projection) matrixes + M3D_MATRIX viewMat = mMainCamera->GetView(); + M3D_MATRIX invViewMat = M3D_MInverse(viewMat); // aka. camera matrix + M3D_MATRIX projMat = mMainCamera->GetProj(); + M3D_MATRIX viewProjMat = viewMat * projMat; + + std::vector renderingList; + renderingList.reserve(mWorldObjsList.size()); + M3D_BoundingFrustum camFrustrum(projMat, false); + camFrustrum.Transform(camFrustrum, invViewMat); + for (auto& obj : mWorldObjsList) { +#ifndef DISABLE_AABB_CLIPPING + // Objects visibility AABB test + M3D_BoundingBox projAABB = obj->GetAABB(); + projAABB.Transform(projAABB, obj->GetTransform()); + + // Do the camera/AABB test + M3D_ContainmentType aabbTestResult = camFrustrum.Contains(projAABB); + if (aabbTestResult != DISJOINT) + renderingList.emplace_back(RenderItem(obj.get(), aabbTestResult)); +#else + renderingList.emplace_back(RenderItem(obj.get())); +#endif + } + + // Do the NDC projection of visibles vertices in camera frustrum + size_t prevVCount = 0; + std::vector projVertices; + sf::Vector2f guardband = mRTSize * 3.5f; + for (auto& ri : renderingList) { + size_t vCount = ri.pObj->GetObjectVerticesCount(); + // Resize the output buffer only if we encounter object with more vertices than before + if (vCount > prevVCount) { + projVertices.resize(vCount); + prevVCount = vCount; + } + + auto& oMesh = ri.pObj->GetObjectMesh(); + // Vertices homogeneous clip space (NDC) transformation + M3D_V3Transform( + projVertices.data(), sizeof(M3D_F4), + reinterpret_cast(oMesh.vertices.data()), sizeof(Vertex), + vCount, + ri.pObj->GetTransform() * viewProjMat + ); + + // Look into triangles indices + M3D_F4* triVertices[3]; + sf::Vertex drawPoints[4]; + for (auto& objPt : oMesh.parts) { + auto indicePtr = static_cast(objPt.indices.data()); + + for (uint32_t i = 0; i < objPt.GetIndicesCount(); i += 3) { + // Indices failsafe - discard triangle rendering + if ((i+2 > objPt.GetIndicesCount()) || indicePtr[i] >= vCount || indicePtr[i+1] >= vCount || indicePtr[i+2] >= vCount) { + //log.PrintWarning() + break; + } + + // Retrieve the vertices pointer from indices list + triVertices[0] = &projVertices[indicePtr[i]]; + triVertices[1] = &projVertices[indicePtr[i+1]]; + triVertices[2] = &projVertices[indicePtr[i+2]]; + + // Triangle frustrum clipping -- TODO: Use complete Cohen-Sutherland algo or Cyrus–Beck one +#ifndef DISABLE_TRIANGLE_CLIPPING + if (VertexClipTest(triVertices[0], guardband) && VertexClipTest(triVertices[1], guardband) && VertexClipTest(triVertices[2], guardband)) +#endif + { + M3D_VECTOR V1 = M3D_V4LoadF4(triVertices[0]); + M3D_VECTOR V2 = M3D_V4LoadF4(triVertices[1]); + M3D_VECTOR V3 = M3D_V4LoadF4(triVertices[2]); + + // Back-face culling in LH-z system + // (front when (triangle normal . projected vertice) < 0) + //if (M3D_V4GetX(M3D_V3Dot(M3D_Tri3DNormal(V1,V2,V3), V1)) < 0) + // NOT USED - Too heavy computation resources usage + + // Do the perspective divide + V1 = M3D_V4Divide(V1, M3D_V4SplatW(V1)); + V2 = M3D_V4Divide(V2, M3D_V4SplatW(V2)); + V3 = M3D_V4Divide(V3, M3D_V4SplatW(V3)); + + // Finally project from NDC to the screen + V1 = M3D_V3TransformNDCToViewport(V1, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f); + V2 = M3D_V3TransformNDCToViewport(V2, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f); + V3 = M3D_V3TransformNDCToViewport(V3, 0.f, 0.f, mRTSize.x, mRTSize.y, 1.f, 100.f); + + // Simplified back-face culling on 2D viewport triangle + if (M3D_V4GetX(M3D_Tri2DNormal(V1,V2,V3)) > 0) { + + + // Set pixels color depending of frustrum clipping type - debug purpose + if (ri.frustrumClipType == DISJOINT) { + drawPoints[0].color = drawPoints[1].color = drawPoints[2].color = sf::Color::Red; + } else if (ri.frustrumClipType == INTERSECTS) { + drawPoints[0].color = drawPoints[1].color = drawPoints[2].color = sf::Color::Yellow; + } else { + drawPoints[0].color = oMesh.vertices[indicePtr[i]].color; + drawPoints[1].color = oMesh.vertices[indicePtr[i+1]].color; + drawPoints[2].color = oMesh.vertices[indicePtr[i+2]].color; + } + + drawPoints[0].position = sf::Vector2f(M3D_V4GetX(V1), M3D_V4GetY(V1)); + drawPoints[1].position = sf::Vector2f(M3D_V4GetX(V2), M3D_V4GetY(V2)); + drawPoints[2].position = sf::Vector2f(M3D_V4GetX(V3), M3D_V4GetY(V3)); + drawPoints[3] = drawPoints[0]; +#ifdef DISABLE_WIREFRAME_MODE + context.draw(drawPoints, 4, sf::Triangles, sRS); +#else + context.draw(drawPoints, 4, sf::LineStrip, sRS); +#endif +#ifdef DEBUG + drawnTriCount++; +#endif + } + } + } + } + } +} + +__attribute__((always_inline)) inline static bool VertexClipTest(M3D_F4* V, sf::Vector2f& RTsize) { // Guard band are usually 2-3x the viewport size for the clipping test - return (V.x > -RTsize.x*gb_factor*V.w && V.x < RTsize.y*gb_factor*V.w && - V.y > -RTsize.x*gb_factor*V.w && V.y < RTsize.y*gb_factor*V.w + return (V->z >= 0 && V->z <= V->w && + V->x >= -RTsize.x*V->w && V->x <= RTsize.x*V->w && + V->y >= -RTsize.y*V->w && V->y <= RTsize.y*V->w ); } \ No newline at end of file diff --git a/Engine/Graphics/3DRenderer.hpp b/Engine/Graphics/3DRenderer.hpp index 1afd3d7..9aff053 100644 --- a/Engine/Graphics/3DRenderer.hpp +++ b/Engine/Graphics/3DRenderer.hpp @@ -33,22 +33,21 @@ public: void Draw(sf::RenderTexture& context); // Debug datas -#ifdef DEBUG const unsigned int GetDrawTriCount() const noexcept { return drawnTriCount; } -#endif private: std::unique_ptr mMainCamera; // Default player view sf::Vector2f mRTSize; - std::vector> mRenderList; // List of elements to be rendered next frame + std::vector> mWorldObjsList; // List of elements to be rendered next frame void UpdateInternalTestObjects(); float ComputeSGRatio(); + void DrawBackground(sf::RenderTexture& context); + void DrawSceneObjects(sf::RenderTexture& context); + // Debug datas -#ifdef DEBUG - unsigned int drawnTriCount; -#endif + unsigned int drawnTriCount = 0; }; \ No newline at end of file diff --git a/Engine/Utils/3DMaths.hpp b/Engine/Utils/3DMaths.hpp index 74d0334..90fbd1c 100644 --- a/Engine/Utils/3DMaths.hpp +++ b/Engine/Utils/3DMaths.hpp @@ -350,6 +350,10 @@ M3D_ALIGNED_STRUCT(16) M3D_MATRIX { // // Load/Store functions // +M3D_VECTOR M3D_V4LoadF2(const M3D_F2* src) noexcept; +M3D_VECTOR M3D_V4LoadF2A(const M3D_F2A* src) noexcept; +void M3D_V4StoreF2(M3D_F2* dst, M3D_VECTOR V) noexcept; +void M3D_V4StoreF2A(M3D_F2A* dst, M3D_VECTOR V) noexcept; M3D_VECTOR M3D_V4LoadF3(const M3D_F3* src) noexcept; M3D_VECTOR M3D_V4LoadF3A(const M3D_F3A* src) noexcept; void M3D_V4StoreF3(M3D_F3* dst, M3D_VECTOR V) noexcept; @@ -542,7 +546,8 @@ inline M3D_VECTOR M3D_V4Swizzle(M3D_VECTOR V) noexcept { M3D_VECTOR M3D_QMultiply(M3D_VECTOR Q1, M3D_VECTOR Q2) noexcept; M3D_VECTOR M3D_QConjugate(M3D_VECTOR Q) noexcept; -M3D_VECTOR M3D_TNormal(M3D_VECTOR P1, M3D_VECTOR P2, M3D_VECTOR P3) noexcept; +M3D_VECTOR M3D_Tri2DNormal(M3D_VECTOR P1, M3D_VECTOR P2, M3D_VECTOR P3) noexcept; +M3D_VECTOR M3D_Tri3DNormal(M3D_VECTOR P1, M3D_VECTOR P2, M3D_VECTOR P3) noexcept; void M3D_V4SinCos(M3D_VECTOR* pSin, M3D_VECTOR* pCos, M3D_VECTOR V) noexcept; diff --git a/Engine/Utils/3DMaths_vec.inl b/Engine/Utils/3DMaths_vec.inl index 24bcb1d..e31586c 100644 --- a/Engine/Utils/3DMaths_vec.inl +++ b/Engine/Utils/3DMaths_vec.inl @@ -25,6 +25,50 @@ namespace M3D_Internal { /* -------------------------------------------------------------------------------------------------------------------------- */ +inline M3D_VECTOR M3D_V4LoadF2(const M3D_F2* src) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR V; + V.v4f[0] = src->x; + V.v4f[1] = src->y; + V.v4f[2] = 0.f; + V.v4f[3] = 0.f; + return V; +#else + return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(src))); +#endif +} + +inline M3D_VECTOR M3D_V4LoadF2A(const M3D_F2A* src) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR V; + V.v4f[0] = src->x; + V.v4f[1] = src->y; + V.v4f[2] = 0.f; + V.v4f[3] = 0.f; + return V; +#else + return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(src))); +#endif +} + +inline void M3D_V4StoreF2(M3D_F2* dst, M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + dst->x = V.v4f[0]; + dst->y = V.v4f[1]; +#else + _mm_store_sd(reinterpret_cast(dst), _mm_castps_pd(V)); +#endif +} + +inline void M3D_V4StoreF2A(M3D_F2A* dst, M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + dst->x = V.v4f[0]; + dst->y = V.v4f[1]; +#else + _mm_store_sd(reinterpret_cast(dst), _mm_castps_pd(V)); +#endif +} + inline M3D_VECTOR M3D_V4LoadF3(const M3D_F3* src) noexcept { #ifdef DISABLE_INTRINSICS M3D_VECTOR V; @@ -1161,18 +1205,18 @@ inline M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept { #else // SSE4_INTRINSICS M3D_VECTOR vLengthSq = _mm_dp_ps(V, V, 0x7f); // Prepare for the division - M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq); + M3D_VECTOR vResult = _mm_rsqrt_ps(vLengthSq); // Create zero with a single instruction - M3D_VECTOR vZeroMask = _mm_setzero_ps(); + //M3D_VECTOR vZeroMask = _mm_setzero_ps(); // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + //vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); // Failsafe on zero (Or epsilon) length planes // If the length is infinity, set the elements to zero vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity); // Divide to perform the normalization - vResult = _mm_div_ps(V, vResult); + vResult = _mm_mul_ps(V, vResult); // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); + //vResult = _mm_and_ps(vResult, vZeroMask); // Select qnan or result based on infinite length M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN); M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); @@ -1186,18 +1230,18 @@ inline M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept { vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); // Prepare for the division - M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq); + M3D_VECTOR vResult = _mm_rsqrt_ps(vLengthSq); // Create zero with a single instruction - M3D_VECTOR vZeroMask = _mm_setzero_ps(); + //M3D_VECTOR vZeroMask = _mm_setzero_ps(); // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + //vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); // Failsafe on zero (Or epsilon) length planes // If the length is infinity, set the elements to zero vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity); // Divide to perform the normalization - vResult = _mm_div_ps(V, vResult); + vResult = _mm_mul_ps(V, vResult); // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); + //vResult = _mm_and_ps(vResult, vZeroMask); // Select qnan or result based on infinite length M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN); M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); @@ -1212,18 +1256,18 @@ inline M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept { vLengthSq = _mm_add_ss(vLengthSq, vTemp); vLengthSq = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); // Prepare for the division - M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq); + M3D_VECTOR vResult = _mm_rsqrt_ps(vLengthSq); // Create zero with a single instruction - M3D_VECTOR vZeroMask = _mm_setzero_ps(); + //M3D_VECTOR vZeroMask = _mm_setzero_ps(); // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + //vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); // Failsafe on zero (Or epsilon) length planes // If the length is infinity, set the elements to zero vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity); // Divide to perform the normalization vResult = _mm_div_ps(V, vResult); // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); + //vResult = _mm_and_ps(vResult, vZeroMask); // Select qnan or result based on infinite length M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN); M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); @@ -1339,18 +1383,18 @@ inline M3D_VECTOR M3D_V2Normalize(M3D_VECTOR V) noexcept { #else // SSE4_INTRINSICS M3D_VECTOR vLengthSq = _mm_dp_ps(V, V, 0x3f); // Prepare for the division - M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq); + M3D_VECTOR vResult = _mm_rsqrt_ps(vLengthSq); // Create zero with a single instruction - M3D_VECTOR vZeroMask = _mm_setzero_ps(); + //M3D_VECTOR vZeroMask = _mm_setzero_ps(); // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + //vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); // Failsafe on zero (Or epsilon) length planes // If the length is infinity, set the elements to zero vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity); // Reciprocal mul to perform the normalization - vResult = _mm_div_ps(V, vResult); + vResult = _mm_mul_ps(V, vResult); // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); + //vResult = _mm_and_ps(vResult, vZeroMask); // Select qnan or result based on infinite length M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN); M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); @@ -1363,18 +1407,18 @@ inline M3D_VECTOR M3D_V2Normalize(M3D_VECTOR V) noexcept { vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); vLengthSq = _mm_moveldup_ps(vLengthSq); // Prepare for the division - M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq); + M3D_VECTOR vResult = _mm_rsqrt_ps(vLengthSq); // Create zero with a single instruction - M3D_VECTOR vZeroMask = _mm_setzero_ps(); + //M3D_VECTOR vZeroMask = _mm_setzero_ps(); // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + //vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); // Failsafe on zero (Or epsilon) length planes // If the length is infinity, set the elements to zero vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity); // Reciprocal mul to perform the normalization - vResult = _mm_div_ps(V, vResult); + vResult = _mm_mul_ps(V, vResult); // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); + //vResult = _mm_and_ps(vResult, vZeroMask); // Select qnan or result based on infinite length M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN); M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); @@ -1387,18 +1431,18 @@ inline M3D_VECTOR M3D_V2Normalize(M3D_VECTOR V) noexcept { vLengthSq = _mm_add_ss(vLengthSq, vTemp); vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); // Prepare for the division - M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq); + M3D_VECTOR vResult = _mm_rsqrt_ps(vLengthSq); // Create zero with a single instruction - M3D_VECTOR vZeroMask = _mm_setzero_ps(); + //M3D_VECTOR vZeroMask = _mm_setzero_ps(); // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + //vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); // Failsafe on zero (Or epsilon) length planes // If the length is infinity, set the elements to zero vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity); // Reciprocal mul to perform the normalization - vResult = _mm_div_ps(V, vResult); + vResult = _mm_mul_ps(V, vResult); // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); + //vResult = _mm_and_ps(vResult, vZeroMask); // Select qnan or result based on infinite length M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN); M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); @@ -1473,11 +1517,18 @@ inline M3D_VECTOR M3D_QConjugate(M3D_VECTOR Q) noexcept { #endif } -inline M3D_VECTOR M3D_TNormal(M3D_VECTOR P1, M3D_VECTOR P2, M3D_VECTOR P3) noexcept { +inline M3D_VECTOR M3D_Tri2DNormal(M3D_VECTOR P1, M3D_VECTOR P2, M3D_VECTOR P3) noexcept { M3D_VECTOR L1 = M3D_V4Subtract(P2, P1); M3D_VECTOR L2 = M3D_V4Subtract(P3, P1); - return M3D_V2Normalize(M3D_V2Cross(L2, L1)); + return M3D_V2Normalize(M3D_V2Cross(L1, L2)); +} + +inline M3D_VECTOR M3D_Tri3DNormal(M3D_VECTOR P1, M3D_VECTOR P2, M3D_VECTOR P3) noexcept { + M3D_VECTOR L1 = M3D_V4Subtract(P2, P1); + M3D_VECTOR L2 = M3D_V4Subtract(P3, P1); + + return M3D_V3Normalize(M3D_V3Cross(L1, L2)); } diff --git a/Engine/World/WorldObject.hpp b/Engine/World/WorldObject.hpp index 119a8f3..154ea08 100644 --- a/Engine/World/WorldObject.hpp +++ b/Engine/World/WorldObject.hpp @@ -8,14 +8,14 @@ public: virtual ~WorldObject() = 0; virtual const Mesh& GetObjectMesh() const = 0; virtual const size_t GetObjectVerticesCount() const = 0; - const M3D_MATRIX GetTransform() noexcept { + const M3D_MATRIX GetTransform() const noexcept { M3D_MATRIX M = M3D_MIdentity(); M *= M3D_TransformMatrixScale(M3D_V4LoadF3(&scale)); M *= M3D_TransformMatrixRotation(M3D_V4LoadF3(&rot)); M *= M3D_TransformMatrixTranslate(M3D_V4LoadF3(&pos)); return M; } - const M3D_F4X4 GetTransform4x4f() noexcept { + const M3D_F4X4 GetTransform4x4f() const noexcept { M3D_F4X4 out; M3D_V4StoreF4x4(&out, GetTransform()); return out;