diff --git a/Engine/Graphics/3DGraphics.cpp b/Engine/Graphics/3DGraphics.cpp
deleted file mode 100644
index e69de29..0000000
diff --git a/Engine/Graphics/3DRenderer.cpp b/Engine/Graphics/3DRenderer.cpp
new file mode 100644
index 0000000..5dc1c13
--- /dev/null
+++ b/Engine/Graphics/3DRenderer.cpp
@@ -0,0 +1,89 @@
+#include "3DRenderer.hpp"
+
+
+// Rendering order:
+// model matrix (Object SRT) -> view matrix (camera matrix inverted) -> proj matrix -> clipping -> perspective divide -> viewport transformation -> Rasterizer (draw pixels inside projected triangles on 2D screen)
+// object coordinate -> world coordinate -> camera coordinate -> clip/screen coordinate
+//
+// Rasterizer inputs elements:
+// - texture-buffer (2D array of pixels color value)
+// - z-buffer (2D array of float representing the nearest pixel's depth, all pixels beyond are ignored)
+// - projected vertices-buffer on screen (using vertices-buffer and projection function)
+//
+// Refs:
+// * https://en.wikipedia.org/wiki/3D_projection
+// * https://www.scratchapixel.com/lessons/3d-basic-rendering/rasterization-practical-implementation/overview-rasterization-algorithm.html
+
+// Creates the main camera when none exists yet: positioned at (0, 3, -20), with a
+// 90 degree / 1280:324 / [1, 100] frustum, then builds its view matrix.
+Graphic3DRenderer::Graphic3DRenderer() {
+    if (mMainCamera == nullptr) {
+        mMainCamera = std::make_unique<Camera>();
+        mMainCamera->SetPosition(0.0f, 3.0f, -20.0f);
+        mMainCamera->SetFrustrum(90.0f, 1280.f/324.f, 1.0f, 100.f);
+        mMainCamera->UpdateCamView();
+    }
+}
+
+Graphic3DRenderer::~Graphic3DRenderer() {}
+
+// Draws testObj as wireframe triangles on `context`.
+// Each vertex goes through model * view * projection, the perspective divide and the
+// viewport matrix; every third vertex closes a triangle, which is drawn as a 4-point
+// line strip unless one of its vertices had a projected z <= 0.
+void Graphic3DRenderer::Draw(sf::RenderTexture& context) {
+    sf::BlendMode sBM = sf::BlendNone;
+    sf::RenderStates sRS(sBM);
+    sf::Color yep[] = {sf::Color::White, sf::Color::Blue, sf::Color::Green, sf::Color::Red, sf::Color::Magenta, sf::Color::Cyan, sf::Color::Yellow};
+
+    static float thetaAngle = 0.31f;
+    // Advance the animation angle; once it reaches +2*pi it restarts from -2*pi.
+    thetaAngle = thetaAngle >= 6.283185f ? -6.283185f : thetaAngle + 0.004f;
+    bool clipped = false;
+
+    M3D_MATRIX viewMat = mMainCamera->GetView();
+    M3D_MATRIX projMat = mMainCamera->GetProj();
+    M3D_MATRIX modelMat = M3D_MIdentity() * M3D_TransformMatrixScaling(10.0f, 10.0f, 10.0f) * M3D_TransformMatrixRotationX(thetaAngle) * M3D_TransformMatrixRotationZ(0.5f*thetaAngle) * M3D_TransformMatrixTranslate(0.0f, 0.0f, 5.0f);
+    M3D_MATRIX viewProjMat = (viewMat) * (projMat);
+    M3D_MATRIX MVPMat = modelMat * viewProjMat;
+    M3D_MATRIX viewportMat = M3D_TransformMatrixViewport(1280.0f, 324.f, 0.0f, 0.0f);
+    uint8_t v_cnt = 0, yep_cnt = 0;
+    sf::Vertex v_tri[4];
+    M3D_F3 _2dCoord;
+
+    // Iterate by reference: vertices are only read, so the per-iteration copy is dropped.
+    for (const auto& _v : testObj.mMesh) {
+        M3D_VECTOR _vV = M3D_V4LoadF3(&_v.pos);
+        M3D_VECTOR projV = M3D_V3Transform(_vV, MVPMat);
+        //projV = M3D_V3Transform(projV, viewProjMat);
+
+        // Clipping (simple)
+        if (M3D_V4GetZ(projV) <= 0)
+            clipped = true;
+
+        // Perspective divide
+        M3D_VECTOR _w = M3D_V4SplatW(projV);
+        projV = M3D_V4Divide(projV, _w);
+
+        // Viewport transform
+        projV = M3D_V3Transform(projV, (viewportMat));
+        M3D_V4StoreF3(&_2dCoord, projV);
+        v_tri[v_cnt].position.x = _2dCoord.x;
+        v_tri[v_cnt].position.y = _2dCoord.y;
+        //v_tri[v_cnt].position.z = ((far+near)/2)+((far-near)/2)*_2dCoord.z; //TODO: transform matrix is incomplete
+        // Cycle through the whole palette; the modulus follows the array length.
+        v_tri[v_cnt].color = yep[yep_cnt % (sizeof(yep) / sizeof(yep[0]))];
+
+        // Third vertex collected: emit the triangle and reset the per-triangle state.
+        if (v_cnt++ >= 2) {
+            if (!clipped) {
+                v_tri[3] = v_tri[0];
+                context.draw(v_tri, 4, sf::LineStrip, sRS);
+                //context.draw(v_tri, 3, sf::Triangles, sRS);
+            }
+            v_cnt = 0;
+            yep_cnt++;
+            clipped = false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Engine/Graphics/3DRenderer.hpp b/Engine/Graphics/3DRenderer.hpp
new file mode 100644
index 0000000..226b77d
--- /dev/null
+++ b/Engine/Graphics/3DRenderer.hpp
@@ -0,0 +1,66 @@
+#pragma once
+
+#include
+#include
+
+#include
+
+#include "Camera.hpp"
+#include "../Utils/MeshHelper.hpp"
+
+
+class MeshObjCube final {
+public:
+    MeshObjCube() {
+        mMesh = {
+            // RIGHT
+            {0.5f, 0.5f, -0.5f}, {0.5f,
-0.5f, 0.5f}, {0.5f, 0.5f, 0.5f},
+            {0.5f, 0.5f, -0.5f}, {0.5f, -0.5f, 0.5f}, {0.5f, -0.5f, -0.5f},
+
+            // FRONT
+            {0.5f, 0.5f, 0.5f}, {0.5f, -0.5f, 0.5f}, {-0.5f, -0.5f, 0.5f},
+            {0.5f, 0.5f, 0.5f}, {-0.5f, -0.5f, 0.5f}, {-0.5f, 0.5f, 0.5f},
+
+            // LEFT
+            {-0.5f, 0.5f, 0.5f}, {-0.5f, -0.5f, 0.5f}, {-0.5f, 0.5f, -0.5f},
+            {-0.5f, 0.5f, -0.5f}, {-0.5f, -0.5f, 0.5f}, {-0.5f, -0.5f, -0.5f},
+
+            // BACK
+            {0.5f, 0.5f, -0.5f}, {-0.5f, 0.5f, -0.5f}, {0.5f, -0.5f, -0.5f},
+            {-0.5f, 0.5f, -0.5f}, {-0.5f, -0.5f, -0.5f}, {0.5f, -0.5f, -0.5f},
+
+            // TOP
+            {-0.5f, 0.5f, -0.5f}, {0.5f, 0.5f, -0.5f}, {-0.5f, 0.5f, 0.5f},
+            {0.5f, 0.5f, -0.5f}, {0.5f, 0.5f, 0.5f}, {-0.5f, 0.5f, 0.5f},
+
+            // BOTTOM
+            {-0.5f, -0.5f, -0.5f}, {-0.5f, -0.5f, 0.5f}, {0.5f, -0.5f, 0.5f},
+            {0.5f, -0.5f, 0.5f}, {0.5f, -0.5f, -0.5f}, {-0.5f, -0.5f, -0.5f},
+        };
+    }
+
+    std::vector mMesh;
+
+private:
+
+};
+
+class Graphic3DRenderer final {
+public:
+    Graphic3DRenderer();
+    ~Graphic3DRenderer();
+
+    Graphic3DRenderer(Graphic3DRenderer&&) = default;
+    Graphic3DRenderer& operator= (Graphic3DRenderer&&) = default;
+    Graphic3DRenderer(Graphic3DRenderer const&) = delete;
+    Graphic3DRenderer& operator= (Graphic3DRenderer const&) = delete;
+
+    void Draw(sf::RenderTexture& context);
+
+private:
+    sf::RenderTexture mWorldRender; // This is used to create the scene
+    std::unique_ptr<Camera> mMainCamera;
+
+    MeshObjCube testObj;
+
+};
\ No newline at end of file
diff --git a/Engine/Graphics/Camera.cpp b/Engine/Graphics/Camera.cpp
index e69de29..9edac58 100644
--- a/Engine/Graphics/Camera.cpp
+++ b/Engine/Graphics/Camera.cpp
@@ -0,0 +1,51 @@
+#include "Camera.hpp"
+
+
+// Initialises the projection with a 90 degree field of view, a 1280:1024 ratio and
+// near/far planes at 1 and 1000.
+Camera::Camera() {
+    SetFrustrum(90.f, (1280.f/1024.f), 1.0f, 1000.f);
+}
+
+// Rebuilds the left-handed projection matrix and stores it in mProjMat.
+//   fov    field of view in degrees (converted to radians before use)
+//   r      ratio forwarded to the frustum builder
+//   zn/zf  near and far plane distances
+void Camera::SetFrustrum(float fov, float r, float zn, float zf) {
+    //if (!frameDirty)
+    M3D_MATRIX pMat = M3D_TransformMatrixFrustrumFovLH(M3D_Deg2Rad(fov), r, zn, zf);
+    M3D_V4StoreF4x4(&mProjMat, pMat);
+}
+
+// Rebuilds mViewMat from the stored position, look direction and up vector.
+// mLook is a direction (LookAt() stores normalize(target - pos) in it), so the
+// look-to builder is the matching one; the look-at builder expects a focus point.
+void Camera::UpdateCamView() {
+    M3D_VECTOR P = M3D_V4LoadF3(&mPos);
+    M3D_VECTOR L = M3D_V4LoadF3(&mLook);
+    M3D_VECTOR U = M3D_V4LoadF3(&mUp);
+
+    M3D_V4StoreF4x4(&mViewMat, M3D_TransformMatrixCamLookToLH(P, L, U));
+}
+
+// Places the camera at pos and derives the look/right/up vectors aimed at target.
+// Only the stored vectors change; call UpdateCamView() to refresh the view matrix.
+void Camera::LookAt(M3D_VECTOR pos, M3D_VECTOR target, M3D_VECTOR worldUp) {
+    M3D_VECTOR L = M3D_V3Normalize(M3D_V4Subtract(target, pos));
+    M3D_VECTOR R = M3D_V3Normalize(M3D_V3Cross(worldUp, L));
+    M3D_VECTOR U = M3D_V3Cross(L, R);
+
+    M3D_V4StoreF3(&mPos, pos);
+    M3D_V4StoreF3(&mLook, L);
+    M3D_V4StoreF3(&mRight, R);
+    M3D_V4StoreF3(&mUp, U);
+}
+
+// Convenience overload taking plain float3 values.
+void Camera::LookAt(const M3D_F3& pos, const M3D_F3& target, const M3D_F3& up) {
+    M3D_VECTOR P = M3D_V4LoadF3(&pos);
+    M3D_VECTOR T = M3D_V4LoadF3(&target);
+    M3D_VECTOR U = M3D_V4LoadF3(&up);
+
+    LookAt(P, T, U);
+}
\ No newline at end of file
diff --git a/Engine/Graphics/Camera.hpp b/Engine/Graphics/Camera.hpp
new file mode 100644
index 0000000..a99bc00
--- /dev/null
+++ b/Engine/Graphics/Camera.hpp
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "../Utils/3DMaths.hpp"
+
+
+class Camera final {
+public:
+    Camera();
+    ~Camera() {}
+
+    Camera(Camera&&) = default;
+    Camera& operator= (Camera&&) = default;
+    Camera(Camera const&) = delete;
+    Camera& operator= (Camera const&) = delete;
+
+    M3D_VECTOR GetPos() const { return M3D_V4LoadF3(&mPos); }
+    M3D_F3 GetPos3f() const { return mPos; }
+    M3D_MATRIX GetView() const { return M3D_V4LoadF4x4(&mViewMat); }
+    M3D_F4X4 GetView4x4f() const { return mViewMat; }
+    M3D_MATRIX GetProj() const { return M3D_V4LoadF4x4(&mProjMat); }
+    M3D_F4X4 GetProj4x4f() const { return mProjMat; }
+
+    void SetPosition(const float x, const float y, const float z) { mPos = M3D_F3(x, y, z); }
+    void SetPosition(const M3D_F3 v) { mPos = v; }
+
+    void SetFrustrum(float fov, float r, float zn, float zf);
+    void UpdateCamView();
+    void LookAt(M3D_VECTOR pos, M3D_VECTOR target, M3D_VECTOR worldUp);
+    void LookAt(const M3D_F3& pos, const M3D_F3& target, const M3D_F3& up);
+
+private:
+    M3D_F4X4 mProjMat = M3D_MIdentity4x4();
+    M3D_F4X4 mViewMat =
M3D_MIdentity4x4(); + + M3D_F3 mPos = {1.5f, 1.5f, 1.5f}; + M3D_F3 mRight = {1.0f, 0.0f, 0.0f}; + M3D_F3 mUp = {0.0f, 1.0f, 0.0f}; + M3D_F3 mLook = {0.0f, 0.0f, 1.0f}; + +}; \ No newline at end of file diff --git a/Engine/Graphics/UI.cpp b/Engine/Graphics/UI.cpp index 6b996a0..a19389b 100644 --- a/Engine/Graphics/UI.cpp +++ b/Engine/Graphics/UI.cpp @@ -34,8 +34,9 @@ CockpitUI::CockpitUI() : UI() { mUIRender.setSmooth(true); mUIRender.setRepeated(false); - mBoardTexture.setSrgb(true); - mBoardTexture.loadFromFile("cockpit_ui_empty_rescaled.png", sf::IntRect(0, 0, 1280, 780)); + mStaticCockpitTexture.setSrgb(true); + if (!mStaticCockpitTexture.loadFromFile("cockpit_ui_empty_rescaled.png", sf::IntRect(0, 0, 1280, 780))) + throw std::runtime_error("Unable to load texture datas"); } void CockpitUI::Update() { @@ -50,7 +51,7 @@ void CockpitUI::Draw(std::shared_ptr context) { mUIRender.clear(sf::Color::Transparent); // Draw the static board - sf::Sprite staticBoardSprite(mBoardTexture); + sf::Sprite staticBoardSprite(mStaticCockpitTexture); mUIRender.draw(staticBoardSprite, sRS); // Draw the radar display @@ -87,4 +88,21 @@ WorldUI::WorldUI() : UI() { void WorldUI::Update() { +} + +void WorldUI::Draw(std::shared_ptr context) { + sf::BlendMode sBM = sf::BlendNone; + sf::RenderStates sRS(sBM); + + // Clear the UI screen + mUIRender.clear(sf::Color::Transparent); + + // Draw the 3D view + m3DEngine.Draw(mUIRender); + + // Do the final texture render + mUIRender.display(); + + // OnScreen rendering + DrawUIOnRenderWindow(context); } \ No newline at end of file diff --git a/Engine/Graphics/UI.hpp b/Engine/Graphics/UI.hpp index 5007b50..d159745 100644 --- a/Engine/Graphics/UI.hpp +++ b/Engine/Graphics/UI.hpp @@ -5,6 +5,8 @@ #include #include +#include "3DRenderer.hpp" + class UI { public: @@ -14,7 +16,7 @@ public: virtual inline void Draw(std::shared_ptr c) { DrawUIOnRenderWindow(c); } protected: - sf::RenderTexture mUIRender; // This is used to create the scene + 
sf::RenderTexture mUIRender; // The screen to draw onto void DrawUIOnRenderWindow(std::shared_ptr context); @@ -36,7 +38,7 @@ public: void Draw(std::shared_ptr) override; private: - sf::Texture mBoardTexture; + sf::Texture mStaticCockpitTexture; }; @@ -51,6 +53,9 @@ public: WorldUI& operator= (WorldUI const&) = delete; void Update() override; - //void Draw(std::shared_ptr) override; + void Draw(std::shared_ptr) override; + +private: + Graphic3DRenderer m3DEngine; }; \ No newline at end of file diff --git a/Engine/Utils/3DMaths.cpp b/Engine/Utils/3DMaths.cpp deleted file mode 100644 index e69de29..0000000 diff --git a/Engine/Utils/3DMaths.hpp b/Engine/Utils/3DMaths.hpp index e69de29..53d0830 100644 --- a/Engine/Utils/3DMaths.hpp +++ b/Engine/Utils/3DMaths.hpp @@ -0,0 +1,399 @@ +#pragma once + +#include +#ifndef DISABLE_INTRINSICS + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html# + // https://stackoverflow.com/tags/sse/info + // https://lowleveldev.substack.com/p/simd-a-practical-guide +# include + // If GNU-gcc => NO_XMVECTOR_OVERLOADS +#else +# include +#endif + +#ifndef __cplusplus +#error This header requires C++ +#endif + + +#ifndef DISABLE_INTRINSICS + #ifdef NO_MOVNT + #define M3D_STREAM_PS( p, a ) _mm_store_ps((p), (a)) + #define M3D_STREAM_256_PS( p, a ) _mm256_store_ps((p), (a)) + #define M3D_SFENCE() + #else + #define M3D_STREAM_PS( p, a ) _mm_stream_ps((p), (a)) + #define M3D_STREAM_256b_PS( p, a ) _mm256_stream_ps((p), (a)) + #define M3D_SFENCE() _mm_sfence() + #endif + + #ifdef FMA3_INTRINSICS + #define M3D_FMADD_PS( a, b, c ) _mm_fmadd_ps((a), (b), (c)) + #define M3D_FNMADD_PS( a, b, c ) _mm_fnmadd_ps((a), (b), (c)) + #else + #define M3D_FMADD_PS( a, b, c ) _mm_add_ps(_mm_mul_ps((a), (b)), (c)) + #define M3D_FNMADD_PS( a, b, c ) _mm_sub_ps((c), _mm_mul_ps((a), (b))) + #endif + + #if defined(AVX_INTRINSICS) && defined(FAVOR_INTEL) + #define M3D_PERMUTE_PS( v, c ) _mm_permute_ps((v), c ) + #else + #define M3D_PERMUTE_PS( v, c 
) _mm_shuffle_ps((v), (v), c ) + #endif +#endif + +// +// Math constants and helping functions +// +constexpr float M3D_PI = 3.141592654f; +constexpr float M3D_2PI = 6.283185307f; +constexpr float M3D_1DIVPI = 0.318309886f; +constexpr float M3D_1DIV2PI = 0.159154943f; +constexpr float M3D_PIDIV2 = 1.570796327f; +constexpr float M3D_PIDIV4 = 0.785398163f; + +constexpr float M3D_Deg2Rad(float a) noexcept { return a * (M3D_PI / 180.0f); } +constexpr float M3D_Rad2Deg(float a) noexcept { return a * (180.0f / M3D_PI); } + + +// +// Generic SIMD vector implementation +// +// Call convention (x86_64): +// 1-3rd vector parameter should be M3D_VECTOR +// 4th+ vector parameter should be M3D_VECTOR& +// +#ifdef DISABLE_INTRINSICS +struct sM3DV4 { + union { + float v4f[4]; + uint32_t v4u[4]; + }; +}; +using M3D_VECTOR = sM3DV4; +#else +using M3D_VECTOR = __m128; +#endif + +struct __attribute__((aligned(16))) M3D_V4F32 { + union { + float f[4]; + M3D_VECTOR v; + }; + + inline operator M3D_VECTOR() const noexcept { return v; } + inline operator const float* () const noexcept { return f; } +#ifndef DISABLE_INTRINSICS + inline operator __m128i() const noexcept { return _mm_castps_si128(v); } + inline operator __m128d() const noexcept { return _mm_castps_pd(v); } +#endif +}; + +struct __attribute__((aligned(16))) M3D_V4U8 { + union { + uint8_t u[16]; + M3D_VECTOR v; + }; + + inline operator M3D_VECTOR() const noexcept { return v; } + inline operator const uint8_t* () const noexcept { return u; } +#ifndef DISABLE_INTRINSICS + inline operator __m128i() const noexcept { return _mm_castps_si128(v); } + inline operator __m128d() const noexcept { return _mm_castps_pd(v); } +#endif +}; + +struct __attribute__((aligned(16))) M3D_V4U32 { + union { + uint32_t u[4]; + M3D_VECTOR v; + }; + + inline operator M3D_VECTOR() const noexcept { return v; } + inline operator const uint32_t* () const noexcept { return u; } +#ifndef DISABLE_INTRINSICS + inline operator __m128i() const noexcept { return 
_mm_castps_si128(v); } + inline operator __m128d() const noexcept { return _mm_castps_pd(v); } +#endif +}; + +struct __attribute__((aligned(16))) M3D_V4I32 { + union { + int32_t i[4]; + M3D_VECTOR v; + }; + + inline operator M3D_VECTOR() const noexcept { return v; } + inline operator const int32_t* () const noexcept { return i; } +#ifndef DISABLE_INTRINSICS + inline operator __m128i() const noexcept { return _mm_castps_si128(v); } + inline operator __m128d() const noexcept { return _mm_castps_pd(v); } +#endif +}; + +struct M3D_F3 { + float x; + float y; + float z; + + M3D_F3() = default; + + M3D_F3(const M3D_F3&) = default; + M3D_F3& operator=(const M3D_F3&) = default; + M3D_F3(M3D_F3&&) = default; + M3D_F3& operator=(M3D_F3&&) = default; + + constexpr M3D_F3(float _x, float _y, float _z) noexcept : x(_x), y(_y), z(_z) {} +}; +struct __attribute__((aligned(16))) M3D_F3A : public M3D_F3 { + using M3D_F3::M3D_F3; +}; + +struct M3D_F4 { + float x; + float y; + float z; + float w; + + M3D_F4() = default; + + M3D_F4(const M3D_F4&) = default; + M3D_F4& operator=(const M3D_F4&) = default; + M3D_F4(M3D_F4&&) = default; + M3D_F4& operator=(M3D_F4&&) = default; + + constexpr M3D_F4(float _x, float _y, float _z, float _w) noexcept : x(_x), y(_y), z(_z), w(_w) {} + +#if (__cplusplus >= 202002L) + bool operator == (const M3D_F4&) const = default; + auto operator <=> (const M3D_F4&) const = default; +#endif +}; +struct __attribute__((aligned(16))) M3D_F4A : public M3D_F4 { + using M3D_F4::M3D_F4; +}; + +struct M3D_F4X4 { + union { + struct { + float _00, _01, _02, _03; + float _10, _11, _12, _13; + float _20, _21, _22, _23; + float _30, _31, _32, _33; + }; + float mat[4][4]; + }; + + M3D_F4X4() = default; + + M3D_F4X4(const M3D_F4X4&) = default; + M3D_F4X4& operator=(const M3D_F4X4&) = default; + M3D_F4X4(M3D_F4X4&&) = default; + M3D_F4X4& operator=(M3D_F4X4&&) = default; + + constexpr M3D_F4X4(float f00, float f01, float f02, float f03, + float f10, float f11, float f12, float 
f13,
+                       float f20, float f21, float f22, float f23,
+                       float f30, float f31, float f32, float f33) noexcept
+        : _00(f00), _01(f01), _02(f02), _03(f03),
+          _10(f10), _11(f11), _12(f12), _13(f13),
+          _20(f20), _21(f21), _22(f22), _23(f23),
+          _30(f30), _31(f31), _32(f32), _33(f33) {}
+
+    float operator() (size_t row, size_t column) const noexcept { return mat[row][column]; }
+    float& operator() (size_t row, size_t column) noexcept { return mat[row][column]; }
+
+#if (__cplusplus >= 202002L)
+    bool operator == (const M3D_F4X4&) const = default;
+    auto operator <=> (const M3D_F4X4&) const = default;
+#endif
+};
+struct __attribute__((aligned(16))) M3D_F4X4A : public M3D_F4X4
+{
+    using M3D_F4X4::M3D_F4X4;
+};
+
+
+//
+// Generic SIMD matrix implementation
+//
+// Call convention (x86_64):
+// 1st matrix parameter should be M3D_MATRIX
+// 2nd+ matrix parameter should be M3D_MATRIX&
+//
+#ifdef DISABLE_INTRINSICS
+struct M3D_MATRIX {
+    union {
+        M3D_VECTOR rows[4];
+        struct {
+            float _00, _01, _02, _03;
+            float _10, _11, _12, _13;
+            float _20, _21, _22, _23;
+            float _30, _31, _32, _33;
+        };
+        float mat[4][4];
+    };
+#else
+struct __attribute__((aligned(16))) M3D_MATRIX {
+    M3D_VECTOR rows[4];
+#endif
+    M3D_MATRIX() = default;
+    M3D_MATRIX(const M3D_MATRIX&) = default;
+    M3D_MATRIX& operator=(const M3D_MATRIX&) = default;
+
+    M3D_MATRIX(M3D_MATRIX&&) = default;
+    M3D_MATRIX& operator=(M3D_MATRIX&&) = default;
+
+    constexpr M3D_MATRIX(M3D_VECTOR v0, M3D_VECTOR v1, M3D_VECTOR v2, const M3D_VECTOR& v3) noexcept : rows{ v0,v1,v2,v3 } {} // const&: the 4th row may also be a temporary or a const vector
+    M3D_MATRIX(float f00, float f01, float f02, float f03,
+               float f10, float f11, float f12, float f13,
+               float f20, float f21, float f22, float f23,
+               float f30, float f31, float f32, float f33) noexcept;
+
+#ifdef DISABLE_INTRINSICS
+    float operator() (size_t row, size_t column) const noexcept { return mat[row][column]; }
+    float& operator() (size_t row, size_t column) noexcept { return mat[row][column]; }
+#endif
+
+    M3D_MATRIX operator+ () const noexcept { return *this; }
+    M3D_MATRIX operator- () const noexcept;
+
+    M3D_MATRIX& operator+= (M3D_MATRIX M) noexcept;
+    M3D_MATRIX& operator-= (M3D_MATRIX M) noexcept;
+    M3D_MATRIX& operator*= (M3D_MATRIX M) noexcept;
+    M3D_MATRIX& operator*= (float S) noexcept;
+    M3D_MATRIX& operator/= (float S) noexcept;
+
+    M3D_MATRIX operator+ (M3D_MATRIX M) const noexcept;
+    M3D_MATRIX operator- (M3D_MATRIX M) const noexcept;
+    M3D_MATRIX operator* (M3D_MATRIX M) const noexcept;
+    M3D_MATRIX operator* (float S) const noexcept;
+    M3D_MATRIX operator/ (float S) const noexcept;
+
+    friend M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept; // by value: same signature as the inline definition in 3DMaths.inl
+};
+
+
+//
+// Load/Store functions
+//
+M3D_VECTOR M3D_V4LoadF3(const M3D_F3* src) noexcept;
+M3D_VECTOR M3D_V4LoadF3A(const M3D_F3A* src) noexcept;
+void M3D_V4StoreF3(M3D_F3* dst, M3D_VECTOR V) noexcept;
+void M3D_V4StoreF3A(M3D_F3A* dst, M3D_VECTOR V) noexcept;
+M3D_VECTOR M3D_V4LoadF4(const M3D_F4* src) noexcept;
+M3D_VECTOR M3D_V4LoadF4A(const M3D_F4A* src) noexcept; // NOTE(review): 3DMaths.inl defines M3D_V4LoadV4A instead; this name has no definition there
+void M3D_V4StoreF4(M3D_F4* dst, M3D_VECTOR V) noexcept;
+void M3D_V4StoreF4A(M3D_F4A* dst, M3D_VECTOR V) noexcept;
+M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept;
+M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept;
+void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept;
+void M3D_V4StoreF4x4A(M3D_F4X4A* dst, M3D_MATRIX M) noexcept;
+
+
+//
+// Vector operation
+//
+M3D_VECTOR M3D_V4Set(float x, float y, float z, float w) noexcept;
+M3D_VECTOR M3D_V4Negate(M3D_VECTOR V) noexcept;
+M3D_VECTOR M3D_V4Replicate(float val) noexcept;
+float M3D_V4GetX(M3D_VECTOR V) noexcept;
+float M3D_V4GetY(M3D_VECTOR V) noexcept;
+float M3D_V4GetZ(M3D_VECTOR V) noexcept;
+float M3D_V4GetW(M3D_VECTOR V) noexcept;
+M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept;
+M3D_VECTOR M3D_V4SplatY(M3D_VECTOR V) noexcept;
+M3D_VECTOR M3D_V4SplatZ(M3D_VECTOR V) noexcept;
+M3D_VECTOR M3D_V4SplatW(M3D_VECTOR V) noexcept;
+M3D_VECTOR M3D_V4Add(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
+M3D_VECTOR M3D_V4Subtract(M3D_VECTOR V1, M3D_VECTOR V2) noexcept; +M3D_VECTOR M3D_V4MultiplyAdd(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept; +M3D_VECTOR M3D_V4Divide(M3D_VECTOR V1, M3D_VECTOR V2) noexcept; +M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept; +M3D_VECTOR M3D_V4Select(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR Control) noexcept; +M3D_VECTOR M3D_V4MergeXY(M3D_VECTOR V1, M3D_VECTOR V2) noexcept; +M3D_VECTOR M3D_V4MergeZW(M3D_VECTOR V1, M3D_VECTOR V2) noexcept; +M3D_VECTOR M3D_V4Sqrt(M3D_VECTOR V) noexcept; +M3D_VECTOR M3D_V3Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept; +M3D_VECTOR M3D_V3Cross(M3D_VECTOR V1, M3D_VECTOR V2) noexcept; +M3D_VECTOR M3D_V3LengthSq(M3D_VECTOR V) noexcept; +M3D_VECTOR M3D_V3Length(M3D_VECTOR V) noexcept; +M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept; + + +// +// Matrix operation +// +M3D_MATRIX M3D_MIdentity() noexcept; +M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept; +M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept; + + +// +// Vector/Matrix operation +// +M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept; + + +// +// Common transformation matrix constructor functions +// +M3D_MATRIX M3D_TransformMatrixCamLookAtLH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept; +M3D_MATRIX M3D_TransformMatrixCamLookAtRH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept; +M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept; +M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept; +M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept; +M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept; +M3D_MATRIX M3D_TransformMatrixScaling(float ScaleX, float ScaleY, float ScaleZ) noexcept; +M3D_MATRIX 
M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept; +M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept; +M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept; +M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept; +M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept; + + +// +// Common values for vector/matrix manipulation +// +#ifndef M3D_GCONST +# if defined(__GNUC__) && !defined(__MINGW32__) +# define M3D_GCONST extern const __attribute__((weak)) +# else +# define M3D_GCONST extern const __declspec(selectany) +# endif +#endif +M3D_GCONST M3D_V4F32 M3D_MIdentityR0 = {{{1.0f, 0.0f, 0.0f, 0.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MIdentityR1 = {{{0.0f, 1.0f, 0.0f, 0.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MIdentityR2 = {{{0.0f, 0.0f, 1.0f, 0.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MIdentityR3 = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MIdentityR0_n = {{{-1.0f, 0.0f, 0.0f, 0.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MIdentityR1_n = {{{0.0f, -1.0f, 0.0f, 0.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MIdentityR2_n = {{{0.0f, 0.0f, -1.0f, 0.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MIdentityR3_n = {{{0.0f, 0.0f, 0.0f, -1.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MNegateX = {{{-1.0f, 1.0f, 1.0f, 1.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MNegateY = {{{1.0f, -1.0f, 1.0f, 1.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MNegateZ = {{{1.0f, 1.0f, -1.0f, 1.0f}}}; +M3D_GCONST M3D_V4F32 M3D_MNegateW = {{{1.0f, 1.0f, 1.0f, -1.0f}}}; +M3D_GCONST M3D_V4I32 M3D_MInfinity = {{{0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000}}}; +M3D_GCONST M3D_V4I32 M3D_MQNaN = {{{0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000}}}; +M3D_GCONST M3D_V4U32 M3D_MMaskX = {{{0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000}}}; +M3D_GCONST M3D_V4U32 M3D_MMaskY = {{{0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000}}}; +M3D_GCONST M3D_V4U32 M3D_MMaskZ = {{{0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000}}}; +M3D_GCONST M3D_V4U32 M3D_MMaskW = {{{0x00000000, 0x00000000, 
0x00000000, 0xFFFFFFFF}}}; +M3D_GCONST M3D_V4U32 M3D_MMask3 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000}}}; +M3D_GCONST M3D_V4U32 M3D_MSelect1000 = {{{0xFFFFFFFF, 0x0, 0x0, 0x0}}}; +M3D_GCONST M3D_V4U32 M3D_MSelect1100 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0}}}; +M3D_GCONST M3D_V4U32 M3D_MSelect1110 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0}}}; +M3D_GCONST M3D_V4U32 M3D_MSelect1011 = {{{0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0xFFFFFFFF}}}; + +constexpr M3D_F4X4 M3D_MIdentity4x4() { + M3D_F4X4 I( + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f); + + return I; +} + +#include "3DMaths.inl" \ No newline at end of file diff --git a/Engine/Utils/3DMaths.inl b/Engine/Utils/3DMaths.inl new file mode 100644 index 0000000..02d73a0 --- /dev/null +++ b/Engine/Utils/3DMaths.inl @@ -0,0 +1,1528 @@ +#pragma once + +#include "3DMaths.hpp" + + +inline void M3D_ScalarSinCos(float* pSin, float* pCos, float Value) noexcept { + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = M3D_1DIV2PI * Value; + if (Value >= 0.0f) + quotient = static_cast(static_cast(quotient + 0.5f)); + else + quotient = static_cast(static_cast(quotient - 0.5f)); + + float y = Value - M3D_2PI * quotient; + + // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 
+ float sign; + if (y > M3D_PIDIV2) { + y = M3D_PI - y; + sign = -1.0f; + } else if (y < -M3D_PIDIV2) { + y = -M3D_PI - y; + sign = -1.0f; + } else { + sign = +1.0f; + } + + float y2 = y * y; + + // 11-degree minimax approximation + *pSin = (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y; + + // 10-degree minimax approximation + float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f; + *pCos = sign * p; +} + + +/* -------------------------------------------------------------------------------------------------------------------------- */ + +inline M3D_MATRIX::M3D_MATRIX( + float f00, float f01, float f02, float f03, + float f10, float f11, float f12, float f13, + float f20, float f21, float f22, float f23, + float f30, float f31, float f32, float f33 +) noexcept { + rows[0] = M3D_V4Set(f00, f01, f02, f03); + rows[1] = M3D_V4Set(f10, f11, f12, f13); + rows[2] = M3D_V4Set(f20, f21, f22, f23); + rows[3] = M3D_V4Set(f30, f31, f32, f33); +} + +inline M3D_MATRIX M3D_MATRIX::operator- () const noexcept { + M3D_MATRIX ret; + ret.rows[0] = M3D_V4Negate(rows[0]); + ret.rows[1] = M3D_V4Negate(rows[1]); + ret.rows[2] = M3D_V4Negate(rows[2]); + ret.rows[3] = M3D_V4Negate(rows[3]); + return ret; +} + +inline M3D_MATRIX& M3D_MATRIX::operator+= (M3D_MATRIX M) noexcept { + rows[0] = M3D_V4Add(rows[0], M.rows[0]); + rows[1] = M3D_V4Add(rows[1], M.rows[1]); + rows[2] = M3D_V4Add(rows[2], M.rows[2]); + rows[3] = M3D_V4Add(rows[3], M.rows[3]); + return *this; +} +inline M3D_MATRIX M3D_MATRIX::operator+ (M3D_MATRIX M) const noexcept { + M3D_MATRIX ret; + ret.rows[0] = M3D_V4Add(rows[0], M.rows[0]); + ret.rows[1] = M3D_V4Add(rows[1], M.rows[1]); + ret.rows[2] = M3D_V4Add(rows[2], M.rows[2]); + ret.rows[3] = M3D_V4Add(rows[3], M.rows[3]); + return ret; +} + +inline M3D_MATRIX& M3D_MATRIX::operator-= (M3D_MATRIX M) noexcept { + rows[0] = 
M3D_V4Subtract(rows[0], M.rows[0]); + rows[1] = M3D_V4Subtract(rows[1], M.rows[1]); + rows[2] = M3D_V4Subtract(rows[2], M.rows[2]); + rows[3] = M3D_V4Subtract(rows[3], M.rows[3]); + return *this; +} +inline M3D_MATRIX M3D_MATRIX::operator- (M3D_MATRIX M) const noexcept { + M3D_MATRIX ret; + ret.rows[0] = M3D_V4Subtract(rows[0], M.rows[0]); + ret.rows[1] = M3D_V4Subtract(rows[1], M.rows[1]); + ret.rows[2] = M3D_V4Subtract(rows[2], M.rows[2]); + ret.rows[3] = M3D_V4Subtract(rows[3], M.rows[3]); + return ret; +} + +inline M3D_MATRIX& M3D_MATRIX::operator*=(M3D_MATRIX M) noexcept { + *this = M3D_MMultiply(*this, M); + return *this; +} +inline M3D_MATRIX M3D_MATRIX::operator*(M3D_MATRIX M) const noexcept { + return M3D_MMultiply(*this, M); +} + +inline M3D_MATRIX& M3D_MATRIX::operator*= (float S) noexcept { + rows[0] = M3D_V4Scale(rows[0], S); + rows[1] = M3D_V4Scale(rows[1], S); + rows[2] = M3D_V4Scale(rows[2], S); + rows[3] = M3D_V4Scale(rows[3], S); + return *this; +} +inline M3D_MATRIX M3D_MATRIX::operator* (float S) const noexcept { + M3D_MATRIX ret; + ret.rows[0] = M3D_V4Scale(rows[0], S); + ret.rows[1] = M3D_V4Scale(rows[1], S); + ret.rows[2] = M3D_V4Scale(rows[2], S); + ret.rows[3] = M3D_V4Scale(rows[3], S); + return ret; +} +inline M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept { + M3D_MATRIX ret; + ret.rows[0] = M3D_V4Scale(M.rows[0], S); + ret.rows[1] = M3D_V4Scale(M.rows[1], S); + ret.rows[2] = M3D_V4Scale(M.rows[2], S); + ret.rows[3] = M3D_V4Scale(M.rows[3], S); + return ret; +} + +inline M3D_MATRIX& M3D_MATRIX::operator/= (float S) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR vS = M3D_V4Replicate(S); + rows[0] = M3D_V4Divide(rows[0], vS); + rows[1] = M3D_V4Divide(rows[1], vS); + rows[2] = M3D_V4Divide(rows[2], vS); + rows[3] = M3D_V4Divide(rows[3], vS); + return *this; +#else + __m128 vS = _mm_set_ps1(S); + rows[0] = _mm_div_ps(rows[0], vS); + rows[1] = _mm_div_ps(rows[1], vS); + rows[2] = _mm_div_ps(rows[2], vS); + rows[3] = 
_mm_div_ps(rows[3], vS); + return *this; +#endif +} +inline M3D_MATRIX M3D_MATRIX::operator/ (float S) const noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR vS = M3D_V4Replicate(S); + M3D_MATRIX ret; + ret.rows[0] = M3D_V4Divide(rows[0], vS); + ret.rows[1] = M3D_V4Divide(rows[1], vS); + ret.rows[2] = M3D_V4Divide(rows[2], vS); + ret.rows[3] = M3D_V4Divide(rows[3], vS); + return ret; +#else + __m128 vS = _mm_set_ps1(S); + M3D_MATRIX ret; + ret.rows[0] = _mm_div_ps(rows[0], vS); + ret.rows[1] = _mm_div_ps(rows[1], vS); + ret.rows[2] = _mm_div_ps(rows[2], vS); + ret.rows[3] = _mm_div_ps(rows[3], vS); + return ret; +#endif +} + + +/* -------------------------------------------------------------------------------------------------------------------------- */ + +inline M3D_VECTOR M3D_V4LoadF3(const M3D_F3* src) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR V; + V.v4f[0] = src->x; + V.v4f[1] = src->y; + V.v4f[2] = src->z; + V.v4f[3] = 0.f; + return V; +/* +#elif defined(SSE4_INTRINSICS) + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(src))); + __m128 z = _mm_load_ss(&src->z); + return _mm_insert_ps(xy, z, 0x20); +*/ +#else + __m128 xy = _mm_castpd_ps(_mm_load_sd(reinterpret_cast(src))); + __m128 z = _mm_load_ss(&src->z); + return _mm_movelh_ps(xy, z); +#endif +} + +inline M3D_VECTOR M3D_V4LoadF3A(const M3D_F3A* src) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR V; + V.v4f[0] = src->x; + V.v4f[1] = src->y; + V.v4f[2] = src->z; + V.v4f[3] = 0.f; + return V; +#else + __m128 V = _mm_load_ps(&src->x); // Reads an extra float which is zero'd + return _mm_and_ps(V, M3D_MMask3); +#endif +} + +inline void M3D_V4StoreF3(M3D_F3* dst, M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + dst->x = V.v4f[0]; + dst->y = V.v4f[1]; + dst->z = V.v4f[2]; +/* +#elif defined(SSE4_INTRINSICS) + *reinterpret_cast(&dst->x) = _mm_extract_ps(V, 0); + *reinterpret_cast(&dst->y) = _mm_extract_ps(V, 1); + *reinterpret_cast(&dst->z) = _mm_extract_ps(V, 2); +*/ +#else + 
_mm_store_sd(reinterpret_cast(dst), _mm_castps_pd(V)); + __m128 z = M3D_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + _mm_store_ss(&dst->z, z); +#endif +} + +inline void M3D_V4StoreF3A(M3D_F3A* dst, M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + dst->x = V.v4f[0]; + dst->y = V.v4f[1]; + dst->z = V.v4f[2]; +/* +#elif defined(SSE4_INTRINSICS) + _mm_store_sd(reinterpret_cast(dst), _mm_castps_pd(V)); + *reinterpret_cast(&dst->z) = _mm_extract_ps(V, 2); +*/ +#else + _mm_store_sd(reinterpret_cast(dst), _mm_castps_pd(V)); + __m128 z = _mm_movehl_ps(V, V); + _mm_store_ss(&dst->z, z); +#endif +} + +inline M3D_VECTOR M3D_V4LoadF4(const M3D_F4* src) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR V; + V.v4f[0] = src->x; + V.v4f[1] = src->y; + V.v4f[2] = src->z; + V.v4f[3] = src->w; + return V; +#else + return _mm_loadu_ps(&src->x); +#endif +} + +inline M3D_VECTOR M3D_V4LoadV4A(const M3D_F4A* src) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR V; + V.v4f[0] = src->x; + V.v4f[1] = src->y; + V.v4f[2] = src->z; + V.v4f[3] = src->w; + return V; +#else + return _mm_load_ps(&src->x); +#endif +} + +inline void M3D_V4StoreF4(M3D_F4* dst, M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + dst->x = V.v4f[0]; + dst->y = V.v4f[1]; + dst->z = V.v4f[2]; + dst->w = V.v4f[3]; +#else + _mm_storeu_ps(&dst->x, V); +#endif +} + +inline void M3D_V4StoreF4A(M3D_F4A* dst, M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + dst->x = V.v4f[0]; + dst->y = V.v4f[1]; + dst->z = V.v4f[2]; + dst->w = V.v4f[3]; +#else + _mm_store_ps(&dst->x, V); +#endif +} + +inline M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + ret.rows[0].v4f[0] = src->mat[0][0]; + ret.rows[0].v4f[1] = src->mat[0][1]; + ret.rows[0].v4f[2] = src->mat[0][2]; + ret.rows[0].v4f[3] = src->mat[0][3]; + + ret.rows[1].v4f[0] = src->mat[1][0]; + ret.rows[1].v4f[1] = src->mat[1][1]; + ret.rows[1].v4f[2] = src->mat[1][2]; + ret.rows[1].v4f[3] = src->mat[1][3]; + + 
ret.rows[2].v4f[0] = src->mat[2][0]; + ret.rows[2].v4f[1] = src->mat[2][1]; + ret.rows[2].v4f[2] = src->mat[2][2]; + ret.rows[2].v4f[3] = src->mat[2][3]; + + ret.rows[3].v4f[0] = src->mat[3][0]; + ret.rows[3].v4f[1] = src->mat[3][1]; + ret.rows[3].v4f[2] = src->mat[3][2]; + ret.rows[3].v4f[3] = src->mat[3][3]; + return ret; +#else + M3D_MATRIX ret; + ret.rows[0] = _mm_loadu_ps(&src->_00); + ret.rows[1] = _mm_loadu_ps(&src->_10); + ret.rows[2] = _mm_loadu_ps(&src->_20); + ret.rows[3] = _mm_loadu_ps(&src->_30); + return ret; +#endif +} + +inline M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + ret.rows[0].v4f[0] = src->mat[0][0]; + ret.rows[0].v4f[1] = src->mat[0][1]; + ret.rows[0].v4f[2] = src->mat[0][2]; + ret.rows[0].v4f[3] = src->mat[0][3]; + + ret.rows[1].v4f[0] = src->mat[1][0]; + ret.rows[1].v4f[1] = src->mat[1][1]; + ret.rows[1].v4f[2] = src->mat[1][2]; + ret.rows[1].v4f[3] = src->mat[1][3]; + + ret.rows[2].v4f[0] = src->mat[2][0]; + ret.rows[2].v4f[1] = src->mat[2][1]; + ret.rows[2].v4f[2] = src->mat[2][2]; + ret.rows[2].v4f[3] = src->mat[2][3]; + + ret.rows[3].v4f[0] = src->mat[3][0]; + ret.rows[3].v4f[1] = src->mat[3][1]; + ret.rows[3].v4f[2] = src->mat[3][2]; + ret.rows[3].v4f[3] = src->mat[3][3]; + return ret; +#else + M3D_MATRIX ret; + ret.rows[0] = _mm_load_ps(&src->_00); + ret.rows[1] = _mm_load_ps(&src->_10); + ret.rows[2] = _mm_load_ps(&src->_20); + ret.rows[3] = _mm_load_ps(&src->_30); + return ret; +#endif +} + +inline void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept { +#ifdef DISABLE_INTRINSICS + dst->mat[0][0] = M.rows[0].v4f[0]; + dst->mat[0][1] = M.rows[0].v4f[1]; + dst->mat[0][2] = M.rows[0].v4f[2]; + dst->mat[0][3] = M.rows[0].v4f[3]; + + dst->mat[1][0] = M.rows[1].v4f[0]; + dst->mat[1][1] = M.rows[1].v4f[1]; + dst->mat[1][2] = M.rows[1].v4f[2]; + dst->mat[1][3] = M.rows[1].v4f[3]; + + dst->mat[2][0] = M.rows[2].v4f[0]; + dst->mat[2][1] = M.rows[2].v4f[1]; + dst->mat[2][2] = 
M.rows[2].v4f[2]; + dst->mat[2][3] = M.rows[2].v4f[3]; + + dst->mat[3][0] = M.rows[3].v4f[0]; + dst->mat[3][1] = M.rows[3].v4f[1]; + dst->mat[3][2] = M.rows[3].v4f[2]; + dst->mat[3][3] = M.rows[3].v4f[3]; +#else + _mm_storeu_ps(&dst->_00, M.rows[0]); + _mm_storeu_ps(&dst->_10, M.rows[1]); + _mm_storeu_ps(&dst->_20, M.rows[2]); + _mm_storeu_ps(&dst->_30, M.rows[3]); +#endif +} + +inline void M3D_V4StoreF4x4A(M3D_F4X4A* dst, M3D_MATRIX M) noexcept { +#ifdef DISABLE_INTRINSICS + dst->mat[0][0] = M.rows[0].v4f[0]; + dst->mat[0][1] = M.rows[0].v4f[1]; + dst->mat[0][2] = M.rows[0].v4f[2]; + dst->mat[0][3] = M.rows[0].v4f[3]; + + dst->mat[1][0] = M.rows[1].v4f[0]; + dst->mat[1][1] = M.rows[1].v4f[1]; + dst->mat[1][2] = M.rows[1].v4f[2]; + dst->mat[1][3] = M.rows[1].v4f[3]; + + dst->mat[2][0] = M.rows[2].v4f[0]; + dst->mat[2][1] = M.rows[2].v4f[1]; + dst->mat[2][2] = M.rows[2].v4f[2]; + dst->mat[2][3] = M.rows[2].v4f[3]; + + dst->mat[3][0] = M.rows[3].v4f[0]; + dst->mat[3][1] = M.rows[3].v4f[1]; + dst->mat[3][2] = M.rows[3].v4f[2]; + dst->mat[3][3] = M.rows[3].v4f[3]; +#else + _mm_store_ps(&dst->_00, M.rows[0]); + _mm_store_ps(&dst->_10, M.rows[1]); + _mm_store_ps(&dst->_20, M.rows[2]); + _mm_store_ps(&dst->_30, M.rows[3]); +#endif +} + + +/* -------------------------------------------------------------------------------------------------------------------------- */ + +inline M3D_VECTOR M3D_V4Set(float x, float y, float z, float w) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 ret = {{{x, y, z, w}}}; + return ret.v; +#else + return _mm_set_ps(w, z, y, x); +#endif +} + +inline M3D_VECTOR M3D_V4Negate(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 ret = {{{ + -V.v4f[0], + -V.v4f[1], + -V.v4f[2], + -V.v4f[3] + }}}; + return ret.v; +#else + M3D_VECTOR Z = _mm_setzero_ps(); + return _mm_sub_ps(Z, V); +#endif +} + +inline M3D_VECTOR M3D_V4Replicate(float val) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 ret; + ret.f[0] = + ret.f[1] = + ret.f[2] = + 
ret.f[3] = val; + return ret.v; +#else + return _mm_set_ps1(val); +#endif +} + +inline float M3D_V4GetX(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + return V.v4f[0]; +#else + return _mm_cvtss_f32(V); +#endif +} + +inline float M3D_V4GetY(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + return V.v4f[1]; +#else + M3D_VECTOR vTemp = M3D_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); + return _mm_cvtss_f32(vTemp); +#endif +} + +inline float M3D_V4GetZ(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + return V.v4f[2]; +#else + M3D_VECTOR vTemp = M3D_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); + return _mm_cvtss_f32(vTemp); +#endif +} + +inline float M3D_V4GetW(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + return V.v4f[3]; +#else + M3D_VECTOR vTemp = M3D_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); + return _mm_cvtss_f32(vTemp); +#endif +} + +inline M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = V.v4f[0]; + return vResult.v; +#elif defined(AVX2_INTRINSICS) && defined(FAVOR_INTEL) + return _mm_broadcastss_ps(V); +#else + return M3D_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); +#endif +} + +inline M3D_VECTOR M3D_V4SplatY(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = V.v4f[1]; + return vResult.v; +#else + return M3D_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); +#endif +} + +inline M3D_VECTOR M3D_V4SplatZ(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = V.v4f[2]; + return vResult.v; +#else + return M3D_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); +#endif +} + +inline M3D_VECTOR M3D_V4SplatW(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = V.v4f[3]; + return vResult.v; +#else + return 
M3D_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); +#endif +} + +inline M3D_VECTOR M3D_V4Add(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 ret = {{{ + V1.v4f[0] + V2.v4f[0], + V1.v4f[1] + V2.v4f[1], + V1.v4f[2] + V2.v4f[2], + V1.v4f[3] + V2.v4f[3] + }}}; + return ret.v; +#else + return _mm_add_ps(V1, V2); +#endif +} + +inline M3D_VECTOR M3D_V4Subtract(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 ret = {{{ + V1.v4f[0] - V2.v4f[0], + V1.v4f[1] - V2.v4f[1], + V1.v4f[2] - V2.v4f[2], + V1.v4f[3] - V2.v4f[3] + }}}; + return ret.v; +#else + return _mm_sub_ps(V1, V2); +#endif +} + +inline M3D_VECTOR M3D_V4MultiplyAdd(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 ret = {{{ + V1.v4f[0] * V2.v4f[0] + V3.v4f[0], + V1.v4f[1] * V2.v4f[1] + V3.v4f[1], + V1.v4f[2] * V2.v4f[2] + V3.v4f[2], + V1.v4f[3] * V2.v4f[3] + V3.v4f[3] + }}}; + return ret.v; +#else + return M3D_FMADD_PS(V1, V2, V3); +#endif +} + +inline M3D_VECTOR M3D_V4Divide(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 ret = {{{ + V1.v4f[0] / V2.v4f[0], + V1.v4f[1] / V2.v4f[1], + V1.v4f[2] / V2.v4f[2], + V1.v4f[3] / V2.v4f[3] + }}}; + return ret.v; +#else + return _mm_div_ps(V1, V2); +#endif +} + +inline M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 ret = {{{ + V.v4f[0] * scale, + V.v4f[1] * scale, + V.v4f[2] * scale, + V.v4f[3] * scale + }}}; + return ret.v; +#else + M3D_VECTOR ret = _mm_set_ps1(scale); + return _mm_mul_ps(ret, V); +#endif +} + +inline M3D_VECTOR M3D_V4Select(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR Control) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4U32 ret = {{{ + (V1.v4u[0] & ~Control.v4u[0]) | (V2.v4u[0] & Control.v4u[0]), + (V1.v4u[1] & ~Control.v4u[1]) | (V2.v4u[1] & Control.v4u[1]), + (V1.v4u[2] & ~Control.v4u[2]) | (V2.v4u[2] & Control.v4u[2]), + (V1.v4u[3] & ~Control.v4u[3]) | (V2.v4u[3] & 
Control.v4u[3]), + }}}; + return ret.v; +#else + M3D_VECTOR vTemp1 = _mm_andnot_ps(Control, V1); + M3D_VECTOR vTemp2 = _mm_and_ps(V2, Control); + return _mm_or_ps(vTemp1, vTemp2); +#endif +} + +inline M3D_VECTOR M3D_V4MergeXY(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4U32 Result = { { { + V1.v4u[0], + V2.v4u[0], + V1.v4u[1], + V2.v4u[1], + } } }; + return Result.v; +#else + return _mm_unpacklo_ps(V1, V2); +#endif +} + +inline M3D_VECTOR M3D_V4MergeZW(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4U32 Result = { { { + V1.v4u[2], + V2.v4u[2], + V1.v4u[3], + V2.v4u[3] + } } }; + return Result.v; +#else + return _mm_unpackhi_ps(V1, V2); +#endif +} + +inline M3D_VECTOR M3D_V4Sqrt(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_V4F32 Result = { { { + sqrtf(V.v4f[0]), + sqrtf(V.v4f[1]), + sqrtf(V.v4f[2]), + sqrtf(V.v4f[3]) + } } }; + return Result.v; +#else + return _mm_sqrt_ps(V); +#endif +} + +inline M3D_VECTOR M3D_V3Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { +#ifdef DISABLE_INTRINSICS + float fValue = V1.v4f[0] * V2.v4f[0] + V1.v4f[1] * V2.v4f[1] + V1.v4f[2] * V2.v4f[2]; + M3D_V4F32 vResult; + vResult.f[0] = + vResult.f[1] = + vResult.f[2] = + vResult.f[3] = fValue; + return vResult.v; +#elif defined(SSE4_INTRINSICS) + return _mm_dp_ps(V1, V2, 0x7f); +#elif defined(SSE3_INTRINSICS) + M3D_VECTOR vTemp = _mm_mul_ps(V1, V2); + vTemp = _mm_and_ps(vTemp, g_XMMask3); + vTemp = _mm_hadd_ps(vTemp, vTemp); + return _mm_hadd_ps(vTemp, vTemp); +#else + // Perform the dot product + M3D_VECTOR vDot = _mm_mul_ps(V1, V2); + // x=Dot.v4f[1], y=Dot.v4f[2] + M3D_VECTOR vTemp = M3D_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1)); + // Result.v4f[0] = x+y + vDot = _mm_add_ss(vDot, vTemp); + // x=Dot.v4f[2] + vTemp = M3D_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + // Result.v4f[0] = (x+y)+z + vDot = _mm_add_ss(vDot, vTemp); + // Splat x + return M3D_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0)); +#endif +} + +inline 
M3D_VECTOR M3D_V3Cross(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { + // [ V1.y*V2.z - V1.z*V2.y, V1.z*V2.x - V1.x*V2.z, V1.x*V2.y - V1.y*V2.x ] + +#ifdef DISABLE_INTRINSICS + M3D_V4F32 vResult = {{{ + (V1.v4f[1] * V2.v4f[2]) - (V1.v4f[2] * V2.v4f[1]), + (V1.v4f[2] * V2.v4f[0]) - (V1.v4f[0] * V2.v4f[2]), + (V1.v4f[0] * V2.v4f[1]) - (V1.v4f[1] * V2.v4f[0]), + 0.0f + }}}; + return vResult.v; +#else + // y1,z1,x1,w1 + M3D_VECTOR vTemp1 = M3D_PERMUTE_PS(V1, _MM_SHUFFLE(3, 0, 2, 1)); + // z2,x2,y2,w2 + M3D_VECTOR vTemp2 = M3D_PERMUTE_PS(V2, _MM_SHUFFLE(3, 1, 0, 2)); + // Perform the left operation + M3D_VECTOR vResult = _mm_mul_ps(vTemp1, vTemp2); + // z1,x1,y1,w1 + vTemp1 = M3D_PERMUTE_PS(vTemp1, _MM_SHUFFLE(3, 0, 2, 1)); + // y2,z2,x2,w2 + vTemp2 = M3D_PERMUTE_PS(vTemp2, _MM_SHUFFLE(3, 1, 0, 2)); + // Perform the right operation + vResult = M3D_FMADD_PS(vTemp1, vTemp2, vResult); + // Set w to zero + return _mm_and_ps(vResult, M3D_MMask3); +#endif +} + +inline M3D_VECTOR M3D_V3LengthSq(M3D_VECTOR V) noexcept { + return M3D_V3Dot(V, V); +} + +inline M3D_VECTOR M3D_V3Length(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR Result; + + Result = M3D_V3LengthSq(V); + Result = M3D_V4Sqrt(Result); + + return Result; +#elif defined(SSE4_INTRINSICS) + M3D_VECTOR vTemp = _mm_dp_ps(V, V, 0x7f); + return _mm_sqrt_ps(vTemp); +#elif defined(SSE3_INTRINSICS) + M3D_VECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#else + // Perform the dot product on x,y and z + M3D_VECTOR vLengthSq = _mm_mul_ps(V, V); + // vTemp has z and y + M3D_VECTOR vTemp = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 2, 1, 2)); + // x+z, y + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + // y,y,y,y + vTemp = M3D_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + // x+z+y,??,??,?? 
+ vLengthSq = _mm_add_ss(vLengthSq, vTemp); + // Splat the length squared + vLengthSq = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + // Get the length + vLengthSq = _mm_sqrt_ps(vLengthSq); + return vLengthSq; +#endif +} + +inline M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR vResult = M3D_V3Length(V); + float fLength = vResult.v4f[0]; + + // Prevent divide by zero - uhuh + if (fLength > 0) { + fLength = 1.0f / fLength; + } + + vResult.v4f[0] = V.v4f[0] * fLength; + vResult.v4f[1] = V.v4f[1] * fLength; + vResult.v4f[2] = V.v4f[2] * fLength; + vResult.v4f[3] = V.v4f[3] * fLength; + return vResult; + +#elif defined(SSE4_INTRINSICS) + M3D_VECTOR vLengthSq = _mm_dp_ps(V, V, 0x7f); + // Prepare for the division + M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + M3D_VECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#elif defined(SSE3_INTRINSICS) + // Perform the dot product on x,y and z only + M3D_VECTOR vLengthSq = _mm_mul_ps(V, V); + vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); + // Prepare for the division + M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + M3D_VECTOR vZeroMask = _mm_setzero_ps(); + // Test for a 
divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); + M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#else + // Perform the dot product on x,y and z only + M3D_VECTOR vLengthSq = _mm_mul_ps(V, V); + M3D_VECTOR vTemp = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 1, 2, 1)); + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vTemp = M3D_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); + vLengthSq = _mm_add_ss(vLengthSq, vTemp); + vLengthSq = M3D_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); + // Prepare for the division + M3D_VECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Create zero with a single instruction + M3D_VECTOR vZeroMask = _mm_setzero_ps(); + // Test for a divide by zero (Must be FP to detect -0.0) + vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq, M3D_MInfinity); + // Divide to perform the normalization + vResult = _mm_div_ps(V, vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult, vZeroMask); + // Select qnan or result based on infinite length + M3D_VECTOR vTemp1 = _mm_andnot_ps(vLengthSq, M3D_MQNaN); + M3D_VECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); + vResult = _mm_or_ps(vTemp1, vTemp2); + return vResult; +#endif +} + + +/* -------------------------------------------------------------------------------------------------------------------------- */ + +inline M3D_MATRIX 
M3D_MIdentity() noexcept { + M3D_MATRIX ret; + ret.rows[0] = M3D_MIdentityR0.v; + ret.rows[1] = M3D_MIdentityR1.v; + ret.rows[2] = M3D_MIdentityR2.v; + ret.rows[3] = M3D_MIdentityR3.v; + return ret; +} + +inline M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + // Cache the invariants in registers + float x = M1.mat[0][0]; + float y = M1.mat[0][1]; + float z = M1.mat[0][2]; + float w = M1.mat[0][3]; + // Perform the operation on the first row + ret.mat[0][0] = (M2.mat[0][0] * x) + (M2.mat[1][0] * y) + (M2.mat[2][0] * z) + (M2.mat[3][0] * w); + ret.mat[0][1] = (M2.mat[0][1] * x) + (M2.mat[1][1] * y) + (M2.mat[2][1] * z) + (M2.mat[3][1] * w); + ret.mat[0][2] = (M2.mat[0][2] * x) + (M2.mat[1][2] * y) + (M2.mat[2][2] * z) + (M2.mat[3][2] * w); + ret.mat[0][3] = (M2.mat[0][3] * x) + (M2.mat[1][3] * y) + (M2.mat[2][3] * z) + (M2.mat[3][3] * w); + // Repeat for all the other rows + x = M1.mat[1][0]; + y = M1.mat[1][1]; + z = M1.mat[1][2]; + w = M1.mat[1][3]; + ret.mat[1][0] = (M2.mat[0][0] * x) + (M2.mat[1][0] * y) + (M2.mat[2][0] * z) + (M2.mat[3][0] * w); + ret.mat[1][1] = (M2.mat[0][1] * x) + (M2.mat[1][1] * y) + (M2.mat[2][1] * z) + (M2.mat[3][1] * w); + ret.mat[1][2] = (M2.mat[0][2] * x) + (M2.mat[1][2] * y) + (M2.mat[2][2] * z) + (M2.mat[3][2] * w); + ret.mat[1][3] = (M2.mat[0][3] * x) + (M2.mat[1][3] * y) + (M2.mat[2][3] * z) + (M2.mat[3][3] * w); + x = M1.mat[2][0]; + y = M1.mat[2][1]; + z = M1.mat[2][2]; + w = M1.mat[2][3]; + ret.mat[2][0] = (M2.mat[0][0] * x) + (M2.mat[1][0] * y) + (M2.mat[2][0] * z) + (M2.mat[3][0] * w); + ret.mat[2][1] = (M2.mat[0][1] * x) + (M2.mat[1][1] * y) + (M2.mat[2][1] * z) + (M2.mat[3][1] * w); + ret.mat[2][2] = (M2.mat[0][2] * x) + (M2.mat[1][2] * y) + (M2.mat[2][2] * z) + (M2.mat[3][2] * w); + ret.mat[2][3] = (M2.mat[0][3] * x) + (M2.mat[1][3] * y) + (M2.mat[2][3] * z) + (M2.mat[3][3] * w); + x = M1.mat[3][0]; + y = M1.mat[3][1]; + z = M1.mat[3][2]; + w = M1.mat[3][3]; + 
ret.mat[3][0] = (M2.mat[0][0] * x) + (M2.mat[1][0] * y) + (M2.mat[2][0] * z) + (M2.mat[3][0] * w); + ret.mat[3][1] = (M2.mat[0][1] * x) + (M2.mat[1][1] * y) + (M2.mat[2][1] * z) + (M2.mat[3][1] * w); + ret.mat[3][2] = (M2.mat[0][2] * x) + (M2.mat[1][2] * y) + (M2.mat[2][2] * z) + (M2.mat[3][2] * w); + ret.mat[3][3] = (M2.mat[0][3] * x) + (M2.mat[1][3] * y) + (M2.mat[2][3] * z) + (M2.mat[3][3] * w); + return ret; +#elif defined(AVX2_INTRINSICS) + __m256 t0 = _mm256_castps128_ps256(M1.rows[0]); + t0 = _mm256_insertf128_ps(t0, M1.rows[1], 1); + __m256 t1 = _mm256_castps128_ps256(M1.rows[2]); + t1 = _mm256_insertf128_ps(t1, M1.rows[3], 1); + + __m256 u0 = _mm256_castps128_ps256(M2.rows[0]); + u0 = _mm256_insertf128_ps(u0, M2.rows[1], 1); + __m256 u1 = _mm256_castps128_ps256(M2.rows[2]); + u1 = _mm256_insertf128_ps(u1, M2.rows[3], 1); + + __m256 a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 0, 0)); + __m256 a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(0, 0, 0, 0)); + __m256 b0 = _mm256_permute2f128_ps(u0, u0, 0x00); + __m256 c0 = _mm256_mul_ps(a0, b0); + __m256 c1 = _mm256_mul_ps(a1, b0); + + a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(1, 1, 1, 1)); + a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 1, 1, 1)); + b0 = _mm256_permute2f128_ps(u0, u0, 0x11); + __m256 c2 = _mm256_fmadd_ps(a0, b0, c0); + __m256 c3 = _mm256_fmadd_ps(a1, b0, c1); + + a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 2)); + a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 2, 2, 2)); + __m256 b1 = _mm256_permute2f128_ps(u1, u1, 0x00); + __m256 c4 = _mm256_mul_ps(a0, b1); + __m256 c5 = _mm256_mul_ps(a1, b1); + + a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 3, 3)); + a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 3, 3, 3)); + b1 = _mm256_permute2f128_ps(u1, u1, 0x11); + __m256 c6 = _mm256_fmadd_ps(a0, b1, c4); + __m256 c7 = _mm256_fmadd_ps(a1, b1, c5); + + t0 = _mm256_add_ps(c2, c6); + t1 = _mm256_add_ps(c3, c7); + + M3D_MATRIX ret; + ret.rows[0] = _mm256_castps256_ps128(t0); + ret.rows[1] = 
_mm256_extractf128_ps(t0, 1); + ret.rows[2] = _mm256_castps256_ps128(t1); + ret.rows[3] = _mm256_extractf128_ps(t1, 1); + return ret; +#else + M3D_MATRIX ret; + // Splat the component X,Y,Z then W +#ifdef AVX_INTRINSICS + XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast(&M1.rows[0]) + 0); + XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast(&M1.rows[0]) + 1); + XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast(&M1.rows[0]) + 2); + XMVECTOR vW = _mm_broadcast_ss(reinterpret_cast(&M1.rows[0]) + 3); +#else + // Use vW to hold the original row + M3D_VECTOR vW = M1.rows[0]; + M3D_VECTOR vX = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + M3D_VECTOR vY = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + M3D_VECTOR vZ = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + // Perform the operation on the first row + vX = _mm_mul_ps(vX, M2.rows[0]); + vY = _mm_mul_ps(vY, M2.rows[1]); + vZ = _mm_mul_ps(vZ, M2.rows[2]); + vW = _mm_mul_ps(vW, M2.rows[3]); + // Perform a binary add to reduce cumulative errors + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + ret.rows[0] = vX; + // Repeat for the other 3 rows +#ifdef AVX_INTRINSICS + vX = _mm_broadcast_ss(reinterpret_cast(&M1.rows[1]) + 0); + vY = _mm_broadcast_ss(reinterpret_cast(&M1.rows[1]) + 1); + vZ = _mm_broadcast_ss(reinterpret_cast(&M1.rows[1]) + 2); + vW = _mm_broadcast_ss(reinterpret_cast(&M1.rows[1]) + 3); +#else + vW = M1.rows[1]; + vX = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + vY = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + vZ = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + vX = _mm_mul_ps(vX, M2.rows[0]); + vY = _mm_mul_ps(vY, M2.rows[1]); + vZ = _mm_mul_ps(vZ, M2.rows[2]); + vW = _mm_mul_ps(vW, M2.rows[3]); + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + ret.rows[1] = vX; +#ifdef AVX_INTRINSICS + vX = 
_mm_broadcast_ss(reinterpret_cast(&M1.rows[2]) + 0); + vY = _mm_broadcast_ss(reinterpret_cast(&M1.rows[2]) + 1); + vZ = _mm_broadcast_ss(reinterpret_cast(&M1.rows[2]) + 2); + vW = _mm_broadcast_ss(reinterpret_cast(&M1.rows[2]) + 3); +#else + vW = M1.rows[2]; + vX = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + vY = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + vZ = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + vX = _mm_mul_ps(vX, M2.rows[0]); + vY = _mm_mul_ps(vY, M2.rows[1]); + vZ = _mm_mul_ps(vZ, M2.rows[2]); + vW = _mm_mul_ps(vW, M2.rows[3]); + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + ret.rows[2] = vX; +#ifdef AVX_INTRINSICS + vX = _mm_broadcast_ss(reinterpret_cast(&M1.rows[3]) + 0); + vY = _mm_broadcast_ss(reinterpret_cast(&M1.rows[3]) + 1); + vZ = _mm_broadcast_ss(reinterpret_cast(&M1.rows[3]) + 2); + vW = _mm_broadcast_ss(reinterpret_cast(&M1.rows[3]) + 3); +#else + vW = M1.rows[3]; + vX = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); + vY = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); + vZ = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = M3D_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); +#endif + vX = _mm_mul_ps(vX, M2.rows[0]); + vY = _mm_mul_ps(vY, M2.rows[1]); + vZ = _mm_mul_ps(vZ, M2.rows[2]); + vW = _mm_mul_ps(vW, M2.rows[3]); + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + ret.rows[3] = vX; + return ret; +#endif +} + +inline M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept { +#ifdef DISABLE_INTRINSICS + // Original matrix: + // + // m00m01m02m03 + // m10m11m12m13 + // m20m21m22m23 + // m30m31m32m33 + + M3D_MATRIX P; + P.rows[0] = M3D_V4MergeXY(M.rows[0], M.rows[2]); // m00m20m01m21 + P.rows[1] = M3D_V4MergeXY(M.rows[1], M.rows[3]); // m10m30m11m31 + P.rows[2] = M3D_V4MergeZW(M.rows[0], M.rows[2]); // m02m22m03m23 + P.rows[3] = M3D_V4MergeZW(M.rows[1], M.rows[3]); // m12m32m13m33 + + M3D_MATRIX MT; + MT.rows[0] = 
M3D_V4MergeXY(P.rows[0], P.rows[1]); // m00m10m20m30 + MT.rows[1] = M3D_V4MergeZW(P.rows[0], P.rows[1]); // m01m11m21m31 + MT.rows[2] = M3D_V4MergeXY(P.rows[2], P.rows[3]); // m02m12m22m32 + MT.rows[3] = M3D_V4MergeZW(P.rows[2], P.rows[3]); // m03m13m23m33 + return MT; +#elif defined(AVX2_INTRINSICS) + __m256 t0 = _mm256_castps128_ps256(M.rows[0]); + t0 = _mm256_insertf128_ps(t0, M.rows[1], 1); + __m256 t1 = _mm256_castps128_ps256(M.rows[2]); + t1 = _mm256_insertf128_ps(t1, M.rows[3], 1); + + __m256 vTemp = _mm256_unpacklo_ps(t0, t1); + __m256 vTemp2 = _mm256_unpackhi_ps(t0, t1); + __m256 vTemp3 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20); + __m256 vTemp4 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31); + vTemp = _mm256_unpacklo_ps(vTemp3, vTemp4); + vTemp2 = _mm256_unpackhi_ps(vTemp3, vTemp4); + t0 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20); + t1 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31); + + M3D_MATRIX ret; + ret.rows[0] = _mm256_castps256_ps128(t0); + ret.rows[1] = _mm256_extractf128_ps(t0, 1); + ret.rows[2] = _mm256_castps256_ps128(t1); + ret.rows[3] = _mm256_extractf128_ps(t1, 1); + return ret; +#else + // x.x,x.y,y.x,y.y + M3D_VECTOR vTemp1 = _mm_shuffle_ps(M.rows[0], M.rows[1], _MM_SHUFFLE(1, 0, 1, 0)); + // x.z,x.w,y.z,y.w + M3D_VECTOR vTemp3 = _mm_shuffle_ps(M.rows[0], M.rows[1], _MM_SHUFFLE(3, 2, 3, 2)); + // z.x,z.y,w.x,w.y + M3D_VECTOR vTemp2 = _mm_shuffle_ps(M.rows[2], M.rows[3], _MM_SHUFFLE(1, 0, 1, 0)); + // z.z,z.w,w.z,w.w + M3D_VECTOR vTemp4 = _mm_shuffle_ps(M.rows[2], M.rows[3], _MM_SHUFFLE(3, 2, 3, 2)); + + M3D_MATRIX ret; + // x.x,y.x,z.x,w.x + ret.rows[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); + // x.y,y.y,z.y,w.y + ret.rows[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); + // x.z,y.z,z.z,w.z + ret.rows[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); + // x.w,y.w,z.w,w.w + ret.rows[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); + return ret; +#endif +} + + +/* 
-------------------------------------------------------------------------------------------------------------------------- */ + +inline M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_VECTOR Z = M3D_V4SplatZ(V); + M3D_VECTOR Y = M3D_V4SplatY(V); + M3D_VECTOR X = M3D_V4SplatX(V); + + M3D_VECTOR Result = M3D_V4MultiplyAdd(Z, M.rows[2], M.rows[3]); + Result = M3D_V4MultiplyAdd(Y, M.rows[1], Result); + Result = M3D_V4MultiplyAdd(X, M.rows[0], Result); + + return Result; +#else + M3D_VECTOR vResult = M3D_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); // Z + vResult = M3D_FMADD_PS(vResult, M.rows[2], M.rows[3]); + M3D_VECTOR vTemp = M3D_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y + vResult = M3D_FMADD_PS(vTemp, M.rows[1], vResult); + vTemp = M3D_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X + vResult = M3D_FMADD_PS(vTemp, M.rows[0], vResult); + return vResult; +#endif +} + + +/* -------------------------------------------------------------------------------------------------------------------------- */ + +inline M3D_MATRIX M3D_TransformMatrixCamLookAtLH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept { + M3D_VECTOR dir = M3D_V4Subtract(focusPos, viewPos); + return M3D_TransformMatrixCamLookToLH(viewPos, dir, upDirection); +} + +inline M3D_MATRIX M3D_TransformMatrixCamLookAtRH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept { + M3D_VECTOR dir_n = M3D_V4Subtract(viewPos, focusPos); + return M3D_TransformMatrixCamLookToLH(viewPos, dir_n, upDirection); +} + +inline M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept { + // Keep viewer's axes orthogonal to each other and of unit length + M3D_VECTOR look_normal = M3D_V3Normalize(viewDirection); + M3D_VECTOR up_norm = M3D_V3Cross(upDirection, look_normal); + up_norm = M3D_V3Normalize(up_norm); + + // U, L already ortho-normal, so no need to normalize cross product + 
M3D_VECTOR right_norm = M3D_V3Cross(look_normal, up_norm); + + M3D_VECTOR viewPos_n = M3D_V4Negate(viewPos); + + M3D_VECTOR right_vec = M3D_V3Dot(up_norm, viewPos_n); + M3D_VECTOR up_vec = M3D_V3Dot(right_norm, viewPos_n); + M3D_VECTOR look_vec = M3D_V3Dot(look_normal, viewPos_n); + + M3D_MATRIX ret; + ret.rows[0] = M3D_V4Select(right_vec, up_norm, M3D_MSelect1110.v); + ret.rows[1] = M3D_V4Select(up_vec, right_norm, M3D_MSelect1110.v); + ret.rows[2] = M3D_V4Select(look_vec, look_normal, M3D_MSelect1110.v); + ret.rows[3] = M3D_MIdentityR3.v; + + ret = M3D_MTranspose(ret); + + return ret; +} + +inline M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept { + M3D_VECTOR viewDirection_n = M3D_V4Negate(viewDirection); + return M3D_TransformMatrixCamLookToLH(viewPos, viewDirection_n, upDirection); +} + +inline M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept { + float SinFov; + float CosFov; + M3D_ScalarSinCos(&SinFov, &CosFov, 0.5f * fov); + float fRange = far / (far - near); + float Height = CosFov / SinFov; + float Width = Height / ratio; + +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + ret.mat[0][0] = Width; + ret.mat[0][1] = 0.0f; + ret.mat[0][2] = 0.0f; + ret.mat[0][3] = 0.0f; + + ret.mat[1][0] = 0.0f; + ret.mat[1][1] = Height; + ret.mat[1][2] = 0.0f; + ret.mat[1][3] = 0.0f; + + ret.mat[2][0] = 0.0f; + ret.mat[2][1] = 0.0f; + ret.mat[2][2] = fRange; + ret.mat[2][3] = 1.0f; + + ret.mat[3][0] = 0.0f; + ret.mat[3][1] = 0.0f; + ret.mat[3][2] = -fRange * near; + ret.mat[3][3] = 0.0f; + return ret; +#else + M3D_VECTOR rMem = { + Width, + Height, + fRange, + -fRange * near + }; + + // Copy from memory to SSE register + M3D_VECTOR vValues = rMem; + + M3D_MATRIX ret; + M3D_VECTOR vTemp = _mm_setzero_ps(); + vTemp = _mm_move_ss(vTemp, vValues); + ret.rows[0] = vTemp; // Width, 0, 0, 0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, M3D_MMaskY); + ret.rows[1] = 
vTemp; // 0, Height, 0, 0 + vTemp = _mm_setzero_ps(); + vValues = _mm_shuffle_ps(vValues, M3D_MIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); + ret.rows[2] = vTemp; // 0, 0, fRange, 1.0f + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); + ret.rows[3] = vTemp; // 0, 0, -fRange * near, 0.0f + return ret; +#endif +} + +inline M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept { + float SinFov; + float CosFov; + M3D_ScalarSinCos(&SinFov, &CosFov, 0.5f * fov); + float fRange = far / (near - far); + float Height = CosFov / SinFov; + float Width = Height / ratio; + +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + ret.mat[0][0] = Width; + ret.mat[0][1] = 0.0f; + ret.mat[0][2] = 0.0f; + ret.mat[0][3] = 0.0f; + + ret.mat[1][0] = 0.0f; + ret.mat[1][1] = Height; + ret.mat[1][2] = 0.0f; + ret.mat[1][3] = 0.0f; + + ret.mat[2][0] = 0.0f; + ret.mat[2][1] = 0.0f; + ret.mat[2][2] = fRange; + ret.mat[2][3] = -1.0f; + + ret.mat[3][0] = 0.0f; + ret.mat[3][1] = 0.0f; + ret.mat[3][2] = fRange * near; + ret.mat[3][3] = 0.0f; + return ret; +#else + M3D_VECTOR rMem = { + Width, + Height, + fRange, + fRange * near + }; + + // Copy from memory to SSE register + M3D_VECTOR vValues = rMem; + + M3D_MATRIX ret; + M3D_VECTOR vTemp = _mm_setzero_ps(); + vTemp = _mm_move_ss(vTemp, vValues); + ret.rows[0] = vTemp; // Height / a_ratio, 0, 0, 0 + vTemp = vValues; + vTemp = _mm_and_ps(vTemp, M3D_MMaskY); + ret.rows[1] = vTemp; // 0, CosFov / SinFov, 0, 0 + vTemp = _mm_setzero_ps(); + vValues = _mm_shuffle_ps(vValues, M3D_MIdentityR3_n, _MM_SHUFFLE(3, 2, 3, 2)); + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); + ret.rows[2] = vTemp; // 0, 0, fRange, -1.0f + vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); + ret.rows[3] = vTemp; // 0, 0, fRange * near, 0.0f + return ret; +#endif +} + +inline M3D_MATRIX M3D_TransformMatrixScaling(float ScaleX, float 
ScaleY, float ScaleZ) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + ret.mat[0][0] = ScaleX; + ret.mat[0][1] = 0.0f; + ret.mat[0][2] = 0.0f; + ret.mat[0][3] = 0.0f; + + ret.mat[1][0] = 0.0f; + ret.mat[1][1] = ScaleY; + ret.mat[1][2] = 0.0f; + ret.mat[1][3] = 0.0f; + + ret.mat[2][0] = 0.0f; + ret.mat[2][1] = 0.0f; + ret.mat[2][2] = ScaleZ; + ret.mat[2][3] = 0.0f; + + ret.mat[3][0] = 0.0f; + ret.mat[3][1] = 0.0f; + ret.mat[3][2] = 0.0f; + ret.mat[3][3] = 1.0f; + return ret; +#else + M3D_MATRIX ret; + ret.rows[0] = _mm_set_ps(0, 0, 0, ScaleX); + ret.rows[1] = _mm_set_ps(0, 0, ScaleY, 0); + ret.rows[2] = _mm_set_ps(0, ScaleZ, 0, 0); + ret.rows[3] = M3D_MIdentityR3.v; + return ret; +#endif +} + +inline M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept { +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + ret.mat[0][0] = 1.0f; + ret.mat[0][1] = 0.0f; + ret.mat[0][2] = 0.0f; + ret.mat[0][3] = 0.0f; + + ret.mat[1][0] = 0.0f; + ret.mat[1][1] = 1.0f; + ret.mat[1][2] = 0.0f; + ret.mat[1][3] = 0.0f; + + ret.mat[2][0] = 0.0f; + ret.mat[2][1] = 0.0f; + ret.mat[2][2] = 1.0f; + ret.mat[2][3] = 0.0f; + + ret.mat[3][0] = OffsetX; + ret.mat[3][1] = OffsetY; + ret.mat[3][2] = OffsetZ; + ret.mat[3][3] = 1.0f; + return ret; +#else + M3D_MATRIX ret; + ret.rows[0] = M3D_MIdentityR0.v; + ret.rows[1] = M3D_MIdentityR1.v; + ret.rows[2] = M3D_MIdentityR2.v; + ret.rows[3] = M3D_V4Set(OffsetX, OffsetY, OffsetZ, 1.f); + return ret; +#endif +} + +inline M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept { + float SinAngle; + float CosAngle; + M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle); + +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + ret.mat[0][0] = 1.0f; + ret.mat[0][1] = 0.0f; + ret.mat[0][2] = 0.0f; + ret.mat[0][3] = 0.0f; + + ret.mat[1][0] = 0.0f; + ret.mat[1][1] = CosAngle; + ret.mat[1][2] = SinAngle; + ret.mat[1][3] = 0.0f; + + ret.mat[2][0] = 0.0f; + ret.mat[2][1] = -SinAngle; + ret.mat[2][2] = CosAngle; + ret.mat[2][3] = 
0.0f; + + ret.mat[3][0] = 0.0f; + ret.mat[3][1] = 0.0f; + ret.mat[3][2] = 0.0f; + ret.mat[3][3] = 1.0f; + return ret; +#else + M3D_VECTOR vSin = _mm_set_ss(SinAngle); + M3D_VECTOR vCos = _mm_set_ss(CosAngle); + // x = 0,y = cos,z = sin, w = 0 + vCos = _mm_shuffle_ps(vCos, vSin, _MM_SHUFFLE(3, 0, 0, 3)); + M3D_MATRIX ret; + ret.rows[0] = M3D_MIdentityR0; + ret.rows[1] = vCos; + // x = 0,y = sin,z = cos, w = 0 + vCos = M3D_PERMUTE_PS(vCos, _MM_SHUFFLE(3, 1, 2, 0)); + // x = 0,y = -sin,z = cos, w = 0 + vCos = _mm_mul_ps(vCos, M3D_MNegateY); + ret.rows[2] = vCos; + ret.rows[3] = M3D_MIdentityR3; + return ret; +#endif +} + +inline M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept { + float SinAngle; + float CosAngle; + M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle); + +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + ret.mat[0][0] = CosAngle; + ret.mat[0][1] = 0.0f; + ret.mat[0][2] = -SinAngle; + ret.mat[0][3] = 0.0f; + + ret.mat[1][0] = 0.0f; + ret.mat[1][1] = 1.0f; + ret.mat[1][2] = 0.0f; + ret.mat[1][3] = 0.0f; + + ret.mat[2][0] = SinAngle; + ret.mat[2][1] = 0.0f; + ret.mat[2][2] = CosAngle; + ret.mat[2][3] = 0.0f; + + ret.mat[3][0] = 0.0f; + ret.mat[3][1] = 0.0f; + ret.mat[3][2] = 0.0f; + ret.mat[3][3] = 1.0f; + return ret; +#else + M3D_VECTOR vSin = _mm_set_ss(SinAngle); + M3D_VECTOR vCos = _mm_set_ss(CosAngle); + // x = sin,y = 0,z = cos, w = 0 + vSin = _mm_shuffle_ps(vSin, vCos, _MM_SHUFFLE(3, 0, 3, 0)); + M3D_MATRIX ret; + ret.rows[2] = vSin; + ret.rows[1] = M3D_MIdentityR1; + // x = cos,y = 0,z = sin, w = 0 + vSin = M3D_PERMUTE_PS(vSin, _MM_SHUFFLE(3, 0, 1, 2)); + // x = cos,y = 0,z = -sin, w = 0 + vSin = _mm_mul_ps(vSin, M3D_MNegateZ); + ret.rows[0] = vSin; + ret.rows[3] = M3D_MIdentityR3; + return ret; +#endif +} + +inline M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept { + float SinAngle; + float CosAngle; + M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle); + +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + ret.mat[0][0] = CosAngle; + 
ret.mat[0][1] = SinAngle; + ret.mat[0][2] = 0.0f; + ret.mat[0][3] = 0.0f; + + ret.mat[1][0] = -SinAngle; + ret.mat[1][1] = CosAngle; + ret.mat[1][2] = 0.0f; + ret.mat[1][3] = 0.0f; + + ret.mat[2][0] = 0.0f; + ret.mat[2][1] = 0.0f; + ret.mat[2][2] = 1.0f; + ret.mat[2][3] = 0.0f; + + ret.mat[3][0] = 0.0f; + ret.mat[3][1] = 0.0f; + ret.mat[3][2] = 0.0f; + ret.mat[3][3] = 1.0f; + return ret; +#else + M3D_VECTOR vSin = _mm_set_ss(SinAngle); + M3D_VECTOR vCos = _mm_set_ss(CosAngle); + // x = cos,y = sin,z = 0, w = 0 + vCos = _mm_unpacklo_ps(vCos, vSin); + M3D_MATRIX ret; + ret.rows[0] = vCos; + // x = sin,y = cos,z = 0, w = 0 + vCos = M3D_PERMUTE_PS(vCos, _MM_SHUFFLE(3, 2, 0, 1)); + // x = -sin,y = cos,z = 0, w = 0 + vCos = _mm_mul_ps(vCos, M3D_MNegateX); + ret.rows[1] = vCos; + ret.rows[2] = M3D_MIdentityR2; + ret.rows[3] = M3D_MIdentityR3; + return ret; +#endif +} + +inline M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept { + const float widthDiv2 = _w / 2; + const float heightDiv2 = _h / 2; + +#ifdef DISABLE_INTRINSICS + M3D_MATRIX ret; + ret.mat[0][0] = widthDiv2; + ret.mat[0][1] = 0.0f; + ret.mat[0][2] = 0.0f; + ret.mat[0][3] = 0.0f; + + ret.mat[1][0] = 0.0f; + ret.mat[1][1] = -heightDiv2; + ret.mat[1][2] = 0.0f; + ret.mat[1][3] = 0.0f; + + ret.mat[2][0] = 0.0f; + ret.mat[2][1] = 0.0f; + ret.mat[2][2] = 1.0f; // maxZ-minZ ignored + ret.mat[2][3] = 0.0f; // minZ ignored + + ret.mat[3][0] = _wOffset + widthDiv2; + ret.mat[3][1] = _hOffset + heightDiv2; + ret.mat[3][2] = 0.0f; + ret.mat[3][3] = 1.0f; + return ret; +#else + M3D_MATRIX ret; + ret.rows[0] = M3D_V4Set(widthDiv2, 0, 0, 0); + ret.rows[1] = M3D_V4Set(0, -heightDiv2, 0, 0); + ret.rows[2] = M3D_MIdentityR2.v; // maxZ-minZ and minZ are ignored + ret.rows[3] = M3D_V4Set(_wOffset + widthDiv2, _hOffset + heightDiv2, 0, 1); + return ret; +#endif +} \ No newline at end of file diff --git a/Engine/Utils/MeshHelper.hpp b/Engine/Utils/MeshHelper.hpp new file mode 100644 
index 0000000..2ffd58b --- /dev/null +++ b/Engine/Utils/MeshHelper.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include "../Utils/3DMaths.hpp" + + +struct MeshVertex { + MeshVertex() = default; + + MeshVertex(const MeshVertex&) = default; + MeshVertex& operator=(const MeshVertex&) = default; + MeshVertex(MeshVertex&&) = default; + MeshVertex& operator=(MeshVertex&&) = default; + + MeshVertex(M3D_F3 const& _pos) noexcept : pos(_pos) {} + MeshVertex(const float _x, const float _y, const float _z) noexcept : pos(M3D_F3(_x,_y,_z)) {} + MeshVertex(M3D_VECTOR const _pos) noexcept { + M3D_V4StoreF3(&this->pos, _pos); + } + + M3D_F3 pos = {0.0f, 0.0f, 0.0f}; + M3D_F4 color = {255.0f, 255.0f, 255.0f, 255.f}; + +}; \ No newline at end of file diff --git a/srcs.list b/srcs.list index 7f2e681..ceb8172 100644 --- a/srcs.list +++ b/srcs.list @@ -5,8 +5,9 @@ set(MAIN_SCRS icon.h ) set(UTILS_SCRS - Engine/Utils/3DMaths.cpp + Engine/Utils/3DMaths.inl Engine/Utils/3DMaths.hpp + Engine/Utils/MeshHelper.hpp Engine/Utils/Timers.hpp Engine/Utils/Perfs.cpp Engine/Utils/Perfs.hpp @@ -24,8 +25,10 @@ set(GAME_SCRS Engine/World/Tank.cpp ) set(GRAPHS_SCRS - Engine/Graphics/3DGraphics.cpp + Engine/Graphics/3DRenderer.cpp + Engine/Graphics/3DRenderer.hpp Engine/Graphics/Camera.cpp + Engine/Graphics/Camera.hpp Engine/Graphics/UI.cpp Engine/Graphics/UI.hpp Engine/Graphics/DebugUI.cpp