From a8e396d08f26fdd1bbd6381b3a2477331c24a068 Mon Sep 17 00:00:00 2001 From: JackCarterSmith Date: Fri, 1 Nov 2024 16:43:19 +0100 Subject: [PATCH] GCC issue 54412 fix AVX data is not correctly aligned on the stack in the following cases: - using inline functions (GCC does not always inline them), - not passing AVX data by reference (the data is copied onto the stack dynamically), - not using the O2 or O3 optimization flags (related to the behaviors of the previous points). --- CMakeLists.txt | 12 +++++- Engine/Utils/3DMaths.hpp | 38 +++++++++++++++++ Engine/Utils/3DMaths_bs.inl | 8 ++-- Engine/Utils/3DMaths_mat.inl | 80 ++++++++++++++++++------------------ Engine/Utils/3DMaths_vec.inl | 40 ++++++++++++++++-- 5 files changed, 129 insertions(+), 49 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aff6bbc..d18a27b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ if(NOT DEFINED PROJECT_BINARY_DIR) endif() if(NOT MSVC) - add_compile_options(-Wall -march=native -mavx2 -mfma -msse4.2) + add_compile_options(-Wall) else() add_compile_options(/Wall) endif() @@ -26,6 +26,16 @@ project(ProtoTank VERSION 0.1.0 DESCRIPTION "Arcade 80s-style game with tanks" L # Compilation option option(DISABLE_CPU_OPTI "Disable CPU optimizations" OFF) +if(NOT DISABLE_CPU_OPTI) + if(NOT MSVC) + #add_compile_options(-march=native -mtune=generic) + add_compile_options(-march=native -mtune=native) + add_compile_options(-msse2 -msse4.2 -mavx -mavx2 -mfma) + else() + add_compile_options(/fp:fast /arch:SSE2 /arch:SSE4.2 /arch:AVX /arch:AVX2 /GL) + endif() +endif() + include(FindPkgConfig) include(CheckIncludeFile) include(CheckCSourceCompiles) diff --git a/Engine/Utils/3DMaths.hpp b/Engine/Utils/3DMaths.hpp index 81439d6..ffccaf1 100644 --- a/Engine/Utils/3DMaths.hpp +++ b/Engine/Utils/3DMaths.hpp @@ -15,6 +15,12 @@ #error This header requires C++ #endif +#ifdef _MSC_VER +#define INLINE_AVX_FIX inline /* no forced inlining here, but header-defined functions must stay inline to avoid duplicate-symbol link errors */ +#else +#define INLINE_AVX_FIX __attribute__((always_inline)) inline +#endif + #ifndef DISABLE_INTRINSICS 
#ifdef NO_MOVNT @@ -85,6 +91,11 @@ constexpr uint32_t M3D_PERMUTE_1Y = 5; constexpr uint32_t M3D_PERMUTE_1Z = 6; constexpr uint32_t M3D_PERMUTE_1W = 7; +constexpr uint32_t M3D_SWIZZLE_X = 0; +constexpr uint32_t M3D_SWIZZLE_Y = 1; +constexpr uint32_t M3D_SWIZZLE_Z = 2; +constexpr uint32_t M3D_SWIZZLE_W = 3; + constexpr float M3D_Deg2Rad(float a) noexcept { return a * (M3D_PI / 180.0f); } constexpr float M3D_Rad2Deg(float a) noexcept { return a * (180.0f / M3D_PI); } @@ -358,6 +369,7 @@ M3D_VECTOR M3D_V4SetX(M3D_VECTOR V, float x) noexcept; M3D_VECTOR M3D_V4SetY(M3D_VECTOR V, float y) noexcept; M3D_VECTOR M3D_V4SetZ(M3D_VECTOR V, float z) noexcept; M3D_VECTOR M3D_V4SetW(M3D_VECTOR V, float w) noexcept; +M3D_VECTOR M3D_V4Swizzle(M3D_VECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3) noexcept; M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW) noexcept; M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept; M3D_VECTOR M3D_V4SplatY(M3D_VECTOR V) noexcept; @@ -376,6 +388,7 @@ M3D_VECTOR M3D_V4NegativeMultiplySubtract(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECT bool M3D_V4EqualInt(M3D_VECTOR V1, M3D_VECTOR V2) noexcept; M3D_VECTOR M3D_V4Abs(M3D_VECTOR V) noexcept; M3D_VECTOR M3D_V4Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept; +M3D_VECTOR M3D_V4LengthSq(M3D_VECTOR V) noexcept; M3D_VECTOR M3D_V4Length(M3D_VECTOR V) noexcept; M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept; M3D_VECTOR M3D_V4Select(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR Control) noexcept; @@ -483,6 +496,31 @@ template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 6, 7>(M3D_VECTOR V1, M3D_VECTOR template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xE); } #endif +template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW> /* compile-time lane indices, see M3D_SWIZZLE_X..W */ +inline M3D_VECTOR M3D_V4Swizzle(M3D_VECTOR V) noexcept { +#ifndef DISABLE_INTRINSICS + return M3D_PERMUTE_PS(V, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX)); +#else + return 
M3D_V4Swizzle(V, SwizzleX, SwizzleY, SwizzleZ, SwizzleW); +#endif +} + +#if !defined(DISABLE_INTRINSICS) + template<> inline M3D_VECTOR M3D_V4Swizzle<0, 1, 0, 1>(M3D_VECTOR V) noexcept { return _mm_movelh_ps(V, V); } + template<> inline M3D_VECTOR M3D_V4Swizzle<2, 3, 2, 3>(M3D_VECTOR V) noexcept { return _mm_movehl_ps(V, V); } + template<> inline M3D_VECTOR M3D_V4Swizzle<0, 0, 1, 1>(M3D_VECTOR V) noexcept { return _mm_unpacklo_ps(V, V); } + template<> inline M3D_VECTOR M3D_V4Swizzle<2, 2, 3, 3>(M3D_VECTOR V) noexcept { return _mm_unpackhi_ps(V, V); } +#endif + +#if defined(SSE3_INTRINSICS) && !defined(DISABLE_INTRINSICS) + template<> inline M3D_VECTOR M3D_V4Swizzle<0, 0, 2, 2>(M3D_VECTOR V) noexcept { return _mm_moveldup_ps(V); } + template<> inline M3D_VECTOR M3D_V4Swizzle<1, 1, 3, 3>(M3D_VECTOR V) noexcept { return _mm_movehdup_ps(V); } +#endif + +#if defined(AVX2_INTRINSICS) && !defined(DISABLE_INTRINSICS) + template<> inline M3D_VECTOR M3D_V4Swizzle<0, 0, 0, 0>(M3D_VECTOR V) noexcept { return _mm_broadcastss_ps(V); } +#endif + M3D_VECTOR M3D_QMultiply(M3D_VECTOR Q1, M3D_VECTOR Q2) noexcept; M3D_VECTOR M3D_QConjugate(M3D_VECTOR Q) noexcept; diff --git a/Engine/Utils/3DMaths_bs.inl b/Engine/Utils/3DMaths_bs.inl index 6536698..24ade29 100644 --- a/Engine/Utils/3DMaths_bs.inl +++ b/Engine/Utils/3DMaths_bs.inl @@ -128,7 +128,7 @@ inline void M3D_BoundingBox::CreateFromPoints(M3D_BoundingBox& Out, size_t Count M3D_V4StoreF3(&Out.Extents, M3D_V4Scale(M3D_V4Subtract(vMax, vMin), 0.5f)); } -inline void M3D_BoundingBox::Transform(M3D_BoundingBox& Out, M3D_MATRIX M) const noexcept { +INLINE_AVX_FIX void M3D_BoundingBox::Transform(M3D_BoundingBox& Out, M3D_MATRIX M) const noexcept { // Load center and extents. 
M3D_VECTOR vCenter = M3D_V4LoadF3(&Center); M3D_VECTOR vExtents = M3D_V4LoadF3(&Extents); @@ -163,11 +163,11 @@ inline void M3D_BoundingBox::GetCorners(M3D_F3* Corners) const noexcept { } } -inline M3D_BoundingFrustum::M3D_BoundingFrustum(M3D_MATRIX Projection, bool rhcoords) noexcept { +INLINE_AVX_FIX M3D_BoundingFrustum::M3D_BoundingFrustum(M3D_MATRIX Projection, bool rhcoords) noexcept { CreateFromMatrix(*this, Projection, rhcoords); } -inline void M3D_BoundingFrustum::Transform(M3D_BoundingFrustum& Out, M3D_MATRIX M) const noexcept { +INLINE_AVX_FIX void M3D_BoundingFrustum::Transform(M3D_BoundingFrustum& Out, M3D_MATRIX M) const noexcept { // Load the frustum. M3D_VECTOR vOrigin = M3D_V4LoadF3(&Origin); M3D_VECTOR vOrientation = M3D_V4LoadF4(&Orientation); @@ -318,7 +318,7 @@ inline void M3D_BoundingFrustum::GetPlanes(M3D_VECTOR* NearPlane, M3D_VECTOR* Fa } } -inline void M3D_BoundingFrustum::CreateFromMatrix(M3D_BoundingFrustum& Out, M3D_MATRIX Projection, bool rhcoords) noexcept { +INLINE_AVX_FIX void M3D_BoundingFrustum::CreateFromMatrix(M3D_BoundingFrustum& Out, M3D_MATRIX Projection, bool rhcoords) noexcept { // Corners of the projection frustum in NDC space. 
static M3D_V4F32 NDCPoints[6] = { {{{1.0f, 0.0f, 1.0f, 1.0f}}}, // right (at far plane) diff --git a/Engine/Utils/3DMaths_mat.inl b/Engine/Utils/3DMaths_mat.inl index ab39c2b..13a8350 100644 --- a/Engine/Utils/3DMaths_mat.inl +++ b/Engine/Utils/3DMaths_mat.inl @@ -13,7 +13,7 @@ inline M3D_MATRIX::M3D_MATRIX( rows[3] = M3D_V4Set(f30, f31, f32, f33); } -inline M3D_MATRIX M3D_MATRIX::operator- () const noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator- () const noexcept { M3D_MATRIX ret; ret.rows[0] = M3D_V4Negate(rows[0]); ret.rows[1] = M3D_V4Negate(rows[1]); @@ -22,14 +22,14 @@ inline M3D_MATRIX M3D_MATRIX::operator- () const noexcept { return ret; } -inline M3D_MATRIX& M3D_MATRIX::operator+= (M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator+= (M3D_MATRIX M) noexcept { rows[0] = M3D_V4Add(rows[0], M.rows[0]); rows[1] = M3D_V4Add(rows[1], M.rows[1]); rows[2] = M3D_V4Add(rows[2], M.rows[2]); rows[3] = M3D_V4Add(rows[3], M.rows[3]); return *this; } -inline M3D_MATRIX M3D_MATRIX::operator+ (M3D_MATRIX M) const noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator+ (M3D_MATRIX M) const noexcept { M3D_MATRIX ret; ret.rows[0] = M3D_V4Add(rows[0], M.rows[0]); ret.rows[1] = M3D_V4Add(rows[1], M.rows[1]); @@ -38,14 +38,14 @@ inline M3D_MATRIX M3D_MATRIX::operator+ (M3D_MATRIX M) const noexcept { return ret; } -inline M3D_MATRIX& M3D_MATRIX::operator-= (M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator-= (M3D_MATRIX M) noexcept { rows[0] = M3D_V4Subtract(rows[0], M.rows[0]); rows[1] = M3D_V4Subtract(rows[1], M.rows[1]); rows[2] = M3D_V4Subtract(rows[2], M.rows[2]); rows[3] = M3D_V4Subtract(rows[3], M.rows[3]); return *this; } -inline M3D_MATRIX M3D_MATRIX::operator- (M3D_MATRIX M) const noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator- (M3D_MATRIX M) const noexcept { M3D_MATRIX ret; ret.rows[0] = M3D_V4Subtract(rows[0], M.rows[0]); ret.rows[1] = M3D_V4Subtract(rows[1], M.rows[1]); @@ -54,22 +54,22 @@ 
inline M3D_MATRIX M3D_MATRIX::operator- (M3D_MATRIX M) const noexcept { return ret; } -inline M3D_MATRIX& M3D_MATRIX::operator*=(M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator*=(M3D_MATRIX M) noexcept { *this = M3D_MMultiply(*this, M); return *this; } -inline M3D_MATRIX M3D_MATRIX::operator*(M3D_MATRIX M) const noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator*(M3D_MATRIX M) const noexcept { return M3D_MMultiply(*this, M); } -inline M3D_MATRIX& M3D_MATRIX::operator*= (float S) noexcept { +INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator*= (float S) noexcept { rows[0] = M3D_V4Scale(rows[0], S); rows[1] = M3D_V4Scale(rows[1], S); rows[2] = M3D_V4Scale(rows[2], S); rows[3] = M3D_V4Scale(rows[3], S); return *this; } -inline M3D_MATRIX M3D_MATRIX::operator* (float S) const noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator* (float S) const noexcept { M3D_MATRIX ret; ret.rows[0] = M3D_V4Scale(rows[0], S); ret.rows[1] = M3D_V4Scale(rows[1], S); @@ -77,7 +77,7 @@ inline M3D_MATRIX M3D_MATRIX::operator* (float S) const noexcept { ret.rows[3] = M3D_V4Scale(rows[3], S); return ret; } -inline M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept { M3D_MATRIX ret; ret.rows[0] = M3D_V4Scale(M.rows[0], S); ret.rows[1] = M3D_V4Scale(M.rows[1], S); @@ -86,7 +86,7 @@ inline M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept { return ret; } -inline M3D_MATRIX& M3D_MATRIX::operator/= (float S) noexcept { +INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator/= (float S) noexcept { #ifdef DISABLE_INTRINSICS M3D_VECTOR vS = M3D_V4Replicate(S); rows[0] = M3D_V4Divide(rows[0], vS); @@ -103,7 +103,7 @@ inline M3D_MATRIX& M3D_MATRIX::operator/= (float S) noexcept { return *this; #endif } -inline M3D_MATRIX M3D_MATRIX::operator/ (float S) const noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator/ (float S) const noexcept { #ifdef DISABLE_INTRINSICS M3D_VECTOR vS = 
M3D_V4Replicate(S); M3D_MATRIX ret; @@ -135,7 +135,7 @@ inline M3D_MATRIX M3D_MIdentity() noexcept { return ret; } -inline M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept { #ifdef DISABLE_INTRINSICS M3D_MATRIX ret; // Cache the invariants in registers @@ -309,7 +309,7 @@ inline M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept { #endif } -inline M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept { #ifdef DISABLE_INTRINSICS // Original matrix: // @@ -374,7 +374,7 @@ inline M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept { #endif } -inline M3D_MATRIX M3D_MInverse(M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_MInverse(M3D_MATRIX M) noexcept { #ifdef DISABLE_INTRINSICS M3D_MATRIX MT = M3D_MTranspose(M); @@ -591,7 +591,7 @@ inline M3D_MATRIX M3D_MInverse(M3D_MATRIX M) noexcept { /* -------------------------------------------------------------------------------------------------------------------------- */ -inline M3D_VECTOR M3D_QRotationMatrix(M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_VECTOR M3D_QRotationMatrix(M3D_MATRIX M) noexcept { #ifdef DISABLE_INTRINSICS M3D_V4F32 q; float r22 = M.mat[2][2]; @@ -733,7 +733,7 @@ inline M3D_VECTOR M3D_V3Rotate(M3D_VECTOR V, M3D_VECTOR RotationQuaternion) noex return M3D_QMultiply(Result, RotationQuaternion); } -inline M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept { #ifdef DISABLE_INTRINSICS M3D_VECTOR Z = M3D_V4SplatZ(V); M3D_VECTOR Y = M3D_V4SplatY(V); @@ -755,7 +755,7 @@ inline M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept { #endif } -inline void M3D_V3Transform( +INLINE_AVX_FIX void M3D_V3Transform( M3D_F4* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, @@ -972,7 +972,7 @@ inline void M3D_V3Transform( 
#endif } -inline M3D_VECTOR M3D_V3TransformNormal(M3D_VECTOR V, M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_VECTOR M3D_V3TransformNormal(M3D_VECTOR V, M3D_MATRIX M) noexcept { #ifdef DISABLE_INTRINSICS M3D_VECTOR Z = M3D_V4SplatZ(V); M3D_VECTOR Y = M3D_V4SplatY(V); @@ -994,7 +994,7 @@ inline M3D_VECTOR M3D_V3TransformNormal(M3D_VECTOR V, M3D_MATRIX M) noexcept { #endif } -inline M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept { M3D_VECTOR Z = M3D_V4SplatZ(V); M3D_VECTOR Y = M3D_V4SplatY(V); M3D_VECTOR X = M3D_V4SplatX(V); @@ -1007,7 +1007,7 @@ inline M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept { return M3D_V4Divide(Result, W); } -inline void M3D_V3TransformPersDiv( +INLINE_AVX_FIX void M3D_V3TransformPersDiv( M3D_F3* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, @@ -1321,7 +1321,7 @@ inline void M3D_V3TransformPersDiv( #endif } -inline M3D_VECTOR M3D_V4Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept { +INLINE_AVX_FIX M3D_VECTOR M3D_V4Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept { #ifdef DISABLE_INTRINSICS M3D_VECTOR W = M3D_V4SplatW(V); M3D_VECTOR Z = M3D_V4SplatZ(V); @@ -1346,7 +1346,7 @@ inline M3D_VECTOR M3D_V4Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept { #endif } -inline void M3D_V4Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F4* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept { +INLINE_AVX_FIX void M3D_V4Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F4* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept { auto pInputVector = reinterpret_cast(pInputStream); auto pOutputVector = reinterpret_cast(pOutputStream); @@ -1593,17 +1593,17 @@ inline M3D_VECTOR M3D_V3TransformNDCToViewport(M3D_VECTOR V, float vpX, float vp /* 
-------------------------------------------------------------------------------------------------------------------------- */ -inline M3D_MATRIX M3D_TransformMatrixCamLookAtLH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookAtLH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept { M3D_VECTOR dir = M3D_V4Subtract(focusPos, viewPos); return M3D_TransformMatrixCamLookToLH(viewPos, dir, upDirection); } -inline M3D_MATRIX M3D_TransformMatrixCamLookAtRH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookAtRH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept { M3D_VECTOR dir_n = M3D_V4Subtract(viewPos, focusPos); return M3D_TransformMatrixCamLookToLH(viewPos, dir_n, upDirection); } -inline M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept { // Keep viewer's axes orthogonal to each other and of unit length M3D_VECTOR look_normal = M3D_V3Normalize(viewDirection); M3D_VECTOR up_norm = M3D_V3Normalize(M3D_V3Cross(upDirection, look_normal)); @@ -1628,12 +1628,12 @@ inline M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR return ret; } -inline M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept { M3D_VECTOR viewDirection_n = M3D_V4Negate(viewDirection); return M3D_TransformMatrixCamLookToLH(viewPos, viewDirection_n, upDirection); } -inline M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept { 
+INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept { float SinFov; float CosFov; M3D_ScalarSinCos(&SinFov, &CosFov, 0.5f * fov); @@ -1691,7 +1691,7 @@ inline M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float #endif } -inline M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept { float SinFov; float CosFov; M3D_ScalarSinCos(&SinFov, &CosFov, 0.5f * fov); @@ -1749,7 +1749,7 @@ inline M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float #endif } -inline M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept { #ifdef DISABLE_INTRINSICS M3D_MATRIX ret; ret.mat[0][0] = 1.0f; @@ -1782,7 +1782,7 @@ inline M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept { #endif } -inline M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept { #ifdef DISABLE_INTRINSICS M3D_MATRIX ret; ret.mat[0][0] = ScaleX; @@ -1815,7 +1815,7 @@ inline M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float Sca #endif } -inline M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept { #ifdef DISABLE_INTRINSICS M3D_MATRIX ret; ret.mat[0][0] = Scale.v4f[0]; @@ -1848,7 +1848,7 @@ inline M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept { #endif } -inline M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept { #ifdef 
DISABLE_INTRINSICS M3D_MATRIX ret; ret.mat[0][0] = 1.0f; @@ -1881,7 +1881,7 @@ inline M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, flo #endif } -inline M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept { float SinAngle; float CosAngle; M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle); @@ -1926,7 +1926,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept { #endif } -inline M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept { float SinAngle; float CosAngle; M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle); @@ -1971,7 +1971,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept { #endif } -inline M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept { float SinAngle; float CosAngle; M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle); @@ -2016,7 +2016,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept { #endif } -inline M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept { #ifdef DISABLE_INTRINSICS float cp = cosf(Angles.v4f[0]); float sp = sinf(Angles.v4f[0]); @@ -2082,7 +2082,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept { #endif } -inline M3D_MATRIX M3D_TransformMatrixRotationNormal(M3D_VECTOR NormalAxis, float Angle) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationNormal(M3D_VECTOR NormalAxis, float Angle) noexcept { float fSinAngle; float fCosAngle; M3D_ScalarSinCos(&fSinAngle, &fCosAngle, Angle); @@ -2159,14 +2159,14 @@ inline M3D_MATRIX M3D_TransformMatrixRotationNormal(M3D_VECTOR NormalAxis, float #endif } -inline M3D_MATRIX M3D_TransformMatrixRotationAxis(M3D_VECTOR axis, 
float angle) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationAxis(M3D_VECTOR axis, float angle) noexcept { M3D_VECTOR nv = M3D_V3Normalize(axis); return M3D_TransformMatrixRotationNormal(nv, angle); } //TODO: transform matrix is incomplete //v_tri[v_cnt].position.z = ((far+near)/2)+((far-near)/2)*_2dCoord.z; -inline M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept { const float widthDiv2 = _w / 2; const float heightDiv2 = _h / 2; diff --git a/Engine/Utils/3DMaths_vec.inl b/Engine/Utils/3DMaths_vec.inl index 7e643ff..453849b 100644 --- a/Engine/Utils/3DMaths_vec.inl +++ b/Engine/Utils/3DMaths_vec.inl @@ -175,7 +175,7 @@ inline void M3D_V4StoreF4A(M3D_F4A* dst, M3D_VECTOR V) noexcept { #endif } -inline M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept { #ifdef DISABLE_INTRINSICS M3D_MATRIX ret; ret.rows[0].v4f[0] = src->mat[0][0]; @@ -208,7 +208,7 @@ inline M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept { #endif } -inline M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept { +INLINE_AVX_FIX M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept { #ifdef DISABLE_INTRINSICS M3D_MATRIX ret; ret.rows[0].v4f[0] = src->mat[0][0]; @@ -241,7 +241,7 @@ inline M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept { #endif } -inline void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept { +INLINE_AVX_FIX void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept { #ifdef DISABLE_INTRINSICS dst->mat[0][0] = M.rows[0].v4f[0]; dst->mat[0][1] = M.rows[0].v4f[1]; @@ -270,7 +270,7 @@ inline void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept { #endif } -inline void M3D_V4StoreF4x4A(M3D_F4X4A* dst, M3D_MATRIX M) noexcept { +INLINE_AVX_FIX void M3D_V4StoreF4x4A(M3D_F4X4A* dst, M3D_MATRIX M) 
noexcept { #ifdef DISABLE_INTRINSICS dst->mat[0][0] = M.rows[0].v4f[0]; dst->mat[0][1] = M.rows[0].v4f[1]; @@ -548,6 +548,34 @@ inline M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX, #endif } +inline M3D_VECTOR M3D_V4Swizzle(M3D_VECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3) noexcept { /* result lane i takes V's lane Ei; indices are not range-checked */ +#ifdef DISABLE_INTRINSICS + M3D_V4F32 Result = {{{ + V.v4f[E0], + V.v4f[E1], + V.v4f[E2], + V.v4f[E3] + }}}; + return Result.v; +#elif defined(AVX_INTRINSICS) + unsigned int elem[4] = { E0, E1, E2, E3 }; + __m128i vControl = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&elem[0])); + return _mm_permutevar_ps(V, vControl); +#else + auto aPtr = reinterpret_cast<const uint32_t*>(&V); + + M3D_VECTOR Result; + auto pWork = reinterpret_cast<uint32_t*>(&Result); + + pWork[0] = aPtr[E0]; + pWork[1] = aPtr[E1]; + pWork[2] = aPtr[E2]; + pWork[3] = aPtr[E3]; + + return Result; +#endif +} + inline M3D_VECTOR M3D_V4SplatOne() noexcept { #ifdef DISABLE_INTRINSICS M3D_V4F32 vResult; @@ -816,6 +844,10 @@ inline M3D_VECTOR M3D_V4Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { #endif } +inline M3D_VECTOR M3D_V4LengthSq(M3D_VECTOR V) noexcept { + return M3D_V4Dot(V, V); +} + inline M3D_VECTOR M3D_V4Length(M3D_VECTOR V) noexcept { #ifdef DISABLE_INTRINSICS M3D_VECTOR Result;