GCC issue 54412 fix

AVX data is not correctly aligned on the stack in the following cases:
- when using inline functions (GCC does not always inline the function),
- when AVX data is not passed by reference (the data is then copied onto the stack dynamically),
- when the O2 or O3 optimization flags are not used (this affects the behavior described in the previous points).
This commit is contained in:
JackCarterSmith 2024-11-01 16:43:19 +01:00
parent aa871b8b76
commit a8e396d08f
Signed by: JackCarterSmith
GPG Key ID: 832E52F4E23F8F24
5 changed files with 129 additions and 49 deletions

View File

@ -11,7 +11,7 @@ if(NOT DEFINED PROJECT_BINARY_DIR)
endif()
if(NOT MSVC)
add_compile_options(-Wall -march=native -mavx2 -mfma -msse4.2)
add_compile_options(-Wall)
else()
add_compile_options(/Wall)
endif()
@ -26,6 +26,16 @@ project(ProtoTank VERSION 0.1.0 DESCRIPTION "Arcade 80s-style game with tanks" L
# Compilation option
option(DISABLE_CPU_OPTI "Disable CPU optimizations" OFF)

# Enable CPU-specific code generation unless the user opted out with
# -DDISABLE_CPU_OPTI=ON.
if(NOT DISABLE_CPU_OPTI)
  if(NOT MSVC)
    # Tune and generate code for the build host, and explicitly enable the
    # SIMD extensions listed below.
    #add_compile_options(-march=native -mtune=generic)
    add_compile_options(-march=native -mtune=native)
    add_compile_options(-msse2 -msse4.2 -mavx -mavx2 -mfma)
  else()
    # cl honours a single /arch switch: when several are given, the last one
    # overrides the others and a command-line warning is emitted for each
    # overridden switch. Pass only the effective one (/arch:AVX2).
    add_compile_options(/fp:fast /arch:AVX2 /GL)
  endif()
endif()
include(FindPkgConfig)
include(CheckIncludeFile)
include(CheckCSourceCompiles)

View File

@ -15,6 +15,12 @@
#error This header requires C++
#endif
// INLINE_AVX_FIX replaces the plain `inline` specifier on functions that take
// or return SIMD data by value.
//  - Non-MSVC compilers: force inlining with the always_inline attribute.
//  - MSVC: the forced-inlining attribute is not applied, but the macro must
//    still expand to `inline`; these functions are defined in headers, so
//    dropping the specifier would turn them into ordinary external definitions
//    and produce duplicate symbols when the header is included from several
//    translation units.
#ifdef _MSC_VER
#define INLINE_AVX_FIX inline
#else
#define INLINE_AVX_FIX __attribute__((always_inline)) inline
#endif
#ifndef DISABLE_INTRINSICS
#ifdef NO_MOVNT
@ -85,6 +91,11 @@ constexpr uint32_t M3D_PERMUTE_1Y = 5;
constexpr uint32_t M3D_PERMUTE_1Z = 6;
constexpr uint32_t M3D_PERMUTE_1W = 7;
constexpr uint32_t M3D_SWIZZLE_X = 0;
constexpr uint32_t M3D_SWIZZLE_Y = 1;
constexpr uint32_t M3D_SWIZZLE_Z = 2;
constexpr uint32_t M3D_SWIZZLE_W = 3;
// Converts an angle from degrees to radians by scaling it with M3D_PI / 180.
constexpr float M3D_Deg2Rad(float a) noexcept { return a * (M3D_PI / 180.0f); }
// Converts an angle from radians to degrees by scaling it with 180 / M3D_PI.
constexpr float M3D_Rad2Deg(float a) noexcept { return a * (180.0f / M3D_PI); }
@ -358,6 +369,7 @@ M3D_VECTOR M3D_V4SetX(M3D_VECTOR V, float x) noexcept;
M3D_VECTOR M3D_V4SetY(M3D_VECTOR V, float y) noexcept;
M3D_VECTOR M3D_V4SetZ(M3D_VECTOR V, float z) noexcept;
M3D_VECTOR M3D_V4SetW(M3D_VECTOR V, float w) noexcept;
M3D_VECTOR M3D_V4Swizzle(M3D_VECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3) noexcept;
M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW) noexcept;
M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4SplatY(M3D_VECTOR V) noexcept;
@ -376,6 +388,7 @@ M3D_VECTOR M3D_V4NegativeMultiplySubtract(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECT
bool M3D_V4EqualInt(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
M3D_VECTOR M3D_V4Abs(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
M3D_VECTOR M3D_V4LengthSq(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4Length(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept;
M3D_VECTOR M3D_V4Select(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR Control) noexcept;
@ -483,6 +496,31 @@ template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 6, 7>(M3D_VECTOR V1, M3D_VECTOR
template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xE); }
#endif
// Compile-time swizzle: returns a vector whose X/Y/Z/W components are the
// components of V selected by SwizzleX/SwizzleY/SwizzleZ/SwizzleW (each 0..3).
//
// With intrinsics enabled the four selectors are packed into one shuffle
// immediate; otherwise the call is forwarded to the runtime-index overload.
template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW>
inline M3D_VECTOR M3D_V4Swizzle(M3D_VECTOR V) noexcept {
    // _MM_SHUFFLE gives each selector two bits, so a value above 3 would spill
    // into a neighbouring selector; in the fallback path it would index past
    // the four components. Reject such instantiations at compile time.
    static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
    static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
    static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
    static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");

#ifndef DISABLE_INTRINSICS
    return M3D_PERMUTE_PS(V, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
#else
    return M3D_V4Swizzle(V, SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
#endif
}
// Specializations of the compile-time swizzle for patterns that a single SSE
// move/unpack instruction can produce (only when intrinsics are enabled).
#if !defined(DISABLE_INTRINSICS)
// (x, y, x, y): low pair copied into both halves.
template<> inline M3D_VECTOR M3D_V4Swizzle<0, 1, 0, 1>(M3D_VECTOR V) noexcept { return _mm_movelh_ps(V, V); }
// (z, w, z, w): high pair copied into both halves.
template<> inline M3D_VECTOR M3D_V4Swizzle<2, 3, 2, 3>(M3D_VECTOR V) noexcept { return _mm_movehl_ps(V, V); }
// (x, x, y, y): low pair interleaved with itself.
template<> inline M3D_VECTOR M3D_V4Swizzle<0, 0, 1, 1>(M3D_VECTOR V) noexcept { return _mm_unpacklo_ps(V, V); }
// (z, z, w, w): high pair interleaved with itself.
template<> inline M3D_VECTOR M3D_V4Swizzle<2, 2, 3, 3>(M3D_VECTOR V) noexcept { return _mm_unpackhi_ps(V, V); }
#endif
// Specializations that use the SSE3 duplicate-move instructions; compiled only
// when SSE3_INTRINSICS is defined and intrinsics are not disabled.
#if defined(SSE3_INTRINSICS) && !defined(DISABLE_INTRINSICS)
// (x, x, z, z): even-indexed components duplicated.
template<> inline M3D_VECTOR M3D_V4Swizzle<0, 0, 2, 2>(M3D_VECTOR V) noexcept { return _mm_moveldup_ps(V); }
// (y, y, w, w): odd-indexed components duplicated.
template<> inline M3D_VECTOR M3D_V4Swizzle<1, 1, 3, 3>(M3D_VECTOR V) noexcept { return _mm_movehdup_ps(V); }
#endif
// Specialization that uses the AVX2 broadcast instruction; compiled only when
// AVX2_INTRINSICS is defined and intrinsics are not disabled.
#if defined(AVX2_INTRINSICS) && !defined(DISABLE_INTRINSICS)
// (x, x, x, x): first component replicated into every lane.
template<> inline M3D_VECTOR M3D_V4Swizzle<0, 0, 0, 0>(M3D_VECTOR V) noexcept { return _mm_broadcastss_ps(V); }
#endif
M3D_VECTOR M3D_QMultiply(M3D_VECTOR Q1, M3D_VECTOR Q2) noexcept;
M3D_VECTOR M3D_QConjugate(M3D_VECTOR Q) noexcept;

View File

@ -128,7 +128,7 @@ inline void M3D_BoundingBox::CreateFromPoints(M3D_BoundingBox& Out, size_t Count
M3D_V4StoreF3(&Out.Extents, M3D_V4Scale(M3D_V4Subtract(vMax, vMin), 0.5f));
}
inline void M3D_BoundingBox::Transform(M3D_BoundingBox& Out, M3D_MATRIX M) const noexcept {
INLINE_AVX_FIX void M3D_BoundingBox::Transform(M3D_BoundingBox& Out, M3D_MATRIX M) const noexcept {
// Load center and extents.
M3D_VECTOR vCenter = M3D_V4LoadF3(&Center);
M3D_VECTOR vExtents = M3D_V4LoadF3(&Extents);
@ -163,11 +163,11 @@ inline void M3D_BoundingBox::GetCorners(M3D_F3* Corners) const noexcept {
}
}
inline M3D_BoundingFrustum::M3D_BoundingFrustum(M3D_MATRIX Projection, bool rhcoords) noexcept {
// Constructs the frustum from a projection matrix by delegating to
// CreateFromMatrix, which fills in this object; `rhcoords` is forwarded as-is.
INLINE_AVX_FIX M3D_BoundingFrustum::M3D_BoundingFrustum(M3D_MATRIX Projection, bool rhcoords) noexcept {
CreateFromMatrix(*this, Projection, rhcoords);
}
inline void M3D_BoundingFrustum::Transform(M3D_BoundingFrustum& Out, M3D_MATRIX M) const noexcept {
INLINE_AVX_FIX void M3D_BoundingFrustum::Transform(M3D_BoundingFrustum& Out, M3D_MATRIX M) const noexcept {
// Load the frustum.
M3D_VECTOR vOrigin = M3D_V4LoadF3(&Origin);
M3D_VECTOR vOrientation = M3D_V4LoadF4(&Orientation);
@ -318,7 +318,7 @@ inline void M3D_BoundingFrustum::GetPlanes(M3D_VECTOR* NearPlane, M3D_VECTOR* Fa
}
}
inline void M3D_BoundingFrustum::CreateFromMatrix(M3D_BoundingFrustum& Out, M3D_MATRIX Projection, bool rhcoords) noexcept {
INLINE_AVX_FIX void M3D_BoundingFrustum::CreateFromMatrix(M3D_BoundingFrustum& Out, M3D_MATRIX Projection, bool rhcoords) noexcept {
// Corners of the projection frustum in NDC space.
static M3D_V4F32 NDCPoints[6] = {
{{{1.0f, 0.0f, 1.0f, 1.0f}}}, // right (at far plane)

View File

@ -13,7 +13,7 @@ inline M3D_MATRIX::M3D_MATRIX(
rows[3] = M3D_V4Set(f30, f31, f32, f33);
}
inline M3D_MATRIX M3D_MATRIX::operator- () const noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator- () const noexcept {
M3D_MATRIX ret;
ret.rows[0] = M3D_V4Negate(rows[0]);
ret.rows[1] = M3D_V4Negate(rows[1]);
@ -22,14 +22,14 @@ inline M3D_MATRIX M3D_MATRIX::operator- () const noexcept {
return ret;
}
inline M3D_MATRIX& M3D_MATRIX::operator+= (M3D_MATRIX M) noexcept {
INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator+= (M3D_MATRIX M) noexcept {
    // Component-wise addition, performed one row vector at a time.
    for (int r = 0; r < 4; ++r) {
        rows[r] = M3D_V4Add(rows[r], M.rows[r]);
    }
    return *this;
}
inline M3D_MATRIX M3D_MATRIX::operator+ (M3D_MATRIX M) const noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator+ (M3D_MATRIX M) const noexcept {
M3D_MATRIX ret;
ret.rows[0] = M3D_V4Add(rows[0], M.rows[0]);
ret.rows[1] = M3D_V4Add(rows[1], M.rows[1]);
@ -38,14 +38,14 @@ inline M3D_MATRIX M3D_MATRIX::operator+ (M3D_MATRIX M) const noexcept {
return ret;
}
inline M3D_MATRIX& M3D_MATRIX::operator-= (M3D_MATRIX M) noexcept {
INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator-= (M3D_MATRIX M) noexcept {
    // Component-wise subtraction, performed one row vector at a time.
    for (int r = 0; r < 4; ++r) {
        rows[r] = M3D_V4Subtract(rows[r], M.rows[r]);
    }
    return *this;
}
inline M3D_MATRIX M3D_MATRIX::operator- (M3D_MATRIX M) const noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator- (M3D_MATRIX M) const noexcept {
M3D_MATRIX ret;
ret.rows[0] = M3D_V4Subtract(rows[0], M.rows[0]);
ret.rows[1] = M3D_V4Subtract(rows[1], M.rows[1]);
@ -54,22 +54,22 @@ inline M3D_MATRIX M3D_MATRIX::operator- (M3D_MATRIX M) const noexcept {
return ret;
}
inline M3D_MATRIX& M3D_MATRIX::operator*=(M3D_MATRIX M) noexcept {
// In-place matrix product: replaces this matrix with M3D_MMultiply(*this, M)
// (this matrix is the left operand) and returns a reference to it.
INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator*=(M3D_MATRIX M) noexcept {
*this = M3D_MMultiply(*this, M);
return *this;
}
inline M3D_MATRIX M3D_MATRIX::operator*(M3D_MATRIX M) const noexcept {
// Matrix product: returns M3D_MMultiply(*this, M) with this matrix as the left
// operand; neither operand is modified by this operator itself.
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator*(M3D_MATRIX M) const noexcept {
return M3D_MMultiply(*this, M);
}
inline M3D_MATRIX& M3D_MATRIX::operator*= (float S) noexcept {
INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator*= (float S) noexcept {
    // Scale every row vector by the scalar S.
    for (int r = 0; r < 4; ++r) {
        rows[r] = M3D_V4Scale(rows[r], S);
    }
    return *this;
}
inline M3D_MATRIX M3D_MATRIX::operator* (float S) const noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator* (float S) const noexcept {
M3D_MATRIX ret;
ret.rows[0] = M3D_V4Scale(rows[0], S);
ret.rows[1] = M3D_V4Scale(rows[1], S);
@ -77,7 +77,7 @@ inline M3D_MATRIX M3D_MATRIX::operator* (float S) const noexcept {
ret.rows[3] = M3D_V4Scale(rows[3], S);
return ret;
}
inline M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept {
INLINE_AVX_FIX M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept {
M3D_MATRIX ret;
ret.rows[0] = M3D_V4Scale(M.rows[0], S);
ret.rows[1] = M3D_V4Scale(M.rows[1], S);
@ -86,7 +86,7 @@ inline M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept {
return ret;
}
inline M3D_MATRIX& M3D_MATRIX::operator/= (float S) noexcept {
INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator/= (float S) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_VECTOR vS = M3D_V4Replicate(S);
rows[0] = M3D_V4Divide(rows[0], vS);
@ -103,7 +103,7 @@ inline M3D_MATRIX& M3D_MATRIX::operator/= (float S) noexcept {
return *this;
#endif
}
inline M3D_MATRIX M3D_MATRIX::operator/ (float S) const noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator/ (float S) const noexcept {
#ifdef DISABLE_INTRINSICS
M3D_VECTOR vS = M3D_V4Replicate(S);
M3D_MATRIX ret;
@ -135,7 +135,7 @@ inline M3D_MATRIX M3D_MIdentity() noexcept {
return ret;
}
inline M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_MATRIX ret;
// Cache the invariants in registers
@ -309,7 +309,7 @@ inline M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept {
#endif
}
inline M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept {
#ifdef DISABLE_INTRINSICS
// Original matrix:
//
@ -374,7 +374,7 @@ inline M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept {
#endif
}
inline M3D_MATRIX M3D_MInverse(M3D_MATRIX M) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_MInverse(M3D_MATRIX M) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_MATRIX MT = M3D_MTranspose(M);
@ -591,7 +591,7 @@ inline M3D_MATRIX M3D_MInverse(M3D_MATRIX M) noexcept {
/* -------------------------------------------------------------------------------------------------------------------------- */
inline M3D_VECTOR M3D_QRotationMatrix(M3D_MATRIX M) noexcept {
INLINE_AVX_FIX M3D_VECTOR M3D_QRotationMatrix(M3D_MATRIX M) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_V4F32 q;
float r22 = M.mat[2][2];
@ -733,7 +733,7 @@ inline M3D_VECTOR M3D_V3Rotate(M3D_VECTOR V, M3D_VECTOR RotationQuaternion) noex
return M3D_QMultiply(Result, RotationQuaternion);
}
inline M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
INLINE_AVX_FIX M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_VECTOR Z = M3D_V4SplatZ(V);
M3D_VECTOR Y = M3D_V4SplatY(V);
@ -755,7 +755,7 @@ inline M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
#endif
}
inline void M3D_V3Transform(
INLINE_AVX_FIX void M3D_V3Transform(
M3D_F4* pOutputStream,
size_t OutputStride,
const M3D_F3* pInputStream,
@ -972,7 +972,7 @@ inline void M3D_V3Transform(
#endif
}
inline M3D_VECTOR M3D_V3TransformNormal(M3D_VECTOR V, M3D_MATRIX M) noexcept {
INLINE_AVX_FIX M3D_VECTOR M3D_V3TransformNormal(M3D_VECTOR V, M3D_MATRIX M) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_VECTOR Z = M3D_V4SplatZ(V);
M3D_VECTOR Y = M3D_V4SplatY(V);
@ -994,7 +994,7 @@ inline M3D_VECTOR M3D_V3TransformNormal(M3D_VECTOR V, M3D_MATRIX M) noexcept {
#endif
}
inline M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept {
INLINE_AVX_FIX M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept {
M3D_VECTOR Z = M3D_V4SplatZ(V);
M3D_VECTOR Y = M3D_V4SplatY(V);
M3D_VECTOR X = M3D_V4SplatX(V);
@ -1007,7 +1007,7 @@ inline M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept {
return M3D_V4Divide(Result, W);
}
inline void M3D_V3TransformPersDiv(
INLINE_AVX_FIX void M3D_V3TransformPersDiv(
M3D_F3* pOutputStream,
size_t OutputStride,
const M3D_F3* pInputStream,
@ -1321,7 +1321,7 @@ inline void M3D_V3TransformPersDiv(
#endif
}
inline M3D_VECTOR M3D_V4Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
INLINE_AVX_FIX M3D_VECTOR M3D_V4Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_VECTOR W = M3D_V4SplatW(V);
M3D_VECTOR Z = M3D_V4SplatZ(V);
@ -1346,7 +1346,7 @@ inline M3D_VECTOR M3D_V4Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
#endif
}
inline void M3D_V4Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F4* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept {
INLINE_AVX_FIX void M3D_V4Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F4* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept {
auto pInputVector = reinterpret_cast<const uint8_t*>(pInputStream);
auto pOutputVector = reinterpret_cast<uint8_t*>(pOutputStream);
@ -1593,17 +1593,17 @@ inline M3D_VECTOR M3D_V3TransformNDCToViewport(M3D_VECTOR V, float vpX, float vp
/* -------------------------------------------------------------------------------------------------------------------------- */
inline M3D_MATRIX M3D_TransformMatrixCamLookAtLH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookAtLH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept {
    // The look direction is the vector from the camera position to the focus
    // point; the look-to builder does the rest.
    return M3D_TransformMatrixCamLookToLH(viewPos, M3D_V4Subtract(focusPos, viewPos), upDirection);
}
inline M3D_MATRIX M3D_TransformMatrixCamLookAtRH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookAtRH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept {
    // The reversed direction (camera position minus focus point) is passed to
    // the left-handed look-to builder.
    return M3D_TransformMatrixCamLookToLH(viewPos, M3D_V4Subtract(viewPos, focusPos), upDirection);
}
inline M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept {
// Keep viewer's axes orthogonal to each other and of unit length
M3D_VECTOR look_normal = M3D_V3Normalize(viewDirection);
M3D_VECTOR up_norm = M3D_V3Normalize(M3D_V3Cross(upDirection, look_normal));
@ -1628,12 +1628,12 @@ inline M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR
return ret;
}
inline M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept {
    // Negate the view direction and reuse the left-handed look-to builder.
    return M3D_TransformMatrixCamLookToLH(viewPos, M3D_V4Negate(viewDirection), upDirection);
}
inline M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept {
float SinFov;
float CosFov;
M3D_ScalarSinCos(&SinFov, &CosFov, 0.5f * fov);
@ -1691,7 +1691,7 @@ inline M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float
#endif
}
inline M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept {
float SinFov;
float CosFov;
M3D_ScalarSinCos(&SinFov, &CosFov, 0.5f * fov);
@ -1749,7 +1749,7 @@ inline M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float
#endif
}
inline M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_MATRIX ret;
ret.mat[0][0] = 1.0f;
@ -1782,7 +1782,7 @@ inline M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept {
#endif
}
inline M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_MATRIX ret;
ret.mat[0][0] = ScaleX;
@ -1815,7 +1815,7 @@ inline M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float Sca
#endif
}
inline M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_MATRIX ret;
ret.mat[0][0] = Scale.v4f[0];
@ -1848,7 +1848,7 @@ inline M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept {
#endif
}
inline M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_MATRIX ret;
ret.mat[0][0] = 1.0f;
@ -1881,7 +1881,7 @@ inline M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, flo
#endif
}
inline M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept {
float SinAngle;
float CosAngle;
M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle);
@ -1926,7 +1926,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept {
#endif
}
inline M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept {
float SinAngle;
float CosAngle;
M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle);
@ -1971,7 +1971,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept {
#endif
}
inline M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept {
float SinAngle;
float CosAngle;
M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle);
@ -2016,7 +2016,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept {
#endif
}
inline M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept {
#ifdef DISABLE_INTRINSICS
float cp = cosf(Angles.v4f[0]);
float sp = sinf(Angles.v4f[0]);
@ -2082,7 +2082,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept {
#endif
}
inline M3D_MATRIX M3D_TransformMatrixRotationNormal(M3D_VECTOR NormalAxis, float Angle) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationNormal(M3D_VECTOR NormalAxis, float Angle) noexcept {
float fSinAngle;
float fCosAngle;
M3D_ScalarSinCos(&fSinAngle, &fCosAngle, Angle);
@ -2159,14 +2159,14 @@ inline M3D_MATRIX M3D_TransformMatrixRotationNormal(M3D_VECTOR NormalAxis, float
#endif
}
inline M3D_MATRIX M3D_TransformMatrixRotationAxis(M3D_VECTOR axis, float angle) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationAxis(M3D_VECTOR axis, float angle) noexcept {
    // Normalize the axis first, then build the rotation with the variant that
    // takes an already-normalized axis.
    return M3D_TransformMatrixRotationNormal(M3D_V3Normalize(axis), angle);
}
//TODO: transform matrix is incomplete
//v_tri[v_cnt].position.z = ((far+near)/2)+((far-near)/2)*_2dCoord.z;
inline M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept {
const float widthDiv2 = _w / 2;
const float heightDiv2 = _h / 2;

View File

@ -175,7 +175,7 @@ inline void M3D_V4StoreF4A(M3D_F4A* dst, M3D_VECTOR V) noexcept {
#endif
}
inline M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_MATRIX ret;
ret.rows[0].v4f[0] = src->mat[0][0];
@ -208,7 +208,7 @@ inline M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept {
#endif
}
inline M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept {
INLINE_AVX_FIX M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_MATRIX ret;
ret.rows[0].v4f[0] = src->mat[0][0];
@ -241,7 +241,7 @@ inline M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept {
#endif
}
inline void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept {
INLINE_AVX_FIX void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept {
#ifdef DISABLE_INTRINSICS
dst->mat[0][0] = M.rows[0].v4f[0];
dst->mat[0][1] = M.rows[0].v4f[1];
@ -270,7 +270,7 @@ inline void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept {
#endif
}
inline void M3D_V4StoreF4x4A(M3D_F4X4A* dst, M3D_MATRIX M) noexcept {
INLINE_AVX_FIX void M3D_V4StoreF4x4A(M3D_F4X4A* dst, M3D_MATRIX M) noexcept {
#ifdef DISABLE_INTRINSICS
dst->mat[0][0] = M.rows[0].v4f[0];
dst->mat[0][1] = M.rows[0].v4f[1];
@ -548,6 +548,34 @@ inline M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX,
#endif
}
// Runtime swizzle: builds a vector whose components 0..3 are the components of
// V selected by E0..E3 respectively. The indices are not range-checked here.
inline M3D_VECTOR M3D_V4Swizzle(M3D_VECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3) noexcept {
#ifdef DISABLE_INTRINSICS
    // Scalar path: pick the four floats directly.
    M3D_V4F32 swizzled = {{{ V.v4f[E0], V.v4f[E1], V.v4f[E2], V.v4f[E3] }}};
    return swizzled.v;
#elif defined(AVX_INTRINSICS)
    // AVX path: load the four selectors as an integer control vector and let
    // the variable permute pick the lanes.
    unsigned int selectors[4] = { E0, E1, E2, E3 };
    const __m128i control = _mm_loadu_si128(reinterpret_cast<const __m128i*>(selectors));
    return _mm_permutevar_ps(V, control);
#else
    // Generic path: copy the selected 32-bit lanes one by one.
    const uint32_t selectors[4] = { E0, E1, E2, E3 };
    const auto* srcLanes = reinterpret_cast<const uint32_t*>(&V);
    M3D_VECTOR Result;
    auto* dstLanes = reinterpret_cast<uint32_t*>(&Result);
    for (int lane = 0; lane < 4; ++lane) {
        dstLanes[lane] = srcLanes[selectors[lane]];
    }
    return Result;
#endif
}
inline M3D_VECTOR M3D_V4SplatOne() noexcept {
#ifdef DISABLE_INTRINSICS
M3D_V4F32 vResult;
@ -816,6 +844,10 @@ inline M3D_VECTOR M3D_V4Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
#endif
}
// Squared length of V, computed as the dot product of V with itself; the
// result is returned in vector form, exactly as M3D_V4Dot produces it.
inline M3D_VECTOR M3D_V4LengthSq(M3D_VECTOR V) noexcept {
return M3D_V4Dot(V, V);
}
inline M3D_VECTOR M3D_V4Length(M3D_VECTOR V) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_VECTOR Result;