GCC issue 54412 fix
AVX data is not correctly aligned on the stack in the following cases: - using inline functions (GCC does not always inline them), - passing AVX data by value instead of by reference (the data is copied onto the stack dynamically), - not using the O2 or O3 optimization flags (related to the behavior described in the previous points).
This commit is contained in:
parent
aa871b8b76
commit
a8e396d08f
@ -11,7 +11,7 @@ if(NOT DEFINED PROJECT_BINARY_DIR)
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
add_compile_options(-Wall -march=native -mavx2 -mfma -msse4.2)
|
||||
add_compile_options(-Wall)
|
||||
else()
|
||||
add_compile_options(/Wall)
|
||||
endif()
|
||||
@ -26,6 +26,16 @@ project(ProtoTank VERSION 0.1.0 DESCRIPTION "Arcade 80s-style game with tanks" L
|
||||
# Compilation option
|
||||
option(DISABLE_CPU_OPTI "Disable CPU optimizations" OFF)
|
||||
|
||||
if(NOT DISABLE_CPU_OPTI)
|
||||
if(NOT MSVC)
|
||||
#add_compile_options(-march=native -mtune=generic)
|
||||
add_compile_options(-march=native -mtune=native)
|
||||
add_compile_options(-msse2 -msse4.2 -mavx -mavx2 -mfma)
|
||||
else()
|
||||
add_compile_options(/fp:fast /arch:SSE2 /arch:SSE4.2 /arch:AVX /arch:AVX2 /GL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(FindPkgConfig)
|
||||
include(CheckIncludeFile)
|
||||
include(CheckCSourceCompiles)
|
||||
|
@ -15,6 +15,12 @@
|
||||
#error This header requires C++
|
||||
#endif
|
||||
|
||||
// INLINE_AVX_FIX is used in place of the plain `inline` specifier on functions
// that receive or return AVX data by value.
//  - GCC-compatible compilers: force inlining, which works around GCC issue 54412
//    (AVX data misaligned on the stack when such a call is not inlined).
//  - MSVC: no workaround is needed, but the macro must still expand to `inline`,
//    otherwise every definition that previously carried `inline` would lose the
//    specifier and could be defined once per translation unit.
#ifdef _MSC_VER
#define INLINE_AVX_FIX inline
#else
#define INLINE_AVX_FIX __attribute__((always_inline)) inline
#endif
|
||||
|
||||
|
||||
#ifndef DISABLE_INTRINSICS
|
||||
#ifdef NO_MOVNT
|
||||
@ -85,6 +91,11 @@ constexpr uint32_t M3D_PERMUTE_1Y = 5;
|
||||
constexpr uint32_t M3D_PERMUTE_1Z = 6;
|
||||
constexpr uint32_t M3D_PERMUTE_1W = 7;
|
||||
|
||||
constexpr uint32_t M3D_SWIZZLE_X = 0;
|
||||
constexpr uint32_t M3D_SWIZZLE_Y = 1;
|
||||
constexpr uint32_t M3D_SWIZZLE_Z = 2;
|
||||
constexpr uint32_t M3D_SWIZZLE_W = 3;
|
||||
|
||||
// Converts the angle `a` from degrees to radians.
constexpr float M3D_Deg2Rad(float a) noexcept {
    return (M3D_PI / 180.0f) * a;
}
|
||||
// Converts the angle `a` from radians to degrees.
constexpr float M3D_Rad2Deg(float a) noexcept {
    return (180.0f / M3D_PI) * a;
}
|
||||
|
||||
@ -358,6 +369,7 @@ M3D_VECTOR M3D_V4SetX(M3D_VECTOR V, float x) noexcept;
|
||||
M3D_VECTOR M3D_V4SetY(M3D_VECTOR V, float y) noexcept;
|
||||
M3D_VECTOR M3D_V4SetZ(M3D_VECTOR V, float z) noexcept;
|
||||
M3D_VECTOR M3D_V4SetW(M3D_VECTOR V, float w) noexcept;
|
||||
M3D_VECTOR M3D_V4Swizzle(M3D_VECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3) noexcept;
|
||||
M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW) noexcept;
|
||||
M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4SplatY(M3D_VECTOR V) noexcept;
|
||||
@ -376,6 +388,7 @@ M3D_VECTOR M3D_V4NegativeMultiplySubtract(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECT
|
||||
bool M3D_V4EqualInt(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V4Abs(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V4LengthSq(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4Length(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept;
|
||||
M3D_VECTOR M3D_V4Select(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR Control) noexcept;
|
||||
@ -483,6 +496,31 @@ template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 6, 7>(M3D_VECTOR V1, M3D_VECTOR
|
||||
template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xE); }
|
||||
#endif
|
||||
|
||||
template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW>
|
||||
inline M3D_VECTOR M3D_V4Swizzle(M3D_VECTOR V) noexcept {
|
||||
#ifndef DISABLE_INTRINSICS
|
||||
return M3D_PERMUTE_PS(V, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
|
||||
#else
|
||||
return M3D_V4Swizzle(V, SwizzleX, SwizzleY, SwizzleZ, SwizzleW);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Specializations of the compile-time swizzle for lane patterns that are served by
// a single dedicated intrinsic instead of the generic shuffle.
#if !defined(DISABLE_INTRINSICS)
|
||||
// Pattern (X, Y, X, Y).
template<> inline M3D_VECTOR M3D_V4Swizzle<0, 1, 0, 1>(M3D_VECTOR V) noexcept { return _mm_movelh_ps(V, V); }
|
||||
// Pattern (Z, W, Z, W).
template<> inline M3D_VECTOR M3D_V4Swizzle<2, 3, 2, 3>(M3D_VECTOR V) noexcept { return _mm_movehl_ps(V, V); }
|
||||
// Pattern (X, X, Y, Y).
template<> inline M3D_VECTOR M3D_V4Swizzle<0, 0, 1, 1>(M3D_VECTOR V) noexcept { return _mm_unpacklo_ps(V, V); }
|
||||
// Pattern (Z, Z, W, W).
template<> inline M3D_VECTOR M3D_V4Swizzle<2, 2, 3, 3>(M3D_VECTOR V) noexcept { return _mm_unpackhi_ps(V, V); }
|
||||
#endif
|
||||
|
||||
// Additional patterns, only compiled when SSE3_INTRINSICS is defined.
#if defined(SSE3_INTRINSICS) && !defined(DISABLE_INTRINSICS)
|
||||
// Pattern (X, X, Z, Z).
template<> inline M3D_VECTOR M3D_V4Swizzle<0, 0, 2, 2>(M3D_VECTOR V) noexcept { return _mm_moveldup_ps(V); }
|
||||
// Pattern (Y, Y, W, W).
template<> inline M3D_VECTOR M3D_V4Swizzle<1, 1, 3, 3>(M3D_VECTOR V) noexcept { return _mm_movehdup_ps(V); }
|
||||
#endif
|
||||
|
||||
// Additional pattern, only compiled when AVX2_INTRINSICS is defined.
#if defined(AVX2_INTRINSICS) && !defined(DISABLE_INTRINSICS)
|
||||
// Pattern (X, X, X, X).
template<> inline M3D_VECTOR M3D_V4Swizzle<0, 0, 0, 0>(M3D_VECTOR V) noexcept { return _mm_broadcastss_ps(V); }
|
||||
#endif
|
||||
|
||||
M3D_VECTOR M3D_QMultiply(M3D_VECTOR Q1, M3D_VECTOR Q2) noexcept;
|
||||
M3D_VECTOR M3D_QConjugate(M3D_VECTOR Q) noexcept;
|
||||
|
||||
|
@ -128,7 +128,7 @@ inline void M3D_BoundingBox::CreateFromPoints(M3D_BoundingBox& Out, size_t Count
|
||||
M3D_V4StoreF3(&Out.Extents, M3D_V4Scale(M3D_V4Subtract(vMax, vMin), 0.5f));
|
||||
}
|
||||
|
||||
inline void M3D_BoundingBox::Transform(M3D_BoundingBox& Out, M3D_MATRIX M) const noexcept {
|
||||
INLINE_AVX_FIX void M3D_BoundingBox::Transform(M3D_BoundingBox& Out, M3D_MATRIX M) const noexcept {
|
||||
// Load center and extents.
|
||||
M3D_VECTOR vCenter = M3D_V4LoadF3(&Center);
|
||||
M3D_VECTOR vExtents = M3D_V4LoadF3(&Extents);
|
||||
@ -163,11 +163,11 @@ inline void M3D_BoundingBox::GetCorners(M3D_F3* Corners) const noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
inline M3D_BoundingFrustum::M3D_BoundingFrustum(M3D_MATRIX Projection, bool rhcoords) noexcept {
|
||||
// Constructs the frustum by delegating to CreateFromMatrix with this object as the
// output, forwarding the projection matrix and the rhcoords flag unchanged.
INLINE_AVX_FIX M3D_BoundingFrustum::M3D_BoundingFrustum(M3D_MATRIX Projection, bool rhcoords) noexcept {
    M3D_BoundingFrustum& self = *this;
    CreateFromMatrix(self, Projection, rhcoords);
}
|
||||
|
||||
inline void M3D_BoundingFrustum::Transform(M3D_BoundingFrustum& Out, M3D_MATRIX M) const noexcept {
|
||||
INLINE_AVX_FIX void M3D_BoundingFrustum::Transform(M3D_BoundingFrustum& Out, M3D_MATRIX M) const noexcept {
|
||||
// Load the frustum.
|
||||
M3D_VECTOR vOrigin = M3D_V4LoadF3(&Origin);
|
||||
M3D_VECTOR vOrientation = M3D_V4LoadF4(&Orientation);
|
||||
@ -318,7 +318,7 @@ inline void M3D_BoundingFrustum::GetPlanes(M3D_VECTOR* NearPlane, M3D_VECTOR* Fa
|
||||
}
|
||||
}
|
||||
|
||||
inline void M3D_BoundingFrustum::CreateFromMatrix(M3D_BoundingFrustum& Out, M3D_MATRIX Projection, bool rhcoords) noexcept {
|
||||
INLINE_AVX_FIX void M3D_BoundingFrustum::CreateFromMatrix(M3D_BoundingFrustum& Out, M3D_MATRIX Projection, bool rhcoords) noexcept {
|
||||
// Corners of the projection frustum in NDC space.
|
||||
static M3D_V4F32 NDCPoints[6] = {
|
||||
{{{1.0f, 0.0f, 1.0f, 1.0f}}}, // right (at far plane)
|
||||
|
@ -13,7 +13,7 @@ inline M3D_MATRIX::M3D_MATRIX(
|
||||
rows[3] = M3D_V4Set(f30, f31, f32, f33);
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_MATRIX::operator- () const noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator- () const noexcept {
|
||||
M3D_MATRIX ret;
|
||||
ret.rows[0] = M3D_V4Negate(rows[0]);
|
||||
ret.rows[1] = M3D_V4Negate(rows[1]);
|
||||
@ -22,14 +22,14 @@ inline M3D_MATRIX M3D_MATRIX::operator- () const noexcept {
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline M3D_MATRIX& M3D_MATRIX::operator+= (M3D_MATRIX M) noexcept {
|
||||
// Adds M to this matrix in place, one row at a time, and returns *this.
INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator+= (M3D_MATRIX M) noexcept {
    for (int rowIdx = 0; rowIdx < 4; ++rowIdx) {
        rows[rowIdx] = M3D_V4Add(rows[rowIdx], M.rows[rowIdx]);
    }
    return *this;
}
|
||||
inline M3D_MATRIX M3D_MATRIX::operator+ (M3D_MATRIX M) const noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator+ (M3D_MATRIX M) const noexcept {
|
||||
M3D_MATRIX ret;
|
||||
ret.rows[0] = M3D_V4Add(rows[0], M.rows[0]);
|
||||
ret.rows[1] = M3D_V4Add(rows[1], M.rows[1]);
|
||||
@ -38,14 +38,14 @@ inline M3D_MATRIX M3D_MATRIX::operator+ (M3D_MATRIX M) const noexcept {
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline M3D_MATRIX& M3D_MATRIX::operator-= (M3D_MATRIX M) noexcept {
|
||||
// Subtracts M from this matrix in place, one row at a time, and returns *this.
INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator-= (M3D_MATRIX M) noexcept {
    for (int rowIdx = 0; rowIdx < 4; ++rowIdx) {
        rows[rowIdx] = M3D_V4Subtract(rows[rowIdx], M.rows[rowIdx]);
    }
    return *this;
}
|
||||
inline M3D_MATRIX M3D_MATRIX::operator- (M3D_MATRIX M) const noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator- (M3D_MATRIX M) const noexcept {
|
||||
M3D_MATRIX ret;
|
||||
ret.rows[0] = M3D_V4Subtract(rows[0], M.rows[0]);
|
||||
ret.rows[1] = M3D_V4Subtract(rows[1], M.rows[1]);
|
||||
@ -54,22 +54,22 @@ inline M3D_MATRIX M3D_MATRIX::operator- (M3D_MATRIX M) const noexcept {
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline M3D_MATRIX& M3D_MATRIX::operator*=(M3D_MATRIX M) noexcept {
|
||||
// Replaces this matrix with the result of M3D_MMultiply(*this, M) and returns *this.
INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator*=(M3D_MATRIX M) noexcept {
    M3D_MATRIX product = M3D_MMultiply(*this, M);
    *this = product;
    return *this;
}
|
||||
inline M3D_MATRIX M3D_MATRIX::operator*(M3D_MATRIX M) const noexcept {
|
||||
// Returns the result of M3D_MMultiply(*this, M); this matrix is left unchanged.
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator*(M3D_MATRIX M) const noexcept {
    M3D_MATRIX product = M3D_MMultiply(*this, M);
    return product;
}
|
||||
|
||||
inline M3D_MATRIX& M3D_MATRIX::operator*= (float S) noexcept {
|
||||
// Scales every row of this matrix by S in place and returns *this.
INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator*= (float S) noexcept {
    for (int rowIdx = 0; rowIdx < 4; ++rowIdx) {
        rows[rowIdx] = M3D_V4Scale(rows[rowIdx], S);
    }
    return *this;
}
|
||||
inline M3D_MATRIX M3D_MATRIX::operator* (float S) const noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator* (float S) const noexcept {
|
||||
M3D_MATRIX ret;
|
||||
ret.rows[0] = M3D_V4Scale(rows[0], S);
|
||||
ret.rows[1] = M3D_V4Scale(rows[1], S);
|
||||
@ -77,7 +77,7 @@ inline M3D_MATRIX M3D_MATRIX::operator* (float S) const noexcept {
|
||||
ret.rows[3] = M3D_V4Scale(rows[3], S);
|
||||
return ret;
|
||||
}
|
||||
inline M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept {
|
||||
M3D_MATRIX ret;
|
||||
ret.rows[0] = M3D_V4Scale(M.rows[0], S);
|
||||
ret.rows[1] = M3D_V4Scale(M.rows[1], S);
|
||||
@ -86,7 +86,7 @@ inline M3D_MATRIX operator* (float S, M3D_MATRIX M) noexcept {
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline M3D_MATRIX& M3D_MATRIX::operator/= (float S) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX& M3D_MATRIX::operator/= (float S) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_VECTOR vS = M3D_V4Replicate(S);
|
||||
rows[0] = M3D_V4Divide(rows[0], vS);
|
||||
@ -103,7 +103,7 @@ inline M3D_MATRIX& M3D_MATRIX::operator/= (float S) noexcept {
|
||||
return *this;
|
||||
#endif
|
||||
}
|
||||
inline M3D_MATRIX M3D_MATRIX::operator/ (float S) const noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_MATRIX::operator/ (float S) const noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_VECTOR vS = M3D_V4Replicate(S);
|
||||
M3D_MATRIX ret;
|
||||
@ -135,7 +135,7 @@ inline M3D_MATRIX M3D_MIdentity() noexcept {
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_MATRIX ret;
|
||||
// Cache the invariants in registers
|
||||
@ -309,7 +309,7 @@ inline M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
// Original matrix:
|
||||
//
|
||||
@ -374,7 +374,7 @@ inline M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_MInverse(M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_MInverse(M3D_MATRIX M) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_MATRIX MT = M3D_MTranspose(M);
|
||||
|
||||
@ -591,7 +591,7 @@ inline M3D_MATRIX M3D_MInverse(M3D_MATRIX M) noexcept {
|
||||
|
||||
/* -------------------------------------------------------------------------------------------------------------------------- */
|
||||
|
||||
inline M3D_VECTOR M3D_QRotationMatrix(M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX M3D_VECTOR M3D_QRotationMatrix(M3D_MATRIX M) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_V4F32 q;
|
||||
float r22 = M.mat[2][2];
|
||||
@ -733,7 +733,7 @@ inline M3D_VECTOR M3D_V3Rotate(M3D_VECTOR V, M3D_VECTOR RotationQuaternion) noex
|
||||
return M3D_QMultiply(Result, RotationQuaternion);
|
||||
}
|
||||
|
||||
inline M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_VECTOR Z = M3D_V4SplatZ(V);
|
||||
M3D_VECTOR Y = M3D_V4SplatY(V);
|
||||
@ -755,7 +755,7 @@ inline M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void M3D_V3Transform(
|
||||
INLINE_AVX_FIX void M3D_V3Transform(
|
||||
M3D_F4* pOutputStream,
|
||||
size_t OutputStride,
|
||||
const M3D_F3* pInputStream,
|
||||
@ -972,7 +972,7 @@ inline void M3D_V3Transform(
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_VECTOR M3D_V3TransformNormal(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX M3D_VECTOR M3D_V3TransformNormal(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_VECTOR Z = M3D_V4SplatZ(V);
|
||||
M3D_VECTOR Y = M3D_V4SplatY(V);
|
||||
@ -994,7 +994,7 @@ inline M3D_VECTOR M3D_V3TransformNormal(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
M3D_VECTOR Z = M3D_V4SplatZ(V);
|
||||
M3D_VECTOR Y = M3D_V4SplatY(V);
|
||||
M3D_VECTOR X = M3D_V4SplatX(V);
|
||||
@ -1007,7 +1007,7 @@ inline M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
return M3D_V4Divide(Result, W);
|
||||
}
|
||||
|
||||
inline void M3D_V3TransformPersDiv(
|
||||
INLINE_AVX_FIX void M3D_V3TransformPersDiv(
|
||||
M3D_F3* pOutputStream,
|
||||
size_t OutputStride,
|
||||
const M3D_F3* pInputStream,
|
||||
@ -1321,7 +1321,7 @@ inline void M3D_V3TransformPersDiv(
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_VECTOR M3D_V4Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX M3D_VECTOR M3D_V4Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_VECTOR W = M3D_V4SplatW(V);
|
||||
M3D_VECTOR Z = M3D_V4SplatZ(V);
|
||||
@ -1346,7 +1346,7 @@ inline M3D_VECTOR M3D_V4Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void M3D_V4Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F4* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX void M3D_V4Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F4* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept {
|
||||
auto pInputVector = reinterpret_cast<const uint8_t*>(pInputStream);
|
||||
auto pOutputVector = reinterpret_cast<uint8_t*>(pOutputStream);
|
||||
|
||||
@ -1593,17 +1593,17 @@ inline M3D_VECTOR M3D_V3TransformNDCToViewport(M3D_VECTOR V, float vpX, float vp
|
||||
|
||||
/* -------------------------------------------------------------------------------------------------------------------------- */
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixCamLookAtLH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept {
|
||||
// Builds a look-at matrix from a viewer position and a focus point: the direction
// (focusPos - viewPos) is handed to M3D_TransformMatrixCamLookToLH.
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookAtLH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept {
    return M3D_TransformMatrixCamLookToLH(
        viewPos,
        M3D_V4Subtract(focusPos, viewPos),
        upDirection);
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixCamLookAtRH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept {
|
||||
// Builds a look-at matrix from a viewer position and a focus point: the reversed
// direction (viewPos - focusPos) is handed to M3D_TransformMatrixCamLookToLH.
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookAtRH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept {
    return M3D_TransformMatrixCamLookToLH(
        viewPos,
        M3D_V4Subtract(viewPos, focusPos),
        upDirection);
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept {
|
||||
// Keep viewer's axes orthogonal to each other and of unit length
|
||||
M3D_VECTOR look_normal = M3D_V3Normalize(viewDirection);
|
||||
M3D_VECTOR up_norm = M3D_V3Normalize(M3D_V3Cross(upDirection, look_normal));
|
||||
@ -1628,12 +1628,12 @@ inline M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept {
|
||||
// Builds a look-to matrix by negating the view direction and delegating to
// M3D_TransformMatrixCamLookToLH.
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept {
    return M3D_TransformMatrixCamLookToLH(
        viewPos,
        M3D_V4Negate(viewDirection),
        upDirection);
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept {
|
||||
float SinFov;
|
||||
float CosFov;
|
||||
M3D_ScalarSinCos(&SinFov, &CosFov, 0.5f * fov);
|
||||
@ -1691,7 +1691,7 @@ inline M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept {
|
||||
float SinFov;
|
||||
float CosFov;
|
||||
M3D_ScalarSinCos(&SinFov, &CosFov, 0.5f * fov);
|
||||
@ -1749,7 +1749,7 @@ inline M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_MATRIX ret;
|
||||
ret.mat[0][0] = 1.0f;
|
||||
@ -1782,7 +1782,7 @@ inline M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_MATRIX ret;
|
||||
ret.mat[0][0] = ScaleX;
|
||||
@ -1815,7 +1815,7 @@ inline M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float Sca
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_MATRIX ret;
|
||||
ret.mat[0][0] = Scale.v4f[0];
|
||||
@ -1848,7 +1848,7 @@ inline M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_MATRIX ret;
|
||||
ret.mat[0][0] = 1.0f;
|
||||
@ -1881,7 +1881,7 @@ inline M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, flo
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept {
|
||||
float SinAngle;
|
||||
float CosAngle;
|
||||
M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle);
|
||||
@ -1926,7 +1926,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept {
|
||||
float SinAngle;
|
||||
float CosAngle;
|
||||
M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle);
|
||||
@ -1971,7 +1971,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept {
|
||||
float SinAngle;
|
||||
float CosAngle;
|
||||
M3D_ScalarSinCos(&SinAngle, &CosAngle, Angle);
|
||||
@ -2016,7 +2016,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
float cp = cosf(Angles.v4f[0]);
|
||||
float sp = sinf(Angles.v4f[0]);
|
||||
@ -2082,7 +2082,7 @@ inline M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixRotationNormal(M3D_VECTOR NormalAxis, float Angle) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationNormal(M3D_VECTOR NormalAxis, float Angle) noexcept {
|
||||
float fSinAngle;
|
||||
float fCosAngle;
|
||||
M3D_ScalarSinCos(&fSinAngle, &fCosAngle, Angle);
|
||||
@ -2159,14 +2159,14 @@ inline M3D_MATRIX M3D_TransformMatrixRotationNormal(M3D_VECTOR NormalAxis, float
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixRotationAxis(M3D_VECTOR axis, float angle) noexcept {
|
||||
// Builds a rotation matrix around `axis`: the axis is normalized with
// M3D_V3Normalize and passed on to M3D_TransformMatrixRotationNormal with `angle`.
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixRotationAxis(M3D_VECTOR axis, float angle) noexcept {
    return M3D_TransformMatrixRotationNormal(M3D_V3Normalize(axis), angle);
}
|
||||
|
||||
//TODO: transform matrix is incomplete
|
||||
//v_tri[v_cnt].position.z = ((far+near)/2)+((far-near)/2)*_2dCoord.z;
|
||||
inline M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept {
|
||||
const float widthDiv2 = _w / 2;
|
||||
const float heightDiv2 = _h / 2;
|
||||
|
||||
|
@ -175,7 +175,7 @@ inline void M3D_V4StoreF4A(M3D_F4A* dst, M3D_VECTOR V) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_MATRIX ret;
|
||||
ret.rows[0].v4f[0] = src->mat[0][0];
|
||||
@ -208,7 +208,7 @@ inline M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept {
|
||||
INLINE_AVX_FIX M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_MATRIX ret;
|
||||
ret.rows[0].v4f[0] = src->mat[0][0];
|
||||
@ -241,7 +241,7 @@ inline M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
dst->mat[0][0] = M.rows[0].v4f[0];
|
||||
dst->mat[0][1] = M.rows[0].v4f[1];
|
||||
@ -270,7 +270,7 @@ inline void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void M3D_V4StoreF4x4A(M3D_F4X4A* dst, M3D_MATRIX M) noexcept {
|
||||
INLINE_AVX_FIX void M3D_V4StoreF4x4A(M3D_F4X4A* dst, M3D_MATRIX M) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
dst->mat[0][0] = M.rows[0].v4f[0];
|
||||
dst->mat[0][1] = M.rows[0].v4f[1];
|
||||
@ -548,6 +548,34 @@ inline M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX,
|
||||
#endif
|
||||
}
|
||||
|
||||
// Runtime swizzle: returns a vector whose four lanes are lanes E0, E1, E2 and E3 of V,
// in that order. The indices are used as given; no range check is performed here.
inline M3D_VECTOR M3D_V4Swizzle(M3D_VECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3) noexcept {
#ifdef DISABLE_INTRINSICS
    // Scalar path: pick each float lane directly.
    M3D_V4F32 swizzled = {{{ V.v4f[E0], V.v4f[E1], V.v4f[E2], V.v4f[E3] }}};
    return swizzled.v;
#elif defined(AVX_INTRINSICS)
    // AVX path: load the four indices as a control vector for a variable permute.
    unsigned int laneIdx[4] = { E0, E1, E2, E3 };
    __m128i laneSelect = _mm_loadu_si128(reinterpret_cast<const __m128i*>(laneIdx));
    return _mm_permutevar_ps(V, laneSelect);
#else
    // Fallback path: copy the lanes as raw 32-bit words.
    const uint32_t* srcWords = reinterpret_cast<const uint32_t*>(&V);

    M3D_VECTOR swizzled;
    uint32_t* dstWords = reinterpret_cast<uint32_t*>(&swizzled);

    const uint32_t picks[4] = { E0, E1, E2, E3 };
    for (int lane = 0; lane < 4; ++lane) {
        dstWords[lane] = srcWords[picks[lane]];
    }

    return swizzled;
#endif
}
|
||||
|
||||
inline M3D_VECTOR M3D_V4SplatOne() noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_V4F32 vResult;
|
||||
@ -816,6 +844,10 @@ inline M3D_VECTOR M3D_V4Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns the squared length of V, computed as M3D_V4Dot(V, V).
inline M3D_VECTOR M3D_V4LengthSq(M3D_VECTOR V) noexcept {
    M3D_VECTOR dotWithSelf = M3D_V4Dot(V, V);
    return dotWithSelf;
}
|
||||
|
||||
inline M3D_VECTOR M3D_V4Length(M3D_VECTOR V) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_VECTOR Result;
|
||||
|
Loading…
x
Reference in New Issue
Block a user