406 lines
14 KiB
C++
406 lines
14 KiB
C++
#pragma once
|
|
|
|
#include <cstddef>
#include <cstdint>
#if defined(__cplusplus) && (__cplusplus >= 202002L)
#  include <compare>
#endif
|
|
#ifndef DISABLE_INTRINSICS
|
|
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#
|
|
// https://stackoverflow.com/tags/sse/info
|
|
// https://lowleveldev.substack.com/p/simd-a-practical-guide
|
|
# include <immintrin.h>
|
|
// If GNU-gcc => NO_XMVECTOR_OVERLOADS
|
|
#else
|
|
# include <cmath>
|
|
#endif
|
|
|
|
#ifndef __cplusplus
|
|
#error This header requires C++
|
|
#endif
|
|
|
|
|
|
#ifndef DISABLE_INTRINSICS
|
|
// Store wrappers used for write-only output streams.
// With NO_MOVNT defined they expand to the regular store intrinsics and
// M3D_SFENCE() expands to nothing; otherwise they expand to the streaming
// (_mm_stream_ps / _mm256_stream_ps) intrinsics and M3D_SFENCE() to _mm_sfence().
#ifdef NO_MOVNT
#  define M3D_STREAM_PS( p, a )      _mm_store_ps((p), (a))
#  define M3D_STREAM_256_PS( p, a )  _mm256_store_ps((p), (a))
#  define M3D_SFENCE()
#else
#  define M3D_STREAM_PS( p, a )      _mm_stream_ps((p), (a))
#  define M3D_STREAM_256_PS( p, a )  _mm256_stream_ps((p), (a))
#  define M3D_SFENCE()               _mm_sfence()
#endif
// The 256-bit macro used to be spelled differently in each branch
// (M3D_STREAM_256_PS vs M3D_STREAM_256b_PS). Both spellings are now available
// in both configurations so existing callers of either name keep compiling.
#define M3D_STREAM_256b_PS( p, a )   M3D_STREAM_256_PS((p), (a))
|
|
|
|
// Multiply-add helpers:
//   M3D_FMADD_PS(a, b, c)  ->  (a * b) + c
//   M3D_FNMADD_PS(a, b, c) ->  c - (a * b)
// With FMA3_INTRINSICS defined the fused intrinsics are used; otherwise the
// same expressions are built from separate multiply and add/sub intrinsics.
#if defined(FMA3_INTRINSICS)
#  define M3D_FMADD_PS( a, b, c )   _mm_fmadd_ps((a), (b), (c))
#  define M3D_FNMADD_PS( a, b, c )  _mm_fnmadd_ps((a), (b), (c))
#else
#  define M3D_FMADD_PS( a, b, c )   _mm_add_ps(_mm_mul_ps((a), (b)), (c))
#  define M3D_FNMADD_PS( a, b, c )  _mm_sub_ps((c), _mm_mul_ps((a), (b)))
#endif
|
|
|
|
// Lane permutation of a single vector `v` by the immediate control `c`.
// Uses _mm_permute_ps when both AVX_INTRINSICS and FAVOR_INTEL are defined,
// otherwise _mm_shuffle_ps with `v` supplied as both sources.
// The control argument is parenthesised like every other macro argument in
// this header so that any expression passed as `c` expands as one operand.
#if defined(AVX_INTRINSICS) && defined(FAVOR_INTEL)
#  define M3D_PERMUTE_PS( v, c ) _mm_permute_ps((v), (c))
#else
#  define M3D_PERMUTE_PS( v, c ) _mm_shuffle_ps((v), (v), (c))
#endif
|
|
|
|
// Expands, in the caller's scope, to declarations of three M3D_VECTOR locals
// named V3, V2 and V4, computed from the three input vectors l1, l2 and l3
// with the shuffles below. It is a statement sequence rather than an
// expression, so it cannot be wrapped in do { } while (0): the declared
// names must stay visible to the caller.
//
// The last line used to reference `L3` instead of the macro parameter `l3`,
// which only compiled when the caller happened to have a variable literally
// named L3. It now uses the parameter, and all arguments are parenthesised.
#define M3D_UNPACK3INTO4(l1, l2, l3) \
    M3D_VECTOR V3 = _mm_shuffle_ps((l2), (l3), _MM_SHUFFLE(0, 0, 3, 2));\
    M3D_VECTOR V2 = _mm_shuffle_ps((l2), (l1), _MM_SHUFFLE(3, 3, 1, 0));\
    V2 = M3D_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 0, 2));\
    M3D_VECTOR V4 = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128((l3)), 32 / 8))
|
|
#endif
|
|
|
|
//
|
|
// Math constants and helper functions
|
|
//
|
|
// Single-precision circle constants.
constexpr float M3D_PI      = 3.141592654f;   // pi
constexpr float M3D_2PI     = 6.283185307f;   // 2 * pi
constexpr float M3D_1DIVPI  = 0.318309886f;   // 1 / pi
constexpr float M3D_1DIV2PI = 0.159154943f;   // 1 / (2 * pi)
constexpr float M3D_PIDIV2  = 1.570796327f;   // pi / 2
constexpr float M3D_PIDIV4  = 0.785398163f;   // pi / 4

// Converts an angle `a` given in degrees to radians.
constexpr float M3D_Deg2Rad(float a) noexcept {
    return (M3D_PI / 180.0f) * a;
}

// Converts an angle `a` given in radians to degrees.
constexpr float M3D_Rad2Deg(float a) noexcept {
    return (180.0f / M3D_PI) * a;
}
|
|
|
|
|
|
//
|
|
// Generic SIMD vector implementation
|
|
//
|
|
// Call convention (x86_64):
|
|
// 1st-3rd vector parameters should be M3D_VECTOR (passed by value)
|
|
// 4th and later vector parameters should be M3D_VECTOR& (passed by reference)
|
|
//
|
|
#ifdef DISABLE_INTRINSICS
|
|
struct sM3DV4 {
|
|
union {
|
|
float v4f[4];
|
|
uint32_t v4u[4];
|
|
};
|
|
};
|
|
using M3D_VECTOR = sM3DV4;
|
|
#else
|
|
using M3D_VECTOR = __m128;
|
|
#endif
|
|
|
|
struct __attribute__((aligned(16))) M3D_V4F32 {
|
|
union {
|
|
float f[4];
|
|
M3D_VECTOR v;
|
|
};
|
|
|
|
inline operator M3D_VECTOR() const noexcept { return v; }
|
|
inline operator const float* () const noexcept { return f; }
|
|
#ifndef DISABLE_INTRINSICS
|
|
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
|
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
|
#endif
|
|
};
|
|
|
|
struct __attribute__((aligned(16))) M3D_V4U8 {
|
|
union {
|
|
uint8_t u[16];
|
|
M3D_VECTOR v;
|
|
};
|
|
|
|
inline operator M3D_VECTOR() const noexcept { return v; }
|
|
inline operator const uint8_t* () const noexcept { return u; }
|
|
#ifndef DISABLE_INTRINSICS
|
|
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
|
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
|
#endif
|
|
};
|
|
|
|
struct __attribute__((aligned(16))) M3D_V4U32 {
|
|
union {
|
|
uint32_t u[4];
|
|
M3D_VECTOR v;
|
|
};
|
|
|
|
inline operator M3D_VECTOR() const noexcept { return v; }
|
|
inline operator const uint32_t* () const noexcept { return u; }
|
|
#ifndef DISABLE_INTRINSICS
|
|
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
|
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
|
#endif
|
|
};
|
|
|
|
struct __attribute__((aligned(16))) M3D_V4I32 {
|
|
union {
|
|
int32_t i[4];
|
|
M3D_VECTOR v;
|
|
};
|
|
|
|
inline operator M3D_VECTOR() const noexcept { return v; }
|
|
inline operator const int32_t* () const noexcept { return i; }
|
|
#ifndef DISABLE_INTRINSICS
|
|
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
|
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
|
#endif
|
|
};
|
|
|
|
// Plain storage for three floats (x, y, z) with no alignment requirement
// beyond that of float.
struct M3D_F3 {
    float x;
    float y;
    float z;

    // Component-wise constructor.
    constexpr M3D_F3(float _x, float _y, float _z) noexcept
        : x(_x), y(_y), z(_z) {}

    // Default construction leaves the members uninitialised; copy and move
    // are the compiler-generated member-wise versions.
    M3D_F3() = default;
    M3D_F3(const M3D_F3&) = default;
    M3D_F3(M3D_F3&&) = default;
    M3D_F3& operator=(const M3D_F3&) = default;
    M3D_F3& operator=(M3D_F3&&) = default;
};
|
|
struct __attribute__((aligned(16))) M3D_F3A : public M3D_F3 {
|
|
using M3D_F3::M3D_F3;
|
|
};
|
|
|
|
// Plain storage for four floats (x, y, z, w) with no alignment requirement
// beyond that of float.
struct M3D_F4 {
    float x;
    float y;
    float z;
    float w;

    // Component-wise constructor.
    constexpr M3D_F4(float _x, float _y, float _z, float _w) noexcept
        : x(_x), y(_y), z(_z), w(_w) {}

    // Default construction leaves the members uninitialised; copy and move
    // are the compiler-generated member-wise versions.
    M3D_F4() = default;
    M3D_F4(const M3D_F4&) = default;
    M3D_F4(M3D_F4&&) = default;
    M3D_F4& operator=(const M3D_F4&) = default;
    M3D_F4& operator=(M3D_F4&&) = default;

#if (__cplusplus >= 202002L)
    // C++20: member-wise equality and three-way comparison.
    bool operator == (const M3D_F4&) const = default;
    auto operator <=> (const M3D_F4&) const = default;
#endif
};
|
|
struct __attribute__((aligned(16))) M3D_F4A : public M3D_F4 {
|
|
using M3D_F4::M3D_F4;
|
|
};
|
|
|
|
// Plain storage for a 4x4 float matrix. The same 16 floats are reachable
// either through the named members _RC (R = row, C = column) or through the
// two-dimensional array mat[row][column].
struct M3D_F4X4 {
    union {
        struct {
            float _00, _01, _02, _03;
            float _10, _11, _12, _13;
            float _20, _21, _22, _23;
            float _30, _31, _32, _33;
        };
        float mat[4][4];
    };

    // Default construction leaves the elements uninitialised; copy and move
    // are the compiler-generated versions.
    M3D_F4X4() = default;

    M3D_F4X4(const M3D_F4X4&) = default;
    M3D_F4X4& operator=(const M3D_F4X4&) = default;
    M3D_F4X4(M3D_F4X4&&) = default;
    M3D_F4X4& operator=(M3D_F4X4&&) = default;

    // Element-wise constructor; arguments are given row by row.
    constexpr M3D_F4X4(float f00, float f01, float f02, float f03,
                       float f10, float f11, float f12, float f13,
                       float f20, float f21, float f22, float f23,
                       float f30, float f31, float f32, float f33) noexcept
        : _00(f00), _01(f01), _02(f02), _03(f03),
          _10(f10), _11(f11), _12(f12), _13(f13),
          _20(f20), _21(f21), _22(f22), _23(f23),
          _30(f30), _31(f31), _32(f32), _33(f33) {}

    // Element access by (row, column). No bounds checking is performed.
    float  operator() (size_t row, size_t column) const noexcept { return mat[row][column]; }
    float& operator() (size_t row, size_t column) noexcept { return mat[row][column]; }

#if (__cplusplus >= 202002L)
    // The comparisons are written out by hand: this class has an anonymous
    // union, and a defaulted comparison operator of a class with variant
    // members is defined as deleted, so `= default` here produced operators
    // that could never be called.

    // True when all 16 elements compare equal (a NaN element makes it false).
    bool operator == (const M3D_F4X4& rhs) const noexcept {
        for (size_t r = 0; r < 4; ++r) {
            for (size_t c = 0; c < 4; ++c) {
                if (!(mat[r][c] == rhs.mat[r][c])) {
                    return false;
                }
            }
        }
        return true;
    }

    // Lexicographic three-way comparison in row-major order: the result of
    // the first element pair that is not equivalent (including unordered).
    std::partial_ordering operator <=> (const M3D_F4X4& rhs) const noexcept {
        for (size_t r = 0; r < 4; ++r) {
            for (size_t c = 0; c < 4; ++c) {
                const std::partial_ordering ord = mat[r][c] <=> rhs.mat[r][c];
                if (ord != 0) {
                    return ord;
                }
            }
        }
        return std::partial_ordering::equivalent;
    }
#endif
};
|
|
struct __attribute__((aligned(16))) M3D_F4X4A : public M3D_F4X4
|
|
{
|
|
using M3D_F4X4::M3D_F4X4;
|
|
};
|
|
|
|
|
|
//
|
|
// Generic SIMD matrix implementation
|
|
//
|
|
// Call convention (x86_64):
|
|
// 1st matrix parameter should be M3D_MATRIX
|
|
// 2nd+ matrix parameter should be M3D_MATRIX&
|
|
//
|
|
#ifdef DISABLE_INTRINSICS
|
|
struct M3D_MATRIX {
|
|
union {
|
|
M3D_VECTOR rows[4];
|
|
struct {
|
|
float _00, _01, _02, _03;
|
|
float _10, _11, _12, _13;
|
|
float _20, _21, _22, _23;
|
|
float _30, _31, _32, _33;
|
|
};
|
|
float mat[4][4];
|
|
};
|
|
#else
|
|
struct __attribute__((aligned(16))) M3D_MATRIX {
|
|
M3D_VECTOR rows[4];
|
|
#endif
|
|
M3D_MATRIX() = default;
|
|
M3D_MATRIX(const M3D_MATRIX&) = default;
|
|
M3D_MATRIX& operator=(const M3D_MATRIX&) = default;
|
|
|
|
M3D_MATRIX(M3D_MATRIX&&) = default;
|
|
M3D_MATRIX& operator=(M3D_MATRIX&&) = default;
|
|
|
|
constexpr M3D_MATRIX(M3D_VECTOR v0, M3D_VECTOR v1, M3D_VECTOR v2, M3D_VECTOR& v3) noexcept : rows{ v0,v1,v2,v3 } {}
|
|
M3D_MATRIX(float f00, float f01, float f02, float f03,
|
|
float f10, float f11, float f12, float f13,
|
|
float f20, float f21, float f22, float f23,
|
|
float f30, float f31, float f32, float f33) noexcept;
|
|
|
|
#ifdef DISABLE_INTRINSICS
|
|
float operator() (size_t row, size_t column) const noexcept { return mat[row][column]; }
|
|
float& operator() (size_t row, size_t column) noexcept { return mat[row][column]; }
|
|
#endif
|
|
|
|
M3D_MATRIX operator+ () const noexcept { return *this; }
|
|
M3D_MATRIX operator- () const noexcept;
|
|
|
|
M3D_MATRIX& operator+= (M3D_MATRIX M) noexcept;
|
|
M3D_MATRIX& operator-= (M3D_MATRIX M) noexcept;
|
|
M3D_MATRIX& operator*= (M3D_MATRIX M) noexcept;
|
|
M3D_MATRIX& operator*= (float S) noexcept;
|
|
M3D_MATRIX& operator/= (float S) noexcept;
|
|
|
|
M3D_MATRIX operator+ (M3D_MATRIX M) const noexcept;
|
|
M3D_MATRIX operator- (M3D_MATRIX M) const noexcept;
|
|
M3D_MATRIX operator* (M3D_MATRIX M) const noexcept;
|
|
M3D_MATRIX operator* (float S) const noexcept;
|
|
M3D_MATRIX operator/ (float S) const noexcept;
|
|
|
|
friend M3D_MATRIX operator* (float S, M3D_MATRIX& M) noexcept;
|
|
};
|
|
|
|
|
|
//
|
|
// Load/Store functions
|
|
//
|
|
M3D_VECTOR M3D_V4LoadF3(const M3D_F3* src) noexcept;
|
|
M3D_VECTOR M3D_V4LoadF3A(const M3D_F3A* src) noexcept;
|
|
void M3D_V4StoreF3(M3D_F3* dst, M3D_VECTOR V) noexcept;
|
|
void M3D_V4StoreF3A(M3D_F3A* dst, M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4LoadF4(const M3D_F4* src) noexcept;
|
|
M3D_VECTOR M3D_V4LoadF4A(const M3D_F4A* src) noexcept;
|
|
void M3D_V4StoreF4(M3D_F4* dst, M3D_VECTOR V) noexcept;
|
|
void M3D_V4StoreF4A(M3D_F4A* dst, M3D_VECTOR V) noexcept;
|
|
M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept;
|
|
M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept;
|
|
void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept;
|
|
void M3D_V4StoreF4x4A(M3D_F4X4A* dst, M3D_MATRIX M) noexcept;
|
|
|
|
|
|
//
|
|
// Vector operations
|
|
//
|
|
M3D_VECTOR M3D_V4Set(float x, float y, float z, float w) noexcept;
|
|
M3D_VECTOR M3D_V4Negate(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4Replicate(float val) noexcept;
|
|
float M3D_V4GetX(M3D_VECTOR V) noexcept;
|
|
float M3D_V4GetY(M3D_VECTOR V) noexcept;
|
|
float M3D_V4GetZ(M3D_VECTOR V) noexcept;
|
|
float M3D_V4GetW(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4SplatY(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4SplatZ(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4SplatW(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4Add(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Subtract(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4MultiplyAdd(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept;
|
|
M3D_VECTOR M3D_V4Divide(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept;
|
|
M3D_VECTOR M3D_V4Select(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR Control) noexcept;
|
|
M3D_VECTOR M3D_V4MergeXY(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4MergeZW(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Sqrt(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V3Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V3Cross(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V3LengthSq(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V3Length(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept;
|
|
|
|
|
|
//
|
|
// Matrix operations
|
|
//
|
|
// Declarations only; the definitions are not part of this section.
M3D_MATRIX M3D_MIdentity() noexcept;

// NOTE(review): M2 is a non-const reference, so const matrices and
// temporaries cannot be passed as the second operand -- confirm whether the
// definition can take a const reference instead.
M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept;

M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept;
|
|
|
|
|
|
//
|
|
// Vector/Matrix operations
|
|
//
|
|
M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept;
|
|
M3D_F4* M3D_V3Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
|
|
|
|
|
|
//
|
|
// Common transformation matrix constructor functions
|
|
//
|
|
// Declarations only; the definitions are not part of this section.
// The LH / RH suffixes presumably stand for left-handed / right-handed
// coordinate systems -- confirm against the definitions.

// Camera matrices
M3D_MATRIX M3D_TransformMatrixCamLookAtLH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept;
M3D_MATRIX M3D_TransformMatrixCamLookAtRH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept;
M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept;
M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept;

// Frustum matrices
M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept;
M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept;

// Scaling / translation / rotation matrices
M3D_MATRIX M3D_TransformMatrixScaling(float ScaleX, float ScaleY, float ScaleZ) noexcept;
M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept;
M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept;
M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept;
M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept;

// Viewport matrix
M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept;
|
|
|
|
|
|
//
|
|
// Common values for vector/matrix manipulation
|
|
//
|
|
// Declaration prefix for the global constants defined in this header.
// It can be pre-defined by the including code; otherwise it expands to an
// `extern const` declaration marked weak (GNU-compatible compilers other than
// MinGW) or selectany (everything else).
#if !defined(M3D_GCONST)
#  if defined(__GNUC__) && !defined(__MINGW32__)
#    define M3D_GCONST extern const __attribute__((weak))
#  else
#    define M3D_GCONST extern const __declspec(selectany)
#  endif
#endif
|
|
// Rows of the 4x4 identity matrix.
M3D_GCONST M3D_V4F32 M3D_MIdentityR0   = { { { 1.0f, 0.0f, 0.0f, 0.0f } } };
M3D_GCONST M3D_V4F32 M3D_MIdentityR1   = { { { 0.0f, 1.0f, 0.0f, 0.0f } } };
M3D_GCONST M3D_V4F32 M3D_MIdentityR2   = { { { 0.0f, 0.0f, 1.0f, 0.0f } } };
M3D_GCONST M3D_V4F32 M3D_MIdentityR3   = { { { 0.0f, 0.0f, 0.0f, 1.0f } } };

// The same rows with the unit element negated.
M3D_GCONST M3D_V4F32 M3D_MIdentityR0_n = { { { -1.0f, 0.0f, 0.0f, 0.0f } } };
M3D_GCONST M3D_V4F32 M3D_MIdentityR1_n = { { { 0.0f, -1.0f, 0.0f, 0.0f } } };
M3D_GCONST M3D_V4F32 M3D_MIdentityR2_n = { { { 0.0f, 0.0f, -1.0f, 0.0f } } };
M3D_GCONST M3D_V4F32 M3D_MIdentityR3_n = { { { 0.0f, 0.0f, 0.0f, -1.0f } } };

// All-ones vectors with a single component set to -1.
M3D_GCONST M3D_V4F32 M3D_MNegateX      = { { { -1.0f, 1.0f, 1.0f, 1.0f } } };
M3D_GCONST M3D_V4F32 M3D_MNegateY      = { { { 1.0f, -1.0f, 1.0f, 1.0f } } };
M3D_GCONST M3D_V4F32 M3D_MNegateZ      = { { { 1.0f, 1.0f, -1.0f, 1.0f } } };
M3D_GCONST M3D_V4F32 M3D_MNegateW      = { { { 1.0f, 1.0f, 1.0f, -1.0f } } };
|
|
// Bit patterns replicated into all four lanes: 0x7F800000 is the IEEE-754
// single-precision encoding of +infinity, 0x7FC00000 that of a quiet NaN.
M3D_GCONST M3D_V4I32 M3D_MInfinity   = { { { 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000 } } };
M3D_GCONST M3D_V4I32 M3D_MQNaN       = { { { 0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000 } } };

// Lane masks: all bits set in the named lane(s), zero elsewhere.
M3D_GCONST M3D_V4U32 M3D_MMaskX      = { { { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 } } };
M3D_GCONST M3D_V4U32 M3D_MMaskY      = { { { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 } } };
M3D_GCONST M3D_V4U32 M3D_MMaskZ      = { { { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 } } };
M3D_GCONST M3D_V4U32 M3D_MMaskW      = { { { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF } } };
M3D_GCONST M3D_V4U32 M3D_MMask3      = { { { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 } } };

// Select controls: a 1 in the name marks a lane with all bits set
// (lane order x, y, z, w).
M3D_GCONST M3D_V4U32 M3D_MSelect1000 = { { { 0xFFFFFFFF, 0x0, 0x0, 0x0 } } };
M3D_GCONST M3D_V4U32 M3D_MSelect1100 = { { { 0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0 } } };
M3D_GCONST M3D_V4U32 M3D_MSelect1110 = { { { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0 } } };
M3D_GCONST M3D_V4U32 M3D_MSelect1011 = { { { 0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0xFFFFFFFF } } };
|
|
|
|
// Returns the 4x4 identity matrix as a plain M3D_F4X4
// (ones on the diagonal, zeros everywhere else).
constexpr M3D_F4X4 M3D_MIdentity4x4() {
    return M3D_F4X4(
        1.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 1.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 1.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 1.0f);
}
|
|
|
|
#include "3DMaths.inl" |