601 lines
25 KiB
C++
601 lines
25 KiB
C++
#pragma once
|
|
|
|
#include <cfloat>
#include <cmath>
#include <cstddef>
#include <cstdint>
#if __cplusplus >= 202002L
#include <compare>
#endif
|
|
#ifndef DISABLE_INTRINSICS
|
|
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#
|
|
// https://stackoverflow.com/tags/sse/info
|
|
// https://lowleveldev.substack.com/p/simd-a-practical-guide
|
|
# include <immintrin.h>
|
|
// If GNU-gcc => NO_XMVECTOR_OVERLOADS
|
|
#endif
|
|
|
|
#ifndef __cplusplus
|
|
#error This header requires C++
|
|
#endif
|
|
|
|
|
|
#ifndef DISABLE_INTRINSICS
|
|
// Store wrappers used for streaming writes.
//   NO_MOVNT defined     -> ordinary aligned stores, M3D_SFENCE() expands to nothing.
//   NO_MOVNT not defined -> _mm_stream_ps / _mm256_stream_ps, M3D_SFENCE() is _mm_sfence().
// The 256-bit wrapper used to be spelled M3D_STREAM_256_PS in one branch and
// M3D_STREAM_256b_PS in the other, so whichever spelling a caller used failed to
// build in one of the two configurations. Both spellings are now available in
// both configurations.
#ifdef NO_MOVNT
#define M3D_STREAM_PS( p, a ) _mm_store_ps((p), (a))
#define M3D_STREAM_256_PS( p, a ) _mm256_store_ps((p), (a))
#define M3D_SFENCE()
#else
#define M3D_STREAM_PS( p, a ) _mm_stream_ps((p), (a))
#define M3D_STREAM_256_PS( p, a ) _mm256_stream_ps((p), (a))
#define M3D_SFENCE() _mm_sfence()
#endif
// Alternate spelling kept for backward compatibility.
#define M3D_STREAM_256b_PS( p, a ) M3D_STREAM_256_PS((p), (a))
|
|
|
|
// Multiply-add helpers on four packed floats.
//   M3D_FMADD_PS(a, b, c)  -> (a * b) + c
//   M3D_FNMADD_PS(a, b, c) -> c - (a * b)
// Without FMA3_INTRINSICS the result is built from a separate multiply and
// add/subtract; with it, the fused _mm_fmadd_ps / _mm_fnmadd_ps intrinsics are used.
#ifndef FMA3_INTRINSICS
#   define M3D_FMADD_PS( a, b, c ) _mm_add_ps(_mm_mul_ps((a), (b)), (c))
#   define M3D_FNMADD_PS( a, b, c ) _mm_sub_ps((c), _mm_mul_ps((a), (b)))
#else
#   define M3D_FMADD_PS( a, b, c ) _mm_fmadd_ps((a), (b), (c))
#   define M3D_FNMADD_PS( a, b, c ) _mm_fnmadd_ps((a), (b), (c))
#endif
|
|
|
|
// Single-vector lane permute: M3D_PERMUTE_PS(v, c) rearranges the lanes of v
// according to the immediate c (build c with _MM_SHUFFLE).
// The default form shuffles v with itself, so v is expanded twice; pass an
// expression without side effects. When both AVX_INTRINSICS and FAVOR_INTEL
// are defined the single-operand _mm_permute_ps form is used instead.
#if !(defined(AVX_INTRINSICS) && defined(FAVOR_INTEL))
#   define M3D_PERMUTE_PS( v, c ) _mm_shuffle_ps((v), (v), (c))
#else
#   define M3D_PERMUTE_PS( v, c ) _mm_permute_ps((v), (c))
#endif
|
|
|
|
// Unpacks three 4-float registers that hold four tightly packed 3-float
// vectors into one register per vector.
//
// With l1 = {x0 y0 z0 x1}, l2 = {y1 z1 x2 y2}, l3 = {z2 x3 y3 z3} the macro
// DECLARES these locals in the enclosing scope:
//   V2 = {x1 y1 z1 z1}
//   V3 = {x2 y2 z2 z2}
//   V4 = {x3 y3 z3 0 }
// The first vector is l1 itself (its W lane carries x1); no V1 is declared here.
//
// Because it declares V2/V3/V4 and is not wrapped in a block, it can be used
// only once per scope. Arguments are expanded more than once, so pass plain
// variables.
//
// Fix: the last statement referenced a caller variable named `L3` instead of
// the macro parameter `l3`, so the macro silently required the third argument
// to be called L3. It now uses the parameter (identical expansion for callers
// that already pass L3).
#define M3D_UNPACK3INTO4(l1, l2, l3) \
    M3D_VECTOR V3 = _mm_shuffle_ps((l2), (l3), _MM_SHUFFLE(0, 0, 3, 2));\
    M3D_VECTOR V2 = _mm_shuffle_ps((l2), (l1), _MM_SHUFFLE(3, 3, 1, 0));\
    V2 = M3D_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 0, 2));\
    M3D_VECTOR V4 = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128((l3)), 32 / 8))
|
|
|
|
// Packs four 3-component vectors back into three 4-float registers.
//
// Expects locals V1, V2, V3, V4 (one vector per register, XYZ lanes used) to be
// in scope. After expansion:
//   V1  = {x0 y0 z0 x1}
//   v2x = {y1 z1 x2 y2}
//   V3  = {z2 x3 y3 z3}
// V2 is clobbered as scratch. The statements depend on each other, so their
// order must not change.
#define M3D_PACK4INTO3(v2x)                                      \
    v2x = _mm_shuffle_ps(V2, V3, _MM_SHUFFLE(1, 0, 2, 1));       \
    V2 = _mm_shuffle_ps(V2, V1, _MM_SHUFFLE(2, 2, 0, 0));        \
    V1 = _mm_shuffle_ps(V1, V2, _MM_SHUFFLE(0, 2, 1, 0));        \
    V3 = _mm_shuffle_ps(V3, V4, _MM_SHUFFLE(0, 0, 2, 2));        \
    V3 = _mm_shuffle_ps(V3, V4, _MM_SHUFFLE(2, 1, 2, 0))
|
|
#endif
|
|
|
|
// Alignment helpers.
//   M3D_ALIGNED_DATA(x)   - prefix for a declaration that must be aligned to x bytes.
//   M3D_ALIGNED_STRUCT(x) - opens a struct aligned to x bytes; the struct name
//                           follows the macro: M3D_ALIGNED_STRUCT(16) Foo { ... };
// C++17 and later use alignas; older GNU-compatible compilers use
// __attribute__((aligned)); everything else falls back to __declspec(align).
#if __cplusplus >= 201703L
#   define M3D_ALIGNED_DATA(x) alignas(x)
#   define M3D_ALIGNED_STRUCT(x) struct alignas(x)
#elif defined(__GNUC__)
#   define M3D_ALIGNED_DATA(x) __attribute__ ((aligned(x)))
#   define M3D_ALIGNED_STRUCT(x) struct __attribute__ ((aligned(x)))
#else
#   define M3D_ALIGNED_DATA(x) __declspec(align(x))
#   define M3D_ALIGNED_STRUCT(x) __declspec(align(x)) struct
#endif
|
|
|
|
//
|
|
// Math constants and helping functions
|
|
//
|
|
// Single-precision multiples and reciprocals of pi.
constexpr float M3D_PI      = 3.141592654f;  // pi
constexpr float M3D_2PI     = 6.283185307f;  // 2 * pi
constexpr float M3D_1DIVPI  = 0.318309886f;  // 1 / pi
constexpr float M3D_1DIV2PI = 0.159154943f;  // 1 / (2 * pi)
constexpr float M3D_PIDIV2  = 1.570796327f;  // pi / 2
constexpr float M3D_PIDIV4  = 0.785398163f;  // pi / 4
|
|
|
|
// Component selectors for M3D_V4Permute.
// Values 0-3 pick X/Y/Z/W of the first vector, values 4-7 pick X/Y/Z/W of the
// second vector (the permute template tests `> 3` and masks with `& 3`).
constexpr uint32_t M3D_PERMUTE_0X = 0;
constexpr uint32_t M3D_PERMUTE_0Y = 1;
constexpr uint32_t M3D_PERMUTE_0Z = 2;
constexpr uint32_t M3D_PERMUTE_0W = 3;

constexpr uint32_t M3D_PERMUTE_1X = 4;
constexpr uint32_t M3D_PERMUTE_1Y = 5;
constexpr uint32_t M3D_PERMUTE_1Z = 6;
constexpr uint32_t M3D_PERMUTE_1W = 7;
|
|
|
|
// Converts an angle from degrees to radians (a * pi / 180).
constexpr float M3D_Deg2Rad(float a) noexcept {
    return (M3D_PI / 180.0f) * a;
}
|
|
// Converts an angle from radians to degrees (a * 180 / pi).
constexpr float M3D_Rad2Deg(float a) noexcept {
    return (180.0f / M3D_PI) * a;
}
|
|
|
|
|
|
//
|
|
// Generic SIMD vector implementation
|
|
//
|
|
// Call convention (x86_64):
|
|
// 1-3rd vector parameter should be M3D_VECTOR
|
|
// 4th+ vector parameter should be M3D_VECTOR&
|
|
//
|
|
// M3D_VECTOR is the library's 4-lane vector type.
//   - With intrinsics enabled it is the SSE register type __m128.
//   - With DISABLE_INTRINSICS it is a plain struct whose four lanes can be
//     viewed either as floats (v4f) or as 32-bit unsigned integers (v4u).
#ifndef DISABLE_INTRINSICS
using M3D_VECTOR = __m128;
#else
struct sM3DV4 {
    union {
        float v4f[4];
        uint32_t v4u[4];
    };
};
using M3D_VECTOR = sM3DV4;
#endif
|
|
|
|
struct __attribute__((aligned(16))) M3D_V4F32 {
|
|
union {
|
|
float f[4];
|
|
M3D_VECTOR v;
|
|
};
|
|
|
|
inline operator M3D_VECTOR() const noexcept { return v; }
|
|
inline operator const float* () const noexcept { return f; }
|
|
#ifndef DISABLE_INTRINSICS
|
|
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
|
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
|
#endif
|
|
};
|
|
|
|
struct __attribute__((aligned(16))) M3D_V4U8 {
|
|
union {
|
|
uint8_t u[16];
|
|
M3D_VECTOR v;
|
|
};
|
|
|
|
inline operator M3D_VECTOR() const noexcept { return v; }
|
|
inline operator const uint8_t* () const noexcept { return u; }
|
|
#ifndef DISABLE_INTRINSICS
|
|
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
|
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
|
#endif
|
|
};
|
|
|
|
struct __attribute__((aligned(16))) M3D_V4U32 {
|
|
union {
|
|
uint32_t u[4];
|
|
M3D_VECTOR v;
|
|
};
|
|
|
|
inline operator M3D_VECTOR() const noexcept { return v; }
|
|
inline operator const uint32_t* () const noexcept { return u; }
|
|
#ifndef DISABLE_INTRINSICS
|
|
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
|
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
|
#endif
|
|
};
|
|
|
|
struct __attribute__((aligned(16))) M3D_V4I32 {
|
|
union {
|
|
int32_t i[4];
|
|
M3D_VECTOR v;
|
|
};
|
|
|
|
inline operator M3D_VECTOR() const noexcept { return v; }
|
|
inline operator const int32_t* () const noexcept { return i; }
|
|
#ifndef DISABLE_INTRINSICS
|
|
inline operator __m128i() const noexcept { return _mm_castps_si128(v); }
|
|
inline operator __m128d() const noexcept { return _mm_castps_pd(v); }
|
|
#endif
|
|
};
|
|
|
|
// Plain two-float storage type (x, y) with no alignment requirement beyond
// that of float. The default constructor leaves the members uninitialised.
struct M3D_F2 {
    float x;
    float y;

    // Component-wise construction.
    constexpr M3D_F2(float _x, float _y) noexcept : x(_x), y(_y) {}

    // Trivial default / copy / move operations.
    M3D_F2() = default;
    M3D_F2(const M3D_F2&) = default;
    M3D_F2(M3D_F2&&) = default;
    M3D_F2& operator=(const M3D_F2&) = default;
    M3D_F2& operator=(M3D_F2&&) = default;
};
|
|
struct __attribute__((aligned(16))) M3D_F2A : public M3D_F2 {
|
|
using M3D_F2::M3D_F2;
|
|
};
|
|
|
|
// Plain three-float storage type (x, y, z) with no alignment requirement
// beyond that of float. The default constructor leaves the members
// uninitialised.
struct M3D_F3 {
    float x;
    float y;
    float z;

    // Component-wise construction.
    constexpr M3D_F3(float _x, float _y, float _z) noexcept : x(_x), y(_y), z(_z) {}

    // Trivial default / copy / move operations.
    M3D_F3() = default;
    M3D_F3(const M3D_F3&) = default;
    M3D_F3(M3D_F3&&) = default;
    M3D_F3& operator=(const M3D_F3&) = default;
    M3D_F3& operator=(M3D_F3&&) = default;
};
|
|
struct __attribute__((aligned(16))) M3D_F3A : public M3D_F3 {
|
|
using M3D_F3::M3D_F3;
|
|
};
|
|
|
|
// Plain four-float storage type (x, y, z, w) with no alignment requirement
// beyond that of float. The default constructor leaves the members
// uninitialised.
struct M3D_F4 {
    float x;
    float y;
    float z;
    float w;

    // Component-wise construction.
    constexpr M3D_F4(float _x, float _y, float _z, float _w) noexcept
        : x(_x), y(_y), z(_z), w(_w) {}

    // Trivial default / copy / move operations.
    M3D_F4() = default;
    M3D_F4(const M3D_F4&) = default;
    M3D_F4(M3D_F4&&) = default;
    M3D_F4& operator=(const M3D_F4&) = default;
    M3D_F4& operator=(M3D_F4&&) = default;

#if (__cplusplus >= 202002L)
    // Member-wise comparisons in declaration order (C++20 and later only).
    bool operator == (const M3D_F4&) const = default;
    auto operator <=> (const M3D_F4&) const = default;
#endif
};
|
|
struct __attribute__((aligned(16))) M3D_F4A : public M3D_F4 {
|
|
using M3D_F4::M3D_F4;
|
|
};
|
|
|
|
// Plain 4x4 float matrix storage type.
// The sixteen elements can be addressed by name (_RC, R = row, C = column),
// through the two-dimensional array `mat[row][column]`, or through
// operator()(row, column). The default constructor leaves the elements
// uninitialised. operator() performs no bounds checking.
struct M3D_F4X4 {
    union {
        struct {
            float _00, _01, _02, _03;
            float _10, _11, _12, _13;
            float _20, _21, _22, _23;
            float _30, _31, _32, _33;
        };
        float mat[4][4];
    };

    M3D_F4X4() = default;

    M3D_F4X4(const M3D_F4X4&) = default;
    M3D_F4X4& operator=(const M3D_F4X4&) = default;
    M3D_F4X4(M3D_F4X4&&) = default;
    M3D_F4X4& operator=(M3D_F4X4&&) = default;

    // Element-wise construction, arguments given row by row.
    constexpr M3D_F4X4(float f00, float f01, float f02, float f03,
                       float f10, float f11, float f12, float f13,
                       float f20, float f21, float f22, float f23,
                       float f30, float f31, float f32, float f33) noexcept
        : _00(f00), _01(f01), _02(f02), _03(f03),
          _10(f10), _11(f11), _12(f12), _13(f13),
          _20(f20), _21(f21), _22(f22), _23(f23),
          _30(f30), _31(f31), _32(f32), _33(f33) {}

    // Element access by (row, column); indices are not range-checked.
    float operator() (size_t row, size_t column) const noexcept { return mat[row][column]; }
    float& operator() (size_t row, size_t column) noexcept { return mat[row][column]; }

#if (__cplusplus >= 202002L)
    // The comparisons were previously `= default`, but a defaulted comparison
    // is implicitly deleted for a class with an anonymous union, so neither
    // operator could actually be called. They are written out explicitly,
    // walking the elements in row-major order exactly as a member-wise default
    // would.
    bool operator == (const M3D_F4X4& rhs) const noexcept {
        for (size_t r = 0; r < 4; ++r) {
            for (size_t c = 0; c < 4; ++c) {
                if (mat[r][c] != rhs.mat[r][c]) {
                    return false;
                }
            }
        }
        return true;
    }

    // Lexicographic three-way comparison; the first non-equal (or unordered,
    // i.e. NaN) element decides the result.
    std::partial_ordering operator <=> (const M3D_F4X4& rhs) const noexcept {
        for (size_t r = 0; r < 4; ++r) {
            for (size_t c = 0; c < 4; ++c) {
                const std::partial_ordering ord = mat[r][c] <=> rhs.mat[r][c];
                if (ord != 0) {
                    return ord;
                }
            }
        }
        return std::partial_ordering::equivalent;
    }
#endif
};
|
|
struct __attribute__((aligned(16))) M3D_F4X4A : public M3D_F4X4
|
|
{
|
|
using M3D_F4X4::M3D_F4X4;
|
|
};
|
|
|
|
|
|
//
|
|
// Generic SIMD matrix implementation
|
|
//
|
|
// Call convention (x86_64):
|
|
// 1st matrix parameter should be M3D_MATRIX
|
|
// 2nd+ matrix parameter should be M3D_MATRIX&
|
|
//
|
|
#ifdef DISABLE_INTRINSICS
|
|
struct M3D_MATRIX {
|
|
union {
|
|
M3D_VECTOR rows[4];
|
|
struct {
|
|
float _00, _01, _02, _03;
|
|
float _10, _11, _12, _13;
|
|
float _20, _21, _22, _23;
|
|
float _30, _31, _32, _33;
|
|
};
|
|
float mat[4][4];
|
|
};
|
|
#else
|
|
struct __attribute__((aligned(16))) M3D_MATRIX {
|
|
M3D_VECTOR rows[4];
|
|
#endif
|
|
M3D_MATRIX() = default;
|
|
M3D_MATRIX(const M3D_MATRIX&) = default;
|
|
M3D_MATRIX& operator=(const M3D_MATRIX&) = default;
|
|
|
|
M3D_MATRIX(M3D_MATRIX&&) = default;
|
|
M3D_MATRIX& operator=(M3D_MATRIX&&) = default;
|
|
|
|
constexpr M3D_MATRIX(M3D_VECTOR v0, M3D_VECTOR v1, M3D_VECTOR v2, M3D_VECTOR& v3) noexcept : rows{ v0,v1,v2,v3 } {}
|
|
M3D_MATRIX(float f00, float f01, float f02, float f03,
|
|
float f10, float f11, float f12, float f13,
|
|
float f20, float f21, float f22, float f23,
|
|
float f30, float f31, float f32, float f33) noexcept;
|
|
|
|
#ifdef DISABLE_INTRINSICS
|
|
float operator() (size_t row, size_t column) const noexcept { return mat[row][column]; }
|
|
float& operator() (size_t row, size_t column) noexcept { return mat[row][column]; }
|
|
#endif
|
|
|
|
M3D_MATRIX operator+ () const noexcept { return *this; }
|
|
M3D_MATRIX operator- () const noexcept;
|
|
|
|
M3D_MATRIX& operator+= (M3D_MATRIX M) noexcept;
|
|
M3D_MATRIX& operator-= (M3D_MATRIX M) noexcept;
|
|
M3D_MATRIX& operator*= (M3D_MATRIX M) noexcept;
|
|
M3D_MATRIX& operator*= (float S) noexcept;
|
|
M3D_MATRIX& operator/= (float S) noexcept;
|
|
|
|
M3D_MATRIX operator+ (M3D_MATRIX M) const noexcept;
|
|
M3D_MATRIX operator- (M3D_MATRIX M) const noexcept;
|
|
M3D_MATRIX operator* (M3D_MATRIX M) const noexcept;
|
|
M3D_MATRIX operator* (float S) const noexcept;
|
|
M3D_MATRIX operator/ (float S) const noexcept;
|
|
|
|
friend M3D_MATRIX operator* (float S, M3D_MATRIX& M) noexcept;
|
|
};
|
|
|
|
|
|
//
|
|
// Load/Store functions
|
|
//
|
|
M3D_VECTOR M3D_V4LoadF3(const M3D_F3* src) noexcept;
|
|
M3D_VECTOR M3D_V4LoadF3A(const M3D_F3A* src) noexcept;
|
|
void M3D_V4StoreF3(M3D_F3* dst, M3D_VECTOR V) noexcept;
|
|
void M3D_V4StoreF3A(M3D_F3A* dst, M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4LoadF4(const M3D_F4* src) noexcept;
|
|
M3D_VECTOR M3D_V4LoadF4A(const M3D_F4A* src) noexcept;
|
|
void M3D_V4StoreF4(M3D_F4* dst, M3D_VECTOR V) noexcept;
|
|
void M3D_V4StoreF4A(M3D_F4A* dst, M3D_VECTOR V) noexcept;
|
|
M3D_MATRIX M3D_V4LoadF4x4(const M3D_F4X4* src) noexcept;
|
|
M3D_MATRIX M3D_V4LoadF4x4A(const M3D_F4X4A* src) noexcept;
|
|
void M3D_V4StoreF4x4(M3D_F4X4* dst, M3D_MATRIX M) noexcept;
|
|
void M3D_V4StoreF4x4A(M3D_F4X4A* dst, M3D_MATRIX M) noexcept;
|
|
|
|
|
|
//
|
|
// Vector operation
|
|
//
|
|
M3D_VECTOR M3D_V4Zero() noexcept;
|
|
M3D_VECTOR M3D_V4Set(float x, float y, float z, float w) noexcept;
|
|
M3D_VECTOR M3D_V4Negate(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4Replicate(float val) noexcept;
|
|
M3D_VECTOR M3D_V4ReplicatePtr(const float* pValue) noexcept;
|
|
M3D_VECTOR M3D_V4TrueInt() noexcept;
|
|
float M3D_V4GetX(M3D_VECTOR V) noexcept;
|
|
float M3D_V4GetY(M3D_VECTOR V) noexcept;
|
|
float M3D_V4GetZ(M3D_VECTOR V) noexcept;
|
|
float M3D_V4GetW(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4SetX(M3D_VECTOR V, float x) noexcept;
|
|
M3D_VECTOR M3D_V4SetY(M3D_VECTOR V, float y) noexcept;
|
|
M3D_VECTOR M3D_V4SetZ(M3D_VECTOR V, float z) noexcept;
|
|
M3D_VECTOR M3D_V4SetW(M3D_VECTOR V, float w) noexcept;
|
|
M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW) noexcept;
|
|
M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4SplatY(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4SplatZ(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4SplatW(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4SplatOne() noexcept;
|
|
M3D_VECTOR M3D_V4SplatInfinity() noexcept;
|
|
M3D_VECTOR M3D_V4Min(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Max(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Round(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4Add(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Subtract(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4MultiplyAdd(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept;
|
|
M3D_VECTOR M3D_V4Divide(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4NegativeMultiplySubtract(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept;
|
|
bool M3D_V4EqualInt(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Abs(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Length(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept;
|
|
M3D_VECTOR M3D_V4Select(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR Control) noexcept;
|
|
M3D_VECTOR M3D_V4MergeXY(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4MergeZW(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Greater(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Less(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4AndInt(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4OrInt(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V4Reciprocal(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4Sqrt(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V4ModAngles(M3D_VECTOR Angles) noexcept;
|
|
M3D_VECTOR M3D_V3Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V3Cross(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
|
M3D_VECTOR M3D_V3LengthSq(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V3Length(M3D_VECTOR V) noexcept;
|
|
M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept;
|
|
|
|
|
|
#ifndef DISABLE_INTRINSICS
|
|
namespace M3D_Internal {
|
|
// Slow path fallback for permutes that do not map to a single SSE shuffle opcode.
|
|
template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper {
|
|
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR v2) noexcept {
|
|
static const M3D_V4U32 selectMask = {{{
|
|
WhichX ? 0xFFFFFFFF : 0,
|
|
WhichY ? 0xFFFFFFFF : 0,
|
|
WhichZ ? 0xFFFFFFFF : 0,
|
|
WhichW ? 0xFFFFFFFF : 0,
|
|
}}};
|
|
|
|
M3D_VECTOR shuffled1 = M3D_PERMUTE_PS(v1, Shuffle);
|
|
M3D_VECTOR shuffled2 = M3D_PERMUTE_PS(v2, Shuffle);
|
|
|
|
M3D_VECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1);
|
|
M3D_VECTOR masked2 = _mm_and_ps(selectMask, shuffled2);
|
|
|
|
return _mm_or_ps(masked1, masked2);
|
|
}
|
|
};
|
|
|
|
// Fast path for permutes that only read from the first vector.
|
|
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false> {
|
|
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR) noexcept { return M3D_PERMUTE_PS(v1, Shuffle); }
|
|
};
|
|
|
|
// Fast path for permutes that only read from the second vector.
|
|
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true> {
|
|
static M3D_VECTOR Permute(M3D_VECTOR, M3D_VECTOR v2) noexcept { return M3D_PERMUTE_PS(v2, Shuffle); }
|
|
};
|
|
|
|
// Fast path for permutes that read XY from the first vector, ZW from the second.
|
|
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true> {
|
|
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR v2) noexcept { return _mm_shuffle_ps(v1, v2, Shuffle); }
|
|
};
|
|
|
|
// Fast path for permutes that read XY from the second vector, ZW from the first.
|
|
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false> {
|
|
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR v2) noexcept { return _mm_shuffle_ps(v2, v1, Shuffle); }
|
|
};
|
|
}
|
|
#endif
|
|
|
|
template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
|
|
inline M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
|
#ifdef DISABLE_INTRINSICS
|
|
return M3D_V4Permute(V1, V2, PermuteX, PermuteY, PermuteZ, PermuteW);
|
|
#else
|
|
constexpr uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3);
|
|
|
|
constexpr bool WhichX = PermuteX > 3;
|
|
constexpr bool WhichY = PermuteY > 3;
|
|
constexpr bool WhichZ = PermuteZ > 3;
|
|
constexpr bool WhichW = PermuteW > 3;
|
|
|
|
return M3D_Internal::PermuteHelper<Shuffle, WhichX, WhichY, WhichZ, WhichW>::Permute(V1, V2);
|
|
#endif
|
|
}
|
|
|
|
// Identity patterns: the result is exactly one of the inputs, so no shuffle is needed.
template<>
constexpr M3D_VECTOR M3D_V4Permute<0, 1, 2, 3>(M3D_VECTOR V1, M3D_VECTOR) noexcept {
    return V1;
}

template<>
constexpr M3D_VECTOR M3D_V4Permute<4, 5, 6, 7>(M3D_VECTOR, M3D_VECTOR V2) noexcept {
    return V2;
}
|
|
|
|
#ifndef DISABLE_INTRINSICS
|
|
template<> inline M3D_VECTOR M3D_V4Permute<0, 1, 4, 5>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_movelh_ps(V1, V2); }
|
|
template<> inline M3D_VECTOR M3D_V4Permute<6, 7, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_movehl_ps(V1, V2); }
|
|
template<> inline M3D_VECTOR M3D_V4Permute<0, 4, 1, 5>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_unpacklo_ps(V1, V2); }
|
|
template<> inline M3D_VECTOR M3D_V4Permute<2, 6, 3, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_unpackhi_ps(V1, V2); }
|
|
template<> inline M3D_VECTOR M3D_V4Permute<2, 3, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_castpd_ps(_mm_unpackhi_pd(_mm_castps_pd(V1), _mm_castps_pd(V2))); }
|
|
#endif
|
|
|
|
// Patterns where every lane stays in place and only its source vector varies:
// a single SSE4.1 blend. Bit N of the immediate is set when lane N comes from V2.
#if !defined(DISABLE_INTRINSICS) && defined(SSE4_INTRINSICS)
template<>
inline M3D_VECTOR M3D_V4Permute<4, 1, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x1);
}

template<>
inline M3D_VECTOR M3D_V4Permute<0, 5, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x2);
}

template<>
inline M3D_VECTOR M3D_V4Permute<4, 5, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x3);
}

template<>
inline M3D_VECTOR M3D_V4Permute<0, 1, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x4);
}

template<>
inline M3D_VECTOR M3D_V4Permute<4, 1, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x5);
}

template<>
inline M3D_VECTOR M3D_V4Permute<0, 5, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x6);
}

template<>
inline M3D_VECTOR M3D_V4Permute<4, 5, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x7);
}

template<>
inline M3D_VECTOR M3D_V4Permute<0, 1, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x8);
}

template<>
inline M3D_VECTOR M3D_V4Permute<4, 1, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x9);
}

template<>
inline M3D_VECTOR M3D_V4Permute<0, 5, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0xA);
}

template<>
inline M3D_VECTOR M3D_V4Permute<4, 5, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0xB);
}

template<>
inline M3D_VECTOR M3D_V4Permute<0, 1, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0xC);
}

template<>
inline M3D_VECTOR M3D_V4Permute<4, 1, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0xD);
}

template<>
inline M3D_VECTOR M3D_V4Permute<0, 5, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0xE);
}
#endif
|
|
|
|
M3D_VECTOR M3D_QMultiply(M3D_VECTOR Q1, M3D_VECTOR Q2) noexcept;
|
|
M3D_VECTOR M3D_QConjugate(M3D_VECTOR Q) noexcept;
|
|
|
|
void M3D_V4SinCos(M3D_VECTOR* pSin, M3D_VECTOR* pCos, M3D_VECTOR V) noexcept;
|
|
|
|
|
|
//
|
|
// Matrix operation
|
|
//
|
|
M3D_MATRIX M3D_MIdentity() noexcept;
|
|
M3D_MATRIX M3D_MMultiply(M3D_MATRIX M1, M3D_MATRIX& M2) noexcept;
|
|
M3D_MATRIX M3D_MTranspose(M3D_MATRIX M) noexcept;
|
|
M3D_MATRIX M3D_MInverse(M3D_MATRIX M) noexcept;
|
|
|
|
M3D_VECTOR M3D_QRotationMatrix(M3D_MATRIX M) noexcept;
|
|
|
|
|
|
//
|
|
// Vector/Matrix operation
|
|
//
|
|
M3D_VECTOR M3D_V3Rotate(M3D_VECTOR V, M3D_VECTOR RotationQuaternion) noexcept;
|
|
M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept;
|
|
void M3D_V3Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
|
|
M3D_VECTOR M3D_V3TransformNormal(M3D_VECTOR V, M3D_MATRIX M) noexcept;
|
|
M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept;
|
|
void M3D_V3TransformPersDiv(M3D_F3* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
|
|
M3D_VECTOR M3D_V4Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept;
|
|
void M3D_V4Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F4* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
|
|
M3D_VECTOR M3D_V3TransformNDCToViewport(M3D_VECTOR V, float vpX, float vpY, float vpW, float vpH, float vpMinZ, float vpMaxZ) noexcept;
|
|
|
|
|
|
//
|
|
// Common transformation matrix constructor functions
|
|
//
|
|
// Transformation-matrix constructor declarations (definitions are not in this
// section). Only parameter NAMES changed here; types, order and function names
// are untouched, so existing definitions and callers are unaffected.

// Camera/view matrices (LH / RH suffix per function name).
M3D_MATRIX M3D_TransformMatrixCamLookAtLH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept;
M3D_MATRIX M3D_TransformMatrixCamLookAtRH(M3D_VECTOR viewPos, M3D_VECTOR focusPos, M3D_VECTOR upDirection) noexcept;
M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept;
M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept;

// Projection matrices. The plane parameters were named `near` and `far`, which
// the Windows SDK headers define as empty macros; with <windows.h> included
// first the declarations would lose their parameter names and collide.
M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float nearZ, float farZ) noexcept;
M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float nearZ, float farZ) noexcept;

// Scale / translate. The vector overload of Translate previously named its
// parameter `Scale` (copy-paste from the function above); it is now `Offset`,
// matching the scalar overload.
M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept;
M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept;
M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept;
M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept;

// Rotations.
M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept;
M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept;
M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept;
M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept;
M3D_MATRIX M3D_TransformMatrixRotationNormal(M3D_VECTOR NormalAxis, float Angle) noexcept;
M3D_MATRIX M3D_TransformMatrixRotationAxis(M3D_VECTOR axis, float angle) noexcept;

// Viewport.
M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept;
|
|
|
|
|
|
//
|
|
// Common values for vector/matrix manipulation
|
|
//
|
|
// Storage-class prefix for the global constants defined in this header.
// The constants are defined (not just declared) here, so they are marked weak /
// selectany to let every translation unit that includes the header carry a
// copy without duplicate-symbol errors at link time.
// May be pre-defined by the including code to override this choice.
#if !defined(M3D_GCONST)
#   if !defined(__GNUC__) || defined(__MINGW32__)
#       define M3D_GCONST extern const __declspec(selectany)
#   else
#       define M3D_GCONST extern const __attribute__((weak))
#   endif
#endif
|
|
// Rows of the 4x4 identity matrix and their negated counterparts.
M3D_GCONST M3D_V4F32 M3D_MIdentityR0 = {{{1.0f, 0.0f, 0.0f, 0.0f}}};
M3D_GCONST M3D_V4F32 M3D_MIdentityR1 = {{{0.0f, 1.0f, 0.0f, 0.0f}}};
M3D_GCONST M3D_V4F32 M3D_MIdentityR2 = {{{0.0f, 0.0f, 1.0f, 0.0f}}};
M3D_GCONST M3D_V4F32 M3D_MIdentityR3 = {{{0.0f, 0.0f, 0.0f, 1.0f}}};
M3D_GCONST M3D_V4F32 M3D_MIdentityR0_n = {{{-1.0f, 0.0f, 0.0f, 0.0f}}};
M3D_GCONST M3D_V4F32 M3D_MIdentityR1_n = {{{0.0f, -1.0f, 0.0f, 0.0f}}};
M3D_GCONST M3D_V4F32 M3D_MIdentityR2_n = {{{0.0f, 0.0f, -1.0f, 0.0f}}};
M3D_GCONST M3D_V4F32 M3D_MIdentityR3_n = {{{0.0f, 0.0f, 0.0f, -1.0f}}};

// Splatted scalars. M3D_MNegativeZero is the float sign bit in every lane.
M3D_GCONST M3D_V4F32 M3D_MNegativeOne = {{{-1.0f, -1.0f, -1.0f, -1.0f}}};
M3D_GCONST M3D_V4U32 M3D_MNegativeZero = {{{0x80000000, 0x80000000, 0x80000000, 0x80000000}}};
M3D_GCONST M3D_V4F32 M3D_MOne = {{{1.0f, 1.0f, 1.0f, 1.0f}}};
M3D_GCONST M3D_V4F32 M3D_MZero = {{{0.0f, 0.0f, 0.0f, 0.0f}}};

// Multipliers that flip the sign of a single component.
M3D_GCONST M3D_V4F32 M3D_MNegateX = {{{-1.0f, 1.0f, 1.0f, 1.0f}}};
M3D_GCONST M3D_V4F32 M3D_MNegateY = {{{1.0f, -1.0f, 1.0f, 1.0f}}};
M3D_GCONST M3D_V4F32 M3D_MNegateZ = {{{1.0f, 1.0f, -1.0f, 1.0f}}};
M3D_GCONST M3D_V4F32 M3D_MNegateW = {{{1.0f, 1.0f, 1.0f, -1.0f}}};

// IEEE-754 single-precision bit patterns: +infinity and a quiet NaN.
M3D_GCONST M3D_V4I32 M3D_MInfinity = {{{0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000}}};
M3D_GCONST M3D_V4I32 M3D_MQNaN = {{{0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000}}};

// Per-lane bit masks (all ones = lane selected).
M3D_GCONST M3D_V4U32 M3D_MMaskX = {{{0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000}}};
M3D_GCONST M3D_V4U32 M3D_MMaskY = {{{0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000}}};
M3D_GCONST M3D_V4U32 M3D_MMaskZ = {{{0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000}}};
M3D_GCONST M3D_V4U32 M3D_MMaskW = {{{0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF}}};
M3D_GCONST M3D_V4U32 M3D_MMask3 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000}}};

// Select-control vectors; the digits in the name give the X,Y,Z,W lanes that are set.
M3D_GCONST M3D_V4U32 M3D_MSelect1000 = {{{0xFFFFFFFF, 0x0, 0x0, 0x0}}};
M3D_GCONST M3D_V4U32 M3D_MSelect1100 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0}}};
M3D_GCONST M3D_V4U32 M3D_MSelect1010 = {{{0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0x0}}};
M3D_GCONST M3D_V4U32 M3D_MSelect1110 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0}}};
M3D_GCONST M3D_V4U32 M3D_MSelect1011 = {{{0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0xFFFFFFFF}}};
M3D_GCONST M3D_V4U32 M3D_MSelect0101 = {{{0x0, 0xFFFFFFFF, 0x0, 0xFFFFFFFF}}};

// Every bit except the sign bit; 8388608.0f is 2^23.
M3D_GCONST M3D_V4I32 M3D_MAbsMask = {{{0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}}};
M3D_GCONST M3D_V4F32 M3D_MNoFraction = {{{8388608.0f, 8388608.0f, 8388608.0f, 8388608.0f}}};

// Splatted pi-related constants.
M3D_GCONST M3D_V4F32 M3D_MHalfPi = {{{M3D_PIDIV2, M3D_PIDIV2, M3D_PIDIV2, M3D_PIDIV2}}};
M3D_GCONST M3D_V4F32 M3D_MPi = {{{M3D_PI, M3D_PI, M3D_PI, M3D_PI}}};
M3D_GCONST M3D_V4F32 M3D_MTwoPi = {{{M3D_2PI, M3D_2PI, M3D_2PI, M3D_2PI}}};
M3D_GCONST M3D_V4F32 M3D_MReciprocalTwoPi = {{{M3D_1DIV2PI, M3D_1DIV2PI, M3D_1DIV2PI, M3D_1DIV2PI}}};

// Coefficient tables, presumably for the polynomial sine/cosine approximations
// (consumers are not visible in this section).
M3D_GCONST M3D_V4F32 M3D_MSinCoeff0 = {{{-0.16666667f, +0.0083333310f, -0.00019840874f, +2.7525562e-06f}}};
M3D_GCONST M3D_V4F32 M3D_MSinCoeff1 = {{{-2.3889859e-08f, -0.16665852f, +0.0083139502f, -0.00018524670f}}};
M3D_GCONST M3D_V4F32 M3D_MCosCoeff0 = {{{-0.5f, +0.041666638f, -0.0013888378f, +2.4760495e-05f}}};
M3D_GCONST M3D_V4F32 M3D_MCosCoeff1 = {{{-2.6051615e-07f, -0.49992746f, +0.041493919f, -0.0012712436f}}};
|
|
|
|
// Returns the 4x4 identity matrix in the plain M3D_F4X4 storage type
// (ones on the diagonal, zeros elsewhere).
constexpr M3D_F4X4 M3D_MIdentity4x4() {
    return M3D_F4X4(
        1.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 1.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 1.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 1.0f);
}
|
|
|
|
#include "3DMaths_vec.inl"
|
|
#include "3DMaths_mat.inl"
|
|
#include "3DMaths_bs.inl" |