Experimental multipart rendering

This commit is contained in:
JackCarterSmith 2024-10-03 18:15:26 +02:00
parent 1cf2d3c145
commit def845e741
Signed by: JackCarterSmith
GPG Key ID: 832E52F4E23F8F24
6 changed files with 626 additions and 56 deletions

View File

@ -1,7 +1,9 @@
#include "3DRenderer.hpp"
#include "../World/DbgCube.hpp"
// Rendering order:
// Rendering pipeline:
// model matrix (Object SRT) -> view matrix (camera matrix inverted) -> proj matrix -> clipping -> perspective divide -> viewport transformation -> Rasterizer (draw pixels inside projected triangles on 2D screen)
// object coordinate -> world coordinate -> camera coordinate -> clip/screen coordinate
//
@ -21,6 +23,18 @@ Graphic3DRenderer::Graphic3DRenderer() {
mMainCamera->SetFrustrum(90.0f, 1280.f/324.f, 1.0f, 100.f);
mMainCamera->UpdateCamView();
}
// Fill world object list to render
mRenderList.clear();
mRenderList.push_back(std::make_shared<ObjectDbgCube>());
mRenderList.back()->SetPosition(0.f, 0.f, 50.f);
mRenderList.back()->SetScale(10.0f);
mRenderList.push_back(std::make_shared<ObjectDbgCube>());
mRenderList.back()->SetPosition(24.f, 5.f, 12.f);
mRenderList.back()->SetScale(10.0f);
mRenderList.push_back(std::make_shared<ObjectDbgCube>());
mRenderList.back()->SetPosition(-31.f, 16.f, 24.f);
mRenderList.back()->SetScale(10.0f);
}
Graphic3DRenderer::~Graphic3DRenderer() {}
@ -29,52 +43,72 @@ void Graphic3DRenderer::Draw(sf::RenderTexture& context) {
sf::BlendMode sBM = sf::BlendNone;
sf::RenderStates sRS(sBM);
// Hardcoded debug movement, TODO: remove it
static float thetaAngle = 0.31f;
thetaAngle = thetaAngle >= 6.283185f ? -6.283185f : thetaAngle + 0.004f;
M3D_MATRIX viewMat = mMainCamera->GetView();
M3D_MATRIX projMat = mMainCamera->GetProj();
M3D_MATRIX modelMat = M3D_MIdentity() * M3D_TransformMatrixScaling(10.0f, 10.0f, 10.0f) * M3D_TransformMatrixRotationX(thetaAngle) * M3D_TransformMatrixRotationZ(0.5f*thetaAngle) * M3D_TransformMatrixTranslate(0.0f, 0.0f, 5.0f);
M3D_MATRIX viewProjMat = (viewMat) * (projMat);
M3D_MATRIX MVPMat = modelMat * viewProjMat;
M3D_MATRIX viewportMat = M3D_TransformMatrixViewport(1280.0f, 324.f, 0.0f, 0.0f);
static float thetaAngle2 = 2.12f;
thetaAngle2 = thetaAngle2 >= 6.283185f ? -6.283185f : thetaAngle2 + 0.005f;
static float thetaAngle3 = -4.78f;
thetaAngle3 = thetaAngle3 >= 6.283185f ? -6.283185f : thetaAngle3 + 0.008f;
mRenderList[0]->SetRotation(thetaAngle, 0.f, thetaAngle * 0.5f);
mRenderList[1]->SetRotation(thetaAngle2, 0.f, thetaAngle2 * 0.5f);
mRenderList[2]->SetRotation(thetaAngle3, 0.f, thetaAngle3 * 0.5f);
M3D_MATRIX viewProjMat = mMainCamera->GetView() * mMainCamera->GetProj();
sf::Vertex v_tri[4];
auto cubeMesh = testObj.GetObjectMesh();
uint32_t totVerticesCnt = 0, processedVerticesCnt = 0;
for (auto obj : mRenderList)
totVerticesCnt += obj->GetObjectMesh().GetVerticesCount();
// Do the vertices projection and perspective divide
M3D_F3 projVertices[cubeMesh.vertices.size()];
M3D_V3TransformPersDiv(projVertices, sizeof(M3D_F3), (M3D_F3*)cubeMesh.vertices.data(), sizeof(Vertex), cubeMesh.vertices.size(), MVPMat);
M3D_F3 projVertices[totVerticesCnt];
for (auto obj : mRenderList) {
auto& oMesh = obj->GetObjectMesh();
M3D_V3TransformPersDiv(
projVertices + processedVerticesCnt, sizeof(M3D_F3),
(M3D_F3*)oMesh.vertices.data(), sizeof(Vertex),
oMesh.vertices.size(),
obj->GetTransform() * viewProjMat
);
auto indicePtr = (uint32_t*)cubeMesh.parts[0].indices.data();
for (uint32_t i = 0; i < cubeMesh.parts[0].indices.size(); i += 3) {
// Misscontructed indices tree failsafe
if (i+2 > cubeMesh.parts[0].indices.size())
break;
//TODO: Fill a z-depth buffer...
// Simple clipping
//TODO: implement complete Cohen-Sutherland algo or similar
if ((projVertices[indicePtr[i]]).z > 0 &&
(projVertices[indicePtr[i+1]]).z > 0 &&
(projVertices[indicePtr[i+2]]).z > 0) {
for (auto& objPt : obj->GetObjectMesh().parts) {
auto indicePtr = (uint32_t*)objPt.indices.data();
for (uint32_t i = 0; i < objPt.GetIndicesCount(); i += 3) {
// Malformed index list failsafe: a triangle reads indices i, i+1 and i+2,
// so stop as soon as i+2 would fall outside the list (count not a multiple of 3).
if (i+2 >= objPt.GetIndicesCount())
break;
M3D_VECTOR V1 = M3D_V4LoadF3(&projVertices[indicePtr[i]]);
M3D_VECTOR V2 = M3D_V4LoadF3(&projVertices[indicePtr[i+1]]);
M3D_VECTOR V3 = M3D_V4LoadF3(&projVertices[indicePtr[i+2]]);
//TODO: Proceed with z-test: if z is lesser than previous z, draw the pixel and update z
V1 = M3D_V3Transform(V1, viewportMat);
V2 = M3D_V3Transform(V2, viewportMat);
V3 = M3D_V3Transform(V3, viewportMat);
//v_tri[v_cnt].position.z = ((far+near)/2)+((far-near)/2)*_2dCoord.z; //TODO: transform matrix is incomplete
// Simple clipping
//TODO: implement complete Cohen-Sutherland algo or similar
if (((projVertices + processedVerticesCnt)[indicePtr[i]]).z > 0 &&
((projVertices + processedVerticesCnt)[indicePtr[i+1]]).z > 0 &&
((projVertices + processedVerticesCnt)[indicePtr[i+2]]).z > 0) {
M3D_VECTOR V1 = M3D_V4LoadF3(&(projVertices + processedVerticesCnt)[indicePtr[i]]);
M3D_VECTOR V2 = M3D_V4LoadF3(&(projVertices + processedVerticesCnt)[indicePtr[i+1]]);
M3D_VECTOR V3 = M3D_V4LoadF3(&(projVertices + processedVerticesCnt)[indicePtr[i+2]]);
v_tri[0].position = sf::Vector2f(M3D_V4GetX(V1), M3D_V4GetY(V1));
v_tri[0].color = cubeMesh.vertices[indicePtr[i]].color;
v_tri[3] = v_tri[0];
v_tri[1].position = sf::Vector2f(M3D_V4GetX(V2), M3D_V4GetY(V2));
v_tri[1].color = cubeMesh.vertices[indicePtr[i+1]].color;
v_tri[2].position = sf::Vector2f(M3D_V4GetX(V3), M3D_V4GetY(V3));
v_tri[2].color = cubeMesh.vertices[indicePtr[i+2]].color;
context.draw(v_tri, 4, sf::LineStrip, sRS);
//context.draw(v_tri, 3, sf::Triangles, sRS);
} //TODO: else cut triangle to the window (need vector crossing math...)
V1 = M3D_V3TransformNDCToViewport(V1, 0.f, 0.f, 1280.f, 324.f, 1.f, 100.f);
V2 = M3D_V3TransformNDCToViewport(V2, 0.f, 0.f, 1280.f, 324.f, 1.f, 100.f);
V3 = M3D_V3TransformNDCToViewport(V3, 0.f, 0.f, 1280.f, 324.f, 1.f, 100.f);
v_tri[0].position = sf::Vector2f(M3D_V4GetX(V1), M3D_V4GetY(V1));
v_tri[0].color = oMesh.vertices[indicePtr[i]].color;
v_tri[3] = v_tri[0];
v_tri[1].position = sf::Vector2f(M3D_V4GetX(V2), M3D_V4GetY(V2));
v_tri[1].color = oMesh.vertices[indicePtr[i+1]].color;
v_tri[2].position = sf::Vector2f(M3D_V4GetX(V3), M3D_V4GetY(V3));
v_tri[2].color = oMesh.vertices[indicePtr[i+2]].color;
context.draw(v_tri, 4, sf::LineStrip, sRS);
//context.draw(v_tri, 3, sf::Triangles, sRS);
}
}
}
processedVerticesCnt += oMesh.GetVerticesCount();
}
}

View File

@ -4,7 +4,7 @@
#include "Camera.hpp"
#include "../Utils/MeshHelper.hpp"
#include "../World/DbgCube.hpp"
#include "../World/WorldObject.hpp"
class Graphic3DRenderer final {
@ -21,8 +21,8 @@ public:
private:
sf::RenderTexture mWorldRender; // This is used to create the scene
std::unique_ptr<Camera> mMainCamera;
std::unique_ptr<Camera> mMainCamera; // Default player view
ObjectDbgCube testObj;
std::vector<std::shared_ptr<WorldObject>> mRenderList; // List of elements to be rendered next frame
};

View File

@ -55,6 +55,17 @@
V3 = _mm_shuffle_ps(V3, V4, _MM_SHUFFLE(2, 1, 2, 0))
#endif
// Alignment helpers.
//   M3D_ALIGNED_DATA(x)   - prefix for a variable declaration that must be aligned on x bytes
//   M3D_ALIGNED_STRUCT(x) - replaces the 'struct' keyword for a type that must be aligned on x bytes
// The standard alignas specifier is used when the compiler reports C++17 or newer,
// otherwise the GNU attribute is used, and __declspec(align) is the last fallback.
#if __cplusplus >= 201703L
#define M3D_ALIGNED_DATA(x) alignas(x)
#define M3D_ALIGNED_STRUCT(x) struct alignas(x)
#elif defined(__GNUC__)
#define M3D_ALIGNED_DATA(x) __attribute__ ((aligned(x)))
#define M3D_ALIGNED_STRUCT(x) struct __attribute__ ((aligned(x)))
#else
#define M3D_ALIGNED_DATA(x) __declspec(align(x))
#define M3D_ALIGNED_STRUCT(x) __declspec(align(x)) struct
#endif
//
// Math constants and helping functions
//
@ -65,6 +76,15 @@ constexpr float M3D_1DIV2PI = 0.159154943f;
constexpr float M3D_PIDIV2 = 1.570796327f;
constexpr float M3D_PIDIV4 = 0.785398163f;
// Lane selectors for M3D_V4Permute.
// Values 0..3 pick the X/Y/Z/W lane of the first vector, values 4..7 pick the
// X/Y/Z/W lane of the second vector (low two bits = lane, bit 2 = source vector).
constexpr uint32_t M3D_PERMUTE_0X = 0;
constexpr uint32_t M3D_PERMUTE_0Y = 1;
constexpr uint32_t M3D_PERMUTE_0Z = 2;
constexpr uint32_t M3D_PERMUTE_0W = 3;
constexpr uint32_t M3D_PERMUTE_1X = 4;
constexpr uint32_t M3D_PERMUTE_1Y = 5;
constexpr uint32_t M3D_PERMUTE_1Z = 6;
constexpr uint32_t M3D_PERMUTE_1W = 7;
constexpr float M3D_Deg2Rad(float a) noexcept { return a * (M3D_PI / 180.0f); }
constexpr float M3D_Rad2Deg(float a) noexcept { return a * (180.0f / M3D_PI); }
@ -314,19 +334,23 @@ float M3D_V4GetX(M3D_VECTOR V) noexcept;
float M3D_V4GetY(M3D_VECTOR V) noexcept;
float M3D_V4GetZ(M3D_VECTOR V) noexcept;
float M3D_V4GetW(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW) noexcept;
M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4SplatY(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4SplatZ(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4SplatW(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4Round(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4Add(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
M3D_VECTOR M3D_V4Subtract(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
M3D_VECTOR M3D_V4MultiplyAdd(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept;
M3D_VECTOR M3D_V4Divide(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
M3D_VECTOR M3D_V4NegativeMultiplySubtract(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept;
M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept;
M3D_VECTOR M3D_V4Select(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR Control) noexcept;
M3D_VECTOR M3D_V4MergeXY(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
M3D_VECTOR M3D_V4MergeZW(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
M3D_VECTOR M3D_V4Sqrt(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V4ModAngles(M3D_VECTOR Angles) noexcept;
M3D_VECTOR M3D_V3Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
M3D_VECTOR M3D_V3Cross(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
M3D_VECTOR M3D_V3LengthSq(M3D_VECTOR V) noexcept;
@ -334,6 +358,95 @@ M3D_VECTOR M3D_V3Length(M3D_VECTOR V) noexcept;
M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept;
#ifndef DISABLE_INTRINSICS
namespace M3D_Internal {
// Slow path fallback for permutes that do not map to a single SSE shuffle opcode.
// Template parameters:
//   Shuffle        - immediate lane-selection control handed to M3D_PERMUTE_PS / _mm_shuffle_ps
//   WhichX..WhichW - per output lane: false takes the lane from v1, true takes it from v2
template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper {
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR v2) noexcept {
// All bits set in every lane that must be sourced from v2, zero in the lanes sourced from v1
static const M3D_V4U32 selectMask = {{{
WhichX ? 0xFFFFFFFF : 0,
WhichY ? 0xFFFFFFFF : 0,
WhichZ ? 0xFFFFFFFF : 0,
WhichW ? 0xFFFFFFFF : 0,
}}};
// Apply the same lane shuffle to both inputs, then blend them lane by lane with the mask
M3D_VECTOR shuffled1 = M3D_PERMUTE_PS(v1, Shuffle);
M3D_VECTOR shuffled2 = M3D_PERMUTE_PS(v2, Shuffle);
M3D_VECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1); // keep v1 lanes where mask is clear
M3D_VECTOR masked2 = _mm_and_ps(selectMask, shuffled2);    // keep v2 lanes where mask is set
return _mm_or_ps(masked1, masked2);
}
};
// Fast path for permutes that only read from the first vector.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false> {
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR) noexcept { return M3D_PERMUTE_PS(v1, Shuffle); }
};
// Fast path for permutes that only read from the second vector.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true> {
static M3D_VECTOR Permute(M3D_VECTOR, M3D_VECTOR v2) noexcept { return M3D_PERMUTE_PS(v2, Shuffle); }
};
// Fast path for permutes that read XY from the first vector, ZW from the second.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true> {
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR v2) noexcept { return _mm_shuffle_ps(v1, v2, Shuffle); }
};
// Fast path for permutes that read XY from the second vector, ZW from the first.
// The operands are swapped because _mm_shuffle_ps takes its two low lanes from its first operand.
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false> {
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR v2) noexcept { return _mm_shuffle_ps(v2, v1, Shuffle); }
};
}
#endif
// Compile-time permute: builds a vector whose X/Y/Z/W lanes are picked from V1
// (selector 0..3) or V2 (selector 4..7). Use the M3D_PERMUTE_* constants as selectors.
// Selectors above 7 are rejected at compile time: the SIMD path would silently mask
// them and the scalar path would index past the two source vectors.
template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
inline M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    static_assert(PermuteX <= 7, "PermuteX template parameter out of range");
    static_assert(PermuteY <= 7, "PermuteY template parameter out of range");
    static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range");
    static_assert(PermuteW <= 7, "PermuteW template parameter out of range");
#ifdef DISABLE_INTRINSICS
    // Scalar build: defer to the runtime overload
    return M3D_V4Permute(V1, V2, PermuteX, PermuteY, PermuteZ, PermuteW);
#else
    // Low two bits of each selector choose the lane inside the source vector...
    constexpr uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3);
    // ...and a selector above 3 means the lane comes from V2
    constexpr bool WhichX = PermuteX > 3;
    constexpr bool WhichY = PermuteY > 3;
    constexpr bool WhichZ = PermuteZ > 3;
    constexpr bool WhichW = PermuteW > 3;
    return M3D_Internal::PermuteHelper<Shuffle, WhichX, WhichY, WhichZ, WhichW>::Permute(V1, V2);
#endif
}
// Identity permutes: the result is exactly one of the inputs, no instruction needed.
template<> constexpr M3D_VECTOR M3D_V4Permute<0, 1, 2, 3>(M3D_VECTOR V1, M3D_VECTOR) noexcept { return V1; }
template<> constexpr M3D_VECTOR M3D_V4Permute<4, 5, 6, 7>(M3D_VECTOR, M3D_VECTOR V2) noexcept { return V2; }
#ifndef DISABLE_INTRINSICS
// Selector patterns that correspond to a dedicated SSE/SSE2 move or unpack instruction.
template<> inline M3D_VECTOR M3D_V4Permute<0, 1, 4, 5>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_movelh_ps(V1, V2); }
template<> inline M3D_VECTOR M3D_V4Permute<6, 7, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_movehl_ps(V1, V2); }
template<> inline M3D_VECTOR M3D_V4Permute<0, 4, 1, 5>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_unpacklo_ps(V1, V2); }
template<> inline M3D_VECTOR M3D_V4Permute<2, 6, 3, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_unpackhi_ps(V1, V2); }
// Upper halves of both vectors, done as a 64-bit unpack on the values reinterpreted as doubles
template<> inline M3D_VECTOR M3D_V4Permute<2, 3, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_castpd_ps(_mm_unpackhi_pd(_mm_castps_pd(V1), _mm_castps_pd(V2))); }
#endif
// SSE4 blend specializations: every lane stays in place and is only chosen between V1 and V2.
// Bit n of the blend immediate is set when lane n (X=bit 0 .. W=bit 3) comes from V2,
// which matches the selectors >= 4 in each template argument list below.
#if defined(SSE4_INTRINSICS) && !defined(DISABLE_INTRINSICS)
template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x1); }
template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x2); }
template<> inline M3D_VECTOR M3D_V4Permute<4, 5, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x3); }
template<> inline M3D_VECTOR M3D_V4Permute<0, 1, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x4); }
template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x5); }
template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x6); }
template<> inline M3D_VECTOR M3D_V4Permute<4, 5, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x7); }
template<> inline M3D_VECTOR M3D_V4Permute<0, 1, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x8); }
template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0x9); }
template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xA); }
template<> inline M3D_VECTOR M3D_V4Permute<4, 5, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xB); }
template<> inline M3D_VECTOR M3D_V4Permute<0, 1, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xC); }
template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xD); }
template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept { return _mm_blend_ps(V1, V2, 0xE); }
#endif
//
// Matrix operation
//
@ -349,6 +462,7 @@ M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept;
void M3D_V3Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept;
void M3D_V3TransformPersDiv(M3D_F3* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
M3D_VECTOR M3D_V3TransformNDCToViewport(M3D_VECTOR V, float vpX, float vpY, float vpW, float vpH, float vpMinZ, float vpMaxZ) noexcept;
//
@ -360,11 +474,14 @@ M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDir
M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept;
M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept;
M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept;
M3D_MATRIX M3D_TransformMatrixScaling(float ScaleX, float ScaleY, float ScaleZ) noexcept;
M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept;
M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept;
// Translation matrix builders: one overload takes three scalar offsets, the other
// takes the offset packed in the x/y/z lanes of a vector.
M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept;
M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept;
M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept;
M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept;
M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept;
M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept;
M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept;
@ -386,6 +503,10 @@ M3D_GCONST M3D_V4F32 M3D_MIdentityR0_n = {{{-1.0f, 0.0f, 0.0f, 0.0f}}};
M3D_GCONST M3D_V4F32 M3D_MIdentityR1_n = {{{0.0f, -1.0f, 0.0f, 0.0f}}};
M3D_GCONST M3D_V4F32 M3D_MIdentityR2_n = {{{0.0f, 0.0f, -1.0f, 0.0f}}};
M3D_GCONST M3D_V4F32 M3D_MIdentityR3_n = {{{0.0f, 0.0f, 0.0f, -1.0f}}};
M3D_GCONST M3D_V4F32 M3D_MNegativeOne = {{{-1.0f, -1.0f, -1.0f, -1.0f}}};
M3D_GCONST M3D_V4U32 M3D_MNegativeZero = {{{0x80000000, 0x80000000, 0x80000000, 0x80000000}}};
M3D_GCONST M3D_V4F32 M3D_MOne = {{{1.0f, 1.0f, 1.0f, 1.0f}}};
M3D_GCONST M3D_V4F32 M3D_MZero = {{{0.0f, 0.0f, 0.0f, 0.0f}}};
M3D_GCONST M3D_V4F32 M3D_MNegateX = {{{-1.0f, 1.0f, 1.0f, 1.0f}}};
M3D_GCONST M3D_V4F32 M3D_MNegateY = {{{1.0f, -1.0f, 1.0f, 1.0f}}};
M3D_GCONST M3D_V4F32 M3D_MNegateZ = {{{1.0f, 1.0f, -1.0f, 1.0f}}};
@ -401,7 +522,18 @@ M3D_GCONST M3D_V4U32 M3D_MSelect1000 = {{{0xFFFFFFFF, 0x0, 0x0, 0x
M3D_GCONST M3D_V4U32 M3D_MSelect1100 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0}}};
M3D_GCONST M3D_V4U32 M3D_MSelect1110 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0}}};
M3D_GCONST M3D_V4U32 M3D_MSelect1011 = {{{0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0xFFFFFFFF}}};
M3D_GCONST M3D_V4I32 M3D_MAbsMask = {{{0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}}};
M3D_GCONST M3D_V4F32 M3D_MNoFraction = {{{8388608.0f, 8388608.0f, 8388608.0f, 8388608.0f}}};
M3D_GCONST M3D_V4F32 M3D_MHalfPi = {{{M3D_PIDIV2, M3D_PIDIV2, M3D_PIDIV2, M3D_PIDIV2}}};
M3D_GCONST M3D_V4F32 M3D_MPi = {{{M3D_PI, M3D_PI, M3D_PI, M3D_PI}}};
M3D_GCONST M3D_V4F32 M3D_MTwoPi = {{{M3D_2PI, M3D_2PI, M3D_2PI, M3D_2PI}}};
M3D_GCONST M3D_V4F32 M3D_MReciprocalTwoPi = {{{M3D_1DIV2PI, M3D_1DIV2PI, M3D_1DIV2PI, M3D_1DIV2PI}}};
M3D_GCONST M3D_V4F32 M3D_MSinCoeff0 = {{{-0.16666667f, +0.0083333310f, -0.00019840874f, +2.7525562e-06f}}};
M3D_GCONST M3D_V4F32 M3D_MSinCoeff1 = {{{-2.3889859e-08f, -0.16665852f, +0.0083139502f, -0.00018524670f}}};
M3D_GCONST M3D_V4F32 M3D_MCosCoeff0 = {{{-0.5f, +0.041666638f, -0.0013888378f, +2.4760495e-05f}}};
M3D_GCONST M3D_V4F32 M3D_MCosCoeff1 = {{{-2.6051615e-07f, -0.49992746f, +0.041493919f, -0.0012712436f}}};
void M3D_V4SinCos(M3D_VECTOR* pSin, M3D_VECTOR* pCos, M3D_VECTOR V) noexcept;
constexpr M3D_F4X4 M3D_MIdentity4x4() {
M3D_F4X4 I(
1.0f, 0.0f, 0.0f, 0.0f,

View File

@ -33,6 +33,27 @@ inline void M3D_ScalarSinCos(float* pSin, float* pCos, float Value) noexcept {
*pCos = sign * p;
}
namespace M3D_Internal {
#ifdef DISABLE_INTRINSICS
// Scalar rounding to the nearest integer; exact halves go to the even neighbour
// (banker's rounding), e.g. 0.5 -> 0, 1.5 -> 2, 2.5 -> 2.
inline float round_to_nearest(float x) noexcept {
    const float lower = floorf(x);
    const float fraction = x - lower;

    // Clear cases: strictly closer to one of the two neighbouring integers
    if (fraction < 0.5f)
        return lower;
    if (fraction > 0.5f)
        return lower + 1.f;

    // Exact tie: keep 'lower' only when it is even, i.e. when half of it is a whole number
    float halfWhole;
    (void)modff(lower / 2.f, &halfWhole);
    const bool lowerIsEven = (2.f * halfWhole) == lower;
    return lowerIsEven ? lower : lower + 1.f;
}
#endif
}
/* -------------------------------------------------------------------------------------------------------------------------- */
@ -478,6 +499,51 @@ inline float M3D_V4GetW(M3D_VECTOR V) noexcept {
#endif
}
// Runtime permute: each of the four selectors (0..7) names the lane copied into the
// matching output lane. Selectors 0..3 read from V1, selectors 4..7 read from V2.
inline M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW) noexcept {
#if defined(AVX_INTRINSICS) && !defined(DISABLE_INTRINSICS)
    static const M3D_V4U32 laneBits = {{{3, 3, 3, 3}}};

    M3D_ALIGNED_DATA(16) unsigned int selectors[4] = { PermuteX, PermuteY, PermuteZ, PermuteW };
    const __m128i rawControl = _mm_load_si128(reinterpret_cast<const __m128i*>(&selectors[0]));

    // Lanes whose selector is greater than 3 are taken from V2
    const __m128 takeSecond = _mm_castsi128_ps(_mm_cmpgt_epi32(rawControl, laneBits));
    // Only the low two bits choose the lane inside the source vector
    const __m128i laneControl = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(rawControl), laneBits));

    const __m128 fromFirst = _mm_andnot_ps(takeSecond, _mm_permutevar_ps(V1, laneControl));
    const __m128 fromSecond = _mm_and_ps(takeSecond, _mm_permutevar_ps(V2, laneControl));
    return _mm_or_ps(fromFirst, fromSecond);
#else
    // View both inputs as arrays of four 32-bit words and copy the requested words
    const uint32_t* const sources[2] = {
        reinterpret_cast<const uint32_t*>(&V1),
        reinterpret_cast<const uint32_t*>(&V2)
    };
    const uint32_t selectors[4] = { PermuteX, PermuteY, PermuteZ, PermuteW };

    M3D_VECTOR Result;
    uint32_t* const out = reinterpret_cast<uint32_t*>(&Result);
    for (uint32_t lane = 0; lane < 4; ++lane) {
        const uint32_t whichVector = selectors[lane] >> 2;
        const uint32_t whichLane = selectors[lane] & 3;
        out[lane] = sources[whichVector][whichLane];
    }
    return Result;
#endif
}
inline M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_V4F32 vResult;
@ -532,6 +598,31 @@ inline M3D_VECTOR M3D_V4SplatW(M3D_VECTOR V) noexcept {
#endif
}
// Rounds each of the four lanes of V to an integer value.
// Three implementations are selected at compile time: scalar (round_to_nearest per lane),
// SSE4 (_mm_round_ps) and a plain SSE fallback based on the 2^23 magic-number trick.
inline M3D_VECTOR M3D_V4Round(M3D_VECTOR V) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_V4F32 Result = { { {
M3D_Internal::round_to_nearest(V.v4f[0]),
M3D_Internal::round_to_nearest(V.v4f[1]),
M3D_Internal::round_to_nearest(V.v4f[2]),
M3D_Internal::round_to_nearest(V.v4f[3])
} } };
return Result.v;
#elif defined(SSE4_INTRINSICS)
return _mm_round_ps(V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
#else
// Isolate the sign bit of every lane
__m128 sign = _mm_and_ps(V, M3D_MNegativeZero);
// 8388608.0f (2^23) carrying the sign of the matching lane of V
__m128 sMagic = _mm_or_ps(M3D_MNoFraction, sign);
// Adding then removing the magic value drops the fractional part
// NOTE(review): the rounding direction follows the FPU rounding mode in effect - presumably round-to-nearest-even, confirm
__m128 R1 = _mm_add_ps(V, sMagic);
R1 = _mm_sub_ps(R1, sMagic);
// mask is set for lanes with |V| <= 2^23; the other lanes keep their original value
__m128 R2 = _mm_and_ps(V, M3D_MAbsMask);
__m128 mask = _mm_cmple_ps(R2, M3D_MNoFraction);
R2 = _mm_andnot_ps(mask, V);
R1 = _mm_and_ps(R1, mask);
// R1 and R2 are non-zero in disjoint lanes, so xor merges them
M3D_VECTOR vResult = _mm_xor_ps(R1, R2);
return vResult;
#endif
}
inline M3D_VECTOR M3D_V4Add(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_V4F32 ret = {{{
@ -560,6 +651,20 @@ inline M3D_VECTOR M3D_V4Subtract(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
#endif
}
// Lane-wise product of two vectors: result[i] = V1[i] * V2[i] for the four lanes.
inline M3D_VECTOR M3D_V4Multiply(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
#ifndef DISABLE_INTRINSICS
    return _mm_mul_ps(V1, V2);
#else
    M3D_VECTOR product;
    for (int lane = 0; lane < 4; ++lane) {
        product.v4f[lane] = V1.v4f[lane] * V2.v4f[lane];
    }
    return product;
#endif
}
inline M3D_VECTOR M3D_V4MultiplyAdd(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_V4F32 ret = {{{
@ -588,6 +693,20 @@ inline M3D_VECTOR M3D_V4Divide(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
#endif
}
// Lane-wise V3 - (V1 * V2) for the four lanes.
inline M3D_VECTOR M3D_V4NegativeMultiplySubtract(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept {
#ifdef DISABLE_INTRINSICS
    M3D_V4F32 Result = {{{
        V3.v4f[0] - (V1.v4f[0] * V2.v4f[0]),
        V3.v4f[1] - (V1.v4f[1] * V2.v4f[1]),
        V3.v4f[2] - (V1.v4f[2] * V2.v4f[2]),
        V3.v4f[3] - (V1.v4f[3] * V2.v4f[3])
    }}};
    // Return the vector member explicitly, like the other scalar code paths of this file,
    // instead of relying on an implicit M3D_V4F32 -> M3D_VECTOR conversion.
    return Result.v;
#else
    return M3D_FNMADD_PS(V1, V2, V3);
#endif
}
inline M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_V4F32 ret = {{{
@ -661,6 +780,25 @@ inline M3D_VECTOR M3D_V4Sqrt(M3D_VECTOR V) noexcept {
#endif
}
// Wraps each lane of Angles (radians) by removing whole turns:
// result = Angles - 2*pi * round(Angles / (2*pi)).
inline M3D_VECTOR M3D_V4ModAngles(M3D_VECTOR Angles) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_VECTOR V;
M3D_VECTOR Result;
// Modulo the range of the given angles such that -M3D_PI <= Angles < M3D_PI
V = M3D_V4Multiply(Angles, M3D_MReciprocalTwoPi.v);
V = M3D_V4Round(V);
Result = M3D_V4NegativeMultiplySubtract(M3D_MTwoPi.v, V, Angles);
return Result;
#else
// Modulo the range of the given angles such that -M3D_PI <= Angles < M3D_PI
M3D_VECTOR vResult = _mm_mul_ps(Angles, M3D_MReciprocalTwoPi);
// Use the inline function due to complexity for rounding
vResult = M3D_V4Round(vResult);
// NOTE(review): M3D_FNMADD_PS(a, b, c) presumably evaluates c - a*b, mirroring the scalar path above - confirm
return M3D_FNMADD_PS(vResult, M3D_MTwoPi, Angles);
#endif
}
inline M3D_VECTOR M3D_V3Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
#ifdef DISABLE_INTRINSICS
float fValue = V1.v4f[0] * V2.v4f[0] + V1.v4f[1] * V2.v4f[1] + V1.v4f[2] * V2.v4f[2];
@ -1672,6 +1810,16 @@ inline void M3D_V3TransformPersDiv(
#endif
}
// Maps a point from normalized device coordinates to viewport coordinates with one
// multiply-add: x is scaled by half the width, y by minus half the height (Y flipped),
// z by (vpMaxZ - vpMinZ); then the viewport centre and vpMinZ are added.
// The w lane of the result is V.w * 0 + 0.
inline M3D_VECTOR M3D_V3TransformNDCToViewport(M3D_VECTOR V, float vpX, float vpY, float vpW, float vpH, float vpMinZ, float vpMaxZ) noexcept {
    const float halfW = vpW * 0.5f;
    const float halfH = vpH * 0.5f;

    const M3D_VECTOR scale = M3D_V4Set(halfW, -halfH, vpMaxZ - vpMinZ, 0.0f);
    const M3D_VECTOR offset = M3D_V4Set(vpX + halfW, vpY + halfH, vpMinZ, 0.0f);

    return M3D_V4MultiplyAdd(V, scale, offset);
}
/* -------------------------------------------------------------------------------------------------------------------------- */
@ -1832,7 +1980,40 @@ inline M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float
#endif
}
inline M3D_MATRIX M3D_TransformMatrixScaling(float ScaleX, float ScaleY, float ScaleZ) noexcept {
// Builds a translation matrix: identity with the x/y/z lanes of Offset written in the
// first three columns of the last row. The w lane of Offset is ignored.
inline M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept {
#ifndef DISABLE_INTRINSICS
    M3D_MATRIX translation;
    translation.rows[0] = M3D_MIdentityR0.v;
    translation.rows[1] = M3D_MIdentityR1.v;
    translation.rows[2] = M3D_MIdentityR2.v;
    // x, y, z come from Offset; w stays at the identity value
    translation.rows[3] = M3D_V4Select(M3D_MIdentityR3.v, Offset, M3D_MSelect1110.v);
    return translation;
#else
    M3D_MATRIX translation;
    // Start from identity...
    for (int r = 0; r < 4; ++r) {
        for (int c = 0; c < 4; ++c) {
            translation.mat[r][c] = (r == c) ? 1.0f : 0.0f;
        }
    }
    // ...then drop the offset in the last row
    translation.mat[3][0] = Offset.v4f[0];
    translation.mat[3][1] = Offset.v4f[1];
    translation.mat[3][2] = Offset.v4f[2];
    return translation;
#endif
}
inline M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_MATRIX ret;
ret.mat[0][0] = ScaleX;
@ -1865,6 +2046,39 @@ inline M3D_MATRIX M3D_TransformMatrixScaling(float ScaleX, float ScaleY, float S
#endif
}
// Builds a scaling matrix: the x/y/z lanes of Scale go on the diagonal, the last
// diagonal element is 1 and everything else is 0. The w lane of Scale is ignored.
inline M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept {
#ifndef DISABLE_INTRINSICS
    M3D_MATRIX scaling;
    // Each mask keeps a single lane of Scale and clears the three others
    scaling.rows[0] = _mm_and_ps(Scale, M3D_MMaskX);
    scaling.rows[1] = _mm_and_ps(Scale, M3D_MMaskY);
    scaling.rows[2] = _mm_and_ps(Scale, M3D_MMaskZ);
    scaling.rows[3] = M3D_MIdentityR3.v;
    return scaling;
#else
    M3D_MATRIX scaling;
    for (int r = 0; r < 4; ++r) {
        for (int c = 0; c < 4; ++c) {
            scaling.mat[r][c] = 0.0f;
        }
    }
    scaling.mat[0][0] = Scale.v4f[0];
    scaling.mat[1][1] = Scale.v4f[1];
    scaling.mat[2][2] = Scale.v4f[2];
    scaling.mat[3][3] = 1.0f;
    return scaling;
#endif
}
inline M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_MATRIX ret;
@ -2033,6 +2247,74 @@ inline M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept {
#endif
}
// Builds a rotation matrix from three angles (radians) stored in the x, y and z lanes of Angles.
// The local names in the scalar path (cp/sp, cy/sy, cr/sr) suggest x = pitch, y = yaw, z = roll.
// The last row and last column are those of the identity matrix.
inline M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept {
#ifdef DISABLE_INTRINSICS
// Cosine / sine of the x, y and z angles
float cp = cosf(Angles.v4f[0]);
float sp = sinf(Angles.v4f[0]);
float cy = cosf(Angles.v4f[1]);
float sy = sinf(Angles.v4f[1]);
float cr = cosf(Angles.v4f[2]);
float sr = sinf(Angles.v4f[2]);
M3D_MATRIX ret;
ret.mat[0][0] = cr * cy + sr * sp * sy;
ret.mat[0][1] = sr * cp;
ret.mat[0][2] = sr * sp * cy - cr * sy;
ret.mat[0][3] = 0.0f;
ret.mat[1][0] = cr * sp * sy - sr * cy;
ret.mat[1][1] = cr * cp;
ret.mat[1][2] = sr * sy + cr * sp * cy;
ret.mat[1][3] = 0.0f;
ret.mat[2][0] = cp * sy;
ret.mat[2][1] = -sp;
ret.mat[2][2] = cp * cy;
ret.mat[2][3] = 0.0f;
ret.mat[3][0] = 0.0f;
ret.mat[3][1] = 0.0f;
ret.mat[3][2] = 0.0f;
ret.mat[3][3] = 1.0f;
return ret;
#else
// Sign pattern applied to one group of products before they are summed below
static const M3D_V4F32 Sign = {{{1.0f, -1.0f, -1.0f, 1.0f}}};
// Sines and cosines of the three angles, computed for all lanes at once
M3D_VECTOR SinAngles, CosAngles;
M3D_V4SinCos(&SinAngles, &CosAngles, Angles);
// Gather the sine/cosine factors of each product term (selectors 0* read SinAngles, 1* read CosAngles)
M3D_VECTOR P0 = M3D_V4Permute<M3D_PERMUTE_1X, M3D_PERMUTE_0Z, M3D_PERMUTE_1Z, M3D_PERMUTE_1X>(SinAngles, CosAngles);
M3D_VECTOR Y0 = M3D_V4Permute<M3D_PERMUTE_0Y, M3D_PERMUTE_1X, M3D_PERMUTE_1X, M3D_PERMUTE_1Y>(SinAngles, CosAngles);
M3D_VECTOR P1 = M3D_V4Permute<M3D_PERMUTE_1Z, M3D_PERMUTE_0Z, M3D_PERMUTE_1Z, M3D_PERMUTE_0Z>(SinAngles, CosAngles);
M3D_VECTOR Y1 = M3D_V4Permute<M3D_PERMUTE_1Y, M3D_PERMUTE_1Y, M3D_PERMUTE_0Y, M3D_PERMUTE_0Y>(SinAngles, CosAngles);
M3D_VECTOR P2 = M3D_V4Permute<M3D_PERMUTE_0Z, M3D_PERMUTE_1Z, M3D_PERMUTE_0Z, M3D_PERMUTE_1Z>(SinAngles, CosAngles);
M3D_VECTOR P3 = M3D_V4Permute<M3D_PERMUTE_0Y, M3D_PERMUTE_0Y, M3D_PERMUTE_1Y, M3D_PERMUTE_1Y>(SinAngles, CosAngles);
M3D_VECTOR Y2 = M3D_V4SplatX(SinAngles);
M3D_VECTOR NS = M3D_V4Negate(SinAngles);
// Q0: two-factor products, Q2: sums of a three-factor and a signed two-factor product
M3D_VECTOR Q0 = M3D_V4Multiply(P0, Y0);
M3D_VECTOR Q1 = M3D_V4Multiply(P1, Sign.v);
Q1 = M3D_V4Multiply(Q1, Y1);
M3D_VECTOR Q2 = M3D_V4Multiply(P2, Y2);
Q2 = M3D_V4MultiplyAdd(Q2, P3, Q1);
// Shuffle the computed terms into row order
M3D_VECTOR V0 = M3D_V4Permute<M3D_PERMUTE_1X, M3D_PERMUTE_0Y, M3D_PERMUTE_1Z, M3D_PERMUTE_0W>(Q0, Q2);
M3D_VECTOR V1 = M3D_V4Permute<M3D_PERMUTE_1Y, M3D_PERMUTE_0Z, M3D_PERMUTE_1W, M3D_PERMUTE_0W>(Q0, Q2);
M3D_VECTOR V2 = M3D_V4Permute<M3D_PERMUTE_0X, M3D_PERMUTE_1X, M3D_PERMUTE_0W, M3D_PERMUTE_0W>(Q0, NS);
M3D_MATRIX ret;
// Keep x, y, z of each row and force its w lane to zero
ret.rows[0] = M3D_V4Select(M3D_MZero, V0, M3D_MSelect1110.v);
ret.rows[1] = M3D_V4Select(M3D_MZero, V1, M3D_MSelect1110.v);
ret.rows[2] = M3D_V4Select(M3D_MZero, V2, M3D_MSelect1110.v);
ret.rows[3] = M3D_MIdentityR3;
return ret;
#endif
}
//TODO: transform matrix is incomplete
//v_tri[v_cnt].position.z = ((far+near)/2)+((far-near)/2)*_2dCoord.z;
inline M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept {
const float widthDiv2 = _w / 2;
const float heightDiv2 = _h / 2;
@ -2067,4 +2349,83 @@ inline M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset
ret.rows[3] = M3D_V4Set(_wOffset + widthDiv2, _hOffset + heightDiv2, 0, 1);
return ret;
#endif
}
// Computes the sine and the cosine of the four lanes of V (radians) in one call.
//   pSin - receives the four sines
//   pCos - receives the four cosines
// The scalar build calls sinf/cosf per lane; the SIMD build reduces the angle and
// evaluates polynomial approximations in x^2 using the M3D_MSinCoeff*/M3D_MCosCoeff* tables.
inline void M3D_V4SinCos(M3D_VECTOR* pSin, M3D_VECTOR* pCos, M3D_VECTOR V) noexcept {
#ifdef DISABLE_INTRINSICS
M3D_V4F32 Sin = { { {
sinf(V.v4f[0]),
sinf(V.v4f[1]),
sinf(V.v4f[2]),
sinf(V.v4f[3])
} } };
M3D_V4F32 Cos = { { {
cosf(V.v4f[0]),
cosf(V.v4f[1]),
cosf(V.v4f[2]),
cosf(V.v4f[3])
} } };
*pSin = Sin.v;
*pCos = Cos.v;
#else
// Force the value within the bounds of pi
M3D_VECTOR x = M3D_V4ModAngles(V);
// Map in [-pi/2,pi/2] with sin(y) = sin(x), cos(y) = sign*cos(x).
M3D_VECTOR sign = _mm_and_ps(x, M3D_MNegativeZero);
__m128 c = _mm_or_ps(M3D_MPi, sign); // pi when x >= 0, -pi when x < 0
__m128 absx = _mm_andnot_ps(sign, x); // |x|
__m128 rflx = _mm_sub_ps(c, x);
// comp is set for lanes already inside [-pi/2, pi/2]; the other lanes use the reflected angle
__m128 comp = _mm_cmple_ps(absx, M3D_MHalfPi);
__m128 select0 = _mm_and_ps(comp, x);
__m128 select1 = _mm_andnot_ps(comp, rflx);
x = _mm_or_ps(select0, select1);
// From here 'sign' holds +1 for unreflected lanes and -1 for reflected ones (cosine correction)
select0 = _mm_and_ps(comp, M3D_MOne);
select1 = _mm_andnot_ps(comp, M3D_MNegativeOne);
sign = _mm_or_ps(select0, select1);
__m128 x2 = _mm_mul_ps(x, x);
// Compute polynomial approximation of sine
// Horner evaluation in x2: coefficients are consumed from SC1.x, then SC0.w down to SC0.x
const M3D_VECTOR SC1 = M3D_MSinCoeff1;
__m128 vConstantsB = M3D_PERMUTE_PS(SC1, _MM_SHUFFLE(0, 0, 0, 0));
const M3D_VECTOR SC0 = M3D_MSinCoeff0;
__m128 vConstants = M3D_PERMUTE_PS(SC0, _MM_SHUFFLE(3, 3, 3, 3));
__m128 Result = M3D_FMADD_PS(vConstantsB, x2, vConstants);
vConstants = M3D_PERMUTE_PS(SC0, _MM_SHUFFLE(2, 2, 2, 2));
Result = M3D_FMADD_PS(Result, x2, vConstants);
vConstants = M3D_PERMUTE_PS(SC0, _MM_SHUFFLE(1, 1, 1, 1));
Result = M3D_FMADD_PS(Result, x2, vConstants);
vConstants = M3D_PERMUTE_PS(SC0, _MM_SHUFFLE(0, 0, 0, 0));
Result = M3D_FMADD_PS(Result, x2, vConstants);
Result = M3D_FMADD_PS(Result, x2, M3D_MOne);
// The sine polynomial is odd: multiply the even polynomial in x2 by x
Result = _mm_mul_ps(Result, x);
*pSin = Result;
// Compute polynomial approximation of cosine
const M3D_VECTOR CC1 = M3D_MCosCoeff1;
vConstantsB = M3D_PERMUTE_PS(CC1, _MM_SHUFFLE(0, 0, 0, 0));
const M3D_VECTOR CC0 = M3D_MCosCoeff0;
vConstants = M3D_PERMUTE_PS(CC0, _MM_SHUFFLE(3, 3, 3, 3));
Result = M3D_FMADD_PS(vConstantsB, x2, vConstants);
vConstants = M3D_PERMUTE_PS(CC0, _MM_SHUFFLE(2, 2, 2, 2));
Result = M3D_FMADD_PS(Result, x2, vConstants);
vConstants = M3D_PERMUTE_PS(CC0, _MM_SHUFFLE(1, 1, 1, 1));
Result = M3D_FMADD_PS(Result, x2, vConstants);
vConstants = M3D_PERMUTE_PS(CC0, _MM_SHUFFLE(0, 0, 0, 0));
Result = M3D_FMADD_PS(Result, x2, vConstants);
Result = M3D_FMADD_PS(Result, x2, M3D_MOne);
// Restore the cosine sign for the lanes that were reflected
Result = _mm_mul_ps(Result, sign);
*pCos = Result;
#endif
}

View File

@ -33,15 +33,43 @@ struct Vertex {
struct MeshPart{
MeshPart() = default;
std::vector<unsigned int> indices = {};
std::vector<uint32_t> indices = {};
M3D_F4X4 transform = M3D_MIdentity4x4();
/*
M3D_F3 offsetTransform = M3D_F3(0.0f, 0.0f, 0.0f);
M3D_F3 scaleTransform = M3D_F3(1.0f, 1.0f, 1.0f);
M3D_F3 rotateTransform = M3D_F3(0.0f, 0.0f, 0.0f);
M3D_F3 translateTransform = M3D_F3(0.0f, 0.0f, 0.0f);
*/
std::vector<MeshPart> subparts;
// Size in bytes of one index entry.
inline constexpr size_t GetIndexStride() const noexcept { return sizeof(uint32_t); }
// Number of indices stored directly in this part (subparts excluded).
inline const uint32_t GetIndicesCount() const { return indices.size(); }
// Number of indices of this part plus all of its nested subparts.
inline const uint32_t GetRootIndicesCount() const { return RecursiveIndicesCount(this); }
private:
inline static const uint32_t RecursiveIndicesCount(const MeshPart* mp) {
uint32_t sum = 0;
for (auto& sb : mp->subparts) {
sum += RecursiveIndicesCount(&sb);
}
sum += mp->indices.size();
return sum;
}
};
struct Mesh {
std::vector<Vertex> vertices;
std::vector<MeshPart> parts;
};
inline constexpr size_t GetVertexStride() const noexcept { return sizeof(Vertex); }
inline const size_t GetVerticesCount() const noexcept { return vertices.size(); }
inline const uint32_t GetRootIndicesCount() const {
uint32_t sum = 0;
for (auto& sb : parts)
sum += sb.GetRootIndicesCount();
return sum;
}
};

View File

@ -6,10 +6,35 @@
// Base class of every object that can be placed in the world and rendered.
// It stores the scale / rotation / position of the object and exposes the
// resulting model matrix; derived classes provide the mesh.
class WorldObject {
public:
    // Objects are handled through base-class pointers, so destruction must be virtual.
    virtual ~WorldObject() = default;

    // Mesh to draw for this object; supplied by the derived class.
    virtual const Mesh& GetObjectMesh() const = 0;

    // Model matrix built as scale, then rotation, then translation.
    const M3D_MATRIX GetTransform() noexcept {
        M3D_MATRIX M = M3D_MIdentity();
        M *= M3D_TransformMatrixScale(M3D_V4LoadF3(&scale));
        M *= M3D_TransformMatrixRotation(M3D_V4LoadF3(&rot));
        M *= M3D_TransformMatrixTranslate(M3D_V4LoadF3(&pos));
        return M;
    }

    // Same matrix as GetTransform(), stored into a plain M3D_F4X4.
    const M3D_F4X4 GetTransform4x4f() noexcept {
        M3D_F4X4 out;
        M3D_V4StoreF4x4(&out, GetTransform());
        return out;
    }

    // Setters take const references so const objects and temporaries are accepted too.
    void SetPosition(const M3D_F3& _pos) noexcept { pos = _pos; }
    void SetPosition(float _x, float _y, float _z) noexcept { pos = M3D_F3(_x, _y, _z); }

    // Rotation angles are forwarded as-is to M3D_TransformMatrixRotation.
    void SetRotation(const M3D_F3& _rot) noexcept { rot = _rot; }
    void SetRotation(float _x, float _y, float _z) noexcept { rot = M3D_F3(_x, _y, _z); }

    void SetScale(const M3D_F3& _scale) noexcept { scale = _scale; }
    // Uniform scale: the same factor on the three axes.
    void SetScale(float _s) noexcept { scale = M3D_F3(_s, _s, _s); }
    void SetScale(float _x, float _y, float _z) noexcept { scale = M3D_F3(_x, _y, _z); }

protected:
    // Only derived classes may construct a WorldObject.
    WorldObject() = default;

private:
    M3D_F3 scale = M3D_F3(1.0f, 1.0f, 1.0f);
    M3D_F3 rot = M3D_F3(0.0f, 0.0f, 0.0f);
    M3D_F3 pos = M3D_F3(0.0f, 0.0f, 0.0f);
};
template<class D>
@ -25,14 +50,4 @@ protected:
};
template<class D>
inline WorldObjectAbstract<D>::~WorldObjectAbstract() {}
/*
class WorldObject {
public:
virtual ~WorldObjectAbstract() = 0;
virtual std::vector<MeshVertex>& GetObjectMesh() const = 0;
};
*/
inline WorldObjectAbstract<D>::~WorldObjectAbstract() {}