Experimental multipart rendering
This commit is contained in:
parent
1cf2d3c145
commit
def845e741
@ -1,7 +1,9 @@
|
||||
#include "3DRenderer.hpp"
|
||||
|
||||
#include "../World/DbgCube.hpp"
|
||||
|
||||
// Rendering order:
|
||||
|
||||
// Rendering pipeline:
|
||||
// model matrix (Object SRT) -> view matrix (camera matrix inverted) -> proj matrix -> clipping -> perspective divide -> viewport transformation -> Rasterizer (draw pixels inside projected triangles on 2D screen)
|
||||
// object coordinate -> world coordinate -> camera coordinate -> clip/screen coordinate
|
||||
//
|
||||
@ -21,6 +23,18 @@ Graphic3DRenderer::Graphic3DRenderer() {
|
||||
mMainCamera->SetFrustrum(90.0f, 1280.f/324.f, 1.0f, 100.f);
|
||||
mMainCamera->UpdateCamView();
|
||||
}
|
||||
|
||||
// Fill world object list to render
|
||||
mRenderList.clear();
|
||||
mRenderList.push_back(std::make_shared<ObjectDbgCube>());
|
||||
mRenderList.back()->SetPosition(0.f, 0.f, 50.f);
|
||||
mRenderList.back()->SetScale(10.0f);
|
||||
mRenderList.push_back(std::make_shared<ObjectDbgCube>());
|
||||
mRenderList.back()->SetPosition(24.f, 5.f, 12.f);
|
||||
mRenderList.back()->SetScale(10.0f);
|
||||
mRenderList.push_back(std::make_shared<ObjectDbgCube>());
|
||||
mRenderList.back()->SetPosition(-31.f, 16.f, 24.f);
|
||||
mRenderList.back()->SetScale(10.0f);
|
||||
}
|
||||
|
||||
Graphic3DRenderer::~Graphic3DRenderer() {}
|
||||
@ -29,52 +43,72 @@ void Graphic3DRenderer::Draw(sf::RenderTexture& context) {
|
||||
sf::BlendMode sBM = sf::BlendNone;
|
||||
sf::RenderStates sRS(sBM);
|
||||
|
||||
// Hardcoded debug movement, TODO: remove it
|
||||
static float thetaAngle = 0.31f;
|
||||
thetaAngle = thetaAngle >= 6.283185f ? -6.283185f : thetaAngle + 0.004f;
|
||||
|
||||
M3D_MATRIX viewMat = mMainCamera->GetView();
|
||||
M3D_MATRIX projMat = mMainCamera->GetProj();
|
||||
M3D_MATRIX modelMat = M3D_MIdentity() * M3D_TransformMatrixScaling(10.0f, 10.0f, 10.0f) * M3D_TransformMatrixRotationX(thetaAngle) * M3D_TransformMatrixRotationZ(0.5f*thetaAngle) * M3D_TransformMatrixTranslate(0.0f, 0.0f, 5.0f);
|
||||
M3D_MATRIX viewProjMat = (viewMat) * (projMat);
|
||||
M3D_MATRIX MVPMat = modelMat * viewProjMat;
|
||||
M3D_MATRIX viewportMat = M3D_TransformMatrixViewport(1280.0f, 324.f, 0.0f, 0.0f);
|
||||
static float thetaAngle2 = 2.12f;
|
||||
thetaAngle2 = thetaAngle2 >= 6.283185f ? -6.283185f : thetaAngle2 + 0.005f;
|
||||
static float thetaAngle3 = -4.78f;
|
||||
thetaAngle3 = thetaAngle3 >= 6.283185f ? -6.283185f : thetaAngle3 + 0.008f;
|
||||
mRenderList[0]->SetRotation(thetaAngle, 0.f, thetaAngle * 0.5f);
|
||||
mRenderList[1]->SetRotation(thetaAngle2, 0.f, thetaAngle2 * 0.5f);
|
||||
mRenderList[2]->SetRotation(thetaAngle3, 0.f, thetaAngle3 * 0.5f);
|
||||
|
||||
M3D_MATRIX viewProjMat = mMainCamera->GetView() * mMainCamera->GetProj();
|
||||
sf::Vertex v_tri[4];
|
||||
auto cubeMesh = testObj.GetObjectMesh();
|
||||
|
||||
uint32_t totVerticesCnt = 0, processedVerticesCnt = 0;
|
||||
for (auto obj : mRenderList)
|
||||
totVerticesCnt += obj->GetObjectMesh().GetVerticesCount();
|
||||
|
||||
// Do the vertices projection and perspective divide
|
||||
M3D_F3 projVertices[cubeMesh.vertices.size()];
|
||||
M3D_V3TransformPersDiv(projVertices, sizeof(M3D_F3), (M3D_F3*)cubeMesh.vertices.data(), sizeof(Vertex), cubeMesh.vertices.size(), MVPMat);
|
||||
M3D_F3 projVertices[totVerticesCnt];
|
||||
for (auto obj : mRenderList) {
|
||||
auto& oMesh = obj->GetObjectMesh();
|
||||
M3D_V3TransformPersDiv(
|
||||
projVertices + processedVerticesCnt, sizeof(M3D_F3),
|
||||
(M3D_F3*)oMesh.vertices.data(), sizeof(Vertex),
|
||||
oMesh.vertices.size(),
|
||||
obj->GetTransform() * viewProjMat
|
||||
);
|
||||
|
||||
auto indicePtr = (uint32_t*)cubeMesh.parts[0].indices.data();
|
||||
for (uint32_t i = 0; i < cubeMesh.parts[0].indices.size(); i += 3) {
|
||||
// Misscontructed indices tree failsafe
|
||||
if (i+2 > cubeMesh.parts[0].indices.size())
|
||||
break;
|
||||
//TODO: Fill a z-depth buffer...
|
||||
|
||||
// Simple clipping
|
||||
//TODO: implement complete Cohen-Sutherland algo or similar
|
||||
if ((projVertices[indicePtr[i]]).z > 0 &&
|
||||
(projVertices[indicePtr[i+1]]).z > 0 &&
|
||||
(projVertices[indicePtr[i+2]]).z > 0) {
|
||||
for (auto& objPt : obj->GetObjectMesh().parts) {
|
||||
auto indicePtr = (uint32_t*)objPt.indices.data();
|
||||
for (uint32_t i = 0; i < objPt.GetIndicesCount(); i += 3) {
|
||||
// Misconstructed indices tree failsafe
|
||||
if (i+2 > objPt.GetIndicesCount())
|
||||
break;
|
||||
|
||||
M3D_VECTOR V1 = M3D_V4LoadF3(&projVertices[indicePtr[i]]);
|
||||
M3D_VECTOR V2 = M3D_V4LoadF3(&projVertices[indicePtr[i+1]]);
|
||||
M3D_VECTOR V3 = M3D_V4LoadF3(&projVertices[indicePtr[i+2]]);
|
||||
//TODO: Proceed with z-test: if z is lesser than previous z, draw the pixel and update z
|
||||
|
||||
V1 = M3D_V3Transform(V1, viewportMat);
|
||||
V2 = M3D_V3Transform(V2, viewportMat);
|
||||
V3 = M3D_V3Transform(V3, viewportMat);
|
||||
//v_tri[v_cnt].position.z = ((far+near)/2)+((far-near)/2)*_2dCoord.z; //TODO: transform matrix is incomplete
|
||||
// Simple clipping
|
||||
//TODO: implement complete Cohen-Sutherland algo or similar
|
||||
if (((projVertices + processedVerticesCnt)[indicePtr[i]]).z > 0 &&
|
||||
((projVertices + processedVerticesCnt)[indicePtr[i+1]]).z > 0 &&
|
||||
((projVertices + processedVerticesCnt)[indicePtr[i+2]]).z > 0) {
|
||||
M3D_VECTOR V1 = M3D_V4LoadF3(&(projVertices + processedVerticesCnt)[indicePtr[i]]);
|
||||
M3D_VECTOR V2 = M3D_V4LoadF3(&(projVertices + processedVerticesCnt)[indicePtr[i+1]]);
|
||||
M3D_VECTOR V3 = M3D_V4LoadF3(&(projVertices + processedVerticesCnt)[indicePtr[i+2]]);
|
||||
|
||||
v_tri[0].position = sf::Vector2f(M3D_V4GetX(V1), M3D_V4GetY(V1));
|
||||
v_tri[0].color = cubeMesh.vertices[indicePtr[i]].color;
|
||||
v_tri[3] = v_tri[0];
|
||||
v_tri[1].position = sf::Vector2f(M3D_V4GetX(V2), M3D_V4GetY(V2));
|
||||
v_tri[1].color = cubeMesh.vertices[indicePtr[i+1]].color;
|
||||
v_tri[2].position = sf::Vector2f(M3D_V4GetX(V3), M3D_V4GetY(V3));
|
||||
v_tri[2].color = cubeMesh.vertices[indicePtr[i+2]].color;
|
||||
context.draw(v_tri, 4, sf::LineStrip, sRS);
|
||||
//context.draw(v_tri, 3, sf::Triangles, sRS);
|
||||
} //TODO: else cut triangle to the window (need vector crossing math...)
|
||||
V1 = M3D_V3TransformNDCToViewport(V1, 0.f, 0.f, 1280.f, 324.f, 1.f, 100.f);
|
||||
V2 = M3D_V3TransformNDCToViewport(V2, 0.f, 0.f, 1280.f, 324.f, 1.f, 100.f);
|
||||
V3 = M3D_V3TransformNDCToViewport(V3, 0.f, 0.f, 1280.f, 324.f, 1.f, 100.f);
|
||||
|
||||
v_tri[0].position = sf::Vector2f(M3D_V4GetX(V1), M3D_V4GetY(V1));
|
||||
v_tri[0].color = oMesh.vertices[indicePtr[i]].color;
|
||||
v_tri[3] = v_tri[0];
|
||||
v_tri[1].position = sf::Vector2f(M3D_V4GetX(V2), M3D_V4GetY(V2));
|
||||
v_tri[1].color = oMesh.vertices[indicePtr[i+1]].color;
|
||||
v_tri[2].position = sf::Vector2f(M3D_V4GetX(V3), M3D_V4GetY(V3));
|
||||
v_tri[2].color = oMesh.vertices[indicePtr[i+2]].color;
|
||||
context.draw(v_tri, 4, sf::LineStrip, sRS);
|
||||
//context.draw(v_tri, 3, sf::Triangles, sRS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
processedVerticesCnt += oMesh.GetVerticesCount();
|
||||
}
|
||||
}
|
@ -4,7 +4,7 @@
|
||||
|
||||
#include "Camera.hpp"
|
||||
#include "../Utils/MeshHelper.hpp"
|
||||
#include "../World/DbgCube.hpp"
|
||||
#include "../World/WorldObject.hpp"
|
||||
|
||||
|
||||
class Graphic3DRenderer final {
|
||||
@ -21,8 +21,8 @@ public:
|
||||
|
||||
private:
|
||||
sf::RenderTexture mWorldRender; // This is used to create the scene
|
||||
std::unique_ptr<Camera> mMainCamera;
|
||||
std::unique_ptr<Camera> mMainCamera; // Default player view
|
||||
|
||||
ObjectDbgCube testObj;
|
||||
std::vector<std::shared_ptr<WorldObject>> mRenderList; // List of elements to be rendered next frame
|
||||
|
||||
};
|
@ -55,6 +55,17 @@
|
||||
V3 = _mm_shuffle_ps(V3, V4, _MM_SHUFFLE(2, 1, 2, 0))
|
||||
#endif
|
||||
|
||||
#if __cplusplus >= 201703L
|
||||
#define M3D_ALIGNED_DATA(x) alignas(x)
|
||||
#define M3D_ALIGNED_STRUCT(x) struct alignas(x)
|
||||
#elif defined(__GNUC__)
|
||||
#define M3D_ALIGNED_DATA(x) __attribute__ ((aligned(x)))
|
||||
#define M3D_ALIGNED_STRUCT(x) struct __attribute__ ((aligned(x)))
|
||||
#else
|
||||
#define M3D_ALIGNED_DATA(x) __declspec(align(x))
|
||||
#define M3D_ALIGNED_STRUCT(x) __declspec(align(x)) struct
|
||||
#endif
|
||||
|
||||
//
|
||||
// Math constants and helping functions
|
||||
//
|
||||
@ -65,6 +76,15 @@ constexpr float M3D_1DIV2PI = 0.159154943f;
|
||||
constexpr float M3D_PIDIV2 = 1.570796327f;
|
||||
constexpr float M3D_PIDIV4 = 0.785398163f;
|
||||
|
||||
constexpr uint32_t M3D_PERMUTE_0X = 0;
|
||||
constexpr uint32_t M3D_PERMUTE_0Y = 1;
|
||||
constexpr uint32_t M3D_PERMUTE_0Z = 2;
|
||||
constexpr uint32_t M3D_PERMUTE_0W = 3;
|
||||
constexpr uint32_t M3D_PERMUTE_1X = 4;
|
||||
constexpr uint32_t M3D_PERMUTE_1Y = 5;
|
||||
constexpr uint32_t M3D_PERMUTE_1Z = 6;
|
||||
constexpr uint32_t M3D_PERMUTE_1W = 7;
|
||||
|
||||
constexpr float M3D_Deg2Rad(float a) noexcept { return a * (M3D_PI / 180.0f); }
|
||||
constexpr float M3D_Rad2Deg(float a) noexcept { return a * (180.0f / M3D_PI); }
|
||||
|
||||
@ -314,19 +334,23 @@ float M3D_V4GetX(M3D_VECTOR V) noexcept;
|
||||
float M3D_V4GetY(M3D_VECTOR V) noexcept;
|
||||
float M3D_V4GetZ(M3D_VECTOR V) noexcept;
|
||||
float M3D_V4GetW(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW) noexcept;
|
||||
M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4SplatY(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4SplatZ(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4SplatW(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4Round(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4Add(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V4Subtract(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V4MultiplyAdd(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept;
|
||||
M3D_VECTOR M3D_V4Divide(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V4NegativeMultiplySubtract(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept;
|
||||
M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept;
|
||||
M3D_VECTOR M3D_V4Select(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR Control) noexcept;
|
||||
M3D_VECTOR M3D_V4MergeXY(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V4MergeZW(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V4Sqrt(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V4ModAngles(M3D_VECTOR Angles) noexcept;
|
||||
M3D_VECTOR M3D_V3Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V3Cross(M3D_VECTOR V1, M3D_VECTOR V2) noexcept;
|
||||
M3D_VECTOR M3D_V3LengthSq(M3D_VECTOR V) noexcept;
|
||||
@ -334,6 +358,95 @@ M3D_VECTOR M3D_V3Length(M3D_VECTOR V) noexcept;
|
||||
M3D_VECTOR M3D_V3Normalize(M3D_VECTOR V) noexcept;
|
||||
|
||||
|
||||
#ifndef DISABLE_INTRINSICS
|
||||
namespace M3D_Internal {
|
||||
// Slow path fallback for permutes that do not map to a single SSE shuffle opcode.
|
||||
template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper {
|
||||
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR v2) noexcept {
|
||||
static const M3D_V4U32 selectMask = {{{
|
||||
WhichX ? 0xFFFFFFFF : 0,
|
||||
WhichY ? 0xFFFFFFFF : 0,
|
||||
WhichZ ? 0xFFFFFFFF : 0,
|
||||
WhichW ? 0xFFFFFFFF : 0,
|
||||
}}};
|
||||
|
||||
M3D_VECTOR shuffled1 = M3D_PERMUTE_PS(v1, Shuffle);
|
||||
M3D_VECTOR shuffled2 = M3D_PERMUTE_PS(v2, Shuffle);
|
||||
|
||||
M3D_VECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1);
|
||||
M3D_VECTOR masked2 = _mm_and_ps(selectMask, shuffled2);
|
||||
|
||||
return _mm_or_ps(masked1, masked2);
|
||||
}
|
||||
};
|
||||
|
||||
// Fast path for permutes that only read from the first vector.
|
||||
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false> {
|
||||
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR) noexcept { return M3D_PERMUTE_PS(v1, Shuffle); }
|
||||
};
|
||||
|
||||
// Fast path for permutes that only read from the second vector.
|
||||
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true> {
|
||||
static M3D_VECTOR Permute(M3D_VECTOR, M3D_VECTOR v2) noexcept { return M3D_PERMUTE_PS(v2, Shuffle); }
|
||||
};
|
||||
|
||||
// Fast path for permutes that read XY from the first vector, ZW from the second.
|
||||
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true> {
|
||||
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR v2) noexcept { return _mm_shuffle_ps(v1, v2, Shuffle); }
|
||||
};
|
||||
|
||||
// Fast path for permutes that read XY from the second vector, ZW from the first.
|
||||
template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false> {
|
||||
static M3D_VECTOR Permute(M3D_VECTOR v1, M3D_VECTOR v2) noexcept { return _mm_shuffle_ps(v2, v1, Shuffle); }
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
|
||||
inline M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
return M3D_V4Permute(V1, V2, PermuteX, PermuteY, PermuteZ, PermuteW);
|
||||
#else
|
||||
constexpr uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3);
|
||||
|
||||
constexpr bool WhichX = PermuteX > 3;
|
||||
constexpr bool WhichY = PermuteY > 3;
|
||||
constexpr bool WhichZ = PermuteZ > 3;
|
||||
constexpr bool WhichW = PermuteW > 3;
|
||||
|
||||
return M3D_Internal::PermuteHelper<Shuffle, WhichX, WhichY, WhichZ, WhichW>::Permute(V1, V2);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Selectors 0,1,2,3 reproduce the first vector unchanged.
template<> constexpr M3D_VECTOR M3D_V4Permute<0, 1, 2, 3>(M3D_VECTOR V1, M3D_VECTOR) noexcept {
    return V1;
}

// Selectors 4,5,6,7 reproduce the second vector unchanged.
template<> constexpr M3D_VECTOR M3D_V4Permute<4, 5, 6, 7>(M3D_VECTOR, M3D_VECTOR V2) noexcept {
    return V2;
}
|
||||
|
||||
#ifndef DISABLE_INTRINSICS
|
||||
// Lane patterns that correspond to a single SSE move/unpack instruction.

// (V1.x, V1.y, V2.x, V2.y)
template<> inline M3D_VECTOR M3D_V4Permute<0, 1, 4, 5>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_movelh_ps(V1, V2);
}

// (V2.z, V2.w, V1.z, V1.w)
template<> inline M3D_VECTOR M3D_V4Permute<6, 7, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_movehl_ps(V1, V2);
}

// (V1.x, V2.x, V1.y, V2.y)
template<> inline M3D_VECTOR M3D_V4Permute<0, 4, 1, 5>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_unpacklo_ps(V1, V2);
}

// (V1.z, V2.z, V1.w, V2.w)
template<> inline M3D_VECTOR M3D_V4Permute<2, 6, 3, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_unpackhi_ps(V1, V2);
}

// (V1.z, V1.w, V2.z, V2.w) - done as a 64-bit unpack on the reinterpreted registers.
template<> inline M3D_VECTOR M3D_V4Permute<2, 3, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    const __m128d firstAsDouble = _mm_castps_pd(V1);
    const __m128d secondAsDouble = _mm_castps_pd(V2);
    return _mm_castpd_ps(_mm_unpackhi_pd(firstAsDouble, secondAsDouble));
}
|
||||
#endif
|
||||
|
||||
#if defined(SSE4_INTRINSICS) && !defined(DISABLE_INTRINSICS)
|
||||
// Patterns where every lane stays in place and is merely taken from V1 or V2:
// a single SSE4.1 blend. Mask bits: 0x1 = X, 0x2 = Y, 0x4 = Z, 0x8 = W;
// a set bit takes that lane from V2.
template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x1);
}
template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x2);
}
template<> inline M3D_VECTOR M3D_V4Permute<4, 5, 2, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x1 | 0x2);
}
template<> inline M3D_VECTOR M3D_V4Permute<0, 1, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x4);
}
template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x1 | 0x4);
}
template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x2 | 0x4);
}
template<> inline M3D_VECTOR M3D_V4Permute<4, 5, 6, 3>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x1 | 0x2 | 0x4);
}
template<> inline M3D_VECTOR M3D_V4Permute<0, 1, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x8);
}
template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x1 | 0x8);
}
template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x2 | 0x8);
}
template<> inline M3D_VECTOR M3D_V4Permute<4, 5, 2, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x1 | 0x2 | 0x8);
}
template<> inline M3D_VECTOR M3D_V4Permute<0, 1, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x4 | 0x8);
}
template<> inline M3D_VECTOR M3D_V4Permute<4, 1, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x1 | 0x4 | 0x8);
}
template<> inline M3D_VECTOR M3D_V4Permute<0, 5, 6, 7>(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
    return _mm_blend_ps(V1, V2, 0x2 | 0x4 | 0x8);
}
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// Matrix operation
|
||||
//
|
||||
@ -349,6 +462,7 @@ M3D_VECTOR M3D_V3Transform(M3D_VECTOR V, M3D_MATRIX M) noexcept;
|
||||
void M3D_V3Transform(M3D_F4* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
|
||||
M3D_VECTOR M3D_V3TransformPersDiv(M3D_VECTOR V, M3D_MATRIX M) noexcept;
|
||||
void M3D_V3TransformPersDiv(M3D_F3* pOutputStream, size_t OutputStride, const M3D_F3* pInputStream, size_t InputStride, size_t VectorCount, M3D_MATRIX M) noexcept;
|
||||
M3D_VECTOR M3D_V3TransformNDCToViewport(M3D_VECTOR V, float vpX, float vpY, float vpW, float vpH, float vpMinZ, float vpMaxZ) noexcept;
|
||||
|
||||
|
||||
//
|
||||
@ -360,11 +474,14 @@ M3D_MATRIX M3D_TransformMatrixCamLookToLH(M3D_VECTOR viewPos, M3D_VECTOR viewDir
|
||||
M3D_MATRIX M3D_TransformMatrixCamLookToRH(M3D_VECTOR viewPos, M3D_VECTOR viewDirection, M3D_VECTOR upDirection) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixFrustrumFovLH(float fov, float ratio, float near, float far) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float near, float far) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixScaling(float ScaleX, float ScaleY, float ScaleZ) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept;
|
||||
// Builds a translation matrix; the X, Y, Z lanes of Offset are the translation amounts.
M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixRotationX(float Angle) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixRotationY(float Angle) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept;
|
||||
M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept;
|
||||
|
||||
|
||||
@ -386,6 +503,10 @@ M3D_GCONST M3D_V4F32 M3D_MIdentityR0_n = {{{-1.0f, 0.0f, 0.0f, 0.0f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MIdentityR1_n = {{{0.0f, -1.0f, 0.0f, 0.0f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MIdentityR2_n = {{{0.0f, 0.0f, -1.0f, 0.0f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MIdentityR3_n = {{{0.0f, 0.0f, 0.0f, -1.0f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MNegativeOne = {{{-1.0f, -1.0f, -1.0f, -1.0f}}};
|
||||
M3D_GCONST M3D_V4U32 M3D_MNegativeZero = {{{0x80000000, 0x80000000, 0x80000000, 0x80000000}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MOne = {{{1.0f, 1.0f, 1.0f, 1.0f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MZero = {{{0.0f, 0.0f, 0.0f, 0.0f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MNegateX = {{{-1.0f, 1.0f, 1.0f, 1.0f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MNegateY = {{{1.0f, -1.0f, 1.0f, 1.0f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MNegateZ = {{{1.0f, 1.0f, -1.0f, 1.0f}}};
|
||||
@ -401,7 +522,18 @@ M3D_GCONST M3D_V4U32 M3D_MSelect1000 = {{{0xFFFFFFFF, 0x0, 0x0, 0x
|
||||
M3D_GCONST M3D_V4U32 M3D_MSelect1100 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0}}};
|
||||
M3D_GCONST M3D_V4U32 M3D_MSelect1110 = {{{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0}}};
|
||||
M3D_GCONST M3D_V4U32 M3D_MSelect1011 = {{{0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0xFFFFFFFF}}};
|
||||
M3D_GCONST M3D_V4I32 M3D_MAbsMask = {{{0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MNoFraction = {{{8388608.0f, 8388608.0f, 8388608.0f, 8388608.0f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MHalfPi = {{{M3D_PIDIV2, M3D_PIDIV2, M3D_PIDIV2, M3D_PIDIV2}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MPi = {{{M3D_PI, M3D_PI, M3D_PI, M3D_PI}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MTwoPi = {{{M3D_2PI, M3D_2PI, M3D_2PI, M3D_2PI}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MReciprocalTwoPi = {{{M3D_1DIV2PI, M3D_1DIV2PI, M3D_1DIV2PI, M3D_1DIV2PI}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MSinCoeff0 = {{{-0.16666667f, +0.0083333310f, -0.00019840874f, +2.7525562e-06f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MSinCoeff1 = {{{-2.3889859e-08f, -0.16665852f, +0.0083139502f, -0.00018524670f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MCosCoeff0 = {{{-0.5f, +0.041666638f, -0.0013888378f, +2.4760495e-05f}}};
|
||||
M3D_GCONST M3D_V4F32 M3D_MCosCoeff1 = {{{-2.6051615e-07f, -0.49992746f, +0.041493919f, -0.0012712436f}}};
|
||||
|
||||
void M3D_V4SinCos(M3D_VECTOR* pSin, M3D_VECTOR* pCos, M3D_VECTOR V) noexcept;
|
||||
constexpr M3D_F4X4 M3D_MIdentity4x4() {
|
||||
M3D_F4X4 I(
|
||||
1.0f, 0.0f, 0.0f, 0.0f,
|
||||
|
@ -33,6 +33,27 @@ inline void M3D_ScalarSinCos(float* pSin, float* pCos, float Value) noexcept {
|
||||
*pCos = sign * p;
|
||||
}
|
||||
|
||||
namespace M3D_Internal {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
// Round to nearest (even) a.k.a. banker's rounding
|
||||
// Rounds x to the nearest whole value; an exact .5 fraction goes to the even
// neighbour (banker's rounding).
inline float round_to_nearest(float x) noexcept {
    const float lower = floorf(x);
    const float fraction = x - lower;

    // Clearly closer to the lower neighbour.
    if (fraction < 0.5f)
        return lower;

    const float upper = lower + 1.f;

    // Clearly closer to the upper neighbour.
    if (fraction > 0.5f)
        return upper;

    // Tie: 'lower' is even exactly when half of it has no fractional part.
    float wholeHalf;
    (void)modff(lower * 0.5f, &wholeHalf);

    return ((wholeHalf + wholeHalf) == lower) ? lower : upper;
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------------------------------------------------------- */
|
||||
|
||||
@ -478,6 +499,51 @@ inline float M3D_V4GetW(M3D_VECTOR V) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Runtime permute: each Permute* selector picks one source lane for the matching
// output lane. Bits 0-1 give the lane index, bit 2 chooses V1 (0) or V2 (1),
// so valid selectors are 0..7.
inline M3D_VECTOR M3D_V4Permute(M3D_VECTOR V1, M3D_VECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW) noexcept {
#if defined(AVX_INTRINSICS) && !defined(DISABLE_INTRINSICS)
    static const M3D_V4U32 laneIndexMask = {{{3, 3, 3, 3}}};

    // Load the four selectors into an integer register.
    M3D_ALIGNED_DATA(16) unsigned int selectors[4] = { PermuteX, PermuteY, PermuteZ, PermuteW };
    __m128i control = _mm_load_si128(reinterpret_cast<const __m128i*>(&selectors[0]));

    // Lanes whose selector is above 3 read from V2; then reduce selectors to lane indices.
    const __m128i fromSecond = _mm_cmpgt_epi32(control, laneIndexMask);
    control = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(control), laneIndexMask));

    // Shuffle both inputs the same way and merge them through the source mask.
    const __m128 firstShuffled = _mm_permutevar_ps(V1, control);
    const __m128 secondShuffled = _mm_permutevar_ps(V2, control);

    const __m128 keptFromFirst = _mm_andnot_ps(_mm_castsi128_ps(fromSecond), firstShuffled);
    const __m128 keptFromSecond = _mm_and_ps(_mm_castsi128_ps(fromSecond), secondShuffled);

    return _mm_or_ps(keptFromFirst, keptFromSecond);
#else
    // View both inputs as arrays of four 32-bit words.
    const uint32_t* const sources[2] = {
        reinterpret_cast<const uint32_t*>(&V1),
        reinterpret_cast<const uint32_t*>(&V2)
    };
    const uint32_t selectors[4] = { PermuteX, PermuteY, PermuteZ, PermuteW };

    M3D_VECTOR Result;
    auto outLanes = reinterpret_cast<uint32_t*>(&Result);

    for (int lane = 0; lane < 4; ++lane) {
        const uint32_t laneIndex = selectors[lane] & 3;
        const uint32_t sourceIndex = selectors[lane] >> 2;
        outLanes[lane] = sources[sourceIndex][laneIndex];
    }

    return Result;
#endif
}
|
||||
|
||||
inline M3D_VECTOR M3D_V4SplatX(M3D_VECTOR V) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_V4F32 vResult;
|
||||
@ -532,6 +598,31 @@ inline M3D_VECTOR M3D_V4SplatW(M3D_VECTOR V) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Rounds every lane of V to the nearest whole value.
inline M3D_VECTOR M3D_V4Round(M3D_VECTOR V) noexcept {
#ifdef DISABLE_INTRINSICS
    M3D_V4F32 rounded = {{{
        M3D_Internal::round_to_nearest(V.v4f[0]),
        M3D_Internal::round_to_nearest(V.v4f[1]),
        M3D_Internal::round_to_nearest(V.v4f[2]),
        M3D_Internal::round_to_nearest(V.v4f[3])
    }}};
    return rounded.v;
#elif defined(SSE4_INTRINSICS)
    return _mm_round_ps(V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
#else
    // Add then subtract a sign-matched 2^23 (M3D_MNoFraction) so the fractional
    // bits are rounded away by the float addition itself.
    const __m128 signBits = _mm_and_ps(V, M3D_MNegativeZero);
    const __m128 signedMagic = _mm_or_ps(M3D_MNoFraction, signBits);
    const __m128 snapped = _mm_sub_ps(_mm_add_ps(V, signedMagic), signedMagic);

    // Only lanes with |V| <= 2^23 use the snapped value; the others keep V as-is.
    const __m128 magnitude = _mm_and_ps(V, M3D_MAbsMask);
    const __m128 useSnapped = _mm_cmple_ps(magnitude, M3D_MNoFraction);

    const __m128 untouched = _mm_andnot_ps(useSnapped, V);
    const __m128 replaced = _mm_and_ps(snapped, useSnapped);
    return _mm_xor_ps(replaced, untouched);
#endif
}
|
||||
|
||||
inline M3D_VECTOR M3D_V4Add(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_V4F32 ret = {{{
|
||||
@ -560,6 +651,20 @@ inline M3D_VECTOR M3D_V4Subtract(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Lane-wise product of V1 and V2.
inline M3D_VECTOR M3D_V4Multiply(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
#ifndef DISABLE_INTRINSICS
    return _mm_mul_ps(V1, V2);
#else
    M3D_V4F32 product = {{{
        V1.v4f[0] * V2.v4f[0],
        V1.v4f[1] * V2.v4f[1],
        V1.v4f[2] * V2.v4f[2],
        V1.v4f[3] * V2.v4f[3]
    }}};
    return product.v;
#endif
}
|
||||
|
||||
inline M3D_VECTOR M3D_V4MultiplyAdd(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_V4F32 ret = {{{
|
||||
@ -588,6 +693,20 @@ inline M3D_VECTOR M3D_V4Divide(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Lane-wise V3 - (V1 * V2).
inline M3D_VECTOR M3D_V4NegativeMultiplySubtract(M3D_VECTOR V1, M3D_VECTOR V2, M3D_VECTOR V3) noexcept {
#ifndef DISABLE_INTRINSICS
    return M3D_FNMADD_PS(V1, V2, V3);
#else
    M3D_V4F32 difference = {{{
        V3.v4f[0] - (V1.v4f[0] * V2.v4f[0]),
        V3.v4f[1] - (V1.v4f[1] * V2.v4f[1]),
        V3.v4f[2] - (V1.v4f[2] * V2.v4f[2]),
        V3.v4f[3] - (V1.v4f[3] * V2.v4f[3])
    }}};
    // Hand back the vector member, as the sibling scalar paths do.
    return difference.v;
#endif
}
|
||||
|
||||
inline M3D_VECTOR M3D_V4Scale(M3D_VECTOR V, float scale) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_V4F32 ret = {{{
|
||||
@ -661,6 +780,25 @@ inline M3D_VECTOR M3D_V4Sqrt(M3D_VECTOR V) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Wraps every lane of Angles by whole turns:
//   result = Angles - 2*PI * round(Angles / (2*PI))
// which brings each angle back to the range around zero (about -PI .. PI).
inline M3D_VECTOR M3D_V4ModAngles(M3D_VECTOR Angles) noexcept {
#ifdef DISABLE_INTRINSICS
    // Number of whole turns contained in each angle.
    const M3D_VECTOR wholeTurns = M3D_V4Round(M3D_V4Multiply(Angles, M3D_MReciprocalTwoPi.v));

    // Remove those turns from the input.
    return M3D_V4NegativeMultiplySubtract(M3D_MTwoPi.v, wholeTurns, Angles);
#else
    // Number of whole turns contained in each angle; rounding goes through the
    // shared helper because it is too involved to inline here.
    const M3D_VECTOR wholeTurns = M3D_V4Round(_mm_mul_ps(Angles, M3D_MReciprocalTwoPi));

    // Remove those turns from the input.
    return M3D_FNMADD_PS(wholeTurns, M3D_MTwoPi, Angles);
#endif
}
|
||||
|
||||
inline M3D_VECTOR M3D_V3Dot(M3D_VECTOR V1, M3D_VECTOR V2) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
float fValue = V1.v4f[0] * V2.v4f[0] + V1.v4f[1] * V2.v4f[1] + V1.v4f[2] * V2.v4f[2];
|
||||
@ -1672,6 +1810,16 @@ inline void M3D_V3TransformPersDiv(
|
||||
#endif
|
||||
}
|
||||
|
||||
// Maps a vector onto a viewport rectangle by a per-lane scale and offset.
//   V            - input vector (NDC coordinates, going by the function name)
//   vpX, vpY     - viewport origin
//   vpW, vpH     - viewport width and height
//   vpMinZ/MaxZ  - depth range the Z lane is mapped into
// X is scaled by half the width, Y by minus half the height (so Y is flipped),
// Z by the depth span; the W lane uses a zero scale and a zero offset.
inline M3D_VECTOR M3D_V3TransformNDCToViewport(M3D_VECTOR V, float vpX, float vpY, float vpW, float vpH, float vpMinZ, float vpMaxZ) noexcept {
    const float halfW = 0.5f * vpW;
    const float halfH = 0.5f * vpH;

    const M3D_VECTOR scale = M3D_V4Set(halfW, -halfH, vpMaxZ - vpMinZ, 0.0f);
    const M3D_VECTOR offset = M3D_V4Set(vpX + halfW, vpY + halfH, vpMinZ, 0.0f);

    // V * scale + offset
    return M3D_V4MultiplyAdd(V, scale, offset);
}
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------------------------------------------------------- */
|
||||
|
||||
@ -1832,7 +1980,40 @@ inline M3D_MATRIX M3D_TransformMatrixFrustrumFovRH(float fov, float ratio, float
|
||||
#endif
|
||||
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixScaling(float ScaleX, float ScaleY, float ScaleZ) noexcept {
|
||||
// Builds a translation matrix: identity with the X, Y, Z lanes of Offset
// written into the first three entries of the last row.
inline M3D_MATRIX M3D_TransformMatrixTranslate(M3D_VECTOR Offset) noexcept {
#ifdef DISABLE_INTRINSICS
    M3D_MATRIX translation;

    // Start from the identity...
    for (int row = 0; row < 4; ++row) {
        for (int col = 0; col < 4; ++col)
            translation.mat[row][col] = (row == col) ? 1.0f : 0.0f;
    }

    // ...and place the offset in the last row (its W entry stays 1).
    for (int col = 0; col < 3; ++col)
        translation.mat[3][col] = Offset.v4f[col];

    return translation;
#else
    M3D_MATRIX translation;
    translation.rows[0] = M3D_MIdentityR0.v;
    translation.rows[1] = M3D_MIdentityR1.v;
    translation.rows[2] = M3D_MIdentityR2.v;
    // Last row: X, Y, Z from Offset, W kept from the identity row.
    translation.rows[3] = M3D_V4Select(M3D_MIdentityR3.v, Offset, M3D_MSelect1110.v);
    return translation;
#endif
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixScale(float ScaleX, float ScaleY, float ScaleZ) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_MATRIX ret;
|
||||
ret.mat[0][0] = ScaleX;
|
||||
@ -1865,6 +2046,39 @@ inline M3D_MATRIX M3D_TransformMatrixScaling(float ScaleX, float ScaleY, float S
|
||||
#endif
|
||||
}
|
||||
|
||||
// Builds a scaling matrix: the X, Y, Z lanes of Scale go on the diagonal,
// the last diagonal entry is 1 and everything else is 0.
inline M3D_MATRIX M3D_TransformMatrixScale(M3D_VECTOR Scale) noexcept {
#ifdef DISABLE_INTRINSICS
    M3D_MATRIX scaling;

    // Clear the whole matrix first.
    for (int row = 0; row < 4; ++row) {
        for (int col = 0; col < 4; ++col)
            scaling.mat[row][col] = 0.0f;
    }

    // Then fill the diagonal.
    for (int axis = 0; axis < 3; ++axis)
        scaling.mat[axis][axis] = Scale.v4f[axis];
    scaling.mat[3][3] = 1.0f;

    return scaling;
#else
    M3D_MATRIX scaling;
    // Each row keeps only its own lane of Scale; the other lanes are masked off.
    scaling.rows[0] = _mm_and_ps(Scale, M3D_MMaskX);
    scaling.rows[1] = _mm_and_ps(Scale, M3D_MMaskY);
    scaling.rows[2] = _mm_and_ps(Scale, M3D_MMaskZ);
    scaling.rows[3] = M3D_MIdentityR3.v;
    return scaling;
#endif
}
|
||||
|
||||
inline M3D_MATRIX M3D_TransformMatrixTranslate(float OffsetX, float OffsetY, float OffsetZ) noexcept {
|
||||
#ifdef DISABLE_INTRINSICS
|
||||
M3D_MATRIX ret;
|
||||
@ -2033,6 +2247,74 @@ inline M3D_MATRIX M3D_TransformMatrixRotationZ(float Angle) noexcept {
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Builds a rotation matrix from three angles packed in a vector.
///
/// The X, Y and Z components of Angles are the three rotation angles; the
/// scalar path feeds them to sinf/cosf, so they are expressed in radians.
/// NOTE(review): the component order looks like (pitch, yaw, roll), matching
/// the DirectXMath routine this appears to be modelled on - confirm.
///
/// @param Angles  Rotation angles; the W component does not reach the result.
/// @return A matrix whose upper-left 3x3 holds the combined rotation and whose
///         last row and last column are those of the identity.
inline M3D_MATRIX M3D_TransformMatrixRotation(M3D_VECTOR Angles) noexcept {
    M3D_MATRIX rotation;
#ifdef DISABLE_INTRINSICS
    const float cosX = cosf(Angles.v4f[0]);
    const float sinX = sinf(Angles.v4f[0]);
    const float cosY = cosf(Angles.v4f[1]);
    const float sinY = sinf(Angles.v4f[1]);
    const float cosZ = cosf(Angles.v4f[2]);
    const float sinZ = sinf(Angles.v4f[2]);

    rotation.mat[0][0] = cosZ * cosY + sinZ * sinX * sinY;
    rotation.mat[0][1] = sinZ * cosX;
    rotation.mat[0][2] = sinZ * sinX * cosY - cosZ * sinY;
    rotation.mat[0][3] = 0.0f;

    rotation.mat[1][0] = cosZ * sinX * sinY - sinZ * cosY;
    rotation.mat[1][1] = cosZ * cosX;
    rotation.mat[1][2] = sinZ * sinY + cosZ * sinX * cosY;
    rotation.mat[1][3] = 0.0f;

    rotation.mat[2][0] = cosX * sinY;
    rotation.mat[2][1] = -sinX;
    rotation.mat[2][2] = cosX * cosY;
    rotation.mat[2][3] = 0.0f;

    rotation.mat[3][0] = 0.0f;
    rotation.mat[3][1] = 0.0f;
    rotation.mat[3][2] = 0.0f;
    rotation.mat[3][3] = 1.0f;
#else
    // Per-lane signs applied to one set of product terms before it is combined.
    static const M3D_V4F32 Sign = {{{1.0f, -1.0f, -1.0f, 1.0f}}};

    M3D_VECTOR sines, cosines;
    M3D_V4SinCos(&sines, &cosines, Angles);

    // Gather the sine/cosine factors of every matrix term into lanes so the
    // products below are evaluated four at a time.
    const M3D_VECTOR factorP0 = M3D_V4Permute<M3D_PERMUTE_1X, M3D_PERMUTE_0Z, M3D_PERMUTE_1Z, M3D_PERMUTE_1X>(sines, cosines);
    const M3D_VECTOR factorY0 = M3D_V4Permute<M3D_PERMUTE_0Y, M3D_PERMUTE_1X, M3D_PERMUTE_1X, M3D_PERMUTE_1Y>(sines, cosines);
    const M3D_VECTOR factorP1 = M3D_V4Permute<M3D_PERMUTE_1Z, M3D_PERMUTE_0Z, M3D_PERMUTE_1Z, M3D_PERMUTE_0Z>(sines, cosines);
    const M3D_VECTOR factorY1 = M3D_V4Permute<M3D_PERMUTE_1Y, M3D_PERMUTE_1Y, M3D_PERMUTE_0Y, M3D_PERMUTE_0Y>(sines, cosines);
    const M3D_VECTOR factorP2 = M3D_V4Permute<M3D_PERMUTE_0Z, M3D_PERMUTE_1Z, M3D_PERMUTE_0Z, M3D_PERMUTE_1Z>(sines, cosines);
    const M3D_VECTOR factorP3 = M3D_V4Permute<M3D_PERMUTE_0Y, M3D_PERMUTE_0Y, M3D_PERMUTE_1Y, M3D_PERMUTE_1Y>(sines, cosines);
    const M3D_VECTOR factorY2 = M3D_V4SplatX(sines);
    const M3D_VECTOR negSines = M3D_V4Negate(sines);

    const M3D_VECTOR twoFactorTerms = M3D_V4Multiply(factorP0, factorY0);
    M3D_VECTOR signedTerms = M3D_V4Multiply(factorP1, Sign.v);
    signedTerms = M3D_V4Multiply(signedTerms, factorY1);
    M3D_VECTOR threeFactorTerms = M3D_V4Multiply(factorP2, factorY2);
    threeFactorTerms = M3D_V4MultiplyAdd(threeFactorTerms, factorP3, signedTerms);

    // Scatter the computed terms back into row order.
    const M3D_VECTOR row0 = M3D_V4Permute<M3D_PERMUTE_1X, M3D_PERMUTE_0Y, M3D_PERMUTE_1Z, M3D_PERMUTE_0W>(twoFactorTerms, threeFactorTerms);
    const M3D_VECTOR row1 = M3D_V4Permute<M3D_PERMUTE_1Y, M3D_PERMUTE_0Z, M3D_PERMUTE_1W, M3D_PERMUTE_0W>(twoFactorTerms, threeFactorTerms);
    const M3D_VECTOR row2 = M3D_V4Permute<M3D_PERMUTE_0X, M3D_PERMUTE_1X, M3D_PERMUTE_0W, M3D_PERMUTE_0W>(twoFactorTerms, negSines);

    // NOTE(review): the select mask presumably keeps X/Y/Z of each row and
    // takes W from M3D_MZero - confirm against M3D_V4Select.
    rotation.rows[0] = M3D_V4Select(M3D_MZero, row0, M3D_MSelect1110.v);
    rotation.rows[1] = M3D_V4Select(M3D_MZero, row1, M3D_MSelect1110.v);
    rotation.rows[2] = M3D_V4Select(M3D_MZero, row2, M3D_MSelect1110.v);
    rotation.rows[3] = M3D_MIdentityR3.v;
#endif
    return rotation;
}
|
||||
|
||||
//TODO: transform matrix is incomplete
|
||||
//v_tri[v_cnt].position.z = ((far+near)/2)+((far-near)/2)*_2dCoord.z;
|
||||
inline M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset, float _hOffset) noexcept {
|
||||
const float widthDiv2 = _w / 2;
|
||||
const float heightDiv2 = _h / 2;
|
||||
@ -2067,4 +2349,83 @@ inline M3D_MATRIX M3D_TransformMatrixViewport(float _w, float _h, float _wOffset
|
||||
ret.rows[3] = M3D_V4Set(_wOffset + widthDiv2, _hOffset + heightDiv2, 0, 1);
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Computes the sine and the cosine of each of the four components of V.
///
/// The scalar path calls sinf/cosf per component. The SIMD path reduces the
/// angle with M3D_V4ModAngles, folds it into [-pi/2, pi/2] and evaluates one
/// polynomial approximation for the sine and one for the cosine.
///
/// @param pSin  Receives the four sines; must point to a valid vector.
/// @param pCos  Receives the four cosines; must point to a valid vector.
/// @param V     Angles, in radians.
inline void M3D_V4SinCos(M3D_VECTOR* pSin, M3D_VECTOR* pCos, M3D_VECTOR V) noexcept {
#ifdef DISABLE_INTRINSICS
    // Fill all sine lanes first, then all cosine lanes.
    for (int lane = 0; lane < 4; ++lane) {
        pSin->v4f[lane] = sinf(V.v4f[lane]);
    }
    for (int lane = 0; lane < 4; ++lane) {
        pCos->v4f[lane] = cosf(V.v4f[lane]);
    }
#else
    // Force the value within the bounds of pi
    const M3D_VECTOR wrapped = M3D_V4ModAngles(V);

    // Map in [-pi/2,pi/2] with sin(y) = sin(x), cos(y) = sign*cos(x).
    const __m128 signBit = _mm_and_ps(wrapped, M3D_MNegativeZero);
    const __m128 signedPi = _mm_or_ps(M3D_MPi, signBit);     // pi when x >= 0, -pi when x < 0
    const __m128 magnitude = _mm_andnot_ps(signBit, wrapped); // |x|
    const __m128 reflected = _mm_sub_ps(signedPi, wrapped);
    const __m128 inRange = _mm_cmple_ps(magnitude, M3D_MHalfPi);

    // Keep the angle where |x| <= pi/2, otherwise use its reflection.
    const __m128 angle = _mm_or_ps(_mm_and_ps(inRange, wrapped),
                                   _mm_andnot_ps(inRange, reflected));
    // +1 for lanes that were kept, -1 for lanes that were reflected.
    const __m128 cosSign = _mm_or_ps(_mm_and_ps(inRange, M3D_MOne),
                                     _mm_andnot_ps(inRange, M3D_MNegativeOne));

    const __m128 angleSq = _mm_mul_ps(angle, angle);

    // Compute polynomial approximation of sine: the coefficients are folded
    // in one multiply-add at a time, then the result is multiplied by the angle.
    const M3D_VECTOR sinHi = M3D_MSinCoeff1;
    const M3D_VECTOR sinLo = M3D_MSinCoeff0;
    __m128 lead = M3D_PERMUTE_PS(sinHi, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 coeff = M3D_PERMUTE_PS(sinLo, _MM_SHUFFLE(3, 3, 3, 3));
    __m128 poly = M3D_FMADD_PS(lead, angleSq, coeff);

    coeff = M3D_PERMUTE_PS(sinLo, _MM_SHUFFLE(2, 2, 2, 2));
    poly = M3D_FMADD_PS(poly, angleSq, coeff);

    coeff = M3D_PERMUTE_PS(sinLo, _MM_SHUFFLE(1, 1, 1, 1));
    poly = M3D_FMADD_PS(poly, angleSq, coeff);

    coeff = M3D_PERMUTE_PS(sinLo, _MM_SHUFFLE(0, 0, 0, 0));
    poly = M3D_FMADD_PS(poly, angleSq, coeff);

    poly = M3D_FMADD_PS(poly, angleSq, M3D_MOne);
    poly = _mm_mul_ps(poly, angle);
    *pSin = poly;

    // Compute polynomial approximation of cosine: same scheme, but the
    // result is multiplied by the sign computed during the range folding.
    const M3D_VECTOR cosHi = M3D_MCosCoeff1;
    const M3D_VECTOR cosLo = M3D_MCosCoeff0;
    lead = M3D_PERMUTE_PS(cosHi, _MM_SHUFFLE(0, 0, 0, 0));
    coeff = M3D_PERMUTE_PS(cosLo, _MM_SHUFFLE(3, 3, 3, 3));
    poly = M3D_FMADD_PS(lead, angleSq, coeff);

    coeff = M3D_PERMUTE_PS(cosLo, _MM_SHUFFLE(2, 2, 2, 2));
    poly = M3D_FMADD_PS(poly, angleSq, coeff);

    coeff = M3D_PERMUTE_PS(cosLo, _MM_SHUFFLE(1, 1, 1, 1));
    poly = M3D_FMADD_PS(poly, angleSq, coeff);

    coeff = M3D_PERMUTE_PS(cosLo, _MM_SHUFFLE(0, 0, 0, 0));
    poly = M3D_FMADD_PS(poly, angleSq, coeff);

    poly = M3D_FMADD_PS(poly, angleSq, M3D_MOne);
    poly = _mm_mul_ps(poly, cosSign);
    *pCos = poly;
#endif
}
|
@ -33,15 +33,43 @@ struct Vertex {
|
||||
struct MeshPart{
|
||||
MeshPart() = default;
|
||||
|
||||
std::vector<unsigned int> indices = {};
|
||||
std::vector<uint32_t> indices = {};
|
||||
M3D_F4X4 transform = M3D_MIdentity4x4();
|
||||
/*
|
||||
M3D_F3 offsetTransform = M3D_F3(0.0f, 0.0f, 0.0f);
|
||||
M3D_F3 scaleTransform = M3D_F3(1.0f, 1.0f, 1.0f);
|
||||
M3D_F3 rotateTransform = M3D_F3(0.0f, 0.0f, 0.0f);
|
||||
M3D_F3 translateTransform = M3D_F3(0.0f, 0.0f, 0.0f);
|
||||
*/
|
||||
std::vector<MeshPart> subparts;
|
||||
|
||||
inline constexpr size_t GetIndexStride() const noexcept { return sizeof(uint32_t); }
|
||||
inline const uint32_t GetIndicesCount() const { return indices.size(); }
|
||||
inline const uint32_t GetRootIndicesCount() const { return RecursiveIndicesCount(this); }
|
||||
|
||||
private:
|
||||
inline static const uint32_t RecursiveIndicesCount(const MeshPart* mp) {
|
||||
uint32_t sum = 0;
|
||||
for (auto& sb : mp->subparts) {
|
||||
sum += RecursiveIndicesCount(&sb);
|
||||
}
|
||||
sum += mp->indices.size();
|
||||
return sum;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct Mesh {
|
||||
std::vector<Vertex> vertices;
|
||||
std::vector<MeshPart> parts;
|
||||
|
||||
};
|
||||
inline constexpr size_t GetVertexStride() const noexcept { return sizeof(Vertex); }
|
||||
inline const size_t GetVerticesCount() const noexcept { return vertices.size(); }
|
||||
inline const uint32_t GetRootIndicesCount() const {
|
||||
uint32_t sum = 0;
|
||||
for (auto& sb : parts)
|
||||
sum += sb.GetRootIndicesCount();
|
||||
return sum;
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -6,10 +6,35 @@
|
||||
class WorldObject {
|
||||
public:
|
||||
virtual const Mesh& GetObjectMesh() const = 0;
|
||||
const M3D_MATRIX GetTransform() noexcept {
|
||||
M3D_MATRIX M = M3D_MIdentity();
|
||||
M *= M3D_TransformMatrixScale(M3D_V4LoadF3(&scale));
|
||||
M *= M3D_TransformMatrixRotation(M3D_V4LoadF3(&rot));
|
||||
M *= M3D_TransformMatrixTranslate(M3D_V4LoadF3(&pos));
|
||||
return M;
|
||||
}
|
||||
const M3D_F4X4 GetTransform4x4f() noexcept {
|
||||
M3D_F4X4 out;
|
||||
M3D_V4StoreF4x4(&out, GetTransform());
|
||||
return out;
|
||||
}
|
||||
|
||||
void SetPosition(M3D_F3& _pos) noexcept { pos = _pos; }
|
||||
void SetPosition(float _x, float _y, float _z) noexcept { pos = M3D_F3(_x, _y, _z); }
|
||||
void SetRotation(M3D_F3& _rot) noexcept { rot = _rot; }
|
||||
void SetRotation(float _x, float _y, float _z) noexcept { rot = M3D_F3(_x, _y, _z); }
|
||||
void SetScale(M3D_F3& _scale) noexcept { scale = _scale; }
|
||||
void SetScale(float _s) noexcept { scale = M3D_F3(_s, _s, _s); }
|
||||
void SetScale(float _x, float _y, float _z) noexcept { scale = M3D_F3(_x, _y, _z); }
|
||||
|
||||
protected:
|
||||
WorldObject() = default;
|
||||
|
||||
private:
|
||||
M3D_F3 scale = M3D_F3(1.0f, 1.0f, 1.0f);
|
||||
M3D_F3 rot = M3D_F3(0.0f, 0.0f, 0.0f);
|
||||
M3D_F3 pos = M3D_F3(0.0f, 0.0f, 0.0f);
|
||||
|
||||
};
|
||||
|
||||
template<class D>
|
||||
@ -25,14 +50,4 @@ protected:
|
||||
};
|
||||
|
||||
// Out-of-line definition of the WorldObjectAbstract<D> destructor; the body is empty.
template<class D>
|
||||
inline WorldObjectAbstract<D>::~WorldObjectAbstract() {}
|
||||
|
||||
/*
|
||||
class WorldObject {
|
||||
public:
|
||||
virtual ~WorldObjectAbstract() = 0;
|
||||
|
||||
virtual std::vector<MeshVertex>& GetObjectMesh() const = 0;
|
||||
|
||||
};
|
||||
*/
|
||||
inline WorldObjectAbstract<D>::~WorldObjectAbstract() {}
|
Loading…
x
Reference in New Issue
Block a user