aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorMiles Macklin <[email protected]>2017-03-10 14:51:31 +1300
committerMiles Macklin <[email protected]>2017-03-10 14:51:31 +1300
commitad3d90fafe5ee79964bdfe1f1e0704c3ffcdfd5f (patch)
tree4cc6f3288363889d7342f7f8407c0251e6904819 /core
downloadflex-ad3d90fafe5ee79964bdfe1f1e0704c3ffcdfd5f.tar.xz
flex-ad3d90fafe5ee79964bdfe1f1e0704c3ffcdfd5f.zip
Initial 1.1.0 binary release
Diffstat (limited to 'core')
-rw-r--r--core/aabbtree.cpp795
-rw-r--r--core/aabbtree.h155
-rw-r--r--core/cloth.h733
-rw-r--r--core/convex.h406
-rw-r--r--core/core.cpp29
-rw-r--r--core/core.h158
-rw-r--r--core/extrude.cpp114
-rw-r--r--core/extrude.h35
-rw-r--r--core/mat22.h174
-rw-r--r--core/mat33.h205
-rw-r--r--core/mat44.h280
-rw-r--r--core/maths.cpp68
-rw-r--r--core/maths.h1840
-rw-r--r--core/matnn.h326
-rw-r--r--core/mesh.cpp968
-rw-r--r--core/mesh.h77
-rw-r--r--core/perlin.cpp367
-rw-r--r--core/perlin.h35
-rw-r--r--core/pfm.cpp121
-rw-r--r--core/pfm.h44
-rw-r--r--core/platform.cpp348
-rw-r--r--core/platform.h216
-rw-r--r--core/point3.h114
-rw-r--r--core/quat.h137
-rw-r--r--core/sdf.cpp360
-rw-r--r--core/sdf.h37
-rw-r--r--core/skylight.h96
-rw-r--r--core/tga.cpp263
-rw-r--r--core/tga.h56
-rw-r--r--core/types.h33
-rw-r--r--core/vec2.h174
-rw-r--r--core/vec3.h147
-rw-r--r--core/vec4.h105
-rw-r--r--core/voxelize.cpp93
-rw-r--r--core/voxelize.h33
35 files changed, 9142 insertions, 0 deletions
diff --git a/core/aabbtree.cpp b/core/aabbtree.cpp
new file mode 100644
index 0000000..f7cb0c7
--- /dev/null
+++ b/core/aabbtree.cpp
@@ -0,0 +1,795 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "aabbtree.h"
+
+#include "maths.h"
+#include "platform.h"
+
+#include <algorithm>
+#include <iostream>
+
+using namespace std;
+
+#if _WIN32
+_declspec (thread) uint32_t AABBTree::s_traceDepth;
+#endif
+
+// Builds an AABB tree (BVH) over an indexed triangle mesh.
+// The vertex and index arrays are referenced, not copied — they must remain
+// valid for the lifetime of this AABBTree.
+AABBTree::AABBTree(const Vec3* vertices, uint32_t numVerts, const uint32_t* indices, uint32_t numFaces)
+ : m_vertices(vertices)
+ , m_numVerts(numVerts)
+ , m_indices(indices)
+ , m_numFaces(numFaces)
+{
+ // build stats
+ m_treeDepth = 0;
+ m_innerNodes = 0;
+ m_leafNodes = 0;
+
+ Build();
+}
+
+namespace
+{
+
+ // Strict-weak-ordering comparator used to order face indices by triangle
+ // centroid along a single axis. Ties are broken by face index so the
+ // ordering is fully deterministic (valid for std::sort / std::nth_element).
+ struct FaceSorter
+ {
+ FaceSorter(const Vec3* positions, const uint32_t* indices, uint32_t n, uint32_t axis)
+ : m_vertices(positions)
+ , m_indices(indices)
+ , m_numIndices(n)
+ , m_axis(axis)
+ {
+ }
+
+ inline bool operator()(uint32_t lhs, uint32_t rhs) const
+ {
+ float a = GetCentroid(lhs);
+ float b = GetCentroid(rhs);
+
+ // deterministic tie-break on the face index
+ if (a == b)
+ return lhs < rhs;
+ else
+ return a < b;
+ }
+
+ // Mean of the face's three vertex coordinates along m_axis.
+ inline float GetCentroid(uint32_t face) const
+ {
+ const Vec3& a = m_vertices[m_indices[face*3+0]];
+ const Vec3& b = m_vertices[m_indices[face*3+1]];
+ const Vec3& c = m_vertices[m_indices[face*3+2]];
+
+ return (a[m_axis] + b[m_axis] + c[m_axis])/3.0f;
+ }
+
+ const Vec3* m_vertices;
+ const uint32_t* m_indices;
+ uint32_t m_numIndices; // NOTE(review): stored but never read by this struct
+ uint32_t m_axis;
+ };
+
+ // Returns the index (0 = x, 1 = y, 2 = z) of v's largest component.
+ inline uint32_t LongestAxis(const Vector3& v)
+ {
+ if (v.x > v.y && v.x > v.z)
+ return 0;
+ else
+ return (v.y > v.z) ? 1 : 2;
+ }
+
+} // anonymous namespace
+
+// Computes the combined AABB of the 'numFaces' faces whose indices are listed
+// in 'faces', writing the result to outMinExtents/outMaxExtents.
+// With numFaces == 0 the outputs are the inverted (+FLT_MAX, -FLT_MAX) extents.
+void AABBTree::CalculateFaceBounds(uint32_t* faces, uint32_t numFaces, Vector3& outMinExtents, Vector3& outMaxExtents)
+{
+ Vector3 minExtents(FLT_MAX);
+ Vector3 maxExtents(-FLT_MAX);
+
+ // calculate face bounds
+ for (uint32_t i=0; i < numFaces; ++i)
+ {
+ Vector3 a = Vector3(m_vertices[m_indices[faces[i]*3+0]]);
+ Vector3 b = Vector3(m_vertices[m_indices[faces[i]*3+1]]);
+ Vector3 c = Vector3(m_vertices[m_indices[faces[i]*3+2]]);
+
+ minExtents = Min(a, minExtents);
+ maxExtents = Max(a, maxExtents);
+
+ minExtents = Min(b, minExtents);
+ maxExtents = Max(b, maxExtents);
+
+ minExtents = Min(c, minExtents);
+ maxExtents = Max(c, maxExtents);
+ }
+
+ outMinExtents = minExtents;
+ outMaxExtents = maxExtents;
+}
+
+// track current tree depth
+// NOTE(review): file-scope static shared by all trees — concurrent Build()
+// calls on different threads would race on it; confirm single-threaded use.
+static uint32_t s_depth = 0;
+
+// Builds the tree: records per-face bounds, then recursively partitions the
+// face index list starting from the root (node 0). The temporary per-face
+// bounds array is released once the build completes.
+void AABBTree::Build()
+{
+ // requires a non-empty mesh (the product is non-zero iff m_numFaces != 0)
+ assert(m_numFaces*3);
+
+ //const double startTime = GetSeconds();
+
+ const uint32_t numFaces = m_numFaces;
+
+ // build initial list of faces
+ m_faces.reserve(numFaces);
+ /*
+ for (uint32_t i=0; i < numFaces; ++i)
+ {
+ m_faces[i] = i;
+ }
+ */
+
+ // calculate bounds of each face and store
+ m_faceBounds.reserve(numFaces);
+
+ // NOTE(review): 'stack' is only referenced by the commented-out
+ // face-splitting experiment below; it is unused by the live code.
+ std::vector<Bounds> stack;
+ for (uint32_t i=0; i < numFaces; ++i)
+ {
+ Bounds top;
+ CalculateFaceBounds(&i, 1, top.m_min, top.m_max);
+
+ m_faces.push_back(i);
+ m_faceBounds.push_back(top);
+ /*
+ stack.push_back(top);
+
+ while (!stack.empty())
+ {
+ Bounds b = stack.back();
+ stack.pop_back();
+
+ const float kAreaThreshold = 200.0f;
+
+ if (b.GetSurfaceArea() < kAreaThreshold)
+ {
+ // node is good, append to our face list
+ m_faces.push_back(i);
+ m_faceBounds.push_back(b);
+ }
+ else
+ {
+ // split along longest axis
+ uint32_t a = LongestAxis(b.m_max-b.m_min);
+
+ float splitPos = (b.m_min[a] + b.m_max[a])*0.5f;
+ Bounds left(b);
+ left.m_max[a] = splitPos;
+
+ assert(left.GetSurfaceArea() < b.GetSurfaceArea());
+
+
+ Bounds right(b);
+ right.m_min[a] = splitPos;
+
+ assert(right.GetSurfaceArea() < b.GetSurfaceArea());
+
+ stack.push_back(left);
+ stack.push_back(right);
+ }
+ }
+ */
+ }
+
+ // heuristic reservation; BuildRecursive grows the array on demand if the
+ // estimate is exceeded
+ m_nodes.reserve(uint32_t(numFaces*1.5f));
+
+ // allocate space for all the nodes
+ m_freeNode = 1;
+
+ // start building
+ BuildRecursive(0, &m_faces[0], numFaces);
+
+ assert(s_depth == 0);
+
+
+ /*
+ const double buildTime = (GetSeconds()-startTime);
+ cout << "AABBTree Build Stats:" << endl;
+ cout << "Node size: " << sizeof(Node) << endl;
+ cout << "Build time: " << buildTime << "s" << endl;
+ cout << "Inner nodes: " << m_innerNodes << endl;
+ cout << "Leaf nodes: " << m_leafNodes << endl;
+ cout << "Alloc nodes: " << m_nodes.size() << endl;
+ cout << "Avg. tris/leaf: " << m_faces.size() / float(m_leafNodes) << endl;
+ cout << "Max depth: " << m_treeDepth << endl;
+ */
+ // free some memory: swap with an empty array to actually release capacity
+ // (clear() alone would keep the allocation)
+ FaceBoundsArray f;
+ m_faceBounds.swap(f);
+}
+
+// partition faces around the median face
+// Splits along the longest axis of the node's bounds: nth_element places the
+// median-centroid face at the midpoint and partitions the rest around it
+// (no full sort). Returns the left-partition face count (numFaces/2).
+uint32_t AABBTree::PartitionMedian(Node& n, uint32_t* faces, uint32_t numFaces)
+{
+ FaceSorter predicate(&m_vertices[0], &m_indices[0], m_numFaces*3, LongestAxis(n.m_maxExtents-n.m_minExtents));
+ std::nth_element(faces, faces+numFaces/2, faces+numFaces, predicate);
+
+ return numFaces/2;
+}
+
+// partition faces based on the surface area heuristic
+// For each of the three axes, sorts the faces by centroid and sweeps all
+// split positions, evaluating the SAH cost from prefix/suffix surface areas.
+// The faces are left sorted along the winning axis and the function returns
+// the number of faces in the lower (left) partition.
+uint32_t AABBTree::PartitionSAH(Node& n, uint32_t* faces, uint32_t numFaces)
+{
+
+ /*
+ Vector3 mean(0.0f);
+ Vector3 variance(0.0f);
+
+ // calculate best axis based on variance
+ for (uint32_t i=0; i < numFaces; ++i)
+ {
+ mean += 0.5f*(m_faceBounds[faces[i]].m_min + m_faceBounds[faces[i]].m_max);
+ }
+
+ mean /= float(numFaces);
+
+ for (uint32_t i=0; i < numFaces; ++i)
+ {
+ Vector3 v = 0.5f*(m_faceBounds[faces[i]].m_min + m_faceBounds[faces[i]].m_max) - mean;
+ v *= v;
+ variance += v;
+ }
+
+ uint32_t bestAxis = LongestAxis(variance);
+ */
+
+ uint32_t bestAxis = 0;
+ uint32_t bestIndex = 0;
+ float bestCost = FLT_MAX;
+
+ // exhaustive sweep over all three axes
+ for (uint32_t a=0; a < 3; ++a)
+ //uint32_t a = bestAxis;
+ {
+ // sort faces by centroids
+ FaceSorter predicate(&m_vertices[0], &m_indices[0], m_numFaces*3, a);
+ std::sort(faces, faces+numFaces, predicate);
+
+ // two passes over data to calculate upper and lower bounds
+ vector<float> cumulativeLower(numFaces);
+ vector<float> cumulativeUpper(numFaces);
+
+ // NOTE(review): Bounds default-constructs to the zero box at the origin
+ // (see aabbtree.h), not an "empty" box, so these running bounds always
+ // contain the origin and the surface areas are slightly inflated —
+ // affects split quality only, not correctness. TODO confirm intended.
+ Bounds lower;
+ Bounds upper;
+
+ // cumulativeLower[i] = area of bounds of faces [0..i],
+ // cumulativeUpper[i] = area of bounds of faces [i..numFaces)
+ for (uint32_t i=0; i < numFaces; ++i)
+ {
+ lower.Union(m_faceBounds[faces[i]]);
+ upper.Union(m_faceBounds[faces[numFaces-i-1]]);
+
+ cumulativeLower[i] = lower.GetSurfaceArea();
+ cumulativeUpper[numFaces-i-1] = upper.GetSurfaceArea();
+ }
+
+ // cumulativeUpper[0] is the surface area of the whole face set
+ float invTotalSA = 1.0f / cumulativeUpper[0];
+
+ // test all split positions
+ for (uint32_t i=0; i < numFaces-1; ++i)
+ {
+ float pBelow = cumulativeLower[i] * invTotalSA;
+ float pAbove = cumulativeUpper[i] * invTotalSA;
+
+ // 0.125f: constant traversal-cost term relative to a triangle test
+ float cost = 0.125f + (pBelow*i + pAbove*(numFaces-i));
+ if (cost <= bestCost)
+ {
+ bestCost = cost;
+ bestIndex = i;
+ bestAxis = a;
+ }
+ }
+ }
+
+ // re-sort by best axis
+ FaceSorter predicate(&m_vertices[0], &m_indices[0], m_numFaces*3, bestAxis);
+ std::sort(faces, faces+numFaces, predicate);
+
+ return bestIndex+1;
+}
+
+// Recursively builds the subtree rooted at nodeIndex over faces[0..numFaces).
+// The face list is partitioned in place; leaf nodes keep a pointer into this
+// storage, which is why m_faces must not be reallocated after the build.
+void AABBTree::BuildRecursive(uint32_t nodeIndex, uint32_t* faces, uint32_t numFaces)
+{
+ const uint32_t kMaxFacesPerLeaf = 6;
+
+ // if we've run out of nodes allocate some more
+ if (nodeIndex >= m_nodes.size())
+ {
+ // grow geometrically (1.5x), with a 512-node floor
+ uint32_t s = std::max(uint32_t(1.5f*m_nodes.size()), 512U);
+
+ //cout << "Resizing tree, current size: " << m_nodes.size()*sizeof(Node) << " new size: " << s*sizeof(Node) << endl;
+
+ m_nodes.resize(s);
+ }
+
+ // a reference to the current node, need to be careful here as this reference may become invalid if array is resized
+ Node& n = m_nodes[nodeIndex];
+
+ // track max tree depth
+ ++s_depth;
+ m_treeDepth = max(m_treeDepth, s_depth);
+
+ CalculateFaceBounds(faces, numFaces, n.m_minExtents, n.m_maxExtents);
+
+ // calculate bounds of faces and add node
+ if (numFaces <= kMaxFacesPerLeaf)
+ {
+ // leaf: store a view into the caller's face range
+ n.m_faces = faces;
+ n.m_numFaces = numFaces;
+
+ ++m_leafNodes;
+ }
+ else
+ {
+ ++m_innerNodes;
+
+ // face counts for each branch
+ //const uint32_t leftCount = PartitionMedian(n, faces, numFaces);
+ const uint32_t leftCount = PartitionSAH(n, faces, numFaces);
+ const uint32_t rightCount = numFaces-leftCount;
+
+ // alloc 2 nodes
+ m_nodes[nodeIndex].m_children = m_freeNode;
+
+ // allocate two nodes
+ m_freeNode += 2;
+
+ // split faces in half and build each side recursively
+ // (re-index via m_nodes[nodeIndex] rather than 'n' — the recursive
+ // calls may resize m_nodes and invalidate the reference)
+ BuildRecursive(m_nodes[nodeIndex].m_children+0, faces, leftCount);
+ BuildRecursive(m_nodes[nodeIndex].m_children+1, faces+leftCount, rightCount);
+ }
+
+ --s_depth;
+}
+
+// Node index + AABB entry distance pair, used by the (currently commented
+// out) iterative TraceRay variant below for its explicit traversal stack.
+struct StackEntry
+{
+ uint32_t m_node;
+ float m_dist;
+};
+
+
+#define TRACE_STATS 0
+
+/*
+bool AABBTree::TraceRay(const Vec3& start, const Vector3& dir, float& outT, float& outU, float& outV, float& outW, float& outSign, uint32_t& outIndex) const
+{
+#if _WIN32
+ // reset stats
+ s_traceDepth = 0;
+#endif
+
+ const Vector3 rcp_dir(1.0f/dir.x, 1.0f/dir.y, 1.0f/dir.z);
+
+ // some temp variables
+ Vector3 normal;
+ float t, u, v, w, s;
+
+ float minT, minU, minV, minW, minSign;
+ minU = minV = minW = minSign = minT = FLT_MAX;
+
+ uint32_t minIndex = 0;
+ Vector3 minNormal;
+
+ const uint32_t kStackDepth = 50;
+
+ StackEntry stack[kStackDepth];
+ stack[0].m_node = 0;
+ stack[0].m_dist = 0.0f;
+
+ uint32_t stackCount = 1;
+
+ while (stackCount)
+ {
+ // pop node from back
+ StackEntry& e = stack[--stackCount];
+
+ // ignore if another node has already come closer
+ if (e.m_dist >= minT)
+ {
+ continue;
+ }
+
+ const Node* node = &m_nodes[e.m_node];
+
+filth:
+
+ if (node->m_faces == NULL)
+ {
+
+#if TRACE_STATS
+ extern uint32_t g_nodesChecked;
+ ++g_nodesChecked;
+#endif
+
+#if _WIN32
+ ++s_traceDepth;
+#endif
+ // find closest node
+ const Node& leftChild = m_nodes[node->m_children+0];
+ const Node& rightChild = m_nodes[node->m_children+1];
+
+ float dist[2] = {FLT_MAX, FLT_MAX};
+
+ IntersectRayAABBOmpf(start, rcp_dir, leftChild.m_minExtents, leftChild.m_maxExtents, dist[0]);
+ IntersectRayAABBOmpf(start, rcp_dir, rightChild.m_minExtents, rightChild.m_maxExtents, dist[1]);
+
+ const uint32_t closest = dist[1] < dist[0]; // 0 or 1
+ const uint32_t furthest = closest ^ 1;
+
+ if (dist[furthest] < minT)
+ {
+ StackEntry& e = stack[stackCount++];
+ e.m_node = node->m_children+furthest;
+ e.m_dist = dist[furthest];
+ }
+
+ // lifo
+ if (dist[closest] < minT)
+ {
+ node = &m_nodes[node->m_children+closest];
+ goto filth;
+ }
+ }
+ else
+ {
+ for (uint32_t i=0; i < node->m_numFaces; ++i)
+ {
+ const uint32_t faceIndex = node->m_faces[i];
+ const uint32_t indexStart = faceIndex*3;
+
+ const Vec3& a = m_vertices[m_indices[indexStart+0]];
+ const Vec3& b = m_vertices[m_indices[indexStart+1]];
+ const Vec3& c = m_vertices[m_indices[indexStart+2]];
+#if TRACE_STATS
+ extern uint32_t g_trisChecked;
+ ++g_trisChecked;
+#endif
+
+ if (IntersectRayTriTwoSided(start, dir, a, b, c, t, u, v, w, s))
+ {
+ if (t < minT && t > 0.01f)
+ {
+ minT = t;
+ minU = u;
+ minV = v;
+ minW = w;
+ minSign = s;
+ minIndex = faceIndex;
+ }
+ }
+ }
+ }
+ }
+
+ // copy to outputs
+ outT = minT;
+ outU = minU;
+ outV = minV;
+ outW = minW;
+ outSign = minSign;
+ outIndex = minIndex;
+
+ return (outT != FLT_MAX);
+}
+*/
+
+// Returns true if the ray (start, dir) hits any triangle. On a hit, outT is
+// the nearest hit distance, u/v/w the barycentric coordinates, faceSign the
+// two-sided orientation sign and faceIndex the hit face; outT stays FLT_MAX
+// when there is no hit.
+bool AABBTree::TraceRay(const Vec3& start, const Vector3& dir, float& outT, float& u, float& v, float& w, float& faceSign, uint32_t& faceIndex) const
+{
+ //s_traceDepth = 0;
+
+ // NOTE(review): rcp_dir is computed but never used by this recursive
+ // version — it is only needed by the commented-out iterative variant above.
+ Vector3 rcp_dir(1.0f/dir.x, 1.0f/dir.y, 1.0f/dir.z);
+
+ outT = FLT_MAX;
+ TraceRecursive(0, start, dir, outT, u, v, w, faceSign, faceIndex);
+
+ return (outT != FLT_MAX);
+}
+
+
+// Recursive nearest-hit traversal. Visits the closer child first, and only
+// descends into a child whose AABB entry distance beats the best hit found so
+// far (outT) — so a near hit prunes the farther subtree entirely.
+void AABBTree::TraceRecursive(uint32_t nodeIndex, const Vec3& start, const Vector3& dir, float& outT, float& outU, float& outV, float& outW, float& faceSign, uint32_t& faceIndex) const
+{
+ const Node& node = m_nodes[nodeIndex];
+
+ // inner node (m_faces == NULL distinguishes inner nodes from leaves)
+ if (node.m_faces == NULL)
+ {
+#if _WIN32
+ ++s_traceDepth;
+#endif
+
+#if TRACE_STATS
+ extern uint32_t g_nodesChecked;
+ ++g_nodesChecked;
+#endif
+
+ // find closest node
+ const Node& leftChild = m_nodes[node.m_children+0];
+ const Node& rightChild = m_nodes[node.m_children+1];
+
+ float dist[2] = {FLT_MAX, FLT_MAX};
+
+ IntersectRayAABB(start, dir, leftChild.m_minExtents, leftChild.m_maxExtents, dist[0], NULL);
+ IntersectRayAABB(start, dir, rightChild.m_minExtents, rightChild.m_maxExtents, dist[1], NULL);
+
+ uint32_t closest = 0;
+ uint32_t furthest = 1;
+
+ if (dist[1] < dist[0])
+ {
+ closest = 1;
+ furthest = 0;
+ }
+
+ // recurse closer child first so outT shrinks before the farther test
+ if (dist[closest] < outT)
+ TraceRecursive(node.m_children+closest, start, dir, outT, outU, outV, outW, faceSign, faceIndex);
+
+ if (dist[furthest] < outT)
+ TraceRecursive(node.m_children+furthest, start, dir, outT, outU, outV, outW, faceSign, faceIndex);
+
+ }
+ else
+ {
+ // leaf: test every triangle, keeping the nearest two-sided hit
+ Vector3 normal; // NOTE(review): unused local
+ float t, u, v, w, s;
+
+ for (uint32_t i=0; i < node.m_numFaces; ++i)
+ {
+ uint32_t indexStart = node.m_faces[i]*3;
+
+ const Vec3& a = m_vertices[m_indices[indexStart+0]];
+ const Vec3& b = m_vertices[m_indices[indexStart+1]];
+ const Vec3& c = m_vertices[m_indices[indexStart+2]];
+#if TRACE_STATS
+ extern uint32_t g_trisChecked;
+ ++g_trisChecked;
+#endif
+
+ if (IntersectRayTriTwoSided(start, dir, a, b, c, t, u, v, w, s))
+ {
+ if (t < outT)
+ {
+ outT = t;
+ outU = u;
+ outV = v;
+ outW = w;
+ faceSign = s;
+ faceIndex = node.m_faces[i];
+ }
+ }
+ }
+ }
+}
+
+/*
+bool AABBTree::TraceRay(const Vec3& start, const Vector3& dir, float& outT, Vector3* outNormal) const
+{
+ outT = FLT_MAX;
+ TraceRecursive(0, start, dir, outT, outNormal);
+
+ return (outT != FLT_MAX);
+}
+
+void AABBTree::TraceRecursive(uint32_t n, const Vec3& start, const Vector3& dir, float& outT, Vector3* outNormal) const
+{
+ const Node& node = m_nodes[n];
+
+ if (node.m_numFaces == 0)
+ {
+ extern _declspec(thread) uint32_t g_traceDepth;
+ ++g_traceDepth;
+#if _DEBUG
+ extern uint32_t g_nodesChecked;
+ ++g_nodesChecked;
+#endif
+ float t;
+ if (IntersectRayAABB(start, dir, node.m_minExtents, node.m_maxExtents, t, NULL))
+ {
+ if (t <= outT)
+ {
+ TraceRecursive(n*2+1, start, dir, outT, outNormal);
+ TraceRecursive(n*2+2, start, dir, outT, outNormal);
+ }
+ }
+ }
+ else
+ {
+ Vector3 normal;
+ float t, u, v, w;
+
+ for (uint32_t i=0; i < node.m_numFaces; ++i)
+ {
+ uint32_t indexStart = node.m_faces[i]*3;
+
+ const Vec3& a = m_vertices[m_indices[indexStart+0]];
+ const Vec3& b = m_vertices[m_indices[indexStart+1]];
+ const Vec3& c = m_vertices[m_indices[indexStart+2]];
+#if _DEBUG
+ extern uint32_t g_trisChecked;
+ ++g_trisChecked;
+#endif
+
+ if (IntersectRayTri(start, dir, a, b, c, t, u, v, w, &normal))
+ {
+ if (t < outT)
+ {
+ outT = t;
+
+ if (outNormal)
+ *outNormal = normal;
+ }
+ }
+ }
+ }
+}
+*/
+// Brute-force reference trace: tests the ray against every triangle in the
+// mesh with no tree traversal (O(numFaces)); useful for validating the
+// hierarchical TraceRay. Returns true on a hit; note the outputs are written
+// even on a miss (FLT_MAX sentinels, faceIndex = 0).
+bool AABBTree::TraceRaySlow(const Vec3& start, const Vector3& dir, float& outT, float& outU, float& outV, float& outW, float& faceSign, uint32_t& faceIndex) const
+{
+ const uint32_t numFaces = GetNumFaces();
+
+ float minT, minU, minV, minW, minS;
+ minT = minU = minV = minW = minS = FLT_MAX;
+
+ Vector3 minNormal(0.0f, 1.0f, 0.0f);
+
+ Vector3 n(0.0f, 1.0f, 0.0f);
+ float t, u, v, w, s;
+ bool hit = false;
+ uint32_t minIndex = 0;
+
+ for (uint32_t i=0; i < numFaces; ++i)
+ {
+ const Vec3& a = m_vertices[m_indices[i*3+0]];
+ const Vec3& b = m_vertices[m_indices[i*3+1]];
+ const Vec3& c = m_vertices[m_indices[i*3+2]];
+
+ if (IntersectRayTriTwoSided(start, dir, a, b, c, t, u, v, w, s))
+ {
+ // keep the nearest hit
+ if (t < minT)
+ {
+ minT = t;
+ minU = u;
+ minV = v;
+ minW = w;
+ minS = s;
+ minNormal = n;
+ minIndex = i;
+ hit = true;
+ }
+ }
+ }
+
+ outT = minT;
+ outU = minU;
+ outV = minV;
+ outW = minW;
+ faceSign = minS;
+ faceIndex = minIndex;
+
+ return hit;
+}
+
+// Debug visualisation entry point — the GL wireframe drawing code is
+// commented out, so this is currently a no-op.
+void AABBTree::DebugDraw()
+{
+ /*
+ glPolygonMode( GL_FRONT_AND_BACK, GL_LINE );
+
+ DebugDrawRecursive(0, 0);
+
+ glPolygonMode( GL_FRONT_AND_BACK, GL_FILL );
+ */
+
+}
+
+// Recursive debug draw (disabled): only the depth cutoff is live; all of the
+// GL drawing code below is commented out, so the function currently does
+// nothing beyond the early return.
+void AABBTree::DebugDrawRecursive(uint32_t nodeIndex, uint32_t depth)
+{
+ // maximum depth of the hierarchy that would be drawn
+ static uint32_t kMaxDepth = 3;
+
+ if (depth > kMaxDepth)
+ return;
+
+
+ /*
+ Node& n = m_nodes[nodeIndex];
+
+ Vector3 minExtents = FLT_MAX;
+ Vector3 maxExtents = -FLT_MAX;
+
+ // calculate face bounds
+ for (uint32_t i=0; i < m_vertices.size(); ++i)
+ {
+ Vector3 a = m_vertices[i];
+
+ minExtents = Min(a, minExtents);
+ maxExtents = Max(a, maxExtents);
+ }
+
+
+ glBegin(GL_QUADS);
+ glVertex3f(minExtents.x, maxExtents.y, 0.0f);
+ glVertex3f(maxExtents.x, maxExtents.y, 0.0f);
+ glVertex3f(maxExtents.x, minExtents.y, 0.0f);
+ glVertex3f(minExtents.x, minExtents.y, 0.0f);
+ glEnd();
+
+
+ n.m_center = Vec3(minExtents+maxExtents)/2;
+ n.m_extents = (maxExtents-minExtents)/2;
+ */
+/*
+ if (n.m_minEtextents != Vector3(0.0f))
+ {
+ Vec3 corners[8];
+ corners[0] = n.m_center + Vector3(-n.m_extents.x, n.m_extents.y, n.m_extents.z);
+ corners[1] = n.m_center + Vector3(n.m_extents.x, n.m_extents.y, n.m_extents.z);
+ corners[2] = n.m_center + Vector3(n.m_extents.x, -n.m_extents.y, n.m_extents.z);
+ corners[3] = n.m_center + Vector3(-n.m_extents.x, -n.m_extents.y, n.m_extents.z);
+
+ corners[4] = n.m_center + Vector3(-n.m_extents.x, n.m_extents.y, -n.m_extents.z);
+ corners[5] = n.m_center + Vector3(n.m_extents.x, n.m_extents.y, -n.m_extents.z);
+ corners[6] = n.m_center + Vector3(n.m_extents.x, -n.m_extents.y, -n.m_extents.z);
+ corners[7] = n.m_center + Vector3(-n.m_extents.x, -n.m_extents.y, -n.m_extents.z);
+
+ glBegin(GL_QUADS);
+ glColor3f(0.0f, 1.0f, 0.0f);
+ glVertex3fv(corners[0]);
+ glVertex3fv(corners[1]);
+ glVertex3fv(corners[2]);
+ glVertex3fv(corners[3]);
+
+ glVertex3fv(corners[1]);
+ glVertex3fv(corners[5]);
+ glVertex3fv(corners[6]);
+ glVertex3fv(corners[2]);
+
+ glVertex3fv(corners[0]);
+ glVertex3fv(corners[4]);
+ glVertex3fv(corners[5]);
+ glVertex3fv(corners[1]);
+
+ glVertex3fv(corners[4]);
+ glVertex3fv(corners[5]);
+ glVertex3fv(corners[6]);
+ glVertex3fv(corners[7]);
+
+ glVertex3fv(corners[0]);
+ glVertex3fv(corners[4]);
+ glVertex3fv(corners[7]);
+ glVertex3fv(corners[3]);
+
+ glVertex3fv(corners[3]);
+ glVertex3fv(corners[7]);
+ glVertex3fv(corners[6]);
+ glVertex3fv(corners[2]);
+
+ glEnd();
+
+ DebugDrawRecursive(nodeIndex*2+1, depth+1);
+ DebugDrawRecursive(nodeIndex*2+2, depth+1);
+ }
+ */
+}
diff --git a/core/aabbtree.h b/core/aabbtree.h
new file mode 100644
index 0000000..64913cf
--- /dev/null
+++ b/core/aabbtree.h
@@ -0,0 +1,155 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include "core.h"
+#include "maths.h"
+
+#include <vector>
+
+// Static axis-aligned bounding-box tree (BVH) over an indexed triangle mesh.
+// The mesh data is referenced, not owned, and must outlive the tree. Built
+// once at construction; queried via TraceRay (hierarchical) or TraceRaySlow
+// (brute-force reference).
+class AABBTree
+{
+ // non-copyable: copy operations declared private and left undefined
+ AABBTree(const AABBTree&);
+ AABBTree& operator=(const AABBTree&);
+
+public:
+
+ AABBTree(const Vec3* vertices, uint32_t numVerts, const uint32_t* indices, uint32_t numFaces);
+
+ bool TraceRaySlow(const Vec3& start, const Vector3& dir, float& outT, float& u, float& v, float& w, float& faceSign, uint32_t& faceIndex) const;
+ bool TraceRay(const Vec3& start, const Vector3& dir, float& outT, float& u, float& v, float& w, float& faceSign, uint32_t& faceIndex) const;
+
+ void DebugDraw();
+
+ // bounds of the whole tree (root node, index 0)
+ Vector3 GetCenter() const { return (m_nodes[0].m_minExtents+m_nodes[0].m_maxExtents)*0.5f; }
+ Vector3 GetMinExtents() const { return m_nodes[0].m_minExtents; }
+ Vector3 GetMaxExtents() const { return m_nodes[0].m_maxExtents; }
+
+#if _WIN32
+ // stats (reset each trace)
+ // NOTE(review): the reset in TraceRay is commented out, so this currently
+ // accumulates across traces — confirm intended.
+ static uint32_t GetTraceDepth() { return s_traceDepth; }
+#endif
+
+private:
+
+ void DebugDrawRecursive(uint32_t nodeIndex, uint32_t depth);
+
+ // Tree node, discriminated by m_faces: inner nodes have m_faces == NULL
+ // and m_children holds the index of the first of two consecutively
+ // allocated children; leaves have m_faces pointing into AABBTree::m_faces
+ // with m_numFaces entries.
+ struct Node
+ {
+ Node()
+ : m_numFaces(0)
+ , m_faces(NULL)
+ , m_minExtents(0.0f)
+ , m_maxExtents(0.0f)
+ {
+ }
+
+ union
+ {
+ uint32_t m_children;
+ uint32_t m_numFaces;
+ };
+
+ uint32_t* m_faces;
+ Vector3 m_minExtents;
+ Vector3 m_maxExtents;
+ };
+
+
+ // Simple AABB used during the build (per-face bounds, SAH sweeps).
+ // NOTE(review): the default constructor yields the zero-volume box at the
+ // origin rather than an "empty" box, so Union() on a default Bounds always
+ // contains the origin — confirm intended (see PartitionSAH).
+ struct Bounds
+ {
+ Bounds() : m_min(0.0f), m_max(0.0f)
+ {
+ }
+
+ Bounds(const Vector3& min, const Vector3& max) : m_min(min), m_max(max)
+ {
+ }
+
+ inline float GetVolume() const
+ {
+ Vector3 e = m_max-m_min;
+ return (e.x*e.y*e.z);
+ }
+
+ inline float GetSurfaceArea() const
+ {
+ Vector3 e = m_max-m_min;
+ return 2.0f*(e.x*e.y + e.x*e.z + e.y*e.z);
+ }
+
+ // expand to enclose b as well
+ inline void Union(const Bounds& b)
+ {
+ m_min = Min(m_min, b.m_min);
+ m_max = Max(m_max, b.m_max);
+ }
+
+ Vector3 m_min;
+ Vector3 m_max;
+ };
+
+ typedef std::vector<uint32_t> IndexArray;
+ typedef std::vector<Vec3> PositionArray;
+ typedef std::vector<Node> NodeArray;
+ typedef std::vector<uint32_t> FaceArray;
+ typedef std::vector<Bounds> FaceBoundsArray;
+
+ // partition the objects and return the number of objects in the lower partition
+ uint32_t PartitionMedian(Node& n, uint32_t* faces, uint32_t numFaces);
+ uint32_t PartitionSAH(Node& n, uint32_t* faces, uint32_t numFaces);
+
+ void Build();
+ void BuildRecursive(uint32_t nodeIndex, uint32_t* faces, uint32_t numFaces);
+ void TraceRecursive(uint32_t nodeIndex, const Vec3& start, const Vector3& dir, float& outT, float& u, float& v, float& w, float& faceSign, uint32_t& faceIndex) const;
+
+ void CalculateFaceBounds(uint32_t* faces, uint32_t numFaces, Vector3& outMinExtents, Vector3& outMaxExtents);
+ uint32_t GetNumFaces() const { return m_numFaces; }
+ uint32_t GetNumNodes() const { return uint32_t(m_nodes.size()); }
+
+ // track the next free node
+ uint32_t m_freeNode;
+
+ // borrowed mesh data (not owned)
+ const Vec3* m_vertices;
+ const uint32_t m_numVerts;
+
+ const uint32_t* m_indices;
+ const uint32_t m_numFaces;
+
+ FaceArray m_faces; // face indices, reordered during build; leaves point into this
+ NodeArray m_nodes;
+ FaceBoundsArray m_faceBounds; // per-face bounds, released at the end of Build()
+
+ // stats
+ uint32_t m_treeDepth;
+ uint32_t m_innerNodes;
+ uint32_t m_leafNodes;
+
+#if _WIN32
+ // per-thread traversal depth counter (Win32 only)
+ _declspec (thread) static uint32_t s_traceDepth;
+#endif
+};
diff --git a/core/cloth.h b/core/cloth.h
new file mode 100644
index 0000000..c70c81f
--- /dev/null
+++ b/core/cloth.h
@@ -0,0 +1,733 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include <set>
+#include <vector>
+#include <map>
+#include <algorithm>
+#include <functional>
+#include <numeric>
+
+#include "maths.h"
+
+class ClothMesh
+{
+public:
+
+ struct Edge
+ {
+ int vertices[2];
+ int tris[2];
+
+ int stretchConstraint;
+ int bendingConstraint;
+
+
+ Edge(int a, int b)
+ {
+ assert(a != b);
+
+ vertices[0] = Min(a, b);
+ vertices[1] = Max(a, b);
+
+ tris[0] = -1;
+ tris[1] = -1;
+
+ stretchConstraint = -1;
+ bendingConstraint = -1;
+ }
+
+ bool IsBoundary() const { return tris[0] == -1 || tris[1] == -1; }
+
+ bool Contains(int index) const
+ {
+ return (vertices[0] == index) || (vertices[1] == index);
+ }
+
+ void Replace(int oldIndex, int newIndex)
+ {
+ if (vertices[0] == oldIndex)
+ vertices[0] = newIndex;
+ else if (vertices[1] == oldIndex)
+ vertices[1] = newIndex;
+ else
+ assert(0);
+ }
+
+ void RemoveTri(int index)
+ {
+ if (tris[0] == index)
+ tris[0] = -1;
+ else if (tris[1] == index)
+ tris[1] = -1;
+ else
+ assert(0);
+ }
+
+ bool AddTri(int index)
+ {
+ if (tris[0] == -1)
+ {
+ tris[0] = index;
+ return true;
+ }
+ else if (tris[1] == -1)
+ {
+ // check tri not referencing same edge
+ if (index == tris[0])
+ return false;
+ else
+ {
+ tris[1] = index;
+ return true;
+ }
+ }
+ else
+ return false;
+ }
+
+ bool operator < (const Edge& rhs) const
+ {
+ if (vertices[0] != rhs.vertices[0])
+ return vertices[0] < rhs.vertices[0];
+ else
+ return vertices[1] < rhs.vertices[1];
+ }
+ };
+
+ struct Triangle
+ {
+ Triangle(int a, int b, int c)
+ {
+ assert(a != b && a != c);
+ assert(b != c);
+
+ vertices[0] = a;
+ vertices[1] = b;
+ vertices[2] = c;
+
+ edges[0] = -1;
+ edges[1] = -1;
+ edges[2] = -1;
+
+ side = -1;
+
+ component = -1;
+ }
+
+ bool Contains(int v) const
+ {
+ if (vertices[0] == v ||
+ vertices[1] == v ||
+ vertices[2] == v)
+ return true;
+ else
+ return false;
+ }
+
+ void ReplaceEdge(int oldIndex, int newIndex)
+ {
+ for (int i=0; i < 3; ++i)
+ {
+ if (edges[i] == oldIndex)
+ {
+ edges[i] = newIndex;
+ return;
+ }
+
+ }
+ assert(0);
+ }
+
+ int ReplaceVertex(int oldIndex, int newIndex)
+ {
+ for (int i=0; i < 3; ++i)
+ {
+ if (vertices[i] == oldIndex)
+ {
+ vertices[i] = newIndex;
+ return i;
+ }
+ }
+
+ assert(0);
+ return -1;
+ }
+
+ int GetOppositeVertex(int v0, int v1) const
+ {
+ for (int i=0; i < 3; ++i)
+ {
+ if (vertices[i] != v0 && vertices[i] != v1)
+ return vertices[i];
+ }
+
+ assert(0);
+ return -1;
+ }
+
+ int vertices[3];
+ int edges[3];
+
+ // used during splitting
+ int side;
+
+ // used during singular vertex removal
+ mutable int component;
+ };
+
	// Builds cloth constraint data (stretch + bending distance constraints)
	// from an indexed triangle mesh.
	//
	// vertices, numVertices: particle positions; only xyz is read here
	// indices, numIndices:   triangle list, three indices per triangle
	// stretchStiffness:      coefficient stored with each edge constraint
	// bendStiffness:         coefficient for bending constraints across
	//                        interior edges; bending is skipped when <= 0
	// tearable:              when true, also builds the edge/triangle
	//                        adjacency used by SeparateVertex()/SplitVertex()
	//
	// On failure (an edge shared by more than two triangles, or a degenerate
	// triangle) the constructor returns early leaving mValid == false, so
	// callers must check mValid before using the mesh.
	ClothMesh(const Vec4* vertices, int numVertices,
			const int* indices, int numIndices,
			float stretchStiffness,
			float bendStiffness, bool tearable=true)
	{
		mValid = false;

		mNumVertices = numVertices;

		if (tearable)
		{
			// tearable cloth uses a simple bending constraint model that allows easy splitting of vertices and remapping of constraints
			typedef std::set<Edge> EdgeSet;
			EdgeSet edges;

			// build unique edge list
			for (int i=0; i < numIndices; i += 3)
			{
				mTris.push_back(Triangle(indices[i+0], indices[i+1], indices[i+2]));

				const int triIndex = i/3;

				// breaking the rules: mutating set elements in place is safe
				// here only because AddTri() does not change the members the
				// set's ordering comparison is based on
				Edge& e1 = const_cast<Edge&>(*edges.insert(Edge(indices[i+0], indices[i+1])).first);
				Edge& e2 = const_cast<Edge&>(*edges.insert(Edge(indices[i+1], indices[i+2])).first);
				Edge& e3 = const_cast<Edge&>(*edges.insert(Edge(indices[i+2], indices[i+0])).first);

				// AddTri() fails when the edge already references two
				// triangles, i.e. the mesh is non-manifold; bail out with
				// mValid == false
				if (!e1.AddTri(triIndex))
					return;
				if (!e2.AddTri(triIndex))
					return;
				if (!e3.AddTri(triIndex))
					return;
			}

			// flatten set to array (remains sorted, so the lower_bound
			// lookups below are valid)
			mEdges.assign(edges.begin(), edges.end());

			// second pass, set edge indices to faces
			for (int i=0; i < numIndices; i += 3)
			{
				int e1 = int(std::lower_bound(mEdges.begin(), mEdges.end(), Edge(indices[i+0], indices[i+1])) - mEdges.begin());
				int e2 = int(std::lower_bound(mEdges.begin(), mEdges.end(), Edge(indices[i+1], indices[i+2])) - mEdges.begin());
				int e3 = int(std::lower_bound(mEdges.begin(), mEdges.end(), Edge(indices[i+2], indices[i+0])) - mEdges.begin());


				if (e1 != e2 && e1 != e3 && e2 != e3)
				{
					const int triIndex = i/3;

					mTris[triIndex].edges[0] = e1;
					mTris[triIndex].edges[1] = e2;
					mTris[triIndex].edges[2] = e3;
				}
				else
				{
					// degenerate tri: two of its edges mapped to the same
					// edge record; leave mValid == false
					return;
				}
			}

			// generate distance constraints
			for (size_t i=0; i < mEdges.size(); ++i)
			{
				Edge& edge = mEdges[i];

				// stretch constraint along mesh edges
				edge.stretchConstraint = AddConstraint(vertices, edge.vertices[0], edge.vertices[1], stretchStiffness);

				const int t1 = edge.tris[0];
				const int t2 = edge.tris[1];

				// add bending constraint between connected tris: a distance
				// constraint joining the two vertices opposite the shared edge
				if (t1 != -1 && t2 != -1 && bendStiffness > 0.0f)
				{
					int v1 = mTris[t1].GetOppositeVertex(edge.vertices[0], edge.vertices[1]);
					int v2 = mTris[t2].GetOppositeVertex(edge.vertices[0], edge.vertices[1]);
					edge.bendingConstraint = AddConstraint(vertices, v1, v2, bendStiffness);
				}
			}
		}

		// calculate rest volume as a sum of signed tetrahedron volumes
		// (up to a constant factor -- presumably normalized by the consumer;
		// TODO confirm against the solver)
		mRestVolume = 0.0f;
		mConstraintScale = 0.0f;

		std::vector<Vec3> gradients(numVertices);

		for (int i=0; i < numIndices; i+=3)
		{
			Vec3 v1 = Vec3(vertices[indices[i+0]]);
			Vec3 v2 = Vec3(vertices[indices[i+1]]);
			Vec3 v3 = Vec3(vertices[indices[i+2]]);

			const Vec3 n = Cross(v2-v1, v3-v1);
			const float signedVolume = Dot(v1, n);

			mRestVolume += signedVolume;

			// accumulate the per-vertex gradient of the volume (area-weighted
			// face normals)
			gradients[indices[i+0]] += n;
			gradients[indices[i+1]] += n;
			gradients[indices[i+2]] += n;
		}

		// normalization factor for the volume (inflation) constraint
		// NOTE(review): divides by zero when all gradients vanish (empty or
		// fully degenerate input) -- confirm callers never pass such a mesh
		for (int i=0; i < numVertices; ++i)
			mConstraintScale += Dot(gradients[i], gradients[i]);

		mConstraintScale = 1.0f/mConstraintScale;

		mValid = true;

	}
+
+ int AddConstraint(const Vec4* vertices, int a, int b, float stiffness, float give=0.0f)
+ {
+ int index = int(mConstraintRestLengths.size());
+
+ mConstraintIndices.push_back(a);
+ mConstraintIndices.push_back(b);
+
+ const float restLength = Length(Vec3(vertices[a])-Vec3(vertices[b]));
+
+ mConstraintRestLengths.push_back(restLength*(1.0f + give));
+ mConstraintCoefficients.push_back(stiffness);
+
+ return index;
+ }
+
	// Returns the number of edge-connected triangle components (fans) around
	// 'vertex': 1 for a regular manifold vertex, >1 when the vertex is
	// singular (e.g. two surface sheets meeting at a point). Despite the
	// Is* name, the return value is a count, not a bool.
	//
	// Uses the mutable Triangle::component field as scratch space and resets
	// it to -1 before returning, which is why the method can be const.
	int IsSingularVertex(int vertex) const
	{
		std::vector<int> adjacentTriangles;

		// gather adjacent triangles
		for (int i=0; i < int(mTris.size()); ++i)
		{
			if (mTris[i].Contains(vertex))
				adjacentTriangles.push_back(i);
		}

		// number of identified components
		int componentCount = 0;

		// while connected tris not colored
		for (int i=0; i < int(adjacentTriangles.size()); ++i)
		{
			// pop off a triangle
			int seed = adjacentTriangles[i];

			// triangle already belongs to a component
			if (mTris[seed].component != -1)
				continue;

			// flood fill every triangle reachable from the seed through
			// interior edges incident to the queried vertex
			std::vector<int> stack;
			stack.push_back(seed);

			while (!stack.empty())
			{
				int t = stack.back();
				stack.pop_back();

				const Triangle& tri = mTris[t];

				if (tri.component == componentCount)
				{
					// we're back to the beginning
					// component is fully connected
					// NOTE(review): this break abandons any entries still on
					// the stack; correct for a closed fan that loops back on
					// itself, but verify for meshes where a triangle can be
					// pushed twice before the fan closes
					break;
				}

				tri.component = componentCount;

				// update mesh
				for (int e=0; e < 3; ++e)
				{
					const Edge& edge = mEdges[tri.edges[e]];

					if (edge.Contains(vertex))
					{
						if (!edge.IsBoundary())
						{
							// push unprocessed neighbors on stack
							for (int s=0; s < 2; ++s)
							{
								assert(mTris[edge.tris[s]].component == -1 || mTris[edge.tris[s]].component == componentCount);

								if (edge.tris[s] != t && mTris[edge.tris[s]].component == -1)
									stack.push_back(edge.tris[s]);
							}
						}
					}
				}
			}

			componentCount++;
		}

		// reset component indices so the scratch state is clean for the
		// next query
		for (int i=0; i < int(adjacentTriangles.size()); ++i)
		{
			assert(mTris[adjacentTriangles[i]].component != -1);

			mTris[adjacentTriangles[i]].component = -1;
		}

		return componentCount;
	}
+
	// Records that one corner of the caller's flattened triangle index
	// buffer must be rewritten after a vertex split/separation.
	struct TriangleUpdate
	{
		int triangle; // flattened corner index: triangleIndex*3 + corner
		int vertex; // new vertex index for that corner
	};
+
	// Records that the caller must duplicate the attributes of an existing
	// vertex into a newly created one after a vertex split/separation.
	struct VertexCopy
	{
		int srcIndex; // existing vertex to copy attributes from
		int destIndex; // index of the newly created vertex
	};
+
	// Splits a singular vertex into one vertex per edge-connected triangle
	// component around it. The first component keeps singularVertex; each
	// subsequent component is assigned a fresh index starting at the current
	// mNumVertices. Triangle, edge and constraint references are remapped
	// in place.
	//
	// replacements: receives index-buffer corner rewrites for the caller
	// copies:       receives one entry per newly created vertex
	// maxCopies:    cap on how many new vertices may be created
	//
	// Returns the number of components traversed.
	int SeparateVertex(int singularVertex, std::vector<TriangleUpdate>& replacements, std::vector<VertexCopy>& copies, int maxCopies)
	{
		std::vector<int> adjacentTriangles;

		// gather adjacent triangles
		for (int i=0; i < int(mTris.size()); ++i)
		{
			if (mTris[i].Contains(singularVertex))
				adjacentTriangles.push_back(i);
		}

		// number of identified components
		int componentCount = 0;

		// first component keeps the existing vertex
		int newIndex = singularVertex;

		// while connected tris not colored
		for (int i=0; i < int(adjacentTriangles.size()); ++i)
		{
			if (maxCopies == 0)
				break;

			// pop off a triangle
			int seed = adjacentTriangles[i];

			// triangle already belongs to a component
			if (mTris[seed].component != -1)
				continue;

			// flood fill this component, remapping everything that touches
			// singularVertex over to newIndex as we go
			std::vector<int> stack;
			stack.push_back(seed);

			while (!stack.empty())
			{
				int t = stack.back();
				stack.pop_back();

				Triangle& tri = mTris[t];

				// test if we're back to the beginning, in which case the component is fully connected
				if (tri.component == componentCount)
					break;

				assert(tri.component == -1);

				tri.component = componentCount;

				// update triangle
				int v = tri.ReplaceVertex(singularVertex, newIndex);

				if (singularVertex != newIndex)
				{
					// output replacement
					TriangleUpdate r;
					r.triangle = t*3 + v;
					r.vertex = newIndex;
					replacements.push_back(r);
				}

				// update mesh
				for (int e=0; e < 3; ++e)
				{
					Edge& edge = mEdges[tri.edges[e]];

					if (edge.Contains(singularVertex))
					{
						// update edge to point to new vertex
						edge.Replace(singularVertex, newIndex);

						// remap the endpoint of the edge's stretch constraint
						const int stretching = edge.stretchConstraint;
						if (mConstraintIndices[stretching*2+0] == singularVertex)
							mConstraintIndices[stretching*2+0] = newIndex;
						else if (mConstraintIndices[stretching*2+1] == singularVertex)
							mConstraintIndices[stretching*2+1] = newIndex;
						else
							assert(0);

						if (!edge.IsBoundary())
						{
							// push unprocessed neighbors on stack
							for (int s=0; s < 2; ++s)
							{
								assert(mTris[edge.tris[s]].component == -1 || mTris[edge.tris[s]].component == componentCount);

								if (edge.tris[s] != t && mTris[edge.tris[s]].component == -1)
									stack.push_back(edge.tris[s]);
							}
						}
					}
					else
					{
						// this edge does not touch the split vertex, but its
						// bending constraint may still reference it
						const int bending = edge.bendingConstraint;

						if (bending != -1)
						{
							if (mConstraintIndices[bending*2+0] == singularVertex)
								mConstraintIndices[bending*2+0] = newIndex;
							else if (mConstraintIndices[bending*2+1] == singularVertex)
								mConstraintIndices[bending*2+1] = newIndex;
						}
					}
				}
			}

			// copy vertex
			if (singularVertex != newIndex)
			{
				VertexCopy copy;
				copy.srcIndex = singularVertex;
				copy.destIndex = newIndex;

				copies.push_back(copy);

				mNumVertices++;
				maxCopies--;
			}

			// component traversal finished; the next component (if any)
			// will receive a brand new vertex index
			newIndex = mNumVertices;

			componentCount++;
		}

		// reset component indices
		for (int i=0; i < int(adjacentTriangles.size()); ++i)
		{
			//assert(mTris[adjacentTriangles[i]].component != -1);

			mTris[adjacentTriangles[i]].component = -1;
		}

		return componentCount;
	}
+
	// Splits the vertex at 'index' along a plane with normal 'splitPlane'
	// passing through it (cloth tearing). Adjacent triangles whose centroid
	// lies on the non-negative side of the plane are reassigned to a newly
	// created duplicate vertex; edges and constraints are remapped to match.
	// No split happens when all adjacent triangles fall on one side.
	//
	// adjacentTris/adjacentVertices: receive the triangles and vertices
	//                                incident to the split vertex
	// replacements/copies:           index-buffer patches and vertex
	//                                duplications for the caller to apply
	// maxCopies:                     when 0 the split is rejected outright
	//
	// Returns the new vertex index, or -1 when nothing was split.
	int SplitVertex(const Vec4* vertices, int index, Vec3 splitPlane, std::vector<int>& adjacentTris, std::vector<int>& adjacentVertices, std::vector<TriangleUpdate>& replacements, std::vector<VertexCopy>& copies, int maxCopies)
	{
		if (maxCopies == 0)
			return -1;

		// plane offset chosen so the plane passes through the split vertex
		float w = Dot(vertices[index], splitPlane);

		int leftCount = 0;
		int rightCount = 0;

		const int newIndex = mNumVertices;

		// classify all tris attached to the split vertex according
		// to which side of the split plane their centroid lies on O(N)
		for (size_t i = 0; i < mTris.size(); ++i)
		{
			Triangle& tri = mTris[i];

			if (tri.Contains(index))
			{
				const Vec4 centroid = (vertices[tri.vertices[0]] + vertices[tri.vertices[1]] + vertices[tri.vertices[2]]) / 3.0f;

				if (Dot(Vec3(centroid), splitPlane) < w)
				{
					tri.side = 1;

					++leftCount;
				}
				else
				{
					tri.side = 0;

					++rightCount;
				}

				adjacentTris.push_back(int(i));
				for (int v=0; v < 3; ++v)
				{
					// record each adjacent vertex only once
					if (std::find(adjacentVertices.begin(), adjacentVertices.end(), tri.vertices[v]) == adjacentVertices.end())
					{
						adjacentVertices.push_back(tri.vertices[v]);
					}
				}
			}
		}

		// if all tris on one side of split plane then do nothing
		if (leftCount == 0 || rightCount == 0)
			return -1;

		// remap triangle indices
		for (size_t i = 0; i < adjacentTris.size(); ++i)
		{
			const int triIndex = adjacentTris[i];

			Triangle& tri = mTris[triIndex];

			// tris on the negative side of the split plane are assigned the new index
			if (tri.side == 0)
			{
				int v = tri.ReplaceVertex(index, newIndex);

				TriangleUpdate update;
				update.triangle = triIndex*3 + v;
				update.vertex = newIndex;
				replacements.push_back(update);

				// update edges and constraints
				for (int e = 0; e < 3; ++e)
				{
					Edge& edge = mEdges[tri.edges[e]];

					if (edge.Contains(index))
					{
						bool exposed = false;

						if (edge.tris[0] != -1 && edge.tris[1] != -1)
						{
							Triangle& t1 = mTris[edge.tris[0]];
							Triangle& t2 = mTris[edge.tris[1]];

							// Case 1: connected tris lie on opposite sides of the split plane
							// creating a new exposed edge, need to break bending constraint
							// and create new stretch constraint for exposed edge
							if (t1.side != t2.side)
							{
								// create new edge
								Edge newEdge(edge.vertices[0], edge.vertices[1]);
								newEdge.Replace(index, newIndex);
								newEdge.AddTri(triIndex);

								// remove neighbor from old edge
								edge.RemoveTri(triIndex);

								// replace bending constraint with stretch constraint,
								// recycling the bending constraint's slot rather
								// than allocating a new one
								assert(edge.bendingConstraint != -1);

								newEdge.stretchConstraint = edge.bendingConstraint;

								mConstraintIndices[newEdge.stretchConstraint * 2 + 0] = newEdge.vertices[0];
								mConstraintIndices[newEdge.stretchConstraint * 2 + 1] = newEdge.vertices[1];
								mConstraintCoefficients[newEdge.stretchConstraint] = mConstraintCoefficients[edge.stretchConstraint];
								mConstraintRestLengths[newEdge.stretchConstraint] = mConstraintRestLengths[edge.stretchConstraint];

								edge.bendingConstraint = -1;

								// don't access Edge& after this
								// (push_back may reallocate mEdges)
								tri.ReplaceEdge(tri.edges[e], int(mEdges.size()));
								mEdges.push_back(newEdge);

								exposed = true;
							}
						}

						if (!exposed)
						{
							// Case 2: both tris on same side of split plane or boundary edge, simply remap edge and constraint
							// may have processed this edge already so check that it still contains old vertex
							edge.Replace(index, newIndex);

							const int stretching = edge.stretchConstraint;
							if (mConstraintIndices[stretching * 2 + 0] == index)
								mConstraintIndices[stretching * 2 + 0] = newIndex;
							else if (mConstraintIndices[stretching * 2 + 1] == index)
								mConstraintIndices[stretching * 2 + 1] = newIndex;
							else
								assert(0);
						}
					}
					else
					{
						// Case 3: tri is adjacent to split vertex but this edge is not connected to it
						// therefore there can be a bending constraint crossing this edge connected
						// to vertex that needs to be remapped
						const int bending = edge.bendingConstraint;

						if (bending != -1)
						{
							if (mConstraintIndices[bending * 2 + 0] == index)
								mConstraintIndices[bending * 2 + 0] = newIndex;
							else if (mConstraintIndices[bending * 2 + 1] == index)
								mConstraintIndices[bending * 2 + 1] = newIndex;
						}
					}
				}

			}
		}

		// output vertex copy
		VertexCopy copy;
		copy.srcIndex = index;
		copy.destIndex = newIndex;

		copies.push_back(copy);

		mNumVertices++;

		return newIndex;
	}
+
	// Flattened constraint list: two particle indices per constraint
	std::vector<int> mConstraintIndices;
	// Per-constraint stiffness coefficient
	std::vector<float> mConstraintCoefficients;
	// Per-constraint rest length
	std::vector<float> mConstraintRestLengths;

	// Edge/triangle adjacency; only populated for tearable cloth
	std::vector<Edge> mEdges;
	std::vector<Triangle> mTris;

	// Particle count; grows as vertices are duplicated during tearing
	int mNumVertices;

	// Signed-volume sum at rest and the normalization factor for the
	// volume constraint (see constructor)
	float mRestVolume;
	float mConstraintScale;

	// False when the input mesh was non-manifold or degenerate
	bool mValid;
};
diff --git a/core/convex.h b/core/convex.h
new file mode 100644
index 0000000..98ac52c
--- /dev/null
+++ b/core/convex.h
@@ -0,0 +1,406 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include "maths.h"
+
+#include <vector>
+
+// Thanks to Christian Sigg for the convex mesh builder!
+
+struct ConvexMeshBuilder
+{
+ ConvexMeshBuilder(const Vec4* planes)
+ : mPlanes(planes)
+ {}
+
+ void operator()(uint32_t mask, float scale=1.0f);
+
+ const Vec4* mPlanes;
+ std::vector<Vec3> mVertices;
+ std::vector<uint16_t> mIndices;
+};
+
+namespace
+{
	// 4x4 determinant of the four plane equations (rows (nx,ny,nz,w)),
	// expanded along the w column. Its sign is used as an orientation /
	// visibility predicate by HalfedgeMesh::visible below.
	float det(Vec4 v0, Vec4 v1, Vec4 v2, Vec4 v3)
	{
		// view each plane's xyz normal as a Vec3 without copying
		// (assumes Vec4's layout begins with x, y, z)
		const Vec3& d0 = reinterpret_cast<const Vec3&>(v0);
		const Vec3& d1 = reinterpret_cast<const Vec3&>(v1);
		const Vec3& d2 = reinterpret_cast<const Vec3&>(v2);
		const Vec3& d3 = reinterpret_cast<const Vec3&>(v3);

		return v0.w * Dot(Cross(d1,d2), d3)
			- v1.w * Dot(Cross(d0, d2), d3)
			+ v2.w * Dot(Cross(d0, d1), d3)
			- v3.w * Dot(Cross(d0, d1), d2);
	}
+
+ Vec3 intersect(Vec4 p0, Vec4 p1, Vec4 p2)
+ {
+ const Vec3& d0 = reinterpret_cast<const Vec3&>(p0);
+ const Vec3& d1 = reinterpret_cast<const Vec3&>(p1);
+ const Vec3& d2 = reinterpret_cast<const Vec3&>(p2);
+
+ Vec3 r = (p0.w * Cross(d1, d2)
+ + p1.w * Cross(d2, d0)
+ + p2.w * Cross(d0, d1))
+ / Dot(d0, Cross(d2,d1));
+
+ return Vec3(r.x, r.y, r.z);
+ }
+
	// sentinel index meaning "no vertex / no face / no halfedge"
	const uint16_t sInvalid = uint16_t(-1);
+
+ // restriction: only supports a single patch per vertex.
	// Minimal halfedge mesh used to build the convex hull incrementally in
	// plane (dual) space. Halfedges are allocated in twin pairs, so the
	// opposite of halfedge h is always h ^ 1. All indices are uint16_t;
	// sInvalid marks "none".
	struct HalfedgeMesh
	{
		struct Halfedge
		{
			Halfedge(uint16_t vertex = sInvalid, uint16_t face = sInvalid,
				uint16_t next = sInvalid, uint16_t prev = sInvalid)
				: mVertex(vertex), mFace(face), mNext(next), mPrev(prev)
			{}

			uint16_t mVertex; // to
			uint16_t mFace; // left
			uint16_t mNext; // ccw
			uint16_t mPrev; // cw
		};

		HalfedgeMesh() : mNumTriangles(0) {}

		// Returns the halfedge running from v0 to v1, or sInvalid when the
		// two vertices are not connected. Walks the outgoing halfedges
		// around v0 starting from its stored halfedge.
		uint16_t findHalfedge(uint16_t v0, uint16_t v1)
		{
			uint16_t h = mVertices[v0], start = h;
			while(h != sInvalid && mHalfedges[h].mVertex != v1)
			{
				// step to the next outgoing halfedge around v0
				h = mHalfedges[h ^ 1].mNext;
				if(h == start)
					return sInvalid;
			}
			return h;
		}

		// Links h0 -> h1 in a ccw loop (and h1's prev back to h0).
		void connect(uint16_t h0, uint16_t h1)
		{
			mHalfedges[h0].mNext = h1;
			mHalfedges[h1].mPrev = h0;
		}

		// Adds triangle (v0, v1, v2), creating any missing halfedge pairs,
		// growing the vertex table on demand and patching boundary
		// connectivity around all three corners.
		void addTriangle(uint16_t v0, uint16_t v1, uint16_t v2)
		{
			// add new vertices
			uint16_t n = Max(v0, Max(v1, v2))+1;
			if(mVertices.size() < n)
				mVertices.resize(n, sInvalid);

			// collect halfedges, prev and next of triangle
			uint16_t verts[] = { v0, v1, v2 };
			uint16_t handles[3], prev[3], next[3];
			for(uint16_t i=0; i<3; ++i)
			{
				uint16_t j = (i+1)%3;
				uint16_t h = findHalfedge(verts[i], verts[j]);
				if(h == sInvalid)
				{
					// add new edge (twin pair; the twin is h+1 == h^1)
					h = uint16_t(mHalfedges.size());
					mHalfedges.push_back(Halfedge(verts[j]));
					mHalfedges.push_back(Halfedge(verts[i]));
				}
				handles[i] = h;
				prev[i] = mHalfedges[h].mPrev;
				next[i] = mHalfedges[h].mNext;
			}

			// patch connectivity
			for(uint16_t i=0; i<3; ++i)
			{
				uint16_t j = (i+1)%3;

				mHalfedges[handles[i]].mFace = uint16_t(mFaces.size());

				// connect prev and next
				connect(handles[i], handles[j]);

				if(next[j] == sInvalid) // new next edge, connect opposite
					connect(handles[j]^1, next[i]!=sInvalid ? next[i] : handles[i]^1);

				if(prev[i] == sInvalid) // new prev edge, connect opposite
					connect(prev[j]!=sInvalid ? prev[j] : handles[j]^1, handles[i]^1);

				// prev is boundary, update middle vertex
				if(mHalfedges[handles[i]^1].mFace == sInvalid)
					mVertices[verts[j]] = handles[i]^1;
			}

			assert(mNumTriangles < 0xffff);
			mFaces.push_back(handles[2]);
			++mNumTriangles;
		}

		// Removes face f. Edges left with no face on either side are
		// disconnected and their endpoints updated or marked deleted.
		// Returns a vertex on the hole boundary, or sInvalid when every
		// edge of the face was itself a boundary edge.
		uint16_t removeTriangle(uint16_t f)
		{
			uint16_t result = sInvalid;

			for(uint16_t i=0, h = mFaces[f]; i<3; ++i)
			{
				uint16_t v0 = mHalfedges[h^1].mVertex;
				uint16_t v1 = mHalfedges[h].mVertex;

				mHalfedges[h].mFace = sInvalid;

				if(mHalfedges[h^1].mFace == sInvalid) // was boundary edge, remove
				{
					uint16_t v0Prev = mHalfedges[h ].mPrev;
					uint16_t v0Next = mHalfedges[h^1].mNext;
					uint16_t v1Prev = mHalfedges[h^1].mPrev;
					uint16_t v1Next = mHalfedges[h ].mNext;

					// update halfedge connectivity
					connect(v0Prev, v0Next);
					connect(v1Prev, v1Next);

					// update vertex boundary or delete
					mVertices[v0] = (v0Prev^1) == v0Next ? sInvalid : v0Next;
					mVertices[v1] = (v1Prev^1) == v1Next ? sInvalid : v1Next;
				}
				else
				{
					mVertices[v0] = h; // update vertex boundary
					result = v1;
				}

				h = mHalfedges[h].mNext;
			}

			mFaces[f] = sInvalid;
			--mNumTriangles;

			return result;
		}

		// true if vertex v is in front of face f
		// (orientation test via the 4-plane determinant, with a small
		// epsilon so coplanar points count as not visible)
		bool visible(uint16_t v, uint16_t f)
		{
			uint16_t h = mFaces[f];
			if(h == sInvalid)
				return false;

			uint16_t v0 = mHalfedges[h].mVertex;
			h = mHalfedges[h].mNext;
			uint16_t v1 = mHalfedges[h].mVertex;
			h = mHalfedges[h].mNext;
			uint16_t v2 = mHalfedges[h].mVertex;
			h = mHalfedges[h].mNext;

			return det(mPoints[v], mPoints[v0], mPoints[v1], mPoints[v2]) < -1e-3f;
		}

		/*
		void print() const
		{
			for(uint32_t i=0; i<mFaces.size(); ++i)
			{
				printf("f%u: ", i);
				uint16_t h = mFaces[i];
				if(h == sInvalid)
				{
					printf("deleted\n");
					continue;
				}

				for(int j=0; j<3; ++j)
				{
					printf("h%u -> v%u -> ", uint32_t(h), uint32_t(mHalfedges[h].mVertex));
					h = mHalfedges[h].mNext;
				}

				printf("\n");
			}

			for(uint32_t i=0; i<mVertices.size(); ++i)
			{
				printf("v%u: ", i);
				uint16_t h = mVertices[i];
				if(h == sInvalid)
				{
					printf("deleted\n");
					continue;
				}

				uint16_t start = h;
				do {
					printf("h%u -> v%u, ", uint32_t(h), uint32_t(mHalfedges[h].mVertex));
					h = mHalfedges[h^1].mNext;
				} while (h != start);

				printf("\n");
			}

			for(uint32_t i=0; i<mHalfedges.size(); ++i)
			{
				printf("h%u: v%u, ", i, uint32_t(mHalfedges[i].mVertex));

				if(mHalfedges[i].mFace == sInvalid)
					printf("boundary, ");
				else
					printf("f%u, ", uint32_t(mHalfedges[i].mFace));

				printf("p%u, n%u\n", uint32_t(mHalfedges[i].mPrev), uint32_t(mHalfedges[i].mNext));
			}
		}
		*/

		std::vector<Halfedge> mHalfedges; // twin pairs; opposite of h is h^1
		std::vector<uint16_t> mVertices; // vertex -> (boundary) halfedge
		std::vector<uint16_t> mFaces; // face -> halfedge
		std::vector<Vec4> mPoints; // plane equation per vertex (dual space)
		uint16_t mNumTriangles; // count of live (non-deleted) faces
	};
+}
+
// Builds the convex hull bounded by the first numPlanes planes, appending
// triangles to mVertices/mIndices.
//
// Works in dual space: each plane becomes a "point"; after incremental hull
// construction, each remaining dual face becomes an output vertex (the
// intersection of its three planes) and each dual vertex becomes an output
// polygon, fan-triangulated. 'scale' is currently unused here.
// NOTE(review): construction aborts early (leaving partial output) when the
// intermediate mesh reaches 200 triangles or the index buffer reaches 1000.
void ConvexMeshBuilder::operator()(uint32_t numPlanes, float scale)
{

	if(numPlanes < 4)
		return; // todo: handle degenerate cases

	HalfedgeMesh mesh;

	// gather points (planes, that is)
	mesh.mPoints.reserve(numPlanes);
	for(uint32_t i=0; i < numPlanes; ++i)
		mesh.mPoints.push_back(Vec4(mPlanes[i].x, mPlanes[i].y, mPlanes[i].z, mPlanes[i].w));

	// initialize to tetrahedron from the first four planes
	mesh.addTriangle(0, 1, 2);
	mesh.addTriangle(0, 3, 1);
	mesh.addTriangle(1, 3, 2);
	mesh.addTriangle(2, 3, 0);

	// flip if inside-out
	if(mesh.visible(3, 0))
		std::swap(mesh.mPoints[0], mesh.mPoints[1]);

	// iterate through remaining points: classic incremental hull step --
	// delete every face the new point can see, then re-triangulate the hole
	for(uint16_t i=4; i<mesh.mPoints.size(); ++i)
	{
		// remove any visible triangle
		uint16_t v0 = sInvalid;
		for(uint16_t j=0; j<mesh.mFaces.size(); ++j)
		{
			if(mesh.visible(i, j))
				v0 = Min(v0, mesh.removeTriangle(j));
		}

		if(v0 == sInvalid)
			continue; // no triangle removed

		if(!mesh.mNumTriangles)
			return; // empty mesh

		// find non-deleted boundary vertex
		// NOTE(review): the scan over halfedges is unbounded; it relies on a
		// boundary halfedge always existing after the removals above --
		// confirm this holds for all plane sets
		for(uint16_t h=0; mesh.mVertices[v0] == sInvalid; h+=2)
		{
			if ((mesh.mHalfedges[h ].mFace == sInvalid) ^
				(mesh.mHalfedges[h+1].mFace == sInvalid))
			{
				v0 = mesh.mHalfedges[h].mVertex;
			}
		}

		// tesselate hole: fan of triangles from the new point i around the
		// hole boundary, with 200 triangles as a safety cap
		uint16_t start = v0;
		do {
			uint16_t h = mesh.mVertices[v0];
			uint16_t v1 = mesh.mHalfedges[h].mVertex;
			mesh.addTriangle(v0, v1, i);
			v0 = v1;
		} while(v0 != start && mesh.mNumTriangles < 200);

		if (mesh.mNumTriangles == 200)
		{
			return;
		}
	}

	// convert triangles to vertices (intersection of 3 planes)
	std::vector<uint32_t> face2Vertex(mesh.mFaces.size());
	for(uint32_t i=0; i<mesh.mFaces.size(); ++i)
	{
		face2Vertex[i] = uint32_t(mVertices.size());

		uint16_t h = mesh.mFaces[i];
		if(h == sInvalid)
			continue;

		uint16_t v0 = mesh.mHalfedges[h].mVertex;
		h = mesh.mHalfedges[h].mNext;
		uint16_t v1 = mesh.mHalfedges[h].mVertex;
		h = mesh.mHalfedges[h].mNext;
		uint16_t v2 = mesh.mHalfedges[h].mVertex;

		mVertices.push_back(intersect(mesh.mPoints[v0], mesh.mPoints[v1], mesh.mPoints[v2]));
	}

	// convert vertices to polygons (face one-ring): fan-triangulate the
	// polygon dual to each surviving mesh vertex
	for(uint32_t i=0; i<mesh.mVertices.size(); ++i)
	{
		uint16_t h = mesh.mVertices[i];
		if(h == sInvalid || mesh.mHalfedges[h].mFace == sInvalid)
			continue;

		uint16_t v0 = face2Vertex[mesh.mHalfedges[h].mFace];
		h = mesh.mHalfedges[h].mPrev^1;
		if(h == sInvalid || mesh.mHalfedges[h].mFace == sInvalid)
			continue;

		uint16_t v1 = face2Vertex[mesh.mHalfedges[h].mFace];

		// NOTE(review): 'continue' below re-tests the loop condition without
		// growing mIndices, and h == sInvalid would index out of range on
		// the next iteration -- verify a ring can never contain an invalid
		// halfedge/face at this point
		while(mIndices.size() < 1000)
		{
			h = mesh.mHalfedges[h].mPrev^1;
			if(h == sInvalid || mesh.mHalfedges[h].mFace == sInvalid)
				continue;

			uint16_t v2 = face2Vertex[mesh.mHalfedges[h].mFace];

			if(v0 == v2)
				break;

			mIndices.push_back(v0);
			mIndices.push_back(v2);
			mIndices.push_back(v1);

			v1 = v2;
		}

	}
}
diff --git a/core/core.cpp b/core/core.cpp
new file mode 100644
index 0000000..c0d9d2c
--- /dev/null
+++ b/core/core.cpp
@@ -0,0 +1,29 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "core.h"
+
diff --git a/core/core.h b/core/core.h
new file mode 100644
index 0000000..7c7d940
--- /dev/null
+++ b/core/core.h
@@ -0,0 +1,158 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#define ENABLE_VERBOSE_OUTPUT 0
+#define ENABLE_APIC_CAPTURE 0
+#define ENABLE_PERFALYZE_CAPTURE 0
+
+#if ENABLE_VERBOSE_OUTPUT
+#define VERBOSE(a) a##;
+#else
+#define VERBOSE(a)
+#endif
+
+#define Super __super
+
+// basically just a collection of macros and types
+#ifndef UNUSED
+#define UNUSED(x) (void)x;
+#endif
+
+#define NOMINMAX
+
+#if !PLATFORM_OPENCL
+#include <cassert>
+#endif
+
+#include "types.h"
+
+#if !PLATFORM_SPU && !PLATFORM_OPENCL
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+#include <functional>
+#endif
+
+// disable some warnings
+#if _WIN32
+#pragma warning(disable: 4996) // secure io
+#pragma warning(disable: 4100) // unreferenced param
+#pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+#endif
+
+// alignment helpers
+#define DEFAULT_ALIGNMENT 16
+
+#if PLATFORM_LINUX
+#define ALIGN_N(x)
+#define ENDALIGN_N(x) __attribute__ ((aligned (x)))
+#else
+#define ALIGN_N(x) __declspec(align(x))
+#define END_ALIGN_N(x)
+#endif
+
+#define ALIGN ALIGN_N(DEFAULT_ALIGNMENT)
+#define END_ALIGN END_ALIGN_N(DEFAULT_ALIGNMENT)
+
// Returns true when n is a positive power of two.
// Fixed: the bit trick (n & (n-1)) == 0 alone misclassified 0 as a power of
// two (and INT_MIN, where n-1 overflows, is UB); guard non-positive values.
inline bool IsPowerOfTwo(int n)
{
	return n > 0 && (n&(n-1))==0;
}
+
// align a ptr up to a power-of-two boundary
// Fixed: the mask was computed as ~(alignment-1) in 32-bit uint32_t
// arithmetic; when ANDed with a 64-bit uintptr_t it zero-extends and clears
// the top 32 bits of the pointer. Widen to uintptr_t before inverting.
template <typename T>
inline T* AlignPtr(T* p, uint32_t alignment)
{
	// power-of-two check written inline so the header helper has no
	// ordering dependency (equivalent to asserting IsPowerOfTwo(alignment))
	assert(alignment > 0 && (alignment & (alignment-1)) == 0);

	// round the address up to the next multiple of alignment
	uintptr_t up = reinterpret_cast<uintptr_t>(p);
	return reinterpret_cast<T*>((up+(alignment-1)) & ~uintptr_t(alignment-1));
}
+
+// align an unsigned value to a power of two
+inline uint32_t Align(uint32_t val, uint32_t alignment)
+{
+ assert(IsPowerOfTwo(alignment));
+
+ return (val+(alignment-1))& ~(alignment-1);
+}
+
// Returns true when the pointer address is a multiple of the given
// power-of-two alignment.
inline bool IsAligned(void* p, uint32_t alignment)
{
	const uintptr_t address = reinterpret_cast<uintptr_t>(p);
	return (address & uintptr_t(alignment - 1)) == 0;
}
+
// Endian helpers
// Returns a copy of val with its byte order reversed, for converting
// between little- and big-endian representations of any trivially
// copyable type.
template <typename T>
T ByteSwap(const T& val)
{
	T result = val;
	uint8_t* bytes = reinterpret_cast<uint8_t*>(&result);

	// swap symmetric byte pairs working inwards from both ends
	for (size_t lo = 0, hi = sizeof(T) - 1; lo < hi; ++lo, --hi)
		std::swap(bytes[lo], bytes[hi]);

	return result;
}
+
// Endianness selection flags, derived from platform macros.
#ifndef LITTLE_ENDIAN
// NOTE(review): expands to the WIN32 macro; on non-Windows builds where
// WIN32 is undefined this evaluates to 0 in #if checks -- confirm that is
// the intended default for non-Windows little-endian targets
#define LITTLE_ENDIAN WIN32
#endif

#ifndef BIG_ENDIAN
// big-endian on the PS3/SPU console targets
#define BIG_ENDIAN PLATFORM_PS3 || PLATFORM_SPU
#endif

#if BIG_ENDIAN
// swap to produce the on-disk (little-endian) representation
#define ToLittleEndian(x) ByteSwap(x)
#else
#define ToLittleEndian(x) x
#endif
+
+//#include "platform.h"
+
+//#define sizeof_array(x) (sizeof(x)/sizeof(*x))
// Counted-array helper: yields the element count of a C array, deduced at
// compile time from the reference parameter (rejects pointers, unlike the
// classic sizeof(x)/sizeof(*x) macro).
template <typename T, size_t ArrayLength>
size_t sizeof_array(const T (&)[ArrayLength])
{
	return ArrayLength;
}
+
// functor designed for use in the stl
// Deletes the pointed-to object, e.g. with std::for_each over a container
// of owning raw pointers.
// Fixed: no longer derives from std::unary_function, which was deprecated
// in C++11 and removed in C++17; the member typedefs it used to provide are
// declared directly for backward compatibility.
template <typename T>
class free_ptr
{
public:

	typedef T* argument_type;
	typedef void result_type;

	void operator()(const T* ptr) const
	{
		delete ptr;
	}
};
diff --git a/core/extrude.cpp b/core/extrude.cpp
new file mode 100644
index 0000000..637bed5
--- /dev/null
+++ b/core/extrude.cpp
@@ -0,0 +1,114 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "maths.h"
+
+#include <vector>
+
+void Extrude(const Vec3* points, int numPoints, std::vector<Vec3>& vertices, std::vector<Vec3>& normals, std::vector<int>& triangles, float radius, int resolution, int smoothing)
+{
+ if (numPoints < 2)
+ return;
+
+ Vec3 u, v;
+ Vec3 w = SafeNormalize(Vec3(points[1]) - Vec3(points[0]), Vec3(0.0f, 1.0f, 0.0f));
+
+ BasisFromVector(w, &u, &v);
+
+ Matrix44 frame;
+ frame.SetCol(0, Vec4(u.x, u.y, u.z, 0.0f));
+ frame.SetCol(1, Vec4(v.x, v.y, v.z, 0.0f));
+ frame.SetCol(2, Vec4(w.x, w.y, w.z, 0.0f));
+ frame.SetCol(3, Vec4(0.0f, 0.0f, 0.0f, 1.0f));
+
+ for (int i = 0; i < numPoints - 1; ++i)
+ {
+ Vec3 next;
+
+ if (i < numPoints - 1)
+ next = Normalize(Vec3(points[i + 1]) - Vec3(points[i - 1]));
+ else
+ next = Normalize(Vec3(points[i]) - Vec3(points[i - 1]));
+
+ int a = Max(i - 1, 0);
+ int b = i;
+ int c = Min(i + 1, numPoints - 1);
+ int d = Min(i + 2, numPoints - 1);
+
+ Vec3 p1 = Vec3(points[b]);
+ Vec3 p2 = Vec3(points[c]);
+ Vec3 m1 = 0.5f*(Vec3(points[c]) - Vec3(points[a]));
+ Vec3 m2 = 0.5f*(Vec3(points[d]) - Vec3(points[b]));
+
+ // ensure last segment handled correctly
+ int segments = (i < numPoints - 2) ? smoothing : smoothing + 1;
+
+ for (int s = 0; s < segments; ++s)
+ {
+ Vec3 pos = HermiteInterpolate(p1, p2, m1, m2, s / float(smoothing));
+ Vec3 dir = Normalize(HermiteTangent(p1, p2, m1, m2, s / float(smoothing)));
+
+ Vec3 cur = frame.GetAxis(2);
+ const float angle = acosf(Dot(cur, dir));
+
+ // if parallel then don't need to do anything
+ if (fabsf(angle) > 0.001f)
+ frame = RotationMatrix(angle, SafeNormalize(Cross(cur, dir)))*frame;
+
+ size_t startIndex = vertices.size();
+
+ for (int c = 0; c < resolution; ++c)
+ {
+ float angle = k2Pi / resolution;
+
+ // transform position and normal to world space
+ Vec4 v = frame*Vec4(cosf(angle*c), sinf(angle*c), 0.0f, 0.0f);
+
+ vertices.push_back(Vec3(v)*radius + pos);
+ normals.push_back(Vec3(v));
+ }
+
+ // output triangles
+ if (startIndex != 0)
+ {
+ for (int i = 0; i < resolution; ++i)
+ {
+ int curIndex = static_cast<int>(startIndex + i);
+ int nextIndex = static_cast<int>(startIndex + (i + 1) % resolution);
+
+ triangles.push_back(curIndex);
+ triangles.push_back(curIndex - resolution);
+ triangles.push_back(nextIndex - resolution);
+
+ triangles.push_back(nextIndex - resolution);
+ triangles.push_back(nextIndex);
+ triangles.push_back(curIndex);
+ }
+ }
+ }
+ }
+}
diff --git a/core/extrude.h b/core/extrude.h
new file mode 100644
index 0000000..bcbc6fa
--- /dev/null
+++ b/core/extrude.h
@@ -0,0 +1,35 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include <vector>
+
+#include "maths.h"
+
+// extrudes a circle along a Hermite curve defined by curvePoints, resolution is the number of circle segments, smoothing is the number of segments between points
+void Extrude(const Vec3* points, int numPoints, std::vector<Vec3>& positions, std::vector<Vec3>& normals, std::vector<int>& indices, float radius, int resolution, int smoothing);
diff --git a/core/mat22.h b/core/mat22.h
new file mode 100644
index 0000000..b2515be
--- /dev/null
+++ b/core/mat22.h
@@ -0,0 +1,174 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include "maths.h"
+
+// 2x2 matrix stored as two column vectors (column-major)
+struct Matrix22
+{
+	// default constructor leaves the matrix uninitialized
+	CUDA_CALLABLE Matrix22() {}
+	// element constructor reading in row-major order: (a b / c d)
+	CUDA_CALLABLE Matrix22(float a, float b, float c, float d)
+	{
+		cols[0] = Vec2(a, c);
+		cols[1] = Vec2(b, d);
+	}
+
+	// construct from two column vectors
+	CUDA_CALLABLE Matrix22(const Vec2& c1, const Vec2& c2)
+	{
+		cols[0] = c1;
+		cols[1] = c2;
+	}
+
+	// element access as (row i, column j); relies on Vec2's float* conversion
+	CUDA_CALLABLE float operator()(int i, int j) const { return static_cast<const float*>(cols[j])[i]; }
+	CUDA_CALLABLE float& operator()(int i, int j) { return static_cast<float*>(cols[j])[i]; }
+
+	Vec2 cols[2];
+
+	// identity matrix; function-local static avoids order-of-init issues
+	static inline Matrix22 Identity() { static const Matrix22 sIdentity(Vec2(1.0f, 0.0f), Vec2(0.0f, 1.0f)); return sIdentity; }
+};
+
+// scalar * matrix
+CUDA_CALLABLE inline Matrix22 Multiply(float s, const Matrix22& m)
+{
+	Matrix22 r = m;
+	r.cols[0] *= s;
+	r.cols[1] *= s;
+	return r;
+}
+
+// matrix * matrix; each result column is a linear combination of a's columns
+CUDA_CALLABLE inline Matrix22 Multiply(const Matrix22& a, const Matrix22& b)
+{
+	Matrix22 r;
+	r.cols[0] = a.cols[0]*b.cols[0].x + a.cols[1]*b.cols[0].y;
+	r.cols[1] = a.cols[0]*b.cols[1].x + a.cols[1]*b.cols[1].y;
+	return r;
+}
+
+// component-wise matrix addition
+CUDA_CALLABLE inline Matrix22 Add(const Matrix22& a, const Matrix22& b)
+{
+	return Matrix22(a.cols[0]+b.cols[0], a.cols[1]+b.cols[1]);
+}
+
+// matrix * column vector
+CUDA_CALLABLE inline Vec2 Multiply(const Matrix22& a, const Vec2& x)
+{
+	return a.cols[0]*x.x + a.cols[1]*x.y;
+}
+
+// operator sugar forwarding to the named functions above
+CUDA_CALLABLE inline Matrix22 operator*(float s, const Matrix22& a) { return Multiply(s, a); }
+CUDA_CALLABLE inline Matrix22 operator*(const Matrix22& a, float s) { return Multiply(s, a); }
+CUDA_CALLABLE inline Matrix22 operator*(const Matrix22& a, const Matrix22& b) { return Multiply(a, b); }
+CUDA_CALLABLE inline Matrix22 operator+(const Matrix22& a, const Matrix22& b) { return Add(a, b); }
+CUDA_CALLABLE inline Matrix22 operator-(const Matrix22& a, const Matrix22& b) { return Add(a, -1.0f*b); }
+CUDA_CALLABLE inline Matrix22& operator+=(Matrix22& a, const Matrix22& b) { a = a+b; return a; }
+CUDA_CALLABLE inline Matrix22& operator-=(Matrix22& a, const Matrix22& b) { a = a-b; return a; }
+CUDA_CALLABLE inline Matrix22& operator*=(Matrix22& a, float s) { a = a*s; return a; }
+CUDA_CALLABLE inline Vec2 operator*(const Matrix22& a, const Vec2& x) { return Multiply(a, x); }
+
+// determinant of a 2x2 matrix: ad - bc
+CUDA_CALLABLE inline float Determinant(const Matrix22& m)
+{
+	return m(0,0)*m(1,1)-m(1,0)*m(0,1);
+}
+
+// inverse via the 2x2 adjugate formula; on (near-)singular input the
+// determinant out-param is set to 0 and the matrix is returned unchanged
+CUDA_CALLABLE inline Matrix22 Inverse(const Matrix22& m, float& det)
+{
+	det = Determinant(m);
+
+	if (fabsf(det) > FLT_EPSILON)
+	{
+		Matrix22 inv;
+		inv(0,0) = m(1,1);
+		inv(1,1) = m(0,0);
+		inv(0,1) = -m(0,1);
+		inv(1,0) = -m(1,0);
+
+		return Multiply(1.0f/det, inv);
+	}
+	else
+	{
+		det = 0.0f;
+		return m;
+	}
+}
+
+// matrix transpose (swap off-diagonal elements)
+CUDA_CALLABLE inline Matrix22 Transpose(const Matrix22& a)
+{
+	Matrix22 r;
+	r(0,0) = a(0,0);
+	r(0,1) = a(1,0);
+	r(1,0) = a(0,1);
+	r(1,1) = a(1,1);
+	return r;
+}
+
+// sum of diagonal elements
+CUDA_CALLABLE inline float Trace(const Matrix22& a)
+{
+	return a(0,0)+a(1,1);
+}
+
+// counter-clockwise rotation by theta radians
+CUDA_CALLABLE inline Matrix22 RotationMatrix(float theta)
+{
+	return Matrix22(Vec2(cosf(theta), sinf(theta)), Vec2(-sinf(theta), cosf(theta)));
+}
+
+// outer product of a and b, b is considered a row vector
+CUDA_CALLABLE inline Matrix22 Outer(const Vec2& a, const Vec2& b)
+{
+	return Matrix22(a*b.x, a*b.y);
+}
+
+// returns the orthonormal (Q) factor of m; in 2D this is just the first
+// column normalized plus its CCW perpendicular
+CUDA_CALLABLE inline Matrix22 QRDecomposition(const Matrix22& m)
+{
+	Vec2 a = Normalize(m.cols[0]);
+	Matrix22 q(a, PerpCCW(a));
+
+	return q;
+}
+
+// closed-form 2D polar decomposition (rotation factor of m)
+// NOTE(review): the m + adj(m)^T trick yields a rotation only when
+// det(m) > 0 — confirm callers never pass reflections
+CUDA_CALLABLE inline Matrix22 PolarDecomposition(const Matrix22& m)
+{
+	/*
+	//iterative method
+
+	float det;
+	Matrix22 q = m;
+
+	for (int i=0; i < 4; ++i)
+	{
+		q = 0.5f*(q + Inverse(Transpose(q), det));
+	}
+	*/
+
+	Matrix22 q = m + Matrix22(m(1,1), -m(1,0), -m(0,1), m(0,0));
+
+	float s = Length(q.cols[0]);
+	q.cols[0] /= s;
+	q.cols[1] /= s;
+
+	return q;
+}
diff --git a/core/mat33.h b/core/mat33.h
new file mode 100644
index 0000000..b191819
--- /dev/null
+++ b/core/mat33.h
@@ -0,0 +1,205 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include "maths.h"
+
+#include "quat.h"
+#include "vec3.h"
+
+// 3x3 matrix stored as three column vectors (column-major)
+struct Matrix33
+{
+	// default constructor leaves the matrix uninitialized
+	CUDA_CALLABLE Matrix33() {}
+	// construct from three column vectors
+	CUDA_CALLABLE Matrix33(const Vec3& c1, const Vec3& c2, const Vec3& c3)
+	{
+		cols[0] = c1;
+		cols[1] = c2;
+		cols[2] = c3;
+	}
+
+	// build a rotation matrix from a quaternion by rotating the basis vectors
+	CUDA_CALLABLE Matrix33(const Quat& q)
+	{
+		cols[0] = Rotate(q, Vec3(1.0f, 0.0f, 0.0f));
+		cols[1] = Rotate(q, Vec3(0.0f, 1.0f, 0.0f));
+		cols[2] = Rotate(q, Vec3(0.0f, 0.0f, 1.0f));
+	}
+
+	// element access as (row i, column j); relies on Vec3's float* conversion
+	CUDA_CALLABLE float operator()(int i, int j) const { return static_cast<const float*>(cols[j])[i]; }
+	CUDA_CALLABLE float& operator()(int i, int j) { return static_cast<float*>(cols[j])[i]; }
+
+	Vec3 cols[3];
+
+	// identity matrix (constructed per call so it works in device code)
+	CUDA_CALLABLE static inline Matrix33 Identity() { const Matrix33 sIdentity(Vec3(1.0f, 0.0f, 0.0f), Vec3(0.0f, 1.0f, 0.0f), Vec3(0.0f, 0.0f, 1.0f)); return sIdentity; }
+};
+
+// scalar * matrix
+CUDA_CALLABLE inline Matrix33 Multiply(float s, const Matrix33& m)
+{
+	Matrix33 r = m;
+	r.cols[0] *= s;
+	r.cols[1] *= s;
+	r.cols[2] *= s;
+	return r;
+}
+
+// matrix * column vector
+CUDA_CALLABLE inline Vec3 Multiply(const Matrix33& a, const Vec3& x)
+{
+	return a.cols[0]*x.x + a.cols[1]*x.y + a.cols[2]*x.z;
+}
+
+CUDA_CALLABLE inline Vec3 operator*(const Matrix33& a, const Vec3& x) { return Multiply(a, x); }
+
+// matrix * matrix, column by column
+CUDA_CALLABLE inline Matrix33 Multiply(const Matrix33& a, const Matrix33& b)
+{
+	Matrix33 r;
+	r.cols[0] = a*b.cols[0];
+	r.cols[1] = a*b.cols[1];
+	r.cols[2] = a*b.cols[2];
+	return r;
+}
+
+// component-wise matrix addition
+CUDA_CALLABLE inline Matrix33 Add(const Matrix33& a, const Matrix33& b)
+{
+	return Matrix33(a.cols[0]+b.cols[0], a.cols[1]+b.cols[1], a.cols[2]+b.cols[2]);
+}
+
+// determinant via the scalar triple product of the columns
+CUDA_CALLABLE inline float Determinant(const Matrix33& m)
+{
+	return Dot(m.cols[0], Cross(m.cols[1], m.cols[2]));
+}
+
+// matrix transpose
+CUDA_CALLABLE inline Matrix33 Transpose(const Matrix33& a)
+{
+	Matrix33 r;
+	for (uint32_t i=0; i < 3; ++i)
+		for(uint32_t j=0; j < 3; ++j)
+			r(i, j) = a(j, i);
+
+	return r;
+}
+
+// sum of diagonal elements
+CUDA_CALLABLE inline float Trace(const Matrix33& a)
+{
+	return a(0,0)+a(1,1)+a(2,2);
+}
+
+// outer product of a and b, b is considered a row vector
+CUDA_CALLABLE inline Matrix33 Outer(const Vec3& a, const Vec3& b)
+{
+	return Matrix33(a*b.x, a*b.y, a*b.z);
+}
+
+// inverse via the adjugate / determinant; success is false (and a
+// default-constructed matrix returned) when |det| <= 1e-8
+CUDA_CALLABLE inline Matrix33 Inverse(const Matrix33& a, bool& success)
+{
+	float s = Determinant(a);
+
+	const float eps = 1.e-8f;
+
+	if (fabsf(s) > eps)
+	{
+		Matrix33 b;
+
+		// b is the adjugate (transpose of the cofactor matrix)
+		b(0,0) = a(1,1)*a(2,2) - a(1,2)*a(2,1); b(0,1) = a(0,2)*a(2,1) - a(0,1)*a(2,2); b(0,2) = a(0,1)*a(1,2) - a(0,2)*a(1,1);
+		b(1,0) = a(1,2)*a(2,0) - a(1,0)*a(2,2); b(1,1) = a(0,0)*a(2,2) - a(0,2)*a(2,0); b(1,2) = a(0,2)*a(1,0) - a(0,0)*a(1,2);
+		b(2,0) = a(1,0)*a(2,1) - a(1,1)*a(2,0); b(2,1) = a(0,1)*a(2,0) - a(0,0)*a(2,1); b(2,2) = a(0,0)*a(1,1) - a(0,1)*a(1,0);
+
+		success = true;
+
+		return Multiply(1.0f/s, b);
+	}
+	else
+	{
+		success = false;
+		return Matrix33();
+	}
+}
+
+// operator sugar forwarding to the named functions above
+CUDA_CALLABLE inline Matrix33 operator*(float s, const Matrix33& a) { return Multiply(s, a); }
+CUDA_CALLABLE inline Matrix33 operator*(const Matrix33& a, float s) { return Multiply(s, a); }
+CUDA_CALLABLE inline Matrix33 operator*(const Matrix33& a, const Matrix33& b) { return Multiply(a, b); }
+CUDA_CALLABLE inline Matrix33 operator+(const Matrix33& a, const Matrix33& b) { return Add(a, b); }
+CUDA_CALLABLE inline Matrix33 operator-(const Matrix33& a, const Matrix33& b) { return Add(a, -1.0f*b); }
+CUDA_CALLABLE inline Matrix33& operator+=(Matrix33& a, const Matrix33& b) { a = a+b; return a; }
+CUDA_CALLABLE inline Matrix33& operator-=(Matrix33& a, const Matrix33& b) { a = a-b; return a; }
+CUDA_CALLABLE inline Matrix33& operator*=(Matrix33& a, float s) { a.cols[0]*=s; a.cols[1]*=s; a.cols[2]*=s; return a; }
+
+// construct a quaternion from a rotation matrix; branches on the largest
+// of (trace, diagonal elements) so the sqrt argument stays well away from
+// zero for numerical stability (Shepperd-style extraction)
+template <typename T>
+CUDA_CALLABLE inline XQuat<T>::XQuat(const Matrix33& m)
+{
+	float tr = m(0, 0) + m(1, 1) + m(2, 2), h;
+	if(tr >= 0)
+	{
+		// trace-dominant case: recover w first
+		h = sqrtf(tr + 1);
+		w = 0.5f * h;
+		h = 0.5f / h;
+
+		x = (m(2, 1) - m(1, 2)) * h;
+		y = (m(0, 2) - m(2, 0)) * h;
+		z = (m(1, 0) - m(0, 1)) * h;
+	}
+	else
+	{
+		// pick the largest diagonal element to recover first
+		unsigned int i = 0;
+		if(m(1, 1) > m(0, 0))
+			i = 1;
+		if(m(2, 2) > m(i, i))
+			i = 2;
+		switch(i)
+		{
+		case 0:
+			h = sqrtf((m(0, 0) - (m(1, 1) + m(2, 2))) + 1);
+			x = 0.5f * h;
+			h = 0.5f / h;
+
+			y = (m(0, 1) + m(1, 0)) * h;
+			z = (m(2, 0) + m(0, 2)) * h;
+			w = (m(2, 1) - m(1, 2)) * h;
+			break;
+		case 1:
+			h = sqrtf((m(1, 1) - (m(2, 2) + m(0, 0))) + 1);
+			y = 0.5f * h;
+			h = 0.5f / h;
+
+			z = (m(1, 2) + m(2, 1)) * h;
+			x = (m(0, 1) + m(1, 0)) * h;
+			w = (m(0, 2) - m(2, 0)) * h;
+			break;
+		case 2:
+			h = sqrtf((m(2, 2) - (m(0, 0) + m(1, 1))) + 1);
+			z = 0.5f * h;
+			h = 0.5f / h;
+
+			x = (m(2, 0) + m(0, 2)) * h;
+			y = (m(1, 2) + m(2, 1)) * h;
+			w = (m(1, 0) - m(0, 1)) * h;
+			break;
+		default: // Make compiler happy
+			x = y = z = w = 0;
+			break;
+		}
+	}
+}
+
diff --git a/core/mat44.h b/core/mat44.h
new file mode 100644
index 0000000..5e125f9
--- /dev/null
+++ b/core/mat44.h
@@ -0,0 +1,280 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+// stores column vectors in column major order
+// stores column vectors in column major order
+template <typename T>
+class XMatrix44
+{
+public:
+
+	// zero-initialized by default
+	CUDA_CALLABLE XMatrix44() { memset(columns, 0, sizeof(columns)); }
+	// construct from 16 contiguous values in column-major order
+	CUDA_CALLABLE XMatrix44(const T* d) { assert(d); memcpy(columns, d, sizeof(*this)); }
+	// element constructor; cRC names element at row R, column C
+	CUDA_CALLABLE XMatrix44(T c11, T c21, T c31, T c41,
+		T c12, T c22, T c32, T c42,
+		T c13, T c23, T c33, T c43,
+		T c14, T c24, T c34, T c44)
+	{
+		columns[0][0] = c11;
+		columns[0][1] = c21;
+		columns[0][2] = c31;
+		columns[0][3] = c41;
+
+		columns[1][0] = c12;
+		columns[1][1] = c22;
+		columns[1][2] = c32;
+		columns[1][3] = c42;
+
+		columns[2][0] = c13;
+		columns[2][1] = c23;
+		columns[2][2] = c33;
+		columns[2][3] = c43;
+
+		columns[3][0] = c14;
+		columns[3][1] = c24;
+		columns[3][2] = c34;
+		columns[3][3] = c44;
+	}
+
+	// construct from four column vectors
+	CUDA_CALLABLE XMatrix44(const Vec4& c1, const Vec4& c2, const Vec4& c3, const Vec4& c4)
+	{
+		columns[0][0] = c1.x;
+		columns[0][1] = c1.y;
+		columns[0][2] = c1.z;
+		columns[0][3] = c1.w;
+
+		columns[1][0] = c2.x;
+		columns[1][1] = c2.y;
+		columns[1][2] = c2.z;
+		columns[1][3] = c2.w;
+
+		columns[2][0] = c3.x;
+		columns[2][1] = c3.y;
+		columns[2][2] = c3.z;
+		columns[2][3] = c3.w;
+
+		columns[3][0] = c4.x;
+		columns[3][1] = c4.y;
+		columns[3][2] = c4.z;
+		columns[3][3] = c4.w;
+	}
+
+	// raw pointer access to the 16 contiguous elements
+	CUDA_CALLABLE operator T* () { return &columns[0][0]; }
+	CUDA_CALLABLE operator const T* () const { return &columns[0][0]; }
+
+	// right multiply
+	CUDA_CALLABLE XMatrix44<T> operator * (const XMatrix44<T>& rhs) const
+	{
+		XMatrix44<T> r;
+		MatrixMultiply(*this, rhs, r);
+		return r;
+	}
+
+	// right multiply
+	CUDA_CALLABLE XMatrix44<T>& operator *= (const XMatrix44<T>& rhs)
+	{
+		XMatrix44<T> r;
+		MatrixMultiply(*this, rhs, r);
+		*this = r;
+
+		return *this;
+	}
+
+	// scalar multiplication
+	CUDA_CALLABLE XMatrix44<T>& operator *= (const T& s)
+	{
+		for (int c=0; c < 4; ++c)
+		{
+			for (int r=0; r < 4; ++r)
+			{
+				columns[c][r] *= s;
+			}
+		}
+
+		return *this;
+	}
+
+	// result = lhs*rhs; all three arrays must be distinct (asserted)
+	CUDA_CALLABLE void MatrixMultiply(const T* __restrict lhs, const T* __restrict rhs, T* __restrict result) const
+	{
+		assert(lhs != rhs);
+		assert(lhs != result);
+		assert(rhs != result);
+
+		for (int i=0; i < 4; ++i)
+		{
+			for (int j=0; j < 4; ++j)
+			{
+				result[j*4+i] = rhs[j*4+0]*lhs[i+0];
+				result[j*4+i] += rhs[j*4+1]*lhs[i+4];
+				result[j*4+i] += rhs[j*4+2]*lhs[i+8];
+				result[j*4+i] += rhs[j*4+3]*lhs[i+12];
+			}
+		}
+	}
+
+	CUDA_CALLABLE void SetCol(int index, const Vec4& c)
+	{
+		columns[index][0] = c.x;
+		columns[index][1] = c.y;
+		columns[index][2] = c.z;
+		columns[index][3] = c.w;
+	}
+
+	// convenience overloads
+	CUDA_CALLABLE void SetAxis(uint32_t index, const XVector3<T>& a)
+	{
+		columns[index][0] = a.x;
+		columns[index][1] = a.y;
+		columns[index][2] = a.z;
+		columns[index][3] = 0.0f;
+	}
+
+	CUDA_CALLABLE void SetTranslation(const Point3& p)
+	{
+		columns[3][0] = p.x;
+		columns[3][1] = p.y;
+		columns[3][2] = p.z;
+		columns[3][3] = 1.0f;
+	}
+
+	// non-owning views over column storage (reinterpret as smaller vectors)
+	CUDA_CALLABLE const Vec3& GetAxis(int i) const { return *reinterpret_cast<const Vec3*>(&columns[i]); }
+	CUDA_CALLABLE const Vec4& GetCol(int i) const { return *reinterpret_cast<const Vec4*>(&columns[i]); }
+	CUDA_CALLABLE const Point3& GetTranslation() const { return *reinterpret_cast<const Point3*>(&columns[3]); }
+
+	CUDA_CALLABLE Vec4 GetRow(int i) const { return Vec4(columns[0][i], columns[1][i], columns[2][i], columns[3][i]); }
+
+	// storage uses the template element type; was hard-coded to float,
+	// which silently truncated any non-float instantiation
+	T columns[4][4];
+
+	// defined for T=float in maths.cpp
+	static XMatrix44<T> kIdentity;
+
+};
+
+// right multiply a point assumes w of 1
+// right multiply a point assumes w of 1
+template <typename T>
+CUDA_CALLABLE Point3 Multiply(const XMatrix44<T>& mat, const Point3& v)
+{
+	Point3 r;
+	r.x = v.x*mat[0] + v.y*mat[4] + v.z*mat[8] + mat[12];
+	r.y = v.x*mat[1] + v.y*mat[5] + v.z*mat[9] + mat[13];
+	r.z = v.x*mat[2] + v.y*mat[6] + v.z*mat[10] + mat[14];
+
+	return r;
+}
+
+// right multiply a vector3 assumes a w of 0 (rotation/scale only, no translation)
+template <typename T>
+CUDA_CALLABLE XVector3<T> Multiply(const XMatrix44<T>& mat, const XVector3<T>& v)
+{
+	XVector3<T> r;
+	r.x = v.x*mat[0] + v.y*mat[4] + v.z*mat[8];
+	r.y = v.x*mat[1] + v.y*mat[5] + v.z*mat[9];
+	r.z = v.x*mat[2] + v.y*mat[6] + v.z*mat[10];
+
+	return r;
+}
+
+// right multiply a vector4 (full homogeneous transform)
+template <typename T>
+CUDA_CALLABLE XVector4<T> Multiply(const XMatrix44<T>& mat, const XVector4<T>& v)
+{
+	XVector4<T> r;
+	r.x = v.x*mat[0] + v.y*mat[4] + v.z*mat[8] + v.w*mat[12];
+	r.y = v.x*mat[1] + v.y*mat[5] + v.z*mat[9] + v.w*mat[13];
+	r.z = v.x*mat[2] + v.y*mat[6] + v.z*mat[10] + v.w*mat[14];
+	r.w = v.x*mat[3] + v.y*mat[7] + v.z*mat[11] + v.w*mat[15];
+
+	return r;
+}
+
+// operator sugar forwarding to the Multiply overloads above
+template <typename T>
+CUDA_CALLABLE Point3 operator*(const XMatrix44<T>& mat, const Point3& v)
+{
+	return Multiply(mat, v);
+}
+
+template <typename T>
+CUDA_CALLABLE XVector4<T> operator*(const XMatrix44<T>& mat, const XVector4<T>& v)
+{
+	return Multiply(mat, v);
+}
+
+template <typename T>
+CUDA_CALLABLE XVector3<T> operator*(const XMatrix44<T>& mat, const XVector3<T>& v)
+{
+	return Multiply(mat, v);
+}
+
+// returns the transpose of m
+template<typename T>
+CUDA_CALLABLE inline XMatrix44<T> Transpose(const XMatrix44<T>& m)
+{
+	// use the generic element type; this was hard-coded to
+	// XMatrix44<float>, which breaks non-float instantiations
+	XMatrix44<T> inv;
+
+	// transpose
+	for (uint32_t c=0; c < 4; ++c)
+	{
+		for (uint32_t r=0; r < 4; ++r)
+		{
+			inv.columns[c][r] = m.columns[r][c];
+		}
+	}
+
+	return inv;
+}
+
+// fast inverse of a rigid-body transform: transposes the upper 3x3 and
+// back-rotates the negated translation
+// NOTE(review): only correct when the upper 3x3 is orthonormal (pure
+// rotation, no scale/shear) — confirm callers respect this
+template <typename T>
+CUDA_CALLABLE XMatrix44<T> AffineInverse(const XMatrix44<T>& m)
+{
+	XMatrix44<T> inv;
+
+	// transpose upper 3x3
+	for (int c=0; c < 3; ++c)
+	{
+		for (int r=0; r < 3; ++r)
+		{
+			inv.columns[c][r] = m.columns[r][c];
+		}
+	}
+
+	// multiply -translation by upper 3x3 transpose
+	inv.columns[3][0] = -Dot3(m.columns[3], m.columns[0]);
+	inv.columns[3][1] = -Dot3(m.columns[3], m.columns[1]);
+	inv.columns[3][2] = -Dot3(m.columns[3], m.columns[2]);
+	inv.columns[3][3] = 1.0f;
+
+	return inv;
+}
+
+// outer product of a and b, b is considered a row vector
+CUDA_CALLABLE inline XMatrix44<float> Outer(const Vec4& a, const Vec4& b)
+{
+	return XMatrix44<float>(a*b.x, a*b.y, a*b.z, a*b.w);
+}
+
+// convenience
+typedef XMatrix44<float> Mat44;
+typedef XMatrix44<float> Matrix44;
+
diff --git a/core/maths.cpp b/core/maths.cpp
new file mode 100644
index 0000000..17218e4
--- /dev/null
+++ b/core/maths.cpp
@@ -0,0 +1,68 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "maths.h"
+
+uint32_t seed1;
+uint32_t seed2;
+
+// seed the XOR-shift generator used by Rand()/Randf() with fixed
+// constants so runs are reproducible
+void RandInit()
+{
+	seed1 = 315645664;
+	seed2 = seed1 ^ 0x13ab45fe;
+}
+
+static float s_identity[4][4] = { { 1.0f, 0.0f, 0.0f, 0.0f },
+{ 0.0f, 1.0f, 0.0f, 0.0f },
+{ 0.0f, 0.0f, 1.0f, 0.0f },
+{ 0.0f, 0.0f, 0.0f, 1.0f } };
+
+template <>
+XMatrix44<float> XMatrix44<float>::kIdentity(s_identity[0]);
+
+
+// construct a Colour from a named preset
+// NOTE(review): there is no default branch — a Preset value not listed
+// here leaves the colour components unassigned; confirm the enum has
+// exactly these five values
+Colour::Colour(Colour::Preset p)
+{
+	switch (p)
+	{
+	case kRed:
+		*this = Colour(1.0f, 0.0f, 0.0f);
+		break;
+	case kGreen:
+		*this = Colour(0.0f, 1.0f, 0.0f);
+		break;
+	case kBlue:
+		*this = Colour(0.0f, 0.0f, 1.0f);
+		break;
+	case kWhite:
+		*this = Colour(1.0f, 1.0f, 1.0f);
+		break;
+	case kBlack:
+		*this = Colour(0.0f, 0.0f, 0.0f);
+		break;
+	};
+}
diff --git a/core/maths.h b/core/maths.h
new file mode 100644
index 0000000..19be818
--- /dev/null
+++ b/core/maths.h
@@ -0,0 +1,1840 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include <cmath>
+#include <float.h>
+#include <cassert>
+#include <string.h>
+
+#include "core.h"
+#include "types.h"
+
+#ifdef __CUDACC__
+#define CUDA_CALLABLE __host__ __device__
+#else
+#define CUDA_CALLABLE
+#endif
+
+const float kPi = 3.141592653589f;
+const float k2Pi = 2.0f*kPi;
+const float kInvPi = 1.0f/kPi;
+const float kInv2Pi = 0.5f/kPi;
+const float kDegToRad = kPi/180.0f;
+const float kRadToDeg = 180.0f/kPi;
+
+// thin CUDA-callable wrappers over the C math library so the same names
+// work in host and device code
+CUDA_CALLABLE inline float DegToRad(float t)
+{
+	return t * kDegToRad;
+}
+
+CUDA_CALLABLE inline float RadToDeg(float t)
+{
+	return t * kRadToDeg;
+}
+
+CUDA_CALLABLE inline float Sin(float theta)
+{
+	return sinf(theta);
+}
+
+CUDA_CALLABLE inline float Cos(float theta)
+{
+	return cosf(theta);
+}
+
+// computes sine and cosine of theta in one call
+CUDA_CALLABLE inline void SinCos(float theta, float& s, float& c)
+{
+	// no optimizations yet
+	s = sinf(theta);
+	c = cosf(theta);
+}
+
+CUDA_CALLABLE inline float Tan(float theta)
+{
+	return tanf(theta);
+}
+
+CUDA_CALLABLE inline float Sqrt(float x)
+{
+	return sqrtf(x);
+}
+
+CUDA_CALLABLE inline double Sqrt(double x)
+{
+	return sqrt(x);
+}
+
+CUDA_CALLABLE inline float ASin(float theta)
+{
+	return asinf(theta);
+}
+
+CUDA_CALLABLE inline float ACos(float theta)
+{
+	return acosf(theta);
+}
+
+CUDA_CALLABLE inline float ATan(float theta)
+{
+	return atanf(theta);
+}
+
+// NOTE(review): parameter order is (x, y) but forwards to atan2f(x, y),
+// whose first argument is conventionally the y ordinate — verify callers
+CUDA_CALLABLE inline float ATan2(float x, float y)
+{
+	return atan2f(x, y);
+}
+
+CUDA_CALLABLE inline float Abs(float x)
+{
+	return fabsf(x);
+}
+
+CUDA_CALLABLE inline float Pow(float b, float e)
+{
+	return powf(b, e);
+}
+
+// sign functions; note Sgn(0) and Sign(0) both return +1
+CUDA_CALLABLE inline float Sgn(float x)
+{
+	return (x < 0.0f ? -1.0f : 1.0f);
+}
+
+CUDA_CALLABLE inline float Sign(float x)
+{
+	return x < 0.0f ? -1.0f : 1.0f;
+}
+
+CUDA_CALLABLE inline double Sign(double x)
+{
+	return x < 0.0f ? -1.0f : 1.0f;
+}
+
+CUDA_CALLABLE inline float Mod(float x, float y)
+{
+	return fmod(x, y);
+}
+
+template <typename T>
+CUDA_CALLABLE inline T Min(T a, T b)
+{
+	return a < b ? a : b;
+}
+
+template <typename T>
+CUDA_CALLABLE inline T Max(T a, T b)
+{
+	return a > b ? a : b;
+}
+
+template <typename T>
+CUDA_CALLABLE inline void Swap(T& a, T& b)
+{
+	T tmp = a;
+	a = b;
+	b = tmp;
+}
+
+// clamps a to [low, high]; tolerates inverted bounds by swapping them first
+template <typename T>
+CUDA_CALLABLE inline T Clamp(T a, T low, T high)
+{
+	if (low > high)
+		Swap(low, high);
+
+	return Max(low, Min(a, high));
+}
+
+// linear interpolation from start (t=0) to end (t=1); t is not clamped
+template <typename V, typename T>
+CUDA_CALLABLE inline V Lerp(const V& start, const V& end, const T& t)
+{
+	return start + (end-start)*t;
+}
+
+CUDA_CALLABLE inline float InvSqrt(float x)
+{
+	return 1.0f / sqrtf(x);
+}
+
+// rounds to nearest integer for non-negative f; note int() truncates
+// toward zero, so negative values below -0.5 are NOT rounded correctly
+CUDA_CALLABLE inline int Round(float f)
+{
+	return int(f+0.5f);
+}
+
+// returns v scaled to unit length; divides by zero for a zero vector
+// (use SafeNormalize when the input may be degenerate)
+template <typename T>
+CUDA_CALLABLE T Normalize(const T& v)
+{
+	T a(v);
+	a /= Length(v);
+	return a;
+}
+
+// squared length (avoids the sqrt when only comparisons are needed)
+template <typename T>
+CUDA_CALLABLE inline typename T::value_type LengthSq(const T v)
+{
+	return Dot(v,v);
+}
+
+// Euclidean length of v; returns exactly 0 for a zero vector
+template <typename T>
+CUDA_CALLABLE inline typename T::value_type Length(const T& v)
+{
+	typename T::value_type lSq = LengthSq(v);
+	if (lSq)
+		return Sqrt(lSq);	// reuse lSq instead of recomputing LengthSq(v)
+	else
+		return 0.0f;
+}
+
+// this is mainly a helper function used by script
+template <typename T>
+CUDA_CALLABLE inline typename T::value_type Distance(const T& v1, const T& v2)
+{
+	return Length(v1-v2);
+}
+
+// normalizes v, returning fallback (default-constructed, i.e. zero)
+// when v has zero length
+template <typename T>
+CUDA_CALLABLE inline T SafeNormalize(const T& v, const T& fallback=T())
+{
+	float l = LengthSq(v);
+	if (l > 0.0f)
+	{
+		return v * InvSqrt(l);
+	}
+	else
+		return fallback;
+}
+
+template <typename T>
+CUDA_CALLABLE inline T Sqr(T x) { return x*x; }
+
+template <typename T>
+CUDA_CALLABLE inline T Cube(T x) { return x*x*x; }
+
+#include "vec2.h"
+#include "vec3.h"
+#include "vec4.h"
+#include "quat.h"
+#include "point3.h"
+#include "mat22.h"
+#include "mat33.h"
+#include "mat44.h"
+#include "matnn.h"
+
+// represents a plane in the form ax + by + cz - d = 0; the stored w
+// component equals -d, so the plane equation is x*X + y*Y + z*Z + w = 0
+class Plane : public Vec4
+{
+public:
+
+	CUDA_CALLABLE inline Plane() {}
+	CUDA_CALLABLE inline Plane(float x, float y, float z, float w) : Vec4(x, y, z, w) {}
+	// construct from a point on the plane and its normal
+	CUDA_CALLABLE inline Plane(const Vec3& p, const Vector3& n)
+	{
+		x = n.x;
+		y = n.y;
+		z = n.z;
+		w = -Dot3(p, n);
+	}
+
+	CUDA_CALLABLE inline Vec3 GetNormal() const { return Vec3(x, y, z); }
+	// closest point on the plane to the origin (valid for unit normals)
+	CUDA_CALLABLE inline Vec3 GetPoint() const { return Vec3(x*-w, y*-w, z*-w); }
+
+	CUDA_CALLABLE inline Plane(const Vec3& v) : Vec4(v.x, v.y, v.z, 1.0f) {}
+	CUDA_CALLABLE inline Plane(const Vec4& v) : Vec4(v) {}
+};
+
+// 4-component dot product
+template <typename T>
+CUDA_CALLABLE inline T Dot(const XVector4<T>& v1, const XVector4<T>& v2)
+{
+	return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z + v1.w*v2.w;
+}
+
+// helper function that assumes a w of 0 (direction vs plane normal)
+CUDA_CALLABLE inline float Dot(const Plane& p, const Vector3& v)
+{
+	return p.x*v.x + p.y*v.y + p.z*v.z;
+}
+
+CUDA_CALLABLE inline float Dot(const Vector3& v, const Plane& p)
+{
+	return Dot(p, v);
+}
+
+// helper function that assumes a w of 1 (signed distance of point to plane)
+CUDA_CALLABLE inline float Dot(const Plane& p, const Point3& v)
+{
+	return p.x*v.x + p.y*v.y + p.z*v.z + p.w;
+}
+
+// ensures that the normal component of the plane is unit magnitude
+CUDA_CALLABLE inline Vec4 NormalizePlane(const Vec4& p)
+{
+	float l = Length(Vec3(p));
+
+	return (1.0f/l)*p;
+}
+
+//----------------------------------------------------------------------------
+// helpers built on the C runtime rand(); host-only (not CUDA_CALLABLE)
+
+// random number in range [0,1]
+inline float RandomUnit()
+{
+	float r = (float)rand();
+	r /= RAND_MAX;
+	return r;
+}
+
+// Random number in range [-1,1]
+inline float RandomSignedUnit()
+{
+	float r = (float)rand();
+	r /= RAND_MAX;
+	r = 2.0f * r - 1.0f;
+	return r;
+}
+
+// random number in range [lo,hi]
+inline float Random(float lo, float hi)
+{
+	float r = (float)rand();
+	r /= RAND_MAX;
+	r = (hi - lo) * r + lo;
+	return r;
+}
+
+extern uint32_t seed1;
+extern uint32_t seed2;
+
+void RandInit();
+
+// random number generator: XOR-shift/multiply mix over the two global
+// seeds (call RandInit() first to seed deterministically)
+inline uint32_t Rand()
+{
+	seed1 = ( seed2 ^ ( ( seed1 << 5 ) | ( seed1 >> 27 ) ) ) ^ ( seed1*seed2 );
+	seed2 = seed1 ^ ( ( seed2 << 12 ) | ( seed2 >> 20 ) );
+
+	return seed1;
+}
+
+// returns a random number in the range [min, max); returns min for an
+// empty range instead of dividing by zero (the original computed
+// Rand()%(max-min) unconditionally, which is UB when max == min)
+inline uint32_t Rand(uint32_t min, uint32_t max)
+{
+	return (max > min) ? min + Rand()%(max-min) : min;
+}
+
+// returns random number between 0-1
+inline float Randf()
+{
+ uint32_t value = Rand();
+ uint32_t limit = 0xffffffff;
+
+ return ( float )value*( 1.0f/( float )limit );
+}
+
+// returns random number between min and max
+inline float Randf(float min, float max)
+{
+ // return Lerp(min, max, ParticleRandf());
+ float t = Randf();
+ return (1.0f-t)*min + t*(max);
+}
+
+// returns random number between 0-max
+inline float Randf(float max)
+{
+ return Randf()*max;
+}
+
// returns a random unit vector (also can add an offset to generate around an off axis vector)
// NOTE(review): both angles are drawn uniformly from [0, 2*pi), so the
// resulting directions are NOT uniformly distributed over the sphere
// (samples cluster near the poles). Use UniformSampleSphere() below when a
// uniform distribution is required.
inline Vec3 RandomUnitVector()
{
    float phi = Randf(kPi*2.0f);
    float theta = Randf(kPi*2.0f);

    float cosTheta = Cos(theta);
    float sinTheta = Sin(theta);

    float cosPhi = Cos(phi);
    float sinPhi = Sin(phi);

    return Vec3(cosTheta*sinPhi,cosPhi,sinTheta*sinPhi);
}

// Vector with each component drawn independently from [-1, 1).
inline Vec3 RandVec3() { return Vec3(Randf(-1.0f, 1.0f), Randf(-1.0f, 1.0f), Randf(-1.0f, 1.0f)); }
+
+// uniformly sample volume of a sphere using dart throwing
+inline Vec3 UniformSampleSphereVolume()
+{
+ for(;;)
+ {
+ Vec3 v = RandVec3();
+ if (Dot(v, v) < 1.0f)
+ return v;
+ }
+}
+
+inline Vec3 UniformSampleSphere()
+{
+ float u1 = Randf(0.0f, 1.0f);
+ float u2 = Randf(0.0f, 1.0f);
+
+ float z = 1.f - 2.f * u1;
+ float r = sqrtf(Max(0.f, 1.f - z*z));
+ float phi = 2.f * kPi * u2;
+ float x = r * cosf(phi);
+ float y = r * sinf(phi);
+
+ return Vector3(x, y, z);
+}
+
+inline Vec3 UniformSampleHemisphere()
+{
+ // generate a random z value
+ float z = Randf(0.0f, 1.0f);
+ float w = Sqrt(1.0f-z*z);
+
+ float phi = k2Pi*Randf(0.0f, 1.0f);
+ float x = Cos(phi)*w;
+ float y = Sin(phi)*w;
+
+ return Vec3(x, y, z);
+}
+
+inline Vec2 UniformSampleDisc()
+{
+ float r = Sqrt(Randf(0.0f, 1.0f));
+ float theta = k2Pi*Randf(0.0f, 1.0f);
+
+ return Vec2(r * Cos(theta), r * Sin(theta));
+}
+
+inline void UniformSampleTriangle(float& u, float& v)
+{
+ float r = Sqrt(Randf());
+ u = 1.0f - r;
+ v = Randf() * r;
+}
+
+inline Vec3 CosineSampleHemisphere()
+{
+ Vec2 s = UniformSampleDisc();
+ float z = Sqrt(Max(0.0f, 1.0f - s.x*s.x - s.y*s.y));
+
+ return Vec3(s.x, s.y, z);
+}
+
+inline Vec3 SphericalToXYZ(float theta, float phi)
+{
+ float cosTheta = cos(theta);
+ float sinTheta = sin(theta);
+
+ return Vec3(sin(phi)*sinTheta, cosTheta, cos(phi)*sinTheta);
+}
+
// returns random vector between -range and range
// (each component drawn independently from [-range.c, range.c))
inline Vec4 Randf(const Vec4 &range)
{
    return Vec4(Randf(-range.x, range.x),
                Randf(-range.y, range.y),
                Randf(-range.z, range.z),
                Randf(-range.w, range.w));
}

// generates a transform matrix with v as the z axis, taken from PBRT
// Given unit vector w, writes two unit vectors u, v so that (u, v, w) form
// an orthonormal basis. The branch picks the axis combination with the
// larger denominator to avoid dividing by a near-zero length.
inline void BasisFromVector(const Vec3& w, Vec3* u, Vec3* v)
{
    if (fabsf(w.x) > fabsf(w.y))
    {
        float invLen = 1.0f / sqrtf(w.x*w.x + w.z*w.z);
        *u = Vec3(-w.z*invLen, 0.0f, w.x*invLen);
    }
    else
    {
        float invLen = 1.0f / sqrtf(w.y*w.y + w.z*w.z);
        *u = Vec3(0.0f, w.z*invLen, -w.y*invLen);
    }

    *v = Cross(w, *u);

    // sanity check: both generated axes should be (approximately) unit length
    assert(fabsf(Length(*u)-1.0f) < 0.01f);
    assert(fabsf(Length(*v)-1.0f) < 0.01f);
}
+
// same as above but returns a matrix
// Builds a transform whose z axis is w and translation is t; the x/y
// columns are filled in by BasisFromVector.
inline Mat44 TransformFromVector(const Vec3& w, const Point3& t=Point3(0.0f, 0.0f, 0.0f))
{
    Mat44 m = Mat44::kIdentity;
    m.SetCol(2, Vec4(w.x, w.y, w.z, 0.0));
    m.SetCol(3, Vec4(t.x, t.y, t.z, 1.0f));

    // writes the generated basis vectors directly into the first two columns
    BasisFromVector(w, (Vec3*)m.columns[0], (Vec3*)m.columns[1]);

    return m;
}

// todo: sort out rotations
// View matrix for a camera at pos with identity orientation
// (translation-only inverse).
inline Mat44 ViewMatrix(const Point3& pos)
{

    float view[4][4] = { { 1.0f, 0.0f, 0.0f, 0.0f },
                         { 0.0f, 1.0f, 0.0f, 0.0f },
                         { 0.0f, 0.0f, 1.0f, 0.0f },
                         { -pos.x, -pos.y, -pos.z, 1.0f } };

    return Mat44(&view[0][0]);
}

// View matrix looking from viewer towards target, with +y as the up hint.
inline Mat44 LookAtMatrix(const Point3& viewer, const Point3& target)
{
    // create a basis from viewer to target (OpenGL convention looking down -z)
    Vec3 forward = -Normalize(target-viewer);
    Vec3 up(0.0f, 1.0f, 0.0f);
    Vec3 left = Normalize(Cross(up, forward));
    up = Cross(forward, left);

    // camera-to-world matrix, inverted below to obtain the view transform
    float xform[4][4] = { { left.x, left.y, left.z, 0.0f },
                          { up.x, up.y, up.z, 0.0f},
                          { forward.x, forward.y, forward.z, 0.0f},
                          { viewer.x, viewer.y, viewer.z, 1.0f} };

    return AffineInverse(Mat44(&xform[0][0]));
}

// generate a rotation matrix around an axis, from PBRT p74
// angle is in radians; the axis is normalized internally.
inline Mat44 RotationMatrix(float angle, const Vec3& axis)
{
    Vec3 a = Normalize(axis);
    float s = sinf(angle);
    float c = cosf(angle);

    float m[4][4];

    m[0][0] = a.x * a.x + (1.0f - a.x * a.x) * c;
    m[0][1] = a.x * a.y * (1.0f - c) + a.z * s;
    m[0][2] = a.x * a.z * (1.0f - c) - a.y * s;
    m[0][3] = 0.0f;

    m[1][0] = a.x * a.y * (1.0f - c) - a.z * s;
    m[1][1] = a.y * a.y + (1.0f - a.y * a.y) * c;
    m[1][2] = a.y * a.z * (1.0f - c) + a.x * s;
    m[1][3] = 0.0f;

    m[2][0] = a.x * a.z * (1.0f - c) + a.y * s;
    m[2][1] = a.y * a.z * (1.0f - c) - a.x * s;
    m[2][2] = a.z * a.z + (1.0f - a.z * a.z) * c;
    m[2][3] = 0.0f;

    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = 0.0f;
    m[3][3] = 1.0f;

    return Mat44(&m[0][0]);
}
+
// Rotation-only 4x4 matrix equivalent to the quaternion q (zero translation).
inline Mat44 RotationMatrix(Quat q)
{
    Matrix33 rotation(q);

    Matrix44 m;
    m.SetAxis(0, rotation.cols[0]);
    m.SetAxis(1, rotation.cols[1]);
    m.SetAxis(2, rotation.cols[2]);
    m.SetTranslation(Point3(0.0f));

    return m;
}

// Translation-only 4x4 matrix.
inline Mat44 TranslationMatrix(const Point3& t)
{
    Mat44 m(Mat44::kIdentity);
    m.SetTranslation(t);
    return m;
}

// OpenGL-style orthographic projection (maps the box to [-1,1]^3,
// same convention as glOrtho).
inline Mat44 OrthographicMatrix(float left, float right, float bottom, float top, float n, float f)
{

    float m[4][4] = { { 2.0f/(right-left), 0.0f, 0.0f, 0.0f },
                      { 0.0f, 2.0f/(top-bottom), 0.0f, 0.0f },
                      { 0.0f, 0.0f, -2.0f/(f-n), 0.0f },
                      { -(right+left)/(right-left), -(top+bottom)/(top-bottom), -(f+n)/(f-n), 1.0f } };


    return Mat44(&m[0][0]);
}

// this is designed as a drop in replacement for gluPerspective
// fov is the vertical field of view in degrees.
inline Mat44 ProjectionMatrix(float fov, float aspect, float znear, float zfar)
{
    float f = 1.0f / tanf(DegToRad(fov*0.5f));
    float zd = znear-zfar;

    float view[4][4] = { { f/aspect, 0.0f, 0.0f, 0.0f },
                         { 0.0f, f, 0.0f, 0.0f },
                         { 0.0f, 0.0f, (zfar+znear)/zd, -1.0f },
                         { 0.0f, 0.0f, (2.0f*znear*zfar)/zd, 0.0f } };

    return Mat44(&view[0][0]);
}
+
// encapsulates an orientation encoded in Euler angles, not the sexiest
// representation but it is convenient when manipulating objects from script
class Rotation
{
public:

    Rotation() : yaw(0), pitch(0), roll(0) {}
    Rotation(float inYaw, float inPitch, float inRoll) : yaw(inYaw), pitch(inPitch), roll(inRoll) {}

    // component-wise addition/subtraction of angle triples
    Rotation operator + (const Rotation& rhs) const
    {
        return Rotation(yaw + rhs.yaw, pitch + rhs.pitch, roll + rhs.roll);
    }

    Rotation operator - (const Rotation& rhs) const
    {
        return Rotation(yaw - rhs.yaw, pitch - rhs.pitch, roll - rhs.roll);
    }

    Rotation& operator +=(const Rotation& rhs) { *this = *this + rhs; return *this; }
    Rotation& operator -=(const Rotation& rhs) { *this = *this - rhs; return *this; }

    // all members are in degrees (easy editing)
    float yaw;
    float pitch;
    float roll;
};
+
// Non-uniform scale matrix with factors s.x/s.y/s.z on the diagonal.
inline Mat44 ScaleMatrix(const Vector3& s)
{
    float m[4][4] = { {s.x, 0.0f, 0.0f, 0.0f },
                      { 0.0f, s.y, 0.0f, 0.0f},
                      { 0.0f, 0.0f, s.z, 0.0f},
                      { 0.0f, 0.0f, 0.0f, 1.0f} };

    return Mat44(&m[0][0]);
}

// assumes yaw on y, then pitch on z, then roll on x
// Rotation angles are taken from r in degrees; p supplies the translation.
inline Mat44 TransformMatrix(const Rotation& r, const Point3& p)
{
    const float yaw = DegToRad(r.yaw);
    const float pitch = DegToRad(r.pitch);
    const float roll = DegToRad(r.roll);

    const float s1 = Sin(roll);
    const float c1 = Cos(roll);
    const float s2 = Sin(pitch);
    const float c2 = Cos(pitch);
    const float s3 = Sin(yaw);
    const float c3 = Cos(yaw);

    // interprets the angles as yaw around world-y, pitch around new z, roll around new x
    float mr[4][4] = { { c2*c3, s2, -c2*s3, 0.0f},
                       { s1*s3-c1*c3*s2, c1*c2, c3*s1+c1*s2*s3, 0.0f},
                       { c3*s1*s2+c1*s3, -c2*s1, c1*c3-s1*s2*s3, 0.0f},
                       { p.x, p.y, p.z, 1.0f} };

    Mat44 m1(&mr[0][0]);

    return m1;//m2 * m1;
}
+
// Position plus Euler-angle orientation; a convenience wrapper used when
// manipulating objects from script (see Rotation above).
class Transform
{
public:

    Transform() {};
    Transform(const Point3& p, const Rotation& r) : position(p), rotation(r) {}

    // Builds the equivalent affine matrix (rotation in degrees + translation).
    Mat44 ToMatrix() const
    {
        return TransformMatrix(rotation, position);
    }

    // helper function to translate object
    void Translate(const Vec3& delta)
    {
        position += delta;
    }

    // helper function to rotate an object
    void Rotate(const Rotation& delta)
    {
        rotation += delta;
    }

    // Orients the transform so it faces target (roll is reset to zero).
    // NOTE(review): atan2f returns radians, but Rotation members are
    // documented as degrees and ToMatrix() converts them with DegToRad --
    // this looks inconsistent; confirm against callers.
    void RotateToLookAt(const Point3& target)
    {
        // get vector to point
        Vec3 delta = target-position;

        // yaw
        rotation.yaw = atan2f(delta.z, delta.x);
        rotation.pitch = atan2f(delta.y, sqrt(delta.x*delta.x+delta.z*delta.z));
        rotation.roll = 0.0f;
    }

    // Basis vectors of the current orientation (columns of ToMatrix()).
    Vec3 GetXAxis() const
    {
        return ToMatrix().columns[0];
    }

    Vec3 GetYAxis() const
    {
        return ToMatrix().columns[1];
    }

    Vec3 GetZAxis() const
    {
        return ToMatrix().columns[2];
    }

    Point3 position;
    Rotation rotation;
};

// aligns the z axis along the vector
// NOTE(review): only sets pitch from the xy components; the todo below is
// still open.
inline Rotation AlignToVector(const Vec3& vector)
{
    // todo: fix, see spherical->cartesian coordinates wikipedia
    return Rotation(0.0f, RadToDeg(atan2(vector.y, vector.x)), 0.0f);
}

// creates a vector given an angle measured clockwise from horizontal (1,0)
// (angle in radians)
inline Vec2 AngleToVector(float a)
{
    return Vec2(Cos(a), Sin(a));
}

// Inverse of AngleToVector: angle of v in radians.
inline float VectorToAngle(const Vec2& v)
{
    return atan2f(v.y, v.x);
}
+
+CUDA_CALLABLE inline float SmoothStep(float a, float b, float t)
+{
+ t = Clamp(t-a / (b-a), 0.0f, 1.0f);
+ return t*t*(3.0f-2.0f*t);
+}
+
// hermite spline interpolation
// Evaluates the cubic Hermite spline through endpoints a, b with tangents
// t1, t2 at parameter t in [0,1].
template <typename T>
T HermiteInterpolate(const T& a, const T& b, const T& t1, const T& t2, float t)
{
    // Hermite basis functions h00, h01, h10, h11
    const float h00 = 1.0f - 3*t*t + 2*t*t*t;
    const float h01 = t*t*(3.0f-2.0f*t);
    const float h10 = t*t*t - 2*t*t + t;
    const float h11 = t*t*(t-1.0f);

    return a*h00 + b*h01 + t1*h10 + t2*h11;
}

// First derivative of the Hermite spline at parameter t.
template <typename T>
T HermiteTangent(const T& a, const T& b, const T& t1, const T& t2, float t)
{
    // derivatives of the basis functions above
    const float d00 = 6.0f*t*t-6*t;
    const float d01 = -6.0f*t*t + 6*t;
    const float d10 = 3*t*t - 4*t + 1;
    const float d11 = 3*t*t - 2*t;

    return a*d00 + b*d01 + t1*d10 + t2*d11;
}

// Second derivative of the Hermite spline at parameter t.
template <typename T>
T HermiteSecondDerivative(const T& a, const T& b, const T& t1, const T& t2, float t)
{
    // second derivatives of the basis functions
    const float s00 = 12*t - 6.0f;
    const float s01 = -12.0f*t + 6;
    const float s10 = 6*t - 4.0f;
    const float s11 = 6*t - 2.0f;

    return a*s00 + b*s01 + t1*s10 + t2*s11;
}
+
// Logarithm of x for an arbitrary base; only needed when the standard
// bases (10, e) don't apply.
inline float Log(float base, float x)
{
    return logf(x) / logf(base);
}

// Integer floor(log2(x)); returns 0 for any x < 2.
inline int Log2(int x)
{
    int result = 0;
    for (; x >= 2; x /= 2)
        ++result;

    return result;
}

// Maps value into the [0,1] parameter space of the range [lower, upper].
template <typename T>
T RangeMap(T value, T lower, T upper)
{
    assert(upper >= lower);
    const T extent = upper-lower;
    return (value-lower)/extent;
}
+
// simple colour class
// RGBA colour with float channels; channels are not clamped by arithmetic.
class Colour
{
public:

    enum Preset
    {
        kRed,
        kGreen,
        kBlue,
        kWhite,
        kBlack
    };

    Colour(float r_=0.0f, float g_=0.0f, float b_=0.0f, float a_=1.0f) : r(r_), g(g_), b(b_), a(a_) {}
    Colour(float* p) : r(p[0]), g(p[1]), b(p[2]), a(p[3]) {}
    // unpacks a 32-bit colour laid out as 0xRRGGBBAA
    Colour(uint32_t rgba)
    {
        r = ((rgba>>24)&0xff)/255.0f;
        g = ((rgba>>16)&0xff)/255.0f;
        b = ((rgba>>8)&0xff)/255.0f;
        a = ((rgba)&0xff)/255.0f;
    }
    Colour(Preset p);

    // cast operator (relies on r,g,b,a being laid out contiguously below)
    operator const float*() const { return &r; }
    operator float*() { return &r; }

    // binary operators construct the result directly, component-wise
    Colour operator * (float scale) const { return Colour(r*scale, g*scale, b*scale, a*scale); }
    Colour operator / (float scale) const { const float s = 1.0f/scale; return Colour(r*s, g*s, b*s, a*s); }
    Colour operator + (const Colour& v) const { return Colour(r+v.r, g+v.g, b+v.b, a+v.a); }
    Colour operator - (const Colour& v) const { return Colour(r-v.r, g-v.g, b-v.b, a-v.a); }
    Colour operator * (const Colour& scale) const { return Colour(r*scale.r, g*scale.g, b*scale.b, a*scale.a); }

    Colour& operator *=(float scale) { *this = *this * scale; return *this; }
    Colour& operator /=(float scale) { *this = *this / scale; return *this; }
    Colour& operator +=(const Colour& v) { *this = *this + v; return *this; }
    Colour& operator -=(const Colour& v) { *this = *this - v; return *this; }
    Colour& operator *=(const Colour& v) { *this = *this * v; return *this; }

    float r, g, b, a;

};
+
// Exact component-wise equality (no epsilon).
inline bool operator == (const Colour& lhs, const Colour& rhs)
{
    return lhs.r == rhs.r && lhs.g == rhs.g && lhs.b == rhs.b && lhs.a == rhs.a;
}

inline bool operator != (const Colour& lhs, const Colour& rhs)
{
    return !(lhs == rhs);
}

// Simple luminance-based tone map: divides by 1 + mean(r,g,b).
inline Colour ToneMap(const Colour& s)
{
    //return Colour(s.r / (s.r+1.0f), s.g / (s.g+1.0f), s.b / (s.b+1.0f), 1.0f);
    float Y = 0.3333f*(s.r + s.g + s.b);
    return s / (1.0f + Y);
}

// lhs scalar scale
inline Colour operator * (float lhs, const Colour& rhs)
{
    Colour r(rhs);
    r *= lhs;
    return r;
}

// Converts CIE Yxy chromaticity coordinates to XYZ (stored in r,g,b).
// NOTE(review): divides by y with no guard; y == 0 yields inf/NaN.
inline Colour YxyToXYZ(float Y, float x, float y)
{
    float X = x * (Y / y);
    float Z = (1.0f - x - y) * Y / y;

    return Colour(X, Y, Z, 1.0f);
}
+
// Converts HSV (all components expected in [0,1]) to an RGB Colour with
// alpha = 1. Standard sector-based conversion: the hue circle is split
// into six sectors and each sector blends between v, p, q, t.
inline Colour HSVToRGB( float h, float s, float v )
{
    float r, g, b;

    int i;
    float f, p, q, t;
    if( s == 0 ) {
        // achromatic (grey)
        r = g = b = v;
    }
    else
    {
        h *= 6.0f;            // sector 0 to 5
        i = int(floor( h ));
        f = h - i;            // fractional part of h within the sector
        p = v * ( 1 - s );
        q = v * ( 1 - s * f );
        t = v * ( 1 - s * ( 1 - f ) );
        switch( i ) {
        case 0:
            r = v;
            g = t;
            b = p;
            break;
        case 1:
            r = q;
            g = v;
            b = p;
            break;
        case 2:
            r = p;
            g = v;
            b = t;
            break;
        case 3:
            r = p;
            g = q;
            b = v;
            break;
        case 4:
            r = t;
            g = p;
            b = v;
            break;
        default: // case 5 (also catches h == 1.0, where i == 6)
            r = v;
            g = p;
            b = q;
            break;
        };
    }

    return Colour(r, g, b);
}
+
// Converts CIE XYZ to linear sRGB primaries using the standard
// XYZ -> linear-RGB matrix; alpha is set to 1.
inline Colour XYZToLinear(float x, float y, float z)
{
    float c[4];
    c[0] = 3.240479f * x + -1.537150f * y + -0.498535f * z;
    c[1] = -0.969256f * x + 1.875991f * y + 0.041556f * z;
    c[2] = 0.055648f * x + -0.204043f * y + 1.057311f * z;
    c[3] = 1.0f;

    return Colour(c);
}

// Packs a Colour into 8 bits per channel, clamping each channel to [0,1].
// NOTE(review): reading u32 after writing u8[] is union type-punning --
// accepted by the major compilers but formally unspecified in C++, and the
// resulting channel order in the integer depends on machine endianness
// (r in the low byte on little-endian).
inline uint32_t ColourToRGBA8(const Colour& c)
{
    union SmallColor
    {
        uint8_t u8[4];
        uint32_t u32;
    };

    SmallColor s;
    s.u8[0] = (uint8_t)(Clamp(c.r, 0.0f, 1.0f) * 255);
    s.u8[1] = (uint8_t)(Clamp(c.g, 0.0f, 1.0f) * 255);
    s.u8[2] = (uint8_t)(Clamp(c.b, 0.0f, 1.0f) * 255);
    s.u8[3] = (uint8_t)(Clamp(c.a, 0.0f, 1.0f) * 255);

    return s.u32;
}

// Approximate linear -> sRGB conversion using a 1/2.2 gamma power curve
// (alpha is passed through unchanged).
inline Colour LinearToSrgb(const Colour& c)
{
    const float kInvGamma = 1.0f/2.2f;
    return Colour(powf(c.r, kInvGamma), powf(c.g, kInvGamma), powf(c.b, kInvGamma), c.a);
}

// Approximate sRGB -> linear conversion (inverse of LinearToSrgb).
inline Colour SrgbToLinear(const Colour& c)
{
    const float kInvGamma = 2.2f;
    return Colour(powf(c.r, kInvGamma), powf(c.g, kInvGamma), powf(c.b, kInvGamma), c.a);
}
+
// intersection routines

// Ray-sphere test returning the nearest positive hit distance in t and,
// optionally, the surface normal at the hit point. rayDir is assumed to be
// unit length. Rays starting inside the sphere report no intersection.
inline bool IntersectRaySphere(const Point3& sphereOrigin, float sphereRadius, const Point3& rayOrigin, const Vec3& rayDir, float& t, Vec3* hitNormal=NULL)
{
    Vec3 d(sphereOrigin-rayOrigin);
    float deltaSq = LengthSq(d);
    float radiusSq = sphereRadius*sphereRadius;

    // if the origin is inside the sphere return no intersection
    if (deltaSq > radiusSq)
    {
        // projection of the center offset onto the ray direction
        float dprojr = Dot(d, rayDir);

        // if ray pointing away from sphere no intersection
        if (dprojr < 0.0f)
            return false;

        // bit of Pythagoras to get closest point on ray
        float dSq = deltaSq-dprojr*dprojr;

        if (dSq > radiusSq)
            return false;
        else
        {
            // length of the half chord
            float thc = sqrt(radiusSq-dSq);

            // closest intersection
            t = dprojr - thc;

            // calculate normal if requested
            if (hitNormal)
                *hitNormal = Normalize((rayOrigin+rayDir*t)-sphereOrigin);

            return true;
        }
    }
    else
    {
        return false;
    }
}
+
+template <typename T>
+CUDA_CALLABLE inline bool SolveQuadratic(T a, T b, T c, T& minT, T& maxT)
+{
+ if (a == 0.0f && b == 0.0f)
+ {
+ minT = maxT = 0.0f;
+ return true;
+ }
+
+ T discriminant = b*b - T(4.0)*a*c;
+
+ if (discriminant < 0.0f)
+ {
+ return false;
+ }
+
+ // numerical receipes 5.6 (this method ensures numerical accuracy is preserved)
+ T t = T(-0.5) * (b + Sign(b)*Sqrt(discriminant));
+ minT = t / a;
+ maxT = c / t;
+
+ if (minT > maxT)
+ {
+ Swap(minT, maxT);
+ }
+
+ return true;
+}
+
// alternative ray sphere intersect, returns closest and furthest t values
// rayDir is assumed unit length (the quadratic uses a = 1). A ray starting
// inside the sphere reports minT = 0.
// NOTE(review): a sphere entirely behind the ray still returns true with a
// negative maxT -- confirm callers check the returned distances.
inline bool IntersectRaySphere(const Point3& sphereOrigin, float sphereRadius, const Point3& rayOrigin, const Vector3& rayDir, float& minT, float &maxT, Vec3* hitNormal=NULL)
{
    Vector3 q = rayOrigin-sphereOrigin;

    float a = 1.0f;
    float b = 2.0f*Dot(q, rayDir);
    float c = Dot(q, q)-(sphereRadius*sphereRadius);

    bool r = SolveQuadratic(a, b, c, minT, maxT);

    // clamp the near hit to the ray origin
    if (minT < 0.0)
        minT = 0.0f;

    // calculate the normal of the closest hit
    if (hitNormal && r)
    {
        *hitNormal = Normalize((rayOrigin+rayDir*minT)-sphereOrigin);
    }

    return r;
}
+
+inline bool IntersectRayPlane(const Point3& p, const Vector3& dir, const Plane& plane, float& t)
+{
+ float d = Dot(plane, dir);
+
+ if (d == 0.0f)
+ {
+ return false;
+ }
+ else
+ {
+ t = -Dot(plane, p) / d;
+ }
+
+ return (t > 0.0f);
+}
+
+inline bool IntersectLineSegmentPlane(const Vec3& start, const Vec3& end, const Plane& plane, Vec3& out)
+{
+ Vec3 u(end - start);
+
+ float dist = -Dot(plane, start) / Dot(plane, u);
+
+ if (dist > 0.0f && dist < 1.0f)
+ {
+ out = (start + u * dist);
+ return true;
+ }
+ else
+ return false;
+}
+
// Moller and Trumbore's method
// Two-sided ray-triangle intersection: hits both front and back faces.
// Outputs the hit distance t, barycentric coordinates (u, v, w) and the
// sign of the determinant (positive for front-facing hits).
inline bool IntersectRayTriTwoSided(const Vec3& p, const Vec3& dir, const Vec3& a, const Vec3& b, const Vec3& c, float& t, float& u, float& v, float& w, float& sign)//Vec3* normal)
{
    Vector3 ab = b - a;
    Vector3 ac = c - a;
    Vector3 n = Cross(ab, ac);

    float d = Dot(-dir, n);
    float ood = 1.0f / d; // No need to check for division by zero here as infinity arithmetic will save us...
    Vector3 ap = p - a;

    t = Dot(ap, n) * ood;
    if (t < 0.0f)
        return false;

    // barycentric coordinates via scalar triple products
    Vector3 e = Cross(-dir, ap);
    v = Dot(ac, e) * ood;
    if (v < 0.0f || v > 1.0f) // ...here...
        return false;
    w = -Dot(ab, e) * ood;
    if (w < 0.0f || v + w > 1.0f) // ...and here
        return false;

    u = 1.0f - v - w;
    //if (normal)
    //*normal = n;
    sign = d;

    return true;
}



// mostly taken from Real Time Collision Detection - p192
// One-sided variant: back-facing and parallel triangles (d <= 0) are
// culled. The barycentric tests are done against the unnormalized
// determinant d first, and all outputs divided by d only on a hit.
inline bool IntersectRayTri(const Point3& p, const Vec3& dir, const Point3& a, const Point3& b, const Point3& c, float& t, float& u, float& v, float& w, Vec3* normal)
{
    const Vec3 ab = b-a;
    const Vec3 ac = c-a;

    // calculate normal
    Vec3 n = Cross(ab, ac);

    // need to solve a system of three equations to give t, u, v
    float d = Dot(-dir, n);

    // if dir is parallel to triangle plane or points away from triangle
    if (d <= 0.0f)
        return false;

    Vec3 ap = p-a;
    t = Dot(ap, n);

    // ignores tris behind
    if (t < 0.0f)
        return false;

    // compute barycentric coordinates
    Vec3 e = Cross(-dir, ap);
    v = Dot(ac, e);
    if (v < 0.0f || v > d) return false;

    w = -Dot(ab, e);
    if (w < 0.0f || v + w > d) return false;

    float ood = 1.0f / d;
    t *= ood;
    v *= ood;
    w *= ood;
    u = 1.0f-v-w;

    // optionally write out normal (todo: this branch is a performance concern, should probably remove)
    // note: n is the unnormalized geometric normal
    if (normal)
        *normal = n;

    return true;
}
+
//mostly taken from Real Time Collision Detection - p192
// Segment-triangle intersection between endpoints p and q; one-sided
// (back-facing triangles are culled) and bounded to the segment extent.
// NOTE(review): the 'expand' parameter is accepted but never used.
CUDA_CALLABLE inline bool IntersectSegmentTri(const Vec3& p, const Vec3& q, const Vec3& a, const Vec3& b, const Vec3& c, float& t, float& u, float& v, float& w, Vec3* normal, float expand)
{
    const Vec3 ab = b-a;
    const Vec3 ac = c-a;
    const Vec3 qp = p-q;

    // calculate normal
    Vec3 n = Cross(ab, ac);

    // need to solve a system of three equations to give t, u, v
    float d = Dot(qp, n);

    // if dir is parallel to triangle plane or points away from triangle
    if (d <= 0.0f)
        return false;

    Vec3 ap = p-a;
    t = Dot(ap, n);

    // ignores tris behind
    if (t < 0.0f)
        return false;

    // ignores tris beyond segment
    if (t > d)
        return false;

    // compute barycentric coordinates
    Vec3 e = Cross(qp, ap);
    v = Dot(ac, e);
    if (v < 0.0f || v > d) return false;

    w = -Dot(ab, e);
    if (w < 0.0f || v + w > d) return false;

    float ood = 1.0f / d;
    t *= ood;
    v *= ood;
    w *= ood;
    u = 1.0f-v-w;

    // optionally write out normal (todo: this branch is a performance concern, should probably remove)
    // note: n is the unnormalized geometric normal
    if (normal)
        *normal = n;

    return true;
}

// Scalar triple product (a x b) . c -- signed volume of the parallelepiped.
CUDA_CALLABLE inline float ScalarTriple(const Vec3& a, const Vec3& b, const Vec3& c) { return Dot(Cross(a, b), c); }

// intersects a line (through points p and q, against a triangle a, b, c - mostly taken from Real Time Collision Detection - p186
// Returns true when the line passes through the triangle with consistent
// (non-negative) signed volumes on all three edges; no hit point is output.
CUDA_CALLABLE inline bool IntersectLineTri(const Vec3& p, const Vec3& q, const Vec3& a, const Vec3& b, const Vec3& c)//, float& t, float& u, float& v, float& w, Vec3* normal, float expand)
{
    const Vec3 pq = q-p;
    const Vec3 pa = a-p;
    const Vec3 pb = b-p;
    const Vec3 pc = c-p;

    Vec3 m = Cross(pq, pc);
    float u = Dot(pb, m);
    if (u < 0.0f) return false;

    float v = -Dot(pa, m);
    if (v < 0.0f) return false;

    float w = ScalarTriple(pq, pb, pa);
    if (w < 0.0f) return false;

    return true;
}
+
+CUDA_CALLABLE inline Vec3 ClosestPointToAABB(const Vec3& p, const Vec3& lower, const Vec3& upper)
+{
+ Vec3 c;
+
+ for (int i=0; i < 3; ++i)
+ {
+ float v = p[i];
+ if (v < lower[i]) v = lower[i];
+ if (v > upper[i]) v = upper[i];
+ c[i] = v;
+ }
+
+ return c;
+}
+
+
// RTCD 5.1.5, page 142
// Closest point on triangle (a,b,c) to point p. Also outputs the
// barycentric coordinates v (weight of b) and w (weight of c); the result
// is a + v*ab + w*ac. Works by classifying p against the triangle's
// Voronoi regions: vertices first, then edges, then the interior.
CUDA_CALLABLE inline Vec3 ClosestPointOnTriangle(const Vec3& a, const Vec3& b, const Vec3& c, const Vec3& p, float& v, float& w)
{
    Vec3 ab = b-a;
    Vec3 ac = c-a;
    Vec3 ap = p-a;

    // vertex region a
    float d1 = Dot(ab, ap);
    float d2 = Dot(ac, ap);
    if (d1 <= 0.0f && d2 <= 0.0f)
    {
        v = 0.0f;
        w = 0.0f;
        return a;
    }

    // vertex region b
    Vec3 bp = p-b;
    float d3 = Dot(ab, bp);
    float d4 = Dot(ac, bp);
    if (d3 >= 0.0f && d4 <= d3)
    {
        v = 1.0f;
        w = 0.0f;
        return b;
    }

    // edge region ab
    float vc = d1*d4 - d3*d2;
    if (vc <= 0.0f && d1 >= 0.0f && d3 <= 0.0f)
    {
        v = d1 / (d1-d3);
        w = 0.0f;
        return a + v*ab;
    }

    // vertex region c
    Vec3 cp =p-c;
    float d5 = Dot(ab, cp);
    float d6 = Dot(ac, cp);
    if (d6 >= 0.0f && d5 <= d6)
    {
        v = 0.0f;
        w = 1.0f;
        return c;
    }

    // edge region ac
    float vb = d5*d2 - d1*d6;
    if (vb <= 0.0f && d2 >= 0.0f && d6 <= 0.0f)
    {
        v = 0.0f;
        w = d2 / (d2 - d6);
        return a + w * ac;
    }

    // edge region bc
    float va = d3*d6 - d5*d4;
    if (va <= 0.0f && (d4 -d3) >= 0.0f && (d5-d6) >= 0.0f)
    {
        w = (d4-d3)/((d4-d3) + (d5-d6));
        v = 1.0f-w;
        return b + w * (c-b);
    }

    // interior: p projects inside the triangle
    float denom = 1.0f / (va + vb + vc);
    v = vb * denom;
    w = vc * denom;
    return a + ab*v + ac*w;
}


// Squared distance from point c to the segment [a, b] (RTCD 5.1.2.1).
CUDA_CALLABLE inline float SqDistPointSegment(Vec3 a, Vec3 b, Vec3 c)
{
    Vec3 ab = b-a, ac=c-a, bc=c-b;
    float e = Dot(ac, ab);

    // c projects before a
    if (e <= 0.0f)
        return Dot(ac, ac);
    float f = Dot(ab, ab);

    // c projects beyond b
    if (e >= f)
        return Dot(bc, bc);

    // c projects onto the segment interior
    return Dot(ac, ac) - e*e/f;
}
+
// Tests whether point p lies inside triangle (a,b,c) by translating p to
// the origin and checking that the cross products of consecutive edge
// pairs all point the same way. Points exactly on an edge are rejected
// (strict <= comparisons).
// NOTE(review): p is presumably expected to lie in the triangle's plane;
// the test degrades for points off the plane -- confirm with callers.
CUDA_CALLABLE inline bool PointInTriangle(Vec3 a, Vec3 b, Vec3 c, Vec3 p)
{
    a -= p; b -= p; c-= p;

    /*
    float eps = 0.0f;

    float ab = Dot(a, b);
    float ac = Dot(a, c);
    float bc = Dot(b, c);
    float cc = Dot(c, c);

    if (bc *ac - cc * ab <= eps)
        return false;

    float bb = Dot(b, b);
    if (ab * bc - ac*bb <= eps)
        return false;

    return true;
    */

    Vec3 u = Cross(b, c);
    Vec3 v = Cross(c, a);

    if (Dot(u, v) <= 0.0f)
        return false;

    Vec3 w = Cross(a, b);

    if (Dot(u, w) <= 0.0f)
        return false;

    return true;
}

// Closest points between segments [p,q] and [r,s] (RTCD 5.1.9 style);
// outputs clamped parameters u along [p,q] and v along [r,s].
// NOTE(review): divides by e = Dot(d2, d2) and by a = Dot(d1, d1) with no
// guard -- degenerate (zero-length) segments produce NaN.
inline void ClosestPointBetweenLineSegments(const Vec3& p, const Vec3& q, const Vec3& r, const Vec3& s, float& u, float& v)
{
    Vec3 d1 = q-p;
    Vec3 d2 = s-r;
    Vec3 rp = p-r;
    float a = Dot(d1, d1);
    float c = Dot(d1, rp);
    float e = Dot(d2, d2);
    float f = Dot(d2, rp);

    float b = Dot(d1, d2);
    float denom = a*e - b*b;
    // denom == 0 means the segments are parallel; pick u = 0 arbitrarily
    if (denom != 0.0f)
        u = Clamp((b*f - c*e)/denom, 0.0f, 1.0f);
    else
    {
        u = 0.0f;
    }

    v = (b*u + f)/e;

    // if v falls outside [0,1], clamp it and recompute u for the clamped point
    if (v < 0.0f)
    {
        v = 0.0f;
        u = Clamp(-c/a, 0.0f, 1.0f);
    }
    else if (v > 1.0f)
    {
        v = 1.0f;
        u = Clamp((b-c)/a, 0.0f, 1.0f);
    }
}
+
+
+
// Branch-free min/max helpers used by the slab test below.
CUDA_CALLABLE inline float minf(const float a, const float b) { return a < b ? a : b; }
CUDA_CALLABLE inline float maxf(const float a, const float b) { return a > b ? a : b; }

// Slab-method ray/AABB test taking the ray direction as reciprocals
// (rcp_dir = 1/dir, so the caller amortizes the divisions). On a hit,
// t receives the entry distance (lmin), which may be negative when the
// ray starts inside the box. Relies on IEEE infinity semantics for
// axis-aligned rays (zero direction components).
CUDA_CALLABLE inline bool IntersectRayAABBOmpf(const Vec3& pos, const Vector3& rcp_dir, const Vector3& min, const Vector3& max, float& t) {

    // x slab
    float
        l1 = (min.x - pos.x) * rcp_dir.x,
        l2 = (max.x - pos.x) * rcp_dir.x,
        lmin = minf(l1,l2),
        lmax = maxf(l1,l2);

    // y slab
    l1 = (min.y - pos.y) * rcp_dir.y;
    l2 = (max.y - pos.y) * rcp_dir.y;
    lmin = maxf(minf(l1,l2), lmin);
    lmax = minf(maxf(l1,l2), lmax);

    // z slab
    l1 = (min.z - pos.z) * rcp_dir.z;
    l2 = (max.z - pos.z) * rcp_dir.z;
    lmin = maxf(minf(l1,l2), lmin);
    lmax = minf(maxf(l1,l2), lmax);

    //return ((lmax > 0.f) & (lmax >= lmin));
    //return ((lmax > 0.f) & (lmax > lmin));
    bool hit = ((lmax >= 0.f) & (lmax >= lmin));
    if (hit)
        t = lmin;
    return hit;
}
+
+
// Ray/AABB intersection using candidate entry planes per axis. Returns the
// entry distance in t (t = 0 when the start point is already inside).
// NOTE(review): the 'normal' output parameter is never written by this
// function -- confirm whether callers rely on it.
CUDA_CALLABLE inline bool IntersectRayAABB(const Vec3& start, const Vector3& dir, const Vector3& min, const Vector3& max, float& t, Vector3* normal)
{
    //! calculate candidate plane on each axis
    float tx = -1.0f, ty = -1.0f, tz = -1.0f;
    bool inside = true;

    //! use unrolled loops

    //! x
    if (start.x < min.x)
    {
        if (dir.x != 0.0f)
            tx = (min.x-start.x)/dir.x;
        inside = false;
    }
    else if (start.x > max.x)
    {
        if (dir.x != 0.0f)
            tx = (max.x-start.x)/dir.x;
        inside = false;
    }

    //! y
    if (start.y < min.y)
    {
        if (dir.y != 0.0f)
            ty = (min.y-start.y)/dir.y;
        inside = false;
    }
    else if (start.y > max.y)
    {
        if (dir.y != 0.0f)
            ty = (max.y-start.y)/dir.y;
        inside = false;
    }

    //! z
    if (start.z < min.z)
    {
        if (dir.z != 0.0f)
            tz = (min.z-start.z)/dir.z;
        inside = false;
    }
    else if (start.z > max.z)
    {
        if (dir.z != 0.0f)
            tz = (max.z-start.z)/dir.z;
        inside = false;
    }

    //! if point inside all planes
    if (inside)
    {
        t = 0.0f;
        return true;
    }

    //! we now have t values for each of possible intersection planes
    //! find the maximum to get the intersection point
    float tmax = tx;
    int taxis = 0;

    if (ty > tmax)
    {
        tmax = ty;
        taxis = 1;
    }
    if (tz > tmax)
    {
        tmax = tz;
        taxis = 2;
    }

    if (tmax < 0.0f)
        return false;

    //! check that the intersection point lies on the plane we picked
    //! we don't test the axis of closest intersection for precision reasons

    //! no eps for now
    float eps = 0.0f;

    Vec3 hit = start + dir*tmax;

    if ((hit.x < min.x-eps || hit.x > max.x+eps) && taxis != 0)
        return false;
    if ((hit.y < min.y-eps || hit.y > max.y+eps) && taxis != 1)
        return false;
    if ((hit.z < min.z-eps || hit.z > max.z+eps) && taxis != 2)
        return false;

    //! output results
    t = tmax;

    return true;
}
+
+// construct a plane equation such that ax + by + cz + dw = 0
+CUDA_CALLABLE inline Vec4 PlaneFromPoints(const Vec3& p, const Vec3& q, const Vec3& r)
+{
+ Vec3 e0 = q-p;
+ Vec3 e1 = r-p;
+
+ Vec3 n = SafeNormalize(Cross(e0, e1));
+
+ return Vec4(n.x, n.y, n.z, -Dot(p, n));
+}
+
+CUDA_CALLABLE inline bool IntersectPlaneAABB(const Vec4& plane, const Vec3& center, const Vec3& extents)
+{
+ float radius = Abs(extents.x*plane.x) + Abs(extents.y*plane.y) + Abs(extents.z*plane.z);
+ float delta = Dot(center, Vec3(plane)) + plane.w;
+
+ return Abs(delta) <= radius;
+}
+
// 2d rectangle class
// Axis-aligned rectangle with unsigned integer coordinates; the ctor
// asserts left <= right and top <= bottom. Contains() treats all four
// edges as inclusive.
class Rect
{
public:

    Rect() : m_left(0), m_right(0), m_top(0), m_bottom(0) {}

    Rect(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
        : m_left(left), m_right(right), m_top(top), m_bottom(bottom)
    {
        assert(left <= right);
        assert(top <= bottom);
    }

    uint32_t Width() const { return m_right - m_left; }
    uint32_t Height() const { return m_bottom - m_top; }

    // expand rect x units in each direction
    // NOTE(review): wraps around (unsigned underflow) if x > m_left or x > m_top
    void Expand(uint32_t x)
    {
        m_left -= x;
        m_right += x;
        m_top -= x;
        m_bottom += x;
    }

    uint32_t Left() const { return m_left; }
    uint32_t Right() const { return m_right; }
    uint32_t Top() const { return m_top; }
    uint32_t Bottom() const { return m_bottom; }

    bool Contains(uint32_t x, uint32_t y) const
    {
        const bool insideX = (x >= m_left) && (x <= m_right);
        const bool insideY = (y >= m_top) && (y <= m_bottom);

        return insideX && insideY;
    }

    uint32_t m_left;
    uint32_t m_right;
    uint32_t m_top;
    uint32_t m_bottom;
};
+
+// doesn't really belong here but efficient (and I believe correct) in place random shuffle based on the Fisher-Yates / Knuth algorithm
+template <typename T>
+void RandomShuffle(T begin, T end)
+{
+ assert(end > begin);
+ uint32_t n = distance(begin, end);
+
+ for (uint32_t i=0; i < n; ++i)
+ {
+ // pick a random number between 0 and n-1
+ uint32_t r = Rand() % (n-i);
+
+ // swap that location with the current randomly selected position
+ swap(*(begin+i), *(begin+(i+r)));
+ }
+}
+
+
+CUDA_CALLABLE inline Quat QuatFromAxisAngle(const Vec3& axis, float angle)
+{
+ Vec3 v = Normalize(axis);
+
+ float half = angle*0.5f;
+ float w = cosf(half);
+
+ const float sin_theta_over_two = sinf(half);
+ v *= sin_theta_over_two;
+
+ return Quat(v.x, v.y, v.z, w);
+}
+
+
+// rotate by quaternion (q, w)
+CUDA_CALLABLE inline Vec3 rotate(const Vec3& q, float w, const Vec3& x)
+{
+ return 2.0f*(x*(w*w-0.5f) + Cross(q, x)*w + q*Dot(q, x));
+}
+
+// rotate x by inverse transform in (q, w)
+CUDA_CALLABLE inline Vec3 rotateInv(const Vec3& q, float w, const Vec3& x)
+{
+ return 2.0f*(x*(w*w-0.5f) - Cross(q, x)*w + q*Dot(q, x));
+}
+
// Computes the axis-aligned extents of a box with the given extents after
// rotation by q (sum of absolute values of the scaled rotation columns).
CUDA_CALLABLE inline void TransformBounds(const Quat& q, Vec3 extents, Vec3& newExtents)
{
    Matrix33 transform(q);

    transform.cols[0] *= extents.x;
    transform.cols[1] *= extents.y;
    transform.cols[2] *= extents.z;

    float ex = fabsf(transform.cols[0].x) + fabsf(transform.cols[1].x) + fabsf(transform.cols[2].x);
    float ey = fabsf(transform.cols[0].y) + fabsf(transform.cols[1].y) + fabsf(transform.cols[2].y);
    float ez = fabsf(transform.cols[0].z) + fabsf(transform.cols[1].z) + fabsf(transform.cols[2].z);

    newExtents = Vec3(ex, ey, ez);
}

// Transforms the local AABB [localLower, localUpper] by scale, rotation and
// translation (in that order), writing the bounds of the transformed box.
CUDA_CALLABLE inline void TransformBounds(const Vec3& localLower, const Vec3& localUpper, const Vec3& translation, const Quat& rotation, float scale, Vec3& lower, Vec3& upper)
{
    Matrix33 transform(rotation);

    Vec3 extents = (localUpper-localLower)*scale;

    transform.cols[0] *= extents.x;
    transform.cols[1] *= extents.y;
    transform.cols[2] *= extents.z;

    // world-space extents of the rotated box
    float ex = fabsf(transform.cols[0].x) + fabsf(transform.cols[1].x) + fabsf(transform.cols[2].x);
    float ey = fabsf(transform.cols[0].y) + fabsf(transform.cols[1].y) + fabsf(transform.cols[2].y);
    float ez = fabsf(transform.cols[0].z) + fabsf(transform.cols[1].z) + fabsf(transform.cols[2].z);

    Vec3 center = (localUpper+localLower)*0.5f*scale;

    lower = rotation*center + translation - Vec3(ex, ey, ez)*0.5f;
    upper = rotation*center + translation + Vec3(ex, ey, ez)*0.5f;
}
+
// Poisson sample the volume of a sphere with given separation
// Writes up to maxPoints points into 'points' and returns the number
// generated; gives up once maxAttempts consecutive candidates are rejected.
inline int PoissonSample3D(float radius, float separation, Vec3* points, int maxPoints, int maxAttempts)
{
    // naive O(n^2) dart throwing algorithm to generate a Poisson distribution
    int c = 0;
    while (c < maxPoints)
    {
        int a = 0;
        while (a < maxAttempts)
        {
            const Vec3 p = UniformSampleSphereVolume()*radius;

            // test against points already generated
            int i=0;
            for (; i < c; ++i)
            {
                Vec3 d = p-points[i];

                // reject if closer than separation
                if (LengthSq(d) < separation*separation)
                    break;
            }

            // sample passed all tests, accept
            if (i == c)
            {
                points[c] = p;
                ++c;
                break;
            }

            ++a;
        }

        // exit if we reached the max attempts and didn't manage to add a point
        if (a == maxAttempts)
            break;
    }

    return c;
}

// Generates an optimally dense sphere packing at the origin (implicit sphere at the origin)
// Fills 'points' with a hexagonal close packing of spacing 'separation'
// clipped to the given radius; the origin itself is skipped (the implicit
// sphere). Returns the number of points written (capped at maxPoints).
inline int TightPack3D(float radius, float separation, Vec3* points, int maxPoints)
{
    int dim = int(ceilf(radius/separation));

    int c = 0;

    for (int z=-dim; z <= dim; ++z)
    {
        for (int y=-dim; y <= dim; ++y)
        {
            for (int x=-dim; x <= dim; ++x)
            {
                // alternate rows are offset by half a separation (HCP layout)
                float xpos = x*separation + (((y+z)&1)?separation*0.5f:0.0f);
                float ypos = y*sqrtf(0.75f)*separation;
                float zpos = z*sqrtf(0.75f)*separation;

                Vec3 p(xpos, ypos, zpos);

                // skip center
                if (LengthSq(p) == 0.0f)
                    continue;

                if (c < maxPoints && Length(p) <= radius)
                {
                    points[c] = p;
                    ++c;
                }
            }
        }
    }

    return c;
}
+
+
// Axis-aligned bounding box. Default-constructs to an inverted (empty)
// box so that Union() with any point/box initializes it correctly.
struct Bounds
{
    CUDA_CALLABLE inline Bounds() : lower( FLT_MAX)
                                  , upper(-FLT_MAX) {}

    CUDA_CALLABLE inline Bounds(const Vec3& lower, const Vec3& upper) : lower(lower), upper(upper) {}

    CUDA_CALLABLE inline Vec3 GetCenter() const { return 0.5f*(lower+upper); }
    CUDA_CALLABLE inline Vec3 GetEdges() const { return upper-lower; }

    // grow the box by r in every direction
    CUDA_CALLABLE inline void Expand(float r)
    {
        lower -= Vec3(r);
        upper += Vec3(r);
    }

    CUDA_CALLABLE inline void Expand(const Vec3& r)
    {
        lower -= r;
        upper += r;
    }

    // note: a zero-width box on any axis counts as empty (>= comparison)
    CUDA_CALLABLE inline bool Empty() const { return lower.x >= upper.x || lower.y >= upper.y || lower.z >= upper.z; }

    // point containment test (inclusive bounds)
    CUDA_CALLABLE inline bool Overlaps(const Vec3& p) const
    {
        if (p.x < lower.x ||
            p.y < lower.y ||
            p.z < lower.z ||
            p.x > upper.x ||
            p.y > upper.y ||
            p.z > upper.z)
        {
            return false;
        }
        else
        {
            return true;
        }
    }

    // box overlap test (touching boxes count as overlapping)
    CUDA_CALLABLE inline bool Overlaps(const Bounds& b) const
    {
        if (lower.x > b.upper.x ||
            lower.y > b.upper.y ||
            lower.z > b.upper.z ||
            upper.x < b.lower.x ||
            upper.y < b.lower.y ||
            upper.z < b.lower.z)
        {
            return false;
        }
        else
        {
            return true;
        }
    }

    Vec3 lower;
    Vec3 upper;
};

// Smallest bounds containing both a and the point b.
CUDA_CALLABLE inline Bounds Union(const Bounds& a, const Vec3& b)
{
    return Bounds(Min(a.lower, b), Max(a.upper, b));
}

// Smallest bounds containing both a and b.
CUDA_CALLABLE inline Bounds Union(const Bounds& a, const Bounds& b)
{
    return Bounds(Min(a.lower, b.lower), Max(a.upper, b.upper));
}

// Overlap region of a and b; the result is inverted (Empty()) when the
// boxes do not intersect.
CUDA_CALLABLE inline Bounds Intersection(const Bounds& a, const Bounds& b)
{
    return Bounds(Max(a.lower, b.lower), Min(a.upper, b.upper));
}
diff --git a/core/matnn.h b/core/matnn.h
new file mode 100644
index 0000000..b8107f9
--- /dev/null
+++ b/core/matnn.h
@@ -0,0 +1,326 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
// Small fixed-size dense matrix with column major storage.
// T must be trivially copyable (memcpy/memset are used internally).
template <int m, int n, typename T=double>
class XMatrix
{
public:

	// zero-initialized matrix
	XMatrix()
	{
		memset(data, 0, sizeof(data));
	}

	// fixed: copy from the same element type T (previously took
	// XMatrix<m, n> which defaults to double and memcpy'd across types)
	XMatrix(const XMatrix<m, n, T>& a)
	{
		memcpy(data, a.data, sizeof(data));
	}

	// construct from a pointer, values are read in column major order
	template <typename OtherT>
	XMatrix(const OtherT* ptr)
	{
		for (int j=0; j < n; ++j)
			for (int i=0; i < m; ++i)
				data[j][i] = *(ptr++);
	}

	const XMatrix<m,n,T>& operator=(const XMatrix<m,n,T>& a)
	{
		memcpy(data, a.data, sizeof(data));
		return *this;
	}

	// overwrite column j with the m x 1 vector c
	template <typename OtherT>
	void SetCol(int j, const XMatrix<m, 1, OtherT>& c)
	{
		for (int i=0; i < m; ++i)
			data[j][i] = c(i, 0);
	}

	// overwrite row i with the 1 x n vector r
	template <typename OtherT>
	void SetRow(int i, const XMatrix<1, n, OtherT>& r)
	{
		// fixed: iterate all n columns (previously looped to m, which
		// truncates or overruns the row for non-square matrices)
		for (int j=0; j < n; ++j)
			data[j][i] = r(0, j);
	}

	T& operator()(int row, int col) { return data[col][row]; }
	const T& operator()(int row, int col) const { return data[col][row]; }

	// set ones on the main diagonal, zeros elsewhere
	void SetIdentity()
	{
		// fixed: data is indexed [col][row]; the previous version wrote
		// data[row][col], reading/writing out of bounds when m != n
		for (int i=0; i < m; ++i)
			for (int j=0; j < n; ++j)
				data[j][i] = (i == j) ? T(1.0) : T(0.0);
	}

	// column major storage
	T data[n][m];
};
+
+template <int m, int n, typename T>
+XMatrix<m, n, T> operator-(const XMatrix<m, n, T>& lhs, const XMatrix<m, n, T>& rhs)
+{
+ XMatrix<m, n> d;
+
+ for (int i=0; i < m; ++i)
+ for (int j=0; j < n; ++j)
+ d(i, j) = lhs(i,j)-rhs(i,j);
+
+
+ return d;
+}
+
+template <int m, int n, typename T>
+XMatrix<m, n, T> operator+(const XMatrix<m, n, T>& lhs, const XMatrix<m, n, T>& rhs)
+{
+ XMatrix<m, n> d;
+
+ for (int i=0; i < m; ++i)
+ for (int j=0; j < n; ++j)
+ d(i, j) = lhs(i,j)+rhs(i,j);
+
+
+ return d;
+}
+
+
+template <int m, int n, int o, typename T>
+XMatrix<m, o> Multiply(const XMatrix<m, n, T>& lhs, const XMatrix<n, o, T>& rhs)
+{
+ XMatrix<m, o> ret;
+
+ for (int i=0; i < m; ++i)
+ {
+ for (int j=0; j < o; ++j)
+ {
+ T sum = 0.0f;
+
+ for (int k=0; k < n; ++k)
+ {
+ sum += lhs(i, k)*rhs(k, j);
+ }
+
+ ret(i, j) = sum;
+ }
+ }
+
+ return ret;
+}
+
+
+template <int m, int n>
+XMatrix<n, m> Transpose(const XMatrix<m, n>& a)
+{
+ XMatrix<n, m> ret;
+
+ for (int i=0; i < m; ++i)
+ {
+ for (int j=0; j < n; ++j)
+ {
+ ret(j, i) = a(i, j);
+ }
+ }
+
+ return ret;
+}
+
+
+
+// matrix to swap row i and j when multiplied on the right
+template <int n>
+XMatrix<n,n> Permutation(int i, int j)
+{
+ XMatrix<n, n> m;
+ m.SetIdentity();
+
+ m(i, i) = 0.0;
+ m(i, j) = 1.0;
+
+ m(j, j) = 0.0;
+ m(j, i) = 1.0;
+
+ return m;
+}
+
+template <int m, int n>
+void PrintMatrix(const char* name, XMatrix<m, n> a)
+{
+ printf("%s = [\n", name);
+
+ for (int i=0; i < m; ++i)
+ {
+ printf ("[ ");
+
+ for (int j=0; j < n; ++j)
+ {
+ printf("% .4f", float(a(i, j)));
+
+ if (j < n-1)
+ printf(" ");
+ }
+
+ printf(" ]\n");
+ }
+
+ printf("]\n");
+}
+
+template <int n, typename T>
+XMatrix<n, n, T> LU(const XMatrix<n ,n, T>& m, XMatrix<n,n, T>& L)
+{
+ XMatrix<n,n> U = m;
+ L.SetIdentity();
+
+ // for each row
+ for (int j=0; j < n; ++j)
+ {
+ XMatrix<n,n> Li, LiInv;
+ Li.SetIdentity();
+ LiInv.SetIdentity();
+
+ T pivot = U(j, j);
+
+ if (pivot == 0.0)
+ return U;
+
+ assert(pivot != 0.0);
+
+ // zero our all entries below pivot
+ for (int i=j+1; i < n; ++i)
+ {
+ T l = -U(i, j)/pivot;
+
+ Li(i,j) = l;
+
+ // create inverse of L1..Ln as we go (this is L)
+ L(i,j) = -l;
+ }
+
+ U = Multiply(Li, U);
+ }
+
+ return U;
+}
+
+template <int m, typename T>
+XMatrix<m, 1, T> Solve(const XMatrix<m, m, T>& L, const XMatrix<m, m, T>& U, const XMatrix<m, 1, T>& b)
+{
+ XMatrix<m, 1> y;
+ XMatrix<m, 1> x;
+
+ // Ly = b (forward substitution)
+ for (int i=0; i < m; ++i)
+ {
+ T sum = 0.0;
+
+ for (int j=0; j < i; ++j)
+ {
+ sum += y(j, 0)*L(i, j);
+ }
+
+ assert(L(i,i) != 0.0);
+
+ y(i, 0) = (b(i,0) - sum) / L(i, i);
+ }
+
+ // Ux = y (back substitution)
+ for (int i=m-1; i >= 0; --i)
+ {
+ T sum = 0.0;
+
+ for (int j=i+1; j < m; ++j)
+ {
+ sum += x(j, 0)*U(i, j);
+ }
+
+ assert(U(i,i) != 0.0);
+
+ x(i, 0) = (y(i, 0) - sum) / U(i,i);
+ }
+
+ return x;
+}
+
+template <int n, typename T>
+T Determinant(const XMatrix<n, n, T>& A, XMatrix<n, n, T>& L, XMatrix<n, n, T>& U)
+{
+ U = LU(A, L);
+
+ // determinant is the product of diagonal entries of U (assume L has 1s on diagonal)
+ T det = 1.0;
+ for (int i=0; i < n; ++i)
+ det *= U(i, i);
+
+ return det;
+}
+
+template <int n, typename T>
+XMatrix<n, n, T> Inverse(const XMatrix<n, n, T>& A, T& det)
+{
+ XMatrix<n,n> L, U;
+ det = Determinant(A, L, U);
+
+ XMatrix<n,n> Inv;
+
+ if (det != 0.0f)
+ {
+ for (int i=0; i < n; ++i)
+ {
+ // solve for each column of the identity matrix
+ XMatrix<n, 1> I;
+ I(i, 0) = 1.0;
+
+ XMatrix<n, 1> x = Solve(L, U, I);
+
+ Inv.SetCol(i, x);
+ }
+ }
+
+ return Inv;
+}
+
+template <int m, int n, typename T>
+T FrobeniusNorm(const XMatrix<m, n, T>& A)
+{
+ T sum = 0.0;
+ for (int i=0; i < m; ++i)
+ for (int j=0; j < n; ++j)
+ sum += A(i,j)*A(i,j);
+
+ return sqrt(sum);
+}
diff --git a/core/mesh.cpp b/core/mesh.cpp
new file mode 100644
index 0000000..371e664
--- /dev/null
+++ b/core/mesh.cpp
@@ -0,0 +1,968 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "mesh.h"
+#include "platform.h"
+
+#include <map>
+#include <fstream>
+#include <iostream>
+
+using namespace std;
+
+void Mesh::DuplicateVertex(uint32_t i)
+{
+ assert(m_positions.size() > i);
+ m_positions.push_back(m_positions[i]);
+
+ if (m_normals.size() > i)
+ m_normals.push_back(m_normals[i]);
+
+ if (m_colours.size() > i)
+ m_colours.push_back(m_colours[i]);
+
+ if (m_texcoords[0].size() > i)
+ m_texcoords[0].push_back(m_texcoords[0][i]);
+
+ if (m_texcoords[1].size() > i)
+ m_texcoords[1].push_back(m_texcoords[1][i]);
+
+}
+
+void Mesh::Normalize(float s)
+{
+ Vec3 lower, upper;
+ GetBounds(lower, upper);
+ Vec3 edges = upper-lower;
+
+ Transform(TranslationMatrix(Point3(-lower)));
+
+ float maxEdge = max(edges.x, max(edges.y, edges.z));
+ Transform(ScaleMatrix(s/maxEdge));
+}
+
+void Mesh::CalculateNormals()
+{
+ m_normals.resize(0);
+ m_normals.resize(m_positions.size());
+
+ int numTris = int(GetNumFaces());
+
+ for (int i=0; i < numTris; ++i)
+ {
+ int a = m_indices[i*3+0];
+ int b = m_indices[i*3+1];
+ int c = m_indices[i*3+2];
+
+ Vec3 n = Cross(m_positions[b]-m_positions[a], m_positions[c]-m_positions[a]);
+
+ m_normals[a] += n;
+ m_normals[b] += n;
+ m_normals[c] += n;
+ }
+
+ int numVertices = int(GetNumVertices());
+
+ for (int i=0; i < numVertices; ++i)
+ m_normals[i] = ::Normalize(m_normals[i]);
+}
+
namespace
{

	// supported PLY encodings
	enum PlyFormat
	{
		eAscii,
		eBinaryBigEndian
	};

	// Reads a single value of type T from the stream, honouring the file
	// encoding. Binary values are byte-swapped from big endian to host order.
	template <typename T>
	T PlyRead(std::ifstream& s, PlyFormat format)
	{
		T value = T(0);

		if (format == eAscii)
		{
			s >> value;
		}
		else if (format == eBinaryBigEndian)
		{
			char bytes[sizeof(T)];
			s.read(bytes, sizeof(T));
			std::reverse(bytes, bytes + sizeof(T));
			value = *(T*)bytes;
		}
		else
		{
			assert(0);
		}

		return value;
	}

} // namespace anonymous
+
+
+Mesh* ImportMesh(const char* path)
+{
+ std::string ext = GetExtension(path);
+
+ Mesh* mesh = NULL;
+
+ if (ext == "ply")
+ mesh = ImportMeshFromPly(path);
+ else if (ext == "obj")
+ mesh = ImportMeshFromObj(path);
+
+
+ return mesh;
+}
+
+Mesh* ImportMeshFromBin(const char* path)
+{
+ double start = GetSeconds();
+
+ FILE* f = fopen(path, "rb");
+
+ if (f)
+ {
+ int numVertices;
+ int numIndices;
+
+ size_t len;
+ len = fread(&numVertices, sizeof(numVertices), 1, f);
+ len = fread(&numIndices, sizeof(numIndices), 1, f);
+
+ Mesh* m = new Mesh();
+ m->m_positions.resize(numVertices);
+ m->m_normals.resize(numVertices);
+ m->m_indices.resize(numIndices);
+
+ len = fread(&m->m_positions[0], sizeof(Vec3)*numVertices, 1, f);
+ len = fread(&m->m_normals[0], sizeof(Vec3)*numVertices, 1, f);
+ len = fread(&m->m_indices[0], sizeof(int)*numIndices, 1, f);
+
+ (void)len;
+
+ fclose(f);
+
+ double end = GetSeconds();
+
+ printf("Imported mesh %s in %f ms\n", path, (end-start)*1000.0f);
+
+ return m;
+ }
+
+ return NULL;
+}
+
+void ExportMeshToBin(const char* path, const Mesh* m)
+{
+ FILE* f = fopen(path, "wb");
+
+ if (f)
+ {
+ int numVertices = int(m->m_positions.size());
+ int numIndices = int(m->m_indices.size());
+
+ fwrite(&numVertices, sizeof(numVertices), 1, f);
+ fwrite(&numIndices, sizeof(numIndices), 1, f);
+
+ // write data blocks
+ fwrite(&m->m_positions[0], sizeof(Vec3)*numVertices, 1, f);
+ fwrite(&m->m_normals[0], sizeof(Vec3)*numVertices, 1, f);
+ fwrite(&m->m_indices[0], sizeof(int)*numIndices, 1, f);
+
+ fclose(f);
+ }
+}
+
+Mesh* ImportMeshFromPly(const char* path)
+{
+ ifstream file(path, ios_base::in | ios_base::binary);
+
+ if (!file)
+ return NULL;
+
+ // some scratch memory
+ const uint32_t kMaxLineLength = 1024;
+ char buffer[kMaxLineLength];
+
+ //double startTime = GetSeconds();
+
+ file >> buffer;
+ if (strcmp(buffer, "ply") != 0)
+ return NULL;
+
+ PlyFormat format = eAscii;
+
+ uint32_t numFaces = 0;
+ uint32_t numVertices = 0;
+
+ const uint32_t kMaxProperties = 16;
+ uint32_t numProperties = 0;
+ float properties[kMaxProperties];
+
+ bool vertexElement = false;
+
+ while (file)
+ {
+ file >> buffer;
+
+ if (strcmp(buffer, "element") == 0)
+ {
+ file >> buffer;
+
+ if (strcmp(buffer, "face") == 0)
+ {
+ vertexElement = false;
+ file >> numFaces;
+ }
+
+ else if (strcmp(buffer, "vertex") == 0)
+ {
+ vertexElement = true;
+ file >> numVertices;
+ }
+ }
+ else if (strcmp(buffer, "format") == 0)
+ {
+ file >> buffer;
+ if (strcmp(buffer, "ascii") == 0)
+ {
+ format = eAscii;
+ }
+ else if (strcmp(buffer, "binary_big_endian") == 0)
+ {
+ format = eBinaryBigEndian;
+ }
+ else
+ {
+ printf("Ply: unknown format\n");
+ return NULL;
+ }
+ }
+ else if (strcmp(buffer, "property") == 0)
+ {
+ if (vertexElement)
+ ++numProperties;
+ }
+ else if (strcmp(buffer, "end_header") == 0)
+ {
+ break;
+ }
+ }
+
+ // eat newline
+ char nl;
+ file.read(&nl, 1);
+
+ // debug
+#if ENABLE_VERBOSE_OUTPUT
+ printf ("Loaded mesh: %s numFaces: %d numVertices: %d format: %d numProperties: %d\n", path, numFaces, numVertices, format, numProperties);
+#endif
+
+ Mesh* mesh = new Mesh;
+
+ mesh->m_positions.resize(numVertices);
+ mesh->m_normals.resize(numVertices);
+ mesh->m_colours.resize(numVertices, Colour(1.0f, 1.0f, 1.0f, 1.0f));
+
+ mesh->m_indices.reserve(numFaces*3);
+
+ // read vertices
+ for (uint32_t v=0; v < numVertices; ++v)
+ {
+ for (uint32_t i=0; i < numProperties; ++i)
+ {
+ properties[i] = PlyRead<float>(file, format);
+ }
+
+ mesh->m_positions[v] = Point3(properties[0], properties[1], properties[2]);
+ mesh->m_normals[v] = Vector3(0.0f, 0.0f, 0.0f);
+ }
+
+ // read indices
+ for (uint32_t f=0; f < numFaces; ++f)
+ {
+ uint32_t numIndices = (format == eAscii)?PlyRead<uint32_t>(file, format):PlyRead<uint8_t>(file, format);
+ uint32_t indices[4];
+
+ for (uint32_t i=0; i < numIndices; ++i)
+ {
+ indices[i] = PlyRead<uint32_t>(file, format);
+ }
+
+ switch (numIndices)
+ {
+ case 3:
+ mesh->m_indices.push_back(indices[0]);
+ mesh->m_indices.push_back(indices[1]);
+ mesh->m_indices.push_back(indices[2]);
+ break;
+ case 4:
+ mesh->m_indices.push_back(indices[0]);
+ mesh->m_indices.push_back(indices[1]);
+ mesh->m_indices.push_back(indices[2]);
+
+ mesh->m_indices.push_back(indices[2]);
+ mesh->m_indices.push_back(indices[3]);
+ mesh->m_indices.push_back(indices[0]);
+ break;
+
+ default:
+ assert(!"invalid number of indices, only support tris and quads");
+ break;
+ };
+
+ // calculate vertex normals as we go
+ Point3& v0 = mesh->m_positions[indices[0]];
+ Point3& v1 = mesh->m_positions[indices[1]];
+ Point3& v2 = mesh->m_positions[indices[2]];
+
+ Vector3 n = SafeNormalize(Cross(v1-v0, v2-v0), Vector3(0.0f, 1.0f, 0.0f));
+
+ for (uint32_t i=0; i < numIndices; ++i)
+ {
+ mesh->m_normals[indices[i]] += n;
+ }
+ }
+
+ for (uint32_t i=0; i < numVertices; ++i)
+ {
+ mesh->m_normals[i] = SafeNormalize(mesh->m_normals[i], Vector3(0.0f, 1.0f, 0.0f));
+ }
+
+ //cout << "Imported mesh " << path << " in " << (GetSeconds()-startTime)*1000.f << "ms" << endl;
+
+ return mesh;
+
+}
+
+// composite key used to de-duplicate OBJ face vertices on their
+// (position, texcoord, normal) index triple
struct VertexKey
{
	VertexKey() : v(0), vt(0), vn(0) {}

	// 1-based OBJ indices; 0 means "not specified"
	uint32_t v, vt, vn;

	bool operator == (const VertexKey& rhs) const
	{
		return v == rhs.v && vt == rhs.vt && vn == rhs.vn;
	}

	// lexicographic ordering on (v, vt, vn), for use as a std::map key
	bool operator < (const VertexKey& rhs) const
	{
		if (v != rhs.v)
			return v < rhs.v;

		if (vt != rhs.vt)
			return vt < rhs.vt;

		return vn < rhs.vn;
	}
};
+
// Loads a mesh from a Wavefront OBJ file.
// Supports v/vt/vn records and tri/quad faces (quads are triangulated);
// vertices are de-duplicated on the (v, vt, vn) index triple via VertexKey.
// Smooth normals are accumulated from face geometry after parsing.
// Returns NULL if the file cannot be opened.
// NOTE(review): face indices are assumed positive and in range; negative
// (relative) OBJ indices are not handled, and an out-of-range or zero index
// would read past the temporary arrays — confirm inputs are trusted.
Mesh* ImportMeshFromObj(const char* path)
{
	ifstream file(path);

	if (!file)
		return NULL;

	Mesh* m = new Mesh();

	// raw records from the file; faces index into these 1-based
	vector<Point3> positions;
	vector<Vector3> normals;
	vector<Vector2> texcoords;
	vector<Vector3> colors;	// NOTE(review): declared but never used in this function
	vector<uint32_t>& indices = m->m_indices;

	//typedef unordered_map<VertexKey, uint32_t, MemoryHash<VertexKey> > VertexMap;
	typedef map<VertexKey, uint32_t> VertexMap;
	VertexMap vertexLookup;

	// some scratch memory
	const uint32_t kMaxLineLength = 1024;
	char buffer[kMaxLineLength];

	//double startTime = GetSeconds();

	while (file)
	{
		// read the record tag; "vn"/"vt" must be tested before the bare 'v' case
		file >> buffer;

		if (strcmp(buffer, "vn") == 0)
		{
			// normals
			float x, y, z;
			file >> x >> y >> z;

			normals.push_back(Vector3(x, y, z));
		}
		else if (strcmp(buffer, "vt") == 0)
		{
			// texture coords
			float u, v;
			file >> u >> v;

			texcoords.push_back(Vector2(u, v));
		}
		else if (buffer[0] == 'v')
		{
			// positions
			float x, y, z;
			file >> x >> y >> z;

			positions.push_back(Point3(x, y, z));
		}
		else if (buffer[0] == 's' || buffer[0] == 'g' || buffer[0] == 'o')
		{
			// ignore smoothing groups, groups and objects
			char linebuf[256];
			file.getline(linebuf, 256);
		}
		else if (strcmp(buffer, "mtllib") == 0)
		{
			// ignored
			std::string MaterialFile;
			file >> MaterialFile;
		}
		else if (strcmp(buffer, "usemtl") == 0)
		{
			// read Material name
			std::string materialName;
			file >> materialName;
		}
		else if (buffer[0] == 'f')
		{
			// faces
			uint32_t faceIndices[4];
			uint32_t faceIndexCount = 0;

			// parse up to four "v", "v/vt", "v//vn" or "v/vt/vn" groups
			for (int i=0; i < 4; ++i)
			{
				VertexKey key;

				file >> key.v;

				if (!file.eof())
				{
					// failed to read another index continue on
					if (file.fail())
					{
						file.clear();
						break;
					}

					// optional texcoord/normal indices separated by '/'
					if (file.peek() == '/')
					{
						file.ignore();

						if (file.peek() != '/')
						{
							file >> key.vt;
						}

						if (file.peek() == '/')
						{
							file.ignore();
							file >> key.vn;
						}
					}

					// find / add vertex, index
					VertexMap::iterator iter = vertexLookup.find(key);

					if (iter != vertexLookup.end())
					{
						faceIndices[faceIndexCount++] = iter->second;
					}
					else
					{
						// add vertex
						uint32_t newIndex = uint32_t(m->m_positions.size());
						faceIndices[faceIndexCount++] = newIndex;

						vertexLookup.insert(make_pair(key, newIndex));

						// push back vertex data
						assert(key.v > 0);

						// NOTE(review): no bounds check on key.v/key.vt/key.vn
						// against the parsed arrays — malformed files read OOB
						m->m_positions.push_back(positions[key.v-1]);

						// obj format doesn't support mesh colours so add default value
						m->m_colours.push_back(Colour(1.0f, 1.0f, 1.0f));

						// normal [optional]
						if (key.vn)
						{
							m->m_normals.push_back(normals[key.vn-1]);
						}

						// texcoord [optional]
						if (key.vt)
						{
							m->m_texcoords[0].push_back(texcoords[key.vt-1]);
						}
					}
				}
			}

			if (faceIndexCount == 3)
			{
				// a triangle
				indices.insert(indices.end(), faceIndices, faceIndices+3);
			}
			else if (faceIndexCount == 4)
			{
				// a quad, triangulate clockwise
				indices.insert(indices.end(), faceIndices, faceIndices+3);

				indices.push_back(faceIndices[2]);
				indices.push_back(faceIndices[3]);
				indices.push_back(faceIndices[0]);
			}
			else
			{
				cout << "Face with more than 4 vertices are not supported" << endl;
			}

		}
		else if (buffer[0] == '#')
		{
			// comment
			char linebuf[256];
			file.getline(linebuf, 256);
		}
	}

	// calculate normals if none specified in file
	// (face normals are accumulated on top of any file-specified normals,
	// then everything is renormalized)
	m->m_normals.resize(m->m_positions.size());

	const uint32_t numFaces = uint32_t(indices.size())/3;
	for (uint32_t i=0; i < numFaces; ++i)
	{
		uint32_t a = indices[i*3+0];
		uint32_t b = indices[i*3+1];
		uint32_t c = indices[i*3+2];

		Point3& v0 = m->m_positions[a];
		Point3& v1 = m->m_positions[b];
		Point3& v2 = m->m_positions[c];

		Vector3 n = SafeNormalize(Cross(v1-v0, v2-v0), Vector3(0.0f, 1.0f, 0.0f));

		m->m_normals[a] += n;
		m->m_normals[b] += n;
		m->m_normals[c] += n;
	}

	for (uint32_t i=0; i < m->m_normals.size(); ++i)
	{
		m->m_normals[i] = SafeNormalize(m->m_normals[i], Vector3(0.0f, 1.0f, 0.0f));
	}

	//cout << "Imported mesh " << path << " in " << (GetSeconds()-startTime)*1000.f << "ms" << endl;

	return m;
}
+
+void ExportToObj(const char* path, const Mesh& m)
+{
+ ofstream file(path);
+
+ if (!file)
+ return;
+
+ file << "# positions" << endl;
+
+ for (uint32_t i=0; i < m.m_positions.size(); ++i)
+ {
+ Point3 v = m.m_positions[i];
+ file << "v " << v.x << " " << v.y << " " << v.z << endl;
+ }
+
+ file << "# texcoords" << endl;
+
+ for (uint32_t i=0; i < m.m_texcoords[0].size(); ++i)
+ {
+ Vec2 t = m.m_texcoords[0][i];
+ file << "vt " << t.x << " " << t.y << endl;
+ }
+
+ file << "# normals" << endl;
+
+ for (uint32_t i=0; i < m.m_normals.size(); ++i)
+ {
+ Vec3 n = m.m_normals[0][i];
+ file << "vn " << n.x << " " << n.y << " " << n.z << endl;
+ }
+
+ file << "# faces" << endl;
+
+ for (uint32_t i=0; i < m.m_indices.size()/3; ++i)
+ {
+ uint32_t j = i+1;
+
+ // no sharing, assumes there is a unique position, texcoord and normal for each vertex
+ file << "f " << j << "/" << j << "/" << j << endl;
+ }
+}
+
+void Mesh::AddMesh(const Mesh& m)
+{
+ uint32_t offset = uint32_t(m_positions.size());
+
+ // add new vertices
+ m_positions.insert(m_positions.end(), m.m_positions.begin(), m.m_positions.end());
+ m_normals.insert(m_normals.end(), m.m_normals.begin(), m.m_normals.end());
+ m_colours.insert(m_colours.end(), m.m_colours.begin(), m.m_colours.end());
+
+ // add new indices with offset
+ for (uint32_t i=0; i < m.m_indices.size(); ++i)
+ {
+ m_indices.push_back(m.m_indices[i]+offset);
+ }
+}
+
+void Mesh::Transform(const Matrix44& m)
+{
+ for (uint32_t i=0; i < m_positions.size(); ++i)
+ {
+ m_positions[i] = m*m_positions[i];
+ m_normals[i] = m*m_normals[i];
+ }
+}
+
+void Mesh::GetBounds(Vector3& outMinExtents, Vector3& outMaxExtents) const
+{
+ Point3 minExtents(FLT_MAX);
+ Point3 maxExtents(-FLT_MAX);
+
+ // calculate face bounds
+ for (uint32_t i=0; i < m_positions.size(); ++i)
+ {
+ const Point3& a = m_positions[i];
+
+ minExtents = Min(a, minExtents);
+ maxExtents = Max(a, maxExtents);
+ }
+
+ outMinExtents = Vector3(minExtents);
+ outMaxExtents = Vector3(maxExtents);
+}
+
+Mesh* CreateTriMesh(float size, float y)
+{
+ uint32_t indices[] = { 0, 1, 2 };
+ Point3 positions[3];
+ Vector3 normals[3];
+
+ positions[0] = Point3(-size, y, size);
+ positions[1] = Point3(size, y, size);
+ positions[2] = Point3(size, y, -size);
+
+ normals[0] = Vector3(0.0f, 1.0f, 0.0f);
+ normals[1] = Vector3(0.0f, 1.0f, 0.0f);
+ normals[2] = Vector3(0.0f, 1.0f, 0.0f);
+
+ Mesh* m = new Mesh();
+ m->m_indices.insert(m->m_indices.begin(), indices, indices + 3);
+ m->m_positions.insert(m->m_positions.begin(), positions, positions + 3);
+ m->m_normals.insert(m->m_normals.begin(), normals, normals + 3);
+
+ return m;
+}
+
// Builds a unit cube (edge length 1, centered at the origin) with
// duplicated vertices so each face gets its own flat normal.
Mesh* CreateCubeMesh()
{
	// 24 vertices: the 8 corners duplicated wherever adjacent faces need
	// a distinct normal at the same position
	const Point3 vertices[24] =
	{
		Point3(0.5, 0.5, 0.5),
		Point3(-0.5, 0.5, 0.5),
		Point3(0.5, -0.5, 0.5),
		Point3(-0.5, -0.5, 0.5),
		Point3(0.5, 0.5, -0.5),
		Point3(-0.5, 0.5, -0.5),
		Point3(0.5, -0.5, -0.5),
		Point3(-0.5, -0.5, -0.5),
		Point3(0.5, 0.5, 0.5),
		Point3(0.5, -0.5, 0.5),
		Point3(0.5, 0.5, 0.5),
		Point3(0.5, 0.5, -0.5),
		Point3(-0.5, 0.5, 0.5),
		Point3(-0.5, 0.5, -0.5),
		Point3(0.5, -0.5, -0.5),
		Point3(0.5, 0.5, -0.5),
		Point3(-0.5, -0.5, -0.5),
		Point3(0.5, -0.5, -0.5),
		Point3(-0.5, -0.5, 0.5),
		Point3(0.5, -0.5, 0.5),
		Point3(-0.5, -0.5, -0.5),
		Point3(-0.5, -0.5, 0.5),
		Point3(-0.5, 0.5, -0.5),
		Point3(-0.5, 0.5, 0.5)
	};

	// per-vertex face normals, parallel to the vertices array above
	const Vec3 normals[24] =
	{
		Vec3(0.0f, 0.0f, 1.0f),
		Vec3(0.0f, 0.0f, 1.0f),
		Vec3(0.0f, 0.0f, 1.0f),
		Vec3(0.0f, 0.0f, 1.0f),
		Vec3(1.0f, 0.0f, 0.0f),
		Vec3(0.0f, 1.0f, 0.0f),
		Vec3(1.0f, 0.0f, 0.0f),
		Vec3(0.0f, 0.0f, -1.0f),
		Vec3(1.0f, 0.0f, 0.0f),
		Vec3(1.0f, 0.0f, 0.0f),
		Vec3(0.0f, 1.0f, 0.0f),
		Vec3(0.0f, 1.0f, 0.0f),
		Vec3(0.0f, 1.0f, 0.0f),
		Vec3(0.0f, 0.0f, -1.0f),
		Vec3(0.0f, 0.0f, -1.0f),
		Vec3(-0.0f, -0.0f, -1.0f),
		Vec3(0.0f, -1.0f, 0.0f),
		Vec3(0.0f, -1.0f, 0.0f),
		Vec3(0.0f, -1.0f, 0.0f),
		Vec3(-0.0f, -1.0f, -0.0f),
		Vec3(-1.0f, 0.0f, 0.0f),
		Vec3(-1.0f, 0.0f, 0.0f),
		Vec3(-1.0f, 0.0f, 0.0f),
		Vec3(-1.0f, -0.0f, -0.0f)
	};

	// 12 triangles, 2 per face: +z, +x, +y, -z, -y, -x
	const int indices[36] =
	{
		0, 1, 2,
		3, 2, 1,
		8, 9, 4,
		6, 4, 9,
		10, 11, 12,
		5, 12, 11,
		7, 13, 14,
		15, 14, 13,
		16, 17, 18,
		19, 18, 17,
		20, 21, 22,
		23, 22, 21
	};

	Mesh* m = new Mesh();
	m->m_positions.assign(vertices, vertices+24);
	m->m_normals.assign(normals, normals+24);
	m->m_indices.assign(indices, indices+36);

	return m;

}
+
+Mesh* CreateQuadMesh(float size, float y)
+{
+ uint32_t indices[] = { 0, 1, 2, 2, 3, 0 };
+ Point3 positions[4];
+ Vector3 normals[4];
+
+ positions[0] = Point3(-size, y, size);
+ positions[1] = Point3(size, y, size);
+ positions[2] = Point3(size, y, -size);
+ positions[3] = Point3(-size, y, -size);
+
+ normals[0] = Vector3(0.0f, 1.0f, 0.0f);
+ normals[1] = Vector3(0.0f, 1.0f, 0.0f);
+ normals[2] = Vector3(0.0f, 1.0f, 0.0f);
+ normals[3] = Vector3(0.0f, 1.0f, 0.0f);
+
+ Mesh* m = new Mesh();
+ m->m_indices.insert(m->m_indices.begin(), indices, indices+6);
+ m->m_positions.insert(m->m_positions.begin(), positions, positions+4);
+ m->m_normals.insert(m->m_normals.begin(), normals, normals+4);
+
+ return m;
+}
+
+Mesh* CreateDiscMesh(float radius, uint32_t segments)
+{
+ const uint32_t numVerts = 1 + segments;
+
+ Mesh* m = new Mesh();
+ m->m_positions.resize(numVerts);
+ m->m_normals.resize(numVerts);
+
+ m->m_positions[0] = Point3(0.0f);
+ m->m_positions[1] = Point3(0.0f, 0.0f, radius);
+
+ for (uint32_t i=1; i <= segments; ++i)
+ {
+ uint32_t nextVert = (i+1)%numVerts;
+
+ if (nextVert == 0)
+ nextVert = 1;
+
+ m->m_positions[nextVert] = Point3(radius*Sin((float(i)/segments)*k2Pi), 0.0f, radius*Cos((float(i)/segments)*k2Pi));
+ m->m_normals[nextVert] = Vector3(0.0f, 1.0f, 0.0f);
+
+ m->m_indices.push_back(0);
+ m->m_indices.push_back(i);
+ m->m_indices.push_back(nextVert);
+ }
+
+ return m;
+}
+
+Mesh* CreateTetrahedron(float ground, float height)
+{
+ Mesh* m = new Mesh();
+
+ const float dimValue = 1.0f / sqrtf(2.0f);
+ const Point3 vertices[4] =
+ {
+ Point3(-1.0f, ground, -dimValue),
+ Point3(1.0f, ground, -dimValue),
+ Point3(0.0f, ground + height, dimValue),
+ Point3(0.0f, ground, dimValue)
+ };
+
+ const int indices[12] =
+ {
+ //winding order is counter-clockwise
+ 0, 2, 1,
+ 2, 3, 1,
+ 2, 0, 3,
+ 3, 0, 1
+ };
+
+ m->m_positions.assign(vertices, vertices+4);
+ m->m_indices.assign(indices, indices+12);
+
+ m->CalculateNormals();
+
+ return m;
+}
+
+
+Mesh* CreateSphere(int slices, int segments, float radius)
+{
+ float dTheta = kPi / slices;
+ float dPhi = k2Pi / segments;
+
+ int vertsPerRow = segments + 1;
+
+ Mesh* mesh = new Mesh();
+
+ for (int i = 0; i <= slices; ++i)
+ {
+ float theta = dTheta*i;
+
+ for (int j = 0; j <= segments; ++j)
+ {
+ float phi = dPhi*j;
+
+ float x = sinf(theta)*cosf(phi);
+ float y = cosf(theta);
+ float z = sinf(theta)*sinf(phi);
+
+ mesh->m_positions.push_back(Point3(x, y, z)*radius);
+ mesh->m_normals.push_back(Vec3(x, y, z));
+
+ if (i > 0 && j > 0)
+ {
+ int a = i*vertsPerRow + j;
+ int b = (i - 1)*vertsPerRow + j;
+ int c = (i - 1)*vertsPerRow + j - 1;
+ int d = i*vertsPerRow + j - 1;
+
+ // add a quad for this slice
+ mesh->m_indices.push_back(b);
+ mesh->m_indices.push_back(a);
+ mesh->m_indices.push_back(d);
+
+ mesh->m_indices.push_back(b);
+ mesh->m_indices.push_back(d);
+ mesh->m_indices.push_back(c);
+ }
+ }
+ }
+
+ return mesh;
+}
+
+Mesh* CreateCapsule(int slices, int segments, float radius, float halfHeight)
+{
+ float dTheta = kPi / (slices * 2);
+ float dPhi = k2Pi / segments;
+
+ int vertsPerRow = segments + 1;
+
+ Mesh* mesh = new Mesh();
+
+ float theta = 0.0f;
+
+ for (int i = 0; i <= 2 * slices + 1; ++i)
+ {
+ for (int j = 0; j <= segments; ++j)
+ {
+ float phi = dPhi*j;
+
+ float x = sinf(theta)*cosf(phi);
+ float y = cosf(theta);
+ float z = sinf(theta)*sinf(phi);
+
+ // add y offset based on which hemisphere we're in
+ float yoffset = (i < slices) ? halfHeight : -halfHeight;
+
+ mesh->m_positions.push_back(Point3(x, y, z)*radius + Vec3(0.0f, yoffset, 0.0f));
+ mesh->m_normals.push_back(Vec3(x, y, z));
+
+ if (i > 0 && j > 0)
+ {
+ int a = i*vertsPerRow + j;
+ int b = (i - 1)*vertsPerRow + j;
+ int c = (i - 1)*vertsPerRow + j - 1;
+ int d = i*vertsPerRow + j - 1;
+
+ // add a quad for this slice
+ mesh->m_indices.push_back(b);
+ mesh->m_indices.push_back(a);
+ mesh->m_indices.push_back(d);
+
+ mesh->m_indices.push_back(b);
+ mesh->m_indices.push_back(d);
+ mesh->m_indices.push_back(c);
+ }
+ }
+
+ // don't update theta for the middle slice
+ if (i != slices)
+ theta += dTheta;
+ }
+
+ return mesh;
+}
diff --git a/core/mesh.h b/core/mesh.h
new file mode 100644
index 0000000..dfba532
--- /dev/null
+++ b/core/mesh.h
@@ -0,0 +1,77 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include <vector>
+
+#include "core.h"
+#include "maths.h"
+
// Simple indexed triangle mesh with optional per-vertex attribute channels.
struct Mesh
{
	// appends all vertex data and (re-indexed) faces from m
	void AddMesh(const Mesh& m);

	uint32_t GetNumVertices() const { return uint32_t(m_positions.size()); }
	uint32_t GetNumFaces() const { return uint32_t(m_indices.size()) / 3; }

	// appends a copy of vertex i to every per-vertex channel containing it
	void DuplicateVertex(uint32_t i);

	// recomputes smooth per-vertex normals from face geometry
	void CalculateNormals();
	// transforms positions (as points) and normals (as directions)
	void Transform(const Matrix44& m);
	void Normalize(float s=1.0f); // scale so bounds in any dimension equals s and lower bound = (0,0,0)

	// axis-aligned bounding box over all positions
	void GetBounds(Vector3& minExtents, Vector3& maxExtents) const;

	// per-vertex channels; normals/texcoords/colours may be empty or
	// shorter than positions
	std::vector<Point3> m_positions;
	std::vector<Vector3> m_normals;
	std::vector<Vector2> m_texcoords[2];	// two uv channels
	std::vector<Colour> m_colours;

	// triangle list, 3 entries per face
	std::vector<uint32_t> m_indices;
};
+
+// create mesh from file
+Mesh* ImportMeshFromObj(const char* path);
+Mesh* ImportMeshFromPly(const char* path);
+Mesh* ImportMeshFromBin(const char* path);
+
+// just switches on filename
+Mesh* ImportMesh(const char* path);
+
+// save a mesh in a flat binary format
+void ExportMeshToBin(const char* path, const Mesh* m);
+
+// create procedural primitives
+Mesh* CreateTriMesh(float size, float y=0.0f);
+Mesh* CreateCubeMesh();
+Mesh* CreateQuadMesh(float size, float y=0.0f);
+Mesh* CreateDiscMesh(float radius, uint32_t segments);
+Mesh* CreateTetrahedron(float ground=0.0f, float height=1.0f); //fixed but not used
+Mesh* CreateSphere(int slices, int segments, float radius = 1.0f);
+Mesh* CreateCapsule(int slices, int segments, float radius = 1.0f, float halfHeight = 1.0f);
+
diff --git a/core/perlin.cpp b/core/perlin.cpp
new file mode 100644
index 0000000..16fba71
--- /dev/null
+++ b/core/perlin.cpp
@@ -0,0 +1,367 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "perlin.h"
+
+#include <cmath>
+
+namespace Perlin
+{
+
// types.
// NOTE(review): despite its name, real64 is a 32-bit float in this file.
typedef int int32_t;
typedef float real64;

//
// Fast math trickery....
//
// Constants for the legacy "double magic" float->int rounding trick. Kept
// for reference only: the aliasing-based fast path that used them was
// removed because it violates strict aliasing and (per its own TODO) is not
// endian safe.
#define _doublemagicroundeps (.5-1.4e-11) // almost .5f = .5f - 1e^(number of exp bit)
#define _doublemagic double (6755399441055744.0) // 2^52 * 1.5, uses limited precision to floor

// fast floor float->int conversion routine.
// Floor semantics: rounds toward -infinity, e.g. Floor2Int(-0.5f) == -1.
inline int32_t Floor2Int(real64 val)
{
	return (int32_t)floorf(val);
}

// Linear interpolation: returns v1 at t == 0 and v2 at t == 1.
inline real64 Lerp(real64 t, real64 v1, real64 v2)
{
	return v1 + t*(v2 - v1);
}

// Perlin's quintic smoothstep 6t^5 - 15t^4 + 10t^3.
// Like a cubic fade but has both first-order and second-order derivatives of
// 0.0 at t = 0.0 and t = 1.0, which removes visible discontinuities at noise
// cell boundaries.
inline real64 PerlinFade( real64 val ) {
	real64 val3 = val*val*val;
	real64 val4 = val3*val;
	return 6.0f*val4*val - 15.0f*val4 + 10.0f*val3;
}
+
+
+
+//
+// Perlin Noise Stuff
+//
+// Implementation of Ken Perlins 2002 "Improved Perlin Noise".
+// Mostly taken from pbrt implementation in "Physically Based Rendering" by Matt Pharr and Greg Humpreys
+// which was inturn taken from Ken Perlins example Java code from [http://mrl.nyu.edu/~perlin/noise/]
+//
+
+//
+// Perlin Noise Permutation Data
+//
+#define NOISE_PERM_SIZE 256
+// Ken Perlin's reference permutation of 0..255. The table is stored twice
+// back-to-back so nested lookups like NoisePerm[NoisePerm[x]+y] never index
+// past the end without needing an extra mask.
+static unsigned char NoisePerm[2 * NOISE_PERM_SIZE] = {
+ 151, 160, 137, 91, 90, 15, 131, 13, 201, 95, 96,
+ 53, 194, 233, 7, 225, 140, 36, 103, 30, 69, 142,
+ // Rest of noise permutation table
+ 8, 99, 37, 240, 21, 10, 23,
+ 190, 6, 148, 247, 120, 234, 75, 0, 26, 197, 62, 94, 252, 219, 203, 117, 35, 11, 32, 57, 177, 33,
+ 88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175, 74, 165, 71, 134, 139, 48, 27, 166,
+ 77, 146, 158, 231, 83, 111, 229, 122, 60, 211, 133, 230, 220, 105, 92, 41, 55, 46, 245, 40, 244,
+ 102, 143, 54, 65, 25, 63, 161, 1, 216, 80, 73, 209, 76, 132, 187, 208, 89, 18, 169, 200, 196,
+ 135, 130, 116, 188, 159, 86, 164, 100, 109, 198, 173, 186, 3, 64, 52, 217, 226, 250, 124, 123,
+ 5, 202, 38, 147, 118, 126, 255, 82, 85, 212, 207, 206, 59, 227, 47, 16, 58, 17, 182, 189, 28, 42,
+ 223, 183, 170, 213, 119, 248, 152, 2, 44, 154, 163, 70, 221, 153, 101, 155, 167, 43, 172, 9,
+ 129, 22, 39, 253, 19, 98, 108, 110, 79, 113, 224, 232, 178, 185, 112, 104, 218, 246, 97, 228,
+ 251, 34, 242, 193, 238, 210, 144, 12, 191, 179, 162, 241, 81, 51, 145, 235, 249, 14, 239, 107,
+ 49, 192, 214, 31, 181, 199, 106, 157, 184, 84, 204, 176, 115, 121, 50, 45, 127, 4, 150, 254,
+ 138, 236, 205, 93, 222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78, 66, 215, 61, 156, 180,
+ // second copy of the table (same 256 values, repeated)
+ 151, 160, 137, 91, 90, 15,
+ 131, 13, 201, 95, 96, 53, 194, 233, 7, 225, 140, 36, 103, 30, 69, 142, 8, 99, 37, 240, 21, 10, 23,
+ 190, 6, 148, 247, 120, 234, 75, 0, 26, 197, 62, 94, 252, 219, 203, 117, 35, 11, 32, 57, 177, 33,
+ 88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175, 74, 165, 71, 134, 139, 48, 27, 166,
+ 77, 146, 158, 231, 83, 111, 229, 122, 60, 211, 133, 230, 220, 105, 92, 41, 55, 46, 245, 40, 244,
+ 102, 143, 54, 65, 25, 63, 161, 1, 216, 80, 73, 209, 76, 132, 187, 208, 89, 18, 169, 200, 196,
+ 135, 130, 116, 188, 159, 86, 164, 100, 109, 198, 173, 186, 3, 64, 52, 217, 226, 250, 124, 123,
+ 5, 202, 38, 147, 118, 126, 255, 82, 85, 212, 207, 206, 59, 227, 47, 16, 58, 17, 182, 189, 28, 42,
+ 223, 183, 170, 213, 119, 248, 152, 2, 44, 154, 163, 70, 221, 153, 101, 155, 167, 43, 172, 9,
+ 129, 22, 39, 253, 19, 98, 108, 110, 79, 113, 224, 232, 178, 185, 112, 104, 218, 246, 97, 228,
+ 251, 34, 242, 193, 238, 210, 144, 12, 191, 179, 162, 241, 81, 51, 145, 235, 249, 14, 239, 107,
+ 49, 192, 214, 31, 181, 199, 106, 157, 184, 84, 204, 176, 115, 121, 50, 45, 127, 4, 150, 254,
+ 138, 236, 205, 93, 222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78, 66, 215, 61, 156, 180
+};
+
+// 1d gradient function.
+static inline real64 Grad1d(int32_t x, real64 dx)
+{
+ int32_t h = NoisePerm[x];
+ h &= 3;
+ return ((h&1) ? -dx : dx);
+}
+
+// 2d gradient function.
+static inline real64 Grad2d(int32_t x, int32_t y, real64 dx, real64 dy)
+{
+ int32_t h = NoisePerm[NoisePerm[x]+y];
+ h &= 3;
+ return ((h&1) ? -dx : dx) + ((h&2) ? -dy : dy);
+}
+// 3d gradient function.
+static inline real64 Grad3d(int32_t x, int32_t y, int32_t z, real64 dx, real64 dy, real64 dz)
+{
+ int32_t h = NoisePerm[NoisePerm[NoisePerm[x]+y]+z];
+ h &= 15;
+
+ // Ken Perlins original impl
+ real64 u = h<8 ? dx : dy;
+ real64 v = h<4 ? dy : (h==12 || h==14) ? dx : dz;
+ return ((h&1) ? -u : u) + ((h&2) ? -v : v);
+
+ // (will this run faster without having all those branches?)
+ /*
+ // or how about just this....
+ static const real64 GRADS[16][3] = { {1.0, 1.0, 0.0}, {-1.0, 1.0, 0.0}, {1.0, -1.0, 0.0}, {-1.0, -1.0, 0.0}, // the 12 grads
+ {1.0, 0.0, 1.0}, {-1.0, 0.0, 1.0}, {1.0, 0.0, -1.0}, {-1.0, 0.0, -1.0},
+ {0.0, 1.0, 1.0}, {0.0, -1.0, 1.0}, {0.0, 1.0, -1.0}, {0.0, -1.0, -1.0},
+ {-1.0, 1.0, 0.0}, {1.0, 1.0, 0.0}, {0.0, -1.0, 1.0}, {0.0, -1.0, -1.0} }; // 8 more "padding" items to make this array of size 16.
+ return GRADS[h][0]*dx + GRADS[h][1]*dy + GRADS[h][2]*dz;
+ */
+}
+
+// 4d version from java
+/*
+static double grad(int hash, double x, double y, double z, double w) {
+int h = hash & 31; // CONVERT LO 5 BITS OF HASH TO 32 GRAD DIRECTIONS.
+double a=y,b=z,c=w; // X,Y,Z
+switch (h >> 3) { // OR, DEPENDING ON HIGH ORDER 2 BITS:
+case 1: a=w;b=x;c=y;break; // W,X,Y
+case 2: a=z;b=w;c=x;break; // Z,W,X
+case 3: a=y;b=z;c=w;break; // Y,Z,W
+}
+return ((h&4)==0 ? -a:a) + ((h&2)==0 ? -b:b) + ((h&1)==0 ? -c:c);
+}
+*/
+
+// Periodic variant of PerlinNoise3DFunction: lattice coordinates wrap with
+// period (px, py, pz) so the noise tiles, in the spirit of Renderman's
+// pnoise().
+//
+// NOTE(review): for negative sample coordinates C++ '%' yields negative
+// remainders, so the pattern does not tile correctly across zero (the & mask
+// still keeps the table index in range, so no out-of-bounds read occurs);
+// a period of 0 divides by zero. Presumably callers pass positive periods
+// and coordinates -- confirm.
+static real64 PerlinNoise3DFunctionPeriodic(real64 x, real64 y, real64 z, int px, int py, int pz)
+{
+ // Compute noise cell coordinates and offsets
+ int32_t ix = Floor2Int(x);
+ int32_t iy = Floor2Int(y);
+ int32_t iz = Floor2Int(z);
+ real64 dx = x - ix, dy = y - iy, dz = z - iz;
+
+ // wrap both cell corners by the period, then mask into the permutation table
+ int32_t ix0 = (ix%px) & (NOISE_PERM_SIZE-1);
+ int32_t iy0 = (iy%py) & (NOISE_PERM_SIZE-1);
+ int32_t iz0 = (iz%pz) & (NOISE_PERM_SIZE-1);
+
+ int32_t ix1 = ((ix+1)%px) & (NOISE_PERM_SIZE-1);
+ int32_t iy1 = ((iy+1)%py) & (NOISE_PERM_SIZE-1);
+ int32_t iz1 = ((iz+1)%pz) & (NOISE_PERM_SIZE-1);
+
+ // gradient contribution from each of the 8 cell corners
+ real64 w000 = Grad3d(ix0, iy0, iz0, dx, dy, dz);
+ real64 w100 = Grad3d(ix1, iy0, iz0, dx-1, dy, dz);
+ real64 w010 = Grad3d(ix0, iy1, iz0, dx, dy-1, dz);
+ real64 w110 = Grad3d(ix1, iy1, iz0, dx-1, dy-1, dz);
+ real64 w001 = Grad3d(ix0, iy0, iz1, dx, dy, dz-1);
+ real64 w101 = Grad3d(ix1, iy0, iz1, dx-1, dy, dz-1);
+ real64 w011 = Grad3d(ix0, iy1, iz1, dx, dy-1, dz-1);
+ real64 w111 = Grad3d(ix1, iy1, iz1, dx-1, dy-1, dz-1);
+ // Compute trilinear interpolation of weights
+ real64 wx = PerlinFade(dx);
+ real64 wy = PerlinFade(dy);
+ real64 wz = PerlinFade(dz);
+ real64 x00 = Lerp(wx, w000, w100);
+ real64 x10 = Lerp(wx, w010, w110);
+ real64 x01 = Lerp(wx, w001, w101);
+ real64 x11 = Lerp(wx, w011, w111);
+ real64 y0 = Lerp(wy, x00, x10);
+ real64 y1 = Lerp(wy, x01, x11);
+ return Lerp(wz, y0, y1);
+}
+
+static real64 PerlinNoise3DFunction(real64 x, real64 y, real64 z)
+{
+ // Compute noise cell coordinates and offsets
+ int32_t ix = Floor2Int(x);
+ int32_t iy = Floor2Int(y);
+ int32_t iz = Floor2Int(z);
+ real64 dx = x - ix, dy = y - iy, dz = z - iz;
+ // Compute gradient weights
+ ix &= (NOISE_PERM_SIZE-1);
+ iy &= (NOISE_PERM_SIZE-1);
+ iz &= (NOISE_PERM_SIZE-1);
+ real64 w000 = Grad3d(ix, iy, iz, dx, dy, dz);
+ real64 w100 = Grad3d(ix+1, iy, iz, dx-1, dy, dz);
+ real64 w010 = Grad3d(ix, iy+1, iz, dx, dy-1, dz);
+ real64 w110 = Grad3d(ix+1, iy+1, iz, dx-1, dy-1, dz);
+ real64 w001 = Grad3d(ix, iy, iz+1, dx, dy, dz-1);
+ real64 w101 = Grad3d(ix+1, iy, iz+1, dx-1, dy, dz-1);
+ real64 w011 = Grad3d(ix, iy+1, iz+1, dx, dy-1, dz-1);
+ real64 w111 = Grad3d(ix+1, iy+1, iz+1, dx-1, dy-1, dz-1);
+ // Compute trilinear interpolation of weights
+ real64 wx = PerlinFade(dx);
+ real64 wy = PerlinFade(dy);
+ real64 wz = PerlinFade(dz);
+ real64 x00 = Lerp(wx, w000, w100);
+ real64 x10 = Lerp(wx, w010, w110);
+ real64 x01 = Lerp(wx, w001, w101);
+ real64 x11 = Lerp(wx, w011, w111);
+ real64 y0 = Lerp(wy, x00, x10);
+ real64 y1 = Lerp(wy, x01, x11);
+ return Lerp(wz, y0, y1);
+}
+
+
+static real64 PerlinNoise2DFunction(real64 x, real64 y)
+{
+ // Compute noise cell coordinates and offsets
+ int32_t ix = Floor2Int(x);
+ int32_t iy = Floor2Int(y);
+ real64 dx = x - ix, dy = y - iy;
+ // Compute gradient weights
+ ix &= (NOISE_PERM_SIZE-1);
+ iy &= (NOISE_PERM_SIZE-1);
+ real64 w00 = Grad2d(ix, iy, dx, dy);
+ real64 w10 = Grad2d(ix+1, iy, dx-1.0f, dy);
+ real64 w01 = Grad2d(ix, iy+1, dx, dy-1.0f);
+ real64 w11 = Grad2d(ix+1, iy+1, dx-1.0f, dy-1.0f);
+ // Compute bilinear interpolation of weights
+ real64 wx = PerlinFade(dx);
+ real64 wy = PerlinFade(dy);
+ real64 x0 = Lerp(wx, w00, w10);
+ real64 x1 = Lerp(wx, w01, w11);
+ return Lerp(wy, x0, x1);
+}
+
+
+static real64 PerlinNoise1DFunction(real64 x)
+{
+ // Compute noise cell coordinates and offsets
+ int32_t ix = Floor2Int(x);
+ real64 dx = x - ix;
+ // Compute gradient weights
+ ix &= (NOISE_PERM_SIZE-1);
+ real64 w00 = Grad1d(ix, dx);
+ real64 w10 = Grad1d(ix+1, dx-1.0f);
+ // Compute bilinear interpolation of weights
+ real64 wx = PerlinFade(dx);
+ real64 x0 = Lerp(wx, w00, w10);
+
+ return x0;
+}
+}
+
+//------------------------------------------------
+//! Public interfaces
+
+float Perlin1D(float x, int octaves, float persistence)
+{
+ float r = 0.0f;
+ float a = 1.0f;
+ int freq = 1;
+
+ for (int i=0; i < octaves; i++)
+ {
+ float fx = x*freq;
+
+ r += Perlin::PerlinNoise1DFunction(fx)*a;
+
+ //! update amplitude
+ a *= persistence;
+ freq = 2 << i;
+ }
+
+ return r;
+}
+
+
+float Perlin2D(float x, float y, int octaves, float persistence)
+{
+ float r = 0.0f;
+ float a = 1.0f;
+ int freq = 1;
+
+ for (int i=0; i < octaves; i++)
+ {
+ float fx = x*freq;
+ float fy = y*freq;
+
+ r += Perlin::PerlinNoise2DFunction(fx, fy)*a;
+
+ //! update amplitude
+ a *= persistence;
+ freq = 2 << i;
+ }
+
+ return r;
+}
+
+
+float Perlin3D(float x, float y, float z, int octaves, float persistence)
+{
+ float r = 0.0f;
+ float a = 1.0f;
+ int freq = 1;
+
+ for (int i=0; i < octaves; i++)
+ {
+ float fx = x*freq;
+ float fy = y*freq;
+ float fz = z*freq;
+
+ r += Perlin::PerlinNoise3DFunction(fx, fy, fz)*a;
+
+ //! update amplitude
+ a *= persistence;
+ freq = 2 << i;
+ }
+
+ return r;
+}
+
+float Perlin3DPeriodic(float x, float y, float z, int px, int py, int pz, int octaves, float persistence)
+{
+ float r = 0.0f;
+ float a = 1.0f;
+ int freq = 1;
+
+ for (int i=0; i < octaves; i++)
+ {
+ float fx = x*freq;
+ float fy = y*freq;
+ float fz = z*freq;
+
+ r += Perlin::PerlinNoise3DFunctionPeriodic(fx, fy, fz, px, py, pz)*a;
+
+ //! update amplitude
+ a *= persistence;
+ freq = 2 << i;
+ }
+
+ return r;
+}
diff --git a/core/perlin.h b/core/perlin.h
new file mode 100644
index 0000000..45294f8
--- /dev/null
+++ b/core/perlin.h
@@ -0,0 +1,35 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+float Perlin1D(float x, int octaves, float persistence);
+float Perlin2D(float x, float y, int octaves, float persistence);
+float Perlin3D(float x, float y, float z, int octaves, float persistence);
+
+// periodic versions of the same function, inspired by the Renderman pnoise() functions
+float Perlin3DPeriodic(float x, float y, float z, int px, int py, int pz, int octaves, float persistence);
diff --git a/core/pfm.cpp b/core/pfm.cpp
new file mode 100644
index 0000000..da25c8b
--- /dev/null
+++ b/core/pfm.cpp
@@ -0,0 +1,121 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "pfm.h"
+
+#include <cassert>
+#include <stdio.h>
+#include <string.h>
+#include <algorithm>
+
+#if _WIN32
+#pragma warning(disable: 4996) // secure io
+#pragma warning(disable: 4100) // unreferenced param
+#pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+#endif
+
namespace
{
	// RAII wrapper to handle file pointer clean up.
	// Closes the wrapped FILE* on scope exit and converts implicitly to
	// FILE* so it can be passed straight to the C stdio API.
	struct FilePointer
	{
		FilePointer(FILE* ptr) : file(ptr) {}
		~FilePointer() { if (file) fclose(file); }

		operator FILE*() { return file; }

		FILE* file;
	};
}
+
+bool PfmLoad(const char* filename, PfmImage& image)
+{
+ FilePointer f = fopen(filename, "rb");
+ if (!f)
+ return false;
+
+ memset(&image, 0, sizeof(PfmImage));
+
+ const uint32_t kBufSize = 1024;
+ char buffer[kBufSize];
+
+ if (!fgets(buffer, kBufSize, f))
+ return false;
+
+ if (strcmp(buffer, "PF\n") != 0)
+ return false;
+
+ if (!fgets(buffer, kBufSize, f))
+ return false;
+
+ image.m_depth = 1;
+ sscanf(buffer, "%d %d %d", &image.m_width, &image.m_height, &image.m_depth);
+
+ if (!fgets(buffer, kBufSize, f))
+ return false;
+
+ sscanf(buffer, "%f", &image.m_maxDepth);
+
+ uint32_t dataStart = ftell(f);
+ fseek(f, 0, SEEK_END);
+ uint32_t dataEnd = ftell(f);
+ fseek(f, dataStart, SEEK_SET);
+
+ uint32_t dataSize = dataEnd-dataStart;
+
+ if (dataSize != sizeof(float)*image.m_width*image.m_height*image.m_depth)
+ return false;
+
+ image.m_data = new float[dataSize/4];
+
+ if (fread(image.m_data, dataSize, 1, f) != 1)
+ return false;
+
+ return true;
+}
+
+void PfmSave(const char* filename, const PfmImage& image)
+{
+ FilePointer f = fopen(filename, "wb");
+ if (!f)
+ return;
+
+ fprintf(f, "PF\n");
+ if (image.m_depth > 1)
+ fprintf(f, "%d %d %d\n", image.m_width, image.m_height, image.m_depth);
+ else
+ fprintf(f, "%d %d\n", image.m_width, image.m_height);
+
+ fprintf(f, "%f\n", *std::max_element(image.m_data, image.m_data+(image.m_width*image.m_height*image.m_depth)));
+
+ fwrite(image.m_data, image.m_width*image.m_height*image.m_depth*sizeof(float), 1, f);
+}
+
+
+
+
+
diff --git a/core/pfm.h b/core/pfm.h
new file mode 100644
index 0000000..bf141af
--- /dev/null
+++ b/core/pfm.h
@@ -0,0 +1,44 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "types.h"
+
+// In-memory float image as read/written by PfmLoad/PfmSave.
+struct PfmImage
+{
+ // set m_depth to 1 for standard pfm compatability, > 1 will act as a volume texture (non-standard)
+ uint32_t m_width;
+ uint32_t m_height;
+ uint32_t m_depth;
+
+ // optional
+ // (parsed from the PFM header's scale line by PfmLoad; PfmSave writes the
+ // maximum data element here instead)
+ float m_maxDepth;
+
+ // width*height*depth packed floats; allocated with new[] by PfmLoad
+ float* m_data;
+};
+
+bool PfmLoad(const char* filename, PfmImage& image);
+void PfmSave(const char* filename, const PfmImage& image);
diff --git a/core/platform.cpp b/core/platform.cpp
new file mode 100644
index 0000000..8846e9b
--- /dev/null
+++ b/core/platform.cpp
@@ -0,0 +1,348 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "core.h"
+#include "platform.h"
+#include "types.h"
+
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+#include <string>
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+#ifdef WIN32
+
+#include <windows.h>
+#include <commdlg.h>
+#include <mmsystem.h>
+
+// Returns the accumulated wall-clock time in seconds since the first call
+// (the first call returns ~0). Win32 implementation using the
+// high-resolution QueryPerformanceCounter; freq is ticks-per-second.
+double GetSeconds()
+{
+ static LARGE_INTEGER lastTime;
+ static LARGE_INTEGER freq;
+ static bool first = true;
+
+ // anchor the timer on the first call
+ if (first)
+ {
+ QueryPerformanceCounter(&lastTime);
+ QueryPerformanceFrequency(&freq);
+
+ first = false;
+ }
+
+ // running total across calls
+ static double time = 0.0;
+
+ LARGE_INTEGER t;
+ QueryPerformanceCounter(&t);
+
+ // ticks elapsed since the previous call, converted to seconds
+ __int64 delta = t.QuadPart-lastTime.QuadPart;
+ double deltaSeconds = double(delta) / double(freq.QuadPart);
+
+ time += deltaSeconds;
+
+ lastTime = t;
+
+ return time;
+
+}
+
+// Blocks the calling thread for (at least) the given number of seconds,
+// delegating to the millisecond-resolution Win32 ::Sleep.
+void Sleep(double seconds)
+{
+ ::Sleep(DWORD(seconds*1000));
+}
+
+
+//// helper function to get exe path
+//string GetExePath()
+//{
+// const uint32_t kMaxPathLength = 2048;
+//
+// char exepath[kMaxPathLength];
+//
+// // get exe path for file system
+// uint32_t i = GetModuleFileName(NULL, exepath, kMaxPathLength);
+//
+// // rfind first slash
+// while (i && exepath[i] != '\\' && exepath[i] != '//')
+// i--;
+//
+// // insert null terminater to cut off exe name
+// return string(&exepath[0], &exepath[i+1]);
+//}
+//
+//string FileOpenDialog(char *filter)
+//{
+// HWND owner=NULL;
+//
+// OPENFILENAME ofn;
+// char fileName[MAX_PATH] = "";
+// ZeroMemory(&ofn, sizeof(ofn));
+// ofn.lStructSize = sizeof(OPENFILENAME);
+// ofn.hwndOwner = owner;
+// ofn.lpstrFilter = filter;
+// ofn.lpstrFile = fileName;
+// ofn.nMaxFile = MAX_PATH;
+// ofn.Flags = OFN_EXPLORER | OFN_FILEMUSTEXIST | OFN_HIDEREADONLY | OFN_NOCHANGEDIR;
+// ofn.lpstrDefExt = "";
+//
+// string fileNameStr;
+//
+// if ( GetOpenFileName(&ofn) )
+// fileNameStr = fileName;
+//
+// return fileNameStr;
+//}
+//
+//bool FileMove(const char* src, const char* dest)
+//{
+// BOOL b = MoveFileEx(src, dest, MOVEFILE_REPLACE_EXISTING);
+// return b == TRUE;
+//}
+//
+//bool FileScan(const char* pattern, vector<string>& files)
+//{
+// HANDLE h;
+// WIN32_FIND_DATA info;
+//
+// // build a list of files
+// h = FindFirstFile(pattern, &info);
+//
+// if (h != INVALID_HANDLE_VALUE)
+// {
+// do
+// {
+// if (!(strcmp(info.cFileName, ".") == 0 || strcmp(info.cFileName, "..") == 0))
+// {
+// files.push_back(info.cFileName);
+// }
+// }
+// while (FindNextFile(h, &info));
+//
+// if (GetLastError() != ERROR_NO_MORE_FILES)
+// {
+// return false;
+// }
+//
+// FindClose(h);
+// }
+// else
+// {
+// return false;
+// }
+//
+// return true;
+//}
+
+
+#else
+
+// linux, mac platforms
+#include <sys/time.h>
+
// Returns the accumulated wall-clock time in seconds since the first call
// (the first call returns 0.0). POSIX implementation using gettimeofday.
//
// BUGFIX: the original detected the first call by testing
// last_idle_time.tv_usec == 0, which spuriously re-anchored the timer (and
// dropped elapsed time) whenever a stored sample happened to land exactly on
// a whole second; an explicit flag avoids that. dt is also accumulated in
// double precision instead of float to reduce round-off drift.
double GetSeconds()
{
	static struct timeval last_idle_time;
	static double time = 0.0;
	static bool first = true;

	struct timeval time_now;
	gettimeofday(&time_now, NULL);

	if (first)
	{
		last_idle_time = time_now;
		first = false;
	}

	double dt = double(time_now.tv_sec - last_idle_time.tv_sec) + 1.0e-6*double(time_now.tv_usec - last_idle_time.tv_usec);

	time += dt;
	last_idle_time = time_now;

	return time;
}
+
+#endif
+
+
// Reads the whole of filename into a buffer allocated with new[] (caller
// frees with delete[]). Returns NULL on failure; on success the number of
// bytes actually read is optionally reported through sizeRead.
uint8_t* LoadFileToBuffer(const char* filename, uint32_t* sizeRead)
{
	FILE* file = fopen(filename, "rb");
	if (!file)
	{
		std::cout << "Could not open file for reading: " << filename << std::endl;
		return NULL;
	}

	fseek(file, 0, SEEK_END);
	long p = ftell(file);

	// BUGFIX: ftell returns -1 on failure (e.g. non-seekable stream); the
	// original passed it straight to new uint8_t[p]
	if (p < 0)
	{
		fclose(file);
		std::cout << "Could not open file for reading: " << filename << std::endl;
		return NULL;
	}

	uint8_t* buf = new uint8_t[p];
	fseek(file, 0, SEEK_SET);
	uint32_t len = uint32_t(fread(buf, 1, p, file));

	fclose(file);

	if (sizeRead)
	{
		*sizeRead = len;
	}

	return buf;
}
+
+string LoadFileToString(const char* filename)
+{
+ //std::ifstream file(filename);
+ //return string((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
+ uint32_t size;
+ uint8_t* buf = LoadFileToBuffer(filename, &size);
+
+ if (buf)
+ {
+ string s(buf, buf+size);
+ delete[] buf;
+
+ return s;
+ }
+ else
+ {
+ return "";
+ }
+}
+
// Writes the NUL-terminated string s to filename (text mode), replacing any
// existing contents. Returns true on success, false if the file could not
// be opened for writing.
bool SaveStringToFile(const char* filename, const char* s)
{
	FILE* f = fopen(filename, "w");

	if (!f)
	{
		std::cout << "Could not open file for writing: " << filename << std::endl;
		return false;
	}

	fputs(s, f);
	fclose(f);
	return true;
}
+
+
// Returns the directory portion of path, including the trailing slash, or an
// empty string if path contains no slash (forward or back).
std::string StripFilename(const char* path)
{
	const std::string s(path);
	const size_t slash = s.find_last_of("/\\");

	if (slash == std::string::npos)
		return std::string();

	return s.substr(0, slash + 1);
}
+
// Returns the text after the last '.' in path (the extension, without the
// period), or an empty string if path contains no period. Note the search
// covers the whole path, not just the final component, matching strrchr.
std::string GetExtension(const char* path)
{
	const std::string s(path);
	const size_t dot = s.rfind('.');

	if (dot == std::string::npos)
		return "";

	return s.substr(dot + 1);
}
+
// Returns path with everything from the last '.' onwards removed; if there
// is no '.', returns path unchanged.
std::string StripExtension(const char* path)
{
	const std::string s(path);
	const size_t dot = s.rfind('.');

	if (dot == std::string::npos)
		return s;

	return s.substr(0, dot);
}
+
// Canonicalizes a path for comparison: converts all backslashes to forward
// slashes and lower-cases every character.
std::string NormalizePath(const char* path)
{
	std::string result(path);

	for (std::string::iterator it = result.begin(); it != result.end(); ++it)
	{
		if (*it == '\\')
			*it = '/';
		else
			*it = char(::tolower(*it));
	}

	return result;
}
+
// strips the path from a file name
// Returns the portion of path after the last slash (forward or back). If
// there is no slash the whole path is returned; a trailing slash yields an
// empty string.
std::string StripPath(const char* path)
{
	const std::string s(path);
	const size_t slash = s.find_last_of("/\\");

	if (slash == std::string::npos)
		return s;

	return s.substr(slash + 1);
}
+
diff --git a/core/platform.h b/core/platform.h
new file mode 100644
index 0000000..9b7a473
--- /dev/null
+++ b/core/platform.h
@@ -0,0 +1,216 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include "types.h"
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+// system functions
+double GetSeconds();
+void Sleep(double seconds);
+
+// helper function to get exe path
+std::string GetExePath();
+std::string GetWorkingDirectory();
+
#ifdef ANDROID

#include <android/log.h>

// BUGFIX: the original guard was a bare "#ifndef" with no macro name, which
// is a preprocessor error on Android builds; guard the LOGE helper macro by
// its own name.
#ifndef LOGE
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR , "Flex", __VA_ARGS__)
#endif

// Maps an asset path containing "data/" onto the device's external storage
// root "/mnt/sdcard/flex/". Paths already under that root are returned
// unchanged (after logging).
inline std::string GetFilePathByPlatform(const char* path)
{
	std::string newPath(path);

	if(newPath.find("/mnt/sdcard/flex/data/") != std::string::npos)
	{
		LOGE("file have exit %s",path);
	}
	else
	{
		std::string rootPath = "/mnt/sdcard/flex/";
		size_t startPos = newPath.find("data/");
		if(startPos != std::string::npos)
		{
			newPath = rootPath + newPath.substr(startPos);
		}
	}

	return newPath;
}
#else

// On non-Android platforms paths are used as-is.
inline std::string GetFilePathByPlatform(const char* path)
{
	std::string newPath(path);
	return newPath;
}

#endif
+
+
+
+// shows a file open dialog
+//std::string FileOpenDialog(const char *filter = "All Files (*.*)\0*.*\0");
+
// pulls out an option in the form option=value, must have no spaces
// Scans argv for an argument spelled exactly "<arg>=<value>" and parses the
// value into `out` using stream extraction. Returns true only when the flag
// is found AND the value parses as a T; `out` is left untouched when the
// flag is absent.
// BUGFIX: this function uses std::istringstream but the header did not
// include <sstream>; the include has been added to this header's includes.
template <typename T>
bool GetCmdLineArg(const char* arg, T& out, int argc, char* argv[])
{
	// iterate over all the arguments (argv[0] is scanned too, matching the
	// original behavior; a program name can never match "<arg>=")
	for (int i=0; i < argc; ++i)
	{
		const char* s1 = arg;
		const char* s2 = argv[i];

		// advance both cursors while the flag name and the argument agree
		while (*s1 && *s2 && *s1 == *s2)
		{
			++s1;
			++s2;
		}

		// the whole flag matched and is immediately followed by '='
		if (*s1 == 0 && *s2 == '=')
		{
			++s2;

			// parse the remainder of the argument as a T
			std::istringstream is(s2);
			if (is >> out)
			{
				return true;
			}
			else
				return false;
		}
	}

	return false;
}
+// return the full path to a file
+std::string ExpandPath(const char* path);
+// takes a full file path and returns just the folder (with trailing slash)
+std::string StripFilename(const char* path);
+// strips the path from a file name
+std::string StripPath(const char* path);
+// strips the extension from a path
+std::string StripExtension(const char* path);
+// returns the file extension (excluding period)
+std::string GetExtension(const char* path);
+// normalize path
+std::string NormalizePath(const char* path);
+
+// loads a file to a text string
+std::string LoadFileToString(const char* filename);
+// loads a file to a binary buffer (free using delete[])
+uint8_t* LoadFileToBuffer(const char* filename, uint32_t* sizeRead=NULL);
+// save whole string to a file
+bool SaveStringToFile(const char* filename, const char* s);
+
+bool FileMove(const char* src, const char* dest);
+bool FileScan(const char* pattern, std::vector<std::string>& files);
+
+// file system stuff
+const uint32_t kMaxPathLength = 2048;
+
+#ifdef WIN32
+
+// defined these explicitly because don't want to include windowsx.h
+#define GET_WPARAM(wp, lp) (wp)
+#define GET_LPARAM(wp, lp) (lp)
+
+#define GET_X_LPARAM(lp) ((int)(short)LOWORD(lp))
+#define GET_Y_LPARAM(lp) ((int)(short)HIWORD(lp))
+
+#define vsnprintf _vsnprintf
+#define snprintf _snprintf
+#define vsnwprintf _vsnwprintf
+
+#if _MSC_VER >= 1400 //vc8.0 use new secure
+#define snwprintf _snwprintf_s
+#else
+#define snwprintf _snwprintf
+#endif // _MSC_VER
+
+#endif // WIN32
+
+#if PLATFORM_IOS
+// Resolves a relative resource path against the application bundle.
+// NOTE(review): Log::Info is used below but its declaration is not visible
+// in this header -- confirm it is in scope wherever PLATFORM_IOS builds
+// compile this.
+inline std::string ExpandPath(const char* p)
+{
+ NSString *imagePath = [NSString stringWithUTF8String:p];
+ NSString *fullPath = [[NSBundle mainBundle] pathForResource:[imagePath lastPathComponent] ofType:nil inDirectory:[imagePath stringByDeletingLastPathComponent]];
+
+ if (fullPath)
+ {
+ std::string s = [fullPath cStringUsingEncoding:1];
+ return s;
+ }
+ else
+ {
+ Log::Info << "Failed to map path for : " << p << std::endl;
+ return std::string("");
+ }
+}
+// Returns the process temporary directory (NSTemporaryDirectory).
+inline std::string GetTempDirectory()
+{
+ NSString* tmp = NSTemporaryDirectory();
+ std::string s = [tmp cStringUsingEncoding:1];
+
+ return s;
+}
+
+// Prefixes p with the cooked-data folder and resolves it in the bundle.
+inline std::string DataPath(const char* p)
+{
+ return ExpandPath((std::string("DataCooked/") + p).c_str());
+}
+
+#else
+
+// Non-iOS platforms: paths are used verbatim.
+inline std::string ExpandPath(const char* p)
+{
+ return p;
+}
+
+// Prefixes p with the data folder (relative to the working directory).
+inline std::string DataPath(const char* p)
+{
+ return ExpandPath((std::string("Data/") + p).c_str());
+
+}
+
+#endif
+
+#ifdef ANDROID
+
+#define __int64 int64_t
+
+#endif
diff --git a/core/point3.h b/core/point3.h
new file mode 100644
index 0000000..ea6e4bc
--- /dev/null
+++ b/core/point3.h
@@ -0,0 +1,114 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include <ostream>
+
+#include "vec4.h"
+
+class Point3
+{
+public:
+
+ Point3() : x(0), y(0), z(0) {}
+ Point3(float a) : x(a), y(a), z(a) {}
+ Point3(const float* p) : x(p[0]), y(p[1]), z(p[2]) {}
+ Point3(float x_, float y_, float z_) : x(x_), y(y_), z(z_)
+ {
+ Validate();
+ }
+
+ explicit Point3(const Vec3& v) : x(v.x), y(v.y), z(v.z) {}
+
+ operator float* () { return &x; }
+ operator const float* () const { return &x; };
+ operator Vec4 () const { return Vec4(x, y, z, 1.0f); }
+
+ void Set(float x_, float y_, float z_) { Validate(); x = x_; y = y_; z = z_;}
+
+ Point3 operator * (float scale) const { Point3 r(*this); r *= scale; Validate(); return r; }
+ Point3 operator / (float scale) const { Point3 r(*this); r /= scale; Validate(); return r; }
+ Point3 operator + (const Vec3& v) const { Point3 r(*this); r += v; Validate(); return r; }
+ Point3 operator - (const Vec3& v) const { Point3 r(*this); r -= v; Validate(); return r; }
+
+ Point3& operator *=(float scale) {x *= scale; y *= scale; z*= scale; Validate(); return *this;}
+ Point3& operator /=(float scale) {float s(1.0f/scale); x *= s; y *= s; z *= s; Validate(); return *this;}
+ Point3& operator +=(const Vec3& v) {x += v.x; y += v.y; z += v.z; Validate(); return *this;}
+ Point3& operator -=(const Vec3& v) {x -= v.x; y -= v.y; z -= v.z; Validate(); return *this;}
+
+ bool operator != (const Point3& v) const { return (x != v.x || y != v.y || z != v.z); }
+
+ // negate
+ Point3 operator -() const { Validate(); return Point3(-x, -y, -z); }
+
+ float x,y,z;
+
+ void Validate() const
+ {
+ }
+};
+
+// lhs scalar scale
+inline Point3 operator *(float lhs, const Point3& rhs)
+{
+ Point3 r(rhs);
+ r *= lhs;
+ return r;
+}
+
+inline Vec3 operator-(const Point3& lhs, const Point3& rhs)
+{
+ return Vec3(lhs.x-rhs.x,lhs.y-rhs.y, lhs.z-rhs.z);
+}
+
+inline Point3 operator+(const Point3& lhs, const Point3& rhs)
+{
+ return Point3(lhs.x+rhs.x, lhs.y+rhs.y, lhs.z+rhs.z);
+}
+
+inline bool operator==(const Point3& lhs, const Point3& rhs)
+{
+ return (lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z);
+}
+
+inline std::ostream& operator << (std::ostream& s, const Point3& p)
+{
+ return s << p.x << "," << p.y << "," << p.z;
+}
+
+// component wise min max functions
+inline Point3 Max(const Point3& a, const Point3& b)
+{
+ return Point3(std::max(a.x, b.x), std::max(a.y, b.y), std::max(a.z, b.z));
+}
+
+inline Point3 Min(const Point3& a, const Point3& b)
+{
+ return Point3(std::min(a.x, b.x), std::min(a.y, b.y), std::min(a.z, b.z));
+}
+
diff --git a/core/quat.h b/core/quat.h
new file mode 100644
index 0000000..c241aa8
--- /dev/null
+++ b/core/quat.h
@@ -0,0 +1,137 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include <cassert>
+
+struct Matrix33;
+
// quaternion with components x,y,z (vector part) and w (scalar part); the
// template parameter is the component type, in practice float (see the Quat
// typedef below)
template <typename T>
class XQuat
{
public:

	typedef T value_type;

	// default constructs the identity rotation (0,0,0,1)
	CUDA_CALLABLE XQuat() : x(0), y(0), z(0), w(1.0) {}
	// expects p to point at at least 4 components, ordered x,y,z,w
	CUDA_CALLABLE XQuat(const T* p) : x(p[0]), y(p[1]), z(p[2]), w(p[3]) {}
	CUDA_CALLABLE XQuat(T x_, T y_, T z_, T w_) : x(x_), y(y_), z(z_), w(w_) { }
	// vector part v, scalar part w; no normalization is performed
	CUDA_CALLABLE XQuat(const Vec3& v, float w) : x(v.x), y(v.y), z(v.z), w(w) { }
	// conversion from a rotation matrix; defined elsewhere
	CUDA_CALLABLE XQuat(const Matrix33& m);

	// raw component access (x,y,z,w are contiguous)
	CUDA_CALLABLE operator T* () { return &x; }
	CUDA_CALLABLE operator const T* () const { return &x; };

	CUDA_CALLABLE void Set(T x_, T y_, T z_, T w_) { x = x_; y = y_; z = z_; w = w_; }

	// component-wise scale, divide, add and subtract
	CUDA_CALLABLE XQuat<T> operator * (T scale) const { XQuat<T> r(*this); r *= scale; return r;}
	CUDA_CALLABLE XQuat<T> operator / (T scale) const { XQuat<T> r(*this); r /= scale; return r; }
	CUDA_CALLABLE XQuat<T> operator + (const XQuat<T>& v) const { XQuat<T> r(*this); r += v; return r; }
	CUDA_CALLABLE XQuat<T> operator - (const XQuat<T>& v) const { XQuat<T> r(*this); r -= v; return r; }
	// Hamilton product: (w1*v2 + w2*v1 + v1 x v2, w1*w2 - v1.v2), used to
	// compose rotations
	CUDA_CALLABLE XQuat<T> operator * (XQuat<T> q) const
	{
		// quaternion multiplication
		return XQuat<T>(w * q.x + q.w * x + y * q.z - q.y * z, w * q.y + q.w * y + z * q.x - q.z * x,
			w * q.z + q.w * z + x * q.y - q.x * y, w * q.w - x * q.x - y * q.y - z * q.z);
	}

	CUDA_CALLABLE XQuat<T>& operator *=(T scale) {x *= scale; y *= scale; z*= scale; w*= scale; return *this;}
	// divides via a single reciprocal multiply; scale must be non-zero
	CUDA_CALLABLE XQuat<T>& operator /=(T scale) {T s(1.0f/scale); x *= s; y *= s; z *= s; w *=s; return *this;}
	CUDA_CALLABLE XQuat<T>& operator +=(const XQuat<T>& v) {x += v.x; y += v.y; z += v.z; w += v.w; return *this;}
	CUDA_CALLABLE XQuat<T>& operator -=(const XQuat<T>& v) {x -= v.x; y -= v.y; z -= v.z; w -= v.w; return *this;}

	// exact component-wise inequality
	CUDA_CALLABLE bool operator != (const XQuat<T>& v) const { return (x != v.x || y != v.y || z != v.z || w != v.w); }

	// negate
	CUDA_CALLABLE XQuat<T> operator -() const { return XQuat<T>(-x, -y, -z, -w); }

	T x,y,z,w;
};
+
+typedef XQuat<float> Quat;
+
+// lhs scalar scale
+template <typename T>
+CUDA_CALLABLE XQuat<T> operator *(T lhs, const XQuat<T>& rhs)
+{
+ XQuat<T> r(rhs);
+ r *= lhs;
+ return r;
+}
+
+template <typename T>
+CUDA_CALLABLE bool operator==(const XQuat<T>& lhs, const XQuat<T>& rhs)
+{
+ return (lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z && lhs.w == rhs.w);
+}
+
+template <typename T>
+CUDA_CALLABLE inline XQuat<T> QuatFromAxisAngle(const Vec3& axis, float angle)
+{
+ Vec3 v = Normalize(axis);
+
+ float half = angle*0.5f;
+ float w = cosf(half);
+
+ const float sin_theta_over_two = sinf(half);
+ v *= sin_theta_over_two;
+
+ return XQuat<T>(v.x, v.y, v.z, w);
+}
+
+
// rotate vector x by quaternion q using the expanded form of q*x*q^-1;
// assumes q is unit length (the 2w^2-1 term relies on w^2 + |v|^2 = 1)
CUDA_CALLABLE inline Vec3 Rotate(const Quat& q, const Vec3& x)
{
	return x*(2.0f*q.w*q.w-1.0f) + Cross(Vec3(q), x)*q.w*2.0f + Vec3(q)*Dot(Vec3(q), x)*2.0f;
}
+
// rotate vector x by the inverse (conjugate) of unit quaternion q;
// identical to Rotate except the cross-product term is negated
CUDA_CALLABLE inline Vec3 RotateInv(const Quat& q, const Vec3& x)
{
	return x*(2.0f*q.w*q.w-1.0f) - Cross(Vec3(q), x)*q.w*2.0f + Vec3(q)*Dot(Vec3(q), x)*2.0f;
}
+
+CUDA_CALLABLE inline Quat Inverse(const Quat& q)
+{
+ return Quat(-q.x, -q.y, -q.z, q.w);
+}
+
+CUDA_CALLABLE inline Quat Normalize(const Quat& q)
+{
+ float lSq = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
+
+ if (lSq > 0.0f)
+ {
+ float invL = 1.0f / sqrtf(lSq);
+
+ return q*invL;
+ }
+ else
+ return Quat();
+} \ No newline at end of file
diff --git a/core/sdf.cpp b/core/sdf.cpp
new file mode 100644
index 0000000..29d6f13
--- /dev/null
+++ b/core/sdf.cpp
@@ -0,0 +1,360 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "sdf.h"
+
+#include <vector>
+#include <float.h>
+#include <math.h>
+
+using namespace std;
+
namespace
{
	// kept for the brute-force reference implementation at the bottom of
	// this file (currently commented out)
	inline float Sqr(float x) { return x*x; }

	inline int Clamp(int x, int lower, int upper) { return std::min(std::max(lower, x), upper); }

	// fetch a pixel with clamp-to-edge addressing
	uint32_t Sample(const uint32_t* image, uint32_t w, uint32_t h, int x, int y)
	{
		return image[Clamp(y, 0, h-1)*w + Clamp(x, 0, w-1)];
	}

	// fetch a voxel with clamp-to-edge addressing
	uint32_t Sample(const uint32_t* image, uint32_t w, uint32_t h, uint32_t d, int x, int y, int z)
	{
		return image[Clamp(z, 0, d-1)*w*h + Clamp(y, 0, h-1)*w + Clamp(x, 0, w-1)];
	}

	// returns true if the pixel lies on the surface, i.e. any pixel in its
	// 3x3 neighbourhood has different occupancy to the centre
	bool EdgeDetect(const uint32_t* img, uint32_t w, uint32_t h, int x, int y)
	{
		bool center = Sample(img, w, h, x, y) != 0;

		for (int j=y-1; j <= y+1; ++j)
		{
			for (int i=x-1; i <= x+1; ++i)
			{
				if ((0 != Sample(img, w, h, i, j)) != center)
				{
					return true;
				}
			}
		}

		return false;
	}


	// returns true if the voxel lies on the surface; dist receives an
	// estimate of the distance to the surface (half the distance to the
	// nearest neighbour with different occupancy)
	bool EdgeDetect(const uint32_t* img, uint32_t w, uint32_t h, uint32_t d, int x, int y, int z, float& dist)
	{
		bool center = Sample(img, w, h, d, x, y, z) != 0;

		float minDist = FLT_MAX;

		for (int k=z-1; k <= z+1; ++k)
		{
			for (int j=y-1; j <= y+1; ++j)
			{
				for (int i=x-1; i <= x+1; ++i)
				{
					if ((0 != Sample(img, w, h, d, i, j, k)) != center)
					{
						int dx = x-i;
						int dy = y-j;
						int dz = z-k;

						minDist = std::min(sqrtf(float(dx*dx + dy*dy + dz*dz))*0.5f, minDist);
					}
				}
			}
		}

		dist = minDist;

		return minDist != FLT_MAX;
	}
}

// 2D fast marching method (FMM: J. Sethian. A fast marching level set method
// for monotonically advancing fronts. Proc. Natl. Acad. Sci., 93:1591-1595, 1996.)
namespace
{
	// heap entry: pixel (i,j) with tentative distance d to its source
	// surface pixel (si,sj)
	struct Coord2D
	{
		int i, j;

		float d;
		int si, sj;

		// reversed comparison so std::make_heap/push_heap give a min-heap on d
		bool operator < (const Coord2D& c) const { return d > c.d; }
	};
}

// compute a signed distance field from a binary image: non-zero input pixels
// get distance < 0 (interior), distances are normalized by the largest image
// dimension. output must hold w*h floats. If the image contains no surface
// pixels (uniformly empty or full) output is left at FLT_MAX, matching the
// behaviour of the 3d overload.
void MakeSDF(const uint32_t* img, uint32_t w, uint32_t h, float* output)
{
	const float scale = 1.0f / std::max(w, h);

	std::vector<Coord2D> queue;

	// seed the front with all surface pixels at distance 0
	for (uint32_t y=0; y < h; ++y)
	{
		for (uint32_t x=0; x < w; ++x)
		{
			if (EdgeDetect(img, w, h, x, y))
			{
				Coord2D c = {(int)x, (int)y, 0.0f, (int)x, (int)y};
				queue.push_back(c);
			}

			// FLT_MAX marks "not yet frozen"
			output[y*w + x] = FLT_MAX;
		}
	}

	// no surface pixels so quit (fix: previously fell through, tripping the
	// assert below; the 3d overload already had this early-out)
	if (queue.empty())
		return;

	std::make_heap(queue.begin(), queue.end());

	while (!queue.empty())
	{
		std::pop_heap(queue.begin(), queue.end());

		Coord2D c = queue.back();
		queue.pop_back();

		// freeze coord if not already frozen
		if (output[c.j*w + c.i] == FLT_MAX)
		{
			// update the 8-connected neighbours
			output[c.j*w + c.i] = c.d;

			int xmin = std::max(c.i-1, 0), xmax = std::min(c.i+1, int(w-1));
			int ymin = std::max(c.j-1, 0), ymax = std::min(c.j+1, int(h-1));

			for (int y=ymin; y <= ymax; ++y)
			{
				for (int x=xmin; x <= xmax; ++x)
				{
					// skip self and already-frozen cells (the frozen check
					// avoids redundant heap pushes, as in the 3d overload)
					if ((c.i != x || c.j != y) && output[y*w + x] == FLT_MAX)
					{
						int dx = x-c.si;
						int dy = y-c.sj;

						// calculate distance to the propagated source pixel
						float d = sqrtf(float(dx*dx + dy*dy));

						Coord2D newc = {x, y, d, c.si, c.sj};
						queue.push_back(newc);
						std::push_heap(queue.begin(), queue.end());
					}
				}
			}
		}
	}

	for (uint32_t y=0; y < h; ++y)
	{
		for (uint32_t x=0; x < w; ++x)
		{
			assert(output[y*w + x] < FLT_MAX);

			// flip sign for interior pixels and normalize
			output[y*w + x] *= (img[y*w + x]?-1.0f:1.0f)*scale;
		}
	}
}
+
+// 3D fast marching method (FMM J. Sethian. A fast marching level set method for monotonically advancing fronts. Proc. Natl. Acad. Sci., 93:1591�1595, 1996.)
+namespace
+{
+ struct Coord3D
+ {
+ int i, j, k;
+
+ float d;
+ int si, sj, sk;
+
+ bool operator < (const Coord3D& c) const { return d > c.d; }
+ };
+}
+
+void MakeSDF(const uint32_t* img, uint32_t w, uint32_t h, uint32_t d, float* output)
+{
+ const float scale = 1.0f / max(max(w, h), d);
+
+ std::vector<Coord3D> queue;
+
+ // find surface points
+ for (uint32_t z=0; z < d; ++z)
+ {
+ for (uint32_t y=0; y < h; ++y)
+ {
+ for (uint32_t x=0; x < w; ++x)
+ {
+ float dist;
+ if (EdgeDetect(img, w, h, d, x, y, z, dist))
+ {
+ Coord3D c = {(int)x, (int)y, (int)z, dist, (int)x, (int)y, (int)z};
+ queue.push_back(c);
+ }
+
+ output[z*w*h + y*w + x] = FLT_MAX;
+ }
+ }
+ }
+
+ // no occupied voxels so quit
+ if (queue.empty())
+ return;
+
+ std::make_heap(queue.begin(), queue.end());
+
+ while (!queue.empty())
+ {
+ std::pop_heap(queue.begin(), queue.end());
+
+ Coord3D c = queue.back();
+ queue.pop_back();
+
+ // freeze coord if not already frozen
+ if (output[c.k*w*h + c.j*w + c.i] == FLT_MAX)
+ {
+ output[c.k*w*h + c.j*w + c.i] = c.d;
+
+ // update neighbours
+ int xmin = max(c.i-1, 0), xmax = min(c.i+1, int(w-1));
+ int ymin = max(c.j-1, 0), ymax = min(c.j+1, int(h-1));
+ int zmin = max(c.k-1, 0), zmax = min(c.k+1, int(d-1));
+
+ for (int z=zmin; z <= zmax; ++z)
+ {
+ for (int y=ymin; y <= ymax; ++y)
+ {
+ for (int x=xmin; x <= xmax; ++x)
+ {
+ if ((c.i != x || c.j != y || c.k != z) && output[z*w*h + y*w + x] == FLT_MAX)
+ {
+ int dx = x-c.si;
+ int dy = y-c.sj;
+ int dz = z-c.sk;
+
+ // calculate distance to source coord
+ float d = sqrtf(float(dx*dx + dy*dy + dz*dz)) + output[c.sk*w*h + c.sj*w + c.si];
+
+ assert(d > 0.0f);
+
+ Coord3D newc = {x, y, z, d, c.si, c.sj, c.sk};
+
+ queue.push_back(newc);
+ std::push_heap(queue.begin(), queue.end());
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (uint32_t z=0; z < d; ++z)
+ {
+ for (uint32_t y=0; y < h; ++y)
+ {
+ for (uint32_t x=0; x < w; ++x)
+ {
+ assert(output[z*w*h + y*w + x] < FLT_MAX);
+
+ // flip sign for interior
+ output[z*w*h + y*w + x] *= (img[z*w*h + y*w + x]?-1.0f:1.0f)*scale;
+ }
+ }
+ }
+}
+
+
+
+
+/*
+// Brute-force 2D SDF generation
+
+void FindNeighbour(const uint32_t* image, uint32_t w, uint32_t h, uint32_t cx, uint32_t cy, uint32_t& i, uint32_t& j, float& d)
+ {
+ float minDistSq=FLT_MAX;
+
+ float fx = float(cx);
+ float fy = float(cy);
+
+ for (uint32_t y=0; y < h; ++y)
+ {
+ for (uint32_t x=0; x < w; ++x)
+ {
+ if ((x != cx || y != cy) && image[y*w + x])
+ {
+ float dSq = Sqr(fx-float(x)) + Sqr(fy-float(y));
+ if (dSq < minDistSq)
+ {
+ minDistSq = dSq;
+ i = x;
+ j = y;
+ }
+ }
+ }
+ }
+
+ d = sqrtf(minDistSq);
+ }
+
+
+// brute force
+void MakeSDF(const uint32_t* img, uint32_t w, uint32_t h, float* output)
+{
+ // find surface points
+ vector<uint32_t> surface(w*h);
+
+ for (uint32_t y=0; y < h; ++y)
+ {
+ for (uint32_t x=0; x < w; ++x)
+ {
+ if (EdgeDetect(img, w, h, x, y))
+ {
+ surface[y*w + x] = 1;
+ }
+ }
+ }
+
+ // brute force search
+ for (uint32_t y=0; y < h; ++y)
+ {
+ for (uint32_t x=0; x < w; ++x)
+ {
+ uint32_t i, j;
+ float d;
+ FindNeighbour(&surface[0], w, h, x, y, i, j, d);
+
+ // flip sign for pixels inside the shape
+ float sign = (img[y*w + x])?-1.0f:1.0f;
+
+ output[y*w + x] = d*sign/w;
+ }
+ }
+}
+*/
diff --git a/core/sdf.h b/core/sdf.h
new file mode 100644
index 0000000..91b38c8
--- /dev/null
+++ b/core/sdf.h
@@ -0,0 +1,37 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include "core.h"
+#include "maths.h"
+
+// 2d and 3d signed distance field computation using fast marching method (FMM), output array
+// should be the same size as input, non-zero input pixels will have distance < 0.0f, resulting
+// distance is scaled by 1 / max(dimension)
+void MakeSDF(const uint32_t* input, uint32_t width, uint32_t height, float* output);
+void MakeSDF(const uint32_t* input, uint32_t width, uint32_t height, uint32_t depth, float* output);
diff --git a/core/skylight.h b/core/skylight.h
new file mode 100644
index 0000000..ff0857d
--- /dev/null
+++ b/core/skylight.h
@@ -0,0 +1,96 @@
+#pragma once
+
+#include "maths.h"
+
// implements Perez's all-weather model for sky luminance: the distribution
// F(theta, gamma) with coefficients a..e, where theta is the zenith angle of
// the sky patch and gamma its angular distance from the sun
inline float SkyDistribution(float theta, float gamma, float a, float b, float c, float d, float e)
{
	const float cosGamma = cosf(gamma);
	const float cosTheta = cosf(theta);

	const float horizonTerm = 1.0f + a*expf(b / cosTheta);
	const float circumsolarTerm = 1.0f + c*expf(d*gamma) + e*cosGamma*cosGamma;

	return horizonTerm*circumsolarTerm;
}

// luminance of a sky patch: the Perez distribution scaled so that it equals
// 'zenith' straight up (theta = 0)
inline float SkyLuminance(float theta, float gamma, float zenith, float sunTheta, float a, float b, float c, float d, float e)
{
	const float numer = SkyDistribution(theta, gamma, a, b, c, d, e);
	const float denom = SkyDistribution(0.0f, sunTheta, a, b, c, d, e);

	return zenith * (numer / denom);
}

// evaluates a coefficient pair as the linear polynomial ab[0]*t + ab[1]
// (note: despite the name this is a polynomial evaluation, not a lerp)
inline float Lerp2(const float ab[2], float t)
{
	const float slope = ab[0];
	const float offset = ab[1];

	return slope*t + offset;
}
+
+inline Colour SkyLight(float theta, float phi, float sunTheta, float sunPhi, float t)
+{
+ // need cosTheta > 0.0
+ theta = Clamp(theta, 0.0f, kPi*0.5f-1.0e-6f);
+
+ // calculate arc-length between sun and patch being calculated
+ const float gamma = acosf(cosf(sunTheta)*cosf(theta) + sinf(sunTheta)*sinf(theta)*cosf(abs(phi-sunPhi)));
+
+ const float xcoeff [5][2] = { { -0.0193f, -0.2592f },
+ { -0.0665f, 0.0008f },
+ { -0.0004f, 0.2125f },
+ { -0.0641f, -0.8989f },
+ { -0.0033f, 0.0452f } };
+
+ const float ycoeff [5][2] = { {-0.0167f, -0.2608f },
+ { -0.0950f, 0.0092f },
+ { -0.0079f, 0.2102f },
+ { -0.0441f, -1.6537f },
+ { -0.0109f, 0.0529f } };
+
+ const float Ycoeff [5][2] = { { 0.1787f, -1.4630f },
+ { -0.3554f, 0.4275f },
+ { -0.0227f, 5.3251f },
+ { 0.1206f, -2.5771f },
+ { -0.0670f, 0.3703f } };
+
+ const Matrix44 zxcoeff(0.00166f, -0.02903f, 0.11693f, 0.0f,
+ -0.00375f, 0.06377f, -0.21196f, 0.0f,
+ 0.00209f, -0.03202f, 0.06052f, 0.0f,
+ 0.0f, 0.00394f, 0.25886f, 0.0f);
+
+ const Matrix44 zycoeff(0.00275f, -0.04214f, 0.15346f, 0.0f,
+ -0.00610f, 0.08970f, -0.26756f, 0.0f,
+ 0.00317f, -0.04153f, 0.06670f, 0.0f,
+ 0.0f, 0.00516f, 0.26688f, 0.0f);
+
+
+ const Vec4 b(sunTheta*sunTheta*sunTheta, sunTheta*sunTheta, sunTheta, 1.0f);
+ const Vec4 a(t*t, t, 1.0f, 0.0f);
+
+ // calculate the zenith values for current turbidity and sun position
+ const float zx = Dot3(a, zxcoeff*b);
+ const float zy = Dot3(a, zycoeff*b);
+ const float zY = (4.0453f * t - 4.9710f)*tanf((4.0f/9.0f - t/120.0f)*(kPi-2.0f*sunTheta)) - 0.2155f*t + 2.4192f;
+
+ float x = SkyLuminance(theta, gamma, zx, sunTheta, Lerp2(xcoeff[0], t),
+ Lerp2(xcoeff[1], t),
+ Lerp2(xcoeff[2], t),
+ Lerp2(xcoeff[3], t),
+ Lerp2(xcoeff[4], t));
+
+
+ float y = SkyLuminance(theta, gamma, zy, sunTheta, Lerp2(ycoeff[0], t),
+ Lerp2(ycoeff[1], t),
+ Lerp2(ycoeff[2], t),
+ Lerp2(ycoeff[3], t),
+ Lerp2(ycoeff[4], t));
+
+ float Y = SkyLuminance(theta, gamma, zY, sunTheta, Lerp2(Ycoeff[0], t),
+ Lerp2(Ycoeff[1], t),
+ Lerp2(Ycoeff[2], t),
+ Lerp2(Ycoeff[3], t),
+ Lerp2(Ycoeff[4], t));
+
+
+ // convert Yxy to XYZ and then to RGB
+ Colour XYZ = YxyToXYZ(Y, x, y);
+ Colour RGB = XYZToLinear(XYZ.r, XYZ.g, XYZ.b);
+
+ return RGB;
+}
+
diff --git a/core/tga.cpp b/core/tga.cpp
new file mode 100644
index 0000000..802b9ec
--- /dev/null
+++ b/core/tga.cpp
@@ -0,0 +1,263 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "tga.h"
+#include "core.h"
+#include "types.h"
+
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
// on-disk TGA header layout (18 bytes when serialized). Note: this struct is
// NOT read or written as a single blob -- the compiler may insert padding
// after 'imagetype' -- so TgaSave/TgaLoad serialize each field individually.
struct TgaHeader
{
	uint8_t identsize;			// size of ID field that follows 18 uint8_t header (0 usually)
	uint8_t colourmaptype;		// type of colour map 0=none, 1=has palette
	uint8_t imagetype;			// type of image 0=none,1=indexed,2=rgb,3=grey,+8=rle packed

	uint16_t colourmapstart;	// first colour map entry in palette
	uint16_t colourmaplength;	// number of colours in palette
	uint8_t colourmapbits;		// number of bits per palette entry 15,16,24,32

	uint16_t xstart;			// image x origin
	uint16_t ystart;			// image y origin
	uint16_t width;				// image width in pixels
	uint16_t height;			// image height in pixels
	uint8_t bits;				// image bits per pixel 8,16,24,32
	uint8_t descriptor;			// image descriptor bits (vh flip bits)

	// pixel data follows header
};
+
namespace
{
	// append size bytes from src to buffer and advance buffer past the
	// written bytes (simple memory-stream helper used by TgaSave).
	// src is now const-correct; callers passing non-const pointers are
	// unaffected.
	void memwrite(const void* src, uint32_t size, unsigned char*& buffer)
	{
		memcpy(buffer, src, size);
		buffer += size;
	}
}
+
+
+
+bool TgaSave(const char* filename, const TgaImage& image, bool rle)
+{
+ FILE* f = fopen(filename, "wb");
+ if (f)
+ {
+ bool b = TgaSave(f, image, rle);
+ fclose(f);
+
+ return b;
+ }
+
+ return false;
+}
+
+bool TgaSave(FILE* f, const TgaImage& image, bool rle)
+{
+ TgaHeader header;
+
+ header.identsize = 0;
+ header.colourmaptype = 0;
+ header.imagetype = rle?9:2;
+ header.colourmapstart = 0;
+ header.colourmaplength = 0;
+ header.colourmapbits = 0;
+ header.xstart = 0;
+ header.ystart = 0;
+ header.width = image.m_width;
+ header.height = image.m_height;
+ header.bits = 32;
+ header.descriptor = 0;//uint16((1<<3)|(1<<5));
+
+ if (f)
+ {
+ unsigned char* data = (unsigned char*)new uint32_t[image.m_width*image.m_height + sizeof(header)];
+ unsigned char* writeptr = data;
+
+ memwrite(&header.identsize, sizeof(header.identsize), writeptr);
+ memwrite(&header.colourmaptype, sizeof(header.colourmaptype), writeptr);
+ memwrite(&header.imagetype, sizeof(header.imagetype), writeptr);
+ memwrite(&header.colourmapstart, sizeof(header.colourmapstart), writeptr);
+ memwrite(&header.colourmaplength, sizeof(header.colourmaplength), writeptr);
+ memwrite(&header.colourmapbits, sizeof(header.colourmapbits), writeptr);
+ memwrite(&header.xstart, sizeof(header.xstart), writeptr);
+ memwrite(&header.ystart, sizeof(header.ystart), writeptr);
+ memwrite(&header.width, sizeof(header.width), writeptr);
+ memwrite(&header.height, sizeof(header.height), writeptr);
+ memwrite(&header.bits, sizeof(header.bits), writeptr);
+ memwrite(&header.descriptor, sizeof(header.descriptor), writeptr);
+
+ union texel
+ {
+ uint32_t u32;
+ uint8_t u8[4];
+ };
+
+ texel* t = (texel*)image.m_data;
+ for (int i=0; i < image.m_width*image.m_height; ++i)
+ {
+ texel tmp = t[i];
+ swap(tmp.u8[0], tmp.u8[2]);
+ memwrite(&tmp.u32, 4, writeptr);
+ }
+
+ fwrite(data, writeptr-data, 1, f);
+ //fclose(f);
+
+ delete[] data;
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+
+bool TgaLoad(const char* filename, TgaImage& image)
+{
+ if (!filename)
+ return false;
+
+ FILE* aTGAFile = fopen(filename, "rb");
+ if (aTGAFile == NULL)
+ {
+ printf("Texture: could not open %s for reading.\n", filename);
+ return NULL;
+ }
+
+ char aHeaderIDLen;
+ size_t err;
+ err = fread(&aHeaderIDLen, sizeof(uint8_t), 1, aTGAFile);
+
+ char aColorMapType;
+ err = fread(&aColorMapType, sizeof(uint8_t), 1, aTGAFile);
+
+ char anImageType;
+ err = fread(&anImageType, sizeof(uint8_t), 1, aTGAFile);
+
+ short aFirstEntryIdx;
+ err = fread(&aFirstEntryIdx, sizeof(uint16_t), 1, aTGAFile);
+
+ short aColorMapLen;
+ err = fread(&aColorMapLen, sizeof(uint16_t), 1, aTGAFile);
+
+ char aColorMapEntrySize;
+ err = fread(&aColorMapEntrySize, sizeof(uint8_t), 1, aTGAFile);
+
+ short anXOrigin;
+ err = fread(&anXOrigin, sizeof(uint16_t), 1, aTGAFile);
+
+ short aYOrigin;
+ err = fread(&aYOrigin, sizeof(uint16_t), 1, aTGAFile);
+
+ short anImageWidth;
+ err = fread(&anImageWidth, sizeof(uint16_t), 1, aTGAFile);
+
+ short anImageHeight;
+ err = fread(&anImageHeight, sizeof(uint16_t), 1, aTGAFile);
+
+ char aBitCount = 32;
+ err = fread(&aBitCount, sizeof(uint8_t), 1, aTGAFile);
+
+ char anImageDescriptor;// = 8 | (1<<5);
+ err = fread((char*)&anImageDescriptor, sizeof(uint8_t), 1, aTGAFile);
+
+ // supress warning
+ (void)err;
+
+ // total is the number of bytes we'll have to read
+ uint8_t numComponents = aBitCount / 8;
+ uint32_t numTexels = anImageWidth * anImageHeight;
+
+ // allocate memory for image pixels
+ image.m_width = anImageWidth;
+ image.m_height = anImageHeight;
+ image.m_data = new uint32_t[numTexels];
+
+ // load the image pixels
+ for (uint32_t i=0; i < numTexels; ++i)
+ {
+ union texel
+ {
+ uint32_t u32;
+ uint8_t u8[4];
+ };
+
+ texel t;
+ t.u32 = 0;
+
+ if (!fread(&t.u32, numComponents, 1, aTGAFile))
+ {
+ printf("Texture: file not fully read, may be corrupt (%s)\n", filename);
+ }
+
+ // stores it as BGR(A) so we'll have to swap R and B.
+ swap(t.u8[0], t.u8[2]);
+
+
+ image.m_data[i] = t.u32;
+ }
+
+ // if bit 5 of the descriptor is set then the image is flipped vertically so we fix it up
+ if (anImageDescriptor & (1 << 5))
+ {
+
+ // swap all the rows
+ int rowSize = image.m_width*4;
+
+ uint8_t* buf = new uint8_t[image.m_width*4];
+ uint8_t* start = (uint8_t*)image.m_data;
+ uint8_t* end = &((uint8_t*)image.m_data)[rowSize*(image.m_height-1)];
+
+ while (start < end)
+ {
+ memcpy(buf, end, rowSize);
+ memcpy(end, start, rowSize);
+ memcpy(start, buf, rowSize);
+
+ start += rowSize;
+ end -= rowSize;
+ }
+
+ delete[] buf;
+ }
+
+ fclose(aTGAFile);
+
+ return true;
+}
+
// release the pixel memory allocated by TgaLoad; note image.m_data is left
// dangling, so the image must not be used afterwards
void TgaFree(const TgaImage& image)
{
	delete[] image.m_data;
}
diff --git a/core/tga.h b/core/tga.h
new file mode 100644
index 0000000..5be822c
--- /dev/null
+++ b/core/tga.h
@@ -0,0 +1,56 @@
+
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include "types.h"
+
+#include <algorithm>
+#include <stdio.h>
+
struct TgaImage
{
	// Fetch the texel at (x, y); coordinates outside the image are
	// clamped to the nearest border texel.
	uint32_t SampleClamp(int x, int y) const
	{
		const int cx = std::max(0, std::min(x, int(m_width-1)));
		const int cy = std::max(0, std::min(y, int(m_height-1)));

		return m_data[uint32_t(cy)*m_width + uint32_t(cx)];
	}

	uint16_t m_width;
	uint16_t m_height;

	// pixels are always assumed to be 32 bit
	uint32_t* m_data;
};
+
+bool TgaSave(const char* filename, const TgaImage& image, bool rle=false);
+bool TgaSave(FILE* file, const TgaImage& image, bool rle=false);
+bool TgaLoad(const char* filename, TgaImage& image);
+void TgaFree(const TgaImage& image);
diff --git a/core/types.h b/core/types.h
new file mode 100644
index 0000000..1317d5e
--- /dev/null
+++ b/core/types.h
@@ -0,0 +1,33 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include <cstddef>
+#include "stdint.h"
+
+
diff --git a/core/vec2.h b/core/vec2.h
new file mode 100644
index 0000000..3570c47
--- /dev/null
+++ b/core/vec2.h
@@ -0,0 +1,174 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#if defined(_WIN32) && !defined(__CUDACC__)
+#if defined(_DEBUG)
+
+#define VEC2_VALIDATE() { assert(_finite(x));\
+ assert(!_isnan(x));\
+ \
+ assert(_finite(y));\
+ assert(!_isnan(y));\
+ }
+#else
+
+#define VEC2_VALIDATE() {\
+ assert(isfinite(x));\
+ assert(isfinite(y)); }\
+
+#endif // _WIN32
+
+#else
+#define VEC2_VALIDATE()
+#endif
+
+#ifdef _DEBUG
+#define FLOAT_VALIDATE(f) { assert(_finite(f)); assert(!_isnan(f)); }
+#else
+#define FLOAT_VALIDATE(f)
+#endif
+
+
+// vec2
+template <typename T>
+class XVector2
+{
+public:
+
+ typedef T value_type;
+
+ CUDA_CALLABLE XVector2() : x(0.0f), y(0.0f) { VEC2_VALIDATE(); }
+ CUDA_CALLABLE XVector2(T _x) : x(_x), y(_x) { VEC2_VALIDATE(); }
+ CUDA_CALLABLE XVector2(T _x, T _y) : x(_x), y(_y) { VEC2_VALIDATE(); }
+ CUDA_CALLABLE XVector2(const T* p) : x(p[0]), y(p[1]) {}
+
+ template <typename U>
+ CUDA_CALLABLE explicit XVector2(const XVector2<U>& v) : x(v.x), y(v.y) {}
+
+ CUDA_CALLABLE operator T* () { return &x; }
+ CUDA_CALLABLE operator const T* () const { return &x; };
+
+ CUDA_CALLABLE void Set(T x_, T y_) { VEC2_VALIDATE(); x = x_; y = y_; }
+
+ CUDA_CALLABLE XVector2<T> operator * (T scale) const { XVector2<T> r(*this); r *= scale; VEC2_VALIDATE(); return r; }
+ CUDA_CALLABLE XVector2<T> operator / (T scale) const { XVector2<T> r(*this); r /= scale; VEC2_VALIDATE(); return r; }
+ CUDA_CALLABLE XVector2<T> operator + (const XVector2<T>& v) const { XVector2<T> r(*this); r += v; VEC2_VALIDATE(); return r; }
+ CUDA_CALLABLE XVector2<T> operator - (const XVector2<T>& v) const { XVector2<T> r(*this); r -= v; VEC2_VALIDATE(); return r; }
+
+ CUDA_CALLABLE XVector2<T>& operator *=(T scale) {x *= scale; y *= scale; VEC2_VALIDATE(); return *this;}
+ CUDA_CALLABLE XVector2<T>& operator /=(T scale) {T s(1.0f/scale); x *= s; y *= s; VEC2_VALIDATE(); return *this;}
+ CUDA_CALLABLE XVector2<T>& operator +=(const XVector2<T>& v) {x += v.x; y += v.y; VEC2_VALIDATE(); return *this;}
+ CUDA_CALLABLE XVector2<T>& operator -=(const XVector2<T>& v) {x -= v.x; y -= v.y; VEC2_VALIDATE(); return *this;}
+
+ CUDA_CALLABLE XVector2<T>& operator *=(const XVector2<T>& scale) {x *= scale.x; y *= scale.y; VEC2_VALIDATE(); return *this;}
+
+ // negate
+ CUDA_CALLABLE XVector2<T> operator -() const { VEC2_VALIDATE(); return XVector2<T>(-x, -y); }
+
+ // returns this vector
+ CUDA_CALLABLE void Normalize() { *this /= Length(*this); }
+ CUDA_CALLABLE void SafeNormalize(const XVector2<T>& v=XVector2<T>(0.0f,0.0f))
+ {
+ T length = Length(*this);
+ *this = (length==0.00001f)?v:(*this /= length);
+ }
+
+ T x;
+ T y;
+};
+
+typedef XVector2<float> Vec2;
+typedef XVector2<float> Vector2;
+
+// lhs scalar scale
+template <typename T>
+CUDA_CALLABLE XVector2<T> operator *(T lhs, const XVector2<T>& rhs)
+{
+ XVector2<T> r(rhs);
+ r *= lhs;
+ return r;
+}
+
+template <typename T>
+CUDA_CALLABLE XVector2<T> operator*(const XVector2<T>& lhs, const XVector2<T>& rhs)
+{
+ XVector2<T> r(lhs);
+ r *= rhs;
+ return r;
+}
+
+template <typename T>
+CUDA_CALLABLE bool operator==(const XVector2<T>& lhs, const XVector2<T>& rhs)
+{
+ return (lhs.x == rhs.x && lhs.y == rhs.y);
+}
+
+
+template <typename T>
+CUDA_CALLABLE T Dot(const XVector2<T>& v1, const XVector2<T>& v2)
+{
+ return v1.x * v2.x + v1.y * v2.y;
+}
+
+// returns the ccw perpendicular vector
+template <typename T>
+CUDA_CALLABLE XVector2<T> PerpCCW(const XVector2<T>& v)
+{
+ return XVector2<T>(-v.y, v.x);
+}
+
+template <typename T>
+CUDA_CALLABLE XVector2<T> PerpCW(const XVector2<T>& v)
+{
+ return XVector2<T>(v.y, -v.x);
+}
+
+// component wise min max functions
+template <typename T>
+CUDA_CALLABLE XVector2<T> Max(const XVector2<T>& a, const XVector2<T>& b)
+{
+ return XVector2<T>(Max(a.x, b.x), Max(a.y, b.y));
+}
+
+template <typename T>
+CUDA_CALLABLE XVector2<T> Min(const XVector2<T>& a, const XVector2<T>& b)
+{
+ return XVector2<T>(Min(a.x, b.x), Min(a.y, b.y));
+}
+
+// 2d cross product, treat as if a and b are in the xy plane and return magnitude of z
+template <typename T>
+CUDA_CALLABLE T Cross(const XVector2<T>& a, const XVector2<T>& b)
+{
+ return (a.x*b.y - a.y*b.x);
+}
+
+
+
+
diff --git a/core/vec3.h b/core/vec3.h
new file mode 100644
index 0000000..de07b12
--- /dev/null
+++ b/core/vec3.h
@@ -0,0 +1,147 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#if 0 //_DEBUG
+#define VEC3_VALIDATE() { \
+ assert(_finite(x));\
+ assert(!_isnan(x));\
+ \
+ assert(_finite(y));\
+ assert(!_isnan(y));\
+ \
+ assert(_finite(z));\
+ assert(!_isnan(z));\
+ }
+#else
+#define VEC3_VALIDATE()
+#endif
+
+template <typename T=float>
+class XVector3
+{
+public:
+
+ typedef T value_type;
+
+ CUDA_CALLABLE inline XVector3() : x(0.0f), y(0.0f), z(0.0f) {}
+ CUDA_CALLABLE inline XVector3(T a) : x(a), y(a), z(a) {}
+ CUDA_CALLABLE inline XVector3(const T* p) : x(p[0]), y(p[1]), z(p[2]) {}
+ CUDA_CALLABLE inline XVector3(T x_, T y_, T z_) : x(x_), y(y_), z(z_)
+ {
+ VEC3_VALIDATE();
+ }
+
+ CUDA_CALLABLE inline operator T* () { return &x; }
+ CUDA_CALLABLE inline operator const T* () const { return &x; };
+
+ CUDA_CALLABLE inline void Set(T x_, T y_, T z_) { VEC3_VALIDATE(); x = x_; y = y_; z = z_;}
+
+ CUDA_CALLABLE inline XVector3<T> operator * (T scale) const { XVector3<T> r(*this); r *= scale; return r; VEC3_VALIDATE();}
+ CUDA_CALLABLE inline XVector3<T> operator / (T scale) const { XVector3<T> r(*this); r /= scale; return r; VEC3_VALIDATE();}
+ CUDA_CALLABLE inline XVector3<T> operator + (const XVector3<T>& v) const { XVector3<T> r(*this); r += v; return r; VEC3_VALIDATE();}
+ CUDA_CALLABLE inline XVector3<T> operator - (const XVector3<T>& v) const { XVector3<T> r(*this); r -= v; return r; VEC3_VALIDATE();}
+ CUDA_CALLABLE inline XVector3<T> operator /(const XVector3<T>& v) const { XVector3<T> r(*this); r /= v; return r; VEC3_VALIDATE();}
+ CUDA_CALLABLE inline XVector3<T> operator *(const XVector3<T>& v) const { XVector3<T> r(*this); r *= v; return r; VEC3_VALIDATE();}
+
+ CUDA_CALLABLE inline XVector3<T>& operator *=(T scale) {x *= scale; y *= scale; z*= scale; VEC3_VALIDATE(); return *this;}
+ CUDA_CALLABLE inline XVector3<T>& operator /=(T scale) {T s(1.0f/scale); x *= s; y *= s; z *= s; VEC3_VALIDATE(); return *this;}
+ CUDA_CALLABLE inline XVector3<T>& operator +=(const XVector3<T>& v) {x += v.x; y += v.y; z += v.z; VEC3_VALIDATE(); return *this;}
+ CUDA_CALLABLE inline XVector3<T>& operator -=(const XVector3<T>& v) {x -= v.x; y -= v.y; z -= v.z; VEC3_VALIDATE(); return *this;}
+ CUDA_CALLABLE inline XVector3<T>& operator /=(const XVector3<T>& v) {x /= v.x; y /= v.y; z /= v.z; VEC3_VALIDATE(); return *this; }
+ CUDA_CALLABLE inline XVector3<T>& operator *=(const XVector3<T>& v) {x *= v.x; y *= v.y; z *= v.z; VEC3_VALIDATE(); return *this; }
+
+ CUDA_CALLABLE inline bool operator != (const XVector3<T>& v) const { return (x != v.x || y != v.y || z != v.z); }
+
+ // negate
+ CUDA_CALLABLE inline XVector3<T> operator -() const { VEC3_VALIDATE(); return XVector3<T>(-x, -y, -z); }
+
+ CUDA_CALLABLE void Validate()
+ {
+ VEC3_VALIDATE();
+ }
+
+ T x,y,z;
+};
+
+typedef XVector3<float> Vec3;
+typedef XVector3<float> Vector3;
+
+// lhs scalar scale
+template <typename T>
+CUDA_CALLABLE XVector3<T> operator *(T lhs, const XVector3<T>& rhs)
+{
+ XVector3<T> r(rhs);
+ r *= lhs;
+ return r;
+}
+
+template <typename T>
+CUDA_CALLABLE bool operator==(const XVector3<T>& lhs, const XVector3<T>& rhs)
+{
+ return (lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z);
+}
+
+template <typename T>
+CUDA_CALLABLE typename T::value_type Dot3(const T& v1, const T& v2)
+{
+ return v1.x * v2.x + v1.y * v2.y + v1.z*v2.z;
+}
+
+CUDA_CALLABLE inline float Dot3(const float* v1, const float * v2)
+{
+ return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
+}
+
+
+template <typename T>
+CUDA_CALLABLE inline T Dot(const XVector3<T>& v1, const XVector3<T>& v2)
+{
+ return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z;
+}
+
+CUDA_CALLABLE inline Vec3 Cross(const Vec3& b, const Vec3& c)
+{
+ return Vec3(b.y*c.z - b.z*c.y,
+ b.z*c.x - b.x*c.z,
+ b.x*c.y - b.y*c.x);
+}
+
+// component wise min max functions
+template <typename T>
+CUDA_CALLABLE inline XVector3<T> Max(const XVector3<T>& a, const XVector3<T>& b)
+{
+ return XVector3<T>(Max(a.x, b.x), Max(a.y, b.y), Max(a.z, b.z));
+}
+
+template <typename T>
+CUDA_CALLABLE inline XVector3<T> Min(const XVector3<T>& a, const XVector3<T>& b)
+{
+ return XVector3<T>(Min(a.x, b.x), Min(a.y, b.y), Min(a.z, b.z));
+}
+
diff --git a/core/vec4.h b/core/vec4.h
new file mode 100644
index 0000000..e59f590
--- /dev/null
+++ b/core/vec4.h
@@ -0,0 +1,105 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+#include <cassert>
+
+#if 0 //defined(_DEBUG) && defined(_WIN32)
+#define VEC4_VALIDATE() { \
+ assert(_finite(x));\
+ assert(!_isnan(x));\
+ \
+ assert(_finite(y));\
+ assert(!_isnan(y));\
+ \
+ assert(_finite(z));\
+ assert(!_isnan(z));\
+ \
+ assert(_finite(w));\
+ assert(!_isnan(w));\
+}
+#else
+#define VEC4_VALIDATE()
+#endif
+
+template <typename T>
+class XVector4
+{
+public:
+
+ typedef T value_type;
+
+ CUDA_CALLABLE XVector4() : x(0), y(0), z(0), w(0) {}
+ CUDA_CALLABLE XVector4(T a) : x(a), y(a), z(a), w(a) {}
+ CUDA_CALLABLE XVector4(const T* p) : x(p[0]), y(p[1]), z(p[2]), w(p[3]) {}
+ CUDA_CALLABLE XVector4(T x_, T y_, T z_, T w_) : x(x_), y(y_), z(z_), w(w_) { VEC4_VALIDATE(); }
+ CUDA_CALLABLE XVector4(const Vec3& v, float w) : x(v.x), y(v.y), z(v.z), w(w) { }
+
+ CUDA_CALLABLE operator T* () { return &x; }
+ CUDA_CALLABLE operator const T* () const { return &x; };
+
+ CUDA_CALLABLE void Set(T x_, T y_, T z_, T w_) { VEC4_VALIDATE(); x = x_; y = y_; z = z_; w = w_; }
+
+ CUDA_CALLABLE XVector4<T> operator * (T scale) const { XVector4<T> r(*this); r *= scale; VEC4_VALIDATE(); return r;}
+ CUDA_CALLABLE XVector4<T> operator / (T scale) const { XVector4<T> r(*this); r /= scale; VEC4_VALIDATE(); return r; }
+ CUDA_CALLABLE XVector4<T> operator + (const XVector4<T>& v) const { XVector4<T> r(*this); r += v; VEC4_VALIDATE(); return r; }
+ CUDA_CALLABLE XVector4<T> operator - (const XVector4<T>& v) const { XVector4<T> r(*this); r -= v; VEC4_VALIDATE(); return r; }
+ CUDA_CALLABLE XVector4<T> operator * (XVector4<T> scale) const { XVector4<T> r(*this); r *= scale; VEC4_VALIDATE(); return r; }
+
+ CUDA_CALLABLE XVector4<T>& operator *=(T scale) {x *= scale; y *= scale; z*= scale; w*= scale; VEC4_VALIDATE(); return *this;}
+ CUDA_CALLABLE XVector4<T>& operator /=(T scale) {T s(1.0f/scale); x *= s; y *= s; z *= s; w *=s; VEC4_VALIDATE(); return *this;}
+ CUDA_CALLABLE XVector4<T>& operator +=(const XVector4<T>& v) {x += v.x; y += v.y; z += v.z; w += v.w; VEC4_VALIDATE(); return *this;}
+ CUDA_CALLABLE XVector4<T>& operator -=(const XVector4<T>& v) {x -= v.x; y -= v.y; z -= v.z; w -= v.w; VEC4_VALIDATE(); return *this;}
+ CUDA_CALLABLE XVector4<T>& operator *=(const XVector4<T>& v) {x *= v.x; y *= v.y; z *= v.z; w *= v.w; VEC4_VALIDATE(); return *this;}
+
+ CUDA_CALLABLE bool operator != (const XVector4<T>& v) const { return (x != v.x || y != v.y || z != v.z || w != v.w); }
+
+ // negate
+ CUDA_CALLABLE XVector4<T> operator -() const { VEC4_VALIDATE(); return XVector4<T>(-x, -y, -z, -w); }
+
+ T x,y,z,w;
+};
+
+typedef XVector4<float> Vector4;
+typedef XVector4<float> Vec4;
+
+// lhs scalar scale
+template <typename T>
+CUDA_CALLABLE XVector4<T> operator *(T lhs, const XVector4<T>& rhs)
+{
+ XVector4<T> r(rhs);
+ r *= lhs;
+ return r;
+}
+
+template <typename T>
+CUDA_CALLABLE bool operator==(const XVector4<T>& lhs, const XVector4<T>& rhs)
+{
+ return (lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z && lhs.w == rhs.w);
+}
+
diff --git a/core/voxelize.cpp b/core/voxelize.cpp
new file mode 100644
index 0000000..8172aea
--- /dev/null
+++ b/core/voxelize.cpp
@@ -0,0 +1,93 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#include "aabbtree.h"
+#include "mesh.h"
+
// Rasterizes a triangle mesh into a binary voxel grid using a single-pass
// parity (ray crossing) test: for every (x, y) column a ray is cast along +z
// and each surface hit toggles the inside/outside state.
//
// vertices/numVertices        - flat xyz vertex array, interpreted as Vec3
// indices/numTriangleIndices  - triangle index list, 3 indices per triangle
// width/height/depth          - voxel grid resolution
// volume                      - output array of width*height*depth cells,
//                               written 0 (outside) or 0xFFFFFFFF (inside)
// minExtents/maxExtents       - world-space bounds mapped onto the grid
//
// NOTE(review): the parity test presumably requires a watertight mesh; a ray
// passing through a hole would leave the inside/outside state inverted for
// the rest of that column -- confirm against callers.
void Voxelize(const float* vertices, int numVertices, const int* indices, int numTriangleIndices, uint32_t width, uint32_t height, uint32_t depth, uint32_t* volume, Vec3 minExtents, Vec3 maxExtents)
{
	// start with every cell marked outside
	memset(volume, 0, sizeof(uint32_t)*width*height*depth);

	// build an aabb tree of the mesh
	AABBTree tree((const Vec3*)vertices, numVertices, (const uint32_t*)indices, numTriangleIndices/3);

	// parity count method, single pass
	const Vec3 extents(maxExtents-minExtents);
	const Vec3 delta(extents.x/width, extents.y/height, extents.z/depth);     // size of one voxel
	const Vec3 offset(0.5f*delta.x, 0.5f*delta.y, 0.5f*delta.z);              // voxel centre offset

	// this is the bias we apply to step 'off' a triangle we hit, not very robust
	const float eps = 0.00001f*extents.z;

	for (uint32_t x=0; x < width; ++x)
	{
		for (uint32_t y=0; y < height; ++y)
		{
			bool inside = false;

			// ray through the centre of the (x, y) column, travelling along +z
			// starting on the volume's min z plane
			Vec3 rayDir = Vec3(0.0f, 0.0f, 1.0f);
			Vec3 rayStart = minExtents + Vec3(x*delta.x + offset.x, y*delta.y + offset.y, 0.0f);

			// sentinel: no triangle hit yet
			uint32_t lastTri = uint32_t(-1);
			for (;;)
			{
				// calculate ray start
				float t, u, v, w, s;
				uint32_t tri;

				if (tree.TraceRay(rayStart, rayDir, t, u, v, w, s, tri))
				{
					// calculate cell in which intersection occurred
					const float zpos = rayStart.z + t*rayDir.z;
					const float zhit = (zpos-minExtents.z)/delta.z;

					// cell indices for the current ray origin and for the hit,
					// rounded to the nearest voxel boundary; zend is clamped to
					// the last layer of the grid
					uint32_t z = uint32_t(floorf((rayStart.z-minExtents.z)/delta.z + 0.5f));
					uint32_t zend = std::min(uint32_t(floorf(zhit + 0.5f)), depth-1);

					if (inside)
					{
						// march along column setting bits
						for (uint32_t k=z; k < zend; ++k)
							volume[k*width*height + y*width + x] = uint32_t(-1);
					}

					// each crossing flips the parity state
					inside = !inside;

					// we hit the tri we started from
					if (tri == lastTri)
						printf("Error self-intersect\n");
					lastTri = tri;

					// step just past the hit so the next trace doesn't find the
					// same triangle again (see eps note above)
					rayStart += rayDir*(t+eps);

				}
				else
					break;	// no more surface crossings in this column
			}
		}
	}
}
diff --git a/core/voxelize.h b/core/voxelize.h
new file mode 100644
index 0000000..deb99b5
--- /dev/null
+++ b/core/voxelize.h
@@ -0,0 +1,33 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2013-2016 NVIDIA Corporation. All rights reserved.
+
+#pragma once
+
+struct Mesh;
+
+// voxelizes a mesh using a single pass parity algorithm
+void Voxelize(const float* vertices, int numVertices, const int* indices, int numTriangleIndices, uint32_t width, uint32_t height, uint32_t depth, uint32_t* volume, Vec3 minExtents, Vec3 maxExtents); \ No newline at end of file