From cd6e0492903f8a9eb5efa14263d7d9ab092517de Mon Sep 17 00:00:00 2001 From: Marco Foco Date: Mon, 7 Mar 2016 15:47:07 +0100 Subject: FaceWorks 1.0 --- src/precomp.cpp | 843 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 843 insertions(+) create mode 100644 src/precomp.cpp (limited to 'src/precomp.cpp') diff --git a/src/precomp.cpp b/src/precomp.cpp new file mode 100644 index 0000000..f6396b9 --- /dev/null +++ b/src/precomp.cpp @@ -0,0 +1,843 @@ +//---------------------------------------------------------------------------------- +// File: FaceWorks/src/precomp.cpp +// SDK Version: v1.0 +// Email: gameworks@nvidia.com +// Site: http://developer.nvidia.com/ +// +// Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//---------------------------------------------------------------------------------- + + +#include "internal.h" + +#include +#include + + + +// Versioning + +GFSDK_FACEWORKS_API int GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_GetBinaryVersion() +{ + // Capture the header version at time of compilation + return GFSDK_FaceWorks_HeaderVersion; +} + +GFSDK_FACEWORKS_API const char * GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_GetBuildInfo() +{ +#define STRINGIZE2(x) #x +#define STRINGIZE(x) STRINGIZE2(x) + + return + "GFSDK_FaceWorks_HeaderVersion: " STRINGIZE(GFSDK_FaceWorks_HeaderVersion) "\n" + "Built on: " __DATE__ " " __TIME__ "\n" + +#if defined(_MSC_VER) + "Compiler: Microsoft Visual C++\n" + "_MSC_VER: " STRINGIZE(_MSC_VER) "\n" +#else + "Compiler: unknown\n" +#endif + +#if defined(_WIN64) + "Platform: Win64\n" +#elif defined(_WIN32) + "Platform: Win32\n" +#else + "Platform: unknown\n" +#endif + +#if defined(_DEBUG) + "Configuration: Debug\n" +#else + "Configuration: Release\n" +#endif + ; + +#undef STRINGIZE +#undef STRINGIZE2 +} + +static const float pi = 3.141592654f; + +// Initialization + +GFSDK_FACEWORKS_API GFSDK_FaceWorks_Result GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_Init_Internal(int headerVersion) +{ + if (headerVersion != GFSDK_FaceWorks_GetBinaryVersion()) + return GFSDK_FaceWorks_VersionMismatch; + + return GFSDK_FaceWorks_OK; +} + + + +// Error blob helper functions + +void BlobPrintf(GFSDK_FaceWorks_ErrorBlob * pBlob, const char * fmt, ...) +{ + if (!pBlob) + return; + + // Printf the message - just use a fixed-size buffer to simplify things + char newMsg[256]; + va_list args; + va_start(args, fmt); + _vsnprintf_s(newMsg, dim(newMsg), _TRUNCATE, fmt, args); + size_t newLen = strlen(newMsg); + + // Append the message to the blob + if (pBlob->m_msg) + { + size_t curLen = strlen(pBlob->m_msg); + size_t bytes = curLen + newLen + 1; + char * concat = static_cast(FaceWorks_Malloc(bytes, pBlob->m_allocator)); + if (!concat) + { + // Out of memory while generating an error message - just give up + return; + } + memcpy(concat, pBlob->m_msg, curLen); + memcpy(concat + curLen, newMsg, newLen + 1); + FaceWorks_Free(pBlob->m_msg, pBlob->m_allocator); + pBlob->m_msg = concat; + } + else + { + size_t bytes = newLen + 1; + pBlob->m_msg = static_cast(FaceWorks_Malloc(bytes, pBlob->m_allocator)); + if (!pBlob->m_msg) + { + // Out of memory while generating an error message - just give up + return; + } + memcpy(pBlob->m_msg, newMsg, bytes); + } +} + +GFSDK_FACEWORKS_API void GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_FreeErrorBlob( + GFSDK_FaceWorks_ErrorBlob * pBlob) +{ + if (!pBlob) + return; + + FaceWorks_Free(pBlob->m_msg, pBlob->m_allocator); + pBlob->m_msg = nullptr; +} + + + +GFSDK_FACEWORKS_API size_t GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_CalculateCurvatureSizeBytes(int vertexCount) +{ + return sizeof(float) * max(0, vertexCount); +} + +GFSDK_FACEWORKS_API GFSDK_FaceWorks_Result GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_CalculateMeshCurvature( + int vertexCount, + const void * pPositions, + int positionStrideBytes, + const void * pNormals, + int normalStrideBytes, + int indexCount, + const int * pIndices, + int smoothingPassCount, + void * pCurvaturesOut, + int curvatureStrideBytes, + GFSDK_FaceWorks_ErrorBlob * pErrorBlobOut, + gfsdk_new_delete_t * pAllocator /*= 0*/) +{ + // Validate parameters + if (vertexCount < 1) + { + ErrPrintf("vertexCount is %d; should be at least 1\n", vertexCount); + return GFSDK_FaceWorks_InvalidArgument; + } + if (!pPositions) + { + ErrPrintf("pPositions is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (positionStrideBytes < 3 * int(sizeof(float))) + { + ErrPrintf("positionStrideBytes is %d; should be at least %d\n", + positionStrideBytes, 3 * sizeof(float)); + return GFSDK_FaceWorks_InvalidArgument; + } + if (!pNormals) + { + ErrPrintf("pNormals is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (normalStrideBytes < 3 * int(sizeof(float))) + { + ErrPrintf("normalStrideBytes is %d; should be at least %d\n", + normalStrideBytes, 3 * sizeof(float)); + return GFSDK_FaceWorks_InvalidArgument; + } + if (indexCount < 3) + { + ErrPrintf("indexCount is %d; should be at least 3\n", indexCount); + return GFSDK_FaceWorks_InvalidArgument; + } + if (!pIndices) + { + ErrPrintf("pIndices is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (smoothingPassCount < 0) + { + ErrPrintf("smoothingPassCount is %d; should be at least 0\n", smoothingPassCount); + return GFSDK_FaceWorks_InvalidArgument; + } + if (!pCurvaturesOut) + { + ErrPrintf("pCurvaturesOut is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (curvatureStrideBytes < int(sizeof(float))) + { + ErrPrintf("curvatureStrideBytes is %d; should be at least %d\n", + curvatureStrideBytes, sizeof(float)); + return GFSDK_FaceWorks_InvalidArgument; + } + + // Calculate per-vertex curvature. We do this by estimating the curvature along each + // edge using the change in normals between its vertices; then we set each vertex's + // curvature to the midpoint of the minimum and maximum over all the edges touching it. + + int triCount = indexCount / 3; + + // Catch out-of-memory exceptions + try + { + FaceWorks_Allocator allocFloat(pAllocator); + std::vector> curvatureMin(vertexCount, FLT_MAX, allocFloat); + std::vector> curvatureMax(vertexCount, 0.0f, allocFloat); + + // !!!UNDONE: SIMD-ize or GPU-ize all this math + + for (int iTri = 0; iTri < triCount; ++iTri) + { + int indices[] = + { + pIndices[3*iTri], + pIndices[3*iTri + 1], + pIndices[3*iTri + 2], + }; + + float * pos[] = + { + reinterpret_cast((char *)pPositions + indices[0] * positionStrideBytes), + reinterpret_cast((char *)pPositions + indices[1] * positionStrideBytes), + reinterpret_cast((char *)pPositions + indices[2] * positionStrideBytes), + }; + + float * normal[] = + { + reinterpret_cast((char *)pNormals + indices[0] * normalStrideBytes), + reinterpret_cast((char *)pNormals + indices[1] * normalStrideBytes), + reinterpret_cast((char *)pNormals + indices[2] * normalStrideBytes), + }; + + // Calculate each edge's curvature - most edges will be calculated twice this + // way, but it's hard to fix that while still making sure to handle boundary edges. + + float dPx = pos[1][0] - pos[0][0]; + float dPy = pos[1][1] - pos[0][1]; + float dPz = pos[1][2] - pos[0][2]; + float dNx = normal[1][0] - normal[0][0]; + float dNy = normal[1][1] - normal[0][1]; + float dNz = normal[1][2] - normal[0][2]; + float curvature = sqrtf((dNx*dNx + dNy*dNy + dNz*dNz) / (dPx*dPx + dPy*dPy + dPz*dPz)); + curvatureMin[indices[0]] = min(curvatureMin[indices[0]], curvature); + curvatureMin[indices[1]] = min(curvatureMin[indices[1]], curvature); + curvatureMax[indices[0]] = max(curvatureMax[indices[0]], curvature); + curvatureMax[indices[1]] = max(curvatureMax[indices[1]], curvature); + + dPx = pos[2][0] - pos[1][0]; + dPy = pos[2][1] - pos[1][1]; + dPz = pos[2][2] - pos[1][2]; + dNx = normal[2][0] - normal[1][0]; + dNy = normal[2][1] - normal[1][1]; + dNz = normal[2][2] - normal[1][2]; + curvature = sqrtf((dNx*dNx + dNy*dNy + dNz*dNz) / (dPx*dPx + dPy*dPy + dPz*dPz)); + curvatureMin[indices[1]] = min(curvatureMin[indices[1]], curvature); + curvatureMin[indices[2]] = min(curvatureMin[indices[2]], curvature); + curvatureMax[indices[1]] = max(curvatureMax[indices[1]], curvature); + curvatureMax[indices[2]] = max(curvatureMax[indices[2]], curvature); + + dPx = pos[0][0] - pos[2][0]; + dPy = pos[0][1] - pos[2][1]; + dPz = pos[0][2] - pos[2][2]; + dNx = normal[0][0] - normal[2][0]; + dNy = normal[0][1] - normal[2][1]; + dNz = normal[0][2] - normal[2][2]; + curvature = sqrtf((dNx*dNx + dNy*dNy + dNz*dNz) / (dPx*dPx + dPy*dPy + dPz*dPz)); + curvatureMin[indices[2]] = min(curvatureMin[indices[2]], curvature); + curvatureMin[indices[0]] = min(curvatureMin[indices[0]], curvature); + curvatureMax[indices[2]] = max(curvatureMax[indices[2]], curvature); + curvatureMax[indices[0]] = max(curvatureMax[indices[0]], curvature); + } + + for (int i = 0; i < vertexCount; ++i) + { + float * pCurvature = reinterpret_cast((char *)pCurvaturesOut + i * curvatureStrideBytes); + *pCurvature = 0.5f * (curvatureMin[i] + curvatureMax[i]); + } + } + catch (std::bad_alloc) + { + return GFSDK_FaceWorks_OutOfMemory; + } + + if (smoothingPassCount > 0) + { + // Catch out-of-memory exceptions + try + { + FaceWorks_Allocator allocFloat(pAllocator); + std::vector> curvatureSum(allocFloat); + curvatureSum.resize(vertexCount); + + FaceWorks_Allocator allocInt(pAllocator); + std::vector> curvatureCount(allocInt); + curvatureCount.resize(vertexCount); + + // Run a couple of smoothing passes, replacing each vert's curvature + // by the average of its neighbors' + + for (int iPass = 0; iPass < smoothingPassCount; ++iPass) + { + for (int i = 0; i < vertexCount; ++i) + { + curvatureSum[i] = 0.0f; + curvatureCount[i] = 0; + } + + for (int iTri = 0; iTri < triCount; ++iTri) + { + int indices[] = + { + pIndices[3*iTri], + pIndices[3*iTri + 1], + pIndices[3*iTri + 2], + }; + + float curvature0 = *reinterpret_cast((char *)pCurvaturesOut + indices[0] * curvatureStrideBytes); + float curvature1 = *reinterpret_cast((char *)pCurvaturesOut + indices[1] * curvatureStrideBytes); + float curvature2 = *reinterpret_cast((char *)pCurvaturesOut + indices[2] * curvatureStrideBytes); + + curvatureSum[indices[0]] += curvature1 + curvature2; + curvatureCount[indices[0]] += 2; + + curvatureSum[indices[1]] += curvature2 + curvature0; + curvatureCount[indices[1]] += 2; + + curvatureSum[indices[2]] += curvature0 + curvature1; + curvatureCount[indices[2]] += 2; + } + + for (int i = 0; i < vertexCount; ++i) + { + float * pCurvature = reinterpret_cast((char *)pCurvaturesOut + i * curvatureStrideBytes); + *pCurvature = curvatureSum[i] / float(max(1, curvatureCount[i])); + } + } + } + catch (std::bad_alloc) + { + return GFSDK_FaceWorks_OutOfMemory; + } + } + + return GFSDK_FaceWorks_OK; +} + + + +GFSDK_FACEWORKS_API GFSDK_FaceWorks_Result GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_CalculateMeshUVScale( + int vertexCount, + const void * pPositions, + int positionStrideBytes, + const void * pUVs, + int uvStrideBytes, + int indexCount, + const int * pIndices, + float * pAverageUVScaleOut, + GFSDK_FaceWorks_ErrorBlob * pErrorBlobOut) +{ + // Validate parameters + if (vertexCount < 1) + { + ErrPrintf("vertexCount is %d; should be at least 1\n", vertexCount); + return GFSDK_FaceWorks_InvalidArgument; + } + if (!pPositions) + { + ErrPrintf("pPositions is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (positionStrideBytes < 3 * int(sizeof(float))) + { + ErrPrintf("positionStrideBytes is %d; should be at least %d\n", + positionStrideBytes, 3 * sizeof(float)); + return GFSDK_FaceWorks_InvalidArgument; + } + if (!pUVs) + { + ErrPrintf("pUVs is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (uvStrideBytes < 2 * int(sizeof(float))) + { + ErrPrintf("uvStrideBytes is %d; should be at least %d\n", + uvStrideBytes, 2 * sizeof(float)); + return GFSDK_FaceWorks_InvalidArgument; + } + if (indexCount < 3) + { + ErrPrintf("indexCount is %d; should be at least 3\n", indexCount); + return GFSDK_FaceWorks_InvalidArgument; + } + if (indexCount % 3 != 0) + { + ErrPrintf("indexCount is %d; should be a multiple of 3\n", indexCount); + return GFSDK_FaceWorks_InvalidArgument; + } + if (!pIndices) + { + ErrPrintf("pIndices is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (!pAverageUVScaleOut) + { + ErrPrintf("pAverageUVScaleOut is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + + // Calculate average UV scale, as a geometric mean of scale for each triangle + + float logUvScaleSum = 0.0f; + int logUvScaleCount = 0; + + // !!!UNDONE: SIMD-ize or GPU-ize all this math + + for (int iIndex = 0; iIndex < indexCount; iIndex += 3) + { + int indices[] = + { + pIndices[iIndex], + pIndices[iIndex + 1], + pIndices[iIndex + 2], + }; + + float * pos[] = + { + reinterpret_cast((char *)pPositions + indices[0] * positionStrideBytes), + reinterpret_cast((char *)pPositions + indices[1] * positionStrideBytes), + reinterpret_cast((char *)pPositions + indices[2] * positionStrideBytes), + }; + + float * uv[] = + { + reinterpret_cast((char *)pUVs + indices[0] * uvStrideBytes), + reinterpret_cast((char *)pUVs + indices[1] * uvStrideBytes), + reinterpret_cast((char *)pUVs + indices[2] * uvStrideBytes), + }; + + // Find longest edge length in local space + float dP0x = pos[1][0] - pos[0][0]; + float dP0y = pos[1][1] - pos[0][1]; + float dP0z = pos[1][2] - pos[0][2]; + float dP1x = pos[2][0] - pos[1][0]; + float dP1y = pos[2][1] - pos[1][1]; + float dP1z = pos[2][2] - pos[1][2]; + float dP2x = pos[0][0] - pos[2][0]; + float dP2y = pos[0][1] - pos[2][1]; + float dP2z = pos[0][2] - pos[2][2]; + float diameter = sqrtf(max(dP0x*dP0x + dP0y*dP0y + dP0z*dP0z, + max(dP1x*dP1x + dP1y*dP1y + dP1z*dP1z, + dP2x*dP2x + dP2y*dP2y + dP2z*dP2z))); + + // Find longest edge length in UV space + float dUV0x = uv[1][0] - uv[0][0]; + float dUV0y = uv[1][1] - uv[0][1]; + float dUV1x = uv[2][0] - uv[1][0]; + float dUV1y = uv[2][1] - uv[1][1]; + float dUV2x = uv[0][0] - uv[2][0]; + float dUV2y = uv[0][1] - uv[2][1]; + float uvDiameter = sqrtf(max(dUV0x*dUV0x + dUV0y*dUV0y, + max(dUV1x*dUV1x + dUV1y*dUV1y, + dUV2x*dUV2x + dUV2y*dUV2y))); + + // Skip degenerate triangles + if (diameter < 1e-6f || uvDiameter < 1e-6f) + continue; + + float triUvScale = diameter / uvDiameter; + logUvScaleSum += logf(triUvScale); + ++logUvScaleCount; + } + + *pAverageUVScaleOut = expf(logUvScaleSum / float(logUvScaleCount)); + + return GFSDK_FaceWorks_OK; +} + + + +// Diffusion profile from GPU Gems 3 - mixture of 6 Gaussians with RGB weights +// NOTE: could switch to a LUT generated using one of the Donner and Jensen papers + +static const float diffusionSigmas[] = { 0.080f, 0.220f, 0.432f, 0.753f, 1.411f, 2.722f }; +static const float diffusionWeightsR[] = { 0.233f, 0.100f, 0.118f, 0.113f, 0.358f, 0.078f }; +static const float diffusionWeightsG[] = { 0.455f, 0.336f, 0.198f, 0.007f, 0.004f, 0.000f }; +static const float diffusionWeightsB[] = { 0.649f, 0.344f, 0.000f, 0.007f, 0.000f, 0.000f }; + + +static_assert(dim(diffusionWeightsR) == dim(diffusionSigmas), "dimension mismatch between array diffusionWeightsR and diffusionSigmas"); +static_assert(dim(diffusionWeightsG) == dim(diffusionSigmas), "dimension mismatch between array diffusionWeightsG and diffusionSigmas"); +static_assert(dim(diffusionWeightsB) == dim(diffusionSigmas), "dimension mismatch between array diffusionWeightsB and diffusionSigmas"); + +inline float Gaussian(float sigma, float x) +{ + static const float rsqrtTwoPi = 0.39894228f; + return (rsqrtTwoPi / sigma) * expf(-0.5f * (x*x) / (sigma*sigma)); +} + +static void EvaluateDiffusionProfile(float x, float rgb[3]) // x in millimeters +{ + rgb[0] = 0.0f; + rgb[1] = 0.0f; + rgb[2] = 0.0f; + + for (int i = 0; i < dim(diffusionSigmas); ++i) + { + static const float rsqrtTwoPi = 0.39894228f; + float sigma = diffusionSigmas[i]; + float gaussian = (rsqrtTwoPi / sigma) * expf(-0.5f * (x*x) / (sigma*sigma)); + + rgb[0] += diffusionWeightsR[i] * gaussian; + rgb[1] += diffusionWeightsG[i] * gaussian; + rgb[2] += diffusionWeightsB[i] * gaussian; + } +} + +GFSDK_FACEWORKS_API size_t GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_CalculateCurvatureLUTSizeBytes( + const GFSDK_FaceWorks_CurvatureLUTConfig * pConfig) +{ + if (!pConfig) + return 0; + + return 4 * pConfig->m_texWidth * pConfig->m_texHeight; +} + +GFSDK_FACEWORKS_API GFSDK_FaceWorks_Result GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_GenerateCurvatureLUT( + const GFSDK_FaceWorks_CurvatureLUTConfig * pConfig, + void * pCurvatureLUTOut, + GFSDK_FaceWorks_ErrorBlob * pErrorBlobOut) +{ + // Validate parameters + if (!pConfig) + { + ErrPrintf("pConfig is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (!pCurvatureLUTOut) + { + ErrPrintf("pCurvatureLUTOut is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_diffusionRadius <= 0.0f) + { + ErrPrintf("m_diffusionRadius is %g; should be greater than 0\n", + pConfig->m_diffusionRadius); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_texWidth < 1) + { + ErrPrintf("m_texWidth is %d; should be at least 1\n", + pConfig->m_texWidth); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_texHeight < 1) + { + ErrPrintf("m_texHeight is %d; should be at least 1\n", + pConfig->m_texHeight); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_curvatureRadiusMin <= 0.0f) + { + ErrPrintf("m_curvatureRadiusMin is %g; should be greater than 0\n", + pConfig->m_curvatureRadiusMin); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_curvatureRadiusMax <= 0.0f) + { + ErrPrintf("m_curvatureRadiusMax is %g; should be greater than 0\n", + pConfig->m_curvatureRadiusMax); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_curvatureRadiusMax < pConfig->m_curvatureRadiusMin) + { + ErrPrintf("m_curvatureRadiusMin is %g and m_curvatureRadiusMax is %g; max should be greater than min\n", + pConfig->m_curvatureRadiusMin, pConfig->m_curvatureRadiusMax); + return GFSDK_FaceWorks_InvalidArgument; + } + + // The diffusion profile is built assuming a (standard human skin) radius + // of 2.7 mm, so the curvatures and shadow widths need to be scaled to generate + // a LUT for the user's desired diffusion radius. + float diffusionRadiusFactor = pConfig->m_diffusionRadius / 2.7f; + + float curvatureMin = diffusionRadiusFactor / pConfig->m_curvatureRadiusMax; + float curvatureMax = diffusionRadiusFactor / pConfig->m_curvatureRadiusMin; + float curvatureScale = (curvatureMax - curvatureMin) / float(pConfig->m_texHeight); + float curvatureBias = curvatureMin + 0.5f * curvatureScale; + + float NdotLScale = 2.0f / float(pConfig->m_texWidth); + float NdotLBias = -1.0f + 0.5f * NdotLScale; + + unsigned char * pPx = static_cast(pCurvatureLUTOut); + + // !!!UNDONE: SIMD-ize or GPU-ize all this math + + for (int iY = 0; iY < pConfig->m_texHeight; ++iY) + { + for (int iX = 0; iX < pConfig->m_texWidth; ++iX) + { + float NdotL = float(iX) * NdotLScale + NdotLBias; + float theta = acosf(NdotL); + + float curvature = float(iY) * curvatureScale + curvatureBias; + float radius = 1.0f / curvature; + + // Sample points around a ring, and Monte-Carlo-integrate the + // scattered lighting using the diffusion profile + + static const int cIter = 200; + float rgb[3] = { 0.0f, 0.0f, 0.0f }; + + // Set integration bounds in arc-length in mm on the sphere + float lowerBound = max(-pi*radius, -10.0f); + float upperBound = min(pi*radius, 10.0f); + + float iterScale = (upperBound - lowerBound) / float(cIter); + float iterBias = lowerBound + 0.5f * iterScale; + + for (int iIter = 0; iIter < cIter; ++iIter) + { + float delta = float(iIter) * iterScale + iterBias; + float rgbDiffusion[3]; + EvaluateDiffusionProfile(delta, rgbDiffusion); + + float NdotLDelta = max(0.0f, cosf(theta - delta * curvature)); + rgb[0] += NdotLDelta * rgbDiffusion[0]; + rgb[1] += NdotLDelta * rgbDiffusion[1]; + rgb[2] += NdotLDelta * rgbDiffusion[2]; + } + + // Scale sum of samples to get value of integral + float scale = (upperBound - lowerBound) / float(cIter); + rgb[0] *= scale; + rgb[1] *= scale; + rgb[2] *= scale; + + // Calculate delta from standard diffuse lighting (saturate(N.L)) to + // scattered result, remapped from [-.25, .25] to [0, 1]. + float rgbAdjust = -max(0.0f, NdotL) * 2.0f + 0.5f; + rgb[0] = rgb[0] * 2.0f + rgbAdjust; + rgb[1] = rgb[1] * 2.0f + rgbAdjust; + rgb[2] = rgb[2] * 2.0f + rgbAdjust; + + // Clamp to [0, 1] + rgb[0] = min(max(rgb[0], 0.0f), 1.0f); + rgb[1] = min(max(rgb[1], 0.0f), 1.0f); + rgb[2] = min(max(rgb[2], 0.0f), 1.0f); + + // Convert to integer format (linear RGB space) + *(pPx++) = static_cast(255.0f * rgb[0] + 0.5f); + *(pPx++) = static_cast(255.0f * rgb[1] + 0.5f); + *(pPx++) = static_cast(255.0f * rgb[2] + 0.5f); + *(pPx++) = 255; + } + } + + return GFSDK_FaceWorks_OK; +} + +GFSDK_FACEWORKS_API size_t GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_CalculateShadowLUTSizeBytes( + const GFSDK_FaceWorks_ShadowLUTConfig * pConfig) +{ + if (!pConfig) + return 0; + + return 4 * pConfig->m_texWidth * pConfig->m_texHeight; +} + +GFSDK_FACEWORKS_API GFSDK_FaceWorks_Result GFSDK_FACEWORKS_CALLCONV GFSDK_FaceWorks_GenerateShadowLUT( + const GFSDK_FaceWorks_ShadowLUTConfig * pConfig, + void * pShadowLUTOut, + GFSDK_FaceWorks_ErrorBlob * pErrorBlobOut) +{ + if (!pConfig) + { + ErrPrintf("pConfig is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (!pShadowLUTOut) + { + ErrPrintf("pShadowLUTOut is null\n"); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_diffusionRadius <= 0.0f) + { + ErrPrintf("m_diffusionRadius is %g; should be greater than 0\n", + pConfig->m_diffusionRadius); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_texWidth < 1) + { + ErrPrintf("m_texWidth is %d; should be at least 1\n", + pConfig->m_texWidth); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_texHeight < 1) + { + ErrPrintf("m_texHeight is %d; should be at least 1\n", + pConfig->m_texHeight); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_shadowWidthMin <= 0.0f) + { + ErrPrintf("m_shadowWidthMin is %g; should be greater than 0\n", + pConfig->m_shadowWidthMin); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_shadowWidthMax <= 0.0f) + { + ErrPrintf("m_shadowWidthMax is %g; should be greater than 0\n", + pConfig->m_shadowWidthMax); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_shadowWidthMax < pConfig->m_shadowWidthMin) + { + ErrPrintf("m_shadowWidthMin is %g and m_shadowWidthMax is %g; max should be greater than min\n", + pConfig->m_shadowWidthMin, pConfig->m_shadowWidthMax); + return GFSDK_FaceWorks_InvalidArgument; + } + if (pConfig->m_shadowSharpening < 1.0f) + { + ErrPrintf("m_shadowSharpening is %g; should be at least 1.0\n", + pConfig->m_shadowSharpening); + return GFSDK_FaceWorks_InvalidArgument; + } + + // The diffusion profile is built assuming a (standard human skin) radius + // of 2.7 mm, so the curvatures and shadow widths need to be scaled to generate + // a LUT for the user's desired diffusion radius. + float diffusionRadiusFactor = pConfig->m_diffusionRadius / 2.7f; + + float shadowRcpWidthMin = diffusionRadiusFactor / pConfig->m_shadowWidthMax; + float shadowRcpWidthMax = diffusionRadiusFactor / pConfig->m_shadowWidthMin; + float shadowScale = (shadowRcpWidthMax - shadowRcpWidthMin) / float(pConfig->m_texHeight); + float shadowBias = shadowRcpWidthMin + 0.5f * shadowScale; + + unsigned char * pPx = static_cast(pShadowLUTOut); + + // !!!UNDONE: SIMD-ize or GPU-ize all this math + + for (int iY = 0; iY < pConfig->m_texHeight; ++iY) + { + for (int iX = 0; iX < pConfig->m_texWidth; ++iX) + { + // Calculate input position relative to the shadow edge, by approximately + // inverting the transfer function of a disc or Gaussian filter. + float u = (iX + 0.5f) / float(pConfig->m_texWidth); + float inputPos = (sqrtf(u) - sqrtf(1.0f - u)) * 0.5f + 0.5f; + + float rcpWidth = float(iY) * shadowScale + shadowBias; + + // Sample points along a line perpendicular to the shadow edge, and + // Monte-Carlo-integrate the scattered lighting using the diffusion profile + + static const int cIter = 200; + float rgb[3] = { 0.0f, 0.0f, 0.0f }; + + float iterScale = 20.0f / float(cIter); + float iterBias = -10.0f + 0.5f * iterScale; + + for (int iIter = 0; iIter < cIter; ++iIter) + { + float delta = float(iIter) * iterScale + iterBias; + float rgbDiffusion[3]; + EvaluateDiffusionProfile(delta, rgbDiffusion); + + // Use smoothstep as an approximation of the transfer function of a + // disc or Gaussian filter. + float newPos = (inputPos + delta * rcpWidth) * pConfig->m_shadowSharpening + + (-0.5f * pConfig->m_shadowSharpening + 0.5f); + float newPosClamped = min(max(newPos, 0.0f), 1.0f); + float newShadow = (3.0f - 2.0f * newPosClamped) * newPosClamped * newPosClamped; + + rgb[0] += newShadow * rgbDiffusion[0]; + rgb[1] += newShadow * rgbDiffusion[1]; + rgb[2] += newShadow * rgbDiffusion[2]; + } + + // Scale sum of samples to get value of integral. Also hack in a + // fade to ensure the left edge of the image goes strictly to zero. + float scale = 20.0f / float(cIter); + if (iX * 25 < pConfig->m_texWidth) + { + scale *= min(25.0f * float(iX) / float(pConfig->m_texWidth), 1.0f); + } + rgb[0] *= scale; + rgb[1] *= scale; + rgb[2] *= scale; + + // Clamp to [0, 1] + rgb[0] = min(max(rgb[0], 0.0f), 1.0f); + rgb[1] = min(max(rgb[1], 0.0f), 1.0f); + rgb[2] = min(max(rgb[2], 0.0f), 1.0f); + + // Convert linear to sRGB + rgb[0] = (rgb[0] < 0.0031308f) ? (12.92f * rgb[0]) : (1.055f * powf(rgb[0], 1.0f / 2.4f) - 0.055f); + rgb[1] = (rgb[1] < 0.0031308f) ? (12.92f * rgb[1]) : (1.055f * powf(rgb[1], 1.0f / 2.4f) - 0.055f); + rgb[2] = (rgb[2] < 0.0031308f) ? (12.92f * rgb[2]) : (1.055f * powf(rgb[2], 1.0f / 2.4f) - 0.055f); + + // Convert to integer format + *(pPx++) = static_cast(255.0f * rgb[0] + 0.5f); + *(pPx++) = static_cast(255.0f * rgb[1] + 0.5f); + *(pPx++) = static_cast(255.0f * rgb[2] + 0.5f); + *(pPx++) = 255; + } + } + + return GFSDK_FaceWorks_OK; +} -- cgit v1.2.3