diff options
Diffstat (limited to 'src/Simulation_Util.cpp')
| -rw-r--r-- | src/Simulation_Util.cpp | 472 |
1 files changed, 472 insertions, 0 deletions
diff --git a/src/Simulation_Util.cpp b/src/Simulation_Util.cpp new file mode 100644 index 0000000..d250e3d --- /dev/null +++ b/src/Simulation_Util.cpp @@ -0,0 +1,472 @@ +// This code contains NVIDIA Confidential Information and is disclosed +// under the Mutual Non-Disclosure Agreement. +// +// Notice +// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES +// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +// +// NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless +// expressly authorized by NVIDIA. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright � 2008- 2013 NVIDIA Corporation. All rights reserved. +// +// NVIDIA Corporation and its licensors retain all intellectual property and proprietary +// rights in and to this software and related documentation and any modifications thereto. +// Any use, reproduction, disclosure or distribution of this software and related +// documentation without an express license agreement from NVIDIA Corporation is +// strictly prohibited. +// + +#include "Internal.h" +#include "Simulation_Util.h" +#include "D3DX_replacement_code.h" + +#define FN_QUALIFIER inline +#define FN_NAME(x) x +#include "Spectrum_Util.h" +#include "Float16_Util.h" + +#ifndef __ANDROID__ +#include <random> +#endif + +#ifdef __ANDROID__ +#include "math.h" +#endif + +#if !defined(__GNUC__) && !defined(__ANDROID__) +namespace std { using namespace tr1; } +#define USE_MERSENNE_TWISTER_RNG +#else +#undef USE_MERSENNE_TWISTER_RNG +#endif + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +namespace +{ + // Template algo for initializaing various aspects of simulation + template<class Functor> void for_each_wavevector(const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params, const Functor& functor) + { + const int dmap_dim = params.fft_resolution; + const float fft_period = params.fft_period; + + for (int i = 0; i <= dmap_dim; i++) + { + // ny is y-coord wave number + const int ny = (-dmap_dim/2 + i); + + // K is wave-vector, range [-|DX/W, |DX/W], [-|DY/H, |DY/H] + float2 K; + K.y = float(ny) * (2 * float(M_PI) / fft_period); + + for (int j = 0; j <= dmap_dim; j++) + { + // nx is x-coord wave number + int nx = (-dmap_dim/2 + j); + + K.x = float(nx) * (2 * float(M_PI) / fft_period); + + functor(i,j,nx,ny,K); + } + } + } + + struct init_omega_functor { + + int dmap_dim; + float* pOutOmega; + + void operator()(int i, int j, int /* not used nx*/, int /* not used ny*/, float2 K) const { + // The angular frequency is following the dispersion relation: + // omega^2 = g*k + // So the equation of Gerstner wave is: + // x = x0 - K/k * A * sin(dot(K, x0) - sqrt(g * k) * t), x is a 2D vector. + // z = A * cos(dot(K, x0) - sqrt(g * k) * t) + // Gerstner wave means: a point on a simple sinusoid wave is doing a uniform circular motion. The + // center is (x0, y0, z0), the radius is A, and the circle plane is parallel to K. + pOutOmega[i * (dmap_dim + 4) + j] = sqrtf(GRAV_ACCEL * sqrtf(K.x * K.x + K.y * K.y)); + } + }; + +#ifdef USE_MERSENNE_TWISTER_RNG + static std::mt19937 g_random_number_generation; + + template <class _Engine> + float generate_uniform_01(_Engine& _Eng) + { + return ((_Eng() - (_Eng.min)()) / ((float)(_Eng.max)() - (float)(_Eng.min)() + 1.f)); + } + +#else // using simple and fast XOR SHIFT RNG + + unsigned long xor_shift_rand128() + { + static uint32_t x = 123456789; // time(0); + static uint32_t y = 362436069; + static uint32_t z = 521288629; + static uint32_t w = 88675123; + uint32_t t; + + t = x ^ (x << 11); + x = y; y = z; z = w; + return w = w ^ (w >> 19) ^ (t ^ (t >> 8)); + } + + struct xorshift_engine + { + unsigned long xor_shift_rand128(); + unsigned long min() { return 0;}; + unsigned long max() { return 4294967295ul;}; + void seed() { return;}; + }; + + xorshift_engine g_random_number_generation; + + float generate_uniform_01(xorshift_engine& _Eng) + { + return ((float)(xor_shift_rand128() - (_Eng.min)()) / ((float)(_Eng.max)() - (float)(_Eng.min)() + 1.f)); + } + +#endif + + // Generating gaussian random number with mean 0 and standard deviation 1. + float Gauss() + { + float u1 = generate_uniform_01(g_random_number_generation); + float u2 = generate_uniform_01(g_random_number_generation); + if (u1 < 1e-6f) + u1 = 1e-6f; + return sqrtf(-2 * logf(u1)) * cosf(2 * float(M_PI) * u2); + } + +} +namespace GFSDK_WaveWorks_Simulation_Util +{ + void init_omega(const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params, float* pOutOmega) + { + init_omega_functor f; + f.dmap_dim = params.fft_resolution; + f.pOutOmega = pOutOmega; + + for_each_wavevector(params, f); + } + + void init_gauss( const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& /*params*/, float2* pOutGauss) + { + g_random_number_generation.seed(); + for (int i = 0; i <= gauss_map_resolution; i++) + { + for (int j = 0; j <= gauss_map_resolution; j++) + { + const int ix = i * (gauss_map_resolution + 4) + j; + pOutGauss[ix].x = Gauss(); + pOutGauss[ix].y = Gauss(); + } + } + } + + float get_spectrum_rms_sqr(const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params) + { + float a = params.wave_amplitude * params.wave_amplitude; + float v = params.wind_speed; + float dir_depend = params.wind_dependency; + float fft_period = params.fft_period; + + float phil_norm = expf(1)/fft_period; // This normalization ensures that the simulation is invariant w.r.t. units and/or fft_period + phil_norm *= phil_norm; // Use the square as we are accumulating RMS + + // We can compute the integral of Phillips over a disc in wave vector space analytically, and by subtracting one + // disc from the other we can compute the integral for the ring defined by {params.window_in,params.window_out} + const float lower_k = params.window_in * 2.f * float(M_PI) / fft_period; + const float upper_k = params.window_out * 2.f * float(M_PI) / fft_period; + float rms_est = UpperBoundPhillipsIntegral(upper_k, v, a, dir_depend, params.small_wave_fraction) - UpperBoundPhillipsIntegral(lower_k, v, a, dir_depend, params.small_wave_fraction); + + // Normalize to wave number space + rms_est *= 0.25f*(fft_period*fft_period)/(float(M_PI) * float(M_PI)); + rms_est *= phil_norm; + + return rms_est; + } + + template<class InputPolicy, class MultiplierPolicy> + void add_displacements( const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params, + const BYTE* pReadbackData, + UINT rowPitch, + const gfsdk_float2* inSamplePoints, + gfsdk_float4* outDisplacements, + UINT numSamples, + const MultiplierPolicy& multiplier + ) + { + const int dmap_dim = params.fft_resolution; + const FLOAT f_dmap_dim = FLOAT(dmap_dim); + const FLOAT uv_scale = f_dmap_dim / params.fft_period; + + const gfsdk_float2* currSrc = inSamplePoints; + gfsdk_float4* currDst = outDisplacements; + for(UINT sample = 0; sample != numSamples; ++sample, ++currSrc, ++currDst) + { + // Calculate the UV coords, in texels + const gfsdk_float2 uv = *currSrc * uv_scale - gfsdk_make_float2(0.5f, 0.5f); + gfsdk_float2 uv_wrap = gfsdk_make_float2(fmodf(uv.x,f_dmap_dim),fmodf(uv.y,f_dmap_dim)); + if(uv_wrap.x < 0.f) + uv_wrap.x += f_dmap_dim; + else if(uv_wrap.x >= f_dmap_dim) + uv_wrap.x -= f_dmap_dim; + if(uv_wrap.y < 0.f) + uv_wrap.y += f_dmap_dim; + else if(uv_wrap.y >= f_dmap_dim) + uv_wrap.y -= f_dmap_dim; + const gfsdk_float2 uv_round = gfsdk_make_float2(floorf(uv_wrap.x),floorf(uv_wrap.y)); + const gfsdk_float2 uv_frac = uv_wrap - uv_round; + + const int uv_x = ((int)uv_round.x) % dmap_dim; + const int uv_y = ((int)uv_round.y) % dmap_dim; + const int uv_x_1 = (uv_x + 1) % dmap_dim; + const int uv_y_1 = (uv_y + 1) % dmap_dim; + + // Ensure we wrap round during the lerp too + const typename InputPolicy::InputType* pTL = reinterpret_cast<const typename InputPolicy::InputType*>(pReadbackData + uv_y * rowPitch); + const typename InputPolicy::InputType* pTR = pTL + uv_x_1; + pTL += uv_x; + const typename InputPolicy::InputType* pBL = reinterpret_cast<const typename InputPolicy::InputType*>(pReadbackData + uv_y_1 * rowPitch); + const typename InputPolicy::InputType* pBR = pBL + uv_x_1; + pBL += uv_x; + + gfsdk_float4 toadd = (1.f - uv_frac.x) * (1.f - uv_frac.y) * InputPolicy::get_float4(pTL); + toadd += uv_frac.x * (1.f - uv_frac.y) * InputPolicy::get_float4(pTR); + toadd += (1.f - uv_frac.x) * uv_frac.y * InputPolicy::get_float4(pBL); + toadd += uv_frac.x * uv_frac.y * InputPolicy::get_float4(pBR); + *currDst += multiplier.mult(toadd); + } + } + + struct NoMultiplierPolicy + { + inline const gfsdk_float4& mult(const gfsdk_float4& val) const { return val; } + }; + + struct ParameterizedMultiplierPolicy + { + ParameterizedMultiplierPolicy(float m) : + m_multiplier(m) + { + } + + inline gfsdk_float4 mult(const gfsdk_float4& val) const { return m_multiplier*val; } + + float m_multiplier; + }; + + template<class InputPolicy> + void add_displacements( const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params, + const BYTE* pReadbackData, + UINT rowPitch, + const gfsdk_float2* inSamplePoints, + gfsdk_float4* outDisplacements, + UINT numSamples, + float multiplier + ) + { + if(1.f == multiplier) + { + // No multiplier required + add_displacements<InputPolicy,NoMultiplierPolicy>(params,pReadbackData,rowPitch,inSamplePoints,outDisplacements,numSamples,NoMultiplierPolicy()); + } + else if(0.f != multiplier) + { + add_displacements<InputPolicy,ParameterizedMultiplierPolicy>(params,pReadbackData,rowPitch,inSamplePoints,outDisplacements,numSamples,ParameterizedMultiplierPolicy(multiplier)); + } + else + { + // Nothin to add, do nothin + } + } + + struct Float16InputPolicy + { + struct half4 + { + gfsdk_U16 _components[4]; + }; + typedef half4 InputType; + static inline gfsdk_float4 get_float4(const half4* pIn) + { + return GFSDK_WaveWorks_Float16_Util::float32x4((gfsdk_U16*)pIn); + } + }; + + void add_displacements_float16( const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params, + const BYTE* pReadbackData, + UINT rowPitch, + const gfsdk_float2* inSamplePoints, + gfsdk_float4* outDisplacements, + UINT numSamples, + float multiplier + ) + { + add_displacements<Float16InputPolicy>(params,pReadbackData,rowPitch,inSamplePoints,outDisplacements,numSamples,multiplier); + } + + struct Float32InputPolicy + { + typedef gfsdk_float4 InputType; + static inline const gfsdk_float4& get_float4(const gfsdk_float4* pIn) { return *pIn; } + }; + + void add_displacements_float32( const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params, + const BYTE* pReadbackData, + UINT rowPitch, + const gfsdk_float2* inSamplePoints, + gfsdk_float4* outDisplacements, + UINT numSamples, + float multiplier + ) + { + add_displacements<Float32InputPolicy>(params,pReadbackData,rowPitch,inSamplePoints,outDisplacements,numSamples,multiplier); + } + +#if !defined(WAVEWORKS_ENABLE_PROFILING) // defined in FFT_API_support.h + void tieThreadToCore(unsigned char core) + { + // do nothing + } + TickType getTicks() + { +#if defined(TARGET_PLATFORM_PS4) + return 0; +#elif defined(TARGET_PLATFORM_NIXLIKE) + timespec retval = {0,0}; + return retval; +#else + return 0; +#endif + } + float getMilliseconds(const TickType& start, const TickType& stop) + { + return 0; + } +#elif defined(TARGET_PLATFORM_PS4) + void tieThreadToCore(unsigned char core) + { + // do nothing + } + + TickType getTicks() + { + return __builtin_readcyclecounter(); + } + + float getMilliseconds(const TickType& start, const TickType& stop) + { + return float(stop - start) * 1000.f/1600000000.f; // Based on 1.6GHz clocks + } + +#elif defined(TARGET_PLATFORM_NIXLIKE) + void tieThreadToCore(unsigned char /*core*/) + { + // Enable this to WAR on systems that have core-sensitive QueryPerformanceFrequency + // SetThreadAffinityMask( GetCurrentThread(), 1<<core ); + } + + // A somewhat vile WAR to make sure we can compile on older Linuxes, however in our defense + // we do check for _RAW support by relaxing down to no-_RAW if a call to clock_gettime() fails + #ifndef CLOCK_MONOTONIC_RAW + #define CLOCK_MONOTONIC_RAW 4 + #endif + +#if defined(TARGET_PLATFORM_MACOSX) + #include <mach/mach_time.h> + TickType getTicks() + { + mach_timebase_info_data_t timebase; + timespec currTime; + uint64_t clock; + uint64_t nano; + clock = mach_absolute_time(); + mach_timebase_info(&timebase); // should better use it once to get numer/denom + nano = clock * (uint64_t)timebase.numer / (uint64_t)timebase.denom; + currTime.tv_sec = nano / 1000000000L; + currTime.tv_nsec = nano % 1000000000L; + return currTime; + } +#else + TickType getTicks() + { + static int clk_id = CLOCK_MONOTONIC_RAW; + timespec currTime; + if(clock_gettime(clk_id, &currTime)) + { + clk_id = CLOCK_MONOTONIC; + clock_gettime(clk_id, &currTime); + } + return currTime; + } +#endif + + float getMilliseconds(const TickType& start, const TickType& stop) + { + timespec x = stop; + timespec y = start; + + /* Perform the carry for the later subtraction by updating y. */ + if (x.tv_nsec < y.tv_nsec) { + long numsec = (y.tv_nsec - x.tv_nsec) / 1000000000 + 1; + y.tv_nsec -= 1000000000 * numsec; + y.tv_sec += numsec; + } + if (x.tv_nsec - y.tv_nsec > 1000000000) { + long numsec = (x.tv_nsec - y.tv_nsec) / 1000000000; + y.tv_nsec += 1000000000 * numsec; + y.tv_sec -= numsec; + } + + /* Compute the time remaining to wait. + tv_nsec is certainly positive. */ + timespec diff; + diff.tv_sec = x.tv_sec - y.tv_sec; + diff.tv_nsec = x.tv_nsec - y.tv_nsec; + + return float(1000. * ((double) diff.tv_sec + 0.000000001 * (double) diff.tv_nsec)); + } +#else // !WAVEWORKS_ENABLE_PROFILING && !TARGET_PLATFORM_NIXLIKE + void tieThreadToCore(unsigned char /*core*/) + { + // Enable this to WAR on systems that have core-sensitive QueryPerformanceFrequency + // SetThreadAffinityMask( GetCurrentThread(), 1<<core ); + } + + TickType getTicks() + { + LARGE_INTEGER c; + QueryPerformanceCounter(&c); + return c.QuadPart; + } + + float getMilliseconds(const TickType& start, const TickType& stop) + { + static bool first_time=true; + static LARGE_INTEGER f; + if(first_time) + { + QueryPerformanceFrequency(&f); + first_time=false; + } + // clamping timestamp + TickType clampedStop = stop < start ? start : stop; + return (float)((double)(clampedStop - start)/(double)(f.QuadPart/1000.0)); + } +#endif +} |