diff options
| author | Jason Maskell <[email protected]> | 2016-05-09 10:39:54 +0200 |
|---|---|---|
| committer | Jason Maskell <[email protected]> | 2016-05-09 10:39:54 +0200 |
| commit | 79b3462799c28af8ba586349bd671b1b56e72353 (patch) | |
| tree | 3b06e36c390254c0dc7f3733a0d32af213d87293 /src/FFT_Simulation_CUDA_impl.h | |
| download | waveworks_archive-79b3462799c28af8ba586349bd671b1b56e72353.tar.xz waveworks_archive-79b3462799c28af8ba586349bd671b1b56e72353.zip | |
Initial commit with PS4 and XBone stuff trimmed.
Diffstat (limited to 'src/FFT_Simulation_CUDA_impl.h')
| -rw-r--r-- | src/FFT_Simulation_CUDA_impl.h | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/src/FFT_Simulation_CUDA_impl.h b/src/FFT_Simulation_CUDA_impl.h new file mode 100644 index 0000000..d2a7ef9 --- /dev/null +++ b/src/FFT_Simulation_CUDA_impl.h @@ -0,0 +1,312 @@ +// This code contains NVIDIA Confidential Information and is disclosed +// under the Mutual Non-Disclosure Agreement. +// +// Notice +// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES +// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +// +// NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless +// expressly authorized by NVIDIA. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright © 2008- 2013 NVIDIA Corporation. All rights reserved. +// +// NVIDIA Corporation and its licensors retain all intellectual property and proprietary +// rights in and to this software and related documentation and any modifications thereto. +// Any use, reproduction, disclosure or distribution of this software and related +// documentation without an express license agreement from NVIDIA Corporation is +// strictly prohibited. 
+// + +#ifndef _NVWAVEWORKS_FFT_SIMULATION_CUDA_IMPL_H +#define _NVWAVEWORKS_FFT_SIMULATION_CUDA_IMPL_H + +#include "FFT_Simulation.h" + +#ifdef SUPPORT_CUDA + +struct IDirect3DResource9; +struct ID3D10Resource; + +class NVWaveWorks_FFT_Simulation_Manager_CUDA_Impl; +template<class T> class CircularFIFO; + +class NVWaveWorks_FFT_Simulation_CUDA_Impl : public NVWaveWorks_FFT_Simulation +{ +public: + NVWaveWorks_FFT_Simulation_CUDA_Impl(NVWaveWorks_FFT_Simulation_Manager_CUDA_Impl* pManager, const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params); + ~NVWaveWorks_FFT_Simulation_CUDA_Impl(); + + // Mandatory NVWaveWorks_FFT_Simulation interface + HRESULT initD3D9(IDirect3DDevice9* pD3DDevice); + HRESULT initD3D10(ID3D10Device* pD3DDevice); + HRESULT initD3D11(ID3D11Device* pD3DDevice); + HRESULT initGL2(void* pGLContext); + HRESULT initNoGraphics(); + HRESULT reinit(const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params); + HRESULT addDisplacements(const gfsdk_float2* inSamplePoints, gfsdk_float4* outDisplacements, UINT numSamples); + HRESULT addArchivedDisplacements(float coord, const gfsdk_float2* inSamplePoints, gfsdk_float4* outDisplacements, UINT numSamples); + gfsdk_U64 getDisplacementMapVersion() const { return m_DisplacementMapVersion; } + HRESULT getTimings(NVWaveWorks_FFT_Simulation_Timings&) const; + LPDIRECT3DTEXTURE9 GetDisplacementMapD3D9(); + ID3D10ShaderResourceView** GetDisplacementMapD3D10(); + ID3D11ShaderResourceView** GetDisplacementMapD3D11(); + GLuint GetDisplacementMapGL2(); + + IDirect3DResource9* getD3D9InteropResource(unsigned int deviceIndex); + ID3D10Resource* getD3D10InteropResource(unsigned int deviceIndex); + cudaGraphicsResource* getInteropResource(unsigned int deviceIndex); + + HRESULT preKick(int constantsIndex); + HRESULT kickPreInterop(double dSimTime, gfsdk_U64 kickID); + HRESULT kickWithinInterop(gfsdk_U64 kickID); + HRESULT kickPostInterop(gfsdk_U64 kickID); + + HRESULT collectSingleReadbackResult(bool 
blocking); + bool getReadbackCursor(gfsdk_U64* pKickID); + bool hasReadbacksInFlight() const; + HRESULT canCollectSingleReadbackResultWithoutBlocking(); + HRESULT resetReadbacks(); + + HRESULT archiveDisplacements(); + +private: + + HRESULT kickWithinInteropD3D11(gfsdk_U64 kickID); + HRESULT kickWithinInteropD3D10(gfsdk_U64 kickID); + HRESULT kickWithinInteropD3D9(gfsdk_U64 kickID); + HRESULT kickWithinInteropGL2(gfsdk_U64 kickID); + HRESULT kickWithinInteropNoGfx(gfsdk_U64 kickID); + + NVWaveWorks_FFT_Simulation_Manager_CUDA_Impl* m_pManager; + + GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade m_params; + + int m_resolution; // m_params.fft_resolution + int m_half_resolution_plus_one; + + HRESULT allocateAllResources(); + void releaseAllResources(); + + void releaseAll(); + + HRESULT releaseCudaResources(); + HRESULT allocateCudaResources(); + + HRESULT registerDisplacementMapWithCUDA(); + HRESULT unregisterDisplacementMapWithCUDA(); + + HRESULT initGaussAndOmega(); + + enum { NumReadbackSlots = 4 }; // 2 in-flight, one usable, one active + enum { NumTimerSlots = 4 }; // 2 in-flight, one usable, one active + + struct CudaDeviceState + { + int m_cudaDevice; + + int m_constantsIndex; + + // The Gauss distribution used to generated H0 + float2* m_device_Gauss; + // Initial height field H(0) generated by Phillips spectrum & Gauss distribution. + float2* m_device_H0; + // Height field H(t) in frequency domain, updated each frame. + float2* m_device_Ht; + // Choppy fields Dx(t) and Dy(t), updated each frame. 
+ float4* m_device_Dt; + // Angular frequency + float* m_device_Omega; + + bool m_H0Dirty; + + // Readback staging + float4* m_readback_device_Dxyzs[NumReadbackSlots]; + + // Readback completion events + cudaEvent_t m_readback_completion_evts[NumReadbackSlots]; + cudaEvent_t m_readback_staging_evts[NumReadbackSlots]; + cudaEvent_t m_start_timer_evts[NumTimerSlots]; + cudaEvent_t m_stop_timer_evts[NumTimerSlots]; + cudaEvent_t m_start_fft_timer_evts[NumTimerSlots]; + cudaEvent_t m_stop_fft_timer_evts[NumTimerSlots]; + }; + + unsigned int m_numCudaDevices; + CudaDeviceState* m_pCudaDeviceStates; + + // Optional readback ring-buffer + struct ReadbackSlot + { + float4* m_device_Dxyz; + float4* m_host_Dxyz; + int m_cudaDevice; + cudaEvent_t m_completion_evt; + cudaEvent_t m_staging_evt; + gfsdk_U64 m_kickID; + }; + + // The D3D11 and GL2 use the surface<>-based variants of the CUDA kernels, which output to 16F. Therefore the readback element size + // must be adjusted to match... + size_t m_readback_element_size; + ReadbackSlot m_readback_slots[NumReadbackSlots]; + int m_active_readback_slot; // i.e. 
not in-flight + int m_end_inflight_readback_slots; // the first in-flight slot is always the one after active + float4* m_active_readback_host_Dxyz; + + ReadbackSlot* m_working_readback_slot; // the readback slot being used for current kick processing + + HRESULT consumeAvailableReadbackSlot(CudaDeviceState& cu_dev_state, gfsdk_U64 kickID, ReadbackSlot** ppSlot); + HRESULT waitForAllInFlightReadbacks(); + + void addDisplacements( const BYTE* pReadbackData, + const gfsdk_float2* inSamplePoints, + gfsdk_float4* outDisplacements, + UINT numSamples, + float multiplier = 1.f + ); + + HRESULT updateH0(const CudaDeviceState& cu_dev_state, cudaStream_t cu_kernel_stream); + + struct ReadbackFIFOSlot + { + gfsdk_U64 kickID; + float4* host_Dxyz; + }; + CircularFIFO<ReadbackFIFOSlot>* m_pReadbackFIFO; + + // Timer query ring-buffer + struct TimerSlot + { + int m_cudaDevice; + cudaEvent_t m_start_timer_evt; + cudaEvent_t m_stop_timer_evt; + float m_elapsed_time; // in milli-seconds, as per house style + gfsdk_U64 m_kickID; + }; + + TimerSlot m_timer_slots[NumTimerSlots]; + int m_active_timer_slot; // i.e. 
not in-flight + int m_end_inflight_timer_slots; // the first in-flight slot is always the one after active + + TimerSlot* m_working_timer_slot; // the timer slot being used for current kick processing + + HRESULT consumeAvailableTimerSlot(CudaDeviceState& cu_dev_state, gfsdk_U64 kickID, TimerSlot** ppSlot); + HRESULT waitForAllInFlightTimers(); + HRESULT queryTimers(); + HRESULT getElapsedTimeForActiveSlot(); + + bool m_DisplacementMapIsCUDARegistered; + bool m_GaussAndOmegaInitialised; + bool m_cudaResourcesInitialised; + bool m_ReadbackInitialised; + + gfsdk_U64 m_DisplacementMapVersion; + + // D3D API handling + nv_water_d3d_api m_d3dAPI; + +#if WAVEWORKS_ENABLE_D3D9 + struct D3D9Objects + { + IDirect3DDevice9* m_pd3d9Device; + + struct PerCudaDeviceResources + { + // Displacement/choppy field + LPDIRECT3DTEXTURE9 m_pd3d9DisplacementMap; // (ABGR32F) + bool m_d3d9DisplacementmapIsRegistered; + }; + + PerCudaDeviceResources* m_pd3d9PerCudaDeviceResources; + }; +#endif + +#if WAVEWORKS_ENABLE_D3D10 + struct D3D10Objects + { + ID3D10Device* m_pd3d10Device; + + struct PerCudaDeviceResources + { + // Displacement/choppy field + ID3D10Texture2D* m_pd3d10DisplacementMapResource; + ID3D10ShaderResourceView* m_pd3d10DisplacementMap; // (ABGR32F) + bool m_d3d10DisplacementmapIsRegistered; + }; + + PerCudaDeviceResources* m_pd3d10PerCudaDeviceResources; + }; +#endif + +#if WAVEWORKS_ENABLE_D3D11 + struct D3D11Objects + { + ID3D11Device* m_pd3d11Device; + + struct PerCudaDeviceResources + { + // Displacement/choppy field + ID3D11Texture2D* m_pd3d11DisplacementMapResource; + ID3D11ShaderResourceView* m_pd3d11DisplacementMap; // (ABGR32F) + cudaGraphicsResource* m_pd3d11RegisteredDisplacementMapResource; + }; + + PerCudaDeviceResources* m_pd3d11PerCudaDeviceResources; + }; +#endif +#if WAVEWORKS_ENABLE_GL + struct GL2Objects + { + void* m_pGLContext; + + struct PerCudaDeviceResources + { + // Displacement/choppy field + GLuint m_GL2DisplacementMapTexture; // RGBA32F + 
cudaGraphicsResource* m_pGL2RegisteredDisplacementMapResource; + }; + + PerCudaDeviceResources* m_pGL2PerCudaDeviceResources; + }; +#endif + struct NoGraphicsObjects + { + struct PerCudaDeviceResources + { + float4* m_Device_displacementMap; + }; + + PerCudaDeviceResources* m_pNoGraphicsPerCudaDeviceResources; + }; + + union + { +#if WAVEWORKS_ENABLE_D3D9 + D3D9Objects _9; +#endif +#if WAVEWORKS_ENABLE_D3D10 + D3D10Objects _10; +#endif +#if WAVEWORKS_ENABLE_D3D11 + D3D11Objects _11; +#endif +#if WAVEWORKS_ENABLE_GL + GL2Objects _GL2; +#endif + NoGraphicsObjects _noGFX; + } m_d3d; + +}; + +#endif // SUPPORT_CUDA + +#endif // _NVWAVEWORKS_FFT_SIMULATION_CUDA_IMPL_H |