summaryrefslogtreecommitdiff
path: root/src/FFT_Simulation_CUDA_impl.h
diff options
context:
space:
mode:
authorJason Maskell <[email protected]>2016-05-09 10:39:54 +0200
committerJason Maskell <[email protected]>2016-05-09 10:39:54 +0200
commit79b3462799c28af8ba586349bd671b1b56e72353 (patch)
tree3b06e36c390254c0dc7f3733a0d32af213d87293 /src/FFT_Simulation_CUDA_impl.h
downloadwaveworks_archive-79b3462799c28af8ba586349bd671b1b56e72353.tar.xz
waveworks_archive-79b3462799c28af8ba586349bd671b1b56e72353.zip
Initial commit with PS4 and XBone stuff trimmed.
Diffstat (limited to 'src/FFT_Simulation_CUDA_impl.h')
-rw-r--r--src/FFT_Simulation_CUDA_impl.h312
1 files changed, 312 insertions, 0 deletions
diff --git a/src/FFT_Simulation_CUDA_impl.h b/src/FFT_Simulation_CUDA_impl.h
new file mode 100644
index 0000000..d2a7ef9
--- /dev/null
+++ b/src/FFT_Simulation_CUDA_impl.h
@@ -0,0 +1,312 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright © 2008- 2013 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+#ifndef _NVWAVEWORKS_FFT_SIMULATION_CUDA_IMPL_H
+#define _NVWAVEWORKS_FFT_SIMULATION_CUDA_IMPL_H
+
+#include "FFT_Simulation.h"
+
+#ifdef SUPPORT_CUDA
+
+struct IDirect3DResource9; // forward declaration: only used by pointer (return type of getD3D9InteropResource)
+struct ID3D10Resource; // forward declaration: only used by pointer (return type of getD3D10InteropResource)
+
+class NVWaveWorks_FFT_Simulation_Manager_CUDA_Impl; // forward declaration: passed to the constructor and held as m_pManager
+template<class T> class CircularFIFO; // forward declaration: container type behind the m_pReadbackFIFO pointer
+
+class NVWaveWorks_FFT_Simulation_CUDA_Impl : public NVWaveWorks_FFT_Simulation // CUDA variant of the NVWaveWorks_FFT_Simulation interface; declarations only, definitions live outside this header
+{
+public:
+ NVWaveWorks_FFT_Simulation_CUDA_Impl(NVWaveWorks_FFT_Simulation_Manager_CUDA_Impl* pManager, const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params);
+ ~NVWaveWorks_FFT_Simulation_CUDA_Impl();
+
+ // Mandatory NVWaveWorks_FFT_Simulation interface
+ HRESULT initD3D9(IDirect3DDevice9* pD3DDevice);
+ HRESULT initD3D10(ID3D10Device* pD3DDevice);
+ HRESULT initD3D11(ID3D11Device* pD3DDevice);
+ HRESULT initGL2(void* pGLContext);
+ HRESULT initNoGraphics();
+ HRESULT reinit(const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params);
+ HRESULT addDisplacements(const gfsdk_float2* inSamplePoints, gfsdk_float4* outDisplacements, UINT numSamples);
+ HRESULT addArchivedDisplacements(float coord, const gfsdk_float2* inSamplePoints, gfsdk_float4* outDisplacements, UINT numSamples);
+ gfsdk_U64 getDisplacementMapVersion() const { return m_DisplacementMapVersion; } // inline getter for m_DisplacementMapVersion
+ HRESULT getTimings(NVWaveWorks_FFT_Simulation_Timings&) const;
+ LPDIRECT3DTEXTURE9 GetDisplacementMapD3D9();
+ ID3D10ShaderResourceView** GetDisplacementMapD3D10();
+ ID3D11ShaderResourceView** GetDisplacementMapD3D11();
+ GLuint GetDisplacementMapGL2();
+ // Interop resource accessors, one variant per graphics API, selected by deviceIndex
+ IDirect3DResource9* getD3D9InteropResource(unsigned int deviceIndex);
+ ID3D10Resource* getD3D10InteropResource(unsigned int deviceIndex);
+ cudaGraphicsResource* getInteropResource(unsigned int deviceIndex);
+ // Kick entry points; the names indicate phases before, within and after graphics interop (call order presumably driven by the manager - TODO confirm)
+ HRESULT preKick(int constantsIndex);
+ HRESULT kickPreInterop(double dSimTime, gfsdk_U64 kickID);
+ HRESULT kickWithinInterop(gfsdk_U64 kickID);
+ HRESULT kickPostInterop(gfsdk_U64 kickID);
+ // Readback collection and cursor queries
+ HRESULT collectSingleReadbackResult(bool blocking);
+ bool getReadbackCursor(gfsdk_U64* pKickID);
+ bool hasReadbacksInFlight() const;
+ HRESULT canCollectSingleReadbackResultWithoutBlocking();
+ HRESULT resetReadbacks();
+
+ HRESULT archiveDisplacements();
+
+private:
+ // Per-graphics-API variants of kickWithinInterop() (presumably selected via m_d3dAPI - TODO confirm)
+ HRESULT kickWithinInteropD3D11(gfsdk_U64 kickID);
+ HRESULT kickWithinInteropD3D10(gfsdk_U64 kickID);
+ HRESULT kickWithinInteropD3D9(gfsdk_U64 kickID);
+ HRESULT kickWithinInteropGL2(gfsdk_U64 kickID);
+ HRESULT kickWithinInteropNoGfx(gfsdk_U64 kickID);
+
+ NVWaveWorks_FFT_Simulation_Manager_CUDA_Impl* m_pManager;
+
+ GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade m_params;
+
+ int m_resolution; // m_params.fft_resolution
+ int m_half_resolution_plus_one; // presumably m_resolution/2 + 1 - TODO confirm against the implementation
+ // Resource allocation/release helpers
+ HRESULT allocateAllResources();
+ void releaseAllResources();
+
+ void releaseAll();
+
+ HRESULT releaseCudaResources();
+ HRESULT allocateCudaResources();
+
+ HRESULT registerDisplacementMapWithCUDA();
+ HRESULT unregisterDisplacementMapWithCUDA();
+
+ HRESULT initGaussAndOmega();
+
+ enum { NumReadbackSlots = 4 }; // 2 in-flight, one usable, one active
+ enum { NumTimerSlots = 4 }; // 2 in-flight, one usable, one active
+ // Per-CUDA-device state; instances are reached through m_pCudaDeviceStates
+ struct CudaDeviceState
+ {
+ int m_cudaDevice;
+
+ int m_constantsIndex;
+
+ // The Gauss distribution used to generate H0
+ float2* m_device_Gauss;
+ // Initial height field H(0) generated by Phillips spectrum & Gauss distribution.
+ float2* m_device_H0;
+ // Height field H(t) in frequency domain, updated each frame.
+ float2* m_device_Ht;
+ // Choppy fields Dx(t) and Dy(t), updated each frame.
+ float4* m_device_Dt;
+ // Angular frequency
+ float* m_device_Omega;
+
+ bool m_H0Dirty; // presumably marks H0 as needing regeneration via updateH0() - TODO confirm
+
+ // Readback staging
+ float4* m_readback_device_Dxyzs[NumReadbackSlots];
+
+ // Readback completion/staging events (one per readback slot) and timer start/stop events (one per timer slot)
+ cudaEvent_t m_readback_completion_evts[NumReadbackSlots];
+ cudaEvent_t m_readback_staging_evts[NumReadbackSlots];
+ cudaEvent_t m_start_timer_evts[NumTimerSlots];
+ cudaEvent_t m_stop_timer_evts[NumTimerSlots];
+ cudaEvent_t m_start_fft_timer_evts[NumTimerSlots];
+ cudaEvent_t m_stop_fft_timer_evts[NumTimerSlots];
+ };
+
+ unsigned int m_numCudaDevices;
+ CudaDeviceState* m_pCudaDeviceStates; // presumably an array of m_numCudaDevices entries - TODO confirm
+
+ // Optional readback ring-buffer
+ struct ReadbackSlot
+ {
+ float4* m_device_Dxyz;
+ float4* m_host_Dxyz;
+ int m_cudaDevice;
+ cudaEvent_t m_completion_evt;
+ cudaEvent_t m_staging_evt;
+ gfsdk_U64 m_kickID;
+ };
+
+ // The D3D11 and GL2 paths use the surface<>-based variants of the CUDA kernels, which output to 16F. Therefore the readback element size
+ // must be adjusted to match...
+ size_t m_readback_element_size;
+ ReadbackSlot m_readback_slots[NumReadbackSlots];
+ int m_active_readback_slot; // i.e. not in-flight
+ int m_end_inflight_readback_slots; // the first in-flight slot is always the one after active
+ float4* m_active_readback_host_Dxyz;
+
+ ReadbackSlot* m_working_readback_slot; // the readback slot being used for current kick processing
+
+ HRESULT consumeAvailableReadbackSlot(CudaDeviceState& cu_dev_state, gfsdk_U64 kickID, ReadbackSlot** ppSlot);
+ HRESULT waitForAllInFlightReadbacks();
+ // Private overload taking a raw byte pointer to readback data; multiplier defaults to 1.f
+ void addDisplacements( const BYTE* pReadbackData,
+ const gfsdk_float2* inSamplePoints,
+ gfsdk_float4* outDisplacements,
+ UINT numSamples,
+ float multiplier = 1.f
+ );
+
+ HRESULT updateH0(const CudaDeviceState& cu_dev_state, cudaStream_t cu_kernel_stream);
+ // Element type of m_pReadbackFIFO: a kick ID paired with its host_Dxyz pointer
+ struct ReadbackFIFOSlot
+ {
+ gfsdk_U64 kickID;
+ float4* host_Dxyz;
+ };
+ CircularFIFO<ReadbackFIFOSlot>* m_pReadbackFIFO;
+
+ // Timer query ring-buffer
+ struct TimerSlot
+ {
+ int m_cudaDevice;
+ cudaEvent_t m_start_timer_evt;
+ cudaEvent_t m_stop_timer_evt;
+ float m_elapsed_time; // in milli-seconds, as per house style
+ gfsdk_U64 m_kickID;
+ };
+
+ TimerSlot m_timer_slots[NumTimerSlots];
+ int m_active_timer_slot; // i.e. not in-flight
+ int m_end_inflight_timer_slots; // the first in-flight slot is always the one after active
+
+ TimerSlot* m_working_timer_slot; // the timer slot being used for current kick processing
+
+ HRESULT consumeAvailableTimerSlot(CudaDeviceState& cu_dev_state, gfsdk_U64 kickID, TimerSlot** ppSlot);
+ HRESULT waitForAllInFlightTimers();
+ HRESULT queryTimers();
+ HRESULT getElapsedTimeForActiveSlot();
+ // State flags; the names suggest they record which setup steps have completed (TODO confirm)
+ bool m_DisplacementMapIsCUDARegistered;
+ bool m_GaussAndOmegaInitialised;
+ bool m_cudaResourcesInitialised;
+ bool m_ReadbackInitialised;
+
+ gfsdk_U64 m_DisplacementMapVersion; // value returned by getDisplacementMapVersion()
+
+ // D3D API handling
+ nv_water_d3d_api m_d3dAPI; // presumably identifies which member of the m_d3d union is in use - TODO confirm
+
+#if WAVEWORKS_ENABLE_D3D9
+ struct D3D9Objects
+ {
+ IDirect3DDevice9* m_pd3d9Device;
+
+ struct PerCudaDeviceResources
+ {
+ // Displacement/choppy field
+ LPDIRECT3DTEXTURE9 m_pd3d9DisplacementMap; // (ABGR32F)
+ bool m_d3d9DisplacementmapIsRegistered;
+ };
+
+ PerCudaDeviceResources* m_pd3d9PerCudaDeviceResources;
+ };
+#endif
+
+#if WAVEWORKS_ENABLE_D3D10
+ struct D3D10Objects
+ {
+ ID3D10Device* m_pd3d10Device;
+
+ struct PerCudaDeviceResources
+ {
+ // Displacement/choppy field
+ ID3D10Texture2D* m_pd3d10DisplacementMapResource;
+ ID3D10ShaderResourceView* m_pd3d10DisplacementMap; // (ABGR32F)
+ bool m_d3d10DisplacementmapIsRegistered;
+ };
+
+ PerCudaDeviceResources* m_pd3d10PerCudaDeviceResources;
+ };
+#endif
+
+#if WAVEWORKS_ENABLE_D3D11
+ struct D3D11Objects
+ {
+ ID3D11Device* m_pd3d11Device;
+
+ struct PerCudaDeviceResources
+ {
+ // Displacement/choppy field
+ ID3D11Texture2D* m_pd3d11DisplacementMapResource;
+ ID3D11ShaderResourceView* m_pd3d11DisplacementMap; // (ABGR32F) - NOTE(review): the m_readback_element_size comment says the D3D11/GL2 kernels output 16F; confirm this format
+ cudaGraphicsResource* m_pd3d11RegisteredDisplacementMapResource;
+ };
+
+ PerCudaDeviceResources* m_pd3d11PerCudaDeviceResources;
+ };
+#endif
+#if WAVEWORKS_ENABLE_GL
+ struct GL2Objects
+ {
+ void* m_pGLContext;
+
+ struct PerCudaDeviceResources
+ {
+ // Displacement/choppy field
+ GLuint m_GL2DisplacementMapTexture; // RGBA32F - NOTE(review): the m_readback_element_size comment says the D3D11/GL2 kernels output 16F; confirm this format
+ cudaGraphicsResource* m_pGL2RegisteredDisplacementMapResource;
+ };
+
+ PerCudaDeviceResources* m_pGL2PerCudaDeviceResources;
+ };
+#endif
+ struct NoGraphicsObjects
+ {
+ struct PerCudaDeviceResources
+ {
+ float4* m_Device_displacementMap;
+ };
+
+ PerCudaDeviceResources* m_pNoGraphicsPerCudaDeviceResources;
+ };
+ // Per-graphics-API state; only members whose WAVEWORKS_ENABLE_* guard is set are compiled in (_noGFX is always present)
+ union
+ {
+#if WAVEWORKS_ENABLE_D3D9
+ D3D9Objects _9;
+#endif
+#if WAVEWORKS_ENABLE_D3D10
+ D3D10Objects _10;
+#endif
+#if WAVEWORKS_ENABLE_D3D11
+ D3D11Objects _11;
+#endif
+#if WAVEWORKS_ENABLE_GL
+ GL2Objects _GL2;
+#endif
+ NoGraphicsObjects _noGFX;
+ } m_d3d;
+
+};
+
+#endif // SUPPORT_CUDA
+
+#endif // _NVWAVEWORKS_FFT_SIMULATION_CUDA_IMPL_H