1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
|
// This code contains NVIDIA Confidential Information and is disclosed
// under the Mutual Non-Disclosure Agreement.
//
// Notice
// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
//
// NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
// expressly authorized by NVIDIA. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2013 NVIDIA Corporation. All rights reserved.
//
// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
// rights in and to this software and related documentation and any modifications thereto.
// Any use, reproduction, disclosure or distribution of this software and related
// documentation without an express license agreement from NVIDIA Corporation is
// strictly prohibited.
//
#ifndef _NVWaveWorks_FFT_Simulation_CPU_Impl_H
#define _NVWaveWorks_FFT_Simulation_CPU_Impl_H
#include "FFT_Simulation.h"
struct Task;
template<class T> class CircularFIFO;
typedef float complex[2];
// CPU-side implementation of a single FFT ocean-simulation cascade.
// The per-frame pipeline visible here is: UpdateH0 -> UpdateHt -> FFT (legacy
// NxN path or row-group X/Y path) -> UpdateTexture, with each stage driven
// row-by-row (or FFT-by-FFT) by worker-thread tasks via the friend functions
// declared at the bottom of the class.
class NVWaveWorks_FFT_Simulation_CPU_Impl : public NVWaveWorks_FFT_Simulation
{
public:
NVWaveWorks_FFT_Simulation_CPU_Impl(const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params);
~NVWaveWorks_FFT_Simulation_CPU_Impl();
// Simulation primitives - each processes one row of the simulation grid
// and is intended to be called repeatedly (from worker tasks) until done.
bool UpdateH0(int row); // Returns true if this is the last row to be updated
bool UpdateHt(int row); // Returns true if this is the last row to be updated
bool UpdateTexture(int row); // Returns true if this is the last row to be updated
// FFT simulation primitives - 2 paths here:
// - the 'legacy' path models the entire NxN 2D FFT as a single task
// - the new path models each group of N-wide 1D FFT's as a single task
bool ComputeFFT_XY_NxN(int index); // Returns true if this is the last FFT to be processed
bool ComputeFFT_X(int XYZindex, int subIndex);
bool ComputeFFT_Y(int XYZindex, int subIndex);
int GetNumRowsIn_FFT_X() const;
int GetNumRowsIn_FFT_Y() const;
// Kick off / wrap up one simulation step; dSimTime is the absolute sim time
// for the step, kickID identifies the kick for readback archiving.
HRESULT OnInitiateSimulationStep(Graphics_Context* pGC, double dSimTime);
void OnCompleteSimulationStep(gfsdk_U64 kickID);
// Mandatory NVWaveWorks_FFT_Simulation interface
HRESULT initD3D11(ID3D11Device* pD3DDevice);
HRESULT initGnm();
HRESULT initGL2(void* pGLContext);
HRESULT initNoGraphics();
HRESULT reinit(const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params);
// CPU readback queries: sample current (or archived, for the float-coord
// overload) displacements at the given points.
HRESULT addDisplacements(const gfsdk_float2* inSamplePoints, gfsdk_float4* outDisplacements, UINT numSamples);
HRESULT addArchivedDisplacements(float coord, const gfsdk_float2* inSamplePoints, gfsdk_float4* outDisplacements, UINT numSamples);
HRESULT getTimings(NVWaveWorks_FFT_Simulation_Timings&) const;
gfsdk_U64 getDisplacementMapVersion() const { return m_DisplacementMapVersion; }
// API-specific accessors for the displacement map produced by UpdateTexture.
ID3D11ShaderResourceView** GetDisplacementMapD3D11();
sce::Gnm::Texture* GetDisplacementMapGnm();
GLuint GetDisplacementMapGL2();
const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& GetParams() const { return m_params; }
// h0 only needs re-computation when spectrum-affecting params change;
// these let the owner query/clear that pending-work flag.
bool IsH0UpdateRequired() const { return m_H0UpdateRequired; }
void SetH0UpdateNotRequired() { m_H0UpdateRequired = false; }
// Push the current readback buffer into the archive FIFO, tagged with kickID.
HRESULT archiveDisplacements(gfsdk_U64 kickID);
// Diff old vs new params and report which re-initialization work is needed.
void calcReinit(const GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade& params, bool& bRelease, bool& bAllocate, bool& bReinitH0, bool& bReinitGaussAndOmega);
// Defer applying m_next_params until the next simulation step begins.
void pipelineNextReinit() { m_pipelineNextReinit = true; }
private:
// Pending params (applied via pipelined reinit) vs currently-active params.
GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade m_next_params;
GFSDK_WaveWorks_Detailed_Simulation_Params::Cascade m_params;
bool m_params_are_dirty;
HRESULT allocateAllResources();
void releaseAllResources();
void releaseAll();
HRESULT initGaussAndOmega();
// D3D API handling - selects which member of the m_d3d union below is live.
nv_water_d3d_api m_d3dAPI;
#if WAVEWORKS_ENABLE_D3D11
struct D3D11Objects
{
ID3D11Device* m_pd3d11Device;
// Double-buffered: one texture is written by workers while the other is rendered.
ID3D11Texture2D* m_pd3d11DisplacementMapTexture[2];
ID3D11ShaderResourceView* m_pd3d11DisplacementMapTextureSRV[2];
ID3D11DeviceContext* m_pDC;
};
#endif
#if WAVEWORKS_ENABLE_GNM
struct GnmObjects
{
enum { NumGnmTextures = 3 };
int m_mapped_gnm_texture_index; // We triple-buffer on PS4, because there is no driver/runtime to handle buffer renaming
sce::Gnm::Texture m_pGnmDisplacementMapTexture[NumGnmTextures];
};
#endif
#if WAVEWORKS_ENABLE_GL
struct GL2Objects
{
void* m_pGLContext;
// Double-buffered texture + PBO pairs for async uploads.
GLuint m_GLDisplacementMapTexture[2];
GLuint m_GLDisplacementMapPBO[2];
};
#endif
struct NoGraphicsObjects
{
void* m_pnogfxDisplacementMap[2];
size_t m_nogfxDisplacementMapRowPitch;
};
// Only one graphics API is active per instance (see m_d3dAPI), so the
// per-API object sets share storage.
union
{
#if WAVEWORKS_ENABLE_D3D11
D3D11Objects _11;
#endif
#if WAVEWORKS_ENABLE_GNM
GnmObjects _gnm;
#endif
#if WAVEWORKS_ENABLE_GL
GL2Objects _GL2;
#endif
NoGraphicsObjects _noGFX;
} m_d3d;
//initial spectrum data
float2* m_gauss_data; // We cache the Gaussian distribution which underlies h0 in order to avoid having to re-run the
// random number generator when we re-calculate h0 (e.g. when windspeed changes)
float2* m_h0_data;
float* m_omega_data; // per-frequency angular speeds used by UpdateHt
float* m_sqrt_table; //pre-computed coefficient for speed-up computation of update spectrum
//in-out buffer for FFTCPU, it holds 3 FFT images sequentially
complex* m_fftCPU_io_buffer;
// "safe" buffers with data for readbacks, filled by working threads
gfsdk_float4* m_readback_buffer[2];
gfsdk_float4* m_active_readback_buffer; // The readback buffer currently being served - this can potentially be a different buffer from the
// double-buffered pair in m_readback_buffer[], since one of those could have been swapped for one
// from the FIFO when an archiving operation occured
// One archived readback snapshot: the displacement data plus the kick that produced it.
struct ReadbackFIFOSlot
{
gfsdk_U64 kickID;
gfsdk_float4* buffer;
};
CircularFIFO<ReadbackFIFOSlot>* m_pReadbackFIFO;
// Per-stage completion counters, decremented/checked by worker threads to
// detect when the last row/FFT of a stage has been processed.
volatile LONG m_ref_count_update_h0, m_ref_count_update_ht, m_ref_count_FFT_X, m_ref_count_FFT_Y, m_ref_count_update_texture;
// current index of a texture that is mapped and filled by working threads
// can be 0 or 1. Other texture is returned to user and can be safely used for rendering
int m_mapped_texture_index;
BYTE* m_mapped_texture_ptr; //pointer to a mapped texture that is filling by working threads
size_t m_mapped_texture_row_pitch;
// Task entry points (defined elsewhere) that drive the private simulation
// primitives above from the worker-thread scheduler.
friend void UpdateH0(const Task& task);
friend void UpdateHt(const Task& task);
friend void ComputeFFT(const Task& task);
friend void UpdateTexture(const Task& task);
double m_doubletime; // absolute simulation time of the current step
bool m_H0UpdateRequired;
gfsdk_U64 m_DisplacementMapVersion; // bumped when a new displacement map becomes available
bool m_pipelineNextReinit; // when set, m_next_params is applied at the next step
};
#endif // _NVWaveWorks_FFT_Simulation_CPU_Impl_H
|