1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
|
/* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited. */
#ifndef NV_CO_DX12_CIRCULAR_RESOURCE_HEAP_H
#define NV_CO_DX12_CIRCULAR_RESOURCE_HEAP_H
#include <NvResult.h>
#include <NvCoFreeList.h>
#include "NvCoDx12CounterFence.h"
#define NOMINMAX
#include <d3d12.h>
#include <wrl.h>
#include <deque>
using namespace Microsoft::WRL;
/** \addtogroup common
@{
*/
namespace nvidia {
namespace Common {
/*! \brief The Dx12CircularResourceHeap is a heap that is suited for size constrained real-time resources allocation that
is transitory in nature. It is designed to allocate resources which are used and discarded, often used where in
previous versions of DirectX the 'DISCARD' flag was used.
The idea is to have a heap which chunks of resource can be allocated, and used for GPU execution,
and that the heap is able through the addSync/updateCompleted idiom is able to track when the usage of the resources is
completed allowing them to be reused. The heap is arranged as circularly, with new allocations made from the front, and the back
being updated as the GPU updating the back when it is informed anything using prior parts of the heap have completed. In this
arrangement all the heap between the back and the front can be thought of as in use or potentially in use by the GPU. All the heap
from the front back around to the back, is free and can be allocated from. It is the responsibility of the user of the Heap to make
sure the invariant holds, but in most normal usage it does so simply.
Another feature of the heap is that it does not require upfront knowledge of how big a heap is needed. The backing resources will be expanded
dynamically with requests as needed. The only requirement is that know single request can be larger than m_blockSize specified in the Desc
used to initialize the heap. This is because all the backing resources are allocated to a single size. This limitation means the Dx12CircularResourceHeap
may not be the best use for example for uploading a texture - because it's design is really around transitory uploads or write backs, and so more suited
to constant buffers, vertex buffer, index buffers and the like.
To upload a texture at program startup it is most likely better to use a Dx12ResourceScopeManager.
### The addSync/updateCompleted Idiom
Lots of classes in Nv/Common/Platform/Dx12 use the addSync/update idiom. The idiom enables a class to track GPU progress fairly simply. The
two methods are generally called at a fairly low frequency - say once a frame. The addSync method is given a signalValue that should be the
value generated from nextSignal on the Dx12Fence that is passed on construction of the type. Calling addSync means when this value is hit
ALL previous heap allocations will no longer be used. Thus in practice usage looks something like
\code{.cpp}
typedef Dx12CircularResourceHeap Heap;
Heap::Cursor cursor = heap.allocateVertexBuffer(sizeof(Vertex) * numVerts);
Memory:copy(cursor.m_position, verts, sizeof(Vertex) * numVerts);
// Do a command using the GPU handle
m_commandList->...
// Do another command using the GPU handle
m_commandList->...
// Execute the command list on the command queue
{
ID3D12CommandList* lists[] = { m_commandList };
m_commandQueue->ExecuteCommandLists(_countof(lists), lists);
}
// Add a sync point
const uint64_t signalValue = m_fence.nextSignal(m_commandQueue);
heap.addSync(signalValue)
// The cursors cannot be used anymore
// At some later point call updateCompleted. This will see where the GPU is at, and make resources available that the GPU no longer accesses.
heap.updateCompleted();
\endcode
### Implementation
Front and back can be in the same block, but ONLY if back is behind front, because we have to always be able to insert
new blocks in front of front. So it must be possible to do an block insertion between the two of them.
|--B---F-----| |----------|
When B and F are on top of one another it means there is nothing in the list. NOTE this also means that a move of front can never place it
top of the back.
https://msdn.microsoft.com/en-us/library/windows/desktop/dn899125%28v=vs.85%29.aspx
https://msdn.microsoft.com/en-us/library/windows/desktop/mt426646%28v=vs.85%29.aspx
*/
class Dx12CircularResourceHeap
{
protected:
struct Block;
//NV_CO_DECLARE_CLASS_BASE(Dx12CircularResourceHeap);
public:
/// The alignment used for VERTEX_BUFFER allocations
/// Strictly speaking it seems the hardware can handle 4 byte alignment, but since often in use
/// data will be copied from CPU memory to the allocation, using 16 byte alignment is superior as allows
/// significantly faster memcpy.
/// The sample that shows sizeof(float) - 4 bytes is appropriate is at the link below.
/// https://msdn.microsoft.com/en-us/library/windows/desktop/mt426646%28v=vs.85%29.aspx
enum
{
VERTEX_BUFFER_ALIGNMENT = 16,
};
struct Desc
{
void init()
{
{
D3D12_HEAP_PROPERTIES& props = m_heapProperties;
props.Type = D3D12_HEAP_TYPE_UPLOAD;
props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
props.CreationNodeMask = 1;
props.VisibleNodeMask = 1;
}
m_heapFlags = D3D12_HEAP_FLAG_NONE;
m_initialState = D3D12_RESOURCE_STATE_GENERIC_READ;
m_blockSize = 0;
}
D3D12_HEAP_PROPERTIES m_heapProperties;
D3D12_HEAP_FLAGS m_heapFlags;
D3D12_RESOURCE_STATES m_initialState;
size_t m_blockSize;
};
/// Cursor position
struct Cursor
{
/// Get GpuHandle
inline D3D12_GPU_VIRTUAL_ADDRESS getGpuHandle() const { return m_block->m_resource->GetGPUVirtualAddress() + size_t(m_position - m_block->m_start); }
/// Must have a block and position
inline bool isValid() const { return m_block != nullptr; }
/// Calculate the offset into the underlying resource
inline size_t getOffset() const { return size_t(m_position - m_block->m_start); }
/// Get the underlying resource
inline ID3D12Resource* getResource() const { return m_block->m_resource; }
Block* m_block; ///< The block index
uint8_t* m_position; ///< The current position
};
/// Get the desc used to initialize the heap
inline const Desc& getDesc() const { return m_desc; }
/// Must be called before used
/// Block size must be at least as large as the _largest_ thing allocated
/// Also note depending on alignment of a resource allocation, the block size might also need to take into account the
/// maximum alignment use. It is a REQUIREMENT that a newly allocated resource block is large enough to hold any
/// allocation taking into account the alignment used.
int init(ID3D12Device* device, const Desc& desc, Dx12CounterFence* fence);
/// Get the block size
inline size_t getBlockSize() const { return m_desc.m_blockSize; }
/// Allocate constant buffer of specified size
Cursor allocate(size_t size, size_t alignment);
/// Allocate a constant buffer
inline Cursor allocateConstantBuffer(size_t size) { return allocate(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); }
/// Allocate a vertex buffer
inline Cursor allocateVertexBuffer(size_t size) { return allocate(size, VERTEX_BUFFER_ALIGNMENT); }
/// Create filled in constant buffer
inline Cursor newConstantBuffer(const void* data, size_t size) { Cursor cursor = allocateConstantBuffer(size); memcpy(cursor.m_position, data, size); return cursor; }
/// Create in filled in constant buffer
template <typename T>
inline Cursor newConstantBuffer(const T& in) { return newConstantBuffer(&in, sizeof(T)); }
/// Look where the GPU has got to and release anything not currently used
void updateCompleted();
/// Add a sync point - meaning that when this point is hit in the queue
/// all of the resources up to this point will no longer be used.
void addSync(uint64_t signalValue);
/// Get the gpu address of this cursor
D3D12_GPU_VIRTUAL_ADDRESS getGpuHandle(const Cursor& cursor) const { return cursor.m_block->m_resource->GetGPUVirtualAddress() + size_t(cursor.m_position - cursor.m_block->m_start); }
/// Ctor
Dx12CircularResourceHeap();
/// Dtor
~Dx12CircularResourceHeap();
protected:
struct Block
{
ID3D12Resource* m_resource; ///< The mapped resource
uint8_t* m_start; ///< Once created the resource is mapped to here
Block* m_next; ///< Points to next block in the list
};
struct PendingEntry
{
uint64_t m_completedValue; ///< The value when this is completed
Cursor m_cursor; ///< the cursor at that point
};
void _freeBlockListResources(const Block* block);
/// Create a new block (with associated resource), do not add the block list
Block* _newBlock();
Block* m_blocks; ///< Circular singly linked list of block. nullptr initially
FreeList m_blockFreeList; ///< Free list of actual allocations of blocks
std::deque<PendingEntry> m_pendingQueue; ///< Holds the list of pending positions. When the fence value is greater than the value on the queue entry, the entry is done.
// Allocation is made from the front, and freed from the back.
Cursor m_back; ///< Current back position.
Cursor m_front; ///< Current front position.
Desc m_desc; ///< Describes the heap
Dx12CounterFence* m_fence; ///< The fence to use
ID3D12Device* m_device; ///< The device that resources will be constructed on
};
} // namespace Common
} // namespace nvidia
/** @} */
#endif // NV_CO_DX12_CIRCULAR_RESOURCE_HEAP_H
|