aboutsummaryrefslogtreecommitdiff
path: root/mp/src/public/togl/linuxwin/cglmprogram.h
blob: 8fa09fa2cfeaee128dbc7ec5018affebc9596699 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// cglmprogram.h
//	GLMgr programs (ARBVP/ARBfp)
//
//===============================================================================

#ifndef CGLMPROGRAM_H
#define	CGLMPROGRAM_H

#include <sys/stat.h>

#pragma once

// good ARB program references
// http://petewarden.com/notes/archives/2005/05/fragment_progra_2.html
// http://petewarden.com/notes/archives/2005/06/fragment_progra_3.html

// ext links

// http://www.opengl.org/registry/specs/ARB/vertex_program.txt
// http://www.opengl.org/registry/specs/ARB/fragment_program.txt
// http://www.opengl.org/registry/specs/EXT/gpu_program_parameters.txt


//===============================================================================

// tokens not in the SDK headers

//#ifndef	GL_DEPTH_STENCIL_ATTACHMENT_EXT
//	#define GL_DEPTH_STENCIL_ATTACHMENT_EXT 0x84F9
//#endif

//===============================================================================

// forward declarations

class GLMContext;
class CGLMShaderPair;
class CGLMShaderPairCache;

// CGLMProgram can contain two flavors of the same program, one in assembler, one in GLSL.
// These flavors are pretty different in terms of the APIs that are used to activate them -
// for example, assembler programs can just get bound to the context, whereas GLSL programs
// have to be linked.  To some extent we try to hide that detail inside GLM.

// For now, CGLMProgram is only a container: it does not set policy or hold a preference as to
// which flavor you want to use.  GLMContext has to handle that.

// Pipeline stage that a CGLMProgram targets.
// The last enumerator is a count, not a stage; it is used in this header to size per-stage arrays.
enum EGLMProgramType
{
	kGLMVertexProgram,		// vertex stage
	kGLMFragmentProgram,	// fragment (pixel) stage
	
	kGLMNumProgramTypes		// number of stages listed above
};

// Source language ("flavor") of a program's text.
// The last enumerator is a count, not a language; CGLMProgram sizes its per-flavor array with it.
enum EGLMProgramLang
{
	kGLMARB,				// ARB assembler flavor
	kGLMGLSL,				// GLSL flavor
	
	kGLMNumProgramLangs		// number of flavors listed above
};

// Per-flavor bookkeeping for one language variant of a program.
// CGLMProgram keeps one of these per EGLMProgramLang entry.
struct GLMShaderDesc
{
	// GL object for this flavor.  Only one member is meaningful at a time -
	// presumably 'arb' for the kGLMARB slot and 'glsl' for the kGLMGLSL slot (confirm in the .cpp).
	union
	{
		GLuint		arb;		// ARB program object name
		GLhandleARB	glsl;		// GLSL shader object handle (void*)
	}	m_object;

	// these can change if shader text is edited
	bool	m_textPresent;	// is this flavor(lang) of text present in the buffer?
	int		m_textOffset;	// where is it
	int		m_textLength;	// how big
	
	bool	m_compiled;		// has this text been through a compile attempt
	bool	m_valid;		// and if so, was the compile successful

	int		m_slowMark;		// has it been flagged during a non native draw batch before. increment every time it's slow.
	
	int		m_highWater;	// count of vec4's in the major uniform array ("vc" on vs, "pc" on ps)
							// written by dxabstract.... gross!
	int		m_VSHighWaterBone; // count of vec4's in the bone-specific uniform array (only valid for vertex shaders)
};

// Translate a GLM program type into the GL enum used by each flavor's API.
GLenum	GLMProgTypeToARBEnum( EGLMProgramType type );	// map vert/frag to ARB asm bind target
GLenum	GLMProgTypeToGLSLEnum( EGLMProgramType type );	// map vert/frag to the matching GLSL enum (presumably the shader object type - confirm in the .cpp)

// Set to 1 to compile in per-row hit counting for CGLMShaderPairCache
// (the m_hits array and the increment in SelectShaderPair are both guarded by this).
#define GL_SHADER_PAIR_CACHE_STATS 0

// One shader program (vertex or fragment) as GLM tracks it.
// A single object can carry up to kGLMNumProgramLangs flavors of the same program; the per-flavor
// state lives in m_descs[].  Every member is public, and the friend list names the classes that
// work with the object directly.
class CGLMProgram
{
public:
	friend class CGLMShaderPairCache;
	friend class CGLMShaderPair;
	friend class GLMContext;			// only GLMContext can make CGLMProgram objects
	friend class GLMTester;	
	friend struct IDirect3D9;
	friend struct IDirect3DDevice9;
		
	//===============================
	
	// constructor is very light, it just makes one empty program object per flavor.
	CGLMProgram( GLMContext *ctx, EGLMProgramType type );
	~CGLMProgram( );	

	void	SetProgramText			( char *text );				// import text to GLM object - invalidate any prev compiled program
	void	SetShaderName			( const char *name );				// only used for debugging/telemetry markup
	
	bool	CompileActiveSources	( void );					// compile only the flavors that were provided.
	bool	Compile					( EGLMProgramLang lang );	// compile one flavor (return value presumably reports success - confirm in the .cpp)
	bool	CheckValidity			( EGLMProgramLang lang );	// validity check for one flavor (exact criteria live in the .cpp)

	void	LogSlow					( EGLMProgramLang lang );	// detailed spew when called for first time; one liner or perhaps silence after that
	
	// Both of these write into caller-supplied buffers whose capacity is passed alongside.
	void	GetLabelIndexCombo		( char *labelOut, int labelOutMaxChars, int *indexOut, int *comboOut );	
	void	GetComboIndexNameString	( char *stringOut, int stringOutMaxChars );		// mmmmmmmm-nnnnnnnn-filename
	
#if GLMDEBUG
	// editable-shader support, debug builds only
	bool	PollForChanges( void );			// check mirror for changes.
	void	ReloadStringFromEditable( void );	// populate m_string from editable item (react to change)
	bool	SyncWithEditable( void );
#endif

	//===============================
	
	// common stuff

	GLMContext				*m_ctx;					// link back to parent context

	EGLMProgramType			m_type;					// vertex or pixel

	uint					m_nHashTag;				// serial number for hashing (CGLMShaderPairCache::HashRowIndex sums the vp and fp tags)
	
	char					*m_text;				// copy of the program text.  Can change if editable shaders is enabled.
													// (the constructor declared above takes no text; presumably filled in by SetProgramText - confirm)
													// note - it can contain multiple flavors, so use CGLMTextSectioner to scan it and locate them
#if GLMDEBUG
	CGLMEditableTextItem	*m_editable;			// editable text item for debugging
#endif	
	
	GLMShaderDesc			m_descs[ kGLMNumProgramLangs ];	// one descriptor per flavor, indexed by EGLMProgramLang

	uint					m_samplerMask;			// (1<<n) mask of sampler active locs, if this is a fragment shader (dxabstract sets this field)
	uint					m_samplerTypes;			// SAMPLER_2D, etc.
	uint					m_nNumUsedSamplers;
	uint					m_maxSamplers;
	uint					m_maxVertexAttrs;
	uint					m_nCentroidMask;
	uint					m_nShadowDepthSamplerMask;
	
	bool					m_bTranslatedProgram;

	char					m_shaderName[64];		// debugging/telemetry name, see SetShaderName
};	

//===============================================================================

// Plain record describing one shader pair.
// CGLMShaderPairCache::QueryShaderPair takes a pointer to one of these as its output argument.
struct GLMShaderPairInfo
{
	int		m_status;		// -1 means req'd index was out of bounds (loop stop..)  0 means not present.  1 means present/active.
	
	// vertex shader half of the pair
	char	m_vsName[ 128 ];
	int		m_vsStaticIndex;
	int		m_vsDynamicIndex;
	
	// pixel shader half of the pair
	char	m_psName[ 128 ];
	int		m_psStaticIndex;
	int		m_psDynamicIndex;
};

// A vertex program and a fragment program linked together as one GLSL program object,
// plus the uniform locations and shadow values recorded for that link.
class CGLMShaderPair					// a container for a linked GLSL shader pair, and metadata obtained post-link
{

public:

	friend class CGLMProgram;
	friend class GLMContext;
	friend class CGLMShaderPairCache;
		
	//===============================
	
	// constructor just sets up a GLSL program object and leaves it empty.
	CGLMShaderPair( GLMContext *ctx  );
	~CGLMShaderPair( );	

	bool	SetProgramPair			( CGLMProgram *vp, CGLMProgram *fp );
		// true result means successful link and query

	bool	RefreshProgramPair		( void );
		// re-link and re-query the uniforms

	// Push the screen dimensions to the vertex-stage "vcscreen" uniform.
	// nWidthHeight packs the width in its low 16 bits and the height in the bits above them.
	// The packed value is remembered in m_nScreenWidthHeight so a repeat call with the same value returns early.
	// NOTE(review): a zero width or height makes the reciprocals below a float divide by zero; nothing here guards it.
	// NOTE(review): the remembered value is updated even when the uniform location is negative and nothing is uploaded.
	FORCEINLINE void UpdateScreenUniform( uint nWidthHeight )
	{
		if ( m_nScreenWidthHeight == nWidthHeight )
			return;
		
		m_nScreenWidthHeight = nWidthHeight;

		uint nWidth = nWidthHeight & 0xFFFF, nHeight = nWidthHeight >> 16;
		// Apply half pixel offset to output vertices to account for the pixel center difference between D3D9 and OpenGL.
		// We output vertices in clip space, which ranges from [-1,1], so 1.0/width in clip space transforms into .5/width in screenspace, see: "Viewports and Clipping (Direct3D 9)" in the DXSDK
		// uniform layout: ( 1/width, 1/height, width, height )
		float v[4] = { 1.0f / (float)nWidth, 1.0f / (float)nHeight, (float)nWidth, (float)nHeight };
		// a negative location means the uniform is not available, so the upload is skipped
		if ( m_locVertexScreenParams >= 0 )
			gGL->glUniform4fv( m_locVertexScreenParams, 1, v );
	}
	
	//===============================
	
	// common stuff

	GLMContext				*m_ctx;					// link back to parent context

	CGLMProgram				*m_vertexProg;	
	CGLMProgram				*m_fragmentProg;

	GLhandleARB				m_program;				// linked program object

	// need meta data for attribs / samplers / params
	// actually we only need it for samplers and params.
	// attributes are hardwired.
	
	// vertex stage uniforms
	GLint					m_locVertexParams;		// "vc" per dx9asmtogl2 convention
	GLint					m_locVertexBoneParams;	// "vcbones"
	GLint					m_locVertexInteger0;	// "i0"
			
	GLint					m_locVertexBool0;		// "b0"
	GLint					m_locVertexBool1;		// "b1"
	GLint					m_locVertexBool2;		// "b2"
	GLint					m_locVertexBool3;		// "b3"
	bool					m_bHasBoolOrIntUniforms;
			
	// fragment stage uniforms
	GLint					m_locFragmentParams;			// "pc" per dx9asmtogl2 convention
	
	// per-stage tables, first index is an EGLMProgramType
	int						m_NumUniformBufferParams[kGLMNumProgramTypes];
	GLint					m_UniformBufferParams[kGLMNumProgramTypes][256];
	
	GLint					m_locFragmentFakeSRGBEnable;	// "flSRGBWrite" - set to 1.0 to effect sRGB encoding on output
	float					m_fakeSRGBEnableValue;			// shadow to avoid redundant sets of the m_locFragmentFakeSRGBEnable uniform
		// init it to -1.0 at link or relink, so it will trip on any legit incoming value (0.0 or 1.0)

	GLint					m_locSamplers[ 16 ];			// one location per sampler uniform: "sampler0", "sampler1", ... (16 slots)

	// other stuff
	bool					m_valid;				// true on successful link
	uint					m_revision;				// if this pair is relinked, bump this number.

	GLint					m_locVertexScreenParams; // vcscreen
	uint					m_nScreenWidthHeight;	// last packed width/height accepted by UpdateScreenUniform
		
};	

//===============================================================================

// N-row, M-way associative cache with LRU per row.
// still needs some metric dump ability and some parameter tuning.
// extra credit would be to make an auto-tuner.

// One way (slot) of a CGLMShaderPairCache row.
// The lookup key is the triple ( m_vertexProg, m_fragmentProg, m_extraKeyBits ); m_pair is the cached value.
struct CGLMPairCacheEntry
{
	long long		m_lastMark;				// a mark of zero means an empty entry
											// otherwise it is the cache's m_mark value from the most recent hit (lower = older)
	CGLMProgram		*m_vertexProg;
	CGLMProgram		*m_fragmentProg;
	uint			m_extraKeyBits;
	CGLMShaderPair	*m_pair;
};

// Set-associative cache of linked shader pairs, keyed by ( vertex program, fragment program, extraKeyBits ).
// The key hashes to one row; the row's m_ways entries are then searched linearly.
// Everything is protected: only the friend classes listed below can use it.
class CGLMShaderPairCache				// cache for linked GLSL shader pairs
{

public:

protected:
	friend class CGLMShaderPair;
	friend class CGLMProgram;
	friend class GLMContext;
		
	//===============================
	
	CGLMShaderPairCache( GLMContext *ctx  );
	~CGLMShaderPairCache( );	

	// Look up the pair for this key; defined inline at the bottom of this header.
	// A miss is forwarded to SelectShaderPairInternal.
	FORCEINLINE CGLMShaderPair *SelectShaderPair	( CGLMProgram *vp, CGLMProgram *fp, uint extraKeyBits );
	void			QueryShaderPair		( int index, GLMShaderPairInfo *infoOut );
	
	// shoot down linked pairs that use the program in the arg
	// return true if any had to be skipped due to conflict with currently bound pair
	bool			PurgePairsWithShader( CGLMProgram *prog );
	
	// purge everything (when would GLM know how to do this ?  at context destroy time, but any other times?)
	// return true if any had to be skipped due to conflict with currently bound pair
	bool			Purge				( void );
	
	// stats
	void			DumpStats			( void );
	
	//===============================

	// hashing helpers, defined inline at the bottom of this header
	FORCEINLINE uint HashRowIndex( CGLMProgram *vp, CGLMProgram *fp, uint extraKeyBits ) const;	// key -> row number
	FORCEINLINE CGLMPairCacheEntry*	HashRowPtr( uint hashRowIndex ) const;						// row number -> first entry of that row
	
	// scan one row; reports the matching way, the first empty way and the oldest occupied way (-1 for each one not found)
	FORCEINLINE void HashRowProbe( CGLMPairCacheEntry *row, CGLMProgram *vp, CGLMProgram *fp, uint extraKeyBits, int &hitway, int &emptyway, int &oldestway );
		
	// out-of-line path that SelectShaderPair calls when no way in the row matches
	CGLMShaderPair *SelectShaderPairInternal( CGLMProgram *vp, CGLMProgram *fp, uint extraKeyBits, int rowIndex );
	//===============================

	// common stuff

	GLMContext				*m_ctx;					// link back to parent context

	long long				m_mark;					// running stamp; SelectShaderPair copies it into an entry's m_lastMark on a hit, then increments it

	uint					m_rowsLg2;
	uint					m_rows;
	uint					m_rowsMask;				// ANDed onto the hash by HashRowIndex (presumably m_rows - 1 with m_rows a power of two - confirm)
	
	uint					m_waysLg2;
	uint					m_ways;					// entries per row
	
	uint					m_entryCount;
	
	CGLMPairCacheEntry		*m_entries;				// array[ m_rows ][ m_ways ]
													// stored flat: row r starts at index r * m_ways (see HashRowPtr)

	uint					*m_evictions;			// array[ m_rows ];

#if GL_SHADER_PAIR_CACHE_STATS
	uint					*m_hits;				// array[ m_rows ];
#endif
};	

// Map a ( vp, fp, extraKeyBits ) key to the row it belongs to.
// The hash is the sum of the two programs' hash tags plus seven times the extra key bits,
// masked down to a row number with m_rowsMask.  Both program pointers are dereferenced.
FORCEINLINE uint CGLMShaderPairCache::HashRowIndex( CGLMProgram *vp, CGLMProgram *fp, uint extraKeyBits ) const
{
	const uint nProgramTags = vp->m_nHashTag + fp->m_nHashTag;
	const uint nKeyBitsTerm = extraKeyBits * 7;

	return ( nProgramTags + nKeyBitsTerm ) & m_rowsMask;
}

// Return the first entry of the given row.
// m_entries is one flat array, so row r begins m_ways entries after row r - 1.
FORCEINLINE CGLMPairCacheEntry*	CGLMShaderPairCache::HashRowPtr( uint hashRowIndex ) const
{
	const uint nFirstEntry = hashRowIndex * m_ways;

	return m_entries + nFirstEntry;
}

// Scan one cache row for the key ( vp, fp, extraKeyBits ).
//
//   row        first entry of the row to scan (m_ways entries are examined)
//   hitway     out: way holding the key, or -1 if the key is not in the row
//   emptyway   out: first empty way seen (m_lastMark == 0), or -1 if none was seen
//   oldestway  out: occupied way with the smallest m_lastMark seen, or -1 if none was occupied
//
// The scan stops at the first hit, so on a hit emptyway/oldestway only describe the ways
// visited up to and including the hit.  On a miss they describe the whole row.
FORCEINLINE void CGLMShaderPairCache::HashRowProbe( CGLMPairCacheEntry *row, CGLMProgram *vp, CGLMProgram *fp, uint extraKeyBits, int& hitway, int& emptyway, int& oldestway )
{
	hitway = -1;
	emptyway = -1;
	oldestway = -1;

	// Smallest mark found so far; only meaningful once oldestway >= 0.
	// The minimum is seeded from the first occupied way rather than from a sentinel: the previous
	// sentinel, 0xFFFFFFFFFFFFFFFFLL, does not fit in a long long and ended up as -1, so no
	// positive mark ever compared below it and oldestway was never set.
	long long oldestmark = 0;

	CGLMPairCacheEntry *cursor = row;

	for( uint way = 0; way < m_ways; ++way, ++cursor )
	{
		if ( cursor->m_lastMark == 0 )
		{
			// empty way, remember only the first one seen
			if ( emptyway < 0 )
			{
				emptyway = way;
			}
			continue;
		}

		// occupied way: track the oldest one on the row - empty ways never take part
		if ( ( oldestway < 0 ) || ( cursor->m_lastMark < oldestmark ) )
		{
			oldestway = way;
			oldestmark = cursor->m_lastMark;
		}

		if ( ( cursor->m_vertexProg == vp ) && ( cursor->m_fragmentProg == fp ) && ( cursor->m_extraKeyBits == extraKeyBits ) )	// match?
		{
			// found it
			hitway = way;
			break;
		}
	}
}

// Return the linked pair cached for ( vp, fp, extraKeyBits ).
// On a hit the entry's m_lastMark is refreshed from m_mark (which is then incremented) and the
// cached pair is returned.  If no way in the row matches, the call is handed to
// SelectShaderPairInternal and its result is returned unchanged.
// NOTE(review): the key compare here never looks at m_lastMark, so it relies on empty entries
// not matching a live key - confirm that purged entries have their program pointers cleared.
FORCEINLINE CGLMShaderPair *CGLMShaderPairCache::SelectShaderPair( CGLMProgram *vp, CGLMProgram *fp, uint extraKeyBits )
{
	// the hash names the only row this pair could live in
	const uint nRow = HashRowIndex( vp, fp, extraKeyBits );

	CGLMPairCacheEntry *pEntry = HashRowPtr( nRow );

	// way 0 is tested on its own before the remaining ways are walked
	const bool bFirstWayMatches = ( pEntry->m_fragmentProg == fp ) && ( pEntry->m_vertexProg == vp ) && ( pEntry->m_extraKeyBits == extraKeyBits );

	if ( !bFirstWayMatches )
	{
		CGLMPairCacheEntry * const pRowEnd = pEntry + m_ways;

		for ( ++pEntry; pEntry != pRowEnd; ++pEntry )
		{
			const bool bMatches = ( pEntry->m_fragmentProg == fp ) && ( pEntry->m_vertexProg == vp ) && ( pEntry->m_extraKeyBits == extraKeyBits );
			if ( bMatches )
				break;
		}

		// walked off the end of the row: not cached, take the out-of-line path
		if ( pEntry == pRowEnd )
			return SelectShaderPairInternal( vp, fp, extraKeyBits, nRow );
	}

	// hit: stamp the entry as most recently used
	pEntry->m_lastMark = m_mark++;

#if GL_SHADER_PAIR_CACHE_STATS
	// per-row hit counter
	m_hits[ nRow ] ++;
#endif

	return pEntry->m_pair;
}

#endif