diff options
| author | FluorescentCIAAfricanAmerican <[email protected]> | 2020-04-22 12:56:21 -0400 |
|---|---|---|
| committer | FluorescentCIAAfricanAmerican <[email protected]> | 2020-04-22 12:56:21 -0400 |
| commit | 3bf9df6b2785fa6d951086978a3e66f49427166a (patch) | |
| tree | 2c0f1f0c63c4832882bc93814ebd2c2b1c6224e5 /togl/linuxwin/dx9asmtogl2.cpp | |
| download | archived-source-engine-2018-hl2-src-master.tar.xz archived-source-engine-2018-hl2-src-master.zip | |
Diffstat (limited to 'togl/linuxwin/dx9asmtogl2.cpp')
| -rw-r--r-- | togl/linuxwin/dx9asmtogl2.cpp | 3838 |
1 files changed, 3838 insertions, 0 deletions
diff --git a/togl/linuxwin/dx9asmtogl2.cpp b/togl/linuxwin/dx9asmtogl2.cpp new file mode 100644 index 0000000..22be8fc --- /dev/null +++ b/togl/linuxwin/dx9asmtogl2.cpp @@ -0,0 +1,3838 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// TOGL CODE LICENSE +// +// Copyright 2011-2014 Valve Corporation +// All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +//------------------------------------------------------------------------------ +// DX9AsmToGL2.cpp +//------------------------------------------------------------------------------ +// Immediately include gl.h, etc. here to avoid compilation warnings. +#include <GL/gl.h> +#include <GL/glext.h> + +#include "togl/rendermechanism.h" +#include "tier0/dbg.h" +#include "tier1/strtools.h" +#include "tier1/utlbuffer.h" +#include "dx9asmtogl2.h" + +#include "materialsystem/IShader.h" + +// memdbgon must be the last include file in a .cpp file!!! 
+#include "tier0/memdbgon.h" + +#ifdef POSIX +#define strcat_s( a, b, c) V_strcat( a, c, b ) +#endif + +#define DST_REGISTER 0 +#define SRC_REGISTER 1 + +// Flags to PrintUsageAndIndexToString. +#define SEMANTIC_OUTPUT 0x01 +#define SEMANTIC_INPUT 0x02 + +#define UNDECLARED_OUTPUT 0xFFFFFFFF +#define UNDECLARED_INPUT 0xFFFFFFFF + +#ifndef POSIX +#define Debugger() Assert(0) +#endif + +//#define Assert(n) if( !(n) ){ TranslationError(); } + + +static char *g_szVecZeros[] = { NULL, "0.0", "vec2( 0.0, 0.0 )", "vec3( 0.0, 0.0, 0.0 )", "vec4( 0.0, 0.0, 0.0, 0.0 )" }; +static char *g_szVecOnes[] = { NULL, "1.0", "vec2( 1.0, 1.0 )", "vec3( 1.0, 1.0, 1.0 )", "vec4( 1.0, 1.0, 1.0, 1.0 )" }; +static char *g_szDefaultSwizzle = "xyzw"; +static char *g_szDefaultSwizzleStrings[] = { "x", "y", "z", "w" }; +static char *g_szSamplerStrings[] = { "2D", "CUBE", "3D" }; + +static const char *g_pAtomicTempVarName = "atomic_temp_var"; +static const char *g_pTangentAttributeName = "g_tangent"; + +int __cdecl SortInts( const int *a, const int *b ) +{ + if ( *a < *b ) + return -1; + else if ( *a > *b ) + return 1; + else + return 0; +} + +void StripExtraTrailingZeros( char *pStr ) +{ + int len = (int)V_strlen( pStr ); + while ( len >= 2 && pStr[len-1] == '0' && pStr[len-2] != '.' ) + { + pStr[len-1] = 0; + --len; + } +} + +void D3DToGL::PrintToBufWithIndents( CUtlBuffer &buf, const char *pFormat, ... ) +{ + va_list marker; + va_start( marker, pFormat ); + + char szTemp[1024]; + V_vsnprintf( szTemp, sizeof( szTemp ), pFormat, marker ); + va_end( marker ); + + PrintIndentation( (char*)buf.Base(), buf.Size() ); + strcat_s( (char*)buf.Base(), buf.Size(), szTemp ); +} + +void PrintToBuf( CUtlBuffer &buf, const char *pFormat, ... 
) +{ + va_list marker; + va_start( marker, pFormat ); + + char szTemp[1024]; + V_vsnprintf( szTemp, sizeof( szTemp ), pFormat, marker ); + va_end( marker ); + + strcat_s( (char*)buf.Base(), buf.Size(), szTemp ); +} + +void PrintToBuf( char *pOut, int nOutSize, const char *pFormat, ... ) +{ + int nStrlen = V_strlen( pOut ); + pOut += nStrlen; + nOutSize -= nStrlen; + + va_list marker; + va_start( marker, pFormat ); + V_vsnprintf( pOut, nOutSize, pFormat, marker ); + va_end( marker ); +} + +// Return the number of letters following the dot. +// Returns 4 if there is no dot. +// (So "r0.xy" returns 2 and "r0" returns 4). +int GetNumWriteMaskEntries( const char *pParam ) +{ + const char *pDot = strchr( pParam, '.' ); + if ( pDot ) + return V_strlen( pDot + 1 ); + else + return 4; +} + +const char* GetSwizzleDot( const char *pParam ) +{ + const char *pDot = strrchr( pParam, '.' ); + + const char *pSquareClose = strrchr( pParam, ']' ); + + if ( pSquareClose ) + { + // The test against ']' catches cases like, so we point to the last dot vc[int(va_r.x) + 29].x + if ( pDot && ( pSquareClose < pDot ) ) + return pDot; + else + return NULL; + } + + // Make sure the next character is a valid swizzle since we want to treat strings like vec4( gl_Normal, 0.0 ) as a whole param name. 
+ if ( pDot && ( ( *(pDot+1) == 'x' ) || ( *(pDot+1) == 'y' ) || ( *(pDot+1) == 'z' ) || ( *(pDot+1) == 'w' ) || + ( *(pDot+1) == 'r' ) || ( *(pDot+1) == 'g' ) || ( *(pDot+1) == 'b' ) || ( *(pDot+1) == 'z' ) ) ) + { + return pDot; + } + + return NULL; +} + +int GetNumSwizzleComponents( const char *pParam ) +{ + // Special scalar output which won't accept a swizzle + if ( !V_stricmp( pParam, "gl_FogFragCoord" ) ) + return 1; + + // Special scalar output which won't accept a swizzle + if ( !V_stricmp( pParam, "gl_FragDepth" ) ) + return 1; + + // Special scalar output which won't accept a swizzle + if ( !V_stricmp( pParam, "a0" ) ) + return 1; + + const char *pDot = GetSwizzleDot( pParam ); + if ( pDot ) + { + pDot++; // Step over the dot + + int nNumSwizzleComponents = 0; + while ( ( *pDot == 'x' ) || ( *pDot == 'y' ) || ( *pDot == 'z' ) || ( *pDot == 'w' ) || + ( *pDot == 'r' ) || ( *pDot == 'g' ) || ( *pDot == 'b' ) || ( *pDot == 'z' ) ) + { + nNumSwizzleComponents++; + pDot++; + } + + return nNumSwizzleComponents; + } + + return 0; +} + +char GetSwizzleComponent( const char *pParam, int n ) +{ + Assert( n < 4 ); + + const char *pDot = GetSwizzleDot( pParam ); + if ( pDot ) + { + ++pDot; + int nComponents = (int)V_strlen( pDot ); + Assert( nComponents > 0 ); + + if ( n < nComponents ) + return pDot[n]; + else + return pDot[nComponents-1]; + } + + return g_szDefaultSwizzle[n]; +} + +// Replace the parameter name and leave the swizzle intact. +// So "somevar.xyz" becomes "othervar.xyz". +void ReplaceParamName( const char *pSrc, const char *pNewParamName, char *pOut, int nOutLen ) +{ + // Start with the new parameter name. + V_strncpy( pOut, pNewParamName, nOutLen ); + + // Now add the swizzle if necessary. 
+ const char *pDot = GetSwizzleDot( pSrc ); + if ( pDot ) + { + V_strncat( pOut, pDot, nOutLen ); + } +} + +void GetParamNameWithoutSwizzle( const char *pParam, char *pOut, int nOutLen ) +{ + char *pParamStart = (char *) pParam; + const char *pDot = GetSwizzleDot( pParam ); // dot followed by valid swizzle characters + bool bAbsWrapper = false; + + // Check for abs() or -abs() wrapper and strip it off during the fixup + if ( !V_strncmp( pParam, "abs(", 4 ) || !V_strncmp( pParam, "-abs(", 5 ) ) + { + const char *pOpenParen = strchr( pParam, '(' ); // FIRST opening paren + const char *pClosingParen = strrchr( pParam, ')' ); // LAST closing paren + + Assert ( pOpenParen && pClosingParen ); + pClosingParen; // hush compiler + + pParamStart = (char *) pOpenParen; + pParamStart++; + bAbsWrapper = true; + } + + if ( pDot ) + { + int nToCopy = MIN( nOutLen-1, pDot - pParamStart ); + memcpy( pOut, pParamStart, nToCopy ); + pOut[nToCopy] = 0; + } + else + { + V_strncpy( pOut, pParamStart, bAbsWrapper ? nOutLen - 1 : nOutLen ); + } +} + +bool DoParamNamesMatch( const char *pParam1, const char *pParam2 ) +{ + char szTemp[2][256]; + GetParamNameWithoutSwizzle( pParam1, szTemp[0], sizeof( szTemp[0] ) ); + GetParamNameWithoutSwizzle( pParam2, szTemp[1], sizeof( szTemp[1] ) ); + return ( V_stricmp( szTemp[0], szTemp[1] ) == 0 ); +} + + + +// Extract the n'th component of the swizzle mask. +// If n would exceed the length of the swizzle mask, then it looks up into "xyzw". +void WriteParamWithSingleMaskEntry( const char *pParam, int n, char *pOut, int nOutLen ) +{ + bool bCloseParen = false; + if ( !V_strncmp( pParam, "-abs(", 5 ) ) + { + V_strcpy( pOut, "-abs(" ); + bCloseParen = true; + + pOut += 5; nOutLen -= 5; + } + else if ( !V_strncmp( pParam, "abs(", 4 ) ) + { + V_strcpy( pOut, "abs(" ); + bCloseParen = true; + + pOut += 4; nOutLen -= 4; + } + + GetParamNameWithoutSwizzle( pParam, pOut, nOutLen ); + PrintToBuf( pOut, nOutLen, "." 
); + PrintToBuf( pOut, nOutLen, "%c", GetSwizzleComponent( pParam, n ) ); + + if ( bCloseParen ) + { + PrintToBuf( pOut, nOutLen, ")" ); + } +} + + +float uint32ToFloat( uint32 dw ) +{ + return *((float*)&dw); +} + +CUtlString EnsureNumSwizzleComponents( const char *pSrcRegisterName, int nComponents ) +{ + int nExisting = GetNumSwizzleComponents( pSrcRegisterName ); + if ( nExisting == nComponents ) + return pSrcRegisterName; + + bool bAbsWrapper = false; // Parameter wrapped in an abs() + bool bAbsNegative = false; // -abs() + char szSrcRegister[128]; + V_strncpy( szSrcRegister, pSrcRegisterName, sizeof(szSrcRegister) ); + + // Check for abs() or -abs() wrapper and strip it off during the fixup + if ( !V_strncmp( pSrcRegisterName, "abs(", 4 ) || !V_strncmp( pSrcRegisterName, "-abs(", 5 ) ) + { + bAbsWrapper = true; + bAbsNegative = pSrcRegisterName[0] == '-'; + + const char *pOpenParen = strchr( pSrcRegisterName, '(' ); // FIRST opening paren + const char *pClosingParen = strrchr( pSrcRegisterName, ')' ); // LAST closing paren + + Assert ( pOpenParen && pClosingParen ); // If we start with abs( and don't get both parens, something is very wrong + + // Copy out just the register name with no abs() + int nRegNameLength = pClosingParen - pOpenParen - 1; + V_strncpy( szSrcRegister, pOpenParen+1, nRegNameLength + 1 ); // Kind of a weird function...copy more than you need and slam the last char to NULL-terminate + } + + char szReg[256]; + GetParamNameWithoutSwizzle( szSrcRegister, szReg, sizeof( szReg ) ); + if ( nComponents == 0 ) + return szReg; + + PrintToBuf( szReg, sizeof( szReg ), "." ); + if ( nExisting > nComponents ) + { + // DX ASM will sometimes have statements like "NRM r0.xyz, r1.yzww", where it just doesn't use the last part of r1. So we won't either. 
+ for ( int i=0; i < nComponents; i++ ) + { + PrintToBuf( szReg, sizeof( szReg ), "%c", GetSwizzleComponent( szSrcRegister, i ) ); + } + } + else + { + if ( nExisting == 0 ) + { + // We've got something like r0 and need N more components, so add as much of "xyzw" is needed. + for ( int i=0; i < nComponents; i++ ) + PrintToBuf( szReg, sizeof( szReg ), "%c", g_szDefaultSwizzle[i] ); + } + else + { + // We've got something like r0.x and need N more components, so replicate the X so it looks like r0.xxx + V_strncpy( szReg, szSrcRegister, sizeof( szReg ) ); + char cLast = szSrcRegister[ V_strlen( szSrcRegister ) - 1 ]; + for ( int i=nExisting; i < nComponents; i++ ) + { + PrintToBuf( szReg, sizeof( szReg ), "%c", cLast ); + } + } + } + + if ( bAbsWrapper ) + { + char szTemp[128]; + V_strncpy( szTemp, szReg, sizeof(szTemp) ); + V_snprintf( szReg, sizeof( szReg ), "%sabs(%s)", bAbsNegative ? "-" : "", szTemp ) ; + } + + return szReg; +} + +static void TranslationError() +{ + GLMDebugPrintf( "D3DToGL: GLSL translation error!\n" ); + DebuggerBreakIfDebugging(); + + Error( "D3DToGL: GLSL translation error!\n" ); +} + +D3DToGL::D3DToGL() +{ +} + +uint32 D3DToGL::GetNextToken( void ) +{ + uint32 dwToken = *m_pdwNextToken; + m_pdwNextToken++; + return dwToken; +} + +void D3DToGL::SkipTokens( uint32 numToSkip ) +{ + m_pdwNextToken += numToSkip; +} + +uint32 D3DToGL::Opcode( uint32 dwToken ) +{ + return ( dwToken & D3DSI_OPCODE_MASK ); +} + +uint32 D3DToGL::OpcodeSpecificData (uint32 dwToken) +{ + return ( ( dwToken & D3DSP_OPCODESPECIFICCONTROL_MASK ) >> D3DSP_OPCODESPECIFICCONTROL_SHIFT ); +} + +uint32 D3DToGL::TextureType ( uint32 dwToken ) +{ + return ( dwToken & D3DSP_TEXTURETYPE_MASK ); // Note this one doesn't shift due to weird D3DSAMPLER_TEXTURE_TYPE enum +} + + + +// Print GLSL intrinsic corresponding to particular instruction +bool D3DToGL::OpenIntrinsic( uint32 inst, char* buff, int nBufLen, uint32 destDimension, uint32 nArgumentDimension ) +{ + // Some GLSL 
intrinsics need type conversion, which we do in this routine + // As a result, the caller must sometimes close both parentheses, not just one + bool bDoubleClose = false; + + if ( nArgumentDimension == 0 ) + { + nArgumentDimension = 4; + } + + switch ( inst ) + { + case D3DSIO_RSQ: + V_snprintf( buff, nBufLen, "inversesqrt( " ); + break; + case D3DSIO_DP3: + case D3DSIO_DP4: + if ( destDimension == 1 ) + { + V_snprintf( buff, nBufLen, "dot( " ); + } + else + { + if ( !destDimension ) + destDimension = 4; + V_snprintf( buff, nBufLen, "vec%d( dot( ", destDimension ); + bDoubleClose = true; + } + break; + case D3DSIO_MIN: + V_snprintf( buff, nBufLen, "min( " ); + break; + case D3DSIO_MAX: + V_snprintf( buff, nBufLen, "max( " ); + break; + case D3DSIO_SLT: + if ( nArgumentDimension == 1 ) + { + V_snprintf( buff, nBufLen, "float( " ); // lessThan doesn't have a scalar version + } + else + { + Assert( nArgumentDimension > 1 ); + V_snprintf( buff, nBufLen, "vec%d( lessThan( ", nArgumentDimension ); + bDoubleClose = true; + } + break; + case D3DSIO_SGE: + if ( nArgumentDimension == 1 ) + { + V_snprintf( buff, nBufLen, "float( " ); // greaterThanEqual doesn't have a scalar version + } + else + { + Assert( nArgumentDimension > 1 ); + V_snprintf( buff, nBufLen, "vec%d( greaterThanEqual( ", nArgumentDimension ); + bDoubleClose = true; + } + break; + case D3DSIO_EXP: + V_snprintf( buff, nBufLen, "exp( " ); // exp2 ? + break; + case D3DSIO_LOG: + V_snprintf( buff, nBufLen, "log( " ); // log2 ? 
+ break; + case D3DSIO_LIT: + TranslationError(); + V_snprintf( buff, nBufLen, "lit( " ); // gonna have to write this one + break; + case D3DSIO_DST: + V_snprintf( buff, nBufLen, "dst( " ); // gonna have to write this one + break; + case D3DSIO_LRP: + Assert( !m_bVertexShader ); + V_snprintf( buff, nBufLen, "mix( " ); + break; + case D3DSIO_FRC: + V_snprintf( buff, nBufLen, "fract( " ); + break; + case D3DSIO_M4x4: + TranslationError(); + V_snprintf( buff, nBufLen, "m4x4" ); + break; + case D3DSIO_M4x3: + case D3DSIO_M3x4: + case D3DSIO_M3x3: + case D3DSIO_M3x2: + case D3DSIO_CALL: + case D3DSIO_CALLNZ: + case D3DSIO_LOOP: + case D3DSIO_RET: + case D3DSIO_ENDLOOP: + case D3DSIO_LABEL: + case D3DSIO_DCL: + TranslationError(); + break; + case D3DSIO_POW: + V_snprintf( buff, nBufLen, "pow( " ); + break; + case D3DSIO_CRS: + V_snprintf( buff, nBufLen, "cross( " ); + break; + case D3DSIO_SGN: + TranslationError(); + V_snprintf( buff, nBufLen, "sign( " ); + break; + case D3DSIO_ABS: + V_snprintf( buff, nBufLen, "abs( " ); + break; + case D3DSIO_NRM: + TranslationError(); + V_snprintf( buff, nBufLen, "normalize( " ); + break; + case D3DSIO_SINCOS: + TranslationError(); + V_snprintf( buff, nBufLen, "sincos( " ); // gonna have to write this one + break; + case D3DSIO_REP: + case D3DSIO_ENDREP: + case D3DSIO_IF: + case D3DSIO_IFC: + case D3DSIO_ELSE: + case D3DSIO_ENDIF: + case D3DSIO_BREAK: + case D3DSIO_BREAKC: // TODO: these are the reason we even need GLSL...gotta make these work + TranslationError(); + break; + case D3DSIO_DEFB: + case D3DSIO_DEFI: + TranslationError(); + break; + case D3DSIO_TEXCOORD: + V_snprintf( buff, nBufLen, "texcoord" ); + break; + case D3DSIO_TEXKILL: + V_snprintf( buff, nBufLen, "kill( " ); // wrap the discard instruction? 
+ break; + case D3DSIO_TEX: + TranslationError(); + V_snprintf( buff, nBufLen, "TEX" ); // We shouldn't get here + break; + case D3DSIO_TEXBEM: + case D3DSIO_TEXBEML: + case D3DSIO_TEXREG2AR: + case D3DSIO_TEXREG2GB: + case D3DSIO_TEXM3x2PAD: + case D3DSIO_TEXM3x2TEX: + case D3DSIO_TEXM3x3PAD: + case D3DSIO_TEXM3x3TEX: + case D3DSIO_TEXM3x3SPEC: + case D3DSIO_TEXM3x3VSPEC: + TranslationError(); + break; + case D3DSIO_EXPP: + V_snprintf( buff, nBufLen, "exp( " ); + break; + case D3DSIO_LOGP: + V_snprintf( buff, nBufLen, "log( " ); + break; + case D3DSIO_CND: + TranslationError(); + break; + case D3DSIO_DEF: + TranslationError(); + V_snprintf( buff, nBufLen, "DEF" ); + break; + case D3DSIO_TEXREG2RGB: + case D3DSIO_TEXDP3TEX: + case D3DSIO_TEXM3x2DEPTH: + case D3DSIO_TEXDP3: + case D3DSIO_TEXM3x3: + TranslationError(); + break; + case D3DSIO_TEXDEPTH: + V_snprintf( buff, nBufLen, "texdepth" ); + break; + case D3DSIO_CMP: + TranslationError(); + Assert( !m_bVertexShader ); + V_snprintf( buff, nBufLen, "CMP" ); + break; + case D3DSIO_BEM: + TranslationError(); + break; + case D3DSIO_DP2ADD: + TranslationError(); + break; + case D3DSIO_DSX: + case D3DSIO_DSY: + TranslationError(); + break; + case D3DSIO_TEXLDD: + V_snprintf( buff, nBufLen, "texldd" ); + break; + case D3DSIO_SETP: + TranslationError(); + break; + case D3DSIO_TEXLDL: + V_snprintf( buff, nBufLen, "texldl" ); + break; + case D3DSIO_BREAKP: + case D3DSIO_PHASE: + TranslationError(); + break; + } + + return bDoubleClose; +} + + +const char* D3DToGL::GetGLSLOperatorString( uint32 inst ) +{ + if ( inst == D3DSIO_ADD ) + return "+"; + else if ( inst == D3DSIO_SUB ) + return "-"; + else if ( inst == D3DSIO_MUL ) + return "*"; + + Error( "GetGLSLOperatorString: unknown operator" ); + return "zzzz"; +} + + +// Print ASM opcode +void D3DToGL::PrintOpcode( uint32 inst, char* buff, int nBufLen ) +{ + switch ( inst ) + { + case D3DSIO_NOP: + V_snprintf( buff, nBufLen, "NOP" ); + TranslationError(); + break; + case 
D3DSIO_MOV: + V_snprintf( buff, nBufLen, "MOV" ); + break; + case D3DSIO_ADD: + V_snprintf( buff, nBufLen, "ADD" ); + break; + case D3DSIO_SUB: + V_snprintf( buff, nBufLen, "SUB" ); + break; + case D3DSIO_MAD: + V_snprintf( buff, nBufLen, "MAD" ); + break; + case D3DSIO_MUL: + V_snprintf( buff, nBufLen, "MUL" ); + break; + case D3DSIO_RCP: + V_snprintf( buff, nBufLen, "RCP" ); + break; + case D3DSIO_RSQ: + V_snprintf( buff, nBufLen, "RSQ" ); + break; + case D3DSIO_DP3: + V_snprintf( buff, nBufLen, "DP3" ); + break; + case D3DSIO_DP4: + V_snprintf( buff, nBufLen, "DP4" ); + break; + case D3DSIO_MIN: + V_snprintf( buff, nBufLen, "MIN" ); + break; + case D3DSIO_MAX: + V_snprintf( buff, nBufLen, "MAX" ); + break; + case D3DSIO_SLT: + V_snprintf( buff, nBufLen, "SLT" ); + break; + case D3DSIO_SGE: + V_snprintf( buff, nBufLen, "SGE" ); + break; + case D3DSIO_EXP: + V_snprintf( buff, nBufLen, "EX2" ); + break; + case D3DSIO_LOG: + V_snprintf( buff, nBufLen, "LG2" ); + break; + case D3DSIO_LIT: + V_snprintf( buff, nBufLen, "LIT" ); + break; + case D3DSIO_DST: + V_snprintf( buff, nBufLen, "DST" ); + break; + case D3DSIO_LRP: + Assert( !m_bVertexShader ); + V_snprintf( buff, nBufLen, "LRP" ); + break; + case D3DSIO_FRC: + V_snprintf( buff, nBufLen, "FRC" ); + break; + case D3DSIO_M4x4: + V_snprintf( buff, nBufLen, "m4x4" ); + break; + case D3DSIO_M4x3: + case D3DSIO_M3x4: + case D3DSIO_M3x3: + case D3DSIO_M3x2: + case D3DSIO_CALL: + case D3DSIO_CALLNZ: + case D3DSIO_LOOP: + case D3DSIO_RET: + case D3DSIO_ENDLOOP: + case D3DSIO_LABEL: + TranslationError(); + break; + case D3DSIO_DCL: + V_snprintf( buff, nBufLen, "DCL" ); + break; + case D3DSIO_POW: + V_snprintf( buff, nBufLen, "POW" ); + break; + case D3DSIO_CRS: + V_snprintf( buff, nBufLen, "XPD" ); + break; + case D3DSIO_SGN: + TranslationError(); + V_snprintf( buff, nBufLen, "SGN" ); + break; + case D3DSIO_ABS: + V_snprintf( buff, nBufLen, "ABS" ); + break; + case D3DSIO_NRM: + TranslationError(); + V_snprintf( buff, 
nBufLen, "NRM" ); + break; + case D3DSIO_SINCOS: + Assert( !m_bVertexShader ); + V_snprintf( buff, nBufLen, "SCS" ); + break; + case D3DSIO_REP: + case D3DSIO_ENDREP: + case D3DSIO_IF: + case D3DSIO_IFC: + case D3DSIO_ELSE: + case D3DSIO_ENDIF: + case D3DSIO_BREAK: + case D3DSIO_BREAKC: + TranslationError(); + break; + case D3DSIO_MOVA: + Assert( m_bVertexShader ); + V_snprintf( buff, nBufLen, "MOV" ); // We're always moving into a temp instead, so this is MOV instead of ARL + break; + case D3DSIO_DEFB: + case D3DSIO_DEFI: + TranslationError(); + break; + case D3DSIO_TEXCOORD: + V_snprintf( buff, nBufLen, "texcoord" ); + break; + case D3DSIO_TEXKILL: + V_snprintf( buff, nBufLen, "KIL" ); + break; + case D3DSIO_TEX: + V_snprintf( buff, nBufLen, "TEX" ); + break; + case D3DSIO_TEXBEM: + case D3DSIO_TEXBEML: + case D3DSIO_TEXREG2AR: + case D3DSIO_TEXREG2GB: + case D3DSIO_TEXM3x2PAD: + case D3DSIO_TEXM3x2TEX: + case D3DSIO_TEXM3x3PAD: + case D3DSIO_TEXM3x3TEX: + case D3DSIO_TEXM3x3SPEC: + case D3DSIO_TEXM3x3VSPEC: + TranslationError(); + break; + case D3DSIO_EXPP: + V_snprintf( buff, nBufLen, "EXP" ); + break; + case D3DSIO_LOGP: + V_snprintf( buff, nBufLen, "LOG" ); + break; + case D3DSIO_CND: + TranslationError(); + break; + case D3DSIO_DEF: + V_snprintf( buff, nBufLen, "DEF" ); + break; + case D3DSIO_TEXREG2RGB: + case D3DSIO_TEXDP3TEX: + case D3DSIO_TEXM3x2DEPTH: + case D3DSIO_TEXDP3: + case D3DSIO_TEXM3x3: + TranslationError(); + break; + case D3DSIO_TEXDEPTH: + V_snprintf( buff, nBufLen, "texdepth" ); + break; + case D3DSIO_CMP: + Assert( !m_bVertexShader ); + V_snprintf( buff, nBufLen, "CMP" ); + break; + case D3DSIO_BEM: + TranslationError(); + break; + case D3DSIO_DP2ADD: + TranslationError(); + break; + case D3DSIO_DSX: + case D3DSIO_DSY: + TranslationError(); + break; + case D3DSIO_TEXLDD: + V_snprintf( buff, nBufLen, "texldd" ); + break; + case D3DSIO_SETP: + TranslationError(); + break; + case D3DSIO_TEXLDL: + V_snprintf( buff, nBufLen, "texldl" ); + 
break; + case D3DSIO_BREAKP: + case D3DSIO_PHASE: + TranslationError(); + break; + } +} + +CUtlString D3DToGL::GetUsageAndIndexString( uint32 dwToken, int fSemanticFlags ) +{ + char szTemp[1024]; + PrintUsageAndIndexToString( dwToken, szTemp, sizeof( szTemp ), fSemanticFlags ); + return szTemp; +} + +//------------------------------------------------------------------------------ +// Helper function which prints ASCII representation of usage-usageindex pair to string +// +// Strictly used by vertex shaders +// not used any more now that we have attribmap metadata +//------------------------------------------------------------------------------ +void D3DToGL::PrintUsageAndIndexToString( uint32 dwToken, char* strUsageUsageIndexName, int nBufLen, int fSemanticFlags ) +{ + uint32 dwUsage = ( dwToken & D3DSP_DCL_USAGE_MASK ); + uint32 dwUsageIndex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT; + + switch ( dwUsage ) + { + case D3DDECLUSAGE_POSITION: + if ( m_bVertexShader ) + { + if ( fSemanticFlags & SEMANTIC_OUTPUT ) + V_snprintf( strUsageUsageIndexName, nBufLen, "vTempPos" ); // effectively gl_Position + else + V_snprintf( strUsageUsageIndexName, nBufLen, "gl_Vertex" ); + } + else + { + // .xy = position in viewport coordinates + // .z = depth + V_snprintf( strUsageUsageIndexName, nBufLen, "gl_FragCoord" ); + } + + break; + case D3DDECLUSAGE_BLENDWEIGHT: + V_snprintf( strUsageUsageIndexName, nBufLen, "vertex.attrib[1]" ); // "vertex.attrib[12]" ); // or [1] + break; + case D3DDECLUSAGE_BLENDINDICES: + V_snprintf( strUsageUsageIndexName, nBufLen, "vertex.attrib[13]" ); // "vertex.attrib[13]" ); // or [ 7 ] + break; + case D3DDECLUSAGE_NORMAL: + V_snprintf( strUsageUsageIndexName, nBufLen, "vec4( gl_Normal, 0.0 )" ); + break; + case D3DDECLUSAGE_PSIZE: + TranslationError(); + V_snprintf( strUsageUsageIndexName, nBufLen, "_psize" ); // no analog + break; + case D3DDECLUSAGE_TEXCOORD: + V_snprintf( strUsageUsageIndexName, nBufLen, "oT%d", 
dwUsageIndex ); + break; + case D3DDECLUSAGE_TANGENT: + + NoteTangentInputUsed(); + V_strncpy( strUsageUsageIndexName, g_pTangentAttributeName, nBufLen ); + + break; + case D3DDECLUSAGE_BINORMAL: + V_snprintf( strUsageUsageIndexName, nBufLen, "vertex.attrib[14]" ); // aka texc[6] + break; +// case D3DDECLUSAGE_TESSFACTOR: +// TranslationError(); +// V_snprintf( strUsageUsageIndexName, nBufLen, "_position" ); // no analog +// break; +// case D3DDECLUSAGE_POSITIONT: +// TranslationError(); +// V_snprintf( strUsageUsageIndexName, nBufLen, "_positiont" ); // no analog +// break; + case D3DDECLUSAGE_COLOR: + + Assert( dwUsageIndex <= 1 ); +// if ( fSemanticFlags & SEMANTIC_OUTPUT ) +// V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "gl_BackColor" : "gl_FrontColor" ); +// else + V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "gl_SecondaryColor" : "gl_Color" ); + + break; + case D3DDECLUSAGE_FOG: + TranslationError(); + break; + case D3DDECLUSAGE_DEPTH: + TranslationError(); + V_snprintf( strUsageUsageIndexName, nBufLen, "_depth" ); // no analog + break; + case D3DDECLUSAGE_SAMPLE: + TranslationError(); + V_snprintf( strUsageUsageIndexName, nBufLen, "_sample" ); // no analog + break; + default: + Debugger(); + break; + } +} + +uint32 D3DToGL::GetRegType( uint32 dwRegToken ) +{ + return ( ( dwRegToken & D3DSP_REGTYPE_MASK2 ) >> D3DSP_REGTYPE_SHIFT2 ) | ( ( dwRegToken & D3DSP_REGTYPE_MASK ) >> D3DSP_REGTYPE_SHIFT ); +} + +void D3DToGL::PrintIndentation( char *pBuf, int nBufLen ) +{ + for( int i=0; i<m_NumIndentTabs; i++ ) + { + strcat_s( pBuf, nBufLen, "\t" ); + } +} + +CUtlString D3DToGL::GetParameterString( uint32 dwToken, uint32 dwSourceOrDest, bool bForceScalarSource, int *pARLDestReg ) +{ + char szTemp[1024]; + PrintParameterToString( dwToken, dwSourceOrDest, szTemp, sizeof( szTemp ), bForceScalarSource, pARLDestReg ); + return szTemp; +} + + +// If the register happens to end with ".xyzw", then this strips off the mask. 
+void SimplifyFourParamRegister( char *pRegister ) +{ + int nLen = V_strlen( pRegister ); + if ( nLen > 5 && V_strcmp( &pRegister[nLen-5], ".xyzw" ) == 0 ) + pRegister[nLen-5] = 0; +} + + +// This returns 0 for x, 1 for y, 2 for z, and 3 for w. +int GetSwizzleComponentVectorIndex( char chMask ) +{ + if ( chMask == 'x' ) + return 0; + else if ( chMask == 'y' ) + return 1; + else if ( chMask == 'z' ) + return 2; + else if ( chMask == 'w' ) + return 3; + + Error( "GetSwizzleComponentVectorIndex( '%c' ) - invalid parameter.\n", chMask ); + return 0; +} + + +// GLSL needs the # of src masks to match the dest write mask. +// +// So this: +// r0.xy = r1 + r2; +// becomes: +// r0.xy = r1.xy + r2.xy; +// +// +// Also, and this is the trickier one: GLSL reads the source registers from their first component on +// whereas D3D reads them as referenced in the dest register mask! +// +// So this code in D3D: +// r0.yz = c0.x + c1.wxyz +// Really means: +// r0.y = c0.x + c1.x +// r0.z = c0.x + c1.y +// So we translate it to this in GLSL: +// r0.yz = c0.xx + c1.wx +// r0.yz = c0.xx + c1.xy +// +CUtlString D3DToGL::FixGLSLSwizzle( const char *pDestRegisterName, const char *pSrcRegisterName ) +{ + bool bAbsWrapper = false; // Parameter wrapped in an abs() + bool bAbsNegative = false; // -abs() + char szSrcRegister[128]; + V_strncpy( szSrcRegister, pSrcRegisterName, sizeof(szSrcRegister) ); + + // Check for abs() or -abs() wrapper and strip it off during the fixup + if ( !V_strncmp( pSrcRegisterName, "abs(", 4 ) || !V_strncmp( pSrcRegisterName, "-abs(", 5 ) ) + { + bAbsWrapper = true; + bAbsNegative = pSrcRegisterName[0] == '-'; + + const char *pOpenParen = strchr( pSrcRegisterName, '(' ); // FIRST opening paren + const char *pClosingParen = strrchr( pSrcRegisterName, ')' ); // LAST closing paren + + Assert ( pOpenParen && pClosingParen ); // If we start with abs( and don't get both parens, something is very wrong + + // Copy out just the register name with no abs() + int 
nRegNameLength = pClosingParen - pOpenParen - 1; + V_strncpy( szSrcRegister, pOpenParen+1, nRegNameLength + 1 ); // Kind of a weird function...copy more than you need and slam the last char to NULL-terminate + + } + + int nSwizzlesInDest = GetNumSwizzleComponents( pDestRegisterName ); + if ( nSwizzlesInDest == 0 ) + nSwizzlesInDest = 4; + + char szFixedSrcRegister[128]; + GetParamNameWithoutSwizzle( szSrcRegister, szFixedSrcRegister, sizeof( szFixedSrcRegister ) ); + V_strncat( szFixedSrcRegister, ".", sizeof( szFixedSrcRegister ) ); + for ( int i=0; i < nSwizzlesInDest; i++ ) + { + char chDestWriteMask = GetSwizzleComponent( pDestRegisterName, i ); + int nVectorIndex = GetSwizzleComponentVectorIndex( chDestWriteMask ); + + char ch[2]; + ch[0] = GetSwizzleComponent( szSrcRegister, nVectorIndex ); + ch[1] = 0; + V_strncat( szFixedSrcRegister, ch, sizeof( szFixedSrcRegister ) ); + } + + SimplifyFourParamRegister( szFixedSrcRegister ); + + if ( bAbsWrapper ) + { + char szTempSrcRegister[128]; + V_strncpy( szTempSrcRegister, szFixedSrcRegister, sizeof(szTempSrcRegister) ); + V_snprintf( szFixedSrcRegister, sizeof( szFixedSrcRegister ), "%sabs(%s)", bAbsNegative ? 
"-" : "", szTempSrcRegister ) ; + } + + return szFixedSrcRegister; +} + +// Weird encoding...bits are split apart in the dwToken +inline uint32 GetRegTypeFromToken( uint32 dwToken ) +{ + return ( ( dwToken & D3DSP_REGTYPE_MASK2 ) >> D3DSP_REGTYPE_SHIFT2 ) | ( ( dwToken & D3DSP_REGTYPE_MASK ) >> D3DSP_REGTYPE_SHIFT ); +} + +void D3DToGL::FlagIndirectRegister( uint32 dwToken, int *pARLDestReg ) +{ + if ( !pARLDestReg ) + return; + + switch ( dwToken & D3DVS_SWIZZLE_MASK & D3DVS_X_W ) + { + case D3DVS_X_X: + *pARLDestReg = ARL_DEST_X; + break; + case D3DVS_X_Y: + *pARLDestReg = ARL_DEST_Y; + break; + case D3DVS_X_Z: + *pARLDestReg = ARL_DEST_Z; + break; + case D3DVS_X_W: + *pARLDestReg = ARL_DEST_W; + break; + } +} + + +//------------------------------------------------------------------------------ +// PrintParameterToString() +// +// Helper function which prints ASCII representation of passed Parameter dwToken +// to string. Token defines parameter details. The dwSourceOrDest parameter says +// whether or not this is a source or destination register +//------------------------------------------------------------------------------ +void D3DToGL::PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, char *pRegisterName, int nBufLen, bool bForceScalarSource, int *pARLDestReg ) +{ + char buff[32]; + bool bAllowWriteMask = true; + bool bAllowSwizzle = true; + + uint32 dwRegNum = dwToken & D3DSP_REGNUM_MASK; + + uint32 dwRegType, dwSwizzle; + uint32 dwSrcModifier = D3DSPSM_NONE; + + // Clear string to zero length + pRegisterName[ 0 ] = 0; + + dwRegType = GetRegTypeFromToken( dwToken ); + + // If this is a dest register + if ( dwSourceOrDest == DST_REGISTER ) + { + // Instruction modifiers + if ( dwToken & D3DSPDM_PARTIALPRECISION ) + { +// strcat_s( pRegisterName, nBufLen, "_pp" ); + } + + if ( dwToken & D3DSPDM_MSAMPCENTROID) + { +// strcat_s( pRegisterName, nBufLen, "_centroid" ); + } + } + + // If this is a source register + if ( dwSourceOrDest == 
SRC_REGISTER ) + { + dwSrcModifier = dwToken & D3DSP_SRCMOD_MASK; + + // If there are any source modifiers, check to see if they're at + // least partially "prefix" and prepend appropriately + if ( dwSrcModifier != D3DSPSM_NONE ) + { + switch ( dwSrcModifier ) + { + // These four start with just minus... (some may result in "postfix" notation as well later on) + case D3DSPSM_NEG: // negate + strcat_s( pRegisterName, nBufLen, "-" ); + break; + case D3DSPSM_BIASNEG: // bias and negate + case D3DSPSM_SIGNNEG: // sign and negate + case D3DSPSM_X2NEG: // *2 and negate + TranslationError(); + strcat_s( pRegisterName, nBufLen, "-" ); + break; + case D3DSPSM_COMP: // complement + TranslationError(); + strcat_s( pRegisterName, nBufLen, "1-" ); + break; + case D3DSPSM_ABS: // abs() + strcat_s( pRegisterName, nBufLen, "abs(" ); + + break; + case D3DSPSM_ABSNEG: // -abs() + strcat_s( pRegisterName, nBufLen, "-abs(" ); + + break; + case D3DSPSM_NOT: // for predicate register: "!p0" + TranslationError(); + strcat_s( pRegisterName, nBufLen, "!" ); + break; + } + } + } + + // Register name (from type and number) + switch ( dwRegType ) + { + case D3DSPR_TEMP: + V_snprintf( buff, sizeof( buff ), "r%d", dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + m_dwTempUsageMask |= 0x00000001 << dwRegNum; // Keep track of the use of this temp + break; + case D3DSPR_INPUT: + if ( !m_bVertexShader && ( dwSourceOrDest == SRC_REGISTER ) ) + { + if ( m_dwMajorVersion == 3 ) + { + V_snprintf( buff, sizeof( buff ), "oTempT%d", dwRegNum ); + } + else + { + V_snprintf( buff, sizeof( buff ), dwRegNum == 0 ? 
"gl_Color" : "gl_SecondaryColor" ); + } + strcat_s( pRegisterName, nBufLen, buff ); + } + else + { + V_snprintf( buff, sizeof( buff ), "v%d", dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + } + break; + case D3DSPR_CONST: + if ( m_bConstantRegisterDefined[dwRegNum] ) + { + char szConstantRegName[3]; + if ( m_bVertexShader ) + { + V_snprintf( szConstantRegName, 3, "vd" ); + } + else + { + V_snprintf( szConstantRegName, 3, "pd" ); + } + + // Put defined constants into their own namespace "d" + V_snprintf( buff, sizeof( buff ), "%s%d", szConstantRegName, dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + } + else if ( dwToken & D3DSHADER_ADDRESSMODE_MASK ) // Indirect addressing (e.g. skinning in a vertex shader) + { + char szConstantRegName[16]; + if ( m_bVertexShader ) + { + V_snprintf( szConstantRegName, 3, "vc" ); + } + else // No indirect addressing in PS, this shouldn't happen + { + TranslationError(); + V_snprintf( szConstantRegName, 3, "pc" ); + } + + if ( ( m_bGenerateBoneUniformBuffer ) && ( dwRegNum >= DXABSTRACT_VS_FIRST_BONE_SLOT ) ) + { + if( dwRegNum < DXABSTRACT_VS_LAST_BONE_SLOT ) + { + dwRegNum -= DXABSTRACT_VS_FIRST_BONE_SLOT; + V_strcpy( szConstantRegName, "vcbones" ); + + m_nHighestBoneRegister = ( DXABSTRACT_VS_PARAM_SLOTS - 1 ) - DXABSTRACT_VS_FIRST_BONE_SLOT; + } + else + { + dwRegNum -= ( DXABSTRACT_VS_LAST_BONE_SLOT + 1 ) - DXABSTRACT_VS_FIRST_BONE_SLOT; + m_nHighestRegister = m_bGenerateBoneUniformBuffer ? ( ( DXABSTRACT_VS_PARAM_SLOTS - 1 ) - ( ( DXABSTRACT_VS_LAST_BONE_SLOT + 1 ) - DXABSTRACT_VS_FIRST_BONE_SLOT ) ): ( DXABSTRACT_VS_PARAM_SLOTS - 1 ); + } + } + else + { + m_nHighestRegister = m_bGenerateBoneUniformBuffer ? 
( ( DXABSTRACT_VS_PARAM_SLOTS - 1 ) - ( ( DXABSTRACT_VS_LAST_BONE_SLOT + 1 ) - DXABSTRACT_VS_FIRST_BONE_SLOT ) ): ( DXABSTRACT_VS_PARAM_SLOTS - 1 ); + } + + // Index into single pc/vc[] register array with relative addressing + int nDstReg = -1; + FlagIndirectRegister( GetNextToken(), &nDstReg ); + if ( pARLDestReg ) + *pARLDestReg = nDstReg; + + Assert( nDstReg != ARL_DEST_NONE ); + int nSrcSwizzle = 'x'; + if ( nDstReg == ARL_DEST_Y ) + nSrcSwizzle = 'y'; + else if ( nDstReg == ARL_DEST_Z ) + nSrcSwizzle = 'z'; + else if ( nDstReg == ARL_DEST_W ) + nSrcSwizzle = 'w'; + V_snprintf( buff, sizeof( buff ), "%s[int(va_r.%c) + %d]", szConstantRegName, nSrcSwizzle, dwRegNum ); + + strcat_s( pRegisterName, nBufLen, buff ); + + // Must allow swizzling, otherwise this example doesn't compile right: mad r3.xyz, c27[a0.w].w, r3, r7 + //bAllowSwizzle = false; + } + else // Direct addressing of constant array + { + char szConstantRegName[16]; + V_snprintf( szConstantRegName, 3, m_bVertexShader ? "vc" : "pc" ); + + if ( ( m_bGenerateBoneUniformBuffer ) && ( dwRegNum >= DXABSTRACT_VS_FIRST_BONE_SLOT ) ) + { + if( dwRegNum < DXABSTRACT_VS_LAST_BONE_SLOT ) + { + dwRegNum -= DXABSTRACT_VS_FIRST_BONE_SLOT; + V_strcpy( szConstantRegName, "vcbones" ); + + m_nHighestBoneRegister = MAX( m_nHighestBoneRegister, (int)dwRegNum ); + } + else + { + // handles case where constants after the bones are used (c217 onwards), these are to be concatenated with those before the bones (c0-c57) + // keep track of regnum for concatenated array + dwRegNum -= ( DXABSTRACT_VS_LAST_BONE_SLOT + 1 ) - DXABSTRACT_VS_FIRST_BONE_SLOT; + m_nHighestRegister = MAX( m_nHighestRegister, dwRegNum ); + } + } + else + { + //// NOGO if (dwRegNum != 255) // have seen cases where dwRegNum is 0xFF... 
need to figure out where those opcodes are coming from + { + m_nHighestRegister = MAX( m_nHighestRegister, dwRegNum ); + } + + Assert( m_nHighestRegister < DXABSTRACT_VS_PARAM_SLOTS ); + } + + // Index into single pc/vc[] register array with absolute addressing, same for GLSL and ASM + V_snprintf( buff, sizeof( buff ), "%s[%d]", szConstantRegName, dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + } + break; + case D3DSPR_ADDR: // aliases to D3DSPR_TEXTURE + if ( m_bVertexShader ) + { + Assert( dwRegNum == 0 ); + + V_snprintf( buff, sizeof( buff ), "va_r" ); + } + else // D3DSPR_TEXTURE in the pixel shader + { + // If dest reg, this is an iterator/varying declaration + if ( dwSourceOrDest == DST_REGISTER ) + { + // Is this iterator centroid? + if ( m_nCentroidMask & ( 0x00000001 << dwRegNum ) ) + { + V_snprintf( buff, sizeof( buff ), "centroid varying vec4 oT%d", dwRegNum ); // centroid varying + } + else + { + V_snprintf( buff, sizeof( buff ), "varying vec4 oT%d", dwRegNum ); + } + + bAllowWriteMask = false; + } + else // source register + { + V_snprintf( buff, sizeof( buff ), "oT%d", dwRegNum ); + } + } + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_RASTOUT: // vertex shader oPos + Assert( m_bVertexShader ); + Assert( m_dwMajorVersion == 2 ); + switch( dwRegNum ) + { + case D3DSRO_POSITION: + strcat_s( pRegisterName, nBufLen, "vTempPos" ); // In GLSL, this ends up in gl_Position later on + m_bDeclareVSOPos = true; + break; + + case D3DSRO_FOG: + strcat_s( pRegisterName, nBufLen, "gl_FogFragCoord" ); + m_bDeclareVSOFog = true; + break; + + default: + printf( "\nD3DSPR_RASTOUT: dwRegNum is %08x and token is %08x", dwRegNum, dwToken ); + TranslationError(); + break; + } + break; + case D3DSPR_ATTROUT: + Assert( m_bVertexShader ); + Assert( m_dwMajorVersion == 2 ); + + if ( dwRegNum == 0 ) + { + V_snprintf( buff, sizeof( buff ), "gl_FrontColor" ); + } + else if ( dwRegNum == 1 ) + { + V_snprintf( buff, sizeof( buff ), 
"gl_FrontSecondaryColor" ); + } + else + { + Error( "Invalid D3DSPR_ATTROUT index" ); + } + + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_TEXCRDOUT: // aliases to D3DSPR_OUTPUT + if ( m_bVertexShader ) + { + if ( m_nVSPositionOutput == (int32) dwRegNum ) + { + V_snprintf( buff, sizeof( buff ), "vTempPos" ); // This output varying is the position + } + else if ( m_dwMajorVersion == 3 ) + { + V_snprintf( buff, sizeof( buff ), "oTempT%d", dwRegNum ); + } + else + { + V_snprintf( buff, sizeof( buff ), "oT%d", dwRegNum ); + } + + m_dwTexCoordOutMask |= ( 0x00000001 << dwRegNum ); + } + else + { + V_snprintf( buff, sizeof( buff ), "oC%d", dwRegNum ); + } + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_CONSTINT: + V_snprintf( buff, sizeof( buff ), "i%d", dwRegNum ); // Loops use these + strcat_s( pRegisterName, nBufLen, buff ); + m_dwConstIntUsageMask |= 0x00000001 << dwRegNum; // Keep track of the use of this integer constant + break; + case D3DSPR_COLOROUT: + V_snprintf( buff, sizeof( buff ), "gl_FragData[%d]", dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + m_bOutputColorRegister[dwRegNum] = true; + break; + case D3DSPR_DEPTHOUT: + V_snprintf( buff, sizeof( buff ), "gl_FragDepth" ); + strcat_s( pRegisterName, nBufLen, buff ); + m_bOutputDepthRegister = true; + break; + case D3DSPR_SAMPLER: + V_snprintf( buff, sizeof( buff ), "sampler%d", dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_CONST2: + TranslationError(); + V_snprintf( buff, sizeof( buff ), "c%d", dwRegNum+2048); + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_CONST3: + TranslationError(); + V_snprintf( buff, sizeof( buff ), "c%d", dwRegNum+4096); + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_CONST4: + TranslationError(); + V_snprintf( buff, sizeof( buff ), "c%d", dwRegNum+6144); + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_CONSTBOOL: + V_snprintf( buff, sizeof( buff ), 
m_bVertexShader ? "b%d" : "fb%d", dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + m_dwConstBoolUsageMask |= 0x00000001 << dwRegNum; // Keep track of the use of this bool constant + break; + case D3DSPR_LOOP: + TranslationError(); + V_snprintf( buff, sizeof( buff ), "aL%d", dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_TEMPFLOAT16: + TranslationError(); + V_snprintf( buff, sizeof( buff ), "temp_float16_xxx%d", dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_MISCTYPE: + Assert( dwRegNum == 0 ); // So far, we know that MISC[0] is gl_FragCoord (aka vPos in DX ASM parlance), but we don't know about any other MISC registers + V_snprintf( buff, sizeof( buff ), "gl_FragCoord" ); + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_LABEL: + TranslationError(); + V_snprintf( buff, sizeof( buff ), "label%d", dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + break; + case D3DSPR_PREDICATE: + TranslationError(); + V_snprintf( buff, sizeof( buff ), "p%d", dwRegNum ); + strcat_s( pRegisterName, nBufLen, buff ); + break; + } + + // If this is a dest register + if ( dwSourceOrDest == DST_REGISTER ) + { + // + // Write masks + // + // If some (not all, not none) of the write masks are set, we should include them + // + if ( bAllowWriteMask && ( !((dwToken & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) || ((dwToken & D3DSP_WRITEMASK_ALL) == 0x00000000) ) ) + { + // Put the dot on there + strcat_s( pRegisterName, nBufLen, "." 
); + + // Optionally put on the x, y, z or w + int nMasksWritten = 0; + if ( dwToken & D3DSP_WRITEMASK_0 ) + { + strcat_s( pRegisterName, nBufLen, "x" ); + ++nMasksWritten; + } + if ( dwToken & D3DSP_WRITEMASK_1 ) + { + strcat_s( pRegisterName, nBufLen, "y" ); + ++nMasksWritten; + } + if ( dwToken & D3DSP_WRITEMASK_2 ) + { + strcat_s( pRegisterName, nBufLen, "z" ); + ++nMasksWritten; + } + if ( dwToken & D3DSP_WRITEMASK_3 ) + { + strcat_s( pRegisterName, nBufLen, "w" ); + ++nMasksWritten; + } + } + } + else // must be a source register + { + if ( bAllowSwizzle ) // relative addressing hard-codes the swizzle on a0.x + { + uint32 dwXSwizzle, dwYSwizzle, dwZSwizzle, dwWSwizzle; + + // Mask out the swizzle modifier + dwSwizzle = dwToken & D3DVS_SWIZZLE_MASK; + + // If there are any swizzles at all, tack on the appropriate notation + if ( dwSwizzle != D3DVS_NOSWIZZLE ) + { + // Separate out the two-bit codes for each component swizzle + dwXSwizzle = dwSwizzle & D3DVS_X_W; + dwYSwizzle = dwSwizzle & D3DVS_Y_W; + dwZSwizzle = dwSwizzle & D3DVS_Z_W; + dwWSwizzle = dwSwizzle & D3DVS_W_W; + + // Put on the dot + strcat_s( pRegisterName, nBufLen, "." ); + + // See where X comes from + switch ( dwXSwizzle ) + { + case D3DVS_X_X: + strcat_s( pRegisterName, nBufLen, "x" ); + break; + case D3DVS_X_Y: + strcat_s( pRegisterName, nBufLen, "y" ); + break; + case D3DVS_X_Z: + strcat_s( pRegisterName, nBufLen, "z" ); + break; + case D3DVS_X_W: + strcat_s( pRegisterName, nBufLen, "w" ); + break; + } + + if ( !bForceScalarSource ) + { + // If the source of the remaining components are aren't + // identical to the source of x, continue with swizzle + if ( ((dwXSwizzle >> D3DVS_SWIZZLE_SHIFT) != (dwYSwizzle >> (D3DVS_SWIZZLE_SHIFT + 2))) || // X and Y sources match? + ((dwXSwizzle >> D3DVS_SWIZZLE_SHIFT) != (dwZSwizzle >> (D3DVS_SWIZZLE_SHIFT + 4))) || // X and Z sources match? + ((dwXSwizzle >> D3DVS_SWIZZLE_SHIFT) != (dwWSwizzle >> (D3DVS_SWIZZLE_SHIFT + 6)))) // X and W sources match? 
+ { + + // OpenGL seems to want us to have either 1 or 4 components in a swizzle, so just plow on through the rest + switch ( dwYSwizzle ) + { + case D3DVS_Y_X: + strcat_s( pRegisterName, nBufLen, "x" ); + break; + case D3DVS_Y_Y: + strcat_s( pRegisterName, nBufLen, "y" ); + break; + case D3DVS_Y_Z: + strcat_s( pRegisterName, nBufLen, "z" ); + break; + case D3DVS_Y_W: + strcat_s( pRegisterName, nBufLen, "w" ); + break; + } + + switch ( dwZSwizzle ) + { + case D3DVS_Z_X: + strcat_s( pRegisterName, nBufLen, "x" ); + break; + case D3DVS_Z_Y: + strcat_s( pRegisterName, nBufLen, "y" ); + break; + case D3DVS_Z_Z: + strcat_s( pRegisterName, nBufLen, "z" ); + break; + case D3DVS_Z_W: + strcat_s( pRegisterName, nBufLen, "w" ); + break; + } + + switch ( dwWSwizzle ) + { + case D3DVS_W_X: + strcat_s( pRegisterName, nBufLen, "x" ); + break; + case D3DVS_W_Y: + strcat_s( pRegisterName, nBufLen, "y" ); + break; + case D3DVS_W_Z: + strcat_s( pRegisterName, nBufLen, "z" ); + break; + case D3DVS_W_W: + strcat_s( pRegisterName, nBufLen, "w" ); + break; + } + + } + + } // end !bForceScalarSource + } + else // dwSwizzle == D3DVS_NOSWIZZLE + { + // If this is a MOVA / ARL, GL on the Mac requires us to tack the .x onto the source register + if ( bForceScalarSource ) + { + strcat_s( pRegisterName, nBufLen, ".x" ); + } + } + } // bAllowSwizzle + + // If there are any source modifiers, check to see if they're at + // least partially "postfix" and tack them on as appropriate + if ( dwSrcModifier != D3DSPSM_NONE ) + { + switch ( dwSrcModifier ) + { + case D3DSPSM_BIAS: // bias + case D3DSPSM_BIASNEG: // bias and negate + TranslationError(); + strcat_s( pRegisterName, nBufLen, "_bx2" ); + break; + case D3DSPSM_SIGN: // sign + case D3DSPSM_SIGNNEG: // sign and negate + TranslationError(); + strcat_s( pRegisterName, nBufLen, "_sgn" ); + break; + case D3DSPSM_X2: // *2 + case D3DSPSM_X2NEG: // *2 and negate + TranslationError(); + strcat_s( pRegisterName, nBufLen, "_x2" ); + break; + case 
D3DSPSM_ABS: // abs() + case D3DSPSM_ABSNEG: // -abs() + strcat_s( pRegisterName, nBufLen, ")" ); + break; + case D3DSPSM_DZ: // divide through by z component + TranslationError(); + strcat_s( pRegisterName, nBufLen, "_dz" ); + break; + case D3DSPSM_DW: // divide through by w component + TranslationError(); + strcat_s( pRegisterName, nBufLen, "_dw" ); + break; + } + } // end postfix modifiers (really only ps.1.x) + } +} + +void D3DToGL::RecordInputAndOutputPositions() +{ + // Remember where we are in the token stream. + m_pRecordedInputTokenStart = m_pdwNextToken; + + // Remember where our outputs are. + m_nRecordedParamCodeStrlen = V_strlen( (char*)m_pBufParamCode->Base() ); + m_nRecordedALUCodeStrlen = V_strlen( (char*)m_pBufALUCode->Base() ); + m_nRecordedAttribCodeStrlen = V_strlen( (char*)m_pBufAttribCode->Base() ); +} +void D3DToGL::AddTokenHexCodeToBuffer( char *pBuffer, int nSize, int nLastStrlen ) +{ + int nCurStrlen = V_strlen( pBuffer ); + if ( nCurStrlen == nLastStrlen ) + return; + + // Build a string with all the hex codes of the tokens since last time. + char szHex[512]; + szHex[0] = '\n'; + V_snprintf( &szHex[1], sizeof( szHex )-1, HEXCODE_HEADER ); + int nTokens = MIN( 10, m_pdwNextToken - m_pRecordedInputTokenStart ); + for ( int i=0; i < nTokens; i++ ) + { + char szTemp[32]; + V_snprintf( szTemp, sizeof( szTemp ), "0x%x ", m_pRecordedInputTokenStart[i] ); + V_strncat( szHex, szTemp, sizeof( szHex ) ); + } + V_strncat( szHex, "\n", sizeof( szHex ) ); + + // Insert the hex codes into the string. + int nBytesToInsert = V_strlen( szHex ); + if ( nCurStrlen + nBytesToInsert + 1 >= nSize ) + Error( "Buffer overflow writing token hex codes" ); + + if ( m_bPutHexCodesAfterLines ) + { + // Put it at the end of the last line. 
+ if ( pBuffer[nCurStrlen-1] == '\n' ) + pBuffer[nCurStrlen-1] = 0; + + V_strncat( pBuffer, &szHex[1], nSize ); + } + else + { + memmove( pBuffer + nLastStrlen + nBytesToInsert, pBuffer + nLastStrlen, nCurStrlen - nLastStrlen + 1 ); + memcpy( pBuffer + nLastStrlen, szHex, nBytesToInsert ); + } +} + +void D3DToGL::AddTokenHexCode() +{ + if ( m_pdwNextToken > m_pRecordedInputTokenStart ) + { + AddTokenHexCodeToBuffer( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size(), m_nRecordedParamCodeStrlen ); + AddTokenHexCodeToBuffer( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size(), m_nRecordedALUCodeStrlen ); + AddTokenHexCodeToBuffer( (char*)m_pBufAttribCode->Base(), m_pBufAttribCode->Size(), m_nRecordedAttribCodeStrlen ); + } +} + +uint32 D3DToGL::MaintainAttributeMap( uint32 dwToken, uint32 dwRegToken ) +{ + // Check that this reg index has not been used before - if it has, let Houston know + uint dwRegIndex = dwRegToken & D3DSP_REGNUM_MASK; + if ( m_dwAttribMap[ dwRegIndex ] == 0xFFFFFFFF ) + { + // log it + // semantic/usage in the higher nibble + // usage index in the low nibble + + uint usage = dwToken & D3DSP_DCL_USAGE_MASK; + uint usageindex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT; + + m_dwAttribMap[ dwRegIndex ] = ( usage << 4 ) | usageindex; + + // avoid writing 0xBB since runtime code uses that for an 'unused' marker + if ( m_dwAttribMap[ dwRegIndex ] == 0xBB ) + { + Debugger(); + } + } + else + { + //not OK + Debugger(); + } + + return dwRegIndex; +} + +void D3DToGL::Handle_DCL() +{ + uint32 dwToken = GetNextToken(); // What kind of dcl is this... 
+ uint32 dwRegToken = GetNextToken(); // Look ahead to register token + + uint32 dwUsage = ( dwToken & D3DSP_DCL_USAGE_MASK ); + uint32 dwUsageIndex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT; + + uint32 dwRegNum = dwRegToken & D3DSP_REGNUM_MASK; + uint32 nRegType = GetRegTypeFromToken( dwRegToken ); + + if ( m_bVertexShader ) + { + // If this is an output, remember the index (what the ASM code calls o0, o1, o2..) and the semantic. + // When GetParameterString( DST_REGISTER ) hits this one, we'll return "oN". + // At the end of the main() function, we'll insert a bunch of statements like "gl_Color = o2" based on what we remembered here. + if ( ( m_dwMajorVersion >= 3 ) && ( nRegType == D3DSPR_OUTPUT ) ) + { +// uint32 dwRegComponents = ( dwRegToken & D3DSP_WRITEMASK_ALL ) >> 16; // Components used by the output register (1 means float, 3 means vec2, 7 means vec3, f means vec4) + + if ( dwRegNum >= MAX_DECLARED_OUTPUTS ) + Error( "Output register number (%d) too high (only %d supported).", dwRegNum, MAX_DECLARED_OUTPUTS ); + + if ( m_DeclaredOutputs[dwRegNum] != UNDECLARED_OUTPUT ) + Error( "Output dcl_ hit for register #%d more than once!", dwRegNum ); + + Assert( dwToken != UNDECLARED_OUTPUT ); + m_DeclaredOutputs[dwRegNum] = dwToken; + + //uint32 dwUsage = ( dwToken & D3DSP_DCL_USAGE_MASK ); + //uint32 dwUsageIndex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT; + + // Flag which o# output register maps to gl_Position + if ( dwUsage == D3DDECLUSAGE_POSITION ) + { + m_nVSPositionOutput = dwUsageIndex; + m_bDeclareVSOPos = true; + } + + if ( m_bAddHexCodeComments ) + { + CUtlString sParam2 = GetUsageAndIndexString( dwToken, SEMANTIC_OUTPUT ); + PrintToBuf( *m_pBufHeaderCode, "// [GL remembering that oT%d maps to %s]\n", dwRegNum, sParam2.String() ); + } + + } + else if ( GetRegType( dwRegToken ) == D3DSPR_SAMPLER ) + { + // We can support vertex texturing if necessary, but I can't find a use case in any branch. 
(HW morphing in L4D2 isn't enabled, and the comments indicate that r_hwmorph isn't compatible with mat_queue_mode anyway, and CS:GO/DoTA don't use vertex shader texturing.) + TranslationError(); + + int nRegNum = dwRegToken & D3DSP_REGNUM_MASK; + switch ( TextureType( dwToken ) ) + { + default: + case D3DSTT_UNKNOWN: + case D3DSTT_2D: + m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_2D; + break; + case D3DSTT_CUBE: + m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_CUBE; + break; + case D3DSTT_VOLUME: + m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_3D; + break; + } + + // Track sampler declarations + m_dwSamplerUsageMask |= 1 << nRegNum; + } + else + { + Assert( GetRegType( dwRegToken ) == D3DSPR_INPUT); + + CUtlString sParam1 = GetParameterString( dwRegToken, DST_REGISTER, false, NULL ); + CUtlString sParam2 = GetUsageAndIndexString( dwToken, SEMANTIC_INPUT ); + + sParam2 = FixGLSLSwizzle( sParam1, sParam2 ); + PrintToBuf( *m_pBufHeaderCode, "attribute vec4 %s; // ", sParam1.String() ); + + MaintainAttributeMap( dwToken, dwRegToken ); + + char temp[128]; + // regnum goes straight into the vertex.attrib[n] index + sprintf( temp, "%08x %08x\n", dwToken, dwRegToken ); + StrcatToHeaderCode( temp ); + } + } + else // Pixel shader + { + // If the register is a sampler, the dcl has a dimension decorator that we have to save for subsequent TEX instructions + uint32 nRegType = GetRegType( dwRegToken ); + if ( nRegType == D3DSPR_SAMPLER ) + { + int nRegNum = dwRegToken & D3DSP_REGNUM_MASK; + switch ( TextureType( dwToken ) ) + { + default: + case D3DSTT_UNKNOWN: + case D3DSTT_2D: + m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_2D; + break; + case D3DSTT_CUBE: + m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_CUBE; + break; + case D3DSTT_VOLUME: + m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_3D; + break; + } + + // Track sampler declarations + m_dwSamplerUsageMask |= 1 << nRegNum; + } + else // Not a sampler, we're going to generate varying declaration code + { + // In pixel shaders we only declare texture 
coordinate varyings since they may be using centroid + if ( ( m_dwMajorVersion == 3 ) && ( nRegType == D3DSPR_INPUT ) ) + { + Assert( m_DeclaredInputs[dwRegNum] == UNDECLARED_INPUT ); + m_DeclaredInputs[dwRegNum] = dwToken; + + if ( ( dwUsage != D3DDECLUSAGE_COLOR ) && ( dwUsage != D3DDECLUSAGE_TEXCOORD ) ) + { + TranslationError(); // Not supported yet, but can be if we need it. + } + + if ( dwUsage == D3DDECLUSAGE_TEXCOORD ) + { + char buf[256]; + if ( m_nCentroidMask & ( 0x00000001 << dwUsageIndex ) ) + { + V_snprintf( buf, sizeof( buf ), "centroid varying vec4 oT%d;\n", dwUsageIndex ); // centroid varying + } + else + { + V_snprintf( buf, sizeof( buf ), "varying vec4 oT%d;\n", dwUsageIndex ); + } + StrcatToHeaderCode( buf ); + } + } + else if ( nRegType == D3DSPR_TEXTURE ) + { + char buff[256]; + PrintParameterToString( dwRegToken, DST_REGISTER, buff, sizeof( buff ), false, NULL ); + PrintToBuf( *m_pBufHeaderCode, "%s;\n",buff ); + } + else + { + // No need to declare anything (probably D3DSPR_MISCTYPE either VPOS or VFACE) + } + } + } +} + +static bool IsFloatNaN( float f ) +{ + const uint nBits = *reinterpret_cast<uint*>(&f); + const uint nExponent = ( nBits >> 23 ) & 0xFF; + + return ( nExponent == 255 ); +} + +static inline bool EqualTol( double a, double b, double t ) +{ + return fabs( a - b ) <= ( ( MAX( fabs( a ), fabs( b ) ) + 1.0 ) * t ); +} + +// Originally written by Bruce Dawson, see: +// See http://randomascii.wordpress.com/2012/03/08/float-precisionfrom-zero-to-100-digits-2/ +// This class represents a very limited high-precision number with 'count' 32-bit +// unsigned elements. 
+template <int count> +struct HighPrec +{ + typedef unsigned T; + typedef unsigned long long Product_t; + static const int kWordShift = 32; + HighPrec() + { + memset(m_data, 0, sizeof(m_data)); + m_nLowestNonZeroIndex = ARRAYSIZE(m_data); + } + + // Insert the bits from value into m_data, shifted in from the bottom (least + // significant end) by the specified number of bits. A shift of zero or less + // means that none of the bits will be shifted in. A shift of one means that + // the high bit of value will be in the bottom of the last element of m_data - + // the least significant bit. A shift of kWordShift means that value will be + // in the least significant element of m_data, and so on. + void InsertLowBits(T value, int shiftAmount) + { + if (shiftAmount <= 0) + return; + + int subShift = shiftAmount & (kWordShift - 1); + int bigShift = shiftAmount / kWordShift; + Product_t result = (Product_t)value << subShift; + T resultLow = (T)result; + T resultHigh = result >> kWordShift; + + // Use an unsigned type so that negative numbers will become large, + // which makes the range checking below simpler. + unsigned highIndex = ARRAYSIZE(m_data) - 1 - bigShift; + // Write the results to the data array. If the index is too large + // then that means that the data was shifted off the edge. + if ( (highIndex < ARRAYSIZE(m_data)) && ( resultHigh ) ) + { + m_data[highIndex] |= resultHigh; + m_nLowestNonZeroIndex = MIN( m_nLowestNonZeroIndex, highIndex ); + } + + if ( ( highIndex + 1 < ARRAYSIZE(m_data)) && ( resultLow ) ) + { + m_data[highIndex + 1] |= resultLow; + m_nLowestNonZeroIndex = MIN( m_nLowestNonZeroIndex, highIndex + 1 ); + } + } + + // Insert the bits from value into m_data, shifted in from the top (most + // significant end) by the specified number of bits. A shift of zero or less + // means that none of the bits will be shifted in. 
A shift of one means that + // the low bit of value will be in the top of the first element of m_data - + // the most significant bit. A shift of kWordShift means that value will be + // in the most significant element of m_data, and so on. + void InsertTopBits(T value, int shiftAmount) + { + InsertLowBits(value, (ARRAYSIZE(m_data) + 1) * kWordShift - shiftAmount); + } + + // Return true if all elements of m_data are zero. + bool IsZero() const + { + bool bIsZero = ( m_nLowestNonZeroIndex == ARRAYSIZE(m_data) ); + +#ifdef DEBUG + for (int i = 0; i < ARRAYSIZE(m_data); ++i) + { + if (m_data[i]) + { + Assert( !bIsZero ); + return false; + } + } + Assert( bIsZero ); +#endif + + return bIsZero; + } + + // Divide by div and return the remainder, from 0 to div-1. + // Standard long-division algorithm. + T DivReturnRemainder(T divisor) + { + T remainder = 0; + +#ifdef DEBUG + for (uint j = 0; j < m_nLowestNonZeroIndex; ++j) + { + Assert( m_data[j] == 0 ); + } +#endif + + int nNewLowestNonZeroIndex = ARRAYSIZE(m_data); + for (int i = m_nLowestNonZeroIndex; i < ARRAYSIZE(m_data); ++i) + { + Product_t dividend = ((Product_t)remainder << kWordShift) + m_data[i]; + Product_t result = dividend / divisor; + remainder = T(dividend % divisor); + + m_data[i] = T(result); + + if ( ( result ) && ( nNewLowestNonZeroIndex == ARRAYSIZE(m_data) ) ) + nNewLowestNonZeroIndex = i; + } + m_nLowestNonZeroIndex = nNewLowestNonZeroIndex; + + return remainder; + } + + // The individual 'digits' (32-bit unsigned integers actually) that + // make up the number. The most-significant digit is in m_data[0]. + T m_data[count]; + + uint m_nLowestNonZeroIndex; +}; + +union Double_t +{ + Double_t(double num = 0.0f) : f(num) {} + // Portable extraction of components. 
+ bool Negative() const { return (i >> 63) != 0; } + int64_t RawMantissa() const { return i & ((1LL << 52) - 1); } + int64_t RawExponent() const { return (i >> 52) & 0x7FF; } + + int64_t i; + double f; +}; + +static uint PrintDoubleInt( char *pBuf, uint nBufSize, double f, uint nMinChars ) +{ + static const char *pDigits = "00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899"; + + Assert( !nMinChars || ( ( nMinChars % 6 ) == 0 ) ); + + char *pLastChar = pBuf + nBufSize - 1; + char *pDst = pLastChar; + *pDst-- = '\0'; + + // Put the double in our magic union so we can grab the components. + union Double_t num(f); + + // Get the character that represents the sign. + // Check for NaNs or infinity. + if (num.RawExponent() == 2047) + { + TranslationError(); + } + + // Adjust for the exponent bias. + int exponentValue = int(num.RawExponent() - 1023); + // Add the implied one to the mantissa. + uint64_t mantissaValue = (1ll << 52) + num.RawMantissa(); + // Special-case for denormals - no special exponent value and + // no implied one. + if (num.RawExponent() == 0) + { + exponentValue = -1022; + mantissaValue = num.RawMantissa(); + } + uint32_t mantissaHigh = mantissaValue >> 32; + uint32_t mantissaLow = mantissaValue & 0xFFFFFFFF; + + // The first bit of the mantissa has an implied value of one and this can + // be shifted 1023 positions to the left, so that's 1024 bits to the left + // of the binary point, or 32 32-bit words for the integer part. + HighPrec<32> intPart; + // When our exponentValue is zero (a number in the 1.0 to 2.0 range) + // we have a 53-bit mantissa and the implied value of the highest bit + // is 1. We need to shift 12 bits in from the bottom to get that 53rd bit + // into the ones spot in the integral portion. + // To complicate it a bit more we have to insert the mantissa as two parts. 
+ intPart.InsertLowBits(mantissaHigh, 12 + exponentValue); + intPart.InsertLowBits(mantissaLow, 12 + exponentValue - 32); + + bool bAnyDigitsLeft; + do + { + uint remainder = intPart.DivReturnRemainder( 1000000 ); // 10^6 + uint origRemainer = remainder; (void)origRemainer; + + bAnyDigitsLeft = !intPart.IsZero(); + + if ( bAnyDigitsLeft ) + { + uint n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[n]; + n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1 - 2) = reinterpret_cast<const uint16*>(pDigits)[n]; + Assert( remainder < 100U ); + *reinterpret_cast<uint16*>(pDst - 1 - 4) = reinterpret_cast<const uint16*>(pDigits)[remainder]; + pDst -= 6; + } + else + { + uint n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[n]; --pDst; if ( ( n >= 10 ) || ( remainder ) ) --pDst; + if ( remainder ) + { + n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[n]; --pDst; if ( ( n >= 10 ) || ( remainder ) ) --pDst; + + if ( remainder ) + { + Assert( remainder < 100U ); + *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[remainder]; --pDst; if ( remainder >= 10 ) --pDst; + } + } + } + + } while ( bAnyDigitsLeft ); + + uint l = pLastChar - pDst; + + while ( ( l - 1 ) < nMinChars ) + { + *pDst-- = '0'; + l++; + } + + Assert( (int)l == ( pLastChar - pDst ) ); + + Assert( l <= nBufSize ); + + memmove( pBuf, pDst + 1, l ); + return l - 1; +} + +// FloatToString is equivalent to sprintf( "%.12f" ), but doesn't have any dependencies on the current locale setting. +// Unfortunately, high accuracy radix conversion is actually pretty tricky to do right. +// Most importantly, this function has the same max roundtrip (IEEE->ASCII->IEEE) error as the MS CRT functions and can reliably handle extremely large inputs. 
+static void FloatToString( char *pBuf, uint nBufSize, double fConst ) +{ + char *pEnd = pBuf + nBufSize; + char *pDst = pBuf; + + double flVal = fConst; + if ( IsFloatNaN( flVal ) ) + { + flVal = 0; + } + + if ( flVal < 0.0f ) + { + *pDst++ = '-'; + flVal = -flVal; + } + + double flInt; + double flFract = modf( flVal, &flInt ); + + flFract = floor( flFract * 1000000000000.0 + .5 ); + + if ( !flInt ) + { + *pDst++ = '0'; + } + else + { + uint l = PrintDoubleInt( pDst, pEnd - pDst, flInt, 0 ); + pDst += l; + } + + *pDst++ = '.'; + if ( !flFract ) + { + *pDst++ = '0'; + *pDst++ = '\0'; + } + else + { + uint l = PrintDoubleInt( pDst, pEnd - pDst, flFract, 12 ); + pDst += l; + + StripExtraTrailingZeros( pBuf ); // Turn 1.00000 into 1.0 + } +} + +#if 0 +#include "vstdlib/random.h" +static void TestFloatConversion() +{ + for ( ; ; ) + { + double fConst; + switch ( rand() % 4 ) + { + case 0: + fConst = RandomFloat( -1e-30, 1e+30 ); break; + case 1: + fConst = RandomFloat( -1e-10, 1e+10 ); break; + case 2: + fConst = RandomFloat( -1e-5, 1e+5 ); break; + default: + fConst = RandomFloat( -1, 1 ); break; + } + + char szTemp[1024]; + + // FloatToString does not rely on V_snprintf(), so it can't be affected by the current locale setting. + FloatToString( szTemp, sizeof( szTemp ), fConst ); + + static double flMaxErr1; + static double flMaxErr2; + + // Compare FloatToString()'s results vs. V_snprintf()'s, also track maximum error of each. + double flCheck = atof( szTemp ); + double flErr = fabs( flCheck - fConst ); + flMaxErr1 = MAX( flMaxErr1, flErr ); + Assert( EqualTol( flCheck, fConst, .000000125 ) ); + + char szTemp2[256]; + V_snprintf( szTemp2, sizeof( szTemp2 ), "%.12f", fConst ); + StripExtraTrailingZeros( szTemp2 ); + + if ( !strchr( szTemp2, '.' 
) ) + { + V_strncat( szTemp2, ".0", sizeof( szTemp2 ) ); + } + double flCheck2 = atof( szTemp2 ); + double flErr2 = fabs( flCheck2 - fConst ); + flMaxErr2 = MAX( flMaxErr2, flErr2 ); + Assert( EqualTol( flCheck2, fConst, .000000125 ) ); + + if ( flMaxErr1 > flMaxErr2 ) + { + GLMDebugPrintf( "!\n" ); + } + } +} +#endif + +void D3DToGL::Handle_DEFIB( uint32 instruction ) +{ + Assert( ( instruction == D3DSIO_DEFI ) || ( instruction == D3DSIO_DEFB ) ); + + // which register is being defined + uint32 dwToken = GetNextToken(); + + uint32 nRegNum = dwToken & D3DSP_REGNUM_MASK; + + uint32 regType = GetRegTypeFromToken( dwToken ); + + + if ( regType == D3DSPR_CONSTINT ) + { + m_dwDefConstIntUsageMask |= ( 1 << nRegNum ); + + uint x = GetNextToken(); + uint y = GetNextToken(); + uint z = GetNextToken(); + uint w = GetNextToken(); + NOTE_UNUSED(y); NOTE_UNUSED(z); NOTE_UNUSED(w); + + Assert( nRegNum < 32 ); + if ( nRegNum < 32 ) + { + m_dwDefConstIntIterCount[nRegNum] = x; + } + } + else + { + TranslationError(); + } + +} + +void D3DToGL::Handle_DEF() +{ + //TestFloatConversion(); + + // + // JasonM TODO: catch D3D's sincos-specific D3DSINCOSCONST1 and D3DSINCOSCONST2 constants and filter them out here + // + + // Which register is being defined + uint32 dwToken = GetNextToken(); + + // Note that this constant was explicitly defined + m_bConstantRegisterDefined[dwToken & D3DSP_REGNUM_MASK] = true; + CUtlString sParamName = GetParameterString( dwToken, DST_REGISTER, false, NULL ); + + PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() ); + PrintToBuf( *m_pBufParamCode, "vec4 %s = vec4( ", sParamName.String() ); + + // Run through the 4 floats + for ( int i=0; i < 4; i++ ) + { + float fConst = uint32ToFloat( GetNextToken() ); + + char szTemp[1024]; + + FloatToString( szTemp, sizeof( szTemp ), fConst ); + +#if 0 + static double flMaxErr1; + static double flMaxErr2; + + // Compare FloatToString()'s results vs. 
V_snprintf()'s, also track maximum error of each. + double flCheck = atof( szTemp ); + double flErr = fabs( flCheck - fConst ); + flMaxErr1 = MAX( flMaxErr1, flErr ); + Assert( EqualTol( flCheck, fConst, .000000125 ) ); + + char szTemp2[256]; + V_snprintf( szTemp2, sizeof( szTemp2 ), "%.12f", fConst ); + StripExtraTrailingZeros( szTemp2 ); + + if ( !strchr( szTemp2, '.' ) ) + { + V_strncat( szTemp2, ".0", sizeof( szTemp2 ) ); + } + double flCheck2 = atof( szTemp2 ); + double flErr2 = fabs( flCheck2 - fConst ); + flMaxErr2 = MAX( flMaxErr2, flErr2 ); + Assert( EqualTol( flCheck2, fConst, .000000125 ) ); + + if ( flMaxErr1 > flMaxErr2 ) + { + GLMDebugPrintf( "!\n" ); + } +#endif + + PrintToBuf( *m_pBufParamCode, i != 3 ? "%s, " : "%s", szTemp ); // end with comma-space + } + + PrintToBuf( *m_pBufParamCode, " );\n" ); +} + +void D3DToGL::Handle_MAD( uint32 nInstruction ) +{ + uint32 nDestToken = GetNextToken(); + CUtlString sParam1 = GetParameterString( nDestToken, DST_REGISTER, false, NULL ); + int nARLComp0 = ARL_DEST_NONE; + CUtlString sParam2 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp0 ); + int nARLComp1 = ARL_DEST_NONE; + CUtlString sParam3 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp1 ); + int nARLComp2 = ARL_DEST_NONE; + CUtlString sParam4 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp2 ); + + // This optionally inserts a move from our dummy address register to the .x component of the real one + InsertMoveFromAddressRegister( m_pBufALUCode, nARLComp0, nARLComp1, nARLComp2 ); + + sParam2 = FixGLSLSwizzle( sParam1, sParam2 ); + sParam3 = FixGLSLSwizzle( sParam1, sParam3 ); + sParam4 = FixGLSLSwizzle( sParam1, sParam4 ); + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s * %s + %s;\n", sParam1.String(), sParam2.String(), sParam3.String(), sParam4.String() ); + + // If the _SAT instruction modifier is used, then do a saturate here. 
+ if ( nDestToken & D3DSPDM_SATURATE ) + { + int nComponents = GetNumSwizzleComponents( sParam1.String() ); + if ( nComponents == 0 ) + nComponents = 4; + + PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", sParam1.String(), sParam1.String(), g_szVecZeros[nComponents], g_szVecOnes[nComponents] ); + } +} + + +void D3DToGL::Handle_DP2ADD() +{ + char pDestReg[64], pSrc0Reg[64], pSrc1Reg[64], pSrc2Reg[64]; + uint32 nDestToken = GetNextToken(); + PrintParameterToString( nDestToken, DST_REGISTER, pDestReg, sizeof( pDestReg ), false, NULL ); + PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc0Reg, sizeof( pSrc0Reg ), false, NULL ); + PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc1Reg, sizeof( pSrc1Reg ), false, NULL ); + PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc2Reg, sizeof( pSrc2Reg ), false, NULL ); + + // We should only be assigning to a single component of the dest. + Assert( GetNumSwizzleComponents( pDestReg ) == 1 ); + Assert( GetNumSwizzleComponents( pSrc2Reg ) == 1 ); + + // This is a 2D dot product, so we only want two entries from the middle components. + CUtlString sArg0 = EnsureNumSwizzleComponents( pSrc0Reg, 2 ); + CUtlString sArg1 = EnsureNumSwizzleComponents( pSrc1Reg, 2 ); + + PrintToBufWithIndents( *m_pBufALUCode, "%s = dot( %s, %s ) + %s;\n", pDestReg, sArg0.String(), sArg1.String(), pSrc2Reg ); + + // If the _SAT instruction modifier is used, then do a saturate here. 
+ if ( nDestToken & D3DSPDM_SATURATE ) + { + int nComponents = GetNumSwizzleComponents( pDestReg ); + if ( nComponents == 0 ) + nComponents = 4; + + PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", pDestReg, pDestReg, g_szVecZeros[nComponents], g_szVecOnes[nComponents] ); + } +} + + +void D3DToGL::Handle_SINCOS() +{ + char pDestReg[64], pSrc0Reg[64]; + PrintParameterToString( GetNextToken(), DST_REGISTER, pDestReg, sizeof( pDestReg ), false, NULL ); + PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc0Reg, sizeof( pSrc0Reg ), true, NULL ); + m_bNeedsSinCosDeclarations = true; + + + CUtlString sDest( pDestReg ); + CUtlString sArg0 = EnsureNumSwizzleComponents( pSrc0Reg, 1 );// Ensure input is scalar + CUtlString sResult( "vSinCosTmp.xy" ); // Always going to populate this + sResult = FixGLSLSwizzle( sDest, sResult ); // Make sure we match the desired output reg + + PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.z = %s * %s;\n", sArg0.String(), sArg0.String() ); + + PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.xy = vSinCosTmp.zz * scA.xy + scA.wz;\n" ); + PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.xy = vSinCosTmp.xy * vSinCosTmp.zz + scB.xy;\n" ); + PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.xy = vSinCosTmp.xy * vSinCosTmp.zz + scB.wz;\n" ); + + PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.x = vSinCosTmp.x * %s;\n", sArg0.String() ); + + PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.xy = vSinCosTmp.xy * vSinCosTmp.xx;\n" ); + PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.xy = vSinCosTmp.xy + vSinCosTmp.xy;\n" ); + PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.x = -vSinCosTmp.x + scB.z;\n" ); + + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s;\n", sDest.String(), sResult.String() ); + + if ( m_dwMajorVersion < 3 ) + { + // Eat two more tokens since D3D defines Taylor series constants that we won't need + // Only valid for pixel and vertex shader version earlier than 3_0 + // 
(http://msdn.microsoft.com/en-us/library/windows/hardware/ff569710(v=vs.85).aspx) + SkipTokens( 2 ); + } +} + + +void D3DToGL::Handle_LRP( uint32 nInstruction ) +{ + uint32 nDestToken = GetNextToken(); + CUtlString sDest = GetParameterString( nDestToken, DST_REGISTER, false, NULL ); + int nARLComp0 = ARL_DEST_NONE; + CUtlString sParam0 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp0 ); + int nARLComp1 = ARL_DEST_NONE; + CUtlString sParam1 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp1 ); + int nARLComp2 = ARL_DEST_NONE; + CUtlString sParam2 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp2 ); + + // This optionally inserts a move from our dummy address register to the .x component of the real one + InsertMoveFromAddressRegister( m_pBufALUCode, nARLComp0, nARLComp1, nARLComp2 ); + + sParam0 = FixGLSLSwizzle( sDest, sParam0 ); + sParam1 = FixGLSLSwizzle( sDest, sParam1 ); + sParam2 = FixGLSLSwizzle( sDest, sParam2 ); + + // dest = src0 * (src1 - src2) + src2; + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s * ( %s - %s ) + %s;\n", sDest.String(), sParam0.String(), sParam1.String(), sParam2.String(), sParam2.String() ); + + // If the _SAT instruction modifier is used, then do a saturate here. 
+ if ( nDestToken & D3DSPDM_SATURATE ) + { + int nComponents = GetNumSwizzleComponents( sDest.String() ); + if ( nComponents == 0 ) + nComponents = 4; + + PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", sDest.String(), sDest.String(), g_szVecZeros[nComponents], g_szVecOnes[nComponents] ); + } +} + + +void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL ) +{ + char pDestReg[64], pSrc0Reg[64], pSrc1Reg[64]; + PrintParameterToString( GetNextToken(), DST_REGISTER, pDestReg, sizeof( pDestReg ), false, NULL ); + PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc0Reg, sizeof( pSrc0Reg ), false, NULL ); + + DWORD dwSrc1Token = GetNextToken(); + PrintParameterToString( dwSrc1Token, SRC_REGISTER, pSrc1Reg, sizeof( pSrc1Reg ), false, NULL ); + + Assert( (dwSrc1Token & D3DSP_REGNUM_MASK) < ARRAYSIZE( m_dwSamplerTypes ) ); + uint32 nSamplerType = m_dwSamplerTypes[dwSrc1Token & D3DSP_REGNUM_MASK]; + if ( nSamplerType == SAMPLER_TYPE_2D ) + { + const bool bIsShadowSampler = ( ( 1 << ( (int) ( dwSrc1Token & D3DSP_REGNUM_MASK ) ) ) & m_nShadowDepthSamplerMask ) != 0; + + if ( bIsTexLDL ) + { + CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, bIsShadowSampler ? 3 : 2 ); + + // Strip out the W component of the pSrc0Reg and pass that as the LOD to texture2DLod. + char szLOD[128], szExtra[8]; + GetParamNameWithoutSwizzle( pSrc0Reg, szLOD, sizeof( szLOD ) ); + V_snprintf( szExtra, sizeof( szExtra ), ".%c", GetSwizzleComponent( pSrc0Reg, 3 ) ); + V_strncat( szLOD, szExtra, sizeof( szLOD ) ); + + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s( %s, %s, %s );\n", pDestReg, bIsShadowSampler ? 
"shadow2DLod" : "texture2DLod", pSrc1Reg, sCoordVar.String(), szLOD ); + } + else if ( bIsShadowSampler ) + { + // .z is meant to contain the object depth, while .xy contains the 2D tex coords + CUtlString sCoordVar3D = EnsureNumSwizzleComponents( pSrc0Reg, 3 ); + + PrintToBufWithIndents( *m_pBufALUCode, "%s = shadow2D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar3D.String() ); + Assert( m_dwSamplerTypes[dwSrc1Token & D3DSP_REGNUM_MASK] == SAMPLER_TYPE_2D ); + } + else if( ( OpcodeSpecificData( dwToken ) << D3DSP_OPCODESPECIFICCONTROL_SHIFT ) == D3DSI_TEXLD_PROJECT ) + { + // This projective case is after the shadow case intentionally, due to the way that "projective" + // loads are overloaded in our D3D shaders for shadow lookups. + // + // We use the vec4 variant of texture2DProj() intentionally here, since it lines up well with Direct3D. + + CUtlString s4DProjCoords = EnsureNumSwizzleComponents( pSrc0Reg, 4 ); // Ensure vec4 variant + PrintToBufWithIndents( *m_pBufALUCode, "%s = texture2DProj( %s, %s );\n", pDestReg, pSrc1Reg, s4DProjCoords.String() ); + } + else + { + CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, bIsShadowSampler ? 
3 : 2 ); + PrintToBufWithIndents( *m_pBufALUCode, "%s = texture2D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() ); + } + } + else if ( nSamplerType == SAMPLER_TYPE_3D ) + { + if ( bIsTexLDL ) + { + TranslationError(); + } + + CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, 3 ); + PrintToBufWithIndents( *m_pBufALUCode, "%s = texture3D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() ); + } + else if ( nSamplerType == SAMPLER_TYPE_CUBE ) + { + if ( bIsTexLDL ) + { + TranslationError(); + } + + CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, 3 ); + PrintToBufWithIndents( *m_pBufALUCode, "%s = textureCube( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() ); + } + else + { + Error( "TEX instruction: unsupported sampler type used" ); + } +} + +void D3DToGL::StrcatToHeaderCode( const char *pBuf ) +{ + strcat_s( (char*)m_pBufHeaderCode->Base(), m_pBufHeaderCode->Size(), pBuf ); +} + +void D3DToGL::StrcatToALUCode( const char *pBuf ) +{ + PrintIndentation( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size() ); + + strcat_s( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size(), pBuf ); +} + +void D3DToGL::StrcatToParamCode( const char *pBuf ) +{ + strcat_s( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size(), pBuf ); +} + +void D3DToGL::StrcatToAttribCode( const char *pBuf ) +{ + strcat_s( (char*)m_pBufAttribCode->Base(), m_pBufAttribCode->Size(), pBuf ); +} + +void D3DToGL::Handle_TexLDD( uint32 nInstruction ) +{ + TranslationError(); // Not supported yet, but can be if we need it. 
+} + + +void D3DToGL::Handle_TexCoord() +{ + TranslationError(); + + // If ps_1_4, this is texcrd + if ( (m_dwMajorVersion == 1) && (m_dwMinorVersion == 4) && (!m_bVertexShader) ) + { + StrcatToALUCode( "texcrd" ); + } + else // else it's texcoord + { + TranslationError(); + StrcatToALUCode( "texcoord" ); + } + + char buff[256]; + PrintParameterToString( GetNextToken(), DST_REGISTER, buff, sizeof( buff ), false, NULL ); + StrcatToALUCode( buff ); + + // If ps_1_4, texcrd also has a source parameter + if ((m_dwMajorVersion == 1) && (m_dwMinorVersion == 4) && (!m_bVertexShader)) + { + StrcatToALUCode( ", " ); + PrintParameterToString( GetNextToken(), SRC_REGISTER, buff, sizeof( buff ), false, NULL ); + StrcatToALUCode( buff ); + } + + StrcatToALUCode( ";\n" ); +} + +void D3DToGL::Handle_BREAKC( uint32 dwToken ) +{ + uint nComparison = ( dwToken & D3DSHADER_COMPARISON_MASK ) >> D3DSHADER_COMPARISON_SHIFT; + + const char *pComparison = "?"; + switch ( nComparison ) + { + case D3DSPC_GT: pComparison = ">"; break; + case D3DSPC_EQ: pComparison = "=="; break; + case D3DSPC_GE: pComparison = ">="; break; + case D3DSPC_LT: pComparison = "<"; break; + case D3DSPC_NE: pComparison = "!="; break; + case D3DSPC_LE: pComparison = "<="; break; + default: + TranslationError(); + } + + char src0[256]; + uint32 src0Token = GetNextToken(); + PrintParameterToString( src0Token, SRC_REGISTER, src0, sizeof( src0 ), false, NULL ); + + char src1[256]; + uint32 src1Token = GetNextToken(); + PrintParameterToString( src1Token, SRC_REGISTER, src1, sizeof( src1 ), false, NULL ); + + PrintToBufWithIndents( *m_pBufALUCode, "if (%s %s %s) break;\n", src0, pComparison, src1 ); +} + +void D3DToGL::HandleBinaryOp_GLSL( uint32 nInstruction ) +{ + uint32 nDestToken = GetNextToken(); + CUtlString sParam1 = GetParameterString( nDestToken, DST_REGISTER, false, NULL ); + int nARLComp0 = ARL_DEST_NONE; + CUtlString sParam2 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp0 ); + int 
nARLComp1 = ARL_DEST_NONE; + CUtlString sParam3 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp1 ); + + // This optionally inserts a move from our dummy address register to the .x component of the real one + InsertMoveFromAddressRegister( m_pBufALUCode, nARLComp0, nARLComp1 ); + + // Since DP3 and DP4 have a scalar as the dest and vectors as the src, don't screw with the swizzle specifications. + if ( nInstruction == D3DSIO_DP3 ) + { + sParam2 = EnsureNumSwizzleComponents( sParam2, 3 ); + sParam3 = EnsureNumSwizzleComponents( sParam3, 3 ); + } + else if ( nInstruction == D3DSIO_DP4 ) + { + sParam2 = EnsureNumSwizzleComponents( sParam2, 4 ); + sParam3 = EnsureNumSwizzleComponents( sParam3, 4 ); + } + else if ( nInstruction == D3DSIO_DST ) + { + m_bUsesDSTInstruction = true; + sParam2 = EnsureNumSwizzleComponents( sParam2, 4 ); + sParam3 = EnsureNumSwizzleComponents( sParam3, 4 ); + } + else + { + sParam2 = FixGLSLSwizzle( sParam1, sParam2 ); + sParam3 = FixGLSLSwizzle( sParam1, sParam3 ); + } + + char buff[256]; + if ( nInstruction == D3DSIO_ADD || nInstruction == D3DSIO_SUB || nInstruction == D3DSIO_MUL ) + { + // These all look like x = y op z + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s %s %s;\n", sParam1.String(), sParam2.String(), GetGLSLOperatorString( nInstruction ), sParam3.String() ); + } + else + { + int nDestComponents = GetNumSwizzleComponents( sParam1.String() ); + int nSrcComponents = GetNumSwizzleComponents( sParam2.String() ); + + // All remaining instructions can use GLSL intrinsics like dot() and cross(). 
+ bool bDoubleClose = OpenIntrinsic( nInstruction, buff, sizeof( buff ), nDestComponents, nSrcComponents ); + + if ( ( nSrcComponents == 1 ) && ( nInstruction == D3DSIO_SGE ) ) + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s%s >= %s );\n", sParam1.String(), buff, sParam2.String(), sParam3.String() ); + } + else if ( ( nSrcComponents == 1 ) && ( nInstruction == D3DSIO_SLT ) ) + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s%s < %s );\n", sParam1.String(), buff, sParam2.String(), sParam3.String() ); + } + else + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s%s, %s %s;\n", sParam1.String(), buff, sParam2.String(), sParam3.String(), bDoubleClose ? ") )" : ")" ); + } + } + + // If the _SAT instruction modifier is used, then do a saturate here. + if ( nDestToken & D3DSPDM_SATURATE ) + { + int nComponents = GetNumSwizzleComponents( sParam1.String() ); + if ( nComponents == 0 ) + nComponents = 4; + + PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", sParam1.String(), sParam1.String(), g_szVecZeros[nComponents], g_szVecOnes[nComponents] ); + } +} + +void D3DToGL::HandleBinaryOp_ASM( uint32 nInstruction ) +{ + CUtlString sParam1 = GetParameterString( GetNextToken(), DST_REGISTER, false, NULL ); + int nARLComp0 = ARL_DEST_NONE; + CUtlString sParam2 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp0 ); + int nARLComp1 = ARL_DEST_NONE; + CUtlString sParam3 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp1 ); + + // This optionally inserts a move from our dummy address register to the .x component of the real one + InsertMoveFromAddressRegister( m_pBufALUCode, nARLComp0, nARLComp1 ); + + char buff[256]; + PrintOpcode( nInstruction, buff, sizeof( buff ) ); + PrintToBufWithIndents( *m_pBufALUCode, "%s%s, %s, %s;\n", buff, sParam1.String(), sParam2.String(), sParam3.String() ); +} + +void D3DToGL::WriteGLSLCmp( const char *pDestReg, const char *pSrc0Reg, const char *pSrc1Reg, const char *pSrc2Reg ) +{ + int 
nWriteMaskEntries = GetNumWriteMaskEntries( pDestReg ); + for ( int i=0; i < nWriteMaskEntries; i++ ) + { + char params[4][256]; + WriteParamWithSingleMaskEntry( pDestReg, i, params[0], sizeof( params[0] ) ); + WriteParamWithSingleMaskEntry( pSrc0Reg, i, params[1], sizeof( params[1] ) ); + WriteParamWithSingleMaskEntry( pSrc1Reg, i, params[2], sizeof( params[2] ) ); + WriteParamWithSingleMaskEntry( pSrc2Reg, i, params[3], sizeof( params[3] ) ); + + PrintToBufWithIndents( *m_pBufALUCode, "%s = ( %s >= 0.0 ) ? %s : %s;\n", params[0], params[1], params[2], params[3] ); + } +} + +void D3DToGL::Handle_CMP() +{ + // In Direct3D, result = (src0 >= 0.0) ? src1 : src2 + // In OpenGL, result = (src0 < 0.0) ? src1 : src2 + // + // As a result, arguments are effectively in a different order than Direct3D! !#$&*!%#$& + char pDestReg[64], pSrc0Reg[64], pSrc1Reg[64], pSrc2Reg[64]; + uint32 nDestToken = GetNextToken(); + PrintParameterToString( nDestToken, DST_REGISTER, pDestReg, sizeof( pDestReg ), false, NULL ); + PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc0Reg, sizeof( pSrc0Reg ), false, NULL ); + PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc1Reg, sizeof( pSrc1Reg ), false, NULL ); + PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc2Reg, sizeof( pSrc2Reg ), false, NULL ); + + // These are a tricky case.. we have to expand it out into multiple statements. + char szDestBase[256]; + GetParamNameWithoutSwizzle( pDestReg, szDestBase, sizeof( szDestBase ) ); + + V_strncpy( pSrc0Reg, FixGLSLSwizzle( pDestReg, pSrc0Reg ), sizeof( pSrc0Reg ) ); + V_strncpy( pSrc1Reg, FixGLSLSwizzle( pDestReg, pSrc1Reg ), sizeof( pSrc1Reg ) ); + V_strncpy( pSrc2Reg, FixGLSLSwizzle( pDestReg, pSrc2Reg ), sizeof( pSrc2Reg ) ); + + // This isn't reliable! + //if ( DoParamNamesMatch( pDestReg, pSrc0Reg ) && GetNumSwizzleComponents( pDestReg ) > 1 ) + if ( 1 ) + { + // So the dest register is the same as the comparand. We're in danger of screwing up our results. 
+ // + // For example, this code: + // CMP r0.xy, r0.xx, r1, r2 + // would generate this: + // r0.x = (r0.x >= 0) ? r1.x : r2.x; + // r0.y = (r0.x >= 0) ? r1.x : r2.x; + // + // But the first lines changes r0.x and thus screws the atomicity of the CMP instruction for the second line. + // So we assign r0 to a temporary first and then write to the temporary. + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s;\n", g_pAtomicTempVarName, szDestBase ); + + char szTempVar[256]; + ReplaceParamName( pDestReg, g_pAtomicTempVarName, szTempVar, sizeof( szTempVar ) ); + WriteGLSLCmp( szTempVar, pSrc0Reg, pSrc1Reg, pSrc2Reg ); + + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s;\n", szDestBase, g_pAtomicTempVarName ); + m_bUsedAtomicTempVar = true; + } + else + { + // Just write out the simple expanded version of the CMP. No need to use atomic_temp_var. + WriteGLSLCmp( pDestReg, pSrc0Reg, pSrc1Reg, pSrc2Reg ); + } + + // If the _SAT instruction modifier is used, then do a saturate here. + if ( nDestToken & D3DSPDM_SATURATE ) + { + int nComponents = GetNumSwizzleComponents( pDestReg ); + if ( nComponents == 0 ) + nComponents = 4; + + PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", pDestReg, pDestReg, g_szVecZeros[nComponents], g_szVecOnes[nComponents] ); + } +} + +void D3DToGL::Handle_NRM() +{ + char pDestReg[64]; + char pSrc0Reg[64]; + PrintParameterToString( GetNextToken(), DST_REGISTER, pDestReg, sizeof( pDestReg ), false, NULL ); + int nARLSrcComp = ARL_DEST_NONE; + PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc0Reg, sizeof( pSrc0Reg ), false, &nARLSrcComp ); + + if ( nARLSrcComp != -1 ) + { + InsertMoveFromAddressRegister( m_pBufALUCode, nARLSrcComp, -1, -1 ); + } + + CUtlString sSrc = EnsureNumSwizzleComponents( pSrc0Reg, 3 ); + PrintToBufWithIndents( *m_pBufALUCode, "%s = normalize( %s );\n", pDestReg, sSrc.String() ); +} + +void D3DToGL::Handle_UnaryOp( uint32 nInstruction ) +{ + uint32 nDestToken = GetNextToken(); + CUtlString sParam1 = 
GetParameterString( nDestToken, DST_REGISTER, false, NULL ); + CUtlString sParam2 = GetParameterString( GetNextToken(), SRC_REGISTER, false, NULL ); + sParam2 = FixGLSLSwizzle( sParam1, sParam2 ); + + + if ( nInstruction == D3DSIO_MOV ) + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s;\n", sParam1.String(), sParam2.String() ); + } + else if ( nInstruction == D3DSIO_RSQ ) + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = inversesqrt( %s );\n", sParam1.String(), sParam2.String() ); + } + else if ( nInstruction == D3DSIO_RCP ) + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = 1.0 / %s;\n", sParam1.String(), sParam2.String() ); + } + else if ( nInstruction == D3DSIO_EXP ) + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = exp2( %s );\n", sParam1.String(), sParam2.String() ); + } + else if ( nInstruction == D3DSIO_FRC ) + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = fract( %s );\n", sParam1.String(), sParam2.String() ); + } + else if ( nInstruction == D3DSIO_LOG ) // d3d 'log' is log base 2 + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = log2( %s );\n", sParam1.String(), sParam2.String() ); + } + else if ( nInstruction == D3DSIO_ABS ) // rbarris did this one, Jason please check + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = abs( %s );\n", sParam1.String(), sParam2.String() ); + } + else if ( nInstruction == D3DSIO_MOVA ) + { + m_bDeclareAddressReg = true; + PrintToBufWithIndents( *m_pBufALUCode, "%s = %s;\n", sParam1.String(), sParam2.String() ); + + if ( !m_bGenerateBoneUniformBuffer ) + { + m_nHighestRegister = DXABSTRACT_VS_PARAM_SLOTS - 1; + } + } + else + { + Error( "Unsupported instruction" ); + } + + // If the _SAT instruction modifier is used, then do a saturate here. 
+ if ( nDestToken & D3DSPDM_SATURATE ) + { + int nComponents = GetNumSwizzleComponents( sParam1.String() ); + if ( nComponents == 0 ) + { + nComponents = 4; + } + + PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", sParam1.String(), sParam1.String(), g_szVecZeros[nComponents], g_szVecOnes[nComponents] ); + } +} + +void D3DToGL::WriteGLSLSamplerDefinitions() +{ + int nSamplersWritten = 0; + for ( int i=0; i < ARRAYSIZE( m_dwSamplerTypes ); i++ ) + { + if ( m_dwSamplerTypes[i] == SAMPLER_TYPE_2D ) + { + if ( ( ( 1 << i ) & m_nShadowDepthSamplerMask ) != 0 ) + { + PrintToBuf( *m_pBufHeaderCode, "uniform sampler2DShadow sampler%d;\n", i ); + } + else + { + PrintToBuf( *m_pBufHeaderCode, "uniform sampler2D sampler%d;\n", i ); + } + ++nSamplersWritten; + } + else if ( m_dwSamplerTypes[i] == SAMPLER_TYPE_3D ) + { + PrintToBuf( *m_pBufHeaderCode, "uniform sampler3D sampler%d;\n", i ); + ++nSamplersWritten; + } + else if ( m_dwSamplerTypes[i] == SAMPLER_TYPE_CUBE ) + { + PrintToBuf( *m_pBufHeaderCode, "uniform samplerCube sampler%d;\n", i ); + ++nSamplersWritten; + } + else if ( m_dwSamplerTypes[i] != SAMPLER_TYPE_UNUSED ) + { + Error( "Unknown sampler type." ); + } + } + + if ( nSamplersWritten > 0 ) + PrintToBuf( *m_pBufHeaderCode, "\n\n" ); +} + +void D3DToGL::WriteGLSLOutputVariableAssignments() +{ + if ( m_bVertexShader ) + { + // Map output "oN" registers back to GLSL output variables. 
+ if ( m_bAddHexCodeComments ) + { + PrintToBuf( *m_pBufAttribCode, "\n// Now we're storing the oN variables from the output dcl_ statements back into their GLSL equivalents.\n" ); + } + + for ( int i=0; i < ARRAYSIZE( m_DeclaredOutputs ); i++ ) + { + if ( m_DeclaredOutputs[i] == UNDECLARED_OUTPUT ) + continue; + + if ( ( m_dwTexCoordOutMask & ( 1 << i ) ) == 0 ) + continue; + + uint32 dwToken = m_DeclaredOutputs[i]; + + uint32 dwUsage = ( dwToken & D3DSP_DCL_USAGE_MASK ); + uint32 dwUsageIndex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT; + + if ( ( dwUsage == D3DDECLUSAGE_FOG ) || ( dwUsage == D3DDECLUSAGE_PSIZE ) ) + { + TranslationError(); // Not supported yet, but can be if we need it. + } + + if ( dwUsage == D3DDECLUSAGE_COLOR ) + { + PrintToBufWithIndents( *m_pBufALUCode, "%s = oTempT%d;\n", dwUsageIndex ? "gl_FrontSecondaryColor" : "gl_FrontColor", i ); + } + else if ( dwUsage == D3DDECLUSAGE_TEXCOORD ) + { + char buf[256]; + if ( m_nCentroidMask & ( 0x00000001 << dwUsageIndex ) ) + { + V_snprintf( buf, sizeof( buf ), "centroid varying vec4 oT%d;\n", dwUsageIndex ); // centroid varying + } + else + { + V_snprintf( buf, sizeof( buf ), "varying vec4 oT%d;\n", dwUsageIndex ); + } + StrcatToHeaderCode( buf ); + + PrintToBufWithIndents( *m_pBufALUCode, "oT%d = oTempT%d;\n", dwUsageIndex, i ); + } + } + } +} + +void D3DToGL::WriteGLSLInputVariableAssignments() +{ + if ( m_bVertexShader ) + return; + + for ( int i=0; i < ARRAYSIZE( m_DeclaredInputs ); i++ ) + { + if ( m_DeclaredInputs[i] == UNDECLARED_INPUT ) + continue; + + uint32 dwToken = m_DeclaredInputs[i]; + + uint32 dwUsage = ( dwToken & D3DSP_DCL_USAGE_MASK ); + uint32 dwUsageIndex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT; + + if ( dwUsage == D3DDECLUSAGE_COLOR ) + { + PrintToBufWithIndents( *m_pBufAttribCode, "vec4 oTempT%d = %s;\n", i, dwUsageIndex ? 
"gl_SecondaryColor" : "gl_Color" ); + } + else if ( dwUsage == D3DDECLUSAGE_TEXCOORD ) + { + PrintToBufWithIndents( *m_pBufAttribCode, "vec4 oTempT%d = oT%d;\n", i, dwUsageIndex ); + } + } +} + +void D3DToGL::Handle_DeclarativeNonDclOp( uint32 nInstruction ) +{ + char buff[128]; + uint32 dwToken = GetNextToken(); + PrintParameterToString( dwToken, DST_REGISTER, buff, sizeof( buff ), false, NULL ); + + if ( nInstruction == D3DSIO_TEXKILL ) + { + // TEXKILL is supposed to discard the pixel if any of the src register's X, Y, or Z components are less than zero. + // We have to translate it to something like: + // if ( r0.x < 0.0 || r0.y < 0.0 ) + // discard; + char c[3]; + c[0] = GetSwizzleComponent( buff, 0 ); + c[1] = GetSwizzleComponent( buff, 1 ); + c[2] = GetSwizzleComponent( buff, 2 ); + + // Get the unique components. + char cUnique[3]; + cUnique[0] = c[0]; + + int nUnique = 1; + if ( c[1] != c[0] ) + cUnique[nUnique++] = c[1]; + + if ( c[2] != c[1] && c[2] != c[0] ) + cUnique[nUnique++] = c[2]; + + // Get the src register base name. 
+ char szBase[256]; + GetParamNameWithoutSwizzle( buff, szBase, sizeof( szBase ) ); + + PrintToBufWithIndents( *m_pBufALUCode, "if ( %s.%c < 0.0 ", szBase, cUnique[0] ); + for ( int i=1; i < nUnique; i++ ) + { + PrintToBuf( *m_pBufALUCode, "|| %s.%c < 0.0 ", szBase, cUnique[i] ); + } + PrintToBuf( *m_pBufALUCode, ")\n{\n\tdiscard;\n}\n" ); + } + else + { + char szOpcode[128]; + PrintOpcode( nInstruction, szOpcode, sizeof( szOpcode ) ); + StrcatToALUCode( szOpcode ); + + StrcatToALUCode( buff ); + StrcatToALUCode( ";\n" ); + } +} + + +void D3DToGL::NoteTangentInputUsed() +{ + if ( !m_bTangentInputUsed ) + { + m_bTangentInputUsed = true; +// PrintToBuf( *m_pBufParamCode, "attribute vec4 %s;\n", g_pTangentAttributeName ); + } +} + + +// These are the only ARL instructions that should appear in the instruction stream +void D3DToGL::InsertMoveInstruction( CUtlBuffer *pCode, int nARLComponent ) +{ + PrintIndentation( ( char * )pCode->Base(), pCode->Size() ); + + switch ( nARLComponent ) + { + case ARL_DEST_X: + strcat_s( ( char * )pCode->Base(), pCode->Size(), "a0 = int( va_r.x );\n" ); + break; + case ARL_DEST_Y: + strcat_s( ( char * )pCode->Base(), pCode->Size(), "a0 = int( va_r.y );\n" ); + break; + case ARL_DEST_Z: + strcat_s( ( char * )pCode->Base(), pCode->Size(), "a0 = int( va_r.z );\n" ); + break; + case ARL_DEST_W: + strcat_s( ( char * )pCode->Base(), pCode->Size(), "a0 = int( va_r.w );\n" ); + break; + } +} + +// This optionally inserts a move from our dummy address register to the .x component of the real one +void D3DToGL::InsertMoveFromAddressRegister( CUtlBuffer *pCode, int nARLComp0, int nARLComp1, int nARLComp2 /* = ARL_DEST_NONE */ ) +{ + // We no longer need to do this in GLSL - we put the cast to int from the dummy address register va_r.x, va_r.y, etc. 
directly into the instruction + return; +} +


//------------------------------------------------------------------------------
// TranslateShader()
//
// This is the main function that the outside world sees. It walks the D3D9
// shader token stream (as returned from the D3DX compile routine) and writes
// the equivalent GLSL program text into pBufDisassembledCode.
//
//   code                     - token stream; the first token is the version
//                              token, and parsing stops at D3DPS_END()
//   pBufDisassembledCode     - caller-owned text buffer that receives the
//                              header and, at the end, the complete program.
//                              It is written through Base()/Size(), so it must
//                              already be large enough for the whole program.
//   bVertexShader            - out: true if the stream is a vertex shader,
//                              false if it is a pixel shader
//   options                  - bitwise OR of D3DToGL_Option* flags
//   nShadowDepthSamplerMask  - stored in m_nShadowDepthSamplerMask for use by
//                              the sampler handling code
//   nCentroidMask            - bit i set => texcoord output i is declared as
//                              "centroid varying" (pre-SM3 vertex shaders)
//   debugLabel               - optional label written into a trailing comment;
//                              "none" is written when this is NULL
//
// Returns DISASM_OK.
//------------------------------------------------------------------------------


int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bool *bVertexShader, uint32 options, int32 nShadowDepthSamplerMask, uint32 nCentroidMask, char *debugLabel )
{
	uint32 dwToken, nInstruction, nNumTokensToSkip;
	char buff[256];

	// obey options
	m_bUseEnvParams				= (options & D3DToGL_OptionUseEnvParams) != 0;
	m_bDoFixupZ					= (options & D3DToGL_OptionDoFixupZ) != 0;
	m_bDoFixupY					= (options & D3DToGL_OptionDoFixupY) != 0;
	m_bDoUserClipPlanes			= (options & D3DToGL_OptionDoUserClipPlanes) != 0;

	m_bAddHexCodeComments		= (options & D3DToGL_AddHexComments) != 0;
	m_bPutHexCodesAfterLines	= (options & D3DToGL_PutHexCommentsAfterLines) != 0;
	m_bGeneratingDebugText		= (options & D3DToGL_GeneratingDebugText) != 0;
	m_bGenerateSRGBWriteSuffix	= (options & D3DToGL_OptionSRGBWriteSuffix) != 0;

	m_NumIndentTabs = 1; // start code indented one tab
	m_nLoopDepth = 0;

	// debugging
	m_bSpew = (options & D3DToGL_OptionSpew) != 0;

	// These are not accessed below in a way that will cause them to grow, so
	// we could overflow these and/or the buffer pointed to by pDisassembledCode
	m_pBufAttribCode = new CUtlBuffer( 100, 10000, CUtlBuffer::TEXT_BUFFER );
	m_pBufParamCode = new CUtlBuffer( 100, 10000, CUtlBuffer::TEXT_BUFFER );
	m_pBufALUCode = new CUtlBuffer( 100, 60000, CUtlBuffer::TEXT_BUFFER );

	// Pointers to text buffers for assembling sections of the program
	m_pBufHeaderCode = pBufDisassembledCode;
	char *pAttribMapStart = NULL;
	((char*)m_pBufHeaderCode->Base())[0] = 0;
	((char*)m_pBufAttribCode->Base())[0] = 0;
	((char*)m_pBufParamCode->Base())[0] = 0;
	((char*)m_pBufALUCode->Base())[0] = 0;


	for ( uint32 i=0; i<MAX_SHADER_CONSTANTS; i++ )
	{
		m_bConstantRegisterDefined[i] = false;
	}

	// Track shadow sampler usage for proper declaration
	m_nShadowDepthSamplerMask = nShadowDepthSamplerMask;
	m_bDeclareShadowOption = false;

	// Various flags set while parsing code to drive various declaration instructions
	m_bNeedsD2AddTemp = false;
	m_bNeedsLerpTemp = false;
	m_bNeedsNRMTemp = false;
	m_bNeedsSinCosDeclarations = false;
	m_bDeclareAddressReg = false;
	m_bDeclareVSOPos = false;
	m_bDeclareVSOFog = false;
	m_dwTexCoordOutMask = 0x00000000;
	m_nVSPositionOutput = -1;
	m_bOutputColorRegister[0] = false;
	m_bOutputColorRegister[1] = false;
	m_bOutputColorRegister[2] = false;
	m_bOutputColorRegister[3] = false;
	m_bOutputDepthRegister = false;
	m_bTangentInputUsed = false;
	m_bUsesDSTInstruction = false;
	m_dwTempUsageMask = 0x00000000;
	m_dwSamplerUsageMask = 0x00000000;
	m_dwConstIntUsageMask = 0x00000000;
	m_dwDefConstIntUsageMask = 0x00000000;
	memset( m_dwDefConstIntIterCount, 0, sizeof( m_dwDefConstIntIterCount ) );
	m_dwConstBoolUsageMask = 0x00000000;
	m_nCentroidMask = nCentroidMask;
	m_nHighestRegister = 0;
	m_nHighestBoneRegister = -1;
	m_bGenerateBoneUniformBuffer = false;
	m_bUseBindlessTexturing = ((options & D3DToGL_OptionUseBindlessTexturing) != 0);

	m_bUsedAtomicTempVar = false;
	for ( int i=0; i < ARRAYSIZE( m_dwSamplerTypes ); i++ )
	{
		m_dwSamplerTypes[i] = SAMPLER_TYPE_UNUSED;
	}

	for ( int i=0; i < ARRAYSIZE( m_DeclaredOutputs ); i++ )
	{
		m_DeclaredOutputs[i] = UNDECLARED_OUTPUT;
	}

	for ( int i=0; i < ARRAYSIZE( m_DeclaredInputs ); i++ )
	{
		m_DeclaredInputs[i] = UNDECLARED_INPUT;
	}

	// 0xFFFFFFFF marks an attribute slot as unmapped (see the attrib map write-out below)
	memset( m_dwAttribMap, 0xFF, sizeof(m_dwAttribMap) );

	m_pdwBaseToken = m_pdwNextToken = code; // Initialize dwToken pointers

	dwToken = GetNextToken();
	m_dwMajorVersion = D3DSHADER_VERSION_MAJOR( dwToken );
	m_dwMinorVersion = D3DSHADER_VERSION_MINOR( dwToken );

	// If pixel shader
	const char *glslExtText = "#extension GL_ARB_shader_texture_lod : require\n";//m_bUseBindlessTexturing ? "#extension GL_NV_bindless_texture : require\n" : "";
	// 7ls
	const char *glslVersionText = m_bUseBindlessTexturing ? "330 compatibility" : "120";

	if ( ( dwToken & 0xFFFF0000 ) == 0xFFFF0000 )
	{
		// must explicitly enable extensions if emitting GLSL
		V_snprintf( (char *)m_pBufHeaderCode->Base(), m_pBufHeaderCode->Size(), "#version %s\n%s", glslVersionText, glslExtText );
		m_bVertexShader = false;
	}
	else // vertex shader
	{
		m_bGenerateSRGBWriteSuffix = false;

		V_snprintf( (char *)m_pBufHeaderCode->Base(), m_pBufHeaderCode->Size(), "#version %s\n%s//ATTRIBMAP-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx\n", glslVersionText, glslExtText );

		// find that first '-xx' which is where the attrib map will be written later.
		// The search should only fail if the header buffer could not hold the line
		// written above; in that case leave pAttribMapStart NULL so the attrib map
		// write-out below is skipped instead of writing through a bad pointer.
		char *pFirstAttribSlot = strstr( (char *)m_pBufHeaderCode->Base(), "-xx" );
		Assert( pFirstAttribSlot != NULL );
		pAttribMapStart = pFirstAttribSlot ? ( pFirstAttribSlot + 1 ) : NULL;

		m_bVertexShader = true;
	}

	*bVertexShader = m_bVertexShader;

	m_bGenerateBoneUniformBuffer = m_bVertexShader && ((options & D3DToGL_OptionGenerateBoneUniformBuffer) != 0);

	if ( m_bAddHexCodeComments )
	{
		RecordInputAndOutputPositions();
	}

	if ( m_bSpew )
	{
		printf("\n************* translating shader " );
	}

	int opcounter = 0;

	// Loop until we hit the end dwToken...note that D3DPS_END() == D3DVS_END() so this works for either
	while ( dwToken != D3DPS_END() )
	{
		if ( m_bAddHexCodeComments )
		{
			AddTokenHexCode();
			RecordInputAndOutputPositions();
		}

#ifdef POSIX
		int tokenIndex = m_pdwNextToken - code;
#endif
		// Remember how much ALU text exists so the spew below can print only what this token added
		int aluCodeLength0 = V_strlen( (char *) m_pBufALUCode->Base() );

		dwToken = GetNextToken();	// Get next dwToken in the stream
		nInstruction = Opcode( dwToken ); // Mask out the instruction opcode

		if ( m_bSpew )
		{
#ifdef POSIX
			printf("\n** token# %04x inst# %04d opcode %s (%08x)", tokenIndex, opcounter, GLMDecode(eD3D_SIO, nInstruction), dwToken );
#endif
			opcounter++;
		}

		switch ( nInstruction )
		{
			// -- No arguments at all -----------------------------------------------
			case D3DSIO_NOP:
				// D3D compiler outputs NOPs when shader debugging/optimizations are disabled.
				break;

			case D3DSIO_PHASE:
			case D3DSIO_RET:
			case D3DSIO_ENDLOOP:
			case D3DSIO_BREAK:
				TranslationError();
				PrintOpcode( nInstruction, buff, sizeof( buff ) );
				StrcatToALUCode( buff );
				StrcatToALUCode( ";\n" );
				break;

			// -- "Declarative" non dcl ops ----------------------------------------
			case D3DSIO_TEXDEPTH:
			case D3DSIO_TEXKILL:
				Handle_DeclarativeNonDclOp( nInstruction );
				break;

			// -- Unary ops -------------------------------------------------
			case D3DSIO_BEM:
			case D3DSIO_TEXBEM:
			case D3DSIO_TEXBEML:
			case D3DSIO_TEXDP3:
			case D3DSIO_TEXDP3TEX:
			case D3DSIO_TEXM3x2DEPTH:
			case D3DSIO_TEXM3x2TEX:
			case D3DSIO_TEXM3x3:
			case D3DSIO_TEXM3x3PAD:
			case D3DSIO_TEXM3x3TEX:
			case D3DSIO_TEXM3x3VSPEC:
			case D3DSIO_TEXREG2AR:
			case D3DSIO_TEXREG2GB:
			case D3DSIO_TEXREG2RGB:
			case D3DSIO_LABEL:
			case D3DSIO_CALL:
			case D3DSIO_LOOP:
			case D3DSIO_BREAKP:
			case D3DSIO_DSX:
			case D3DSIO_DSY:
				TranslationError();
				break;

			case D3DSIO_IFC:
			{
				static const char *s_szCompareStrings[ 7 ] =
				{
					"__INVALID__",
					">",
					"==",
					">=",
					"<",
					"!=",
					"<="
				};

				// Compare mode is encoded in instruction token
				uint32 dwCompareMode = OpcodeSpecificData( dwToken );

				Assert( ( dwCompareMode >= 1 ) && ( dwCompareMode <= 6 ) );

				// Asserts can be compiled out, so never index past the end of the table;
				// an out-of-range mode is emitted as "__INVALID__" like mode 0.
				const uint32 nCompareIndex = ( dwCompareMode <= 6 ) ? dwCompareMode : 0;

				// Get left side of compare
				dwToken = GetNextToken();
				char szLeftSide[32];
				PrintParameterToString( dwToken, SRC_REGISTER, szLeftSide, sizeof( szLeftSide ), false, NULL );

				// Get right side of compare
				dwToken = GetNextToken();
				char szRightSide[32];
				PrintParameterToString( dwToken, SRC_REGISTER, szRightSide, sizeof( szRightSide ), false, NULL );

				PrintToBufWithIndents( *m_pBufALUCode, "if ( %s %s %s )\n", szLeftSide, s_szCompareStrings[nCompareIndex], szRightSide );
				StrcatToALUCode( "{\n" );
				m_NumIndentTabs++;

				break;
			}
			case D3DSIO_IF:
				dwToken = GetNextToken();
				PrintParameterToString( dwToken, SRC_REGISTER, buff, sizeof( buff ), false, NULL );

				PrintToBufWithIndents( *m_pBufALUCode, "if ( %s )\n", buff );
				StrcatToALUCode( "{\n" );
				m_NumIndentTabs++;

				break;

			case D3DSIO_ELSE:
				m_NumIndentTabs--;
				StrcatToALUCode( "}\n" );
				StrcatToALUCode( "else\n" );
				StrcatToALUCode( "{\n" );
				m_NumIndentTabs++;

				break;

			case D3DSIO_ENDIF:
				m_NumIndentTabs--;
				StrcatToALUCode( "}\n" );

				break;

			case D3DSIO_REP:
				dwToken = GetNextToken();
				PrintParameterToString( dwToken, SRC_REGISTER, buff, sizeof( buff ), false, NULL );

				// In practice, this is the only form of for loop that will appear in DX asm
				PrintToBufWithIndents( *m_pBufALUCode, "for( int i=0; i < %s; i++ )\n", buff );
				StrcatToALUCode( "{\n" );

				m_nLoopDepth++;

				// For now, we don't deal with loop nesting
				// Easy enough to fix later with an array of loop names i, j, k etc
				Assert( m_nLoopDepth <= 1 );

				m_NumIndentTabs++;

				break;

			case D3DSIO_ENDREP:
				m_nLoopDepth--;
				m_NumIndentTabs--;
				StrcatToALUCode( "}\n" );

				break;

			case D3DSIO_NRM:
				Handle_NRM();
				break;

			case D3DSIO_MOVA:

				Handle_UnaryOp( nInstruction );

				break;

			// Unary operations
			case D3DSIO_MOV:
			case D3DSIO_RCP:
			case D3DSIO_RSQ:
			case D3DSIO_EXP:
			case D3DSIO_EXPP:
			case D3DSIO_LOG:
			case D3DSIO_LOGP:
			case D3DSIO_FRC:
			case D3DSIO_LIT:
			case D3DSIO_ABS:
				Handle_UnaryOp( nInstruction );
				break;

			// -- Binary ops -------------------------------------------------
			case D3DSIO_TEXM3x3SPEC:
			case D3DSIO_M4x4:
			case D3DSIO_M4x3:
			case D3DSIO_M3x4:
			case D3DSIO_M3x3:
			case D3DSIO_M3x2:
			case D3DSIO_CALLNZ:
			case D3DSIO_SETP:
				TranslationError();
				break;

			case D3DSIO_BREAKC:
				Handle_BREAKC( dwToken );
				break;

			// Binary Operations
			case D3DSIO_ADD:
			case D3DSIO_SUB:
			case D3DSIO_MUL:
			case D3DSIO_DP3:
			case D3DSIO_DP4:
			case D3DSIO_MIN:
			case D3DSIO_MAX:
			case D3DSIO_DST:
			case D3DSIO_SLT:
			case D3DSIO_SGE:
			case D3DSIO_CRS:
			case D3DSIO_POW:
				HandleBinaryOp_GLSL( nInstruction );

				break;

			// -- Ternary ops -------------------------------------------------
			case D3DSIO_DP2ADD:
				Handle_DP2ADD();
				break;
			case D3DSIO_LRP:
				Handle_LRP( nInstruction );
				break;
			case D3DSIO_SGN:
				Assert( m_bVertexShader );
				TranslationError(); // TODO emulate with SLT etc
				break;
			case D3DSIO_CND:
				TranslationError();
				break;
			case D3DSIO_CMP:
				Handle_CMP();
				break;
			case D3DSIO_SINCOS:
				Handle_SINCOS();
				break;
			case D3DSIO_MAD:
				Handle_MAD( nInstruction );
				break;

			// -- Quaternary op ------------------------------------------------
			case D3DSIO_TEXLDD:
				Handle_TexLDD( nInstruction );
				break;

			// -- Special cases: texcoord vs texcrd and tex vs texld -----------
			case D3DSIO_TEXCOORD:
				Handle_TexCoord();
				break;

			// NOTE(review): D3DSIO_TEX passes the full token while D3DSIO_TEXLDL passes
			// only the opcode -- presumably Handle_TEX needs the token's control bits
			// only for the non-LOD form; confirm against Handle_TEX.
			case D3DSIO_TEX:
				Handle_TEX( dwToken, false );
				break;

			case D3DSIO_TEXLDL:
				Handle_TEX( nInstruction, true );
				break;

			case D3DSIO_DCL:
				Handle_DCL();
				break;

			case D3DSIO_DEFB:
			case D3DSIO_DEFI:
				Handle_DEFIB( nInstruction );
				break;

			case D3DSIO_DEF:
				Handle_DEF();
				break;

			case D3DSIO_COMMENT:
				// Using OpcodeSpecificData() can fail here since the comments can be longer than 0xff dwords.
				// The comment length is the 15-bit field in bits 16..30 of the token
				// (D3DSI_COMMENTSIZE_MASK == 0x7FFF0000, D3DSI_COMMENTSIZE_SHIFT == 16).
				nNumTokensToSkip = ( dwToken & 0x7fff0000 ) >> 16;
				SkipTokens( nNumTokensToSkip );
				break;

			case D3DSIO_END:
				break;
		}

		if ( m_bSpew )
		{
			int aluCodeLength1 = V_strlen( (char *) m_pBufALUCode->Base() );
			if ( aluCodeLength1 != aluCodeLength0 )
			{
				// code was emitted
				printf( "\n    > %s", ((char *)m_pBufALUCode->Base()) + aluCodeLength0 );

				aluCodeLength0 = aluCodeLength1;
			}
		}
	}

	// Note that this constant packing expects .wzyx swizzles in case we ever use the SINCOS code in a ps_2_x shader
	//
	// The Microsoft documentation on this is all kinds of broken and, strangely, these numbers don't even
	// match the D3DSINCOSCONST1 and D3DSINCOSCONST2 constants used by the D3D assembly sincos instruction...
	if ( m_bNeedsSinCosDeclarations )
	{
		PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
		StrcatToParamCode( "vec4 scA = vec4( -1.55009923e-6, -2.17013894e-5, 0.00260416674, 0.00026041668 );\n" );
		PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
		StrcatToParamCode( "vec4 scB = vec4( -0.020833334, -0.125, 1.0, 0.5 );\n" );
	}

	// Stick in the sampler mask in hex
	PrintToBuf( *m_pBufHeaderCode, "%sSAMPLERMASK-%x\n", "//", m_dwSamplerUsageMask );

	// Pack the 16 sampler types, two bits each, into a single word
	uint nSamplerTypes = 0;
	for ( int i = 0; i < 16; i++ )
	{
		Assert( m_dwSamplerTypes[i] < 4);
		nSamplerTypes |= ( m_dwSamplerTypes[i] << ( i * 2 ) );
	}

	PrintToBuf( *m_pBufHeaderCode, "%sSAMPLERTYPES-%x\n", "//", nSamplerTypes );

	// fragData outputs referenced
	uint nFragDataMask = 0;
	for ( int i = 0; i < 4; i++ )
	{
		nFragDataMask |= m_bOutputColorRegister[ i ] ? ( 1 << i ) : 0;
	}

	PrintToBuf( *m_pBufHeaderCode, "%sFRAGDATAMASK-%x\n", "//", nFragDataMask );

	// Uniforms

	PrintToBuf( *m_pBufHeaderCode, "//HIGHWATER-%d\n", m_nHighestRegister + 1 );
	if ( ( m_bVertexShader ) && ( m_bGenerateBoneUniformBuffer ) )
	{
		PrintToBuf( *m_pBufHeaderCode, "//HIGHWATERBONE-%i\n", m_nHighestBoneRegister + 1 );
	}

	PrintToBuf( *m_pBufHeaderCode, "\nuniform vec4 %s[%d];\n", m_bVertexShader ? "vc" : "pc", m_nHighestRegister + 1 );

	if ( ( m_nHighestBoneRegister >= 0 ) && ( m_bVertexShader ) && ( m_bGenerateBoneUniformBuffer ) )
	{
		PrintToBuf( *m_pBufHeaderCode, "\nuniform vec4 %s[%d];\n", "vcbones", m_nHighestBoneRegister + 1 );
	}

	if ( m_bVertexShader )
	{
		PrintToBuf( *m_pBufHeaderCode, "\nuniform vec4 vcscreen;\n" );
	}

	// Integer constants that were referenced but not given a value by a DEFI become uniforms...
	for( int i=0; i<32; i++ )
	{
		if ( ( m_dwConstIntUsageMask & ( 0x00000001 << i ) ) &&
			 ( !( m_dwDefConstIntUsageMask & ( 0x00000001 << i ) ) )
			)
		{
			PrintToBuf( *m_pBufHeaderCode, "uniform int i%d ;\n", i );
		}
	}

	// ...while the ones defined in the shader are emitted as compile-time constants
	for( int i=0; i<32; i++ )
	{
		if ( m_dwDefConstIntUsageMask & ( 0x00000001 << i ) )
		{
			PrintToBuf( *m_pBufHeaderCode, "const int i%d = %i;\n", i, m_dwDefConstIntIterCount[i] );
		}
	}

	for( int i=0; i<32; i++ )
	{
		if ( m_dwConstBoolUsageMask & ( 0x00000001 << i ) )
		{
			PrintToBuf( *m_pBufHeaderCode, m_bVertexShader ? "uniform bool b%d;\n" : "uniform bool fb%d;\n", i );
		}
	}

	// Control bit for sRGB Write suffix
	if ( m_bGenerateSRGBWriteSuffix )
	{
		// R500 Hookup
		// Set this guy to 1 when the sRGBWrite state is true, otherwise 0
		StrcatToHeaderCode( "uniform float flSRGBWrite;\n" );
	}

	PrintToBuf( *m_pBufHeaderCode, "\n" );

	// Write samplers
	WriteGLSLSamplerDefinitions();

	if ( m_bUsesDSTInstruction )
	{
		PrintToBuf( *m_pBufHeaderCode, "vec4 dst(vec4 src0,vec4 src1) { return vec4(1.0f,src0.y*src1.y,src0.z,src1.w); }\n" );
	}

	if ( m_bDeclareAddressReg )
	{
		if ( !m_bGenerateBoneUniformBuffer )
		{
			m_nHighestRegister = DXABSTRACT_VS_PARAM_SLOTS - 1;
		}

		PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
		StrcatToParamCode( "vec4 va_r;\n" );
	}

	// GLSL type used for every temporary declared below
	const char *pTempVarStr = "vec4";

	// Declare temps in Param code buffer
	for( int i=0; i<32; i++ )
	{
		if ( m_dwTempUsageMask & ( 0x00000001 << i ) )
		{
			PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
			PrintToBuf( *m_pBufParamCode, "%s r%d;\n", pTempVarStr, i );
		}
	}

	if ( m_bVertexShader && (m_bDoUserClipPlanes || m_bDoFixupZ || m_bDoFixupY ) )
	{
		PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
		StrcatToParamCode( "vec4 vTempPos;\n" );
	}

	if ( ( m_bVertexShader ) && ( m_dwMajorVersion == 3 ) )
	{
		for ( int i = 0; i < 32; i++ )
		{
			if ( m_dwTexCoordOutMask & ( 1 << i ) )
			{
				PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );

				char buf[256];
				V_snprintf( buf, sizeof( buf ), "vec4 oTempT%i = vec4( 0, 0, 0, 0 );\n", i );
				StrcatToParamCode( buf );
			}
		}
	}

	if ( m_bNeedsSinCosDeclarations )
	{
		StrcatToParamCode( "vec3 vSinCosTmp;\n" ); // declare temp used by GLSL sin and cos intrinsics
	}

	// Optional temps needed to emulate dp2add instruction in DX pixel shaders
	if ( m_bNeedsD2AddTemp )
	{
		PrintToBuf( *m_pBufParamCode, "%s DP2A0;\n%s DP2A1;\n", pTempVarStr, pTempVarStr );
	}

	// Optional temp needed to emulate lerp instruction in DX vertex shaders
	if ( m_bNeedsLerpTemp )
	{
		PrintToBuf( *m_pBufParamCode, "%s LRP_TEMP;\n", pTempVarStr );
	}

	// Optional temp needed to emulate NRM instruction in DX shaders
	if ( m_bNeedsNRMTemp )
	{
		PrintToBuf( *m_pBufParamCode, "%s NRM_TEMP;\n", pTempVarStr );
	}

	if ( m_bDeclareVSOPos && m_bVertexShader )
	{
		if ( m_bDoUserClipPlanes )
		{
			StrcatToALUCode( "gl_ClipVertex = vTempPos;\n" ); // if user clip is enabled, jam clip space position into gl_ClipVertex
		}

		if ( m_bDoFixupZ || m_bDoFixupY )
		{
			// TODO: insert clip distance computation something like this:
			//
			//		StrcatToALUCode( "DP4 oCLP[0].x, oPos, vc[215]; \n" );
			//

			if ( m_bDoFixupZ )
			{
				StrcatToALUCode( "vTempPos.z = vTempPos.z * vc[0].z - vTempPos.w; // z' = (2*z)-w\n" );
			}

			if ( m_bDoFixupY )
			{
				// append instructions to flip Y over
				// new Y = -(old Y)
				StrcatToALUCode( "vTempPos.y = -vTempPos.y; // y' = -y \n" );
			}

			// Apply half pixel offset (0.5f pixel offset D3D) to output vertices to account for the pixel center difference between D3D9 and OpenGL.
			// This is the actual work in the shader. This works out to be 0.5 pixels wide because clip space is 2 units wide (-1, 1).
			StrcatToALUCode( "vTempPos.xy += vcscreen.xy * vTempPos.w;\n" );

			StrcatToALUCode( "gl_Position = vTempPos;\n" );
		}
		else
		{
			// NOTE(review): this string is ARB-assembly syntax rather than GLSL -- confirm
			// whether this path is still reachable now that the translator emits GLSL.
			StrcatToParamCode( "OUTPUT oPos = result.position;\n" );

			// TODO: insert clip distance computation something like this:
			//
			//		StrcatToALUCode( "DP4 oCLP[0].x, oPos, c[215]; \n" );
			//
		}
	}

	if ( m_bVertexShader )
	{
		if ( m_dwMajorVersion == 3 )
		{
			WriteGLSLOutputVariableAssignments();
		}
		else
		{
			for ( int i=0; i<32; i++ )
			{
				char outTexCoordBuff[64];

				// Don't declare a varying for the output that is mapped to the position output
				if ( i != m_nVSPositionOutput )
				{
					if ( m_dwTexCoordOutMask & ( 0x00000001 << i ) )
					{
						if ( m_nCentroidMask & ( 0x00000001 << i ) )
						{
							V_snprintf( outTexCoordBuff, sizeof( outTexCoordBuff ), "centroid varying vec4 oT%d;\n", i ); // centroid varying
							StrcatToHeaderCode( outTexCoordBuff );
						}
						else
						{
							V_snprintf( outTexCoordBuff, sizeof( outTexCoordBuff ), "varying vec4 oT%d;\n", i );
							StrcatToHeaderCode( outTexCoordBuff );
						}
					}
				}
			}
		}
	}
	else
	{
		if ( m_dwMajorVersion == 3 )
		{
			WriteGLSLInputVariableAssignments();
		}
	}

	// do some annotation at the end of the attrib block
	{
		char temp[1000];

		// pAttribMapStart is NULL if the ATTRIBMAP placeholder could not be located in the header
		if ( m_bVertexShader && ( pAttribMapStart != NULL ) )
		{
			// write attrib map into the text starting at pAttribMapStart - two hex digits per attrib
			// (each slot in the placeholder is "xx-", hence the stride of 3 characters)
			for( int i=0; i<16; i++ )
			{
				if ( m_dwAttribMap[i] != 0xFFFFFFFF )
				{
					V_snprintf( temp, sizeof(temp), "%02X", m_dwAttribMap[i] );
					memcpy( pAttribMapStart + (i*3), temp, 2 );
				}
			}
		}

		PrintIndentation( (char*)m_pBufAttribCode->Base(), m_pBufAttribCode->Size() );

		// This used to write out a translation counter into the shader as a comment. However, the order that shaders get in here
		// is non-deterministic between runs, and the change in this comment would cause shaders to appear different to the GL disk cache,
		// significantly increasing app load time.
		// Other code looks for trans#%d, so we can't just remove it. Instead, output it as 0.
		V_snprintf( temp, sizeof(temp), "%s trans#%d label:%s\n", "//", 0, debugLabel ? debugLabel : "none" );
		StrcatToAttribCode( temp );
	}

	// If we actually sample from a shadow depth sampler, we need to declare the shadow option at the top
	// NOTE(review): the emitted "OPTION ..." line is ARB-assembly syntax, not GLSL -- confirm
	// whether m_bDeclareShadowOption can still be set by the GLSL handlers.
	if ( m_bDeclareShadowOption )
	{
		StrcatToHeaderCode( "OPTION ARB_fragment_program_shadow;\n" );
	}

	StrcatToHeaderCode( "\nvoid main()\n{\n" );
	if ( m_bUsedAtomicTempVar )
	{
		PrintToBufWithIndents( *m_pBufHeaderCode, "vec4 %s;\n\n", g_pAtomicTempVarName );
	}

	// sRGB Write suffix
	// The emitted GLSL computes pow( color, 0.454545 ) as exp( log( color ) * 0.454545 ) and
	// blends it with the original color using flSRGBWrite.
	if ( m_bGenerateSRGBWriteSuffix )
	{
		StrcatToALUCode( "vec3 sRGBFragData;\n" );
		StrcatToALUCode( "sRGBFragData.xyz = log( gl_FragData[0].xyz );\n" );
		StrcatToALUCode( "sRGBFragData.xyz = sRGBFragData.xyz * vec3( 0.454545f, 0.454545f, 0.454545f );\n" );
		StrcatToALUCode( "sRGBFragData.xyz = exp( sRGBFragData.xyz );\n" );
		StrcatToALUCode( "gl_FragData[0].xyz = mix( gl_FragData[0].xyz, sRGBFragData, flSRGBWrite );\n" );
	}

	// Close main()
	strcat_s( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size(), "}\n" );

	// Put all of the strings together for final program ( pHeaderCode + pAttribCode + pParamCode + pALUCode )
	StrcatToHeaderCode( (char*)m_pBufAttribCode->Base() );
	StrcatToHeaderCode( (char*)m_pBufParamCode->Base() );
	StrcatToHeaderCode( (char*)m_pBufALUCode->Base() );

	// Cleanup - don't touch m_pBufHeaderCode, as it is managed by the caller
	delete m_pBufAttribCode;
	delete m_pBufParamCode;
	delete m_pBufALUCode;
	m_pBufAttribCode = m_pBufParamCode = m_pBufALUCode = NULL;

	if ( m_bSpew )
	{
		printf("\n************* translation complete\n\n " );
	}

	return DISASM_OK;
}