diff options
Diffstat (limited to 'mathlib/randsse.cpp')
| -rw-r--r-- | mathlib/randsse.cpp | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/mathlib/randsse.cpp b/mathlib/randsse.cpp new file mode 100644 index 0000000..b718d39 --- /dev/null +++ b/mathlib/randsse.cpp @@ -0,0 +1,109 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: generates 4 randum numbers in the range 0..1 quickly, using SIMD +// +//=====================================================================================// + +#include <math.h> +#include <float.h> // Needed for FLT_EPSILON +#include "basetypes.h" +#include <memory.h> +#include "tier0/dbg.h" +#include "mathlib/mathlib.h" +#include "mathlib/vector.h" +#include "mathlib/ssemath.h" + +// memdbgon must be the last include file in a .cpp file!!! +#include "tier0/memdbgon.h" + +// see knuth volume 3 for insight. + +class SIMDRandStreamContext +{ + fltx4 m_RandY[55]; + + fltx4 *m_pRand_J, *m_pRand_K; + + +public: + void Seed( uint32 seed ) + { + m_pRand_J=m_RandY+23; m_pRand_K=m_RandY+54; + for(int i=0;i<55;i++) + { + for(int j=0;j<4;j++) + { + SubFloat( m_RandY[i], j) = (seed>>16)/65536.0; + seed=(seed+1)*3141592621u; + } + } + } + + inline fltx4 RandSIMD( void ) + { + // ret= rand[k]+rand[j] + fltx4 retval=AddSIMD( *m_pRand_K, *m_pRand_J ); + + // if ( ret>=1.0) ret-=1.0 + fltx4 overflow_mask=CmpGeSIMD( retval, Four_Ones ); + retval=SubSIMD( retval, AndSIMD( Four_Ones, overflow_mask ) ); + + *m_pRand_K = retval; + + // update pointers w/ wrap-around + if ( --m_pRand_J < m_RandY ) + m_pRand_J=m_RandY+54; + if ( --m_pRand_K < m_RandY ) + m_pRand_K=m_RandY+54; + + return retval; + } +}; + +#define MAX_SIMULTANEOUS_RANDOM_STREAMS 32 + +static SIMDRandStreamContext s_SIMDRandContexts[MAX_SIMULTANEOUS_RANDOM_STREAMS]; + +static volatile int s_nRandContextsInUse[MAX_SIMULTANEOUS_RANDOM_STREAMS]; + +void SeedRandSIMD(uint32 seed) +{ + for( int i = 0; i<MAX_SIMULTANEOUS_RANDOM_STREAMS; i++) + s_SIMDRandContexts[i].Seed( seed+i ); +} + +fltx4 RandSIMD( int nContextIndex ) +{ + return s_SIMDRandContexts[nContextIndex].RandSIMD(); +} + +int GetSIMDRandContext( void ) +{ + for(;;) + { + for(int i=0; i < NELEMS( s_SIMDRandContexts ); i++) + { + if ( ! s_nRandContextsInUse[i] ) // available? + { + // try to take it! + if ( ThreadInterlockedAssignIf( &( s_nRandContextsInUse[i]), 1, 0 ) ) + { + return i; // done! + } + } + } + Assert(0); // why don't we have enough buffers? + ThreadSleep(); + } +} + +void ReleaseSIMDRandContext( int nContext ) +{ + s_nRandContextsInUse[ nContext ] = 0; +} + + +fltx4 RandSIMD( void ) +{ + return s_SIMDRandContexts[0].RandSIMD(); +} |