aboutsummaryrefslogtreecommitdiff
path: root/APEX_1.4/common/include/RandState.h
diff options
context:
space:
mode:
authorSheikh Dawood Abdul Ajees <[email protected]>2018-01-05 10:51:37 -0600
committerSheikh Dawood Abdul Ajees <[email protected]>2018-01-05 10:51:37 -0600
commit983c93ad16707bd2b092f425725140f145e967aa (patch)
treec5533c94dbffd31a1d179614354ea07dd769062a /APEX_1.4/common/include/RandState.h
parentMerge branch 'master' (diff)
downloadphysx-3.4-983c93ad16707bd2b092f425725140f145e967aa.tar.xz
physx-3.4-983c93ad16707bd2b092f425725140f145e967aa.zip
PhysX 3.4.1, APEX 1.4.1 Release @23307153
Diffstat (limited to 'APEX_1.4/common/include/RandState.h')
-rw-r--r--APEX_1.4/common/include/RandState.h19
1 files changed, 14 insertions, 5 deletions
diff --git a/APEX_1.4/common/include/RandState.h b/APEX_1.4/common/include/RandState.h
index 52f239c9..e3a4a0a1 100644
--- a/APEX_1.4/common/include/RandState.h
+++ b/APEX_1.4/common/include/RandState.h
@@ -103,13 +103,22 @@ struct PRNGInfo
// For CUDA PRNG: device part
#ifdef __CUDACC__
//*
+#if __CUDA_ARCH__ >= 300
+#define RAND_SCAN_OP(ofs) \
+ { \
+ unsigned int a = aData[scanIdx], c = cData[scanIdx]; \
+ unsigned int aOfs = __shfl_up(a, ofs), cOfs = __shfl_up(c, ofs); \
+ if (idxInWarp >= ofs) { a = a * aOfs; c = c * aOfs + cOfs; } \
+ aData[scanIdx] = a; cData[scanIdx] = c; \
+ }
+#else
#define RAND_SCAN_OP(ofs) \
{ \
unsigned int a = aData[scanIdx], c = cData[scanIdx]; \
unsigned int aOfs = aData[scanIdx - ofs], cOfs = cData[scanIdx - ofs]; \
- aData[scanIdx] = a * aOfs; \
- cData[scanIdx] = c * aOfs + cOfs; \
+ aData[scanIdx] = a * aOfs; cData[scanIdx] = c * aOfs + cOfs; \
}
+#endif
/*/
//THIS CODE CRASH ON CUDA 5.0.35
#define RAND_SCAN_OP(ofs) \
@@ -120,7 +129,7 @@ struct PRNGInfo
aData[scanIdx] = val.a; cData[scanIdx] = val.c; \
}
//*/
-PX_INLINE __device__ void randScanWarp(unsigned int scanIdx, volatile unsigned int* aData, volatile unsigned int* cData)
+PX_INLINE __device__ void randScanWarp(unsigned int scanIdx, volatile unsigned int* aData, volatile unsigned int* cData, unsigned int idxInWarp)
{
RAND_SCAN_OP(1);
RAND_SCAN_OP(2);
@@ -146,7 +155,7 @@ PX_INLINE __device__ nvidia::LCG_PRNG randScanBlock(nvidia::LCG_PRNG val, volati
aData[scanIdx] = val.a;
cData[scanIdx] = val.c;
- randScanWarp(scanIdx, aData, cData);
+ randScanWarp(scanIdx, aData, cData, idxInWarp);
//read value
val.a = aData[scanIdx];
@@ -164,7 +173,7 @@ PX_INLINE __device__ nvidia::LCG_PRNG randScanBlock(nvidia::LCG_PRNG val, volati
if (warpIdx == 0)
{
- randScanWarp(scanIdx, aData, cData);
+ randScanWarp(scanIdx, aData, cData, idxInWarp);
}
__syncthreads();