aboutsummaryrefslogtreecommitdiff
path: root/PxShared
diff options
context:
space:
mode:
Diffstat (limited to 'PxShared')
-rw-r--r--PxShared/include/cudamanager/PxCudaContextManager.h24
-rw-r--r--PxShared/include/foundation/PxIntrinsics.h4
-rw-r--r--PxShared/include/foundation/PxPreprocessor.h14
-rw-r--r--PxShared/include/foundation/nx/PxNXIntrinsics.h138
-rw-r--r--PxShared/include/task/PxTask.h39
-rw-r--r--PxShared/include/task/PxTaskManager.h3
-rw-r--r--PxShared/src/compiler/cmake/Android/CMakeLists.txt25
-rw-r--r--PxShared/src/compiler/cmake/IOS/CMakeLists.txt7
-rw-r--r--PxShared/src/compiler/cmake/Linux/CMakeLists.txt19
-rw-r--r--PxShared/src/compiler/cmake/Mac/CMakeLists.txt7
-rw-r--r--PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake2
-rw-r--r--PxShared/src/compiler/cmake/common/CMakeLists.txt2
-rw-r--r--PxShared/src/compiler/cmake/html5/CMakeLists.txt3
-rw-r--r--PxShared/src/compiler/cmake/windows/CMakeLists.txt17
-rw-r--r--PxShared/src/cudamanager/include/GpuDispatcher.h2
-rw-r--r--PxShared/src/cudamanager/src/BlockingWait.cpp7
-rw-r--r--PxShared/src/cudamanager/src/CudaContextManager.cpp213
-rw-r--r--PxShared/src/cudamanager/src/GpuDispatcher.cpp42
-rw-r--r--PxShared/src/foundation/include/PsAllocator.h2
-rw-r--r--PxShared/src/foundation/include/PsAoS.h2
-rw-r--r--PxShared/src/foundation/include/PsInlineAoS.h2
-rw-r--r--PxShared/src/foundation/include/PsIntrinsics.h4
-rw-r--r--PxShared/src/foundation/include/PsThread.h4
-rw-r--r--PxShared/src/foundation/include/PsVecMath.h2
-rw-r--r--PxShared/src/foundation/include/PsVecMathAoSScalarInline.h7
-rw-r--r--PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h2
-rw-r--r--PxShared/src/foundation/src/PsAssert.cpp6
-rw-r--r--PxShared/src/pvd/include/PxProfileAllocatorWrapper.h2
-rw-r--r--PxShared/src/task/src/TaskManager.cpp32
29 files changed, 145 insertions, 488 deletions
diff --git a/PxShared/include/cudamanager/PxCudaContextManager.h b/PxShared/include/cudamanager/PxCudaContextManager.h
index 24f4370..aca1112 100644
--- a/PxShared/include/cudamanager/PxCudaContextManager.h
+++ b/PxShared/include/cudamanager/PxCudaContextManager.h
@@ -23,7 +23,7 @@
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
-// Copyright (c) 2008-2014 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
#ifndef PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H
@@ -42,6 +42,7 @@
/* Forward decl to avoid inclusion of cuda.h */
typedef struct CUctx_st *CUcontext;
typedef struct CUgraphicsResource_st *CUgraphicsResource;
+typedef int CUdevice;
namespace physx
{
@@ -58,7 +59,6 @@ struct PxCudaInteropMode
enum Enum
{
NO_INTEROP = 0,
- D3D9_INTEROP,
D3D10_INTEROP,
D3D11_INTEROP,
OGL_INTEROP,
@@ -154,6 +154,9 @@ public:
* to every CUDA work submission, so we recommend that you carefully tune
* this initial base memory size to closely approximate the amount of
* memory your application will consume.
+
+ * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured
+ * for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
*/
uint32_t memoryBaseSize[PxCudaBufferMemorySpace::COUNT];
@@ -162,11 +165,17 @@ public:
*
* The memory manager will dynamically grow and shrink in blocks multiple of
* this page size. Size has to be power of two and bigger than 0.
+
+ * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured
+ * for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
*/
uint32_t memoryPageSize[PxCudaBufferMemorySpace::COUNT];
/**
* \brief Maximum size of memory that the memory manager will allocate
+
+ * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured
+ * for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
*/
uint32_t maxMemorySize[PxCudaBufferMemorySpace::COUNT];
@@ -223,7 +232,7 @@ public:
* harmful to (re)acquire the context in code that is shared between
* GpuTasks and non-task functions.
*/
- virtual CUcontext acquireContext() = 0;
+ virtual void acquireContext() = 0;
/**
* \brief Release the CUDA context from the current thread
@@ -234,9 +243,16 @@ public:
*/
virtual void releaseContext() = 0;
+ /**
+ * \brief Return the CUcontext
+ */
+ virtual CUcontext getContext() = 0;
+
/**
* \brief Return the PxCudaMemoryManager instance associated with this
* CUDA context
+ * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured
+ * for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
*/
virtual PxCudaMemoryManager *getMemoryManager() = 0;
@@ -268,6 +284,7 @@ public:
virtual bool supportsArchSM35() const = 0; //!< GK110
virtual bool supportsArchSM50() const = 0; //!< GM100
virtual bool supportsArchSM52() const = 0; //!< GM200
+ virtual bool supportsArchSM60() const = 0; //!< GP100
virtual bool isIntegrated() const = 0; //!< true if GPU is an integrated (MCP) part
virtual bool canMapHostMemory() const = 0; //!< true if GPU map host memory to GPU (0-copy)
virtual int getDriverVersion() const = 0; //!< returns cached value of cuGetDriverVersion()
@@ -278,6 +295,7 @@ public:
virtual int getSharedMemPerMultiprocessor() const = 0; //!< returns total amount of shared memory available per multiprocessor in bytes
virtual unsigned int getMaxThreadsPerBlock() const = 0; //!< returns the maximum number of threads per block
virtual const char *getDeviceName() const = 0; //!< returns device name retrieved from driver
+ virtual CUdevice getDevice() const = 0; //!< returns device handle retrieved from driver
virtual PxCudaInteropMode::Enum getInteropMode() const = 0; //!< interop mode the context was created with
virtual void setUsingConcurrentStreams(bool) = 0; //!< turn on/off using concurrent streams for GPU work
diff --git a/PxShared/include/foundation/PxIntrinsics.h b/PxShared/include/foundation/PxIntrinsics.h
index 471f934..b4aff28 100644
--- a/PxShared/include/foundation/PxIntrinsics.h
+++ b/PxShared/include/foundation/PxIntrinsics.h
@@ -38,8 +38,8 @@
#include "foundation/unix/PxUnixIntrinsics.h"
#elif PX_XBOXONE
#include "foundation/XboxOne/PxXboxOneIntrinsics.h"
-#elif PX_NX
-#include "foundation/nx/PxNXIntrinsics.h"
+#elif PX_SWITCH
+#include "foundation/switch/PxSwitchIntrinsics.h"
#else
#error "Platform not supported!"
#endif
diff --git a/PxShared/include/foundation/PxPreprocessor.h b/PxShared/include/foundation/PxPreprocessor.h
index 446ca76..9b6e0f4 100644
--- a/PxShared/include/foundation/PxPreprocessor.h
+++ b/PxShared/include/foundation/PxPreprocessor.h
@@ -88,7 +88,7 @@ Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSyst
#elif defined(__ORBIS__)
#define PX_PS4 1
#elif defined(__NX__)
-#define PX_NX 1
+#define PX_SWITCH 1
#else
#error "Unknown operating system"
#endif
@@ -161,8 +161,8 @@ define anything not defined on this platform to 0
#ifndef PX_PS4
#define PX_PS4 0
#endif
-#ifndef PX_NX
-#define PX_NX 0
+#ifndef PX_SWITCH
+#define PX_SWITCH 0
#endif
#ifndef PX_X64
#define PX_X64 0
@@ -425,7 +425,7 @@ General defines
*/
// static assert
-#if(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (PX_PS4) || (PX_APPLE_FAMILY) || (PX_NX) || (PX_CLANG && PX_ARM)
+#if(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (PX_PS4) || (PX_APPLE_FAMILY) || (PX_SWITCH) || (PX_CLANG && PX_ARM)
#define PX_COMPILE_TIME_ASSERT(exp) typedef char PxCompileTimeAssert_Dummy[(exp) ? 1 : -1] __attribute__((unused))
#else
#define PX_COMPILE_TIME_ASSERT(exp) typedef char PxCompileTimeAssert_Dummy[(exp) ? 1 : -1]
@@ -532,5 +532,11 @@ protected:
#define PX_SUPPORT_COMPUTE_PHYSX 0
+#ifndef PX_SUPPORT_EXTERN_TEMPLATE // no build-level override: on unless PX_ANDROID is set or PX_VC is 11
+#define PX_SUPPORT_EXTERN_TEMPLATE ((!PX_ANDROID) && (PX_VC != 11))
+#else // already defined by the build: force it to 0, dropping the old definition first so this is not a conflicting redefinition
+#undef PX_SUPPORT_EXTERN_TEMPLATE
+#define PX_SUPPORT_EXTERN_TEMPLATE 0
+#endif
/** @} */
#endif // #ifndef PXFOUNDATION_PXPREPROCESSOR_H
diff --git a/PxShared/include/foundation/nx/PxNXIntrinsics.h b/PxShared/include/foundation/nx/PxNXIntrinsics.h
deleted file mode 100644
index adf3bf5..0000000
--- a/PxShared/include/foundation/nx/PxNXIntrinsics.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved.
- *
- * NVIDIA CORPORATION and its licensors retain all intellectual property
- * and proprietary rights in and to this software, related documentation
- * and any modifications thereto. Any use, reproduction, disclosure or
- * distribution of this software and related documentation without an express
- * license agreement from NVIDIA CORPORATION is strictly prohibited.
- */
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-
-#ifndef PX_FOUNDATION_PX_NX_INTRINSICS_H
-#define PX_FOUNDATION_PX_NX_INTRINSICS_H
-
-#include "foundation/Px.h"
-#include "foundation/PxAssert.h"
-
-#if !PX_NX
- #error "This file should only be included by NX builds!!"
-#endif
-
-#include <math.h>
-#include <float.h>
-
-#include "nn/cstd/cstd_CMath.h"
-
-#if !PX_DOXYGEN
-namespace physx
-{
-namespace intrinsics
-{
-#endif
-
- //! \brief platform-specific absolute value
- PX_CUDA_CALLABLE PX_FORCE_INLINE float abs(float a) { return ::fabsf(a); }
-
- //! \brief platform-specific select float
- PX_CUDA_CALLABLE PX_FORCE_INLINE float fsel(float a, float b, float c) { return (a >= 0.0f) ? b : c; }
-
- //! \brief platform-specific sign
- PX_CUDA_CALLABLE PX_FORCE_INLINE float sign(float a) { return (a >= 0.0f) ? 1.0f : -1.0f; }
-
- //! \brief platform-specific reciprocal
- PX_CUDA_CALLABLE PX_FORCE_INLINE float recip(float a) { return 1.0f/a; }
-
- //! \brief platform-specific reciprocal estimate
- PX_CUDA_CALLABLE PX_FORCE_INLINE float recipFast(float a) { return 1.0f/a; }
-
- //! \brief platform-specific square root
- PX_CUDA_CALLABLE PX_FORCE_INLINE float sqrt(float a) { return ::sqrtf(a); }
-
- //! \brief platform-specific reciprocal square root
- PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrt(float a) { return 1.0f/::sqrtf(a); }
-
- //! \brief platform-specific reciprocal square root estimate
- PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrtFast(float a) { return 1.0f/::sqrtf(a); }
-
- //! \brief platform-specific sine
- PX_CUDA_CALLABLE PX_FORCE_INLINE float sin(float a) { return ::sinf(a); }
-
- //! \brief platform-specific cosine
- PX_CUDA_CALLABLE PX_FORCE_INLINE float cos(float a) { return ::cosf(a); }
-
- //! \brief platform-specific minimum
- PX_CUDA_CALLABLE PX_FORCE_INLINE float selectMin(float a, float b) { return a<b ? a : b; }
-
- //! \brief platform-specific maximum
- PX_CUDA_CALLABLE PX_FORCE_INLINE float selectMax(float a, float b) { return a>b ? a : b; }
-
- //! \brief platform-specific finiteness check
- PX_CUDA_CALLABLE PX_FORCE_INLINE bool isFinite(float a)
- {
-#ifdef __CUDACC__
- return isfinite(a) ? true : false;
-#else
- return !nn::cstd::IsNan(a) && !nn::cstd::IsInf(a);
-#endif
- }
-
- //! \brief platform-specific finiteness check
- PX_CUDA_CALLABLE PX_FORCE_INLINE bool isFinite(double a)
- {
-#ifdef __CUDACC__
- return isfinite(a) ? true : false;
-#else
- return !nn::cstd::IsNan(a) && !nn::cstd::IsInf(a);
-#endif
- }
-
- /*!
- Sets \c count bytes starting at \c dst to zero.
- */
- PX_FORCE_INLINE void* memZero(void* PX_RESTRICT dest, uint32_t count)
- {
- return memset(dest, 0, count);
- }
-
- /*!
- Sets \c count bytes starting at \c dst to \c c.
- */
- PX_FORCE_INLINE void* memSet(void* PX_RESTRICT dest, int32_t c, uint32_t count)
- {
- return memset(dest, c, count);
- }
-
- /*!
- Copies \c count bytes from \c src to \c dst. User memMove if regions overlap.
- */
- PX_FORCE_INLINE void* memCopy(void* PX_RESTRICT dest, const void* PX_RESTRICT src, uint32_t count)
- {
- return memcpy(dest, src, count);
- }
-
- /*!
- Copies \c count bytes from \c src to \c dst. Supports overlapping regions.
- */
- PX_FORCE_INLINE void* memMove(void* PX_RESTRICT dest, const void* PX_RESTRICT src, uint32_t count)
- {
- return memmove(dest, src, count);
- }
-
- /*!
- Set 128B to zero starting at \c dst+offset. Must be aligned.
- */
- PX_FORCE_INLINE void memZero128(void* PX_RESTRICT dest, uint32_t offset = 0)
- {
- PX_ASSERT(((size_t(dest)+offset) & 0x7f) == 0);
- memSet((char* PX_RESTRICT)dest+offset, 0, 128);
- }
-
-#if !PX_DOXYGEN
-} // namespace intrinsics
-} // namespace physx
-#endif
-
-#endif
diff --git a/PxShared/include/task/PxTask.h b/PxShared/include/task/PxTask.h
index 2761109..85d91da 100644
--- a/PxShared/include/task/PxTask.h
+++ b/PxShared/include/task/PxTask.h
@@ -45,7 +45,7 @@ namespace physx
class PxBaseTask
{
public:
- PxBaseTask() : mEventID(0xFFFF), mProfileStat(0), mTm(0) {}
+ PxBaseTask() : mContextID(0), mTm(NULL) {}
virtual ~PxBaseTask() {}
/**
@@ -78,50 +78,24 @@ public:
* references to it - so it may safely run its destructor, recycle itself, etc.
* provided no additional user references to the task exist
*/
-
virtual void release() = 0;
- /**
- * \brief Execute user run method with wrapping profiling events.
- *
- * Optional entry point for use by CpuDispatchers.
- *
- * \param[in] threadId The threadId of the thread that executed the task.
- */
- PX_INLINE void runProfiled(uint32_t threadId=0)
- {
- mTm->emitStartEvent(*this, threadId);
- run();
- mTm->emitStopEvent(*this, threadId);
- }
-
- /**
- * \brief Specify stop event statistic
- *
- * If called before or while the task is executing, the given value
- * will appear in the task's event bar in the profile viewer
- *
- * \param[in] stat The stat to signal when the task is finished
- */
- PX_INLINE void setProfileStat( uint16_t stat )
- {
- mProfileStat = stat;
- }
-
/**
* \brief Return PxTaskManager to which this task was submitted
*
* Note, can return NULL if task was not submitted, or has been
* completed.
*/
- PX_INLINE PxTaskManager* getTaskManager() const
+ PX_FORCE_INLINE PxTaskManager* getTaskManager() const
{
return mTm;
}
+ PX_FORCE_INLINE void setContextId(PxU64 id) { mContextID = id; }
+ PX_FORCE_INLINE PxU64 getContextId() const { return mContextID; }
+
protected:
- uint16_t mEventID; //!< Registered profile event ID
- uint16_t mProfileStat; //!< Profiling statistic
+ PxU64 mContextID; //!< Context ID for profiler interface
PxTaskManager* mTm; //!< Owning PxTaskManager instance
friend class PxTaskMgr;
@@ -212,7 +186,6 @@ public:
{
mStreamIndex = 0;
mPreSyncRequired = false;
- mProfileStat = 0;
}
/**
diff --git a/PxShared/include/task/PxTaskManager.h b/PxShared/include/task/PxTaskManager.h
index f6f29c4..f40f7b1 100644
--- a/PxShared/include/task/PxTaskManager.h
+++ b/PxShared/include/task/PxTaskManager.h
@@ -215,9 +215,6 @@ protected:
virtual void decrReference(PxLightCpuTask&) = 0;
virtual void addReference(PxLightCpuTask&) = 0;
- virtual void emitStartEvent(PxBaseTask&, uint32_t threadId=0) = 0;
- virtual void emitStopEvent(PxBaseTask&, uint32_t threadId=0) = 0;
-
/*! \endcond */
friend class PxBaseTask;
diff --git a/PxShared/src/compiler/cmake/Android/CMakeLists.txt b/PxShared/src/compiler/cmake/Android/CMakeLists.txt
index 0499c29..9aa00f9 100644
--- a/PxShared/src/compiler/cmake/Android/CMakeLists.txt
+++ b/PxShared/src/compiler/cmake/Android/CMakeLists.txt
@@ -1,34 +1,37 @@
cmake_minimum_required(VERSION 3.3)
+PROJECT(PxShared )
include(../common/CMakeLists.txt)
STRING(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE)
-IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Android
- SET(TARGET_BUILD_PLATFORM "Android")
+IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to android
+ SET(TARGET_BUILD_PLATFORM "android")
ENDIF()
-SET(PLATFORM_LIST Android)
+SET(PLATFORM_LIST android)
IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST)
MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM})
ENDIF()
if(${ANDROID_ABI} STREQUAL "armeabi-v7a")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof ")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -funwind-tables -fomit-frame-pointer -funswitch-loops -finline-limit=300 -fno-strict-aliasing -fstack-protector -Wno-invalid-offsetof ")
+elseif(${ANDROID_ABI} STREQUAL "armeabi-v7a with NEON")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -ffast-math -fno-exceptions -ffunction-sections -funwind-tables -fomit-frame-pointer -funswitch-loops -finline-limit=300 -fno-strict-aliasing -fstack-protector -Wno-invalid-offsetof ")
elseif(${ANDROID_ABI} STREQUAL "arm64-v8a")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof ")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -Wno-invalid-offsetof ")
elseif(${ANDROID_ABI} STREQUAL "x86")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof -fpack-struct=8 -malign-double ")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -Wno-invalid-offsetof -fpack-struct=8 -malign-double ")
elseif(${ANDROID_ABI} STREQUAL "x86_64")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof -mstackrealign -msse3 ")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -Wno-invalid-offsetof -mstackrealign -msse3 ")
endif()
SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
-SET(CMAKE_CXX_FLAGS_CHECKED "-O2")
-SET(CMAKE_CXX_FLAGS_PROFILE "-O2")
-SET(CMAKE_CXX_FLAGS_RELEASE "-O2")
+SET(CMAKE_CXX_FLAGS_CHECKED "-O3")
+SET(CMAKE_CXX_FLAGS_PROFILE "-O3")
+SET(CMAKE_CXX_FLAGS_RELEASE "-O3")
-SET(PXSHARED_ANDROID_COMPILE_DEFS _LIB;)
+SET(PXSHARED_ANDROID_COMPILE_DEFS _LIB;__STDC_LIMIT_MACROS;)
SET(PXSHARED_ANDROID_DEBUG_COMPILE_DEFS _DEBUG;PX_DEBUG=1;PX_CHECKED=1)
SET(PXSHARED_ANDROID_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1)
SET(PXSHARED_ANDROID_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1)
diff --git a/PxShared/src/compiler/cmake/IOS/CMakeLists.txt b/PxShared/src/compiler/cmake/IOS/CMakeLists.txt
index d281e32..4e2f814 100644
--- a/PxShared/src/compiler/cmake/IOS/CMakeLists.txt
+++ b/PxShared/src/compiler/cmake/IOS/CMakeLists.txt
@@ -1,12 +1,13 @@
cmake_minimum_required(VERSION 3.3)
+PROJECT(PxShared CXX)
include(../common/CMakeLists.txt)
-IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to IOS
- SET(TARGET_BUILD_PLATFORM "IOS")
+IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to ios
+ SET(TARGET_BUILD_PLATFORM "ios")
ENDIF()
-SET(PLATFORM_LIST IOS)
+SET(PLATFORM_LIST ios)
IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST)
MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM})
diff --git a/PxShared/src/compiler/cmake/Linux/CMakeLists.txt b/PxShared/src/compiler/cmake/Linux/CMakeLists.txt
index 2fa592e..ea25a8b 100644
--- a/PxShared/src/compiler/cmake/Linux/CMakeLists.txt
+++ b/PxShared/src/compiler/cmake/Linux/CMakeLists.txt
@@ -1,13 +1,14 @@
cmake_minimum_required(VERSION 3.3)
+PROJECT(PxShared CXX)
include(../common/CMakeLists.txt)
STRING(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE)
-IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Linux
- SET(TARGET_BUILD_PLATFORM "Linux")
+IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to linux
+ SET(TARGET_BUILD_PLATFORM "linux")
ENDIF()
-SET(PLATFORM_LIST Linux)
+SET(PLATFORM_LIST linux)
IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST)
MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM})
@@ -16,16 +17,16 @@ ENDIF()
IF (${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu")
IF ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
# using Clang
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-invalid-noreturn -Wno-return-type-c-linkage -Wno-reserved-id-macro -Wno-c++98-compat-pedantic -Wno-unused-local-typedef -Wno-old-style-cast -Wno-newline-eof -Wno-unused-private-field -Wno-undefined-func-template -Wno-format-nonliteral -Wno-implicit-fallthrough -Wno-undefined-reinterpret-cast")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-invalid-noreturn -Wno-return-type-c-linkage -Wno-reserved-id-macro -Wno-c++98-compat-pedantic -Wno-unused-local-typedef -Wno-old-style-cast -Wno-newline-eof -Wno-unused-private-field -Wno-undefined-func-template -Wno-format-nonliteral -Wno-implicit-fallthrough -Wno-undefined-reinterpret-cast -Wno-disabled-macro-expansion")
ELSEIF ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# using GCC
SET(LIBPATH_SUFFIX "x64")
SET(CMAKE_CXX_FLAGS "-Werror -m64 -fPIC -msse2 -mfpmath=sse -ffast-math -fno-exceptions -fno-rtti -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wextra -fno-strict-aliasing -fdiagnostics-show-option -Wno-invalid-offsetof -Wno-uninitialized -Wno-missing-field-initializers")
ENDIF("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
ELSEIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "arm-unknown-linux-gnueabihf")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -mfpu=neon -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-implicit-fallthrough")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -mfpu=neon -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-implicit-fallthrough -Wno-disabled-macro-expansion")
ELSEIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "aarch64-unknown-linux-gnueabi")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-unused-local-typedef -Wno-implicit-fallthrough")
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-unused-local-typedef -Wno-implicit-fallthrough -Wno-disabled-macro-expansion")
ELSE(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu")
MESSAGE(FATAL_ERROR "Unknown CMAKE_LIBRARY_ARCHITECTURE ${CMAKE_LIBRARY_ARCHITECTURE}")
ENDIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu")
@@ -36,7 +37,7 @@ SET(CMAKE_CXX_FLAGS_CHECKED "-O3 -g -gdwarf-3")
SET(CMAKE_CXX_FLAGS_PROFILE "-O3 -g -gdwarf-3")
SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -gdwarf-3")
-IF(DEFINED PX_GENERATE_GPU_PROJECTS)
+IF(DEFINED GENERATE_GPU_PROJECTS)
SET(PXSHARED_LINUX_COMPILE_DEFS _LIB)
ELSE()
# Disable cuda and dx for all projects on linux
@@ -71,7 +72,7 @@ IF(DEFINED PX_SELECT_COMPONENTS)
INCLUDE(PxTask.cmake)
endif()
if ("PxCudaContextManager" IN_LIST PX_SELECT_COMPONENTS)
- IF(DEFINED PX_GENERATE_GPU_PROJECTS)
+ IF(DEFINED GENERATE_GPU_PROJECTS)
INCLUDE(PxCudaContextManager.cmake)
ENDIF()
endif()
@@ -80,7 +81,7 @@ INCLUDE(PxFoundation.cmake)
INCLUDE(PsFastXml.cmake)
INCLUDE(PxPvdSDK.cmake)
INCLUDE(PxTask.cmake)
-IF(DEFINED PX_GENERATE_GPU_PROJECTS)
+IF(DEFINED GENERATE_GPU_PROJECTS)
INCLUDE(PxCudaContextManager.cmake)
ENDIF()
ENDIF()
diff --git a/PxShared/src/compiler/cmake/Mac/CMakeLists.txt b/PxShared/src/compiler/cmake/Mac/CMakeLists.txt
index beb06bc..3242b46 100644
--- a/PxShared/src/compiler/cmake/Mac/CMakeLists.txt
+++ b/PxShared/src/compiler/cmake/Mac/CMakeLists.txt
@@ -1,12 +1,13 @@
cmake_minimum_required(VERSION 3.3)
+PROJECT(PxShared CXX)
include(../common/CMakeLists.txt)
-IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Mac
- SET(TARGET_BUILD_PLATFORM "Mac")
+IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to mac
+ SET(TARGET_BUILD_PLATFORM "mac")
ENDIF()
-SET(PLATFORM_LIST Mac)
+SET(PLATFORM_LIST mac)
IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST)
MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM})
diff --git a/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake b/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake
index c1675ae..f7bc761 100644
--- a/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake
+++ b/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake
@@ -13,7 +13,7 @@ FIND_PATH( NVTOOLSEXT_INCLUDE_DIRS nvToolsExt.h
INCLUDE(FindPackageHandleStandardArgs)
-IF(TARGET_BUILD_PLATFORM STREQUAL "Windows")
+IF(TARGET_BUILD_PLATFORM STREQUAL "windows")
# NOTE: Doesn't make sense for all platforms - ARM
IF(CMAKE_CL_64)
SET(NVTOOLSEXT_LIBNAME "nvToolsExt64_1")
diff --git a/PxShared/src/compiler/cmake/common/CMakeLists.txt b/PxShared/src/compiler/cmake/common/CMakeLists.txt
index 22d2097..3bbb57a 100644
--- a/PxShared/src/compiler/cmake/common/CMakeLists.txt
+++ b/PxShared/src/compiler/cmake/common/CMakeLists.txt
@@ -1,7 +1,5 @@
cmake_minimum_required(VERSION 3.3)
-PROJECT(PxShared CXX)
-
CMAKE_POLICY(SET CMP0057 NEW) # Enable IN_LIST
IF(DEFINED ENV{GW_DEPS_ROOT})
diff --git a/PxShared/src/compiler/cmake/html5/CMakeLists.txt b/PxShared/src/compiler/cmake/html5/CMakeLists.txt
index 8b9587a..e0d2776 100644
--- a/PxShared/src/compiler/cmake/html5/CMakeLists.txt
+++ b/PxShared/src/compiler/cmake/html5/CMakeLists.txt
@@ -1,10 +1,11 @@
cmake_minimum_required(VERSION 3.3)
+PROJECT(PxShared CXX)
#set(CMAKE_VERBOSE_MAKEFILE ON)
include(../common/CMakeLists.txt)
-IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to HTML5
- SET(TARGET_BUILD_PLATFORM "HTML5")
+IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to html5
+ SET(TARGET_BUILD_PLATFORM "html5")
ENDIF()
-SET(PLATFORM_LIST HTML5)
+SET(PLATFORM_LIST html5) # lowercase so the default above is an accepted platform (IN_LIST compares case-sensitively)
diff --git a/PxShared/src/compiler/cmake/windows/CMakeLists.txt b/PxShared/src/compiler/cmake/windows/CMakeLists.txt
index 39b7dfc..fe2b00d 100644
--- a/PxShared/src/compiler/cmake/windows/CMakeLists.txt
+++ b/PxShared/src/compiler/cmake/windows/CMakeLists.txt
@@ -1,12 +1,13 @@
cmake_minimum_required(VERSION 3.3)
+PROJECT(PxShared CXX)
include(../common/CMakeLists.txt)
-IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Windows
- SET(TARGET_BUILD_PLATFORM "Windows")
+IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to windows
+ SET(TARGET_BUILD_PLATFORM "windows")
ENDIF()
-SET(PLATFORM_LIST Windows)
+SET(PLATFORM_LIST windows)
IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST)
MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM})
@@ -33,7 +34,7 @@ SET(CMAKE_SHARED_LINKER_FLAGS "/DEBUG")
# Controls PX_NVTX for all projects on windows
SET(PXSHARED_WINDOWS_ENABLE_NVTX 0)
-IF(DEFINED PX_GENERATE_GPU_PROJECTS)
+IF(DEFINED LINK_GPU_BINARIES)
SET(PXSHARED_WINDOWS_COMPILE_DEFS WIN32;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_WINSOCK_DEPRECATED_NO_WARNINGS;)
ELSE()
# Disable cuda and dx for all projects on windows
@@ -44,6 +45,10 @@ SET(PXSHARED_WINDOWS_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1;PX_NVTX=${PXSHARED
SET(PXSHARED_WINDOWS_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1;PX_NVTX=${PXSHARED_WINDOWS_ENABLE_NVTX})
SET(PXSHARED_WINDOWS_RELEASE_COMPILE_DEFS NDEBUG)
+IF(DEFINED PX_SCALAR_MATH)
+ ADD_DEFINITIONS(-DPX_SIMD_DISABLED)
+ENDIF()
+
IF(CMAKE_CL_64)
ADD_DEFINITIONS(-DWIN64)
ENDIF(CMAKE_CL_64)
@@ -74,7 +79,7 @@ IF(DEFINED PX_SELECT_COMPONENTS)
INCLUDE(PxTask.cmake)
endif()
if ("PxCudaContextManager" IN_LIST PX_SELECT_COMPONENTS)
- IF(DEFINED PX_GENERATE_GPU_PROJECTS)
+ IF(DEFINED GENERATE_GPU_PROJECTS)
INCLUDE(PxCudaContextManager.cmake)
ENDIF()
endif()
@@ -83,7 +88,7 @@ INCLUDE(PxFoundation.cmake)
INCLUDE(PsFastXml.cmake)
INCLUDE(PxPvdSDK.cmake)
INCLUDE(PxTask.cmake)
-IF(DEFINED PX_GENERATE_GPU_PROJECTS)
+IF(DEFINED GENERATE_GPU_PROJECTS)
INCLUDE(PxCudaContextManager.cmake)
ENDIF()
ENDIF()
diff --git a/PxShared/src/cudamanager/include/GpuDispatcher.h b/PxShared/src/cudamanager/include/GpuDispatcher.h
index aedb345..10c412f 100644
--- a/PxShared/src/cudamanager/include/GpuDispatcher.h
+++ b/PxShared/src/cudamanager/include/GpuDispatcher.h
@@ -258,8 +258,6 @@ public:
~PxGpuWorkerThread();
void setCudaContext(PxCudaContextManager& ctx);
- void emitStartEvent(const char *id);
- void emitStopEvent(const char *id);
/* API to TaskManager */
void startSimulation();
diff --git a/PxShared/src/cudamanager/src/BlockingWait.cpp b/PxShared/src/cudamanager/src/BlockingWait.cpp
index fada532..8a2cc44 100644
--- a/PxShared/src/cudamanager/src/BlockingWait.cpp
+++ b/PxShared/src/cudamanager/src/BlockingWait.cpp
@@ -63,8 +63,9 @@ void PxGpuWorkerThread::blockingWaitFunc()
}
else if (!mFailureDetected)
{
- emitStartEvent("GpuDispatcher.BlockingWaitEvent");
-
+#if PX_SUPPORT_PXTASK_PROFILING
+ PX_PROFILE_ZONE("GpuDispatcher.BlockingWaitEvent", 0);
+#endif
if (1 & ~intptr_t(b.blockingStream))
{
GD_CHECK_CALL(cuStreamSynchronize(b.blockingStream));
@@ -73,8 +74,6 @@ void PxGpuWorkerThread::blockingWaitFunc()
{
GD_CHECK_CALL(cuEventSynchronize(b.blockingEvent));
}
-
- emitStopEvent("GpuDispatcher.BlockingWaitEvent");
}
if (b.blockingEvent)
diff --git a/PxShared/src/cudamanager/src/CudaContextManager.cpp b/PxShared/src/cudamanager/src/CudaContextManager.cpp
index b5b6efc..e05911e 100644
--- a/PxShared/src/cudamanager/src/CudaContextManager.cpp
+++ b/PxShared/src/cudamanager/src/CudaContextManager.cpp
@@ -23,7 +23,7 @@
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
-// Copyright (c) 2008-2014 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
#include "foundation/PxAssert.h"
#include "foundation/PxErrorCallback.h"
@@ -88,102 +88,6 @@ static void* GetProcAddress(void* handle, const char* name) { return dlsym(handl
#define ENABLE_DEVICE_INFO_BRINGUP 0
-#include "GPUProfile.h"
-
-#if ENABLE_CUDA_DEVICE_RESET
-#include "cudaProfiler.h"
-#endif
-
-#if USE_PERFKIT
-#pragma warning (push)
-#pragma warning (disable : 4099)
-#pragma warning (disable : 4191)
-#define NVPM_INITGUID
-#include <stdio.h>
-#include "cuda.h"
-#include "../../../../../../../externals/nvPerfKit/4.1.0.14260/inc/NvPmApi.Manager.h"
-static NvPmApiManager S_NVPMManager;
-extern NvPmApiManager *GetNvPmApiManager() {return &S_NVPMManager;}
-const NvPmApi *GetNvPmApi() {return S_NVPMManager.Api();}
-NVPMContext hNVPMContext(0);
-
-void initPerfKit()
-{
- //Sync with GPU
- cuCtxSynchronize();
-
- // Reset counters
- uint32_t nCount;
- GetNvPmApi()->Sample(hNVPMContext, NULL, &nCount);
-}
-
-void endPerfKit()
-{
- //Sync with GPU
- cuCtxSynchronize();
-
- uint32_t nCount;
- GetNvPmApi()->Sample(hNVPMContext, NULL, &nCount);
-
- uint64_t value;
- uint64_t cycle;
-
- uint64_t sum = 0;
- uint64_t maxVal = 0;
- char name[512];
-
- int nvStatus = 0;
-
- PX_UNUSED(value);
- PX_UNUSED(cycle);
- PX_UNUSED(sum);
- PX_UNUSED(maxVal);
- PX_UNUSED(name);
- PX_UNUSED(nvStatus);
-
- printf("counters:\n");
-
-#if COUNT_L2_TO_L1_BYTES
- nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, "l2_read_bytes", 0, &value, &cycle);
- printf("L2->L1 bytes %d\n",value);
-#elif COUNT_SM_TO_L1_QUERIES
- nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, "tex_cache_sector_queries", 0, &value, &cycle);
- printf("SM->L1 queries %d\n",value);
-#endif
-
-#if COUNT_INST_EXECUTED || COUNT_STORE_INST_EXECUTED || COUNT_ACTIVE_CYCLES || COUNT_ACTIVE_WARPS
- for (int i = 0; i != SM_COUNT; i++)
- {
-#if COUNT_INST_EXECUTED
- sprintf_s(name, 512, "sm_inst_executed_vsm%d", i);
-#elif COUNT_STORE_INST_EXECUTED
- sprintf_s(name, 512, "sm_inst_executed_global_stores_vsm%d", i);
-#elif COUNT_ACTIVE_CYCLES
- sprintf_s(name, 512, "sm_active_cycles_vsm%d", i);
-#elif COUNT_ACTIVE_WARPS
- sprintf_s(name, 512, "sm_active_warps_vsm%d", i);
-#endif
- nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, name, 0, &value, &cycle);
-
- sum += value;
- maxVal = physx::PxMax(maxVal, value);
- }
-#if COUNT_ACTIVE_CYCLES
- printf("sum %I64d\n", sum);
-#else
- printf("sum %I64d\n", sum);
-#endif
-
- if (!nvStatus)
- {
- PX_ASSERT(false);
- }
-#endif
-}
-
-#pragma warning (pop)
-#endif
-
namespace physx
{
@@ -201,7 +105,7 @@ public:
~CudaCtxMgr();
bool safeDelayImport(PxErrorCallback& errorCallback);
- CUcontext acquireContext();
+ void acquireContext();
void releaseContext();
/* All these methods can be called without acquiring the context */
@@ -219,6 +123,7 @@ public:
bool supportsArchSM35() const; // GK110
bool supportsArchSM50() const; // GM100
bool supportsArchSM52() const; // GM200
+ bool supportsArchSM60() const; // GP100
bool isIntegrated() const; // true if GPU is integrated (MCP) part
bool canMapHostMemory() const; // true if GPU map host memory to GPU
int getDriverVersion() const;
@@ -230,6 +135,7 @@ public:
unsigned int getClockRate() const;
const char* getDeviceName() const;
+ CUdevice getDevice() const;
const CUdevprop* getDeviceProperties() const;
PxCudaInteropMode::Enum getInteropMode() const;
@@ -251,6 +157,8 @@ public:
void release();
+ CUcontext getContext() { return mCtx; }
+
private:
int mSceneCount;
@@ -330,6 +238,10 @@ bool CudaCtxMgr::supportsArchSM52() const
{
return mIsValid && ((mComputeCapMajor > 5) || (mComputeCapMajor == 5 && mComputeCapMinor >= 2));
}
+bool CudaCtxMgr::supportsArchSM60() const
+{
+ return mIsValid && mComputeCapMajor >= 6;
+}
bool CudaCtxMgr::isIntegrated() const
{
@@ -380,6 +292,18 @@ const char* CudaCtxMgr::getDeviceName() const
}
}
+CUdevice CudaCtxMgr::getDevice() const
+{
+ if (mIsValid)
+ {
+ return mDevHandle;
+ }
+ else
+ {
+ return -1;
+ }
+}
+
PxCudaInteropMode::Enum CudaCtxMgr::getInteropMode() const
{
return mInteropMode;
@@ -504,8 +428,7 @@ CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& er
if (PhysXDeviceSettings::isUsingDedicatedGPU() == 1 || sliEnabled)
{
- if (mInteropMode == PxCudaInteropMode::D3D9_INTEROP ||
- mInteropMode == PxCudaInteropMode::D3D10_INTEROP ||
+ if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP ||
mInteropMode == PxCudaInteropMode::D3D11_INTEROP)
{
mInteropMode = PxCudaInteropMode::NO_INTEROP;
@@ -562,18 +485,6 @@ CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& er
mOwnContext = true;
}
#if PX_WIN32 || PX_WIN64
- else if (mInteropMode == PxCudaInteropMode::D3D9_INTEROP)
- {
- status = cuD3D9CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags,
- reinterpret_cast<IDirect3DDevice9*>(desc.graphicsDevice));
-
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D9CtxCreate failed",__FILE__,__LINE__);
- return;
- }
- mOwnContext = true;
- }
else if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP)
{
status = cuD3D10CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags,
@@ -710,73 +621,6 @@ CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& er
errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Number of SM: %d", mMultiprocessorCount);
errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Max Threads Per Block: %d", mMaxThreadsPerBlock);
#endif
-
-#if USE_PERFKIT
- {
-#if _WIN64
- wchar_t * dllName = L"..\\..\\..\\..\\..\\externals\\nvPerfKit\\4.1.0.14260\\bin\\win7_x64\\NvPmApi.Core.dll";
-#else
- wchar_t * dllName = L"..\\..\\..\\..\\..\\externals\\nvPerfKit\\4.1.0.14260\\bin\\win7_x86\\NvPmApi.Core.dll";
-#endif
-
- NVPMRESULT nvResult;
-
- if ((nvResult = GetNvPmApiManager()->Construct(dllName)) != NVPM_OK)
- {
- printf("perfkit error 1\n");
- return;
- }
-
- if ((nvResult = GetNvPmApi()->Init()) != NVPM_OK)
- {
- printf("perfkit error 2\n");
- return;
- }
-
- acquireContext();
-
- CUcontext ctx;
- cuCtxGetCurrent(&ctx);
- if ((nvResult = GetNvPmApi()->CreateContextFromCudaContext((APIContextHandle)ctx, &hNVPMContext)) != NVPM_OK)
- {
- printf("perfkit error 3\n");
- return; // This is an error condition
- }
-
- uint32_t nvStatus = 0;
-
-#if COUNT_L2_TO_L1_BYTES
- nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "l2_read_bytes");
-#elif COUNT_SM_TO_L1_QUERIES
- nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "tex_cache_sector_queries");
-#endif
-
-#if COUNT_INST_EXECUTED || COUNT_STORE_INST_EXECUTED || COUNT_ACTIVE_CYCLES || COUNT_ACTIVE_WARPS
- char name[512];
- for (int i = 0; i != SM_COUNT; i++)
- {
-#if COUNT_INST_EXECUTED
- sprintf_s(name,512,"sm_inst_executed_vsm%d",i);
-#elif COUNT_STORE_INST_EXECUTED
- sprintf_s(name, 512, "sm_inst_executed_global_stores_vsm%d",i);
-#elif COUNT_ACTIVE_CYCLES
- sprintf_s(name, 512, "sm_active_cycles_vsm%d",i);
-#elif COUNT_ACTIVE_WARPS
- sprintf_s(name, 512, "sm_active_warps_vsm%d",i);
-#endif
- nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, name);
- }
-#elif COUNT_GPU_BUSY
- nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "gpu_busy");
-#endif
-
- if (nvStatus != 0)
- {
- printf("perfkit error 4\n");
- return; // This is an error condition
- }
- }
-#endif
}
/* Some driver version mismatches can cause delay import crashes. Load NVCUDA.dll
@@ -871,10 +715,6 @@ CudaCtxMgr::~CudaCtxMgr()
if(!--mManagerRefCount)
shdfnd::TlsFree(mContextRefCountTls);
#endif
-
-#if ENABLE_CUDA_DEVICE_RESET
- CUT_SAFE_CALL(cuProfilerStop());
-#endif
}
bool CudaCtxMgr::registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags)
@@ -900,9 +740,6 @@ bool CudaCtxMgr::registerResourceInCudaD3D(CUgraphicsResource& resource, void* r
switch (mInteropMode)
{
- case PxCudaInteropMode::D3D9_INTEROP:
- ret = cuGraphicsD3D9RegisterResource(&resource, (IDirect3DResource9*)resourcePointer, uint32_t(flags));
- break;
case PxCudaInteropMode::D3D10_INTEROP:
ret = cuGraphicsD3D10RegisterResource(&resource, (ID3D10Resource*)resourcePointer, uint32_t(flags));
break;
@@ -938,7 +775,7 @@ bool CudaCtxMgr::unregisterResourceInCuda(CUgraphicsResource resource)
return ret == CUDA_SUCCESS;
}
-CUcontext CudaCtxMgr::acquireContext()
+void CudaCtxMgr::acquireContext()
{
CUcontext ctx = 0;
CUT_SAFE_CALL(cuCtxGetCurrent(&ctx));
@@ -955,8 +792,6 @@ CUcontext CudaCtxMgr::acquireContext()
char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls);
shdfnd::TlsSet(mContextRefCountTls, ++refCount);
#endif
-
- return mCtx;
}
void CudaCtxMgr::releaseContext()
diff --git a/PxShared/src/cudamanager/src/GpuDispatcher.cpp b/PxShared/src/cudamanager/src/GpuDispatcher.cpp
index 0d05a97..432a0cd 100644
--- a/PxShared/src/cudamanager/src/GpuDispatcher.cpp
+++ b/PxShared/src/cudamanager/src/GpuDispatcher.cpp
@@ -487,21 +487,6 @@ PxGpuWorkerThread::~PxGpuWorkerThread()
}
}
-void PxGpuWorkerThread::emitStartEvent(const char *id)
-{
- PX_UNUSED(id);
-#if PX_SUPPORT_PXTASK_PROFILING
- PX_PROFILE_START_CROSSTHREAD(id,0);
-#endif
-}
-
-void PxGpuWorkerThread::emitStopEvent(const char *id)
-{
- PX_UNUSED(id);
-#if PX_SUPPORT_PXTASK_PROFILING
- PX_PROFILE_STOP_CROSSTHREAD(id,0);
-#endif
-}
/* A TaskManager is informing us that its simulation is being stepped */
void PxGpuWorkerThread::startSimulation()
@@ -555,15 +540,14 @@ void PxGpuWorkerThread::execute()
*/
void PxGpuWorkerThread::addCompletionPrereq(PxBaseTask& task)
{
- if (mFailureDetected)
- {
+ if(mFailureDetected)
return;
- }
- emitStartEvent("GpuDispatcher.AddCompletionEvent");
+#if PX_SUPPORT_PXTASK_PROFILING
+ PX_PROFILE_ZONE("GpuDispatcher.AddCompletionEvent", task.getContextId());
+#endif
task.addReference();
mCompletionTasks.pushBack(&task);
- emitStopEvent("GpuDispatcher.AddCompletionEvent");
}
namespace
@@ -757,7 +741,9 @@ void PxGpuWorkerThread::pollSubmitted(shdfnd::Array<ReadyTask>* ready)
void PxGpuWorkerThread::processActiveTasks()
{
- emitStartEvent("GpuDispatcher.ProcessTasksEvent");
+#if PX_SUPPORT_PXTASK_PROFILING
+ PX_PROFILE_ZONE("GpuDispatcher.ProcessTasksEvent", 0); // PT: TODO: fix invalid context
+#endif
if (mFailureDetected)
{
@@ -766,7 +752,6 @@ void PxGpuWorkerThread::processActiveTasks()
mInputReady.reset();
mSubmittedTaskList.popBack()->release();
}
- emitStopEvent("GpuDispatcher.ProcessTasksEvent");
return;
}
@@ -824,13 +809,16 @@ void PxGpuWorkerThread::processActiveTasks()
else
{
const CUstream s = (r.task->mStreamIndex > 0) ? mCachedStreams.get(r.task->mStreamIndex) : 0;
+
+ bool active;
+ {
#if PX_PROFILE
- r.task->mTm->emitStartEvent(*r.task);
+#if PX_SUPPORT_PXTASK_PROFILING
+ PX_PROFILE_ZONE(r.task->getName(), r.task->getContextId());
#endif
- bool active = r.task->launchInstance(s, int(r.iteration++));
-#if PX_PROFILE
- r.task->mTm->emitStopEvent(*r.task);
#endif
+ active = r.task->launchInstance(s, int(r.iteration++));
+ }
if(singleStream != r.task->mStreamIndex)
singleStream = 0;
@@ -935,8 +923,6 @@ void PxGpuWorkerThread::processActiveTasks()
while (tasksRemain);
mCachedNonBlockingEvents.add(nonBlockEv);
-
- emitStopEvent("GpuDispatcher.ProcessTasksEvent");
}
#endif
diff --git a/PxShared/src/foundation/include/PsAllocator.h b/PxShared/src/foundation/include/PsAllocator.h
index cbf32d3..f988b6c 100644
--- a/PxShared/src/foundation/include/PsAllocator.h
+++ b/PxShared/src/foundation/include/PsAllocator.h
@@ -108,7 +108,7 @@
#elif PX_XBOXONE
#include <malloc.h>
#define PxAlloca(x) alloca(x)
-#elif PX_NX
+#elif PX_SWITCH
#include <malloc.h>
#define PxAlloca(x) alloca(x)
#endif
diff --git a/PxShared/src/foundation/include/PsAoS.h b/PxShared/src/foundation/include/PsAoS.h
index 641a40a..5a7c82d 100644
--- a/PxShared/src/foundation/include/PsAoS.h
+++ b/PxShared/src/foundation/include/PsAoS.h
@@ -34,7 +34,7 @@
#if PX_WINDOWS && !PX_NEON
#include "windows/PsWindowsAoS.h"
-#elif(PX_UNIX_FAMILY || PX_PS4 || PX_NX)
+#elif(PX_UNIX_FAMILY || PX_PS4 || PX_SWITCH)
#include "unix/PsUnixAoS.h"
#elif PX_XBOXONE
#include "XboxOne/PsXboxOneAoS.h"
diff --git a/PxShared/src/foundation/include/PsInlineAoS.h b/PxShared/src/foundation/include/PsInlineAoS.h
index 6d43607..6ae15cf 100644
--- a/PxShared/src/foundation/include/PsInlineAoS.h
+++ b/PxShared/src/foundation/include/PsInlineAoS.h
@@ -35,7 +35,7 @@
#if PX_WINDOWS
#include "windows/PsWindowsTrigConstants.h"
#include "windows/PsWindowsInlineAoS.h"
-#elif(PX_UNIX_FAMILY || PX_PS4 || PX_NX)
+#elif(PX_UNIX_FAMILY || PX_PS4 || PX_SWITCH)
#include "unix/PsUnixTrigConstants.h"
#include "unix/PsUnixInlineAoS.h"
#elif PX_XBOXONE
diff --git a/PxShared/src/foundation/include/PsIntrinsics.h b/PxShared/src/foundation/include/PsIntrinsics.h
index 1e1b9d1..38b91ba 100644
--- a/PxShared/src/foundation/include/PsIntrinsics.h
+++ b/PxShared/src/foundation/include/PsIntrinsics.h
@@ -38,8 +38,8 @@
#include "unix/PsUnixIntrinsics.h"
#elif PX_XBOXONE
#include "XboxOne/PsXboxOneIntrinsics.h"
-#elif PX_NX
-#include "nx/PsNXIntrinsics.h"
+#elif PX_SWITCH
+#include "switch/PsSwitchIntrinsics.h"
#else
#error "Platform not supported!"
#endif
diff --git a/PxShared/src/foundation/include/PsThread.h b/PxShared/src/foundation/include/PsThread.h
index 8ba553a..4e7c104 100644
--- a/PxShared/src/foundation/include/PsThread.h
+++ b/PxShared/src/foundation/include/PsThread.h
@@ -41,7 +41,7 @@
#if PX_WINDOWS_FAMILY || PX_XBOXONE
#define PxSpinLockPause() __asm pause
-#elif PX_LINUX || PX_ANDROID || PX_PS4 || PX_APPLE_FAMILY || PX_NX
+#elif PX_LINUX || PX_ANDROID || PX_PS4 || PX_APPLE_FAMILY || PX_SWITCH
#define PxSpinLockPause() asm("nop")
#else
#error "Platform not supported!"
@@ -173,7 +173,7 @@ class PX_FOUNDATION_API ThreadImpl
Change the affinity mask for this thread. The mask is a platform
specific value.
- On Windows, Linux, PS4, XboxOne and NX platforms, each set mask bit represents
+ On Windows, Linux, PS4, XboxOne and Switch platforms, each set mask bit represents
the index of a logical processor that the OS may schedule thread execution on.
Bits outside the range of valid logical processors may be ignored or cause
the function to return an error.
diff --git a/PxShared/src/foundation/include/PsVecMath.h b/PxShared/src/foundation/include/PsVecMath.h
index 4e891d8..ffd2de8 100644
--- a/PxShared/src/foundation/include/PsVecMath.h
+++ b/PxShared/src/foundation/include/PsVecMath.h
@@ -54,7 +54,7 @@
#define COMPILE_VECTOR_INTRINSICS 1
#elif PX_IOS&& PX_NEON
#define COMPILE_VECTOR_INTRINSICS 1
-#elif PX_NX
+#elif PX_SWITCH
#define COMPILE_VECTOR_INTRINSICS 1
#else
#define COMPILE_VECTOR_INTRINSICS 0
diff --git a/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h b/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h
index 9bef465..f5dea7b 100644
--- a/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h
+++ b/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h
@@ -40,8 +40,13 @@
#define BOOL_TO_U16(b) (PxU16)(- PxI32(b))
+#define PX_VECMATH_ASSERT_ENABLED 0
+#if PX_VECMATH_ASSERT_ENABLED
#define VECMATHAOS_ASSERT(x) { PX_ASSERT(x); }
+#else
+#define VECMATHAOS_ASSERT(x)
+#endif
/////////////////////////////////////////////////////////////////////
////INTERNAL USE ONLY AND TESTS
@@ -1479,7 +1484,7 @@ PX_FORCE_INLINE BoolV BOr(const BoolV a, const BoolV b)
PX_FORCE_INLINE PxU32 BAllEq(const BoolV a, const BoolV b)
{
- return (a.ux == b.ux && a.uy == b.uy && a.uz == b.uz && a.uw == b.uw ? TRUE_TO_U32 : FALSE_TO_U32);
+ return (a.ux == b.ux && a.uy == b.uy && a.uz == b.uz && a.uw == b.uw ? 1 : 0);
}
PX_FORCE_INLINE PxU32 BAllEqTTTT(const BoolV a)
diff --git a/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h b/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h
index 2a0578d..a97f821 100644
--- a/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h
+++ b/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h
@@ -42,7 +42,7 @@
// "exact"
#define VRECIPQ recipq_newton<4>
-#if PX_NX
+#if PX_SWITCH
// StabilizationTests.AveragePoint needs more precision to succeed.
#define VRECIP recip_newton<5>
#else
diff --git a/PxShared/src/foundation/src/PsAssert.cpp b/PxShared/src/foundation/src/PsAssert.cpp
index 3070383..295a81e 100644
--- a/PxShared/src/foundation/src/PsAssert.cpp
+++ b/PxShared/src/foundation/src/PsAssert.cpp
@@ -34,8 +34,8 @@
#if PX_WINDOWS_FAMILY
#include <crtdbg.h>
-#elif PX_NX
-#include "nx/PsNXAbort.h"
+#elif PX_SWITCH
+#include "switch/PsSwitchAbort.h"
#endif
namespace
@@ -63,7 +63,7 @@ class DefaultAssertHandler : public physx::PxAssertHandler
__debugbreak();
#elif PX_WINDOWS_FAMILY&& PX_CHECKED
__debugbreak();
-#elif PX_NX
+#elif PX_SWITCH
abort(buffer);
#else
abort();
diff --git a/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h b/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h
index 22903ec..abac561 100644
--- a/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h
+++ b/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h
@@ -74,7 +74,7 @@ namespace physx { namespace profile {
{
static const char* getName()
{
-#if PX_LINUX || PX_ANDROID || PX_PS4 || PX_IOS || PX_OSX || PX_EMSCRIPTEN || PX_NX
+#if PX_LINUX || PX_ANDROID || PX_PS4 || PX_IOS || PX_OSX || PX_EMSCRIPTEN || PX_SWITCH
return __PRETTY_FUNCTION__;
#else
return typeid(T).name();
diff --git a/PxShared/src/task/src/TaskManager.cpp b/PxShared/src/task/src/TaskManager.cpp
index ffcbfcd..c6210bd 100644
--- a/PxShared/src/task/src/TaskManager.cpp
+++ b/PxShared/src/task/src/TaskManager.cpp
@@ -168,9 +168,6 @@ public:
void decrReference( PxLightCpuTask& lighttask );
void addReference( PxLightCpuTask& lighttask );
- void emitStartEvent( PxBaseTask& basetask, uint32_t threadId=0);
- void emitStopEvent( PxBaseTask& basetask, uint32_t threadId=0);
-
PxErrorCallback& mErrorCallback;
PxCpuDispatcher *mCpuDispatcher;
PxGpuDispatcher *mGpuDispatcher;
@@ -304,35 +301,6 @@ void PxTaskMgr::addReference(PxLightCpuTask& lighttask)
shdfnd::atomicIncrement(&lighttask.mRefCount);
}
-void PxTaskMgr::emitStartEvent(PxBaseTask& basetask, uint32_t threadId)
-{
-#if DOT_LOG
- currentTask = &basetask;
-#endif
-
- PxBaseTask* tmp = &basetask;
- PX_UNUSED(tmp);
- PX_UNUSED(threadId);
-
- /* This does not need a lock! */
-#if PX_SUPPORT_PXTASK_PROFILING
- //PX_COMPILE_TIME_ASSERT(sizeof(PxProfileEventId::mEventId) == sizeof(PxBaseTask::mEventID));
- PX_PROFILE_START_CROSSTHREAD(basetask.getName(),0);
-#endif
-}
-
-void PxTaskMgr::emitStopEvent(PxBaseTask& basetask, uint32_t threadId)
-{
- PxBaseTask* tmp = &basetask;
- PX_UNUSED(tmp);
- PX_UNUSED(threadId);
-
- /* This does not need a lock! */
-#if PX_SUPPORT_PXTASK_PROFILING
- //PX_COMPILE_TIME_ASSERT(sizeof(PxProfileEventId::mEventId) == sizeof(PxBaseTask::mEventID));
- PX_PROFILE_STOP_CROSSTHREAD(basetask.getName(),0);
-#endif
-}
/*
* Called by the owner (Scene) at the start of every frame, before