diff options
Diffstat (limited to 'PxShared')
29 files changed, 145 insertions, 488 deletions
diff --git a/PxShared/include/cudamanager/PxCudaContextManager.h b/PxShared/include/cudamanager/PxCudaContextManager.h index 24f4370..aca1112 100644 --- a/PxShared/include/cudamanager/PxCudaContextManager.h +++ b/PxShared/include/cudamanager/PxCudaContextManager.h @@ -23,7 +23,7 @@ // components in life support devices or systems without express written approval of // NVIDIA Corporation. // -// Copyright (c) 2008-2014 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. #ifndef PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H @@ -42,6 +42,7 @@ /* Forward decl to avoid inclusion of cuda.h */ typedef struct CUctx_st *CUcontext; typedef struct CUgraphicsResource_st *CUgraphicsResource; +typedef int CUdevice; namespace physx { @@ -58,7 +59,6 @@ struct PxCudaInteropMode enum Enum { NO_INTEROP = 0, - D3D9_INTEROP, D3D10_INTEROP, D3D11_INTEROP, OGL_INTEROP, @@ -154,6 +154,9 @@ public: * to every CUDA work submission, so we recommend that you carefully tune * this initial base memory size to closely approximate the amount of * memory your application will consume. + + Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured + for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. */ uint32_t memoryBaseSize[PxCudaBufferMemorySpace::COUNT]; @@ -162,11 +165,17 @@ public: * * The memory manager will dynamically grow and shrink in blocks multiple of * this page size. Size has to be power of two and bigger than 0. + + Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured + for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. */ uint32_t memoryPageSize[PxCudaBufferMemorySpace::COUNT]; /** * \brief Maximum size of memory that the memory manager will allocate + + Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured + for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. */ uint32_t maxMemorySize[PxCudaBufferMemorySpace::COUNT]; @@ -223,7 +232,7 @@ public: * harmfull to (re)acquire the context in code that is shared between * GpuTasks and non-task functions. */ - virtual CUcontext acquireContext() = 0; + virtual void acquireContext() = 0; /** * \brief Release the CUDA context from the current thread @@ -234,9 +243,16 @@ public: */ virtual void releaseContext() = 0; + /** + * \brief Return the CUcontext + */ + virtual CUcontext getContext() = 0; + /** * \brief Return the PxCudaMemoryManager instance associated with this * CUDA context + * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured + * for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. */ virtual PxCudaMemoryManager *getMemoryManager() = 0; @@ -268,6 +284,7 @@ public: virtual bool supportsArchSM35() const = 0; //!< GK110 virtual bool supportsArchSM50() const = 0; //!< GM100 virtual bool supportsArchSM52() const = 0; //!< GM200 + virtual bool supportsArchSM60() const = 0; //!< GP100 virtual bool isIntegrated() const = 0; //!< true if GPU is an integrated (MCP) part virtual bool canMapHostMemory() const = 0; //!< true if GPU map host memory to GPU (0-copy) virtual int getDriverVersion() const = 0; //!< returns cached value of cuGetDriverVersion() @@ -278,6 +295,7 @@ public: virtual int getSharedMemPerMultiprocessor() const = 0; //!< returns total amount of shared memory available per multiprocessor in bytes virtual unsigned int getMaxThreadsPerBlock() const = 0; //!< returns the maximum number of threads per block virtual const char *getDeviceName() const = 0; //!< returns device name retrieved from driver + virtual CUdevice getDevice() const = 0; //!< returns device handle retrieved from driver virtual PxCudaInteropMode::Enum getInteropMode() const = 0; //!< interop mode the context was created with virtual void setUsingConcurrentStreams(bool) = 0; //!< turn on/off using concurrent streams for GPU work diff --git a/PxShared/include/foundation/PxIntrinsics.h b/PxShared/include/foundation/PxIntrinsics.h index 471f934..b4aff28 100644 --- a/PxShared/include/foundation/PxIntrinsics.h +++ b/PxShared/include/foundation/PxIntrinsics.h @@ -38,8 +38,8 @@ #include "foundation/unix/PxUnixIntrinsics.h" #elif PX_XBOXONE #include "foundation/XboxOne/PxXboxOneIntrinsics.h" -#elif PX_NX -#include "foundation/nx/PxNXIntrinsics.h" +#elif PX_SWITCH +#include "foundation/switch/PxSwitchIntrinsics.h" #else #error "Platform not supported!" #endif diff --git a/PxShared/include/foundation/PxPreprocessor.h b/PxShared/include/foundation/PxPreprocessor.h index 446ca76..9b6e0f4 100644 --- a/PxShared/include/foundation/PxPreprocessor.h +++ b/PxShared/include/foundation/PxPreprocessor.h @@ -88,7 +88,7 @@ Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSyst #elif defined(__ORBIS__) #define PX_PS4 1 #elif defined(__NX__) -#define PX_NX 1 +#define PX_SWITCH 1 #else #error "Unknown operating system" #endif @@ -161,8 +161,8 @@ define anything not defined on this platform to 0 #ifndef PX_PS4 #define PX_PS4 0 #endif -#ifndef PX_NX -#define PX_NX 0 +#ifndef PX_SWITCH +#define PX_SWITCH 0 #endif #ifndef PX_X64 #define PX_X64 0 @@ -425,7 +425,7 @@ General defines */ // static assert -#if(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (PX_PS4) || (PX_APPLE_FAMILY) || (PX_NX) || (PX_CLANG && PX_ARM) +#if(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (PX_PS4) || (PX_APPLE_FAMILY) || (PX_SWITCH) || (PX_CLANG && PX_ARM) #define PX_COMPILE_TIME_ASSERT(exp) typedef char PxCompileTimeAssert_Dummy[(exp) ? 1 : -1] __attribute__((unused)) #else #define PX_COMPILE_TIME_ASSERT(exp) typedef char PxCompileTimeAssert_Dummy[(exp) ? 1 : -1] @@ -532,5 +532,11 @@ protected: #define PX_SUPPORT_COMPUTE_PHYSX 0 +#ifndef PX_SUPPORT_EXTERN_TEMPLATE +#define PX_SUPPORT_EXTERN_TEMPLATE ((!PX_ANDROID) && (PX_VC != 11)) +#else +#define PX_SUPPORT_EXTERN_TEMPLATE 0 +#endif + /** @} */ #endif // #ifndef PXFOUNDATION_PXPREPROCESSOR_H diff --git a/PxShared/include/foundation/nx/PxNXIntrinsics.h b/PxShared/include/foundation/nx/PxNXIntrinsics.h deleted file mode 100644 index adf3bf5..0000000 --- a/PxShared/include/foundation/nx/PxNXIntrinsics.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. - * - * NVIDIA CORPORATION and its licensors retain all intellectual property - * and proprietary rights in and to this software, related documentation - * and any modifications thereto. Any use, reproduction, disclosure or - * distribution of this software and related documentation without an express - * license agreement from NVIDIA CORPORATION is strictly prohibited. - */ -// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. -// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. - - -#ifndef PX_FOUNDATION_PX_NX_INTRINSICS_H -#define PX_FOUNDATION_PX_NX_INTRINSICS_H - -#include "foundation/Px.h" -#include "foundation/PxAssert.h" - -#if !PX_NX - #error "This file should only be included by NX builds!!" -#endif - -#include <math.h> -#include <float.h> - -#include "nn/cstd/cstd_CMath.h" - -#if !PX_DOXYGEN -namespace physx -{ -namespace intrinsics -{ -#endif - - //! \brief platform-specific absolute value - PX_CUDA_CALLABLE PX_FORCE_INLINE float abs(float a) { return ::fabsf(a); } - - //! \brief platform-specific select float - PX_CUDA_CALLABLE PX_FORCE_INLINE float fsel(float a, float b, float c) { return (a >= 0.0f) ? b : c; } - - //! \brief platform-specific sign - PX_CUDA_CALLABLE PX_FORCE_INLINE float sign(float a) { return (a >= 0.0f) ? 1.0f : -1.0f; } - - //! \brief platform-specific reciprocal - PX_CUDA_CALLABLE PX_FORCE_INLINE float recip(float a) { return 1.0f/a; } - - //! \brief platform-specific reciprocal estimate - PX_CUDA_CALLABLE PX_FORCE_INLINE float recipFast(float a) { return 1.0f/a; } - - //! \brief platform-specific square root - PX_CUDA_CALLABLE PX_FORCE_INLINE float sqrt(float a) { return ::sqrtf(a); } - - //! \brief platform-specific reciprocal square root - PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrt(float a) { return 1.0f/::sqrtf(a); } - - //! \brief platform-specific reciprocal square root estimate - PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrtFast(float a) { return 1.0f/::sqrtf(a); } - - //! \brief platform-specific sine - PX_CUDA_CALLABLE PX_FORCE_INLINE float sin(float a) { return ::sinf(a); } - - //! \brief platform-specific cosine - PX_CUDA_CALLABLE PX_FORCE_INLINE float cos(float a) { return ::cosf(a); } - - //! \brief platform-specific minimum - PX_CUDA_CALLABLE PX_FORCE_INLINE float selectMin(float a, float b) { return a<b ? a : b; } - - //! \brief platform-specific maximum - PX_CUDA_CALLABLE PX_FORCE_INLINE float selectMax(float a, float b) { return a>b ? a : b; } - - //! \brief platform-specific finiteness check - PX_CUDA_CALLABLE PX_FORCE_INLINE bool isFinite(float a) - { -#ifdef __CUDACC__ - return isfinite(a) ? true : false; -#else - return !nn::cstd::IsNan(a) && !nn::cstd::IsInf(a); -#endif - } - - //! \brief platform-specific finiteness check - PX_CUDA_CALLABLE PX_FORCE_INLINE bool isFinite(double a) - { -#ifdef __CUDACC__ - return isfinite(a) ? true : false; -#else - return !nn::cstd::IsNan(a) && !nn::cstd::IsInf(a); -#endif - } - - /*! - Sets \c count bytes starting at \c dst to zero. - */ - PX_FORCE_INLINE void* memZero(void* PX_RESTRICT dest, uint32_t count) - { - return memset(dest, 0, count); - } - - /*! - Sets \c count bytes starting at \c dst to \c c. - */ - PX_FORCE_INLINE void* memSet(void* PX_RESTRICT dest, int32_t c, uint32_t count) - { - return memset(dest, c, count); - } - - /*! - Copies \c count bytes from \c src to \c dst. User memMove if regions overlap. - */ - PX_FORCE_INLINE void* memCopy(void* PX_RESTRICT dest, const void* PX_RESTRICT src, uint32_t count) - { - return memcpy(dest, src, count); - } - - /*! - Copies \c count bytes from \c src to \c dst. Supports overlapping regions. - */ - PX_FORCE_INLINE void* memMove(void* PX_RESTRICT dest, const void* PX_RESTRICT src, uint32_t count) - { - return memmove(dest, src, count); - } - - /*! - Set 128B to zero starting at \c dst+offset. Must be aligned. - */ - PX_FORCE_INLINE void memZero128(void* PX_RESTRICT dest, uint32_t offset = 0) - { - PX_ASSERT(((size_t(dest)+offset) & 0x7f) == 0); - memSet((char* PX_RESTRICT)dest+offset, 0, 128); - } - -#if !PX_DOXYGEN -} // namespace intrinsics -} // namespace physx -#endif - -#endif diff --git a/PxShared/include/task/PxTask.h b/PxShared/include/task/PxTask.h index 2761109..85d91da 100644 --- a/PxShared/include/task/PxTask.h +++ b/PxShared/include/task/PxTask.h @@ -45,7 +45,7 @@ namespace physx class PxBaseTask { public: - PxBaseTask() : mEventID(0xFFFF), mProfileStat(0), mTm(0) {} + PxBaseTask() : mContextID(0), mTm(NULL) {} virtual ~PxBaseTask() {} /** @@ -78,50 +78,24 @@ public: * references to it - so it may safely run its destructor, recycle itself, etc. * provided no additional user references to the task exist */ - virtual void release() = 0; - /** - * \brief Execute user run method with wrapping profiling events. - * - * Optional entry point for use by CpuDispatchers. - * - * \param[in] threadId The threadId of the thread that executed the task. - */ - PX_INLINE void runProfiled(uint32_t threadId=0) - { - mTm->emitStartEvent(*this, threadId); - run(); - mTm->emitStopEvent(*this, threadId); - } - - /** - * \brief Specify stop event statistic - * - * If called before or while the task is executing, the given value - * will appear in the task's event bar in the profile viewer - * - * \param[in] stat The stat to signal when the task is finished - */ - PX_INLINE void setProfileStat( uint16_t stat ) - { - mProfileStat = stat; - } - /** * \brief Return PxTaskManager to which this task was submitted * * Note, can return NULL if task was not submitted, or has been * completed. */ - PX_INLINE PxTaskManager* getTaskManager() const + PX_FORCE_INLINE PxTaskManager* getTaskManager() const { return mTm; } + PX_FORCE_INLINE void setContextId(PxU64 id) { mContextID = id; } + PX_FORCE_INLINE PxU64 getContextId() const { return mContextID; } + protected: - uint16_t mEventID; //!< Registered profile event ID - uint16_t mProfileStat; //!< Profiling statistic + PxU64 mContextID; //!< Context ID for profiler interface PxTaskManager* mTm; //!< Owning PxTaskManager instance friend class PxTaskMgr; @@ -212,7 +186,6 @@ public: { mStreamIndex = 0; mPreSyncRequired = false; - mProfileStat = 0; } /** diff --git a/PxShared/include/task/PxTaskManager.h b/PxShared/include/task/PxTaskManager.h index f6f29c4..f40f7b1 100644 --- a/PxShared/include/task/PxTaskManager.h +++ b/PxShared/include/task/PxTaskManager.h @@ -215,9 +215,6 @@ protected: virtual void decrReference(PxLightCpuTask&) = 0; virtual void addReference(PxLightCpuTask&) = 0; - virtual void emitStartEvent(PxBaseTask&, uint32_t threadId=0) = 0; - virtual void emitStopEvent(PxBaseTask&, uint32_t threadId=0) = 0; - /*! \endcond */ friend class PxBaseTask; diff --git a/PxShared/src/compiler/cmake/Android/CMakeLists.txt b/PxShared/src/compiler/cmake/Android/CMakeLists.txt index 0499c29..9aa00f9 100644 --- a/PxShared/src/compiler/cmake/Android/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/Android/CMakeLists.txt @@ -1,34 +1,37 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared ) include(../common/CMakeLists.txt) STRING(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE) -IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Android - SET(TARGET_BUILD_PLATFORM "Android") +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to android + SET(TARGET_BUILD_PLATFORM "android") ENDIF() -SET(PLATFORM_LIST Android) +SET(PLATFORM_LIST android) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) ENDIF() if(${ANDROID_ABI} STREQUAL "armeabi-v7a") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof ") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -funwind-tables -fomit-frame-pointer -funswitch-loops -finline-limit=300 -fno-strict-aliasing -fstack-protector -Wno-invalid-offsetof ") +elseif(${ANDROID_ABI} STREQUAL "armeabi-v7a with NEON") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -ffast-math -fno-exceptions -ffunction-sections -funwind-tables -fomit-frame-pointer -funswitch-loops -finline-limit=300 -fno-strict-aliasing -fstack-protector -Wno-invalid-offsetof ") elseif(${ANDROID_ABI} STREQUAL "arm64-v8a") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof ") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -Wno-invalid-offsetof ") elseif(${ANDROID_ABI} STREQUAL "x86") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof -fpack-struct=8 -malign-double ") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -Wno-invalid-offsetof -fpack-struct=8 -malign-double ") elseif(${ANDROID_ABI} STREQUAL "x86_64") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof -mstackrealign -msse3 ") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -Wno-invalid-offsetof -mstackrealign -msse3 ") endif() SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g") -SET(CMAKE_CXX_FLAGS_CHECKED "-O2") -SET(CMAKE_CXX_FLAGS_PROFILE "-O2") -SET(CMAKE_CXX_FLAGS_RELEASE "-O2") +SET(CMAKE_CXX_FLAGS_CHECKED "-O3") +SET(CMAKE_CXX_FLAGS_PROFILE "-O3") +SET(CMAKE_CXX_FLAGS_RELEASE "-O3") -SET(PXSHARED_ANDROID_COMPILE_DEFS _LIB;) +SET(PXSHARED_ANDROID_COMPILE_DEFS _LIB;__STDC_LIMIT_MACROS;) SET(PXSHARED_ANDROID_DEBUG_COMPILE_DEFS _DEBUG;PX_DEBUG=1;PX_CHECKED=1) SET(PXSHARED_ANDROID_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1) SET(PXSHARED_ANDROID_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1) diff --git a/PxShared/src/compiler/cmake/IOS/CMakeLists.txt b/PxShared/src/compiler/cmake/IOS/CMakeLists.txt index d281e32..4e2f814 100644 --- a/PxShared/src/compiler/cmake/IOS/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/IOS/CMakeLists.txt @@ -1,12 +1,13 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared CXX) include(../common/CMakeLists.txt) -IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to IOS - SET(TARGET_BUILD_PLATFORM "IOS") +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to ios + SET(TARGET_BUILD_PLATFORM "ios") ENDIF() -SET(PLATFORM_LIST IOS) +SET(PLATFORM_LIST ios) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) diff --git a/PxShared/src/compiler/cmake/Linux/CMakeLists.txt b/PxShared/src/compiler/cmake/Linux/CMakeLists.txt index 2fa592e..ea25a8b 100644 --- a/PxShared/src/compiler/cmake/Linux/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/Linux/CMakeLists.txt @@ -1,13 +1,14 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared CXX) include(../common/CMakeLists.txt) STRING(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE) -IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Linux - SET(TARGET_BUILD_PLATFORM "Linux") +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to linux + SET(TARGET_BUILD_PLATFORM "linux") ENDIF() -SET(PLATFORM_LIST Linux) +SET(PLATFORM_LIST linux) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) @@ -16,16 +17,16 @@ ENDIF() IF (${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu") IF ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") # using Clang - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-invalid-noreturn -Wno-return-type-c-linkage -Wno-reserved-id-macro -Wno-c++98-compat-pedantic -Wno-unused-local-typedef -Wno-old-style-cast -Wno-newline-eof -Wno-unused-private-field -Wno-undefined-func-template -Wno-format-nonliteral -Wno-implicit-fallthrough -Wno-undefined-reinterpret-cast") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-invalid-noreturn -Wno-return-type-c-linkage -Wno-reserved-id-macro -Wno-c++98-compat-pedantic -Wno-unused-local-typedef -Wno-old-style-cast -Wno-newline-eof -Wno-unused-private-field -Wno-undefined-func-template -Wno-format-nonliteral -Wno-implicit-fallthrough -Wno-undefined-reinterpret-cast -Wno-disabled-macro-expansion") ELSEIF ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") # using GCC SET(LIBPATH_SUFFIX "x64") SET(CMAKE_CXX_FLAGS "-Werror -m64 -fPIC -msse2 -mfpmath=sse -ffast-math -fno-exceptions -fno-rtti -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wextra -fno-strict-aliasing -fdiagnostics-show-option -Wno-invalid-offsetof -Wno-uninitialized -Wno-missing-field-initializers") ENDIF("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") ELSEIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "arm-unknown-linux-gnueabihf") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -mfpu=neon -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-implicit-fallthrough") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -mfpu=neon -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-implicit-fallthrough -Wno-disabled-macro-expansion") ELSEIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "aarch64-unknown-linux-gnueabi") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-unused-local-typedef -Wno-implicit-fallthrough") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-unused-local-typedef -Wno-implicit-fallthrough -Wno-disabled-macro-expansion") ELSE(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu") MESSAGE(FATAL_ERROR "Unknown CMAKE_LIBRARY_ARCHITECTURE ${CMAKE_LIBRARY_ARCHITECTURE}") ENDIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu") @@ -36,7 +37,7 @@ SET(CMAKE_CXX_FLAGS_CHECKED "-O3 -g -gdwarf-3") SET(CMAKE_CXX_FLAGS_PROFILE "-O3 -g -gdwarf-3") SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -gdwarf-3") -IF(DEFINED PX_GENERATE_GPU_PROJECTS) +IF(DEFINED GENERATE_GPU_PROJECTS) SET(PXSHARED_LINUX_COMPILE_DEFS _LIB) ELSE() # Disable cuda and dx for all projects on windows @@ -71,7 +72,7 @@ IF(DEFINED PX_SELECT_COMPONENTS) INCLUDE(PxTask.cmake) endif() if ("PxCudaContextManager" IN_LIST PX_SELECT_COMPONENTS) - IF(DEFINED PX_GENERATE_GPU_PROJECTS) + IF(DEFINED GENERATE_GPU_PROJECTS) INCLUDE(PxCudaContextManager.cmake) ENDIF() endif() @@ -80,7 +81,7 @@ INCLUDE(PxFoundation.cmake) INCLUDE(PsFastXml.cmake) INCLUDE(PxPvdSDK.cmake) INCLUDE(PxTask.cmake) -IF(DEFINED PX_GENERATE_GPU_PROJECTS) +IF(DEFINED GENERATE_GPU_PROJECTS) INCLUDE(PxCudaContextManager.cmake) ENDIF() ENDIF() diff --git a/PxShared/src/compiler/cmake/Mac/CMakeLists.txt b/PxShared/src/compiler/cmake/Mac/CMakeLists.txt index beb06bc..3242b46 100644 --- a/PxShared/src/compiler/cmake/Mac/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/Mac/CMakeLists.txt @@ -1,12 +1,13 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared CXX) include(../common/CMakeLists.txt) -IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Mac - SET(TARGET_BUILD_PLATFORM "Mac") +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to mac + SET(TARGET_BUILD_PLATFORM "mac") ENDIF() -SET(PLATFORM_LIST Mac) +SET(PLATFORM_LIST mac) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) diff --git a/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake b/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake index c1675ae..f7bc761 100644 --- a/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake +++ b/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake @@ -13,7 +13,7 @@ FIND_PATH( NVTOOLSEXT_INCLUDE_DIRS nvToolsExt.h INCLUDE(FindPackageHandleStandardArgs) -IF(TARGET_BUILD_PLATFORM STREQUAL "Windows") +IF(TARGET_BUILD_PLATFORM STREQUAL "windows") # NOTE: Doesn't make sense for all platforms - ARM IF(CMAKE_CL_64) SET(NVTOOLSEXT_LIBNAME "nvToolsExt64_1") diff --git a/PxShared/src/compiler/cmake/common/CMakeLists.txt b/PxShared/src/compiler/cmake/common/CMakeLists.txt index 22d2097..3bbb57a 100644 --- a/PxShared/src/compiler/cmake/common/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/common/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.3) -PROJECT(PxShared CXX) - CMAKE_POLICY(SET CMP0057 NEW) # Enable IN_LIST IF(DEFINED ENV{GW_DEPS_ROOT}) diff --git a/PxShared/src/compiler/cmake/html5/CMakeLists.txt b/PxShared/src/compiler/cmake/html5/CMakeLists.txt index 8b9587a..e0d2776 100644 --- a/PxShared/src/compiler/cmake/html5/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/html5/CMakeLists.txt @@ -1,10 +1,11 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared CXX) #set(CMAKE_VERBOSE_MAKEFILE ON) include(../common/CMakeLists.txt) IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to HTML5 - SET(TARGET_BUILD_PLATFORM "HTML5") + SET(TARGET_BUILD_PLATFORM "html5") ENDIF() SET(PLATFORM_LIST HTML5) diff --git a/PxShared/src/compiler/cmake/windows/CMakeLists.txt b/PxShared/src/compiler/cmake/windows/CMakeLists.txt index 39b7dfc..fe2b00d 100644 --- a/PxShared/src/compiler/cmake/windows/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/windows/CMakeLists.txt @@ -1,12 +1,13 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared CXX) include(../common/CMakeLists.txt) -IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Windows - SET(TARGET_BUILD_PLATFORM "Windows") +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to windows + SET(TARGET_BUILD_PLATFORM "windows") ENDIF() -SET(PLATFORM_LIST Windows) +SET(PLATFORM_LIST windows) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) @@ -33,7 +34,7 @@ SET(CMAKE_SHARED_LINKER_FLAGS "/DEBUG") # Controls PX_NVTX for all projects on windows SET(PXSHARED_WINDOWS_ENABLE_NVTX 0) -IF(DEFINED PX_GENERATE_GPU_PROJECTS) +IF(DEFINED LINK_GPU_BINARIES) SET(PXSHARED_WINDOWS_COMPILE_DEFS WIN32;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_WINSOCK_DEPRECATED_NO_WARNINGS;) ELSE() # Disable cuda and dx for all projects on windows @@ -44,6 +45,10 @@ SET(PXSHARED_WINDOWS_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1;PX_NVTX=${PXSHARED SET(PXSHARED_WINDOWS_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1;PX_NVTX=${PXSHARED_WINDOWS_ENABLE_NVTX}) SET(PXSHARED_WINDOWS_RELEASE_COMPILE_DEFS NDEBUG) +IF(DEFINED PX_SCALAR_MATH) + ADD_DEFINITIONS(-DPX_SIMD_DISABLED) +ENDIF() + IF(CMAKE_CL_64) ADD_DEFINITIONS(-DWIN64) ENDIF(CMAKE_CL_64) @@ -74,7 +79,7 @@ IF(DEFINED PX_SELECT_COMPONENTS) INCLUDE(PxTask.cmake) endif() if ("PxCudaContextManager" IN_LIST PX_SELECT_COMPONENTS) - IF(DEFINED PX_GENERATE_GPU_PROJECTS) + IF(DEFINED GENERATE_GPU_PROJECTS) INCLUDE(PxCudaContextManager.cmake) ENDIF() endif() @@ -83,7 +88,7 @@ INCLUDE(PxFoundation.cmake) INCLUDE(PsFastXml.cmake) INCLUDE(PxPvdSDK.cmake) INCLUDE(PxTask.cmake) -IF(DEFINED PX_GENERATE_GPU_PROJECTS) +IF(DEFINED GENERATE_GPU_PROJECTS) INCLUDE(PxCudaContextManager.cmake) ENDIF() ENDIF() diff --git a/PxShared/src/cudamanager/include/GpuDispatcher.h b/PxShared/src/cudamanager/include/GpuDispatcher.h index aedb345..10c412f 100644 --- a/PxShared/src/cudamanager/include/GpuDispatcher.h +++ b/PxShared/src/cudamanager/include/GpuDispatcher.h @@ -258,8 +258,6 @@ public: ~PxGpuWorkerThread(); void setCudaContext(PxCudaContextManager& ctx); - void emitStartEvent(const char *id); - void emitStopEvent(const char *id); /* API to TaskManager */ void startSimulation(); diff --git a/PxShared/src/cudamanager/src/BlockingWait.cpp b/PxShared/src/cudamanager/src/BlockingWait.cpp index fada532..8a2cc44 100644 --- a/PxShared/src/cudamanager/src/BlockingWait.cpp +++ b/PxShared/src/cudamanager/src/BlockingWait.cpp @@ -63,8 +63,9 @@ void PxGpuWorkerThread::blockingWaitFunc() } else if (!mFailureDetected) { - emitStartEvent("GpuDispatcher.BlockingWaitEvent"); - +#if PX_SUPPORT_PXTASK_PROFILING + PX_PROFILE_ZONE("GpuDispatcher.BlockingWaitEvent", 0); +#endif if (1 & ~intptr_t(b.blockingStream)) { GD_CHECK_CALL(cuStreamSynchronize(b.blockingStream)); @@ -73,8 +74,6 @@ void PxGpuWorkerThread::blockingWaitFunc() { GD_CHECK_CALL(cuEventSynchronize(b.blockingEvent)); } - - emitStopEvent("GpuDispatcher.BlockingWaitEvent"); } if (b.blockingEvent) diff --git a/PxShared/src/cudamanager/src/CudaContextManager.cpp b/PxShared/src/cudamanager/src/CudaContextManager.cpp index b5b6efc..e05911e 100644 --- a/PxShared/src/cudamanager/src/CudaContextManager.cpp +++ b/PxShared/src/cudamanager/src/CudaContextManager.cpp @@ -23,7 +23,7 @@ // components in life support devices or systems without express written approval of // NVIDIA Corporation. // -// Copyright (c) 2008-2014 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. #include "foundation/PxAssert.h" #include "foundation/PxErrorCallback.h" @@ -88,102 +88,6 @@ static void* GetProcAddress(void* handle, const char* name) { return dlsym(handl #define ENABLE_DEVICE_INFO_BRINGUP 0 -#include "GPUProfile.h" - -#if ENABLE_CUDA_DEVICE_RESET -#include "cudaProfiler.h" -#endif - -#if USE_PERFKIT -#pragma warning (push) -#pragma warning (disable : 4099) -#pragma warning (disable : 4191) -#define NVPM_INITGUID -#include <stdio.h> -#include "cuda.h" -#include "../../../../../../../externals/nvPerfKit/4.1.0.14260/inc/NvPmApi.Manager.h" -static NvPmApiManager S_NVPMManager; -extern NvPmApiManager *GetNvPmApiManager() {return &S_NVPMManager;} -const NvPmApi *GetNvPmApi() {return S_NVPMManager.Api();} -NVPMContext hNVPMContext(0); - -void initPerfKit() -{ - //Sync with GPU - cuCtxSynchronize(); - - // Reset counters - uint32_t nCount; - GetNvPmApi()->Sample(hNVPMContext, NULL, &nCount); -} - -void endPerfKit() -{ - //Sync with GPU - cuCtxSynchronize(); - - uint32_t nCount; - GetNvPmApi()->Sample(hNVPMContext, NULL, &nCount); - - uint64_t value; - uint64_t cycle; - - uint64_t sum = 0; - uint64_t maxVal = 0; - char name[512]; - - int nvStatus = 0; - - PX_UNUSED(value); - PX_UNUSED(cycle); - PX_UNUSED(sum); - PX_UNUSED(maxVal); - PX_UNUSED(name); - PX_UNUSED(nvStatus); - - printf("counters:\n"); - -#if COUNT_L2_TO_L1_BYTES - nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, "l2_read_bytes", 0, &value, &cycle); - printf("L2->L1 bytes %d\n",value); -#elif COUNT_SM_TO_L1_QUERIES - nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, "tex_cache_sector_queries", 0, &value, &cycle); - printf("SM->L1 queries %d\n",value); -#endif - -#if COUNT_INST_EXECUTED || COUNT_STORE_INST_EXECUTED || COUNT_ACTIVE_CYCLES || COUNT_ACTIVE_WARPS - for (int i = 0; i != SM_COUNT; i++) - { -#if COUNT_INST_EXECUTED - sprintf_s(name, 512, "sm_inst_executed_vsm%d", i); -#elif COUNT_STORE_INST_EXECUTED - sprintf_s(name, 512, "sm_inst_executed_global_stores_vsm%d", i); -#elif COUNT_ACTIVE_CYCLES - sprintf_s(name, 512, "sm_active_cycles_vsm%d", i); -#elif COUNT_ACTIVE_WARPS - sprintf_s(name, 512, "sm_active_warps_vsm%d", i); -#endif - nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, name, 0, &value, &cycle); - - sum += value; - maxVal = physx::PxMax(maxVal, value); - } -#if COUNT_ACTIVE_CYCLES - printf("sum %I64d\n", sum); -#else - printf("sum %I64d\n", sum); -#endif - - if (!nvStatus) - { - PX_ASSERT(false); - } -#endif -} - -#pragma warning (pop) -#endif - namespace physx { @@ -201,7 +105,7 @@ public: ~CudaCtxMgr(); bool safeDelayImport(PxErrorCallback& errorCallback); - CUcontext acquireContext(); + void acquireContext(); void releaseContext(); /* All these methods can be called without acquiring the context */ @@ -219,6 +123,7 @@ public: bool supportsArchSM35() const; // GK110 bool supportsArchSM50() const; // GM100 bool supportsArchSM52() const; // GM200 + bool supportsArchSM60() const; // GP100 bool isIntegrated() const; // true if GPU is integrated (MCP) part bool canMapHostMemory() const; // true if GPU map host memory to GPU int getDriverVersion() const; @@ -230,6 +135,7 @@ public: unsigned int getClockRate() const; const char* getDeviceName() const; + CUdevice getDevice() const; const CUdevprop* getDeviceProperties() const; PxCudaInteropMode::Enum getInteropMode() const; @@ -251,6 +157,8 @@ public: void release(); + CUcontext getContext() { return mCtx; } + private: int mSceneCount; @@ -330,6 +238,10 @@ bool CudaCtxMgr::supportsArchSM52() const { return mIsValid && ((mComputeCapMajor > 5) || (mComputeCapMajor == 5 && mComputeCapMinor >= 2)); } +bool CudaCtxMgr::supportsArchSM60() const +{ + return mIsValid && mComputeCapMajor >= 6; +} bool CudaCtxMgr::isIntegrated() const { @@ -380,6 +292,18 @@ const char* CudaCtxMgr::getDeviceName() const } } +CUdevice CudaCtxMgr::getDevice() const +{ + if (mIsValid) + { + return mDevHandle; + } + else + { + return -1; + } +} + PxCudaInteropMode::Enum CudaCtxMgr::getInteropMode() const { return mInteropMode; @@ -504,8 +428,7 @@ CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& er if (PhysXDeviceSettings::isUsingDedicatedGPU() == 1 || sliEnabled) { - if (mInteropMode == PxCudaInteropMode::D3D9_INTEROP || - mInteropMode == PxCudaInteropMode::D3D10_INTEROP || + if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP || mInteropMode == PxCudaInteropMode::D3D11_INTEROP) { mInteropMode = PxCudaInteropMode::NO_INTEROP; @@ -562,18 +485,6 @@ CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& er mOwnContext = true; } #if PX_WIN32 || PX_WIN64 - else if (mInteropMode == PxCudaInteropMode::D3D9_INTEROP) - { - status = cuD3D9CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags, - reinterpret_cast<IDirect3DDevice9*>(desc.graphicsDevice)); - - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D9CtxCreate failed",__FILE__,__LINE__); - return; - } - mOwnContext = true; - } else if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP) { status = cuD3D10CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags, @@ -710,73 +621,6 @@ CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& er errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Number of SM: %d", mMultiprocessorCount); errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Max Threads Per Block: %d", mMaxThreadsPerBlock); #endif - -#if USE_PERFKIT - { -#if _WIN64 - wchar_t * dllName = L"..\\..\\..\\..\\..\\externals\\nvPerfKit\\4.1.0.14260\\bin\\win7_x64\\NvPmApi.Core.dll"; -#else - wchar_t * dllName = L"..\\..\\..\\..\\..\\externals\\nvPerfKit\\4.1.0.14260\\bin\\win7_x86\\NvPmApi.Core.dll"; -#endif - - NVPMRESULT nvResult; - - if ((nvResult = GetNvPmApiManager()->Construct(dllName)) != NVPM_OK) - { - printf("perfkit error 1\n"); - return; - } - - if ((nvResult = GetNvPmApi()->Init()) != NVPM_OK) - { - printf("perfkit error 2\n"); - return; - } - - acquireContext(); - - CUcontext ctx; - cuCtxGetCurrent(&ctx); - if ((nvResult = GetNvPmApi()->CreateContextFromCudaContext((APIContextHandle)ctx, &hNVPMContext)) != NVPM_OK) - { - printf("perfkit error 3\n"); - return; // This is an error condition - } - - uint32_t nvStatus = 0; - -#if COUNT_L2_TO_L1_BYTES - nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "l2_read_bytes"); -#elif COUNT_SM_TO_L1_QUERIES - nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "tex_cache_sector_queries"); -#endif - -#if COUNT_INST_EXECUTED || COUNT_STORE_INST_EXECUTED || COUNT_ACTIVE_CYCLES || COUNT_ACTIVE_WARPS - char name[512]; - for (int i = 0; i != SM_COUNT; i++) - { -#if COUNT_INST_EXECUTED - sprintf_s(name,512,"sm_inst_executed_vsm%d",i); -#elif COUNT_STORE_INST_EXECUTED - sprintf_s(name, 512, "sm_inst_executed_global_stores_vsm%d",i); -#elif COUNT_ACTIVE_CYCLES - sprintf_s(name, 512, "sm_active_cycles_vsm%d",i); -#elif COUNT_ACTIVE_WARPS - sprintf_s(name, 512, "sm_active_warps_vsm%d",i); -#endif - nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, name); - } -#elif COUNT_GPU_BUSY - nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "gpu_busy"); -#endif - - if (nvStatus != 0) - { - printf("perfkit error 4\n"); - return; // This is an error condition - } - } -#endif } /* Some driver version mismatches can cause delay import crashes. Load NVCUDA.dll @@ -871,10 +715,6 @@ CudaCtxMgr::~CudaCtxMgr() if(!--mManagerRefCount) shdfnd::TlsFree(mContextRefCountTls); #endif - -#if ENABLE_CUDA_DEVICE_RESET - CUT_SAFE_CALL(cuProfilerStop()); -#endif } bool CudaCtxMgr::registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags) @@ -900,9 +740,6 @@ bool CudaCtxMgr::registerResourceInCudaD3D(CUgraphicsResource& resource, void* r switch (mInteropMode) { - case PxCudaInteropMode::D3D9_INTEROP: - ret = cuGraphicsD3D9RegisterResource(&resource, (IDirect3DResource9*)resourcePointer, uint32_t(flags)); - break; case PxCudaInteropMode::D3D10_INTEROP: ret = cuGraphicsD3D10RegisterResource(&resource, (ID3D10Resource*)resourcePointer, uint32_t(flags)); break; @@ -938,7 +775,7 @@ bool CudaCtxMgr::unregisterResourceInCuda(CUgraphicsResource resource) return ret == CUDA_SUCCESS; } -CUcontext CudaCtxMgr::acquireContext() +void CudaCtxMgr::acquireContext() { CUcontext ctx = 0; CUT_SAFE_CALL(cuCtxGetCurrent(&ctx)); @@ -955,8 +792,6 @@ CUcontext CudaCtxMgr::acquireContext() char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls); shdfnd::TlsSet(mContextRefCountTls, ++refCount); #endif - - return mCtx; } void CudaCtxMgr::releaseContext() diff --git a/PxShared/src/cudamanager/src/GpuDispatcher.cpp b/PxShared/src/cudamanager/src/GpuDispatcher.cpp index 0d05a97..432a0cd 100644 --- a/PxShared/src/cudamanager/src/GpuDispatcher.cpp +++ b/PxShared/src/cudamanager/src/GpuDispatcher.cpp @@ -487,21 +487,6 @@ PxGpuWorkerThread::~PxGpuWorkerThread() } } -void PxGpuWorkerThread::emitStartEvent(const char *id) -{ - PX_UNUSED(id); -#if PX_SUPPORT_PXTASK_PROFILING - PX_PROFILE_START_CROSSTHREAD(id,0); -#endif -} - -void PxGpuWorkerThread::emitStopEvent(const char *id) -{ - PX_UNUSED(id); -#if PX_SUPPORT_PXTASK_PROFILING - PX_PROFILE_STOP_CROSSTHREAD(id,0); -#endif -} /* A TaskManager is informing us that its simulation is being stepped */ void PxGpuWorkerThread::startSimulation() @@ -555,15 +540,14 @@ void PxGpuWorkerThread::execute() */ void PxGpuWorkerThread::addCompletionPrereq(PxBaseTask& task) { - if (mFailureDetected) - { + if(mFailureDetected) return; - } - emitStartEvent("GpuDispatcher.AddCompletionEvent"); +#if PX_SUPPORT_PXTASK_PROFILING + PX_PROFILE_ZONE("GpuDispatcher.AddCompletionEvent", task.getContextId()); +#endif task.addReference(); mCompletionTasks.pushBack(&task); - emitStopEvent("GpuDispatcher.AddCompletionEvent"); } namespace @@ -757,7 +741,9 @@ void PxGpuWorkerThread::pollSubmitted(shdfnd::Array<ReadyTask>* ready) void PxGpuWorkerThread::processActiveTasks() { - emitStartEvent("GpuDispatcher.ProcessTasksEvent"); +#if PX_SUPPORT_PXTASK_PROFILING + PX_PROFILE_ZONE("GpuDispatcher.ProcessTasksEvent", 0); // PT: TODO: fix invalid context +#endif if (mFailureDetected) { @@ -766,7 +752,6 @@ void PxGpuWorkerThread::processActiveTasks() mInputReady.reset(); mSubmittedTaskList.popBack()->release(); } - emitStopEvent("GpuDispatcher.ProcessTasksEvent"); return; } @@ -824,13 +809,16 @@ void PxGpuWorkerThread::processActiveTasks() else { const CUstream s = (r.task->mStreamIndex > 0) ? mCachedStreams.get(r.task->mStreamIndex) : 0; + + bool active; + { #if PX_PROFILE - r.task->mTm->emitStartEvent(*r.task); +#if PX_SUPPORT_PXTASK_PROFILING + PX_PROFILE_ZONE(r.task->getName(), r.task->getContextId()); #endif - bool active = r.task->launchInstance(s, int(r.iteration++)); -#if PX_PROFILE - r.task->mTm->emitStopEvent(*r.task); #endif + active = r.task->launchInstance(s, int(r.iteration++)); + } if(singleStream != r.task->mStreamIndex) singleStream = 0; @@ -935,8 +923,6 @@ void PxGpuWorkerThread::processActiveTasks() while (tasksRemain); mCachedNonBlockingEvents.add(nonBlockEv); - - emitStopEvent("GpuDispatcher.ProcessTasksEvent"); } #endif diff --git a/PxShared/src/foundation/include/PsAllocator.h b/PxShared/src/foundation/include/PsAllocator.h index cbf32d3..f988b6c 100644 --- a/PxShared/src/foundation/include/PsAllocator.h +++ b/PxShared/src/foundation/include/PsAllocator.h @@ -108,7 +108,7 @@ #elif PX_XBOXONE #include <malloc.h> #define PxAlloca(x) alloca(x) -#elif PX_NX +#elif PX_SWITCH #include <malloc.h> #define PxAlloca(x) alloca(x) #endif diff --git a/PxShared/src/foundation/include/PsAoS.h b/PxShared/src/foundation/include/PsAoS.h index 641a40a..5a7c82d 100644 --- a/PxShared/src/foundation/include/PsAoS.h +++ b/PxShared/src/foundation/include/PsAoS.h @@ -34,7 +34,7 @@ #if PX_WINDOWS && !PX_NEON #include "windows/PsWindowsAoS.h" -#elif(PX_UNIX_FAMILY || PX_PS4 || PX_NX) +#elif(PX_UNIX_FAMILY || PX_PS4 || PX_SWITCH) #include "unix/PsUnixAoS.h" #elif PX_XBOXONE #include "XboxOne/PsXboxOneAoS.h" diff --git a/PxShared/src/foundation/include/PsInlineAoS.h b/PxShared/src/foundation/include/PsInlineAoS.h index 6d43607..6ae15cf 100644 --- a/PxShared/src/foundation/include/PsInlineAoS.h +++ b/PxShared/src/foundation/include/PsInlineAoS.h @@ -35,7 +35,7 @@ #if PX_WINDOWS #include "windows/PsWindowsTrigConstants.h" #include "windows/PsWindowsInlineAoS.h" -#elif(PX_UNIX_FAMILY || PX_PS4 || PX_NX) +#elif(PX_UNIX_FAMILY || PX_PS4 || PX_SWITCH) #include "unix/PsUnixTrigConstants.h" #include "unix/PsUnixInlineAoS.h" #elif PX_XBOXONE diff --git a/PxShared/src/foundation/include/PsIntrinsics.h b/PxShared/src/foundation/include/PsIntrinsics.h index 1e1b9d1..38b91ba 100644 --- a/PxShared/src/foundation/include/PsIntrinsics.h +++ b/PxShared/src/foundation/include/PsIntrinsics.h @@ -38,8 +38,8 @@ #include "unix/PsUnixIntrinsics.h" #elif PX_XBOXONE #include "XboxOne/PsXboxOneIntrinsics.h" -#elif PX_NX -#include "nx/PsNXIntrinsics.h" +#elif PX_SWITCH +#include "switch/PsSwitchIntrinsics.h" #else #error "Platform not supported!" #endif diff --git a/PxShared/src/foundation/include/PsThread.h b/PxShared/src/foundation/include/PsThread.h index 8ba553a..4e7c104 100644 --- a/PxShared/src/foundation/include/PsThread.h +++ b/PxShared/src/foundation/include/PsThread.h @@ -41,7 +41,7 @@ #if PX_WINDOWS_FAMILY || PX_XBOXONE #define PxSpinLockPause() __asm pause -#elif PX_LINUX || PX_ANDROID || PX_PS4 || PX_APPLE_FAMILY || PX_NX +#elif PX_LINUX || PX_ANDROID || PX_PS4 || PX_APPLE_FAMILY || PX_SWITCH #define PxSpinLockPause() asm("nop") #else #error "Platform not supported!" @@ -173,7 +173,7 @@ class PX_FOUNDATION_API ThreadImpl Change the affinity mask for this thread. The mask is a platform specific value. - On Windows, Linux, PS4, XboxOne and NX platforms, each set mask bit represents + On Windows, Linux, PS4, XboxOne and Switch platforms, each set mask bit represents the index of a logical processor that the OS may schedule thread execution on. Bits outside the range of valid logical processors may be ignored or cause the function to return an error. diff --git a/PxShared/src/foundation/include/PsVecMath.h b/PxShared/src/foundation/include/PsVecMath.h index 4e891d8..ffd2de8 100644 --- a/PxShared/src/foundation/include/PsVecMath.h +++ b/PxShared/src/foundation/include/PsVecMath.h @@ -54,7 +54,7 @@ #define COMPILE_VECTOR_INTRINSICS 1 #elif PX_IOS&& PX_NEON #define COMPILE_VECTOR_INTRINSICS 1 -#elif PX_NX +#elif PX_SWITCH #define COMPILE_VECTOR_INTRINSICS 1 #else #define COMPILE_VECTOR_INTRINSICS 0 diff --git a/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h b/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h index 9bef465..f5dea7b 100644 --- a/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h +++ b/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h @@ -40,8 +40,13 @@ #define BOOL_TO_U16(b) (PxU16)(- PxI32(b)) +#define PX_VECMATH_ASSERT_ENABLED 0 +#if PX_VECMATH_ASSERT_ENABLED #define VECMATHAOS_ASSERT(x) { PX_ASSERT(x); } +#else +#define VECMATHAOS_ASSERT(x) +#endif ///////////////////////////////////////////////////////////////////// ////INTERNAL USE ONLY AND TESTS @@ -1479,7 +1484,7 @@ PX_FORCE_INLINE BoolV BOr(const BoolV a, const BoolV b) PX_FORCE_INLINE PxU32 BAllEq(const BoolV a, const BoolV b) { - return (a.ux == b.ux && a.uy == b.uy && a.uz == b.uz && a.uw == b.uw ? TRUE_TO_U32 : FALSE_TO_U32); + return (a.ux == b.ux && a.uy == b.uy && a.uz == b.uz && a.uw == b.uw ? 1 : 0); } PX_FORCE_INLINE PxU32 BAllEqTTTT(const BoolV a) diff --git a/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h b/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h index 2a0578d..a97f821 100644 --- a/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h +++ b/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h @@ -42,7 +42,7 @@ // "exact" #define VRECIPQ recipq_newton<4> -#if PX_NX +#if PX_SWITCH // StabilizationTests.AveragePoint needs more precision to succeed. #define VRECIP recip_newton<5> #else diff --git a/PxShared/src/foundation/src/PsAssert.cpp b/PxShared/src/foundation/src/PsAssert.cpp index 3070383..295a81e 100644 --- a/PxShared/src/foundation/src/PsAssert.cpp +++ b/PxShared/src/foundation/src/PsAssert.cpp @@ -34,8 +34,8 @@ #if PX_WINDOWS_FAMILY #include <crtdbg.h> -#elif PX_NX -#include "nx/PsNXAbort.h" +#elif PX_SWITCH +#include "switch/PsSwitchAbort.h" #endif namespace @@ -63,7 +63,7 @@ class DefaultAssertHandler : public physx::PxAssertHandler __debugbreak(); #elif PX_WINDOWS_FAMILY&& PX_CHECKED __debugbreak(); -#elif PX_NX +#elif PX_SWITCH abort(buffer); #else abort(); diff --git a/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h b/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h index 22903ec..abac561 100644 --- a/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h +++ b/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h @@ -74,7 +74,7 @@ namespace physx { namespace profile { { static const char* getName() { -#if PX_LINUX || PX_ANDROID || PX_PS4 || PX_IOS || PX_OSX || PX_EMSCRIPTEN || PX_NX +#if PX_LINUX || PX_ANDROID || PX_PS4 || PX_IOS || PX_OSX || PX_EMSCRIPTEN || PX_SWITCH return __PRETTY_FUNCTION__; #else return typeid(T).name(); diff --git a/PxShared/src/task/src/TaskManager.cpp b/PxShared/src/task/src/TaskManager.cpp index ffcbfcd..c6210bd 100644 --- a/PxShared/src/task/src/TaskManager.cpp +++ b/PxShared/src/task/src/TaskManager.cpp @@ -168,9 +168,6 @@ public: void decrReference( PxLightCpuTask& lighttask ); void addReference( PxLightCpuTask& lighttask ); - void emitStartEvent( PxBaseTask& basetask, uint32_t threadId=0); - void emitStopEvent( PxBaseTask& basetask, uint32_t threadId=0); - PxErrorCallback& mErrorCallback; PxCpuDispatcher *mCpuDispatcher; PxGpuDispatcher *mGpuDispatcher; @@ -304,35 +301,6 @@ void PxTaskMgr::addReference(PxLightCpuTask& lighttask) shdfnd::atomicIncrement(&lighttask.mRefCount); } -void PxTaskMgr::emitStartEvent(PxBaseTask& basetask, uint32_t threadId) -{ -#if DOT_LOG - currentTask = &basetask; -#endif - - PxBaseTask* tmp = &basetask; - PX_UNUSED(tmp); - PX_UNUSED(threadId); - - /* This does not need a lock! */ -#if PX_SUPPORT_PXTASK_PROFILING - //PX_COMPILE_TIME_ASSERT(sizeof(PxProfileEventId::mEventId) == sizeof(PxBaseTask::mEventID)); - PX_PROFILE_START_CROSSTHREAD(basetask.getName(),0); -#endif -} - -void PxTaskMgr::emitStopEvent(PxBaseTask& basetask, uint32_t threadId) -{ - PxBaseTask* tmp = &basetask; - PX_UNUSED(tmp); - PX_UNUSED(threadId); - - /* This does not need a lock! */ -#if PX_SUPPORT_PXTASK_PROFILING - //PX_COMPILE_TIME_ASSERT(sizeof(PxProfileEventId::mEventId) == sizeof(PxBaseTask::mEventID)); - PX_PROFILE_STOP_CROSSTHREAD(basetask.getName(),0); -#endif -} /* * Called by the owner (Scene) at the start of every frame, before |